weka.classifiers.AbstractClassifier.java Source code

Java tutorial

Introduction

Here is the source code for weka.classifiers.AbstractClassifier.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    AbstractClassifier.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers;

import weka.core.Attribute;
import weka.core.BatchPredictor;
import weka.core.Capabilities;
import weka.core.CapabilitiesHandler;
import weka.core.CapabilitiesIgnorer;
import weka.core.CommandlineRunnable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializedObject;
import weka.core.Utils;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Vector;

/**
 * Abstract classifier. All schemes for numeric or nominal prediction in Weka
 * extend this class. Note that a classifier MUST either implement
 * distributionForInstance() or classifyInstance().
 * 
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision$
 */
public abstract class AbstractClassifier implements Classifier, BatchPredictor, Cloneable, Serializable,
        OptionHandler, CapabilitiesHandler, RevisionHandler, CapabilitiesIgnorer, CommandlineRunnable {

    /** for serialization */
    private static final long serialVersionUID = 6502780192411755341L;

    /** Whether the classifier is run in debug mode. */
    protected boolean m_Debug = false;

    /** Whether capabilities should not be checked before classifier is built. */
    protected boolean m_DoNotCheckCapabilities = false;

    /**
     * The number of decimal places used when printing numbers in the model.
     */
    public static int NUM_DECIMAL_PLACES_DEFAULT = 2;
    protected int m_numDecimalPlaces = NUM_DECIMAL_PLACES_DEFAULT;

    /** Default preferred batch size for batch predictions */
    public static String BATCH_SIZE_DEFAULT = "100";
    protected String m_BatchSize = BATCH_SIZE_DEFAULT;

    /**
     * Creates a new instance of a classifier given it's class name and (optional)
     * arguments to pass to it's setOptions method. If the classifier implements
     * OptionHandler and the options parameter is non-null, the classifier will
     * have it's options set.
     *
     * @param classifierName the fully qualified class name of the classifier
     * @param options an array of options suitable for passing to setOptions. May
     *          be null.
     * @return the newly created classifier, ready for use.
     * @exception Exception if the classifier name is invalid, or the options
     *              supplied are not acceptable to the classifier
     */
    public static Classifier forName(String classifierName, String[] options) throws Exception {

        return ((AbstractClassifier) Utils.forName(Classifier.class, classifierName, options));
    }

    /**
     * Creates a deep copy of the given classifier using serialization.
     *
     * @param model the classifier to copy
     * @return a deep copy of the classifier
     * @exception Exception if an error occurs
     */
    public static Classifier makeCopy(Classifier model) throws Exception {

        return (Classifier) new SerializedObject(model).getObject();
    }

    /**
     * Creates a given number of deep copies of the given classifier using
     * serialization.
     *
     * @param model the classifier to copy
     * @param num the number of classifier copies to create.
     * @return an array of classifiers.
     * @exception Exception if an error occurs
     */
    public static Classifier[] makeCopies(Classifier model, int num) throws Exception {

        if (model == null) {
            throw new Exception("No model classifier set");
        }
        Classifier[] classifiers = new Classifier[num];
        SerializedObject so = new SerializedObject(model);
        for (int i = 0; i < classifiers.length; i++) {
            classifiers[i] = (Classifier) so.getObject();
        }
        return classifiers;
    }

    /**
     * runs the classifier instance with the given options.
     *
     * @param classifier the classifier to run
     * @param options the commandline options
     */
    public static void runClassifier(Classifier classifier, String[] options) {
        try {
            if (classifier instanceof CommandlineRunnable) {
                ((CommandlineRunnable) classifier).preExecution();
            }
            System.out.println(Evaluation.evaluateModel(classifier, options));
        } catch (Exception e) {
            if (((e.getMessage() != null) && (e.getMessage().indexOf("General options") == -1))
                    || (e.getMessage() == null)) {
                e.printStackTrace();
            } else {
                System.err.println(e.getMessage());
            }
        }
        if (classifier instanceof CommandlineRunnable) {
            try {
                ((CommandlineRunnable) classifier).postExecution();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }

    /**
     * Classifies the given test instance. The instance has to belong to a dataset
     * when it's being classified. Note that a classifier MUST implement either
     * this or distributionForInstance().
     *
     * @param instance the instance to be classified
     * @return the predicted most likely class for the instance or
     *         Utils.missingValue() if no prediction is made
     * @exception Exception if an error occurred during the prediction
     */
    @Override
    public double classifyInstance(Instance instance) throws Exception {

        double[] dist = distributionForInstance(instance);
        if (dist == null) {
            throw new Exception("Null distribution predicted");
        }
        switch (instance.classAttribute().type()) {
        case Attribute.NOMINAL:
            double max = 0;
            int maxIndex = 0;

            for (int i = 0; i < dist.length; i++) {
                if (dist[i] > max) {
                    maxIndex = i;
                    max = dist[i];
                }
            }
            if (max > 0) {
                return maxIndex;
            } else {
                return Utils.missingValue();
            }
        case Attribute.NUMERIC:
        case Attribute.DATE:
            return dist[0];
        default:
            return Utils.missingValue();
        }
    }

    /**
     * Predicts the class memberships for a given instance. If an instance is
     * unclassified, the returned array elements must be all zero. If the class is
     * numeric, the array must consist of only one element, which contains the
     * predicted value. Note that a classifier MUST implement either this or
     * classifyInstance().
     *
     * @param instance the instance to be classified
     * @return an array containing the estimated membership probabilities of the
     *         test instance in each class or the numeric prediction
     * @exception Exception if distribution could not be computed successfully
     */
    @Override
    public double[] distributionForInstance(Instance instance) throws Exception {

        double[] dist = new double[instance.numClasses()];
        switch (instance.classAttribute().type()) {
        case Attribute.NOMINAL:
            double classification = classifyInstance(instance);
            if (Utils.isMissingValue(classification)) {
                return dist;
            } else {
                dist[(int) classification] = 1.0;
            }
            return dist;
        case Attribute.NUMERIC:
        case Attribute.DATE:
            dist[0] = classifyInstance(instance);
            return dist;
        default:
            return dist;
        }
    }

    /**
     * Returns an enumeration describing the available options.
     *
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {

        Vector<Option> newVector = Option.listOptionsForClassHierarchy(this.getClass(), AbstractClassifier.class);

        newVector.addElement(new Option(
                "\tIf set, classifier is run in debug mode and\n" + "\tmay output additional info to the console",
                "output-debug-info", 0, "-output-debug-info"));
        newVector
                .addElement(new Option(
                        "\tIf set, classifier capabilities are not checked before classifier is built\n"
                                + "\t(use with caution).",
                        "-do-not-check-capabilities", 0, "-do-not-check-capabilities"));
        newVector.addElement(new Option("\tThe number of decimal places for the output of numbers in the model"
                + " (default " + m_numDecimalPlaces + ").", "num-decimal-places", 1, "-num-decimal-places"));
        newVector.addElement(
                new Option("\tThe desired batch size for batch prediction " + " (default " + m_BatchSize + ").",
                        "batch-size", 1, "-batch-size"));

        return newVector.elements();
    }

    /**
     * Gets the current settings of the Classifier.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {

        Vector<String> options = new Vector<String>();
        for (String s : Option.getOptionsForHierarchy(this, AbstractClassifier.class)) {
            options.add(s);
        }

        if (getDebug()) {
            options.add("-output-debug-info");
        }
        if (getDoNotCheckCapabilities()) {
            options.add("-do-not-check-capabilities");
        }
        if (getNumDecimalPlaces() != NUM_DECIMAL_PLACES_DEFAULT) {
            options.add("-num-decimal-places");
            options.add("" + getNumDecimalPlaces());
        }
        if (!(getBatchSize().equals(BATCH_SIZE_DEFAULT))) {
            options.add("-batch-size");
            options.add("" + getBatchSize());
        }
        return options.toArray(new String[0]);
    }

    /**
     * Parses a given list of options. Valid options are:
     * <p>
     *
     * -output-debug-info <br>
     * If set, classifier is run in debug mode and may output additional info to
     * the console.
     * <p>
     *
     * -do-not-check-capabilities <br>
     * If set, classifier capabilities are not checked before classifier is built
     * (use with caution).
     * <p>
     *
     * -num-decimal-places <br>
     * The number of decimal places for the output of numbers in the model.
     * <p>
     *
     * -batch-size <br>
     * The desired batch size for batch prediction.
     * <p>
     *
     * @param options the list of options as an array of strings
     * @exception Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        Option.setOptionsForHierarchy(options, this, AbstractClassifier.class);
        setDebug(Utils.getFlag("output-debug-info", options));
        setDoNotCheckCapabilities(Utils.getFlag("do-not-check-capabilities", options));

        String optionString = Utils.getOption("num-decimal-places", options);
        if (optionString.length() != 0) {
            setNumDecimalPlaces((new Integer(optionString)).intValue());
        }
        optionString = Utils.getOption("batch-size", options);
        if (optionString.length() != 0) {
            setBatchSize(optionString);
        }
    }

    /**
     * Get whether debugging is turned on.
     * 
     * @return true if debugging output is on
     */
    public boolean getDebug() {

        return m_Debug;
    }

    /**
     * Set debugging mode.
     *
     * @param debug true if debug output should be printed
     */
    public void setDebug(boolean debug) {

        m_Debug = debug;
    }

    /**
     * Returns the tip text for this property
     *
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String debugTipText() {
        return "If set to true, classifier may output additional info to " + "the console.";
    }

    /**
     * Get whether capabilities checking is turned off.
     * 
     * @return true if capabilities checking is turned off.
     */
    @Override
    public boolean getDoNotCheckCapabilities() {

        return m_DoNotCheckCapabilities;
    }

    /**
     * Set whether not to check capabilities.
     *
     * @param doNotCheckCapabilities true if capabilities are not to be checked.
     */
    @Override
    public void setDoNotCheckCapabilities(boolean doNotCheckCapabilities) {

        m_DoNotCheckCapabilities = doNotCheckCapabilities;
    }

    /**
     * Returns the tip text for this property
     *
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String doNotCheckCapabilitiesTipText() {
        return "If set, classifier capabilities are not checked before classifier is built"
                + " (Use with caution to reduce runtime).";
    }

    /**
     * Returns the tip text for this property
     *
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String numDecimalPlacesTipText() {
        return "The number of decimal places to be used for the output of numbers in " + "the model.";
    }

    /**
     * Get the number of decimal places.
     */
    public int getNumDecimalPlaces() {
        return m_numDecimalPlaces;
    }

    /**
     * Set the number of decimal places.
     */
    public void setNumDecimalPlaces(int num) {
        m_numDecimalPlaces = num;
    }

    /**
     * Returns the tip text for this property
     *
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String batchSizeTipText() {
        return "The preferred number of instances to process if batch prediction is "
                + "being performed. More or fewer instances may be provided, but this gives "
                + "implementations a chance to specify a preferred batch size.";
    }

    /**
     * Set the preferred batch size for batch prediction.
     * 
     * @param size the batch size to use
     */
    @Override
    public void setBatchSize(String size) {
        m_BatchSize = size;
    }

    /**
     * Get the preferred batch size for batch prediction.
     *
     * @return the preferred batch size
     */
    @Override
    public String getBatchSize() {
        return m_BatchSize;
    }

    /**
     * Return true if this classifier can generate batch predictions in an
     * efficient manner. Default implementation here returns false. Subclasses to
     * override as appropriate.
     * 
     * @return true if this classifier can generate batch predictions in an
     *         efficient manner.
     */
    @Override
    public boolean implementsMoreEfficientBatchPrediction() {
        return false;
    }

    /**
     * Batch prediction method. This default implementation simply calls
     * distributionForInstance() for each instance in the batch. If subclasses can
     * produce batch predictions in a more efficient manner than this they should
     * override this method and also return true from
     * implementsMoreEfficientBatchPrediction()
     * 
     * @param batch the instances to get predictions for
     * @return an array of probability distributions, one for each instance in the
     *         batch
     * @throws Exception if a problem occurs.
     */
    @Override
    public double[][] distributionsForInstances(Instances batch) throws Exception {
        double[][] batchPreds = new double[batch.numInstances()][];
        for (int i = 0; i < batch.numInstances(); i++) {
            batchPreds[i] = distributionForInstance(batch.instance(i));
        }

        return batchPreds;
    }

    /**
     * Returns the Capabilities of this classifier. Maximally permissive
     * capabilities are allowed by default. Derived classifiers should override
     * this method and first disable all capabilities and then enable just those
     * capabilities that make sense for the scheme.
     *
     * @return the capabilities of this object
     * @see Capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = new Capabilities(this);
        result.enableAll();

        return result;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }

    /**
     * Perform any setup stuff that might need to happen before commandline
     * execution. Subclasses should override if they need to do something here
     *
     * @throws Exception if a problem occurs during setup
     */
    @Override
    public void preExecution() throws Exception {
    }

    /**
     * Execute the supplied object.
     *
     * @param toRun the object to execute
     * @param options any options to pass to the object
     * @throws Exception if the object is not of the expected type.
     */
    @Override
    public void run(Object toRun, String[] options) throws Exception {
        if (!(toRun instanceof Classifier)) {
            throw new IllegalArgumentException("Object to run is not a Classifier!");
        }
        runClassifier((Classifier) toRun, options);
    }

    /**
     * Perform any teardown stuff that might need to happen after execution.
     * Subclasses should override if they need to do something here
     *
     * @throws Exception if a problem occurs during teardown
     */
    @Override
    public void postExecution() throws Exception {
    }
}