weka.classifiers.meta.AttributeSelectedClassifier.java Source code

Java tutorial

Introduction

Here is the source code for weka.classifiers.meta.AttributeSelectedClassifier.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    AttributeSelectedClassifier.java
 *    Copyright (C) 2000-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.meta;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.attributeSelection.ASEvaluation;
import weka.attributeSelection.ASSearch;
import weka.attributeSelection.AttributeSelection;
import weka.classifiers.SingleClassifierEnhancer;
import weka.core.*;
import weka.core.Capabilities.Capability;

/**
 <!-- globalinfo-start -->
 * Dimensionality of training and test data is reduced by attribute selection before being passed on to a classifier.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -E &lt;attribute evaluator specification&gt;
 *  Full class name of attribute evaluator, followed
 *  by its options.
 *  eg: "weka.attributeSelection.CfsSubsetEval -L"
 *  (default weka.attributeSelection.CfsSubsetEval)</pre>
 * 
 * <pre> -S &lt;search method specification&gt;
 *  Full class name of search method, followed
 *  by its options.
 *  eg: "weka.attributeSelection.BestFirst -D 1"
 *  (default weka.attributeSelection.BestFirst)</pre>
 * 
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 * 
 * <pre> -W
 *  Full name of base classifier.
 *  (default: weka.classifiers.trees.J48)</pre>
 * 
 * <pre> 
 * Options specific to classifier weka.classifiers.trees.J48:
 * </pre>
 * 
 * <pre> -U
 *  Use unpruned tree.</pre>
 * 
 * <pre> -C &lt;pruning confidence&gt;
 *  Set confidence threshold for pruning.
 *  (default 0.25)</pre>
 * 
 * <pre> -M &lt;minimum number of instances&gt;
 *  Set minimum number of instances per leaf.
 *  (default 2)</pre>
 * 
 * <pre> -R
 *  Use reduced error pruning.</pre>
 * 
 * <pre> -N &lt;number of folds&gt;
 *  Set number of folds for reduced error
 *  pruning. One fold is used as pruning set.
 *  (default 3)</pre>
 * 
 * <pre> -B
 *  Use binary splits only.</pre>
 * 
 * <pre> -S
 *  Don't perform subtree raising.</pre>
 * 
 * <pre> -L
 *  Do not clean up after the tree has been built.</pre>
 * 
 * <pre> -A
 *  Laplace smoothing for predicted probabilities.</pre>
 * 
 * <pre> -Q &lt;seed&gt;
 *  Seed for random data shuffling (default 1).</pre>
 * 
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class AttributeSelectedClassifier extends SingleClassifierEnhancer
        implements OptionHandler, Drawable, AdditionalMeasureProducer, WeightedInstancesHandler {

    /** for serialization */
    static final long serialVersionUID = -1151805453487947577L;

    /** The attribute selection object */
    protected AttributeSelection m_AttributeSelection = null;

    /** The attribute evaluator to use */
    protected ASEvaluation m_Evaluator = new weka.attributeSelection.CfsSubsetEval();

    /** The search method to use */
    protected ASSearch m_Search = new weka.attributeSelection.BestFirst();

    /** The header of the dimensionally reduced data */
    protected Instances m_ReducedHeader;

    /** The number of class vals in the training data (1 if class is numeric) */
    protected int m_numClasses;

    /** The number of attributes selected by the attribute selection phase */
    protected double m_numAttributesSelected;

    /** The time taken to select attributes in milliseconds */
    protected double m_selectionTime;

    /** The time taken to select attributes AND build the classifier */
    protected double m_totalTime;

    /**
     * String describing default classifier.
     * 
     * @return the default classifier classname
     */
    protected String defaultClassifierString() {

        return "weka.classifiers.trees.J48";
    }

    /**
     * Default constructor.
     */
    public AttributeSelectedClassifier() {
        m_Classifier = new weka.classifiers.trees.J48();
    }

    /**
     * Returns a string describing this search method
     * @return a description of the search method suitable for
     * displaying in the explorer/experimenter gui
     */
    public String globalInfo() {
        return "Dimensionality of training and test data is reduced by "
                + "attribute selection before being passed on to a classifier.";
    }

    /**
     * Returns an enumeration describing the available options.
     *
     * @return an enumeration of all the available options.
     */
    public Enumeration<Option> listOptions() {
        Vector<Option> newVector = new Vector<Option>(2);

        newVector.addElement(new Option(
                "\tFull class name of attribute evaluator, followed\n" + "\tby its options.\n"
                        + "\teg: \"weka.attributeSelection.CfsSubsetEval -L\"\n"
                        + "\t(default weka.attributeSelection.CfsSubsetEval)",
                "E", 1, "-E <attribute evaluator specification>"));

        newVector.addElement(new Option(
                "\tFull class name of search method, followed\n" + "\tby its options.\n"
                        + "\teg: \"weka.attributeSelection.BestFirst -D 1\"\n"
                        + "\t(default weka.attributeSelection.BestFirst)",
                "S", 1, "-S <search method specification>"));

        newVector.addAll(Collections.list(super.listOptions()));

        if (getEvaluator() instanceof OptionHandler) {
            newVector.addElement(new Option("", "", 0,
                    "\nOptions specific to attribute evaluator " + getEvaluator().getClass().getName() + ":"));
            newVector.addAll(Collections.list(((OptionHandler) getEvaluator()).listOptions()));
        }

        if (getSearch() instanceof OptionHandler) {
            newVector.addElement(new Option("", "", 0,
                    "\nOptions specific to search method " + getSearch().getClass().getName() + ":"));
            newVector.addAll(Collections.list(((OptionHandler) getSearch()).listOptions()));
        }

        return newVector.elements();
    }

    /**
     * Parses a given list of options. <p/>
     *
     <!-- options-start -->
     * Valid options are: <p/>
     * 
     * <pre> -E &lt;attribute evaluator specification&gt;
     *  Full class name of attribute evaluator, followed
     *  by its options.
     *  eg: "weka.attributeSelection.CfsSubsetEval -L"
     *  (default weka.attributeSelection.CfsSubsetEval)</pre>
     * 
     * <pre> -S &lt;search method specification&gt;
     *  Full class name of search method, followed
     *  by its options.
     *  eg: "weka.attributeSelection.BestFirst -D 1"
     *  (default weka.attributeSelection.BestFirst)</pre>
     * 
     * <pre> -D
     *  If set, classifier is run in debug mode and
     *  may output additional info to the console</pre>
     * 
     * <pre> -W
     *  Full name of base classifier.
     *  (default: weka.classifiers.trees.J48)</pre>
     * 
     * <pre> 
     * Options specific to classifier weka.classifiers.trees.J48:
     * </pre>
     * 
     * <pre> -U
     *  Use unpruned tree.</pre>
     * 
     * <pre> -C &lt;pruning confidence&gt;
     *  Set confidence threshold for pruning.
     *  (default 0.25)</pre>
     * 
     * <pre> -M &lt;minimum number of instances&gt;
     *  Set minimum number of instances per leaf.
     *  (default 2)</pre>
     * 
     * <pre> -R
     *  Use reduced error pruning.</pre>
     * 
     * <pre> -N &lt;number of folds&gt;
     *  Set number of folds for reduced error
     *  pruning. One fold is used as pruning set.
     *  (default 3)</pre>
     * 
     * <pre> -B
     *  Use binary splits only.</pre>
     * 
     * <pre> -S
     *  Don't perform subtree raising.</pre>
     * 
     * <pre> -L
     *  Do not clean up after the tree has been built.</pre>
     * 
     * <pre> -A
     *  Laplace smoothing for predicted probabilities.</pre>
     * 
     * <pre> -Q &lt;seed&gt;
     *  Seed for random data shuffling (default 1).</pre>
     * 
     <!-- options-end -->
     *
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    public void setOptions(String[] options) throws Exception {

        // same for attribute evaluator
        String evaluatorString = Utils.getOption('E', options);
        if (evaluatorString.length() == 0)
            evaluatorString = weka.attributeSelection.CfsSubsetEval.class.getName();
        String[] evaluatorSpec = Utils.splitOptions(evaluatorString);
        if (evaluatorSpec.length == 0) {
            throw new Exception("Invalid attribute evaluator specification string");
        }
        String evaluatorName = evaluatorSpec[0];
        evaluatorSpec[0] = "";
        setEvaluator(ASEvaluation.forName(evaluatorName, evaluatorSpec));

        // same for search method
        String searchString = Utils.getOption('S', options);
        if (searchString.length() == 0)
            searchString = weka.attributeSelection.BestFirst.class.getName();
        String[] searchSpec = Utils.splitOptions(searchString);
        if (searchSpec.length == 0) {
            throw new Exception("Invalid search specification string");
        }
        String searchName = searchSpec[0];
        searchSpec[0] = "";
        setSearch(ASSearch.forName(searchName, searchSpec));

        super.setOptions(options);

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the Classifier.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    public String[] getOptions() {

        Vector<String> options = new Vector<String>();

        // same attribute evaluator
        options.add("-E");
        options.add("" + getEvaluatorSpec());

        // same for search
        options.add("-S");
        options.add("" + getSearchSpec());

        Collections.addAll(options, super.getOptions());

        return options.toArray(new String[0]);
    }

    /**
     * Returns the tip text for this property
     * @return tip text for this property suitable for
     * displaying in the explorer/experimenter gui
     */
    public String evaluatorTipText() {
        return "Set the attribute evaluator to use. This evaluator is used "
                + "during the attribute selection phase before the classifier is " + "invoked.";
    }

    /**
     * Sets the attribute evaluator
     *
     * @param evaluator the evaluator with all options set.
     */
    public void setEvaluator(ASEvaluation evaluator) {
        m_Evaluator = evaluator;
    }

    /**
     * Gets the attribute evaluator used
     *
     * @return the attribute evaluator
     */
    public ASEvaluation getEvaluator() {
        return m_Evaluator;
    }

    /**
     * Gets the evaluator specification string, which contains the class name of
     * the attribute evaluator and any options to it
     *
     * @return the evaluator string.
     */
    protected String getEvaluatorSpec() {

        ASEvaluation e = getEvaluator();
        if (e instanceof OptionHandler) {
            return e.getClass().getName() + " " + Utils.joinOptions(((OptionHandler) e).getOptions());
        }
        return e.getClass().getName();
    }

    /**
     * Returns the tip text for this property
     * @return tip text for this property suitable for
     * displaying in the explorer/experimenter gui
     */
    public String searchTipText() {
        return "Set the search method. This search method is used "
                + "during the attribute selection phase before the classifier is " + "invoked.";
    }

    /**
     * Sets the search method
     *
     * @param search the search method with all options set.
     */
    public void setSearch(ASSearch search) {
        m_Search = search;
    }

    /**
     * Gets the search method used
     *
     * @return the search method
     */
    public ASSearch getSearch() {
        return m_Search;
    }

    /**
     * Gets the search specification string, which contains the class name of
     * the search method and any options to it
     *
     * @return the search string.
     */
    protected String getSearchSpec() {

        ASSearch s = getSearch();
        if (s instanceof OptionHandler) {
            return s.getClass().getName() + " " + Utils.joinOptions(((OptionHandler) s).getOptions());
        }
        return s.getClass().getName();
    }

    /**
     * Returns default capabilities of the classifier.
     *
     * @return      the capabilities of this classifier
     */
    public Capabilities getCapabilities() {
        Capabilities result;

        if (getEvaluator() == null)
            result = super.getCapabilities();
        else
            result = getEvaluator().getCapabilities();

        // set dependencies
        for (Capability cap : Capability.values())
            result.enableDependency(cap);

        return result;
    }

    /**
     * Build the classifier on the dimensionally reduced data.
     *
     * @param data the training data
     * @throws Exception if the classifier could not be built successfully
     */
    public void buildClassifier(Instances data) throws Exception {
        if (m_Classifier == null) {
            throw new Exception("No base classifier has been set!");
        }

        if (m_Evaluator == null) {
            throw new Exception("No attribute evaluator has been set!");
        }

        if (m_Search == null) {
            throw new Exception("No search method has been set!");
        }

        // can classifier handle the data?
        getCapabilities().testWithFail(data);

        // get fresh Instances object
        Instances newData = new Instances(data);

        if (newData.numInstances() == 0) {
            m_Classifier.buildClassifier(newData);
            return;
        }
        if (newData.classAttribute().isNominal()) {
            m_numClasses = newData.classAttribute().numValues();
        } else {
            m_numClasses = 1;
        }

        Instances resampledData = null;
        // check to see if training data has all equal weights
        double weight = newData.instance(0).weight();
        boolean ok = false;
        for (int i = 1; i < newData.numInstances(); i++) {
            if (newData.instance(i).weight() != weight) {
                ok = true;
                break;
            }
        }

        if (ok) {
            if (!(m_Evaluator instanceof WeightedInstancesHandler)
                    || !(m_Classifier instanceof WeightedInstancesHandler)) {
                Random r = new Random(1);
                for (int i = 0; i < 10; i++) {
                    r.nextDouble();
                }
                resampledData = newData.resampleWithWeights(r);
            }
        } else {
            // all equal weights in the training data so just use as is
            resampledData = newData;
        }

        m_AttributeSelection = new AttributeSelection();
        m_AttributeSelection.setEvaluator(m_Evaluator);
        m_AttributeSelection.setSearch(m_Search);
        long start = System.currentTimeMillis();
        m_AttributeSelection
                .SelectAttributes((m_Evaluator instanceof WeightedInstancesHandler) ? newData : resampledData);
        long end = System.currentTimeMillis();
        if (m_Classifier instanceof WeightedInstancesHandler) {
            newData = m_AttributeSelection.reduceDimensionality(newData);
            m_Classifier.buildClassifier(newData);
        } else {
            resampledData = m_AttributeSelection.reduceDimensionality(resampledData);
            m_Classifier.buildClassifier(resampledData);
        }

        long end2 = System.currentTimeMillis();
        m_numAttributesSelected = m_AttributeSelection.numberAttributesSelected();
        m_ReducedHeader = new Instances(
                (m_Classifier instanceof WeightedInstancesHandler) ? newData : resampledData, 0);
        m_selectionTime = (double) (end - start);
        m_totalTime = (double) (end2 - start);
    }

    /**
     * Classifies a given instance after attribute selection
     *
     * @param instance the instance to be classified
     * @return the class distribution
     * @throws Exception if instance could not be classified
     * successfully
     */
    public double[] distributionForInstance(Instance instance) throws Exception {

        Instance newInstance;
        if (m_AttributeSelection == null) {
            //      throw new Exception("AttributeSelectedClassifier: No model built yet!");
            newInstance = instance;
        } else {
            newInstance = m_AttributeSelection.reduceDimensionality(instance);
        }

        return m_Classifier.distributionForInstance(newInstance);
    }

    /**
     * Tool tip text for this property
     *
     * @return the tool tip for this property
     */
    public String batchSizeTipText() {
        return "Batch size to use if base learner is a BatchPredictor";
    }

    /**
     * Set the batch size to use. Gets passed through to the base learner if it
     * implements BatchPredictor. Otherwise it is just ignored.
     *
     * @param size the batch size to use
     */
    public void setBatchSize(String size) {

        if (getClassifier() instanceof BatchPredictor) {
            ((BatchPredictor) getClassifier()).setBatchSize(size);
        } else {
            super.setBatchSize(size);
        }
    }

    /**
     * Gets the preferred batch size from the base learner if it implements
     * BatchPredictor. Returns 1 as the preferred batch size otherwise.
     *
     * @return the batch size to use
     */
    public String getBatchSize() {

        if (getClassifier() instanceof BatchPredictor) {
            return ((BatchPredictor) getClassifier()).getBatchSize();
        } else {
            return super.getBatchSize();
        }
    }

    /**
     * Batch scoring method. Calls the appropriate method for the base learner if
     * it implements BatchPredictor. Otherwise it simply calls the
     * distributionForInstance() method repeatedly.
     *
     * @param insts the instances to get predictions for
     * @return an array of probability distributions, one for each instance
     * @throws Exception if a problem occurs
     */
    public double[][] distributionsForInstances(Instances insts) throws Exception {

        if (getClassifier() instanceof BatchPredictor) {
            Instances newInstances;
            if (m_AttributeSelection == null) {
                //      throw new Exception("AttributeSelectedClassifier: No model built yet!");
                newInstances = insts;
            } else {
                newInstances = m_AttributeSelection.reduceDimensionality(insts);
            }
            if (newInstances.numInstances() != insts.numInstances()) {
                throw new WekaException(
                        "FilteredClassifier: filter has returned more/less instances than required.");
            }
            return ((BatchPredictor) getClassifier()).distributionsForInstances(newInstances);
        } else {
            double[][] result = new double[insts.numInstances()][insts.numClasses()];
            for (int i = 0; i < insts.numInstances(); i++) {
                result[i] = distributionForInstance(insts.instance(i));
            }
            return result;
        }
    }

    /**
     * Returns true if the base classifier implements BatchPredictor and is able
     * to generate batch predictions efficiently
     *
     * @return true if the base classifier can generate batch predictions
     *         efficiently
     */
    public boolean implementsMoreEfficientBatchPrediction() {
        if (!(getClassifier() instanceof BatchPredictor)) {
            return super.implementsMoreEfficientBatchPrediction();
        }

        return ((BatchPredictor) getClassifier()).implementsMoreEfficientBatchPrediction();
    }

    /**
     *  Returns the type of graph this classifier
     *  represents.
     *  
     *  @return the type of graph
     */
    public int graphType() {

        if (m_Classifier instanceof Drawable)
            return ((Drawable) m_Classifier).graphType();
        else
            return Drawable.NOT_DRAWABLE;
    }

    /**
     * Returns graph describing the classifier (if possible).
     *
     * @return the graph of the classifier in dotty format
     * @throws Exception if the classifier cannot be graphed
     */
    public String graph() throws Exception {

        if (m_Classifier instanceof Drawable)
            return ((Drawable) m_Classifier).graph();
        else
            throw new Exception("Classifier: " + getClassifierSpec() + " cannot be graphed");
    }

    /**
     * Output a representation of this classifier
     * 
     * @return a representation of this classifier
     */
    public String toString() {
        if (m_AttributeSelection == null) {
            return "AttributeSelectedClassifier: No attribute selection possible.\n\n" + m_Classifier.toString();
        }

        StringBuffer result = new StringBuffer();
        result.append("AttributeSelectedClassifier:\n\n");
        result.append(m_AttributeSelection.toResultsString());
        result.append("\n\nHeader of reduced data:\n" + m_ReducedHeader.toString());
        result.append("\n\nClassifier Model\n" + m_Classifier.toString());

        return result.toString();
    }

    /**
     * Additional measure --- number of attributes selected
     * @return the number of attributes selected
     */
    public double measureNumAttributesSelected() {
        return m_numAttributesSelected;
    }

    /**
     * Additional measure --- time taken (milliseconds) to select the attributes
     * @return the time taken to select attributes
     */
    public double measureSelectionTime() {
        return m_selectionTime;
    }

    /**
     * Additional measure --- time taken (milliseconds) to select attributes
     * and build the classifier
     * @return the total time (select attributes + build classifier)
     */
    public double measureTime() {
        return m_totalTime;
    }

    /**
     * Returns an enumeration of the additional measure names
     * @return an enumeration of the measure names
     */
    public Enumeration<String> enumerateMeasures() {
        Vector<String> newVector = new Vector<String>(3);
        newVector.addElement("measureNumAttributesSelected");
        newVector.addElement("measureSelectionTime");
        newVector.addElement("measureTime");
        if (m_Classifier instanceof AdditionalMeasureProducer) {
            newVector.addAll(Collections.list(((AdditionalMeasureProducer) m_Classifier).enumerateMeasures()));
        }
        return newVector.elements();
    }

    /**
     * Returns the value of the named measure
     * @param additionalMeasureName the name of the measure to query for its value
     * @return the value of the named measure
     * @throws IllegalArgumentException if the named measure is not supported
     */
    public double getMeasure(String additionalMeasureName) {
        if (additionalMeasureName.compareToIgnoreCase("measureNumAttributesSelected") == 0) {
            return measureNumAttributesSelected();
        } else if (additionalMeasureName.compareToIgnoreCase("measureSelectionTime") == 0) {
            return measureSelectionTime();
        } else if (additionalMeasureName.compareToIgnoreCase("measureTime") == 0) {
            return measureTime();
        } else if (m_Classifier instanceof AdditionalMeasureProducer) {
            return ((AdditionalMeasureProducer) m_Classifier).getMeasure(additionalMeasureName);
        } else {
            throw new IllegalArgumentException(
                    additionalMeasureName + " not supported (AttributeSelectedClassifier)");
        }
    }

    /**
     * Returns the revision string.
     * 
     * @return      the revision
     */
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }

    /**
     * Main method for testing this class.
     *
     * @param argv should contain the following arguments:
     * -t training file [-T test file] [-c class index]
     */
    public static void main(String[] argv) {
        runClassifier(new AttributeSelectedClassifier(), argv);
    }
}