weka.attributeSelection.AttributeSelection.java Source code

Introduction

Here is the source code for weka.attributeSelection.AttributeSelection.java, the class that performs attribute selection in Weka by pairing an attribute/subset evaluator (such as CfsSubsetEval) with a search method (such as GreedyStepwise or Ranker).
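
The listing below can be driven either from the command line (see its class comment) or programmatically. Here is a minimal sketch of the programmatic route, using only methods that appear in the listing; the class name AttributeSelectionDemo and the file name data.arff are placeholders, and the CfsSubsetEval/GreedyStepwise pairing simply mirrors the defaults set in the no-argument constructor.

import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.CfsSubsetEval;
import weka.attributeSelection.GreedyStepwise;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class AttributeSelectionDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset and nominate the last attribute as the class.
        Instances data = new DataSource("data.arff").getDataSet();
        data.setClassIndex(data.numAttributes() - 1);

        // Configure the selector with an evaluator and a search method.
        AttributeSelection selector = new AttributeSelection();
        selector.setEvaluator(new CfsSubsetEval());
        selector.setSearch(new GreedyStepwise());

        // Run attribute selection and print the textual report.
        selector.SelectAttributes(data);
        System.out.println(selector.toResultsString());

        // The selected indices (for supervised evaluators the class index is included at the end).
        int[] chosen = selector.selectedAttributes();
        System.out.println("Selected indices: " + java.util.Arrays.toString(chosen));

        // Build a reduced copy of the data containing only the chosen attributes.
        Instances reduced = selector.reduceDimensionality(data);
        System.out.println("Reduced data has " + reduced.numAttributes() + " attributes");
    }
}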

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    AttributeSelection.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.PropertyDescriptor;
import java.io.Serializable;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Random;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

/**
 * Attribute selection class. Takes the name of a search class and an evaluation
 * class on the command line.
 * <p/>
 * 
 * Valid options are:
 * <p/>
 * 
 * -h <br/>
 * Display help.
 * <p/>
 * 
 * -i &lt;name of input file&gt; <br/>
 * Specify the training data file.
 * <p/>
 * 
 * -c &lt;class index&gt; <br/>
 * The index of the attribute to use as the class.
 * <p/>
 * 
 * -s &lt;search method&gt; <br/>
 * The full class name of the search method followed by search method options
 * (if any).<br/>
 * E.g. -s "weka.attributeSelection.BestFirst -N 10"
 * <p/>
 * 
 * -x &lt;number of folds&gt; <br/>
 * Perform a cross validation.
 * <p/>
 * 
 * -n &lt;random number seed&gt; <br/>
 * Specify a random number seed. Use in conjunction with -x. (Default = 1).
 * <p/>
 * 
 * ------------------------------------------------------------------------
 * <p/>
 * 
 * Example usage as the main of an attribute evaluator (called FunkyEvaluator):
 * 
 * <pre>
 * public static void main(String[] args) {
 *   runEvaluator(new FunkyEvaluator(), args);
 * }
 * </pre>
 * <p/>
 * 
 * ------------------------------------------------------------------------
 * <p/>
 * 
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class AttributeSelection implements Serializable, RevisionHandler {

    /** for serialization */
    static final long serialVersionUID = 4170171824147584330L;

    /** the instances to select attributes from */
    private Instances m_trainInstances;

    /** the attribute/subset evaluator */
    private ASEvaluation m_ASEvaluator;

    /** the search method */
    private ASSearch m_searchMethod;

    /** the number of folds to use for cross validation */
    private int m_numFolds;

    /** holds a string describing the results of the attribute selection */
    private final StringBuffer m_selectionResults;

    /** rank features (if allowed by the search method) */
    private boolean m_doRank;

    /** do cross validation */
    private boolean m_doXval;

    /** seed used to randomly shuffle instances for cross validation */
    private int m_seed;

    /** number of attributes requested from ranked results */
    private int m_numToSelect;

    /** the selected attributes */
    private int[] m_selectedAttributeSet;

    /** the attribute indexes and associated merits if a ranking is produced */
    private double[][] m_attributeRanking;

    /** if a feature selection run involves an attribute transformer */
    private AttributeTransformer m_transformer = null;

    /**
     * the attribute filter for processing instances with respect to the most
     * recent feature selection run
     */
    private Remove m_attributeFilter = null;

    /**
     * hold statistics for repeated feature selection, such as under cross
     * validation
     */
    private double[][] m_rankResults = null;
    private double[] m_subsetResults = null;

    /**
     * Return the number of attributes selected from the most recent run of
     * attribute selection
     * 
     * @return the number of attributes selected
     * @exception Exception if attribute selection has not been performed yet
     */
    public int numberAttributesSelected() throws Exception {
        int[] att = selectedAttributes();
        return att.length - 1;
    }

    /**
     * get the final selected set of attributes.
     * 
     * @return an array of attribute indexes
     * @exception Exception if attribute selection has not been performed yet
     */
    public int[] selectedAttributes() throws Exception {
        if (m_selectedAttributeSet == null) {
            throw new Exception("Attribute selection has not been performed yet!");
        }
        return m_selectedAttributeSet;
    }

    /**
     * get the final ranking of the attributes.
     * 
     * @return a two dimensional array of ranked attribute indexes and their
     *         associated merit scores as doubles.
     * @exception Exception if a ranking has not been produced
     */
    public double[][] rankedAttributes() throws Exception {
        if (m_attributeRanking == null) {
            throw new Exception("Ranking has not been performed");
        }
        return m_attributeRanking;
    }

    /**
     * set the attribute/subset evaluator
     * 
     * @param evaluator the evaluator to use
     */
    public void setEvaluator(ASEvaluation evaluator) {
        m_ASEvaluator = evaluator;
    }

    /**
     * set the search method
     * 
     * @param search the search method to use
     */
    public void setSearch(ASSearch search) {
        m_searchMethod = search;

        if (m_searchMethod instanceof RankedOutputSearch) {
            setRanking(((RankedOutputSearch) m_searchMethod).getGenerateRanking());
        }
    }

    /**
     * set the number of folds for cross validation
     * 
     * @param folds the number of folds
     */
    public void setFolds(int folds) {
        m_numFolds = folds;
    }

    /**
     * produce a ranking (if possible with the set search and evaluator)
     * 
     * @param r true if a ranking is to be produced
     */
    public void setRanking(boolean r) {
        m_doRank = r;
    }

    /**
     * do a cross validation
     * 
     * @param x true if a cross validation is to be performed
     */
    public void setXval(boolean x) {
        m_doXval = x;
    }

    /**
     * set the seed for use in cross validation
     * 
     * @param s the seed
     */
    public void setSeed(int s) {
        m_seed = s;
    }

    /**
     * get a description of the attribute selection
     * 
     * @return a String describing the results of attribute selection
     */
    public String toResultsString() {
        return m_selectionResults.toString();
    }

    /**
     * reduce the dimensionality of a set of instances to include only those
     * attributes chosen by the last run of attribute selection.
     * 
     * @param in the instances to be reduced
     * @return a dimensionality reduced set of instances
     * @exception Exception if the instances can't be reduced
     */
    public Instances reduceDimensionality(Instances in) throws Exception {
        if (m_attributeFilter == null) {
            throw new Exception("No feature selection has been performed yet!");
        }

        if (m_transformer != null) {
            Instances transformed = new Instances(m_transformer.transformedHeader(), in.numInstances());
            for (int i = 0; i < in.numInstances(); i++) {
                transformed.add(m_transformer.convertInstance(in.instance(i)));
            }
            return Filter.useFilter(transformed, m_attributeFilter);
        }

        return Filter.useFilter(in, m_attributeFilter);
    }

    /**
     * reduce the dimensionality of a single instance to include only those
     * attributes chosen by the last run of attribute selection.
     * 
     * @param in the instance to be reduced
     * @return a dimensionality reduced instance
     * @exception Exception if the instance can't be reduced
     */
    public Instance reduceDimensionality(Instance in) throws Exception {
        if (m_attributeFilter == null) {
            throw new Exception("No feature selection has been performed yet!");
        }
        if (m_transformer != null) {
            in = m_transformer.convertInstance(in);
        }
        m_attributeFilter.input(in);
        m_attributeFilter.batchFinished();
        Instance result = m_attributeFilter.output();
        return result;
    }

    /**
     * constructor. Sets defaults for each member variable. Default attribute
     * evaluator is CfsSubsetEval; default search method is GreedyStepwise.
     */
    public AttributeSelection() {
        setFolds(10);
        setRanking(false);
        setXval(false);
        setSeed(1);
        setEvaluator(new CfsSubsetEval());
        setSearch(new GreedyStepwise());
        m_selectionResults = new StringBuffer();
        m_selectedAttributeSet = null;
        m_attributeRanking = null;
    }

    /**
     * Perform attribute selection with a particular evaluator and a set of
     * options specifying search method and input file etc.
     * 
     * @param ASEvaluator an evaluator object
     * @param options an array of options, not only for the evaluator but also the
     *          search method (if any) and an input data file
     * @return the results of attribute selection as a String
     * @exception Exception if no training file is set
     */
    public static String SelectAttributes(ASEvaluation ASEvaluator, String[] options) throws Exception {
        String trainFileName, searchName;
        Instances train = null;
        ASSearch searchMethod = null;
        String[] optionsTmp = options.clone();
        boolean helpRequested = false;

        try {
            // get basic options (options that are the same for all attribute selectors)
            trainFileName = Utils.getOption('i', options);
            helpRequested = Utils.getFlag('h', optionsTmp);

            if (helpRequested || (trainFileName.length() == 0)) {
                searchName = Utils.getOption('s', optionsTmp);
                if (searchName.length() != 0) {
                    String[] searchOptions = Utils.splitOptions(searchName);
                    searchMethod = (ASSearch) Class.forName(searchOptions[0]).newInstance();
                }

                if (helpRequested) {
                    throw new Exception("Help requested.");
                } else {
                    throw new Exception("No training file given.");
                }
            }
        } catch (Exception e) {
            throw new Exception('\n' + e.getMessage() + makeOptionString(ASEvaluator, searchMethod));
        }

        DataSource source = new DataSource(trainFileName);
        train = source.getDataSet();
        return SelectAttributes(ASEvaluator, options, train);
    }

    /**
     * returns a string summarizing the results of repeated attribute selection
     * runs on splits of a dataset.
     * 
     * @return a summary of attribute selection results
     * @exception Exception if no attribute selection has been performed.
     */
    public String CVResultsString() throws Exception {
        StringBuffer CvString = new StringBuffer();

        if ((m_subsetResults == null && m_rankResults == null) || (m_trainInstances == null)) {
            throw new Exception("Attribute selection has not been performed yet!");
        }

        int fieldWidth = (int) (Math.log(m_trainInstances.numAttributes()) + 1.0);

        CvString.append("\n\n=== Attribute selection " + m_numFolds + " fold cross-validation ");

        if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
                && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)
                && (m_trainInstances.classAttribute().isNominal())) {
            CvString.append("(stratified), seed: ");
            CvString.append(m_seed + " ===\n\n");
        } else {
            CvString.append("seed: " + m_seed + " ===\n\n");
        }

        if ((m_searchMethod instanceof RankedOutputSearch) && (m_doRank == true)) {
            CvString.append("average merit      average rank  attribute\n");

            // calculate means and std devs
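            // For each attribute: variance = (sum of squares - n * mean^2) / n, where
            // n is the number of folds. Slots [0] and [1] hold the running sums of
            // merit and rank (converted to means here), while [2] and [3] hold the
            // corresponding sums of squares (converted to standard deviations).
            // Negative variances caused by rounding are clamped to zero.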
            for (int i = 0; i < m_rankResults[0].length; i++) {
                m_rankResults[0][i] /= m_numFolds; // mean merit
                double var = m_rankResults[0][i] * m_rankResults[0][i] * m_numFolds;
                var = (m_rankResults[2][i] - var);
                var /= m_numFolds;

                if (var <= 0.0) {
                    var = 0.0;
                    m_rankResults[2][i] = 0;
                } else {
                    m_rankResults[2][i] = Math.sqrt(var);
                }

                m_rankResults[1][i] /= m_numFolds; // mean rank
                var = m_rankResults[1][i] * m_rankResults[1][i] * m_numFolds;
                var = (m_rankResults[3][i] - var);
                var /= m_numFolds;

                if (var <= 0.0) {
                    var = 0.0;
                    m_rankResults[3][i] = 0;
                } else {
                    m_rankResults[3][i] = Math.sqrt(var);
                }
            }

            // now sort them by mean rank
            int[] s = Utils.sort(m_rankResults[1]);
            for (int element : s) {
                if (m_rankResults[1][element] > 0) {
                    CvString.append(Utils.doubleToString(/*
                                                          * Math. abs(
                                                          */m_rankResults[0][element]/* ) */, 6, 3) + " +-"
                            + Utils.doubleToString(m_rankResults[2][element], 6, 3) + "   "
                            + Utils.doubleToString(m_rankResults[1][element], fieldWidth + 2, 1) + " +-"
                            + Utils.doubleToString(m_rankResults[3][element], 5, 2) + "  "
                            + Utils.doubleToString((element + 1), fieldWidth, 0) + " "
                            + m_trainInstances.attribute(element).name() + "\n");
                }
            }
        } else {
            CvString.append("number of folds (%)  attribute\n");

            for (int i = 0; i < m_subsetResults.length; i++) {
                if ((m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
                        || (i != m_trainInstances.classIndex())) {
                    CvString.append(Utils.doubleToString(m_subsetResults[i], 12, 0) + "("
                            + Utils.doubleToString((m_subsetResults[i] / m_numFolds * 100.0), 3, 0) + " %)  "
                            + Utils.doubleToString((i + 1), fieldWidth, 0) + " "
                            + m_trainInstances.attribute(i).name() + "\n");
                }
            }
        }

        return CvString.toString();
    }

    /**
     * Select attributes for a split of the data. Calling this function updates
     * the statistics on attribute selection. CVResultsString() returns a string
     * summarizing the results of repeated calls to this function. Assumes that
     * splits are from the same dataset, i.e. have the same number and types of
     * attributes as previous splits.
     * 
     * @param split the instances to select attributes from
     * @exception Exception if an error occurs
     */
    public void selectAttributesCVSplit(Instances split) throws Exception {

        m_ASEvaluator.buildEvaluator(split);
        // Do the search
        int[] attributeSet = m_searchMethod.search(m_ASEvaluator, split);
        // Do any postprocessing that an attribute selection method might
        // require
        attributeSet = m_ASEvaluator.postProcess(attributeSet);
        updateStatsForModelCVSplit(split, m_ASEvaluator, m_searchMethod, attributeSet, m_doRank);
    }

    /**
     * Update the attribute selection stats for a cross-validation fold of the
     * data.
     *
     * @param split the instances in this split/fold of the data
     * @param evaluator the evaluator that was used
     * @param search the search that was used
     * @param attributeSet the final subset produced for the split
     * @param doRank whether to produce a ranking
     * @throws Exception if a problem occurs
     */
    public void updateStatsForModelCVSplit(Instances split, ASEvaluation evaluator, ASSearch search,
            int[] attributeSet, boolean doRank) throws Exception {
        double[][] attributeRanking = null;

        // if the train instances are null then set equal to this split.
        // If this is the case then this function is more than likely being
        // called from outside this class in order to obtain CV statistics
        // and all we need m_trainInstances for is to get at attribute names
        // and types etc.
        if (m_trainInstances == null) {
            m_trainInstances = split;
        }

        // create space to hold statistics
        if (m_rankResults == null && m_subsetResults == null) {
            m_subsetResults = new double[split.numAttributes()];
            m_rankResults = new double[4][split.numAttributes()];
        }

        if ((search instanceof RankedOutputSearch) && doRank) {
            attributeRanking = ((RankedOutputSearch) search).rankedAttributes();
            // System.out.println(attributeRanking[0][1]);
            for (int j = 0; j < attributeRanking.length; j++) {
                // merit
                m_rankResults[0][(int) attributeRanking[j][0]] += attributeRanking[j][1];
                // squared merit
                m_rankResults[2][(int) attributeRanking[j][0]] += (attributeRanking[j][1] * attributeRanking[j][1]);
                // rank
                m_rankResults[1][(int) attributeRanking[j][0]] += (j + 1);
                // squared rank
                m_rankResults[3][(int) attributeRanking[j][0]] += (j + 1) * (j + 1);
                // += (attributeRanking[j][0] * attributeRanking[j][0]);
            }
        } else {
            for (int j = 0; j < attributeSet.length; j++) {
                m_subsetResults[attributeSet[j]]++;
            }
        }
    }

    /**
     * Perform a cross validation for attribute selection. With subset evaluators
     * the number of times each attribute is selected over the cross validation is
     * reported. For attribute evaluators, the average merit and average ranking
     * plus standard deviation are reported for each attribute.
     * 
     * @return the results of cross validation as a String
     * @exception Exception if an error occurs during cross validation
     */
    public String CrossValidateAttributes() throws Exception {
        Instances cvData = new Instances(m_trainInstances);
        Instances train;

        Random random = new Random(m_seed);
        cvData.randomize(random);

        if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
                && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) {
            if (cvData.classAttribute().isNominal()) {
                cvData.stratify(m_numFolds);
            }

        }

        for (int i = 0; i < m_numFolds; i++) {
            // Perform attribute selection
            train = cvData.trainCV(m_numFolds, i, random);
            selectAttributesCVSplit(train);
        }

        return CVResultsString();
    }

    /**
     * Perform attribute selection on the supplied training instances.
     * 
     * @param data the instances to select attributes from
     * @exception Exception if there is a problem during selection
     */
    public void SelectAttributes(Instances data) throws Exception {
        int[] attributeSet;

        m_transformer = null;
        m_attributeFilter = null;
        m_trainInstances = data;

        if (m_doXval == true && (m_ASEvaluator instanceof AttributeTransformer)) {
            throw new Exception("Can't cross validate an attribute transformer.");
        }

        if (m_ASEvaluator instanceof SubsetEvaluator && m_searchMethod instanceof Ranker) {
            throw new Exception(m_ASEvaluator.getClass().getName() + " must use a search method other than Ranker");
        }

        if (m_ASEvaluator instanceof AttributeEvaluator && !(m_searchMethod instanceof Ranker)) {
            // System.err.println("AttributeEvaluators must use a Ranker search "
            // +"method. Switching to Ranker...");
            // m_searchMethod = new Ranker();
            throw new Exception("AttributeEvaluators must use the Ranker search " + "method");
        }

        if (m_searchMethod instanceof RankedOutputSearch) {
            m_doRank = ((RankedOutputSearch) m_searchMethod).getGenerateRanking();
        }

        if (m_ASEvaluator instanceof UnsupervisedAttributeEvaluator
                || m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) {
            // unset the class index
            // m_trainInstances.setClassIndex(-1);
        } else {
            // check that a class index has been set
            if (m_trainInstances.classIndex() < 0) {
                m_trainInstances.setClassIndex(m_trainInstances.numAttributes() - 1);
            }
        }

        // Initialize the attribute evaluator
        m_ASEvaluator.buildEvaluator(m_trainInstances);
        if (m_ASEvaluator instanceof AttributeTransformer) {
            m_trainInstances = ((AttributeTransformer) m_ASEvaluator).transformedHeader();
            m_transformer = (AttributeTransformer) m_ASEvaluator;
        }
        int fieldWidth = (int) (Math.log(m_trainInstances.numAttributes()) + 1.0);

        // Do the search
        attributeSet = m_searchMethod.search(m_ASEvaluator, m_trainInstances);

        // try to determine if the search method uses an attribute transformer---
        // this is a bit of a hack to make things work properly with RankSearch
        // using PrincipalComponents as its attribute ranker
        try {
            BeanInfo bi = Introspector.getBeanInfo(m_searchMethod.getClass());
            PropertyDescriptor properties[];
            // methods = bi.getMethodDescriptors();
            properties = bi.getPropertyDescriptors();
            for (PropertyDescriptor propertie : properties) {
                propertie.getDisplayName();
                Method meth = propertie.getReadMethod();
                Object retType = meth.getReturnType();
                if (retType.equals(ASEvaluation.class)) {
                    Class<?> args[] = {};
                    ASEvaluation tempEval = (ASEvaluation) (meth.invoke(m_searchMethod, (Object[]) args));
                    if (tempEval instanceof AttributeTransformer) {
                        // grab the transformed data header
                        m_trainInstances = ((AttributeTransformer) tempEval).transformedHeader();
                        m_transformer = (AttributeTransformer) tempEval;
                    }
                }
            }
        } catch (IntrospectionException ex) {
            System.err.println("AttributeSelection: Couldn't " + "introspect");
        }

        // Do any postprocessing that an attribute selection method might require
        attributeSet = m_ASEvaluator.postProcess(attributeSet);
        if (!m_doRank) {
            m_selectionResults.append(printSelectionResults());
        }

        if ((m_searchMethod instanceof RankedOutputSearch) && m_doRank == true) {
            try {
                m_attributeRanking = ((RankedOutputSearch) m_searchMethod).rankedAttributes();
            } catch (Exception ex) {
                ex.printStackTrace();
                throw ex;
            }
            m_selectionResults.append(printSelectionResults());
            m_selectionResults.append("Ranked attributes:\n");

            // retrieve the number of attributes to retain
            m_numToSelect = ((RankedOutputSearch) m_searchMethod).getCalculatedNumToSelect();

            // determine fieldwidth for merit
            int f_p = 0;
            int w_p = 0;

            for (int i = 0; i < m_numToSelect; i++) {
                double precision = (Math.abs(m_attributeRanking[i][1])
                        - (int) (Math.abs(m_attributeRanking[i][1])));
                double intPart = (int) (Math.abs(m_attributeRanking[i][1]));

                if (precision > 0) {
                    precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10))) + 3;
                }
                if (precision > f_p) {
                    f_p = (int) precision;
                }

                if (intPart == 0) {
                    if (w_p < 2) {
                        w_p = 2;
                    }
                } else if ((Math.abs((Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10))) + 1) > w_p) {
                    if (m_attributeRanking[i][1] > 0) {
                        w_p = (int) Math.abs((Math.log(Math.abs(m_attributeRanking[i][1])) / Math.log(10))) + 1;
                    }
                }
            }

            for (int i = 0; i < m_numToSelect; i++) {
                m_selectionResults.append(Utils.doubleToString(m_attributeRanking[i][1], f_p + w_p + 1, f_p)
                        + Utils.doubleToString((m_attributeRanking[i][0] + 1), fieldWidth + 1, 0) + " "
                        + m_trainInstances.attribute((int) m_attributeRanking[i][0]).name() + "\n");
            }

            // set up the selected attributes array - usable by a filter or
            // whatever
            if (m_trainInstances.classIndex() >= 0) {
                if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
                        && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator))
                        || m_ASEvaluator instanceof AttributeTransformer) {
                    // one more for the class
                    m_selectedAttributeSet = new int[m_numToSelect + 1];
                    m_selectedAttributeSet[m_numToSelect] = m_trainInstances.classIndex();
                } else {
                    m_selectedAttributeSet = new int[m_numToSelect];
                }
            } else {
                m_selectedAttributeSet = new int[m_numToSelect];
            }

            m_selectionResults.append("\nSelected attributes: ");

            for (int i = 0; i < m_numToSelect; i++) {
                m_selectedAttributeSet[i] = (int) m_attributeRanking[i][0];

                if (i == m_numToSelect - 1) {
                    m_selectionResults.append(((int) m_attributeRanking[i][0] + 1) + " : " + (i + 1) + "\n");
                } else {
                    m_selectionResults.append(((int) m_attributeRanking[i][0] + 1));
                    m_selectionResults.append(",");
                }
            }
        } else {
            // set up the selected attributes array - usable by a filter or
            // whatever
            if ((!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
                    && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator))
                    || m_trainInstances.classIndex() >= 0)
            // one more for the class
            {
                m_selectedAttributeSet = new int[attributeSet.length + 1];
                m_selectedAttributeSet[attributeSet.length] = m_trainInstances.classIndex();
            } else {
                m_selectedAttributeSet = new int[attributeSet.length];
            }

            for (int i = 0; i < attributeSet.length; i++) {
                m_selectedAttributeSet[i] = attributeSet[i];
            }

            m_selectionResults.append("Selected attributes: ");

            for (int i = 0; i < attributeSet.length; i++) {
                if (i == (attributeSet.length - 1)) {
                    m_selectionResults.append((attributeSet[i] + 1) + " : " + attributeSet.length + "\n");
                } else {
                    m_selectionResults.append((attributeSet[i] + 1) + ",");
                }
            }

            for (int element : attributeSet) {
                m_selectionResults
                        .append("                     " + m_trainInstances.attribute(element).name() + "\n");
            }
        }

        // Cross validation should be called from here
        if (m_doXval == true) {
            m_selectionResults.append(CrossValidateAttributes());
        }

        // set up the attribute filter with the selected attributes
        if (m_selectedAttributeSet != null && !m_doXval) {
            m_attributeFilter = new Remove();
            m_attributeFilter.setAttributeIndicesArray(m_selectedAttributeSet);
            m_attributeFilter.setInvertSelection(true);
            m_attributeFilter.setInputFormat(m_trainInstances);
        }

        // Save space
        m_trainInstances = new Instances(m_trainInstances, 0);
        m_ASEvaluator.clean();
    }

    /**
     * Perform attribute selection with a particular evaluator and a set of
     * options specifying search method and options for the search method and
     * evaluator.
     * 
     * @param ASEvaluator an evaluator object
     * @param options an array of options, not only for the evaluator but also the
     *          search method (if any) and an input data file
     * @param train the input instances
     * @return the results of attribute selection as a String
     * @exception Exception if incorrect options are supplied
     */
    public static String SelectAttributes(ASEvaluation ASEvaluator, String[] options, Instances train)
            throws Exception {
        int seed = 1, folds = 10;
        String foldsString, seedString, searchName;
        String classString;
        String searchClassName;
        String[] searchOptions = null; // new String [1];
        ASSearch searchMethod = null;
        boolean doCrossVal = false;
        int classIndex = -1;
        boolean helpRequested = false;
        AttributeSelection trainSelector = new AttributeSelection();

        try {
            if (Utils.getFlag('h', options)) {
                helpRequested = true;
            }

            // does data already have a class attribute set?
            if (train.classIndex() != -1) {
                classIndex = train.classIndex() + 1;
            }

            // get basic options (options that are the same for all attribute selectors)
            classString = Utils.getOption('c', options);

            if (classString.length() != 0) {
                if (classString.equals("first")) {
                    classIndex = 1;
                } else if (classString.equals("last")) {
                    classIndex = train.numAttributes();
                } else {
                    classIndex = Integer.parseInt(classString);
                }
            }

            if ((classIndex != -1) && ((classIndex == 0) || (classIndex > train.numAttributes()))) {
                throw new Exception("Class index out of range.");
            }

            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                // classIndex = train.numAttributes();
                // train.setClassIndex(classIndex - 1);
            }

            foldsString = Utils.getOption('x', options);

            if (foldsString.length() != 0) {
                folds = Integer.parseInt(foldsString);
                doCrossVal = true;
            }

            trainSelector.setFolds(folds);
            trainSelector.setXval(doCrossVal);

            seedString = Utils.getOption('n', options);

            if (seedString.length() != 0) {
                seed = Integer.parseInt(seedString);
            }

            trainSelector.setSeed(seed);

            searchName = Utils.getOption('s', options);

            if ((searchName.length() == 0) && (!(ASEvaluator instanceof AttributeEvaluator))) {
                throw new Exception("No search method given.");
            }

            if (searchName.length() != 0) {
                searchName = searchName.trim();
                // split off any search options
                int breakLoc = searchName.indexOf(' ');
                searchClassName = searchName;
                String searchOptionsString = "";

                if (breakLoc != -1) {
                    searchClassName = searchName.substring(0, breakLoc);
                    searchOptionsString = searchName.substring(breakLoc).trim();
                    searchOptions = Utils.splitOptions(searchOptionsString);
                }
            } else {
                try {
                    searchClassName = new String("weka.attributeSelection.Ranker");
                    searchMethod = (ASSearch) Class.forName(searchClassName).newInstance();
                } catch (Exception e) {
                    throw new Exception("Can't create Ranker object");
                }
            }

            // if evaluator is a subset evaluator
            // create search method and set its options (if any)
            if (searchMethod == null) {
                searchMethod = ASSearch.forName(searchClassName, searchOptions);
            }

            // set the search method
            trainSelector.setSearch(searchMethod);
        } catch (Exception e) {
            throw new Exception('\n' + e.getMessage() + makeOptionString(ASEvaluator, searchMethod));
        }

        try {
            // Set options for ASEvaluator
            if (ASEvaluator instanceof OptionHandler) {
                ((OptionHandler) ASEvaluator).setOptions(options);
            }

            /*
             * // Set options for Search method if (searchMethod instanceof
             * OptionHandler) { if (searchOptions != null) {
             * ((OptionHandler)searchMethod).setOptions(searchOptions); } }
             * Utils.checkForRemainingOptions(searchOptions);
             */
        } catch (Exception e) {
            throw new Exception("\n" + e.getMessage() + makeOptionString(ASEvaluator, searchMethod));
        }

        try {
            Utils.checkForRemainingOptions(options);
        } catch (Exception e) {
            throw new Exception('\n' + e.getMessage() + makeOptionString(ASEvaluator, searchMethod));
        }

        if (helpRequested) {
            System.out.println(makeOptionString(ASEvaluator, searchMethod));
            System.exit(0);
        }

        // set the attribute evaluator
        trainSelector.setEvaluator(ASEvaluator);

        // do the attribute selection
        trainSelector.SelectAttributes(train);

        // return the results string
        return trainSelector.toResultsString();
    }

    /**
     * Assembles a text description of the attribute selection results.
     * 
     * @return a string describing the results of attribute selection.
     */
    private String printSelectionResults() {
        StringBuffer text = new StringBuffer();
        text.append("\n\n=== Attribute Selection on all input data ===\n\n" + "Search Method:\n");
        text.append(m_searchMethod.toString());
        text.append("\nAttribute ");

        if (m_ASEvaluator instanceof SubsetEvaluator) {
            text.append("Subset Evaluator (");
        } else {
            text.append("Evaluator (");
        }

        if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
                && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) {
            text.append("supervised, ");
            text.append("Class (");

            if (m_trainInstances.attribute(m_trainInstances.classIndex()).isNumeric()) {
                text.append("numeric): ");
            } else {
                text.append("nominal): ");
            }

            text.append((m_trainInstances.classIndex() + 1) + " "
                    + m_trainInstances.attribute(m_trainInstances.classIndex()).name() + "):\n");
        } else {
            text.append("unsupervised):\n");
        }

        text.append(m_ASEvaluator.toString() + "\n");
        return text.toString();
    }

    /**
     * Make up the help string giving all the command line options
     * 
     * @param ASEvaluator the attribute evaluator to include options for
     * @param searchMethod the search method to include options for
     * @return a string detailing the valid command line options
     * @throws Exception if something goes wrong
     */
    private static String makeOptionString(ASEvaluation ASEvaluator, ASSearch searchMethod) throws Exception {

        StringBuffer optionsText = new StringBuffer("");
        // General options
        optionsText.append("\n\nGeneral options:\n\n");
        optionsText.append("-h\n\tdisplay this help\n");
        optionsText.append("-i <name of input file>\n");
        optionsText.append("\tSets training file.\n");
        optionsText.append("-c <class index>\n");
        optionsText.append("\tSets the class index for supervised attribute\n");
        optionsText.append("\tselection. Default=last column.\n");
        optionsText.append("-s <class name>\n");
        optionsText.append("\tSets search method for subset evaluators.\n");
        optionsText.append("-x <number of folds>\n");
        optionsText.append("\tPerform a cross validation.\n");
        optionsText.append("-n <random number seed>\n");
        optionsText.append("\tUse in conjunction with -x.\n");

        // Get attribute evaluator-specific options
        if (ASEvaluator instanceof OptionHandler) {
            optionsText.append("\nOptions specific to " + ASEvaluator.getClass().getName() + ":\n\n");
            Enumeration<Option> enu = ((OptionHandler) ASEvaluator).listOptions();

            while (enu.hasMoreElements()) {
                Option option = enu.nextElement();
                optionsText.append(option.synopsis() + '\n');
                optionsText.append(option.description() + "\n");
            }
        }

        if (searchMethod != null) {
            if (searchMethod instanceof OptionHandler) {
                optionsText.append("\nOptions specific to " + searchMethod.getClass().getName() + ":\n\n");
                Enumeration<Option> enu = ((OptionHandler) searchMethod).listOptions();

                while (enu.hasMoreElements()) {
                    Option option = enu.nextElement();
                    optionsText.append(option.synopsis() + '\n');
                    optionsText.append(option.description() + "\n");
                }
            }
        } else {
            if (ASEvaluator instanceof SubsetEvaluator) {
                System.out.println("No search method given.");
            }
        }

        return optionsText.toString();
    }

    /**
     * Main method for testing this class.
     * 
     * @param args the options
     */
    public static void main(String[] args) {
        try {
            if (args.length == 0) {
                throw new Exception("The first argument must be the name of an " + "attribute/subset evaluator");
            }

            String EvaluatorName = args[0];
            args[0] = "";
            ASEvaluation newEval = ASEvaluation.forName(EvaluatorName, null);
            System.out.println(SelectAttributes(newEval, args));
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }
}
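
Example command line

The main method above takes the fully qualified name of the evaluator class as its first argument; everything after it is parsed as the options listed in the class comment (-i, -c, -s, -x, -n). A typical invocation, assuming weka.jar is on the classpath and iris.arff stands in for any ARFF training file, might look like this:

java -cp weka.jar weka.attributeSelection.AttributeSelection \
    weka.attributeSelection.CfsSubsetEval \
    -i iris.arff -c last \
    -s "weka.attributeSelection.BestFirst -N 10" \
    -x 10 -n 1

This evaluates attribute subsets with CfsSubsetEval, searches with BestFirst (the same -s example given in the class comment), treats the last attribute as the class, and reports a 10-fold cross-validation of the selection using random seed 1.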