weka.attributeSelection.ExhaustiveSearch.java Source code

Java tutorial

Introduction

Here is the source code for weka.attributeSelection.ExhaustiveSearch.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    ExhaustiveSearch.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import java.math.BigInteger;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

/**
 * <!-- globalinfo-start --> ExhaustiveSearch : <br/>
 * <br/>
 * Performs an exhaustive search through the space of attribute subsets starting
 * from the empty set of attrubutes. Reports the best subset found.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -V
 *  Output subsets as the search progresses.
 *  (default = false).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class ExhaustiveSearch extends ASSearch implements OptionHandler {

    /** for serialization */
    static final long serialVersionUID = 5741842861142379712L;

    /** the best feature set found during the search */
    private BitSet m_bestGroup;

    /** the merit of the best subset found */
    private double m_bestMerit;

    /** does the data have a class */
    private boolean m_hasClass;

    /** holds the class index */
    private int m_classIndex;

    /** number of attributes in the data */
    private int m_numAttribs;

    /** if true, then ouput new best subsets as the search progresses */
    private boolean m_verbose;

    /** the number of subsets evaluated during the search */
    private int m_evaluations;

    /**
     * Returns a string describing this search method
     * 
     * @return a description of the search suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String globalInfo() {
        return "ExhaustiveSearch : \n\nPerforms an exhaustive search through "
                + "the space of attribute subsets starting from the empty set of "
                + "attrubutes. Reports the best subset found.";
    }

    /**
     * Constructor
     */
    public ExhaustiveSearch() {
        resetOptions();
    }

    /**
     * Returns an enumeration describing the available options.
     * 
     * @return an enumeration of all the available options.
     **/
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> newVector = new Vector<Option>(2);

        newVector.addElement(
                new Option("\tOutput subsets as the search progresses." + "\n\t(default = false).", "V", 0, "-V"));
        return newVector.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -V
     *  Output subsets as the search progresses.
     *  (default = false).
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     * 
     **/
    @Override
    public void setOptions(String[] options) throws Exception {

        resetOptions();

        setVerbose(Utils.getFlag('V', options));

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String verboseTipText() {
        return "Print progress information. Sends progress info to the terminal " + "as the search progresses.";
    }

    /**
     * set whether or not to output new best subsets as the search proceeds
     * 
     * @param v true if output is to be verbose
     */
    public void setVerbose(boolean v) {
        m_verbose = v;
    }

    /**
     * get whether or not output is verbose
     * 
     * @return true if output is set to verbose
     */
    public boolean getVerbose() {
        return m_verbose;
    }

    /**
     * Gets the current settings of RandomSearch.
     * 
     * @return an array of strings suitable for passing to setOptions()
     */
    @Override
    public String[] getOptions() {

        Vector<String> options = new Vector<String>();

        if (m_verbose) {
            options.add("-V");
        }

        return options.toArray(new String[0]);
    }

    /**
     * prints a description of the search
     * 
     * @return a description of the search as a string
     */
    @Override
    public String toString() {
        StringBuffer text = new StringBuffer();

        text.append("\tExhaustive Search.\n\tStart set: ");

        text.append("no attributes\n");

        text.append("\tNumber of evaluations: " + m_evaluations + "\n");
        text.append("\tMerit of best subset found: " + Utils.doubleToString(Math.abs(m_bestMerit), 8, 3) + "\n");

        return text.toString();
    }

    /**
     * Searches the attribute subset space using an exhaustive search.
     * 
     * @param ASEval the attribute evaluator to guide the search
     * @param data the training instances.
     * @return an array (not necessarily ordered) of selected attribute indexes
     * @throws Exception if the search can't be completed
     */
    @Override
    public int[] search(ASEvaluation ASEval, Instances data) throws Exception {
        double best_merit;
        double tempMerit;
        boolean done = false;
        int sizeOfBest;
        int tempSize;

        BigInteger space = BigInteger.ZERO;

        m_evaluations = 0;
        m_numAttribs = data.numAttributes();
        m_bestGroup = new BitSet(m_numAttribs);

        if (!(ASEval instanceof SubsetEvaluator)) {
            throw new Exception(ASEval.getClass().getName() + " is not a " + "Subset evaluator!");
        }

        if (ASEval instanceof UnsupervisedSubsetEvaluator) {
            m_hasClass = false;
        } else {
            m_hasClass = true;
            m_classIndex = data.classIndex();
        }

        SubsetEvaluator ASEvaluator = (SubsetEvaluator) ASEval;
        m_numAttribs = data.numAttributes();

        best_merit = ASEvaluator.evaluateSubset(m_bestGroup);
        m_evaluations++;
        sizeOfBest = countFeatures(m_bestGroup);

        BitSet tempGroup = new BitSet(m_numAttribs);
        tempMerit = ASEvaluator.evaluateSubset(tempGroup);

        if (m_verbose) {
            System.out.println("Zero feature subset (" + Utils.doubleToString(Math.abs(tempMerit), 8, 5) + ")");
        }

        if (tempMerit >= best_merit) {
            tempSize = countFeatures(tempGroup);
            if (tempMerit > best_merit || (tempSize < sizeOfBest)) {
                best_merit = tempMerit;
                m_bestGroup = (BitSet) (tempGroup.clone());
                sizeOfBest = tempSize;
            }
        }

        int numatts = (m_hasClass) ? m_numAttribs - 1 : m_numAttribs;
        BigInteger searchSpaceEnd = BigInteger.ONE.add(BigInteger.ONE).pow(numatts).subtract(BigInteger.ONE);

        while (!done) {
            // the next subset
            space = space.add(BigInteger.ONE);
            if (space.equals(searchSpaceEnd)) {
                done = true;
            }
            tempGroup.clear();
            for (int i = 0; i < numatts; i++) {
                if (space.testBit(i)) {
                    if (!m_hasClass) {
                        tempGroup.set(i);
                    } else {
                        int j = (i >= m_classIndex) ? i + 1 : i;
                        tempGroup.set(j);
                    }
                }
            }

            tempMerit = ASEvaluator.evaluateSubset(tempGroup);
            m_evaluations++;
            if (tempMerit >= best_merit) {
                tempSize = countFeatures(tempGroup);
                if (tempMerit > best_merit || (tempSize < sizeOfBest)) {
                    best_merit = tempMerit;
                    m_bestGroup = (BitSet) (tempGroup.clone());
                    sizeOfBest = tempSize;
                    if (m_verbose) {
                        System.out.println("New best subset (" + Utils.doubleToString(Math.abs(best_merit), 8, 5)
                                + "): " + printSubset(m_bestGroup));
                    }
                }
            }
        }

        m_bestMerit = best_merit;

        return attributeList(m_bestGroup);
    }

    /**
     * counts the number of features in a subset
     * 
     * @param featureSet the feature set for which to count the features
     * @return the number of features in the subset
     */
    private int countFeatures(BitSet featureSet) {
        int count = 0;
        for (int i = 0; i < m_numAttribs; i++) {
            if (featureSet.get(i)) {
                count++;
            }
        }
        return count;
    }

    /**
     * prints a subset as a series of attribute numbers
     * 
     * @param temp the subset to print
     * @return a subset as a String of attribute numbers
     */
    private String printSubset(BitSet temp) {
        StringBuffer text = new StringBuffer();

        for (int j = 0; j < m_numAttribs; j++) {
            if (temp.get(j)) {
                text.append((j + 1) + " ");
            }
        }
        return text.toString();
    }

    /**
     * converts a BitSet into a list of attribute indexes
     * 
     * @param group the BitSet to convert
     * @return an array of attribute indexes
     **/
    private int[] attributeList(BitSet group) {
        int count = 0;

        // count how many were selected
        for (int i = 0; i < m_numAttribs; i++) {
            if (group.get(i)) {
                count++;
            }
        }

        int[] list = new int[count];
        count = 0;

        for (int i = 0; i < m_numAttribs; i++) {
            if (group.get(i)) {
                list[count++] = i;
            }
        }

        return list;
    }

    /**
     * resets to defaults
     */
    private void resetOptions() {
        m_verbose = false;
        m_evaluations = 0;
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }
}