weka.clusterers.FilteredClusterer.java Source code

Introduction

Here is the source code for the class weka.clusterers.FilteredClusterer (file FilteredClusterer.java).
Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * FilteredClusterer.java
 * Copyright (C) 2006-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.clusterers;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.*;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

/**
 * <!-- globalinfo-start --> Class for running an arbitrary clusterer on data
 * that has been passed through an arbitrary filter. Like the clusterer, the
 * structure of the filter is based exclusively on the training data and test
 * instances will be processed by the filter without changing their structure.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -F &lt;filter specification&gt;
 *  Full class name of filter to use, followed
 *  by filter options.
 *  eg: "weka.filters.unsupervised.attribute.Remove -V -R 1,2"
 * (default: weka.filters.AllFilter)
 * </pre>
 * 
 * <pre>
 * -W
 *  Full name of base clusterer.
 *  (default: weka.clusterers.SimpleKMeans)
 * </pre>
 * 
 * <pre>
 * Options specific to clusterer weka.clusterers.SimpleKMeans:
 * </pre>
 * 
 * <pre>
 * -N &lt;num&gt;
 *  number of clusters.
 *  (default 2).
 * </pre>
 * 
 * <pre>
 * -V
 *  Display std. deviations for centroids.
 * </pre>
 * 
 * <pre>
 * -M
 *  Replace missing values with mean/mode.
 * </pre>
 * 
 * <pre>
 * -S &lt;num&gt;
 *  Random number seed.
 *  (default 10)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * Based on code from the FilteredClassifier by Len Trigg.
 * 
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 * @see weka.classifiers.meta.FilteredClassifier
 */
public class FilteredClusterer extends SingleClustererEnhancer implements Drawable {

    /** for serialization. */
    private static final long serialVersionUID = 1420005943163412943L;

    /**
     * The filter that training data and individual instances are passed through
     * before they reach the base clusterer. May be {@code null} if
     * {@link #setFilter(Filter)} was called with {@code null}.
     */
    protected Filter m_Filter;

    /**
     * The structure of the filtered training data, as obtained from
     * {@code stringFreeStructure()} in {@link #buildClusterer(Instances)};
     * {@code null} until a model has been built.
     */
    protected Instances m_FilteredInstances;

    /**
     * Default constructor. Uses {@link SimpleKMeans} as base clusterer and
     * {@code weka.filters.AllFilter} as filter.
     */
    public FilteredClusterer() {
        m_Clusterer = new SimpleKMeans();
        m_Filter = new weka.filters.AllFilter();
    }

    /**
     * Returns a string describing this clusterer.
     * 
     * @return a description of the clusterer suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String globalInfo() {
        return "Class for running an arbitrary clusterer on data that has been passed "
                + "through an arbitrary filter. Like the clusterer, the structure of the filter "
                + "is based exclusively on the training data and test instances will be processed "
                + "by the filter without changing their structure.";
    }

    /**
     * String describing default filter.
     * 
     * @return the default filter classname
     */
    protected String defaultFilterString() {
        return weka.filters.AllFilter.class.getName();
    }

    /**
     * Returns an enumeration describing the available options: the
     * {@code -F} filter option of this class followed by all options listed by
     * the superclass.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();

        result.addElement(new Option("\tFull class name of filter to use, followed\n" + "\tby filter options.\n"
                + "\teg: \"weka.filters.unsupervised.attribute.Remove -V -R 1,2\"\n" + "(default: "
                + defaultFilterString() + ")", "F", 1, "-F <filter specification>"));

        result.addAll(Collections.list(super.listOptions()));

        return result.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -F &lt;filter specification&gt;
     *  Full class name of filter to use, followed
     *  by filter options.
     *  eg: "weka.filters.unsupervised.attribute.Remove -V -R 1,2"
     * (default: weka.filters.AllFilter)
     * </pre>
     * 
     * <pre>
     * -W
     *  Full name of base clusterer.
     *  (default: weka.clusterers.SimpleKMeans)
     * </pre>
     * 
     * <pre>
     * Options specific to clusterer weka.clusterers.SimpleKMeans:
     * </pre>
     * 
     * <pre>
     * -N &lt;num&gt;
     *  number of clusters.
     *  (default 2).
     * </pre>
     * 
     * <pre>
     * -V
     *  Display std. deviations for centroids.
     * </pre>
     * 
     * <pre>
     * -M
     *  Replace missing values with mean/mode.
     * </pre>
     * 
     * <pre>
     * -S &lt;num&gt;
     *  Random number seed.
     *  (default 10)
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String filterString = Utils.getOption('F', options);

        if (filterString.length() > 0) {
            String[] filterOptions = Utils.splitOptions(filterString);
            if (filterOptions.length == 0) {
                throw new IllegalArgumentException("Invalid filter specification string");
            }
            // first token is the filter class name; blank it so that only the
            // filter's own options are handed to the new instance
            String filterName = filterOptions[0];
            filterOptions[0] = "";
            setFilter((Filter) Utils.forName(Filter.class, filterName, filterOptions));
        } else {
            // no -F given: fall back to the default filter
            setFilter(new weka.filters.AllFilter());
        }

        super.setOptions(options);

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the clusterer. The {@code -F} option is
     * only emitted when a filter is set; without it, {@link #setOptions(String[])}
     * falls back to the default filter.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();

        // a null filter has no specification; skip -F instead of failing
        if (getFilter() != null) {
            result.addElement("-F");
            result.addElement(getFilterSpec());
        }

        Collections.addAll(result, super.getOptions());

        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns the tip text for this property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String filterTipText() {
        return "The filter to be used.";
    }

    /**
     * Sets the filter. Prints a warning to standard output if the filter is a
     * {@link SupervisedFilter}.
     * 
     * @param filter the filter with all options set.
     */
    public void setFilter(Filter filter) {
        m_Filter = filter;

        if (m_Filter instanceof SupervisedFilter) {
            System.out.println("WARNING: you are using a supervised filter, which will leak "
                    + "information about the class attribute!");
        }
    }

    /**
     * Gets the filter used.
     * 
     * @return the filter, possibly {@code null}
     */
    public Filter getFilter() {
        return m_Filter;
    }

    /**
     * Gets the filter specification string, which contains the class name of the
     * filter and any options to the filter.
     * 
     * @return the filter string, or an empty string if no filter is set
     */
    protected String getFilterSpec() {
        Filter filter = getFilter();

        // getCapabilities() tolerates a missing filter, so this must not fail either
        if (filter == null) {
            return "";
        }

        String result = filter.getClass().getName();
        if (filter instanceof OptionHandler) {
            result += " " + Utils.joinOptions(((OptionHandler) filter).getOptions());
        }

        return result;
    }

    /**
     * Returns default capabilities of the clusterer. If a filter is set, its
     * capabilities are returned; otherwise everything except
     * {@code NO_CLASS} is disabled. In both cases the dependency flag is
     * enabled for every capability.
     * 
     * @return the capabilities of this clusterer
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result;

        if (getFilter() == null) {
            result = super.getCapabilities();
            result.disableAll();
            result.enable(Capability.NO_CLASS);
        } else {
            result = getFilter().getCapabilities();
        }

        // set dependencies
        for (Capability cap : Capability.values()) {
            result.enableDependency(cap);
        }

        return result;
    }

    /**
     * Build the clusterer on the filtered data. The filter is initialised with
     * the training data, the data is filtered, and the base clusterer is built
     * on the result.
     * 
     * @param data the training data
     * @throws Exception if no base clusterer or no filter has been set, or if
     *           the clusterer could not be built successfully
     */
    @Override
    public void buildClusterer(Instances data) throws Exception {
        if (m_Clusterer == null) {
            throw new Exception("No base clusterer has been set!");
        }
        if (m_Filter == null) {
            throw new Exception("No filter has been set!");
        }

        // remove instances with missing class (work on a copy so that the
        // caller's dataset is left untouched)
        if (data.classIndex() > -1) {
            data = new Instances(data);
            data.deleteWithMissingClass();
        }

        m_Filter.setInputFormat(data); // filter capabilities are checked here
        data = Filter.useFilter(data, m_Filter);

        // can clusterer handle the data?
        getClusterer().getCapabilities().testWithFail(data);

        m_FilteredInstances = data.stringFreeStructure();
        m_Clusterer.buildClusterer(data);
    }

    /**
     * Passes the given instance through the filter and returns the base
     * clusterer's distribution for the filtered instance.
     * 
     * @param instance the instance to be processed
     * @return the distribution computed by the base clusterer for the filtered
     *         instance
     * @throws Exception if no filter has been set, if the filter still has
     *           pending output, if the filter does not make the instance
     *           immediately available, or if the base clusterer fails
     */
    @Override
    public double[] distributionForInstance(Instance instance) throws Exception {
        if (m_Filter == null) {
            throw new Exception("No filter has been set!");
        }

        if (m_Filter.numPendingOutput() > 0) {
            throw new Exception("Filter output queue not empty!");
        }

        if (!m_Filter.input(instance)) {
            throw new Exception("Filter didn't make the test instance immediately available!");
        }

        m_Filter.batchFinished();
        Instance filtered = m_Filter.output();

        return m_Clusterer.distributionForInstance(filtered);
    }

    /**
     * Output a representation of this clusterer: the clusterer and filter
     * specifications, the filtered header and the base clusterer's model, or a
     * short notice if no model has been built yet.
     * 
     * @return a representation of this clusterer
     */
    @Override
    public String toString() {
        if (m_FilteredInstances == null) {
            return "FilteredClusterer: No model built yet.";
        }

        return "FilteredClusterer using " + getClustererSpec() + " on data filtered through "
                + getFilterSpec() + "\n\nFiltered Header\n" + m_FilteredInstances.toString()
                + "\n\nClusterer Model\n" + m_Clusterer.toString();
    }

    /**
     * Returns the type of graph this clusterer represents. Delegates to the
     * base clusterer if it is {@link Drawable}.
     *
     * @return the graph type of the base clusterer, or
     *         {@link Drawable#NOT_DRAWABLE} if it is not drawable
     */
    @Override
    public int graphType() {
        if (m_Clusterer instanceof Drawable) {
            return ((Drawable) m_Clusterer).graphType();
        } else {
            return Drawable.NOT_DRAWABLE;
        }
    }

    /**
     * Returns graph describing the clusterer (if possible). Delegates to the
     * base clusterer if it is {@link Drawable}.
     *
     * @return the graph of the clusterer in dotty format
     * @throws Exception if the clusterer cannot be graphed
     */
    @Override
    public String graph() throws Exception {
        if (m_Clusterer instanceof Drawable) {
            return ((Drawable) m_Clusterer).graph();
        } else {
            throw new Exception("Clusterer: " + getClustererSpec() + " cannot be graphed");
        }
    }

    /**
     * Returns the revision string.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision$");
    }

    /**
     * Main method for testing this class.
     * 
     * @param args the commandline options, use "-h" for help
     */
    public static void main(String[] args) {
        runClusterer(new FilteredClusterer(), args);
    }
}