weka.filters.Filter.java Source code

Java tutorial

Introduction

Here is the source code for weka.filters.Filter.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    Filter.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters;

import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.CapabilitiesHandler;
import weka.core.CapabilitiesIgnorer;
import weka.core.CommandlineRunnable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Queue;
import weka.core.RelationalLocator;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializedObject;
import weka.core.StringLocator;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.core.Version;
import weka.core.converters.ConverterUtils.DataSource;

import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Vector;

/**
 * An abstract class for instance filters: objects that take instances as input,
 * carry out some transformation on the instance and then output the instance.
 * The method implementations in this class assume that most of the work will be
 * done in the methods overridden by subclasses.
 * <p>
 * 
 * A simple example of filter use. This example doesn't remove instances from
 * the output queue until all instances have been input, so has higher memory
 * consumption than an approach that uses output instances as they are made
 * available:
 * <p>
 * 
 * <code> <pre>
 *  Filter filter = ..some type of filter..
 *  Instances data = ..some instances..
 *  for (int i = 0; i < data.numInstances(); i++) {
 *    filter.input(data.instance(i));
 *  }
 *  filter.batchFinished();
 *  Instances newData = filter.getOutputFormat();
 *  Instance processed;
 *  while ((processed = filter.output()) != null) {
 *    newData.add(processed);
 *  }
 *  ..do something with newData..
 * </pre> </code>
 * 
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision$
 */
public abstract class Filter implements Serializable, CapabilitiesHandler, RevisionHandler, OptionHandler,
        CapabilitiesIgnorer, CommandlineRunnable {

    /** Version identifier used for serialization. */
    private static final long serialVersionUID = -8835063755891851218L;

    /** The output format for instances; null as long as no output format has been set. */
    private Instances m_OutputFormat = null;

    /** The queue holding the instances that are waiting to be collected as output. */
    private Queue m_OutputQueue = null;

    /** Locator for the string attributes in the output format */
    protected StringLocator m_OutputStringAtts = null;

    /** Locator for the string attributes in the input format */
    protected StringLocator m_InputStringAtts = null;

    /** Locator for the relational attributes in the output format */
    protected RelationalLocator m_OutputRelAtts = null;

    /** Locator for the relational attributes in the input format */
    protected RelationalLocator m_InputRelAtts = null;

    /** The input format for instances; also used to buffer input instances. */
    private Instances m_InputFormat = null;

    /** Records whether the filter is at the start of a batch */
    protected boolean m_NewBatch = true;

    /** True once the first batch has been finished */
    protected boolean m_FirstBatchDone = false;

    /** Whether the filter is run in debug mode. */
    protected boolean m_Debug = false;

    /** Flag requesting that capabilities are not checked. */
    protected boolean m_DoNotCheckCapabilities = false;

    /**
     * Tells whether the filter currently sits at the start of a batch. The
     * underlying flag is true for a freshly created filter and is set again by
     * setInputFormat() and batchFinished(); input() clears it.
     *
     * @return true if a new batch has been initiated
     * @see #m_NewBatch
     * @see #batchFinished()
     */
    public boolean isNewBatch() {
        return this.m_NewBatch;
    }

    /**
     * Tells whether the first batch of instances has been completed. Supervised
     * filters need this, as they "learn" from the first batch only and must not
     * be updated by later calls of batchFinished().
     *
     * @return true if the first batch has been processed
     * @see #m_FirstBatchDone
     * @see #batchFinished()
     */
    public boolean isFirstBatchDone() {
        return this.m_FirstBatchDone;
    }

    /**
     * Indicates whether the filter may fail to produce an output instance for
     * an input instance once the first batch has been completed. This default
     * implementation answers false; filters for which this can happen should
     * override the method and return true.
     *
     * @return false by default
     */
    public boolean mayRemoveInstanceAfterFirstBatchDone() {
        final boolean mayRemove = false;
        return mayRemove;
    }

    /**
     * Returns the Capabilities of this filter. The default enables everything
     * and sets the minimum number of instances to 0; derived filters have to
     * override this method to declare their actual capabilities.
     *
     * @return the capabilities of this object
     * @see Capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        final Capabilities caps = new Capabilities(this);

        caps.enableAll();
        caps.setMinimumNumberInstances(0);

        return caps;
    }

    /**
     * Returns the revision string, as extracted from the revision keyword.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        final String revisionKeyword = "$Revision$";
        return RevisionUtils.extract(revisionKeyword);
    }

    /**
     * Returns the Capabilities of this filter, customized based on the data.
     * If the data has no class attribute set, all class capabilities except
     * NO_CLASS are disabled (together with their dependencies); if a class
     * attribute is set, NO_CLASS and its dependency are disabled instead.
     *
     * @param data the data to use for customization
     * @return the capabilities of this object, based on the data
     * @see #getCapabilities()
     */
    public Capabilities getCapabilities(Instances data) {
        final Capabilities caps = getCapabilities();

        // class present -> NO_CLASS does not apply
        if (data.classIndex() != -1) {
            caps.disable(Capability.NO_CLASS);
            caps.disableDependency(Capability.NO_CLASS);
            return caps;
        }

        // no class -> drop every class capability apart from NO_CLASS
        for (Iterator<Capability> classCaps = caps.getClassCapabilities().capabilities(); classCaps.hasNext();) {
            final Capability current = classCaps.next();
            if (current == Capability.NO_CLASS) {
                continue;
            }
            caps.disable(current);
            caps.disableDependency(current);
        }

        return caps;
    }

    /**
     * Sets the format of output instances. The derived class should use this
     * method once it has determined the output format. A string-free copy of
     * the structure is stored, the output locators are re-initialized and the
     * relation name is extended with the filter's class name and its trimmed
     * options. Passing null clears the output format. In either case the
     * output queue is cleared.
     *
     * @param outputFormat the new output format
     */
    protected void setOutputFormat(Instances outputFormat) {

        if (outputFormat == null) {
            m_OutputFormat = null;
            m_OutputQueue = new Queue();
            return;
        }

        m_OutputFormat = outputFormat.stringFreeStructure();
        initOutputLocators(m_OutputFormat, null);

        // Rename the relation: <old name>-<filter class><options...>
        final StringBuilder renamed = new StringBuilder();
        renamed.append(outputFormat.relationName());
        renamed.append("-");
        renamed.append(this.getClass().getName());
        if (this instanceof OptionHandler) {
            final String[] currentOptions = ((OptionHandler) this).getOptions();
            for (int idx = 0; idx < currentOptions.length; idx++) {
                renamed.append(currentOptions[idx].trim());
            }
        }
        m_OutputFormat.setRelationName(renamed.toString());

        m_OutputQueue = new Queue();
    }

    /**
     * Gets the currently set input format instances. No copy is made, and the
     * returned dataset may contain buffered instances.
     *
     * @return the input Instances.
     */
    protected Instances getInputFormat() {
        return this.m_InputFormat;
    }

    /**
     * Gets a copy of just the structure of the input format instances.
     *
     * @return a copy of the structure (attribute information) of the input
     * format instances, or null if no input format is available
     */
    public Instances getCopyOfInputFormat() {
        if (getInputFormat() == null) {
            return null;
        }
        return new Instances(getInputFormat(), 0);
    }

    /**
     * Hands out the current input format itself rather than a copy of it.
     *
     * @return a reference to the current input format
     */
    protected Instances inputFormatPeek() {
        return this.m_InputFormat;
    }

    /**
     * Hands out the current output format itself rather than a copy of it.
     *
     * @return a reference to the current output format
     */
    protected Instances outputFormatPeek() {
        return this.m_OutputFormat;
    }

    /**
     * Adds an output instance to the queue. The derived class should use this
     * method for each output instance it makes available. This is a shortcut
     * for push(instance, true): the instance is copied before being queued,
     * but only if it has a reference to a dataset.
     *
     * @param instance the instance to be added to the queue.
     */
    protected void push(Instance instance) {
        final boolean copyBeforeQueueing = true;
        push(instance, copyBeforeQueueing);
    }

    /**
     * Adds an output instance to the queue. The derived class should use this
     * method for each output instance it makes available. A null instance is
     * ignored. An instance that has a reference to a dataset is copied first
     * (only if copyInstance is true) and then has its string/relational values
     * copied over to the output format. Finally the instance is assigned to
     * the output format and queued.
     *
     * @param instance the instance to be added to the queue.
     * @param copyInstance whether instance is to be copied
     */
    protected void push(Instance instance, boolean copyInstance) {

        if (instance == null) {
            return;
        }

        Instance queued = instance;
        if (queued.dataset() != null) {
            if (copyInstance) {
                queued = (Instance) queued.copy();
            }
            copyValues(queued, false);
        }

        queued.setDataset(m_OutputFormat);
        m_OutputQueue.push(queued);
    }

    /**
     * Clears the output queue by replacing it with a new, empty one.
     */
    protected void resetQueue() {
        this.m_OutputQueue = new Queue();
    }

    /**
     * Adds the supplied input instance to the input format dataset for later
     * processing. Use this method rather than getInputFormat().add(instance).
     * A null instance is ignored. The provided instance is copied before it is
     * buffered, because copying the string/relational values modifies the
     * instance it is applied to.
     *
     * @param instance the <code>Instance</code> to buffer.
     */
    protected void bufferInput(Instance instance) {

        if (instance == null) {
            return;
        }

        // work on a copy: copyValues() *does* modify the instance!
        final Instance buffered = (Instance) instance.copy();
        copyValues(buffered, true);
        m_InputFormat.add(buffered);
    }

    /**
     * Initializes the input attribute locators (string and relational). With
     * indices given, locating is restricted to those indices; with null
     * indices all attributes of the data are considered.
     *
     * @param data the data to initialize the locators with
     * @param indices if not null, the indices to which to restrict the locating
     */
    protected void initInputLocators(Instances data, int[] indices) {
        if (indices != null) {
            m_InputStringAtts = new StringLocator(data, indices);
            m_InputRelAtts = new RelationalLocator(data, indices);
            return;
        }
        m_InputStringAtts = new StringLocator(data);
        m_InputRelAtts = new RelationalLocator(data);
    }

    /**
     * Initializes the output attribute locators (string and relational). With
     * indices given, locating is restricted to those indices; with null
     * indices all attributes of the data are considered.
     *
     * @param data the data to initialize the locators with
     * @param indices if not null, the indices to which to restrict the locating
     */
    protected void initOutputLocators(Instances data, int[] indices) {
        if (indices != null) {
            m_OutputStringAtts = new StringLocator(data, indices);
            m_OutputRelAtts = new RelationalLocator(data, indices);
            return;
        }
        m_OutputStringAtts = new StringLocator(data);
        m_OutputRelAtts = new RelationalLocator(data);
    }

    /**
     * Copies string/relational values contained in the instance copied to a new
     * dataset. The Instance must already be assigned to a dataset. This dataset
     * and the destination dataset must have the same structure. Relational
     * values are copied first, string values second.
     *
     * @param instance the Instance containing the string/relational values to
     *          copy.
     * @param isInput if true the input format and input attribute locators are
     *          used otherwise the output format and output locators
     */
    protected void copyValues(Instance instance, boolean isInput) {

        final Instances destination;
        final RelationalLocator relationalAtts;
        final StringLocator stringAtts;

        if (isInput) {
            destination = m_InputFormat;
            relationalAtts = m_InputRelAtts;
            stringAtts = m_InputStringAtts;
        } else {
            destination = m_OutputFormat;
            relationalAtts = m_OutputRelAtts;
            stringAtts = m_OutputStringAtts;
        }

        RelationalLocator.copyRelationalValues(instance, destination, relationalAtts);
        StringLocator.copyStringValues(instance, destination, stringAtts);
    }

    /**
     * Takes string/relational values referenced by an Instance and copies them
     * from a source dataset to a destination dataset. The instance references are
     * updated to be valid for the destination dataset. The instance may have the
     * structure (i.e. number and attribute position) of either dataset (this
     * affects where references are obtained from). Only works if the number of
     * string/relational attributes is the same in both indices (implicitly these
     * string/relational attributes should be semantically same but just with
     * shifted positions). The input locators are used for the source dataset
     * and the output locators for the destination dataset.
     *
     * @param instance the instance containing references to strings/ relational
     *          values in the source dataset that will have references updated to
     *          be valid for the destination dataset.
     * @param instSrcCompat true if the instance structure is the same as the
     *          source, or false if it is the same as the destination (i.e. which
     *          of the string/relational attribute indices contains the correct
     *          locations for this instance).
     * @param srcDataset the dataset for which the current instance
     *          string/relational value references are valid (after any position
     *          mapping if needed)
     * @param destDataset the dataset for which the current instance
     *          string/relational value references need to be inserted (after any
     *          position mapping if needed)
     */
    protected void copyValues(Instance instance, boolean instSrcCompat, Instances srcDataset,
            Instances destDataset) {

        // relational values first ...
        final RelationalLocator srcRelational = m_InputRelAtts;
        final RelationalLocator destRelational = m_OutputRelAtts;
        RelationalLocator.copyRelationalValues(instance, instSrcCompat, srcDataset, srcRelational, destDataset,
                destRelational);

        // ... then the string values
        final StringLocator srcStrings = m_InputStringAtts;
        final StringLocator destStrings = m_OutputStringAtts;
        StringLocator.copyStringValues(instance, instSrcCompat, srcDataset, srcStrings, destDataset,
                destStrings);
    }

    /**
     * This will remove all buffered instances from the input format dataset.
     * Use this method rather than getInputFormat().delete(). If the input
     * locators report any string or relational attributes, the input format is
     * replaced by a string-free structure and both input locators are rebuilt
     * with their previously allowed indices; otherwise the buffered instances
     * are simply deleted.
     */
    protected void flushInput() {

        if ((m_InputStringAtts.getAttributeIndices().length == 0)
                && (m_InputRelAtts.getAttributeIndices().length == 0)) {
            // This more efficient than new Instances(m_InputFormat, 0);
            m_InputFormat.delete();
            return;
        }

        m_InputFormat = m_InputFormat.stringFreeStructure();
        m_InputStringAtts = new StringLocator(m_InputFormat, m_InputStringAtts.getAllowedIndices());
        m_InputRelAtts = new RelationalLocator(m_InputFormat, m_InputRelAtts.getAllowedIndices());
    }

    /**
     * Tests whether the filter can actually handle the data, using the
     * capabilities customized for that data.
     *
     * @param instanceInfo the data to test
     * @throws Exception if the test fails
     */
    protected void testInputFormat(Instances instanceInfo) throws Exception {
        final Capabilities customized = getCapabilities(instanceInfo);
        customized.testWithFail(instanceInfo);
    }

    /**
     * Sets the format of the input instances. If the filter is able to determine
     * the output format before seeing any input instances, it does so here. This
     * default implementation tests the data first, stores a string-free copy
     * of the structure as input format, clears the output format and output
     * queue, marks the filter as being at the start of its first batch and
     * initializes the input locators. Overriders should call
     * <code>super.setInputFormat(Instances)</code>
     *
     * @param instanceInfo an Instances object containing the input instance
     *          structure (any instances contained in the object are ignored -
     *          only the structure is required).
     * @return true if the outputFormat may be collected immediately; this
     *         default implementation always returns false
     * @throws Exception if the inputFormat can't be set successfully
     */
    public boolean setInputFormat(Instances instanceInfo) throws Exception {

        testInputFormat(instanceInfo);

        final Instances structure = instanceInfo.stringFreeStructure();
        m_InputFormat = structure;

        // reset the state left over from any previous use
        m_FirstBatchDone = false;
        m_NewBatch = true;
        m_OutputQueue = new Queue();
        m_OutputFormat = null;

        initInputLocators(m_InputFormat, null);
        return false;
    }

    /**
     * Gets the format of the output instances. This should only be called after
     * input() or batchFinished() has returned true. The relation name of the
     * output instances should be changed to reflect the action of the filter (eg:
     * add the filter name and options).
     *
     * @return an Instances object containing the output instance structure only.
     * @throws NullPointerException if no input structure has been defined (or the
     *           output format hasn't been determined yet)
     */
    public Instances getOutputFormat() {

        final Instances format = m_OutputFormat;
        if (format == null) {
            throw new NullPointerException("No output format defined.");
        }
        return new Instances(format, 0);
    }

    /**
     * Input an instance for filtering. Ordinarily the instance is processed and
     * made available for output immediately. Some filters require all instances
     * be read before producing output, in which case output instances should be
     * collected after calling batchFinished(). If the input marks the start of a
     * new batch, the output queue is cleared. This default implementation only
     * buffers the instance and assumes all instance conversion will occur when
     * batchFinished() is called.
     *
     * @param instance the input instance
     * @return true if the filtered instance may now be collected with output();
     *         this default implementation always returns false
     * @throws NullPointerException if the input format has not been defined.
     * @throws Exception if the input instance was not of the correct format or if
     *           there was a problem with the filtering.
     */
    public boolean input(Instance instance) throws Exception {

        if (m_InputFormat == null) {
            throw new NullPointerException("No input instance format defined");
        }

        final boolean startsNewBatch = m_NewBatch;
        if (startsNewBatch) {
            // first instance of a batch: start with an empty output queue
            m_OutputQueue = new Queue();
            m_NewBatch = false;
        }

        bufferInput(instance);
        return false;
    }

    /**
     * Signify that this batch of input to the filter is finished. If the filter
     * requires all instances prior to filtering, output() may now be called to
     * retrieve the filtered instances. Any subsequent instances filtered should
     * be filtered based on setting obtained from the first batch (unless the
     * inputFormat has been re-assigned or new options have been set). This
     * default implementation assumes all instance processing occurs during
     * setInputFormat() and input(): it flushes the buffered input, marks the
     * start of a new batch and records that the first batch is done.
     * <p>
     * If the output queue is empty and the output format contains string or
     * relational attributes, the output format is replaced by a fresh
     * string-free structure and both output locators are rebuilt for it (in
     * the same way flushInput() does for the input side).
     *
     * @return true if there are instances pending output
     * @throws NullPointerException if no input structure has been defined, or
     *           if no output format has been defined
     * @throws Exception if there was a problem finishing the batch.
     */
    public boolean batchFinished() throws Exception {

        if (m_InputFormat == null) {
            throw new NullPointerException("No input instance format defined");
        }
        flushInput();
        m_NewBatch = true;
        m_FirstBatchDone = true;

        // Without an output format there is nothing to clean up; skipping the
        // block lets numPendingOutput() below report the missing output format
        // with its descriptive message instead of failing here on a null
        // (or outdated) locator.
        if ((m_OutputFormat != null) && m_OutputQueue.empty()) {
            // Clear out references to old strings/relationals occasionally
            if ((m_OutputStringAtts.getAttributeIndices().length > 0)
                    || (m_OutputRelAtts.getAttributeIndices().length > 0)) {
                m_OutputFormat = m_OutputFormat.stringFreeStructure();
                // Rebuild BOTH locators for the new structure, mirroring flushInput()
                m_OutputStringAtts = new StringLocator(m_OutputFormat, m_OutputStringAtts.getAllowedIndices());
                m_OutputRelAtts = new RelationalLocator(m_OutputFormat, m_OutputRelAtts.getAllowedIndices());
            }
        }

        return (numPendingOutput() != 0);
    }

    /**
     * Output an instance after filtering and remove from the output queue.
     *
     * @return the next instance taken from the output queue (or null if the
     *         queue is empty).
     * @throws NullPointerException if no output structure has been defined
     */
    public Instance output() {

        if (m_OutputFormat == null) {
            throw new NullPointerException("No output instance format defined");
        }
        return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.pop();
    }

    /**
     * Output an instance after filtering but do not remove from the output queue.
     *
     * @return the instance at the head of the output queue (or null if the
     *         queue is empty).
     * @throws NullPointerException if no output structure has been defined
     */
    public Instance outputPeek() {

        if (m_OutputFormat == null) {
            throw new NullPointerException("No output instance format defined");
        }
        return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.peek();
    }

    /**
     * Returns the number of instances pending output, i.e. the current size of
     * the output queue.
     *
     * @return the number of instances pending output
     * @throws NullPointerException if no output structure has been defined
     */
    public int numPendingOutput() {

        if (m_OutputFormat == null) {
            throw new NullPointerException("No output instance format defined");
        }
        final int pending = m_OutputQueue.size();
        return pending;
    }

    /**
     * Returns whether the output format is ready to be collected
     *
     * @return true if the output format is set
     */
    public boolean isOutputFormatDefined() {
        if (m_OutputFormat == null) {
            return false;
        }
        return true;
    }

    /**
     * Creates a deep copy of the given filter using serialization.
     *
     * @param model the filter to copy
     * @return a deep copy of the filter
     * @throws Exception if an error occurs
     */
    public static Filter makeCopy(Filter model) throws Exception {
        final SerializedObject serialized = new SerializedObject(model);
        return (Filter) serialized.getObject();
    }

    /**
     * Creates a given number of deep copies of the given filter using
     * serialization. The model is serialized once and each copy is obtained
     * from that serialized form.
     *
     * @param model the filter to copy
     * @param num the number of filter copies to create.
     * @return an array of filters.
     * @throws Exception if no model was supplied or an error occurs
     */
    public static Filter[] makeCopies(Filter model, int num) throws Exception {

        if (model == null) {
            throw new Exception("No model filter set");
        }

        final Filter[] copies = new Filter[num];
        final SerializedObject template = new SerializedObject(model);
        int next = 0;
        while (next < copies.length) {
            copies[next] = (Filter) template.getObject();
            next++;
        }
        return copies;
    }

    /**
     * Filters an entire set of instances through a filter and returns the new
     * set. A SimpleBatchFilter receives the whole dataset in a single input
     * call; any other filter is fed instance by instance. Afterwards the batch
     * is finished and everything the filter outputs is collected in a dataset
     * created from the filter's output format.
     *
     * @param data the data to be filtered
     * @param filter the filter to be used
     * @return the filtered set of data
     * @throws Exception if the filter can't be used successfully
     */
    public static Instances useFilter(Instances data, Filter filter) throws Exception {

        if (!(filter instanceof SimpleBatchFilter)) {
            int index = 0;
            while (index < data.numInstances()) {
                filter.input(data.instance(index));
                index++;
            }
        } else {
            ((SimpleBatchFilter) filter).input(data);
        }
        filter.batchFinished();

        final Instances filtered = filter.getOutputFormat();
        for (Instance next = filter.output(); next != null; next = filter.output()) {
            filtered.add(next);
        }

        return filtered;
    }

    /**
     * Returns a description of the filter, by default only the classname.
     *
     * @return a string describing the filter
     */
    @Override
    public String toString() {
        final Class<?> runtimeClass = getClass();
        return runtimeClass.getName();
    }

    /**
     * generates source code from the filter
     *
     * @param filter the filter to output as source
     * @param className the name of the generated class
     * @param input the input data the header is generated for
     * @param output the output data the header is generated for
     * @return the generated source code
     * @throws Exception if source code cannot be generated
     */
    public static String wekaStaticWrapper(Sourcable filter, String className, Instances input, Instances output)
            throws Exception {

        StringBuffer result;
        int i;
        int n;

        result = new StringBuffer();

        result.append("// Generated with Weka " + Version.VERSION + "\n");
        result.append("//\n");
        result.append("// This code is public domain and comes with no warranty.\n");
        result.append("//\n");
        result.append("// Timestamp: " + new Date() + "\n");
        result.append("// Relation: " + input.relationName() + "\n");
        result.append("\n");

        result.append("package weka.filters;\n");
        result.append("\n");
        result.append("import weka.core.Attribute;\n");
        result.append("import weka.core.Capabilities;\n");
        result.append("import weka.core.Capabilities.Capability;\n");
        result.append("import weka.core.DenseInstance;\n");
        result.append("import weka.core.Instance;\n");
        result.append("import weka.core.Instances;\n");
        result.append("import weka.core.Utils;\n");
        result.append("import weka.filters.Filter;\n");
        result.append("import java.util.ArrayList;\n");
        result.append("\n");
        result.append("public class WekaWrapper\n");
        result.append("  extends Filter {\n");

        // globalInfo
        result.append("\n");
        result.append("  /**\n");
        result.append("   * Returns only the toString() method.\n");
        result.append("   *\n");
        result.append("   * @return a string describing the filter\n");
        result.append("   */\n");
        result.append("  public String globalInfo() {\n");
        result.append("    return toString();\n");
        result.append("  }\n");

        // getCapabilities
        result.append("\n");
        result.append("  /**\n");
        result.append("   * Returns the capabilities of this filter.\n");
        result.append("   *\n");
        result.append("   * @return the capabilities\n");
        result.append("   */\n");
        result.append("  public Capabilities getCapabilities() {\n");
        result.append(((Filter) filter).getCapabilities().toSource("result", 4));
        result.append("    return result;\n");
        result.append("  }\n");

        // objectsToInstance
        result.append("\n");
        result.append("  /**\n");
        result.append("   * turns array of Objects into an Instance object\n");
        result.append("   *\n");
        result.append("   * @param obj   the Object array to turn into an Instance\n");
        result.append("   * @param format   the data format to use\n");
        result.append("   * @return      the generated Instance object\n");
        result.append("   */\n");
        result.append("  protected Instance objectsToInstance(Object[] obj, Instances format) {\n");
        result.append("    Instance      result;\n");
        result.append("    double[]      values;\n");
        result.append("    int      i;\n");
        result.append("\n");
        result.append("    values = new double[obj.length];\n");
        result.append("\n");
        result.append("    for (i = 0 ; i < obj.length; i++) {\n");
        result.append("      if (obj[i] == null)\n");
        result.append("        values[i] = Utils.missingValue();\n");
        result.append("      else if (format.attribute(i).isNumeric())\n");
        result.append("        values[i] = (Double) obj[i];\n");
        result.append("      else if (format.attribute(i).isNominal())\n");
        result.append("        values[i] = format.attribute(i).indexOfValue((String) obj[i]);\n");
        result.append("    }\n");
        result.append("\n");
        result.append("    // create new instance\n");
        result.append("    result = new DenseInstance(1.0, values);\n");
        result.append("    result.setDataset(format);\n");
        result.append("\n");
        result.append("    return result;\n");
        result.append("  }\n");

        // instanceToObjects
        result.append("\n");
        result.append("  /**\n");
        result.append("   * turns the Instance object into an array of Objects\n");
        result.append("   *\n");
        result.append("   * @param inst   the instance to turn into an array\n");
        result.append("   * @return      the Object array representing the instance\n");
        result.append("   */\n");
        result.append("  protected Object[] instanceToObjects(Instance inst) {\n");
        result.append("    Object[]   result;\n");
        result.append("    int      i;\n");
        result.append("\n");
        result.append("    result = new Object[inst.numAttributes()];\n");
        result.append("\n");
        result.append("    for (i = 0 ; i < inst.numAttributes(); i++) {\n");
        result.append("      if (inst.isMissing(i))\n");
        result.append("     result[i] = null;\n");
        result.append("      else if (inst.attribute(i).isNumeric())\n");
        result.append("     result[i] = inst.value(i);\n");
        result.append("      else\n");
        result.append("     result[i] = inst.stringValue(i);\n");
        result.append("    }\n");
        result.append("\n");
        result.append("    return result;\n");
        result.append("  }\n");

        // instancesToObjects
        result.append("\n");
        result.append("  /**\n");
        result.append("   * turns the Instances object into an array of Objects\n");
        result.append("   *\n");
        result.append("   * @param data   the instances to turn into an array\n");
        result.append("   * @return      the Object array representing the instances\n");
        result.append("   */\n");
        result.append("  protected Object[][] instancesToObjects(Instances data) {\n");
        result.append("    Object[][]   result;\n");
        result.append("    int      i;\n");
        result.append("\n");
        result.append("    result = new Object[data.numInstances()][];\n");
        result.append("\n");
        result.append("    for (i = 0; i < data.numInstances(); i++)\n");
        result.append("      result[i] = instanceToObjects(data.instance(i));\n");
        result.append("\n");
        result.append("    return result;\n");
        result.append("  }\n");

        // setInputFormat
        result.append("\n");
        result.append("  /**\n");
        result.append("   * Only tests the input data.\n");
        result.append("   *\n");
        result.append("   * @param instanceInfo the format of the data to convert\n");
        result.append("   * @return always true, to indicate that the output format can \n");
        result.append("   *         be collected immediately.\n");
        result.append("   */\n");
        result.append("  public boolean setInputFormat(Instances instanceInfo) throws Exception {\n");
        result.append("    super.setInputFormat(instanceInfo);\n");
        result.append("    \n");
        result.append("    // generate output format\n");
        result.append("    ArrayList<Attribute> atts = new ArrayList<Attribute>();\n");
        result.append("    ArrayList<String> attValues;\n");
        for (i = 0; i < output.numAttributes(); i++) {
            result.append("    // " + output.attribute(i).name() + "\n");
            if (output.attribute(i).isNumeric()) {
                result.append("    atts.add(new Attribute(\"" + output.attribute(i).name() + "\"));\n");
            } else if (output.attribute(i).isNominal()) {
                result.append("    attValues = new ArrayList<String>();\n");
                for (n = 0; n < output.attribute(i).numValues(); n++) {
                    result.append("    attValues.add(\"" + output.attribute(i).value(n) + "\");\n");
                }
                result.append("    atts.add(new Attribute(\"" + output.attribute(i).name() + "\", attValues));\n");
            } else {
                throw new UnsupportedAttributeTypeException("Attribute type '" + output.attribute(i).type()
                        + "' (position " + (i + 1) + ") is not supported!");
            }
        }
        result.append("    \n");
        result.append("    Instances format = new Instances(\"" + output.relationName() + "\", atts, 0);\n");
        result.append("    format.setClassIndex(" + output.classIndex() + ");\n");
        result.append("    setOutputFormat(format);\n");
        result.append("    \n");
        result.append("    return true;\n");
        result.append("  }\n");

        // input
        result.append("\n");
        result.append("  /**\n");
        result.append("   * Directly filters the instance.\n");
        result.append("   *\n");
        result.append("   * @param instance the instance to convert\n");
        result.append("   * @return always true, to indicate that the output can \n");
        result.append("   *         be collected immediately.\n");
        result.append("   */\n");
        result.append("  public boolean input(Instance instance) throws Exception {\n");
        result.append("    Object[] filtered = " + className + ".filter(instanceToObjects(instance));\n");
        result.append("    push(objectsToInstance(filtered, getOutputFormat()));\n");
        result.append("    return true;\n");
        result.append("  }\n");

        // batchFinished
        result.append("\n");
        result.append("  /**\n");
        result.append("   * Performs a batch filtering of the buffered data, if any available.\n");
        result.append("   *\n");
        result.append("   * @return true if instances were filtered otherwise false\n");
        result.append("   */\n");
        result.append("  public boolean batchFinished() throws Exception {\n");
        result.append("    if (getInputFormat() == null)\n");
        result.append("      throw new NullPointerException(\"No input instance format defined\");;\n");
        result.append("\n");
        result.append("    Instances inst = getInputFormat();\n");
        result.append("    if (inst.numInstances() > 0) {\n");
        result.append("      Object[][] filtered = " + className + ".filter(instancesToObjects(inst));\n");
        result.append("      for (int i = 0; i < filtered.length; i++) {\n");
        result.append("        push(objectsToInstance(filtered[i], getOutputFormat()));\n");
        result.append("      }\n");
        result.append("    }\n");
        result.append("\n");
        result.append("    flushInput();\n");
        result.append("    m_NewBatch = true;\n");
        result.append("    m_FirstBatchDone = true;\n");
        result.append("\n");
        result.append("    return (inst.numInstances() > 0);\n");
        result.append("  }\n");

        // toString
        result.append("\n");
        result.append("  /**\n");
        result.append("   * Returns only the classnames and what filter it is based on.\n");
        result.append("   *\n");
        result.append("   * @return a short description\n");
        result.append("   */\n");
        result.append("  public String toString() {\n");
        result.append("    return \"Auto-generated filter wrapper, based on " + filter.getClass().getName()
                + " (generated with Weka " + Version.VERSION + ").\\n" + "\" + this.getClass().getName() + \"/"
                + className + "\";\n");
        result.append("  }\n");

        // main
        result.append("\n");
        result.append("  /**\n");
        result.append("   * Runs the filter from commandline.\n");
        result.append("   *\n");
        result.append("   * @param args the commandline arguments\n");
        result.append("   */\n");
        result.append("  public static void main(String args[]) {\n");
        result.append("    runFilter(new WekaWrapper(), args);\n");
        result.append("  }\n");
        result.append("}\n");

        // actual filter code
        result.append("\n");
        result.append(filter.toSource(className, input));

        return result.toString();
    }

    /**
     * Method for testing filters: reads instances from a file (or stdin), feeds
     * them to the filter and writes the filtered data to a file (or stdout).
     * <p>
     * When the output goes to stdout the stream is only flushed, never closed,
     * so that the generated source code (option -z) can still be printed
     * afterwards and the caller keeps a usable {@code System.out}.
     *
     * @param filter the filter to use
     * @param options should contain the following arguments: <br/>
     *          -i input_file <br/>
     *          -o output_file <br/>
     *          -c class_index <br/>
     *          -z classname (for filters implementing weka.filters.Sourcable)
     *          <br/>
     *          -decimal num (the number of decimal places to use in the output;
     *          default = 6) <br/>
     *          -d (print progress messages to stderr) <br/>
     *          or -h for help on options
     * @throws Exception if something goes wrong or the user requests help on
     *           command options
     */
    public static void filterFile(Filter filter, String[] options) throws Exception {

        boolean debug = false;
        Instances data = null;
        DataSource input = null;
        PrintWriter output = null;
        // true only if 'output' wraps a file that this method opened itself
        boolean outputIsFile = false;
        boolean helpRequest;
        String sourceCode = "";
        int maxDecimalPlaces = 6;

        try {
            helpRequest = Utils.getFlag('h', options);

            if (Utils.getFlag('d', options)) {
                debug = true;
            }
            String infileName = Utils.getOption('i', options);
            String outfileName = Utils.getOption('o', options);
            String classIndex = Utils.getOption('c', options);
            if (filter instanceof Sourcable) {
                sourceCode = Utils.getOption('z', options);
            }

            String tmpStr = Utils.getOption("decimal", options);
            if (tmpStr.length() > 0) {
                maxDecimalPlaces = Integer.parseInt(tmpStr);
            }

            if (filter instanceof OptionHandler) {
                ((OptionHandler) filter).setOptions(options);
            }

            Utils.checkForRemainingOptions(options);
            if (helpRequest) {
                throw new Exception("Help requested.\n");
            }
            if (infileName.length() != 0) {
                input = new DataSource(infileName);
            } else {
                input = new DataSource(System.in);
            }
            if (outfileName.length() != 0) {
                output = new PrintWriter(new FileOutputStream(outfileName));
                outputIsFile = true;
            } else {
                output = new PrintWriter(System.out);
            }

            data = input.getStructure();
            if (classIndex.length() != 0) {
                if (classIndex.equals("first")) {
                    data.setClassIndex(0);
                } else if (classIndex.equals("last")) {
                    data.setClassIndex(data.numAttributes() - 1);
                } else {
                    // the class index is 1-based on the command line
                    data.setClassIndex(Integer.parseInt(classIndex) - 1);
                }
            }
        } catch (Exception ex) {
            // Do not leak the output file if setup failed after it was opened
            if (outputIsFile) {
                output.close();
            }

            String filterOptions = "";
            // Output the error and also the valid options
            if (filter instanceof OptionHandler) {
                filterOptions += "\nFilter options:\n\n";
                Enumeration<Option> enu = ((OptionHandler) filter).listOptions();
                while (enu.hasMoreElements()) {
                    Option option = enu.nextElement();
                    filterOptions += option.synopsis() + '\n' + option.description() + "\n";
                }
            }

            String genericOptions = "\nGeneral options:\n\n" + "-h\n" + "\tGet help on available options.\n"
                    + "\t(use -b -h for help on batch mode.)\n" + "-i <file>\n"
                    + "\tThe name of the file containing input instances.\n"
                    + "\tIf not supplied then instances will be read from stdin.\n" + "-o <file>\n"
                    + "\tThe name of the file output instances will be written to.\n"
                    + "\tIf not supplied then instances will be written to stdout.\n" + "-c <class index>\n"
                    + "\tThe number of the attribute to use as the class.\n"
                    + "\t\"first\" and \"last\" are also valid entries.\n"
                    + "\tIf not supplied then no class is assigned.\n" + "-decimal <integer>\n"
                    + "\tThe maximum number of digits to print after the decimal\n"
                    + "\tplace for numeric values (default: 6)\n";

            if (filter instanceof Sourcable) {
                genericOptions += "-z <class name>\n"
                        + "\tOutputs the source code representing the trained filter.\n";
            }

            // keep the original exception as the cause so its stack trace is not lost
            throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions, ex);
        }

        try {
            if (debug) {
                System.err.println("Setting input format");
            }
            boolean printedHeader = false;
            if (filter.setInputFormat(data)) {
                if (debug) {
                    System.err.println("Getting output format");
                }
                output.println(filter.getOutputFormat().toString());
                printedHeader = true;
            }

            // Pass all the instances to the filter
            Instance inst;
            while (input.hasMoreElements(data)) {
                inst = input.nextElement(data);
                if (debug) {
                    System.err.println("Input instance to filter");
                }
                if (filter.input(inst)) {
                    if (debug) {
                        System.err.println("Filter said collect immediately");
                    }
                    if (!printedHeader) {
                        throw new Error("Filter didn't return true from setInputFormat() " + "earlier!");
                    }
                    if (debug) {
                        System.err.println("Getting output instance");
                    }
                    output.println(filter.output().toStringMaxDecimalDigits(maxDecimalPlaces));
                }
            }

            // Say that input has finished, and print any pending output instances
            if (debug) {
                System.err.println("Setting end of batch");
            }
            if (filter.batchFinished()) {
                if (debug) {
                    System.err.println("Filter said collect output");
                }
                if (!printedHeader) {
                    if (debug) {
                        System.err.println("Getting output format");
                    }
                    output.println(filter.getOutputFormat().toString());
                }
                if (debug) {
                    System.err.println("Getting output instance");
                }
                while (filter.numPendingOutput() > 0) {
                    output.println(filter.output().toStringMaxDecimalDigits(maxDecimalPlaces));
                    if (debug) {
                        System.err.println("Getting output instance");
                    }
                }
            }
            if (debug) {
                System.err.println("Done");
            }
        } finally {
            // Closing a writer that wraps System.out would close stdout itself
            // and silently swallow everything printed afterwards, so only
            // files are closed; stdout is merely flushed.
            if (outputIsFile) {
                output.close();
            } else {
                output.flush();
            }
        }

        if (sourceCode.length() != 0) {
            System.out.println(wekaStaticWrapper((Sourcable) filter, sourceCode, data, filter.getOutputFormat()));
        }
    }

    /**
     * Method for testing filters ability to process multiple batches: the first
     * input is pushed through the filter and written to the first output, then
     * the second input is pushed through the same filter and written to the
     * second output.
     * <p>
     * Outputs that go to stdout are only flushed, never closed. Closing the
     * first writer would otherwise close {@code System.out} and silently drop
     * the second batch as well as the generated source code (option -z).
     *
     * @param filter the filter to use
     * @param options should contain the following arguments: <br/>
     *          -i (first) input file <br/>
     *          -o (first) output file <br/>
     *          -r (second) input file <br/>
     *          -s (second) output file <br/>
     *          -c class_index <br/>
     *          -z classname (for filters implementing weka.filters.Sourcable)
     *          <br/>
     *          -decimal num (the number of decimal places to use in the output;
     *          default = 6) <br/>
     *          or -h for help on options
     * @throws Exception if something goes wrong or the user requests help on
     *           command options
     */
    public static void batchFilterFile(Filter filter, String[] options) throws Exception {

        Instances firstData = null;
        Instances secondData = null;
        DataSource firstInput = null;
        DataSource secondInput = null;
        PrintWriter firstOutput = null;
        PrintWriter secondOutput = null;
        // true only if the respective writer wraps a file opened by this method
        boolean firstOutputIsFile = false;
        boolean secondOutputIsFile = false;
        boolean helpRequest;
        String sourceCode = "";
        int maxDecimalPlaces = 6;

        try {
            helpRequest = Utils.getFlag('h', options);

            String fileName = Utils.getOption('i', options);
            if (fileName.length() != 0) {
                firstInput = new DataSource(fileName);
            } else {
                throw new Exception("No first input file given.\n");
            }

            fileName = Utils.getOption('r', options);
            if (fileName.length() != 0) {
                secondInput = new DataSource(fileName);
            } else {
                throw new Exception("No second input file given.\n");
            }

            fileName = Utils.getOption('o', options);
            if (fileName.length() != 0) {
                firstOutput = new PrintWriter(new FileOutputStream(fileName));
                firstOutputIsFile = true;
            } else {
                firstOutput = new PrintWriter(System.out);
            }

            fileName = Utils.getOption('s', options);
            if (fileName.length() != 0) {
                secondOutput = new PrintWriter(new FileOutputStream(fileName));
                secondOutputIsFile = true;
            } else {
                secondOutput = new PrintWriter(System.out);
            }
            String classIndex = Utils.getOption('c', options);
            if (filter instanceof Sourcable) {
                sourceCode = Utils.getOption('z', options);
            }

            String tmpStr = Utils.getOption("decimal", options);
            if (tmpStr.length() > 0) {
                maxDecimalPlaces = Integer.parseInt(tmpStr);
            }

            if (filter instanceof OptionHandler) {
                ((OptionHandler) filter).setOptions(options);
            }
            Utils.checkForRemainingOptions(options);

            if (helpRequest) {
                throw new Exception("Help requested.\n");
            }
            firstData = firstInput.getStructure();
            secondData = secondInput.getStructure();
            if (!secondData.equalHeaders(firstData)) {
                throw new Exception("Input file formats differ.\n" + secondData.equalHeadersMsg(firstData) + "\n");
            }
            if (classIndex.length() != 0) {
                if (classIndex.equals("first")) {
                    firstData.setClassIndex(0);
                    secondData.setClassIndex(0);
                } else if (classIndex.equals("last")) {
                    firstData.setClassIndex(firstData.numAttributes() - 1);
                    secondData.setClassIndex(secondData.numAttributes() - 1);
                } else {
                    // the class index is 1-based on the command line
                    firstData.setClassIndex(Integer.parseInt(classIndex) - 1);
                    secondData.setClassIndex(Integer.parseInt(classIndex) - 1);
                }
            }
        } catch (Exception ex) {
            // Do not leak output files if setup failed after they were opened
            if (firstOutputIsFile) {
                firstOutput.close();
            }
            if (secondOutputIsFile) {
                secondOutput.close();
            }

            String filterOptions = "";
            // Output the error and also the valid options
            if (filter instanceof OptionHandler) {
                filterOptions += "\nFilter options:\n\n";
                Enumeration<Option> enu = ((OptionHandler) filter).listOptions();
                while (enu.hasMoreElements()) {
                    Option option = enu.nextElement();
                    filterOptions += option.synopsis() + '\n' + option.description() + "\n";
                }
            }

            String genericOptions = "\nGeneral options:\n\n" + "-h\n" + "\tGet help on available options.\n"
                    + "-i <filename>\n" + "\tThe file containing first input instances.\n" + "-o <filename>\n"
                    + "\tThe file first output instances will be written to.\n" + "-r <filename>\n"
                    + "\tThe file containing second input instances.\n" + "-s <filename>\n"
                    + "\tThe file second output instances will be written to.\n" + "-c <class index>\n"
                    + "\tThe number of the attribute to use as the class.\n"
                    + "\t\"first\" and \"last\" are also valid entries.\n"
                    + "\tIf not supplied then no class is assigned.\n" + "-decimal <integer>\n"
                    + "\tThe maximum number of digits to print after the decimal\n"
                    + "\tplace for numeric values (default: 6)\n";

            if (filter instanceof Sourcable) {
                genericOptions += "-z <class name>\n"
                        + "\tOutputs the source code representing the trained filter.\n";
            }

            // keep the original exception as the cause so its stack trace is not lost
            throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions, ex);
        }

        try {
            boolean printedHeader;
            Instance inst;

            try {
                printedHeader = false;
                if (filter.setInputFormat(firstData)) {
                    firstOutput.println(filter.getOutputFormat().toString());
                    printedHeader = true;
                }

                // Pass all the instances to the filter
                while (firstInput.hasMoreElements(firstData)) {
                    inst = firstInput.nextElement(firstData);
                    if (filter.input(inst)) {
                        if (!printedHeader) {
                            throw new Error("Filter didn't return true from setInputFormat() " + "earlier!");
                        }
                        firstOutput.println(filter.output().toStringMaxDecimalDigits(maxDecimalPlaces));
                    }
                }

                // Say that input has finished, and print any pending output instances
                if (filter.batchFinished()) {
                    if (!printedHeader) {
                        firstOutput.println(filter.getOutputFormat().toString());
                    }
                    while (filter.numPendingOutput() > 0) {
                        firstOutput.println(filter.output().toStringMaxDecimalDigits(maxDecimalPlaces));
                    }
                }
            } finally {
                // Only close real files; flushing stdout keeps it usable for the
                // second batch while still emitting the first batch before it.
                if (firstOutputIsFile) {
                    firstOutput.close();
                } else {
                    firstOutput.flush();
                }
            }

            printedHeader = false;
            if (filter.isOutputFormatDefined()) {
                secondOutput.println(filter.getOutputFormat().toString());
                printedHeader = true;
            }
            // Pass all the second instances to the filter
            while (secondInput.hasMoreElements(secondData)) {
                inst = secondInput.nextElement(secondData);
                if (filter.input(inst)) {
                    if (!printedHeader) {
                        throw new Error("Filter didn't return true from" + " isOutputFormatDefined() earlier!");
                    }
                    secondOutput.println(filter.output().toStringMaxDecimalDigits(maxDecimalPlaces));
                }
            }

            // Say that input has finished, and print any pending output instances
            if (filter.batchFinished()) {
                if (!printedHeader) {
                    secondOutput.println(filter.getOutputFormat().toString());
                }
                while (filter.numPendingOutput() > 0) {
                    secondOutput.println(filter.output().toStringMaxDecimalDigits(maxDecimalPlaces));
                }
            }
        } finally {
            // Same rule as for the first output: never close System.out
            if (secondOutputIsFile) {
                secondOutput.close();
            } else {
                secondOutput.flush();
            }
        }

        if (sourceCode.length() != 0) {
            System.out.println(
                    wekaStaticWrapper((Sourcable) filter, sourceCode, firstData, filter.getOutputFormat()));
        }
    }

    /**
     * Runs the filter instance with the given options. Calls
     * {@link #preExecution()} first, then processes the data in batch mode
     * (flag -b) or in single-file mode, and finally always calls
     * {@link #postExecution()}.
     * <p>
     * Exceptions are not propagated: help/option messages are printed to stderr
     * as plain text, any other exception is reported with its stack trace.
     *
     * @param filter the filter to run
     * @param options the commandline options
     */
    public static void runFilter(Filter filter, String[] options) {
        try {
            filter.preExecution();
            if (Utils.getFlag('b', options)) {
                Filter.batchFilterFile(filter, options);
            } else {
                Filter.filterFile(filter, options);
            }
        } catch (Exception e) {
            String description = e.toString();
            if (description.contains("Help requested") || description.contains("Filter options")) {
                // usage problem: the message already contains the option listing
                System.err.println(e.getMessage());
            } else {
                e.printStackTrace();
            }
        } finally {
            // Teardown must also happen when a non-Exception throwable (the
            // file-processing methods throw Error) escapes the block above.
            try {
                filter.postExecution();
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }

    /**
     * Returns an enumeration describing the available options: those obtained
     * from {@code Option.listOptionsForClassHierarchy} for this class, followed
     * by the two flags handled directly in {@link #setOptions(String[])}
     * (-output-debug-info and -do-not-check-capabilities).
     *
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {

        Vector<Option> newVector = Option.listOptionsForClassHierarchy(this.getClass(), Filter.class);

        newVector.addElement(new Option(
                "\tIf set, filter is run in debug mode and\n" + "\tmay output additional info to the console",
                "output-debug-info", 0, "-output-debug-info"));
        // The option name carries no leading dash (only the synopsis does),
        // matching the debug option above and the flag name read in setOptions.
        newVector.addElement(new Option(
                "\tIf set, filter capabilities are not checked before filter is built\n" + "\t(use with caution).",
                "do-not-check-capabilities", 0, "-do-not-check-capabilities"));

        return newVector.elements();
    }

    /**
     * Parses a given list of options. Besides whatever
     * {@code Option.setOptionsForHierarchy} handles for this class hierarchy,
     * the following flags are read:
     * <p>
     *
     * -output-debug-info <br>
     * If set, filter is run in debug mode and may output additional info to the
     * console.
     * <p>
     *
     * -do-not-check-capabilities <br>
     * If set, filter capabilities are not checked before filter is built (use
     * with caution).
     * <p>
     *
     * @param options the list of options as an array of strings
     * @exception Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        // hierarchy-declared options are processed before the two flags below
        Option.setOptionsForHierarchy(options, this, Filter.class);

        final boolean debugRequested = Utils.getFlag("output-debug-info", options);
        setDebug(debugRequested);

        final boolean skipCapabilityCheck = Utils.getFlag("do-not-check-capabilities", options);
        setDoNotCheckCapabilities(skipCapabilityCheck);
    }

    /**
     * Gets the current settings of the filter: the options reported by
     * {@code Option.getOptionsForHierarchy}, followed by the debug and
     * capability-check flags if they are switched on.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {

        final Vector<String> collected = new Vector<String>();

        // options contributed via the class hierarchy come first
        for (String inherited : Option.getOptionsForHierarchy(this, Filter.class)) {
            collected.addElement(inherited);
        }

        if (getDebug()) {
            collected.addElement("-output-debug-info");
        }

        if (getDoNotCheckCapabilities()) {
            collected.addElement("-do-not-check-capabilities");
        }

        final String[] asArray = new String[collected.size()];
        collected.copyInto(asArray);
        return asArray;
    }

    /**
     * Switches debugging mode on or off.
     *
     * @param debug true if debug output should be printed
     */
    public void setDebug(boolean debug) {
        this.m_Debug = debug;
    }

    /**
     * Reports the current state of the debugging flag.
     *
     * @return true if debugging output is on
     */
    public boolean getDebug() {
        return this.m_Debug;
    }

    /**
     * Returns the tip text for the debug property.
     *
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String debugTipText() {
        final String tip = "If set to true, filter may output additional info to the console.";
        return tip;
    }

    /**
     * Sets whether the capabilities check is to be skipped.
     *
     * @param doNotCheckCapabilities true if capabilities are not to be checked.
     */
    public void setDoNotCheckCapabilities(boolean doNotCheckCapabilities) {
        this.m_DoNotCheckCapabilities = doNotCheckCapabilities;
    }

    /**
     * Reports whether the capabilities check has been switched off.
     *
     * @return true if capabilities checking is turned off.
     */
    public boolean getDoNotCheckCapabilities() {
        return this.m_DoNotCheckCapabilities;
    }

    /**
     * Returns the tip text for the do-not-check-capabilities property.
     *
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String doNotCheckCapabilitiesTipText() {
        final StringBuilder tip = new StringBuilder();
        tip.append("If set, the filter's capabilities are not checked before it is built.");
        tip.append(" (Use with caution to reduce runtime.)");
        return tip.toString();
    }

    /**
     * Hook for setup work that has to happen before commandline execution.
     * This implementation does nothing; subclasses should override it if they
     * need to do something here.
     *
     * @throws Exception if a problem occurs during setup
     */
    @Override
    public void preExecution() throws Exception {
        // intentionally empty
    }

    /**
     * Executes the supplied object, which has to be a {@code Filter}, by
     * handing it to {@link #runFilter(Filter, String[])}.
     *
     * @param toRun the object to execute
     * @param options any options to pass to the object
     * @throws Exception declared by the signature; an
     *           {@code IllegalArgumentException} is thrown if the object is
     *           not a Filter.
     */
    @Override
    public void run(Object toRun, String[] options) throws Exception {
        if (toRun instanceof Filter) {
            final Filter target = (Filter) toRun;
            runFilter(target, options);
            return;
        }
        throw new IllegalArgumentException("Object to run is not a Filter!");
    }

    /**
     * Hook for teardown work that has to happen after execution. This
     * implementation does nothing; subclasses should override it if they need
     * to do something here.
     *
     * @throws Exception if a problem occurs during teardown
     */
    @Override
    public void postExecution() throws Exception {
        // intentionally empty
    }

    /**
     * Main method for testing this class. The first argument names the Filter
     * class to instantiate; the remaining arguments are passed on to that
     * filter. Any failure is reported on stderr.
     *
     * @param args should contain arguments to the filter: use -h for help
     */
    public static void main(String[] args) {

        try {
            final boolean noArguments = (args.length == 0);
            if (noArguments) {
                throw new Exception("First argument must be the class name of a Filter");
            }

            final String filterClassName = args[0];
            final Object created = Class.forName(filterClassName).newInstance();
            final Filter target = (Filter) created;

            // blank out the class name so it is not mistaken for a filter option
            args[0] = "";
            runFilter(target, args);
        } catch (Exception failure) {
            failure.printStackTrace();
            System.err.println(failure.getMessage());
        }
    }
}