adams.flow.transformer.WekaSubsets.java Source code

Introduction

Here is the source code for adams.flow.transformer.WekaSubsets.java, an ADAMS flow transformer that splits a weka.core.Instances dataset into subsets based on the unique values of a chosen attribute: all rows sharing the same value end up in the same subset.

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * WekaSubsets.java
 * Copyright (C) 2012-2014 University of Waikato, Hamilton, New Zealand
 */

package adams.flow.transformer;

import java.util.ArrayList;
import java.util.List;

import weka.core.Instance;
import weka.core.Instances;
import adams.core.QuickInfoHelper;
import adams.data.weka.WekaAttributeIndex;
import adams.flow.core.Token;
import adams.flow.provenance.ActorType;
import adams.flow.provenance.Provenance;
import adams.flow.provenance.ProvenanceContainer;
import adams.flow.provenance.ProvenanceInformation;

/**
 <!-- globalinfo-start -->
 * Splits the dataset based on the unique values of the specified attribute: all rows with the same unique value form a subset.
 * <br><br>
 <!-- globalinfo-end -->
 *
 <!-- flow-summary-start -->
 * Input&#47;output:<br>
 * - accepts:<br>
 * &nbsp;&nbsp;&nbsp;weka.core.Instances<br>
 * - generates:<br>
 * &nbsp;&nbsp;&nbsp;weka.core.Instances<br>
 * <br><br>
 <!-- flow-summary-end -->
 *
 <!-- options-start -->
 * <pre>-logging-level &lt;OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST&gt; (property: loggingLevel)
 * &nbsp;&nbsp;&nbsp;The logging level for outputting errors and debugging output.
 * &nbsp;&nbsp;&nbsp;default: WARNING
 * </pre>
 * 
 * <pre>-name &lt;java.lang.String&gt; (property: name)
 * &nbsp;&nbsp;&nbsp;The name of the actor.
 * &nbsp;&nbsp;&nbsp;default: WekaSubsets
 * </pre>
 * 
 * <pre>-annotation &lt;adams.core.base.BaseAnnotation&gt; (property: annotations)
 * &nbsp;&nbsp;&nbsp;The annotations to attach to this actor.
 * &nbsp;&nbsp;&nbsp;default: 
 * </pre>
 * 
 * <pre>-skip &lt;boolean&gt; (property: skip)
 * &nbsp;&nbsp;&nbsp;If set to true, transformation is skipped and the input token is just forwarded 
 * &nbsp;&nbsp;&nbsp;as it is.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 * 
 * <pre>-stop-flow-on-error &lt;boolean&gt; (property: stopFlowOnError)
 * &nbsp;&nbsp;&nbsp;If set to true, the flow gets stopped in case this actor encounters an error;
 * &nbsp;&nbsp;&nbsp; useful for critical actors.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 * 
 * <pre>-index &lt;adams.data.weka.WekaAttributeIndex&gt; (property: index)
 * &nbsp;&nbsp;&nbsp;The index of the attribute to use for splitting the dataset into subsets;
 * &nbsp;&nbsp;&nbsp; An index is a number starting with 1; apart from attribute names (case-sensitive
 * &nbsp;&nbsp;&nbsp;), the following placeholders can be used as well: first, second, third, 
 * &nbsp;&nbsp;&nbsp;last_2, last_1, last
 * &nbsp;&nbsp;&nbsp;default: 1
 * &nbsp;&nbsp;&nbsp;example: An index is a number starting with 1; apart from attribute names (case-sensitive), the following placeholders can be used as well: first, second, third, last_2, last_1, last
 * </pre>
 * 
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 */
public class WekaSubsets extends AbstractTransformer {

    /** for serialization. */
    private static final long serialVersionUID = 4717726637561070097L;

    /** the attribute index to split on. */
    protected WekaAttributeIndex m_Index;

    /** the generated subsets. */
    protected List<Instances> m_Queue;

    /**
     * Returns a string describing the object.
     *
     * @return          a description suitable for displaying in the gui
     */
    @Override
    public String globalInfo() {
        return "Splits the dataset based on the unique values of the specified "
                + "attribute: all rows with the same unique value form a subset.";
    }

    /**
     * Adds options to the internal list of options.
     */
    @Override
    public void defineOptions() {
        super.defineOptions();

        m_OptionManager.add("index", "index", new WekaAttributeIndex("1"));
    }

    /**
     * Initializes the members.
     */
    @Override
    protected void initialize() {
        super.initialize();

        m_Index = new WekaAttributeIndex();
        m_Queue = new ArrayList<Instances>();
    }

    /**
     * Returns a quick info about the actor, which will be displayed in the GUI.
     *
     * @return      null if no info available, otherwise short string
     */
    @Override
    public String getQuickInfo() {
        return QuickInfoHelper.toString(this, "index", m_Index);
    }

    /**
     * Sets the index of the attribute to split on.
     *
     * @param value   the index
     */
    public void setIndex(WekaAttributeIndex value) {
        m_Index = value;
        reset();
    }

    /**
     * Returns the index of the attribute to split on.
     *
     * @return      the index
     */
    public WekaAttributeIndex getIndex() {
        return m_Index;
    }

    /**
     * Returns the tip text for this property.
     *
     * @return       tip text for this property suitable for
     *          displaying in the GUI or for listing the options.
     */
    public String indexTipText() {
        return "The index of the attribute to use for splitting the dataset into subsets; " + m_Index.getExample();
    }

    /**
     * Returns the class that the consumer accepts.
     * 
     * @return      the Class of objects that can be processed
     */
    public Class[] accepts() {
        return new Class[] { Instances.class };
    }

    /**
     * Returns the class of objects that it generates.
     *
     * @return      the Class of the generated tokens
     */
    public Class[] generates() {
        return new Class[] { Instances.class };
    }

    /**
     * Executes the flow item.
     *
     * @return      null if everything is fine, otherwise error message
     */
    @Override
    protected String doExecute() {
        String result;
        Instances data;
        Double old;
        Double curr;
        int i;
        int index;
        Instance inst;

        result = null;

        m_Queue.clear();

        // copy and sort data
        data = new Instances((Instances) m_InputToken.getPayload());
        m_Index.setData(data);
        index = m_Index.getIntIndex();
        data.sort(index);

        // create subsets
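        // after sorting, rows with equal values of the split attribute are contiguous,
        // so the dataset can be partitioned in a single pass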
        old = null;
        i = 0;
        while (i < data.numInstances()) {
            inst = data.instance(i);
            curr = inst.value(index);
            if ((old == null) || !curr.equals(old)) {
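                // first row or value change: start a new, empty subset sharing the dataset's header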
                m_Queue.add(new Instances(data, data.numInstances()));
                old = curr;
            }
            m_Queue.get(m_Queue.size() - 1).add(inst);
            i++;
        }

        // compact subsets
        for (Instances sub : m_Queue)
            sub.compactify();

        return result;
    }

    /**
     * Checks whether there is pending output to be collected after
     * executing the flow item.
     *
     * @return      true if there is pending output
     */
    @Override
    public boolean hasPendingOutput() {
        return !m_Queue.isEmpty();
    }

    /**
     * Returns the generated token.
     *
     * @return      the generated token
     */
    @Override
    public Token output() {
        Token result;

        result = new Token(m_Queue.get(0));
        m_Queue.remove(0);

        updateProvenance(result);

        return result;
    }

    /**
     * Updates the provenance information in the provided container.
     *
     * @param cont   the provenance container to update
     */
    public void updateProvenance(ProvenanceContainer cont) {
        if (Provenance.getSingleton().isEnabled()) {
            if (m_InputToken.hasProvenance())
                cont.setProvenance(m_InputToken.getProvenance().getClone());
            cont.addProvenance(new ProvenanceInformation(ActorType.PREPROCESSOR,
                    m_InputToken.getPayload().getClass(), this, ((Token) cont).getPayload().getClass()));
        }
    }

    /**
     * Cleans up after the execution has finished.
     */
    @Override
    public void wrapUp() {
        m_Queue.clear();

        super.wrapUp();
    }
}
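
Example

For reference, the subset-building logic in doExecute() can also be exercised outside of an ADAMS flow as a small standalone Weka program. The sketch below mirrors the actor's sort-then-group approach; the class name SubsetsDemo, the dataset path iris.arff and the hard-coded 0-based attribute index are illustrative assumptions, not part of the original class.

import java.util.ArrayList;
import java.util.List;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SubsetsDemo {

    public static void main(String[] args) throws Exception {
        // load some dataset; "iris.arff" is just a placeholder path
        Instances data = new Instances(DataSource.read("iris.arff"));
        // 0-based attribute index to split on (corresponds to "1" in the actor's options)
        int index = 0;

        // sort so that rows with the same attribute value are contiguous
        data.sort(index);

        List<Instances> subsets = new ArrayList<Instances>();
        Double old = null;
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = data.instance(i);
            Double curr = inst.value(index);
            if ((old == null) || !curr.equals(old)) {
                // value changed (or first row): start a new subset with the same header
                subsets.add(new Instances(data, data.numInstances()));
                old = curr;
            }
            subsets.get(subsets.size() - 1).add(inst);
        }

        // trim the over-allocated subsets down to their actual size
        for (Instances sub : subsets)
            sub.compactify();

        System.out.println("generated " + subsets.size() + " subset(s)");
    }
}

Inside a flow, the actor queues the same subsets in m_Queue and hands them downstream one token at a time via output(), rather than returning them as a list to the caller.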