Java tutorial
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * WekaSubsets.java
 * Copyright (C) 2012-2014 University of Waikato, Hamilton, New Zealand
 */

package adams.flow.transformer;

import java.util.ArrayList;
import java.util.List;

import weka.core.Instance;
import weka.core.Instances;
import adams.core.QuickInfoHelper;
import adams.data.weka.WekaAttributeIndex;
import adams.flow.core.Token;
import adams.flow.provenance.ActorType;
import adams.flow.provenance.Provenance;
import adams.flow.provenance.ProvenanceContainer;
import adams.flow.provenance.ProvenanceInformation;

/**
 <!-- globalinfo-start -->
 * Splits the dataset based on the unique values of the specified attribute: all rows with the same unique value form a subset.
 * <br><br>
 <!-- globalinfo-end -->
 *
 <!-- flow-summary-start -->
 * Input/output:<br>
 * - accepts:<br>
 * &nbsp;&nbsp;&nbsp;weka.core.Instances<br>
 * - generates:<br>
 * &nbsp;&nbsp;&nbsp;weka.core.Instances<br>
 * <br><br>
 <!-- flow-summary-end -->
 *
 <!-- options-start -->
 * <pre>-logging-level &lt;OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST&gt; (property: loggingLevel)
 * &nbsp;&nbsp;&nbsp;The logging level for outputting errors and debugging output.
 * &nbsp;&nbsp;&nbsp;default: WARNING
 * </pre>
 *
 * <pre>-name &lt;java.lang.String&gt; (property: name)
 * &nbsp;&nbsp;&nbsp;The name of the actor.
 * &nbsp;&nbsp;&nbsp;default: WekaSubsets
 * </pre>
 *
 * <pre>-annotation &lt;adams.core.base.BaseAnnotation&gt; (property: annotations)
 * &nbsp;&nbsp;&nbsp;The annotations to attach to this actor.
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 * <pre>-skip &lt;boolean&gt; (property: skip)
 * &nbsp;&nbsp;&nbsp;If set to true, transformation is skipped and the input token is just forwarded
 * &nbsp;&nbsp;&nbsp;as it is.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-stop-flow-on-error &lt;boolean&gt; (property: stopFlowOnError)
 * &nbsp;&nbsp;&nbsp;If set to true, the flow gets stopped in case this actor encounters an error;
 * &nbsp;&nbsp;&nbsp;useful for critical actors.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-index &lt;adams.data.weka.WekaAttributeIndex&gt; (property: index)
 * &nbsp;&nbsp;&nbsp;The index of the attribute to use for splitting the dataset into subsets;
 * &nbsp;&nbsp;&nbsp;An index is a number starting with 1; apart from attribute names (case-sensitive
 * &nbsp;&nbsp;&nbsp;), the following placeholders can be used as well: first, second, third,
 * &nbsp;&nbsp;&nbsp;last_2, last_1, last
 * &nbsp;&nbsp;&nbsp;default: 1
 * &nbsp;&nbsp;&nbsp;example: An index is a number starting with 1; apart from attribute names (case-sensitive), the following placeholders can be used as well: first, second, third, last_2, last_1, last
 * </pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 */
public class WekaSubsets
  extends AbstractTransformer {

  /** for serialization. */
  private static final long serialVersionUID = 4717726637561070097L;

  /** the attribute index to split on. */
  protected WekaAttributeIndex m_Index;

  /** the generated subsets, handed out one at a time by {@link #output()}. */
  protected List<Instances> m_Queue;

  /**
   * Returns a string describing the object.
   *
   * @return 		a description suitable for displaying in the gui
   */
  @Override
  public String globalInfo() {
    return
        "Splits the dataset based on the unique values of the specified "
      + "attribute: all rows with the same unique value form a subset.";
  }

  /**
   * Adds options to the internal list of options.
   */
  @Override
  public void defineOptions() {
    super.defineOptions();

    m_OptionManager.add(
	"index", "index",
	new WekaAttributeIndex("1"));
  }

  /**
   * Initializes the members.
   */
  @Override
  protected void initialize() {
    super.initialize();

    m_Index = new WekaAttributeIndex();
    m_Queue = new ArrayList<Instances>();
  }

  /**
   * Returns a quick info about the actor, which will be displayed in the GUI.
   *
   * @return		null if no info available, otherwise short string
   */
  @Override
  public String getQuickInfo() {
    return QuickInfoHelper.toString(this, "index", m_Index);
  }

  /**
   * Sets the index of the attribute to split on.
   *
   * @param value	the index
   */
  public void setIndex(WekaAttributeIndex value) {
    m_Index = value;
    reset();
  }

  /**
   * Returns the index of the attribute to split on.
   *
   * @return		the index
   */
  public WekaAttributeIndex getIndex() {
    return m_Index;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String indexTipText() {
    return
	"The index of the attribute to use for splitting the dataset into subsets; "
	+ m_Index.getExample();
  }

  /**
   * Returns the class that the consumer accepts.
   *
   * @return		the Class of objects that can be processed
   */
  public Class[] accepts() {
    return new Class[]{Instances.class};
  }

  /**
   * Returns the class of objects that it generates.
   *
   * @return		the Class of the generated tokens
   */
  public Class[] generates() {
    return new Class[]{Instances.class};
  }

  /**
   * Executes the flow item: sorts a copy of the incoming dataset on the
   * configured attribute and cuts it into one subset per run of equal
   * attribute values. The subsets are stored in the queue for
   * {@link #output()} to hand out.
   *
   * @return		null if everything is fine, otherwise error message
   */
  @Override
  protected String doExecute() {
    Instances	data;
    Instances	subset;
    Instance	inst;
    int		index;
    int		i;
    double	curr;
    double	old;

    m_Queue.clear();

    // copy the data, so that sorting does not reorder the input payload
    data = new Instances((Instances) m_InputToken.getPayload());
    m_Index.setData(data);
    index = m_Index.getIntIndex();
    // NOTE(review): relies on getIntIndex() returning -1 for an index that
    // cannot be resolved against the dataset - confirm against the ADAMS API
    if (index == -1)
      return "Failed to locate attribute to split on: " + m_Index.getIndex();

    data.sort(index);

    // create subsets: after sorting, rows with the same value are adjacent,
    // so a new subset starts whenever the value changes
    subset = null;
    old    = Double.NaN;
    for (i = 0; i < data.numInstances(); i++) {
      inst = data.instance(i);
      curr = inst.value(index);
      // Double.compare treats NaN as equal to NaN (like Double.equals), so
      // consecutive NaN values end up in the same subset
      if ((subset == null) || (Double.compare(curr, old) != 0)) {
	// start empty and let the subset grow; reserving the capacity of the
	// full dataset for each subset costs #subsets x #rows memory
	subset = new Instances(data, 0);
	m_Queue.add(subset);
	old = curr;
      }
      subset.add(inst);
    }

    // compact subsets
    for (Instances sub: m_Queue)
      sub.compactify();

    return null;
  }

  /**
   * Checks whether there is pending output to be collected after
   * executing the flow item.
   *
   * @return		true if there is pending output
   */
  @Override
  public boolean hasPendingOutput() {
    return !m_Queue.isEmpty();
  }

  /**
   * Returns the generated token: the first subset still in the queue,
   * which gets removed from the queue in the process.
   *
   * @return		the generated token
   */
  @Override
  public Token output() {
    Token	result;

    result = new Token(m_Queue.remove(0));

    updateProvenance(result);

    return result;
  }

  /**
   * Updates the provenance information in the provided container.
   *
   * @param cont	the provenance container to update; gets cast to
   * 			{@link Token} in order to access its payload
   */
  public void updateProvenance(ProvenanceContainer cont) {
    if (Provenance.getSingleton().isEnabled()) {
      if (m_InputToken.hasProvenance())
	cont.setProvenance(m_InputToken.getProvenance().getClone());
      cont.addProvenance(new ProvenanceInformation(ActorType.PREPROCESSOR, m_InputToken.getPayload().getClass(), this, ((Token) cont).getPayload().getClass()));
    }
  }

  /**
   * Cleans up after the execution has finished.
   */
  @Override
  public void wrapUp() {
    m_Queue.clear();

    super.wrapUp();
  }
}