Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * WekaInstanceBuffer.java * Copyright (C) 2009-2013 University of Waikato, Hamilton, New Zealand */ package adams.flow.transformer; import java.util.ArrayList; import java.util.Hashtable; import java.util.Iterator; import java.util.List; import weka.core.BinarySparseInstance; import weka.core.DenseInstance; import weka.core.Instance; import weka.core.Instances; import weka.core.SparseInstance; import adams.core.QuickInfoHelper; import adams.flow.core.Token; import adams.flow.provenance.ActorType; import adams.flow.provenance.Provenance; import adams.flow.provenance.ProvenanceContainer; import adams.flow.provenance.ProvenanceInformation; import adams.flow.provenance.ProvenanceSupporter; /** <!-- globalinfo-start --> * Can act in two different ways:<br> * 1. Instance -> Instances (row -> dataset)<br> * Buffers weka.core.Instance objects and outputs a weka.core.Instances object, whenever the interval condition has been met.<br> * 2. Instances -> Instance (dataset -> row)<br> * Outputs all the weka.core.Instance objects that the incoming weka.core.Instances object contains. * <br><br> <!-- globalinfo-end --> * <!-- flow-summary-start --> * Input/output:<br> * - accepts:<br> * weka.core.Instance<br> * weka.core.Instance[]<br> * - generates:<br> * weka.core.Instances<br> * <br><br> <!-- flow-summary-end --> * <!-- options-start --> * Valid options are: <br><br> * * <pre>-logging-level <OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST> (property: loggingLevel) * The logging level for outputting errors and debugging output. * default: WARNING * </pre> * * <pre>-name <java.lang.String> (property: name) * The name of the actor. * default: WekaInstanceBuffer * </pre> * * <pre>-annotation <adams.core.base.BaseText> (property: annotations) * The annotations to attach to this actor. * default: * </pre> * * <pre>-skip (property: skip) * If set to true, transformation is skipped and the input token is just forwarded * as it is. * </pre> * * <pre>-stop-flow-on-error (property: stopFlowOnError) * If set to true, the flow gets stopped in case this actor encounters an error; * useful for critical actors. * </pre> * * <pre>-operation <INSTANCES_TO_INSTANCE|INSTANCE_TO_INSTANCES> (property: operation) * The way the buffer operates, 'dataset -> row' or 'row -> dataset'. * default: INSTANCE_TO_INSTANCES * </pre> * * <pre>-check (property: checkHeader) * Whether to check the headers - if the headers change, the Instance object * gets dumped into a new file (in case of INSTANCE_TO_INSTANCES). * </pre> * * <pre>-interval <int> (property: interval) * The interval at which to output the Instances object (in case of INSTANCE_TO_INSTANCES * ). * default: 1 * minimum: 1 * </pre> * * <pre>-clear-buffer (property: clearBuffer) * Whether to clear the buffer once the dataset has been forwarded (in case * of INSTANCE_TO_INSTANCES). * </pre> * <!-- options-end --> * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public class WekaInstanceBuffer extends AbstractTransformer implements ProvenanceSupporter { /** for serialization. */ private static final long serialVersionUID = 6774529845778672623L; /** the key for storing the current buffer in the backup. */ public final static String BACKUP_BUFFER = "buffer"; /** the key for storing the current iterator in the backup. */ public final static String BACKUP_ITERATOR = "iterator"; /** * Defines how the buffer actor operates. * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public enum Operation { /** Instances -> Instance. */ INSTANCES_TO_INSTANCE, /** Instance -> Instances. */ INSTANCE_TO_INSTANCES, } /** the currently buffered data. */ protected Instances m_Buffer; /** the iterator for broadcasting Instance objects. */ protected Iterator<Instance> m_Iterator; /** the way the buffer operates. */ protected Operation m_Operation; /** whether to check the header. */ protected boolean m_CheckHeader; /** the interval of when to output the Instances object. */ protected int m_Interval; /** whether to clear the buffer once it has been forwarded. */ protected boolean m_ClearBuffer; /** * Returns a string describing the object. * * @return a description suitable for displaying in the gui */ @Override public String globalInfo() { return "Can act in two different ways:\n" + "1. Instance -> Instances (row -> dataset)\n" + "Buffers weka.core.Instance objects and outputs a weka.core.Instances " + "object, whenever the interval condition has been met.\n" + "2. Instances -> Instance (dataset -> row)\n" + "Outputs all the weka.core.Instance objects that the incoming " + "weka.core.Instances object contains."; } /** * Adds options to the internal list of options. */ @Override public void defineOptions() { super.defineOptions(); m_OptionManager.add("operation", "operation", Operation.INSTANCE_TO_INSTANCES); m_OptionManager.add("check", "checkHeader", false); m_OptionManager.add("interval", "interval", 1, 1, null); m_OptionManager.add("clear-buffer", "clearBuffer", false); } /** * Returns a quick info about the actor, which will be displayed in the GUI. * * @return null if no info available, otherwise short string */ @Override public String getQuickInfo() { String result; List<String> options; result = QuickInfoHelper.toString(this, "operation", m_Operation); result += QuickInfoHelper.toString(this, "interval", m_Interval, ", interval: "); options = new ArrayList<String>(); QuickInfoHelper.add(options, QuickInfoHelper.toString(this, "checkHeader", m_CheckHeader, "check header")); QuickInfoHelper.add(options, QuickInfoHelper.toString(this, "clearBuffer", m_ClearBuffer, "clear")); result += QuickInfoHelper.flatten(options); return result; } /** * Sets the way the buffer operates. * * @param value the operation */ public void setOperation(Operation value) { m_Operation = value; reset(); } /** * Returns the way the buffer operates. * * @return the operation */ public Operation getOperation() { return m_Operation; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String operationTipText() { return "The way the buffer operates, 'dataset -> row' or 'row -> dataset'."; } /** * Sets whether to check the header or not. * * @param value if true then the headers get checked */ public void setCheckHeader(boolean value) { m_CheckHeader = value; reset(); } /** * Returns whether the header gets checked or not. * * @return true if the header gets checked */ public boolean getCheckHeader() { return m_CheckHeader; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String checkHeaderTipText() { return "Whether to check the headers - if the headers change, the Instance " + "object gets dumped into a new file (in case of " + Operation.INSTANCE_TO_INSTANCES + ")."; } /** * Sets the interval for outputting the Instances objects. * * @param value the interval */ public void setInterval(int value) { m_Interval = value; reset(); } /** * Returns the interval for outputting the Instances objects. * * @return the interval */ public int getInterval() { return m_Interval; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String intervalTipText() { return "The interval at which to output the Instances object (in case of " + Operation.INSTANCE_TO_INSTANCES + ")."; } /** * Sets whether to clear the buffer once the dataset has been forwarded. * * @param value true if to clear buffer */ public void setClearBuffer(boolean value) { m_ClearBuffer = value; reset(); } /** * Returns whether to clear the buffer once the dataset has been forwarded. * * @return true if to clear buffer */ public boolean getClearBuffer() { return m_ClearBuffer; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String clearBufferTipText() { return "Whether to clear the buffer once the dataset has been forwarded " + "(in case of " + Operation.INSTANCE_TO_INSTANCES + ")."; } /** * Returns the class that the consumer accepts. * * @return <!-- flow-accepts-start -->weka.core.Instance.class, weka.core.Instance[].class<!-- flow-accepts-end --> */ public Class[] accepts() { if (m_Operation == Operation.INSTANCE_TO_INSTANCES) return new Class[] { Instance.class, Instance[].class }; else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) return new Class[] { Instances.class }; else throw new IllegalStateException("Unhandled operation: " + m_Operation); } /** * Returns the class of objects that it generates. * * @return <!-- flow-generates-start -->weka.core.Instances.class<!-- flow-generates-end --> */ public Class[] generates() { if (m_Operation == Operation.INSTANCE_TO_INSTANCES) return new Class[] { Instances.class }; else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) return new Class[] { Instance.class }; else throw new IllegalStateException("Unhandled operation: " + m_Operation); } /** * Removes entries from the backup. */ @Override protected void pruneBackup() { super.pruneBackup(); pruneBackup(BACKUP_BUFFER); pruneBackup(BACKUP_ITERATOR); } /** * Backs up the current state of the actor before update the variables. * * @return the backup */ @Override protected Hashtable<String, Object> backupState() { Hashtable<String, Object> result; result = super.backupState(); if (m_Buffer != null) result.put(BACKUP_BUFFER, m_Buffer); if (m_Iterator != null) result.put(BACKUP_ITERATOR, m_Iterator); return result; } /** * Restores the state of the actor before the variables got updated. * * @param state the backup of the state to restore from */ @Override protected void restoreState(Hashtable<String, Object> state) { if (state.containsKey(BACKUP_BUFFER)) { m_Buffer = (Instances) state.get(BACKUP_BUFFER); state.remove(BACKUP_BUFFER); } if (state.containsKey(BACKUP_ITERATOR)) { m_Iterator = (Iterator<Instance>) state.get(BACKUP_ITERATOR); state.remove(BACKUP_ITERATOR); } super.restoreState(state); } /** * Resets the scheme. */ @Override protected void reset() { super.reset(); m_Buffer = null; m_Iterator = null; } /** * Executes the flow item. * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instance[] insts; Instance inst; double[] values; int i; int n; boolean updated; result = null; if (m_Operation == Operation.INSTANCE_TO_INSTANCES) { if (m_InputToken.getPayload() instanceof Instance) { insts = new Instance[] { (Instance) m_InputToken.getPayload() }; } else { insts = (Instance[]) m_InputToken.getPayload(); } for (n = 0; n < insts.length; n++) { inst = insts[n]; if ((m_Buffer != null) && m_CheckHeader) { if (!m_Buffer.equalHeaders(inst.dataset())) { getLogger().info("Header changed, resetting buffer"); m_Buffer = null; } } // buffer instance if (m_Buffer == null) m_Buffer = new Instances(inst.dataset(), 0); // we need to make sure that string and relational values are in our // buffer header and update the current Instance accordingly before // buffering it values = inst.toDoubleArray(); updated = false; for (i = 0; i < values.length; i++) { if (inst.isMissing(i)) continue; if (inst.attribute(i).isString()) { values[i] = m_Buffer.attribute(i).addStringValue(inst.stringValue(i)); updated = true; } else if (inst.attribute(i).isRelationValued()) { values[i] = m_Buffer.attribute(i).addRelation(inst.relationalValue(i)); updated = true; } } if (updated) { if (inst instanceof SparseInstance) { inst = new SparseInstance(inst.weight(), values); } else if (inst instanceof BinarySparseInstance) { inst = new BinarySparseInstance(inst.weight(), values); } else { if (!(inst instanceof DenseInstance)) { getLogger().severe("Unhandled instance class (" + inst.getClass().getName() + "), " + "defaulting to " + DenseInstance.class.getName()); } inst = new DenseInstance(inst.weight(), values); } } else { inst = (Instance) inst.copy(); } m_Buffer.add(inst); } if (m_Buffer.numInstances() % m_Interval == 0) { m_OutputToken = new Token(m_Buffer); if (m_ClearBuffer) m_Buffer = null; } } else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) { m_Buffer = (Instances) m_InputToken.getPayload(); m_Iterator = m_Buffer.iterator(); } else { throw new IllegalStateException("Unhandled operation: " + m_Operation); } return result; } /** * Checks whether there is pending output to be collected after * executing the flow item. * * @return true if there is pending output */ @Override public boolean hasPendingOutput() { if (m_Operation == Operation.INSTANCE_TO_INSTANCES) return super.hasPendingOutput(); else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) return ((m_Iterator != null) && m_Iterator.hasNext()); else throw new IllegalStateException("Unhandled operation: " + m_Operation); } /** * Returns the generated token. * * @return the generated token */ @Override public Token output() { Token result; if (m_Operation == Operation.INSTANCE_TO_INSTANCES) { result = m_OutputToken; m_OutputToken = null; } else if (m_Operation == Operation.INSTANCES_TO_INSTANCE) { result = new Token(m_Iterator.next()); } else { throw new IllegalStateException("Unhandled operation: " + m_Operation); } updateProvenance(result); return result; } /** * Updates the provenance information in the provided container. * * @param cont the provenance container to update */ public void updateProvenance(ProvenanceContainer cont) { if (Provenance.getSingleton().isEnabled()) { if (m_InputToken.hasProvenance()) cont.setProvenance(m_InputToken.getProvenance().getClone()); cont.addProvenance(new ProvenanceInformation(ActorType.PREPROCESSOR, m_InputToken.getPayload().getClass(), this, ((Token) cont).getPayload().getClass())); } } /** * Cleans up after the execution has finished. */ @Override public void wrapUp() { m_Iterator = null; m_Buffer = null; super.wrapUp(); } }