Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * WekaReorderAttributesToReference.java * Copyright (C) 2013-2017 University of Waikato, Hamilton, New Zealand */ package adams.flow.transformer; import adams.core.MessageCollection; import adams.core.QuickInfoHelper; import adams.core.io.PlaceholderFile; import adams.flow.core.CallableActorHelper; import adams.flow.core.CallableActorReference; import adams.flow.core.DynamicInitializer; import adams.flow.core.OptionalOneTimeInitializer; import adams.flow.core.Token; import weka.core.Attribute; import weka.core.Instance; import weka.core.Instances; import weka.core.SelectedTag; import weka.core.converters.AbstractFileLoader; import weka.core.converters.ArffLoader; import weka.core.converters.ConverterUtils.DataSource; import weka.filters.Filter; import weka.filters.MultiFilter; import weka.filters.unsupervised.attribute.Add; import weka.filters.unsupervised.attribute.Reorder; import java.util.ArrayList; import java.util.Hashtable; import java.util.List; /** <!-- globalinfo-start --> * Reorders the attributes of the Instance/Instances passing through according to the provided reference dataset (callable actor or reference file).<br> * This ensures that the generated data always has the same structure as the reference dataset. * <br><br> <!-- globalinfo-end --> * <!-- flow-summary-start --> * Input/output:<br> * - accepts:<br> * weka.core.Instance<br> * weka.core.Instances<br> * - generates:<br> * weka.core.Instance<br> * weka.core.Instances<br> * <br><br> <!-- flow-summary-end --> * <!-- options-start --> * <pre>-logging-level <OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST> (property: loggingLevel) * The logging level for outputting errors and debugging output. * default: WARNING * </pre> * * <pre>-name <java.lang.String> (property: name) * The name of the actor. * default: WekaReorderAttributesToReference * </pre> * * <pre>-annotation <adams.core.base.BaseText> (property: annotations) * The annotations to attach to this actor. * default: * </pre> * * <pre>-skip <boolean> (property: skip) * If set to true, transformation is skipped and the input token is just forwarded * as it is. * default: false * </pre> * * <pre>-stop-flow-on-error <boolean> (property: stopFlowOnError) * If set to true, the flow gets stopped in case this actor encounters an error; * useful for critical actors. * default: false * </pre> * * <pre>-reference-file <adams.core.io.PlaceholderFile> (property: referenceFile) * The reference dataset to load (when not pointing to a directory). * default: ${CWD} * </pre> * * <pre>-use-custom <boolean> (property: useCustomLoader) * If set to true, then the custom loader will be used for loading the data. * default: false * </pre> * * <pre>-loader <weka.core.converters.AbstractFileLoader> (property: customLoader) * The custom loader to use if enabled. * default: weka.core.converters.ArffLoader * </pre> * * <pre>-reference-actor <adams.flow.core.CallableActorReference> (property: referenceActor) * The callable actor to use for obtaining the reference dataset in case reference * file points to a directory. * default: unknown * </pre> * * <pre>-on-the-fly <boolean> (property: onTheFly) * If set to true, the reference file is not required to be present at set * up time (eg if built on the fly), only at execution time. * default: false * </pre> * * <pre>-init-once <boolean> (property: initializeOnce) * If set to true, then the internal reorder filter will get initialized only * with the first batch of data; otherwise every time data gets passed through. * default: false * </pre> * * <pre>-lenient <boolean> (property: lenient) * If set to true, attributes from the reference data that are missing in the * incoming data get tolerated. * default: false * </pre> * * <pre>-keep <boolean> (property: keepRelationName) * If set to true, then the filter won't change the relation name of the incoming * dataset. * default: false * </pre> * <!-- options-end --> * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public class WekaReorderAttributesToReference extends AbstractTransformer implements OptionalOneTimeInitializer, DynamicInitializer { /** for serialization. */ private static final long serialVersionUID = 530323409335629567L; /** the key for storing the reference dataset in the backup. */ public final static String BACKUP_REFERENCE = "reference"; /** the key for storing the reorder filter in the backup. */ public final static String BACKUP_REORDER = "reorder"; /** the reference dataset to load. */ protected PlaceholderFile m_ReferenceFile; /** whether to use a custom converter. */ protected boolean m_UseCustomLoader; /** the custom loader. */ protected AbstractFileLoader m_CustomLoader; /** the callable actor to get the reference data from. */ protected CallableActorReference m_ReferenceActor; /** the reference dataset. */ protected Instances m_Reference; /** whether the dataset gets generated on the fly and might not be available at setUp time. */ protected boolean m_OnTheFly; /** whether to initialize filter only with the first batch. */ protected boolean m_InitializeOnce; /** whether to tolerate attributes that are not present in the incoming data. */ protected boolean m_Lenient; /** whether to keep the incoming relation name. */ protected boolean m_KeepRelationName; /** the reorder filter to use. */ protected MultiFilter m_Reorder; /** * Returns a string describing the object. * * @return a description suitable for displaying in the gui */ @Override public String globalInfo() { return "Reorders the attributes of the Instance/Instances passing through " + "according to the provided reference dataset (callable actor or reference file).\n" + "This ensures that the generated data always has the same structure " + "as the reference dataset."; } /** * Adds options to the internal list of options. */ @Override public void defineOptions() { super.defineOptions(); m_OptionManager.add("reference-file", "referenceFile", new PlaceholderFile(".")); m_OptionManager.add("use-custom", "useCustomLoader", false); m_OptionManager.add("loader", "customLoader", new ArffLoader()); m_OptionManager.add("reference-actor", "referenceActor", new CallableActorReference(CallableActorReference.UNKNOWN)); m_OptionManager.add("on-the-fly", "onTheFly", false); m_OptionManager.add("init-once", "initializeOnce", false); m_OptionManager.add("lenient", "lenient", false); m_OptionManager.add("keep", "keepRelationName", false); } /** * Resets the actor. */ @Override protected void reset() { super.reset(); m_Reference = null; m_Reorder = null; } /** * Sets the file to load the reference dataset from. * * @param value the reference file */ public void setReferenceFile(PlaceholderFile value) { m_ReferenceFile = value; reset(); } /** * Returns the file to load the reference dataset from. * * @return the reference file */ public PlaceholderFile getReferenceFile() { return m_ReferenceFile; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String referenceFileTipText() { return "The reference dataset to load (when not pointing to a directory)."; } /** * Sets whether to use a custom loader or not. * * @param value if true then the custom loader will be used */ public void setUseCustomLoader(boolean value) { m_UseCustomLoader = value; reset(); } /** * Returns whether a custom loader is used or not. * * @return true if a custom loader is used */ public boolean getUseCustomLoader() { return m_UseCustomLoader; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String useCustomLoaderTipText() { return "If set to true, then the custom loader will be used for loading the data."; } /** * Sets the custom loader to use. * * @param value the custom loader */ public void setCustomLoader(AbstractFileLoader value) { m_CustomLoader = value; reset(); } /** * Returns the custom loader in use. * * @return the custom loader */ public AbstractFileLoader getCustomLoader() { return m_CustomLoader; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String customLoaderTipText() { return "The custom loader to use if enabled."; } /** * Sets the callable actor to obtain the reference dataset from if reference file is pointing * to a directory. * * @param value the actor reference */ public void setReferenceActor(CallableActorReference value) { m_ReferenceActor = value; reset(); } /** * Returns the callable actor to obtain the reference dataset from if reference file is pointing * to a directory. * * @return the actor reference */ public CallableActorReference getReferenceActor() { return m_ReferenceActor; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String referenceActorTipText() { return "The callable actor to use for obtaining the reference dataset in case " + "reference file points to a directory."; } /** * Sets whether the reference file gets built on the fly and might not be present * at start up time. * * @param value if true then the reference does not have to be present at * start up time */ public void setOnTheFly(boolean value) { m_OnTheFly = value; reset(); } /** * Returns whether the reference file gets built on the fly and might not be present * at start up time. * * @return true if the reference is not necessarily present at start * up time */ public boolean getOnTheFly() { return m_OnTheFly; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String onTheFlyTipText() { return "If set to true, the reference file is not required to be present at " + "set up time (eg if built on the fly), only at execution time."; } /** * Sets whether the internal reorder filter gets initialized only with the first batch. * * @param value true if the filter gets only initialized once */ public void setInitializeOnce(boolean value) { m_InitializeOnce = value; reset(); } /** * Returns whether the internal reorder filter gets initialized only with the first batch. * * @return true if the filter gets only initialized once */ public boolean getInitializeOnce() { return m_InitializeOnce; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String initializeOnceTipText() { return "If set to true, then the internal reorder filter will get initialized only with the " + "first batch of data; otherwise every time data gets passed through."; } /** * Sets whether to tolerate attributes that are missing in the incoming data. * * @param value true if to tolerate unknown attributes */ public void setLenient(boolean value) { m_Lenient = value; reset(); } /** * Returns whether to tolerate attributes that are missing in the incoming data. * * @return true if to tolerate unknown attributes */ public boolean getLenient() { return m_Lenient; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String lenientTipText() { return "If set to true, attributes from the reference data that are missing " + "in the incoming data get tolerated."; } /** * Sets whether the filter doesn't change the relation name. * * @param value true if the filter won't change the relation name */ public void setKeepRelationName(boolean value) { m_KeepRelationName = value; reset(); } /** * Returns whether the filter doesn't change the relation name. * * @return true if the filter doesn't change the relation name */ public boolean getKeepRelationName() { return m_KeepRelationName; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String keepRelationNameTipText() { return "If set to true, then the filter won't change the relation name of the " + "incoming dataset."; } /** * Returns a quick info about the actor, which will be displayed in the GUI. * * @return null if no info available, otherwise short string */ @Override public String getQuickInfo() { String result; List<String> options; result = QuickInfoHelper.toString(this, "referenceFile", m_ReferenceFile, "file: "); result += QuickInfoHelper.toString(this, "referenceActor", m_ReferenceActor, ", actor: "); options = new ArrayList<String>(); QuickInfoHelper.add(options, QuickInfoHelper.toString(this, "useCustomLoader", getUseCustomLoader(), m_CustomLoader.getClass().getSimpleName())); QuickInfoHelper.add(options, QuickInfoHelper.toString(this, "onTheFly", m_OnTheFly, "on-the-fly")); QuickInfoHelper.add(options, QuickInfoHelper.toString(this, "initializeOnce", m_InitializeOnce, "once")); QuickInfoHelper.add(options, QuickInfoHelper.toString(this, "lenient", m_Lenient, "lenient")); QuickInfoHelper.add(options, QuickInfoHelper.toString(this, "keepRelationName", m_KeepRelationName, "keep")); result += QuickInfoHelper.flatten(options); return result; } /** * Returns the class that the consumer accepts. * * @return the Class of objects that can be processed */ @Override public Class[] accepts() { return new Class[] { Instance.class, Instances.class }; } /** * Returns the class of objects that it generates. * * @return the Class of the generated tokens */ @Override public Class[] generates() { return new Class[] { Instance.class, Instances.class }; } /** * Removes entries from the backup. */ @Override protected void pruneBackup() { super.pruneBackup(); pruneBackup(BACKUP_REFERENCE); pruneBackup(BACKUP_REORDER); } /** * Backs up the current state of the actor before update the variables. * * @return the backup */ @Override protected Hashtable<String, Object> backupState() { Hashtable<String, Object> result; result = super.backupState(); if (m_Reference != null) result.put(BACKUP_REFERENCE, m_Reference); if (m_Reorder != null) result.put(BACKUP_REORDER, m_Reorder); return result; } /** * Restores the state of the actor before the variables got updated. * * @param state the backup of the state to restore from */ @Override protected void restoreState(Hashtable<String, Object> state) { if (state.containsKey(BACKUP_REFERENCE)) { m_Reference = (Instances) state.get(BACKUP_REFERENCE); state.remove(BACKUP_REFERENCE); } if (state.containsKey(BACKUP_REORDER)) { m_Reorder = (MultiFilter) state.get(BACKUP_REORDER); state.remove(BACKUP_REORDER); } super.restoreState(state); } /** * Loads the reference data. * * @return null if everything worked, otherwise an error message */ protected String setUpReference() { String result; AbstractFileLoader loader; DataSource source; MessageCollection errors; result = null; if (m_ReferenceFile.isDirectory()) { // obtain reference from callable actor try { errors = new MessageCollection(); m_Reference = (Instances) CallableActorHelper.getSetupFromSource(null, m_ReferenceActor, this, errors); if (m_Reference == null) { if (!errors.isEmpty()) result = errors.toString(); } } catch (Exception e) { m_Reference = null; result = handleException( "Failed to obtain reference from callable actor '" + m_ReferenceActor + "': ", e); } } else { // load reference try { if (m_UseCustomLoader) { loader = m_CustomLoader; loader.setFile(m_ReferenceFile.getAbsoluteFile()); source = new DataSource(loader); } else { source = new DataSource(m_ReferenceFile.getAbsolutePath()); } m_Reference = source.getDataSet(); if (m_Reference == null) result = "Failed to load reference dataset from '" + m_ReferenceFile + "'!"; } catch (Exception e) { m_Reference = null; result = handleException("Failed to load reference dataset from '" + m_ReferenceFile + "': ", e); } } return result; } /** * Initializes the item for flow execution. * * @return null if everything is fine, otherwise error message */ @Override public String setUp() { String result; result = super.setUp(); if (result == null) { if (!m_OnTheFly) result = setUpReference(); } return result; } /** * Executes the flow item. * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances dataOld; Instance instOld; Instances dataNew; Instance instNew; Attribute att; int i; StringBuilder order; List<Add> adds; Add add; int index; StringBuilder labels; int n; List<Filter> filters; Reorder reorder; result = null; if (m_OnTheFly && (m_Reference == null)) { result = setUpReference(); if (result != null) return result; } dataNew = null; instNew = null; // get input data if (m_InputToken.getPayload() instanceof Instance) { instOld = (Instance) m_InputToken.getPayload(); dataOld = instOld.dataset(); } else { instOld = null; dataOld = (Instances) m_InputToken.getPayload(); } // do we need to initialize filter? if (m_InitializeOnce || (m_Reorder == null)) { // check incoming data if (!m_Lenient) { for (i = 0; i < m_Reference.numAttributes(); i++) { att = m_Reference.attribute(i); if (dataOld.attribute(att.name()) == null) { if (result == null) result = "Missing attribute(s) in incoming data: " + att.name(); else result += ", " + att.name(); } } if (result != null) getLogger().severe(result); } if (result == null) { try { // determine indices order = new StringBuilder(); adds = new ArrayList<Add>(); for (i = 0; i < m_Reference.numAttributes(); i++) { att = m_Reference.attribute(i); if (dataOld.attribute(att.name()) == null) { index = dataOld.numAttributes() + adds.size(); add = new Add(); add.setAttributeIndex("last"); add.setAttributeName(att.name()); add.setAttributeType(new SelectedTag(att.type(), Add.TAGS_TYPE)); if (att.isNominal()) { labels = new StringBuilder(); for (n = 0; n < att.numValues(); n++) { if (labels.length() > 0) labels.append(","); labels.append(att.value(n)); } add.setNominalLabels(labels.toString()); } adds.add(add); } else { index = dataOld.attribute(att.name()).index(); } if (order.length() > 0) order.append(","); order.append((index + 1)); } // build reorder filter reorder = new Reorder(); reorder.setAttributeIndices(order.toString()); // build multifilter filters = new ArrayList<Filter>(); filters.addAll(adds); filters.add(reorder); m_Reorder = new MultiFilter(); m_Reorder.setFilters(filters.toArray(new Filter[filters.size()])); // initialize filter m_Reorder.setInputFormat(dataOld); } catch (Exception e) { result = handleException("Failed to initialize reorder filter!", e); } } } // reorder data if (result == null) { try { if (instOld != null) { m_Reorder.input(instOld); m_Reorder.batchFinished(); instNew = m_Reorder.output(); if (m_KeepRelationName) instNew.dataset().setRelationName(dataOld.relationName()); } else { dataNew = Filter.useFilter(dataOld, m_Reorder); if (m_KeepRelationName) dataNew.setRelationName(dataOld.relationName()); } } catch (Exception e) { result = handleException("Failed to reorder data!", e); instNew = null; dataNew = null; } } if (instNew != null) m_OutputToken = new Token(instNew); else if (dataNew != null) m_OutputToken = new Token(dataNew); return result; } }