Java tutorial
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * WekaInstancesHistogramRanges.java
 * Copyright (C) 2017 University of Waikato, Hamilton, New Zealand
 */

package adams.flow.transformer;

import adams.core.Index;
import adams.core.QuickInfoHelper;
import adams.core.base.BaseString;
import adams.data.spreadsheet.SpreadSheet;
import adams.data.statistics.ArrayHistogram;
import adams.data.statistics.ArrayHistogram.BinCalculation;
import adams.data.statistics.StatUtils;
import adams.data.weka.WekaAttributeIndex;
import weka.core.Instances;

import java.util.regex.Pattern;

/**
 <!-- globalinfo-start -->
 * Outputs the ranges generated by adams.data.statistics.ArrayHistogram using the incoming weka.core.Instances object.<br>
 * The actor just uses the internal format (double array) and does not check whether the attributes are actually numeric.
 * <br><br>
 <!-- globalinfo-end -->
 *
 <!-- flow-summary-start -->
 * Input/output:<br>
 * - accepts:<br>
 * &nbsp;&nbsp;&nbsp;weka.core.Instances<br>
 * - generates:<br>
 * &nbsp;&nbsp;&nbsp;java.lang.String<br>
 * <br><br>
 <!-- flow-summary-end -->
 *
 <!-- options-start -->
 * <pre>-logging-level &lt;OFF|SEVERE|WARNING|INFO|CONFIG|FINE|FINER|FINEST&gt; (property: loggingLevel)
 * &nbsp;&nbsp;&nbsp;The logging level for outputting errors and debugging output.
 * &nbsp;&nbsp;&nbsp;default: WARNING
 * </pre>
 *
 * <pre>-name &lt;java.lang.String&gt; (property: name)
 * &nbsp;&nbsp;&nbsp;The name of the actor.
 * &nbsp;&nbsp;&nbsp;default: WekaInstancesHistogramRanges
 * </pre>
 *
 * <pre>-annotation &lt;adams.core.base.BaseAnnotation&gt; (property: annotations)
 * &nbsp;&nbsp;&nbsp;The annotations to attach to this actor.
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 * <pre>-skip &lt;boolean&gt; (property: skip)
 * &nbsp;&nbsp;&nbsp;If set to true, transformation is skipped and the input token is just forwarded
 * &nbsp;&nbsp;&nbsp;as it is.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-stop-flow-on-error &lt;boolean&gt; (property: stopFlowOnError)
 * &nbsp;&nbsp;&nbsp;If set to true, the flow execution at this level gets stopped in case this
 * &nbsp;&nbsp;&nbsp;actor encounters an error; the error gets propagated; useful for critical
 * &nbsp;&nbsp;&nbsp;actors.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-silent &lt;boolean&gt; (property: silent)
 * &nbsp;&nbsp;&nbsp;If enabled, then no errors are output in the console; Note: the enclosing
 * &nbsp;&nbsp;&nbsp;actor handler must have this enabled as well.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-output-array &lt;boolean&gt; (property: outputArray)
 * &nbsp;&nbsp;&nbsp;If enabled, the ranges are output as array rather than one by one.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-type &lt;ROW_BY_INDEX|COLUMN_BY_INDEX|COLUMN_BY_REGEXP&gt; (property: dataType)
 * &nbsp;&nbsp;&nbsp;Whether to retrieve rows or columns from the Instances object.
 * &nbsp;&nbsp;&nbsp;default: COLUMN_BY_INDEX
 * </pre>
 *
 * <pre>-location &lt;adams.core.base.BaseString&gt; [-location ...] (property: locations)
 * &nbsp;&nbsp;&nbsp;The locations of the data, depending on the chosen data type that can be
 * &nbsp;&nbsp;&nbsp;either indices, attribute names or regular expressions on the attribute
 * &nbsp;&nbsp;&nbsp;names.
 * &nbsp;&nbsp;&nbsp;default:
 * </pre>
 *
 * <pre>-bin-calc &lt;MANUAL|FREQUENCY|DENSITY|STURGES|SCOTT|SQRT&gt; (property: binCalculation)
 * &nbsp;&nbsp;&nbsp;Defines how the number of bins are calculated.
 * &nbsp;&nbsp;&nbsp;default: MANUAL
 * </pre>
 *
 * <pre>-num-bins &lt;int&gt; (property: numBins)
 * &nbsp;&nbsp;&nbsp;The number of bins to use in case of manual bin calculation.
 * &nbsp;&nbsp;&nbsp;default: 50
 * &nbsp;&nbsp;&nbsp;minimum: 1
 * </pre>
 *
 * <pre>-bin-width &lt;double&gt; (property: binWidth)
 * &nbsp;&nbsp;&nbsp;The bin width to use for some of the calculations.
 * &nbsp;&nbsp;&nbsp;default: 1.0
 * &nbsp;&nbsp;&nbsp;minimum: 1.0E-5
 * </pre>
 *
 * <pre>-normalize &lt;boolean&gt; (property: normalize)
 * &nbsp;&nbsp;&nbsp;If set to true the data gets normalized first before the histogram is calculated.
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-use-fixed-min-max &lt;boolean&gt; (property: useFixedMinMax)
 * &nbsp;&nbsp;&nbsp;If enabled, then the user-specified min/max values are used for the bin
 * &nbsp;&nbsp;&nbsp;calculation rather than the min/max from the data (allows comparison of
 * &nbsp;&nbsp;&nbsp;histograms when generating histograms over a range of arrays).
 * &nbsp;&nbsp;&nbsp;default: false
 * </pre>
 *
 * <pre>-manual-min &lt;double&gt; (property: manualMin)
 * &nbsp;&nbsp;&nbsp;The minimum to use when using manual binning with user-supplied min/max
 * &nbsp;&nbsp;&nbsp;enabled.
 * &nbsp;&nbsp;&nbsp;default: 0.0
 * </pre>
 *
 * <pre>-manual-max &lt;double&gt; (property: manualMax)
 * &nbsp;&nbsp;&nbsp;The maximum to use when using manual binning with user-supplied min/max
 * &nbsp;&nbsp;&nbsp;enabled.
 * &nbsp;&nbsp;&nbsp;default: 1.0
 * </pre>
 *
 * <pre>-num-decimals &lt;int&gt; (property: numDecimals)
 * &nbsp;&nbsp;&nbsp;The number of decimals to show in the bin descriptions.
 * &nbsp;&nbsp;&nbsp;default: 3
 * &nbsp;&nbsp;&nbsp;minimum: 0
 * </pre>
 *
 <!-- options-end -->
 *
 * @author  fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 */
public class WekaInstancesHistogramRanges
  extends AbstractArrayProvider {

  /** for serialization. */
  private static final long serialVersionUID = -8493694755948450901L;

  /** the type of data to get from the Instances object (rows or columns). */
  protected WekaInstancesStatisticDataType m_DataType;

  /** the array of indices/regular expressions. */
  protected BaseString[] m_Locations;

  /** how to calculate the number of bins. */
  protected BinCalculation m_BinCalculation;

  /** the number of bins in case of manual bin calculation. */
  protected int m_NumBins;

  /** the bin width - used for some calculations. */
  protected double m_BinWidth;

  /** whether to normalize the data. */
  protected boolean m_Normalize;

  /** whether to use fixed min/max for manual bin calculation. */
  protected boolean m_UseFixedMinMax;

  /** the manual minimum. */
  protected double m_ManualMin;

  /** the manual maximum. */
  protected double m_ManualMax;

  /** the number of decimals to show. */
  protected int m_NumDecimals;

  /**
   * Returns a string describing the object.
   *
   * @return 		a description suitable for displaying in the gui
   */
  @Override
  public String globalInfo() {
    return
      "Outputs the ranges generated by " + ArrayHistogram.class.getName() + " using the incoming "
        + Instances.class.getName() + " object.\n"
        + "The actor just uses the internal format (double array) and does "
        + "not check whether the attributes are actually numeric.";
  }

  /**
   * Adds options to the internal list of options.
   */
  @Override
  public void defineOptions() {
    super.defineOptions();

    m_OptionManager.add(
      "type", "dataType",
      WekaInstancesStatisticDataType.COLUMN_BY_INDEX);

    m_OptionManager.add(
      "location", "locations",
      new BaseString[0]);

    m_OptionManager.add(
      "bin-calc", "binCalculation",
      BinCalculation.MANUAL);

    m_OptionManager.add(
      "num-bins", "numBins",
      50, 1, null);

    m_OptionManager.add(
      "bin-width", "binWidth",
      1.0, 0.00001, null);

    m_OptionManager.add(
      "normalize", "normalize",
      false);

    m_OptionManager.add(
      "use-fixed-min-max", "useFixedMinMax",
      false);

    m_OptionManager.add(
      "manual-min", "manualMin",
      0.0);

    m_OptionManager.add(
      "manual-max", "manualMax",
      1.0);

    m_OptionManager.add(
      "num-decimals", "numDecimals",
      3, 0, null);
  }

  /**
   * Returns the base class of the items.
   *
   * @return		the class
   */
  @Override
  protected Class getItemClass() {
    return String.class;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  @Override
  public String outputArrayTipText() {
    return "If enabled, the ranges are output as array rather than one by one.";
  }

  /**
   * Sets what type of data to retrieve from the Instances object.
   *
   * @param value	the type of conversion
   */
  public void setDataType(WekaInstancesStatisticDataType value) {
    m_DataType = value;
    reset();
  }

  /**
   * Returns what type of data to retrieve from the Instances object.
   *
   * @return		the type of conversion
   */
  public WekaInstancesStatisticDataType getDataType() {
    return m_DataType;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String dataTypeTipText() {
    return "Whether to retrieve rows or columns from the Instances object.";
  }

  /**
   * Sets the locations of the data (indices/regular expressions on attribute name).
   *
   * @param value	the locations of the data
   */
  public void setLocations(BaseString[] value) {
    m_Locations = value;
    reset();
  }

  /**
   * Returns the locations of the data (indices/regular expressions on attribute name).
   *
   * @return		the locations of the data
   */
  public BaseString[] getLocations() {
    return m_Locations;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String locationsTipText() {
    return
      "The locations of the data, depending on the chosen data type that "
        + "can be either indices, attribute names or regular expressions on the attribute names.";
  }

  /**
   * Sets how the number of bins is calculated.
   *
   * @param value 	the bin calculation
   */
  public void setBinCalculation(BinCalculation value) {
    m_BinCalculation = value;
    reset();
  }

  /**
   * Returns how the number of bins is calculated.
   *
   * @return 		the bin calculation
   */
  public BinCalculation getBinCalculation() {
    return m_BinCalculation;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String binCalculationTipText() {
    return "Defines how the number of bins are calculated.";
  }

  /**
   * Sets the number of bins to use in manual calculation.
   *
   * @param value 	the number of bins
   */
  public void setNumBins(int value) {
    m_NumBins = value;
    reset();
  }

  /**
   * Returns the number of bins to use in manual calculation.
   *
   * @return 		the number of bins
   */
  public int getNumBins() {
    return m_NumBins;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String numBinsTipText() {
    return "The number of bins to use in case of manual bin calculation.";
  }

  /**
   * Sets the bin width to use (for some calculations).
   *
   * @param value 	the bin width
   */
  public void setBinWidth(double value) {
    m_BinWidth = value;
    reset();
  }

  /**
   * Returns the bin width in use (for some calculations).
   *
   * @return 		the bin width
   */
  public double getBinWidth() {
    return m_BinWidth;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String binWidthTipText() {
    return "The bin width to use for some of the calculations.";
  }

  /**
   * Sets whether to normalize the data before generating the histogram.
   *
   * @param value 	if true the data gets normalized first
   */
  public void setNormalize(boolean value) {
    m_Normalize = value;
    reset();
  }

  /**
   * Returns whether to normalize the data before generating the histogram.
   *
   * @return 		true if the data gets normalized first
   */
  public boolean getNormalize() {
    return m_Normalize;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String normalizeTipText() {
    return "If set to true the data gets normalized first before the histogram is calculated.";
  }

  /**
   * Sets whether to use user-supplied min/max for bin calculation rather
   * than obtain min/max from data.
   *
   * @param value 	true if to use user-supplied min/max
   */
  public void setUseFixedMinMax(boolean value) {
    m_UseFixedMinMax = value;
    reset();
  }

  /**
   * Returns whether to use user-supplied min/max for bin calculation rather
   * than obtain min/max from data.
   *
   * @return 		true if to use user-supplied min/max
   */
  public boolean getUseFixedMinMax() {
    return m_UseFixedMinMax;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String useFixedMinMaxTipText() {
    return
      "If enabled, then the user-specified min/max values are used for the "
        + "bin calculation rather than the min/max from the data (allows "
        + "comparison of histograms when generating histograms over a range "
        + "of arrays).";
  }

  /**
   * Sets the minimum to use when using manual binning with user-supplied
   * min/max enabled.
   *
   * @param value 	the minimum
   */
  public void setManualMin(double value) {
    m_ManualMin = value;
    reset();
  }

  /**
   * Returns the minimum to use when using manual binning with user-supplied
   * min/max enabled.
   *
   * @return 		the minimum
   */
  public double getManualMin() {
    return m_ManualMin;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String manualMinTipText() {
    return "The minimum to use when using manual binning with user-supplied min/max enabled.";
  }

  /**
   * Sets the maximum to use when using manual binning with user-supplied
   * min/max enabled.
   *
   * @param value 	the maximum
   */
  public void setManualMax(double value) {
    m_ManualMax = value;
    reset();
  }

  /**
   * Returns the maximum to use when using manual binning with user-supplied
   * min/max enabled.
   *
   * @return 		the maximum
   */
  public double getManualMax() {
    return m_ManualMax;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String manualMaxTipText() {
    // wording aligned with manualMinTipText(): the option being referred to is "min/max"
    return "The maximum to use when using manual binning with user-supplied min/max enabled.";
  }

  /**
   * Sets the number of decimals to show in the bin description.
   *
   * @param value 	the number of decimals
   */
  public void setNumDecimals(int value) {
    m_NumDecimals = value;
    reset();
  }

  /**
   * Returns the number of decimals to show in the bin description.
   *
   * @return 		the number of decimals
   */
  public int getNumDecimals() {
    return m_NumDecimals;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return 		tip text for this property suitable for
   * 			displaying in the GUI or for listing the options.
   */
  public String numDecimalsTipText() {
    return "The number of decimals to show in the bin descriptions.";
  }

  /**
   * Returns a quick info about the actor, which will be displayed in the GUI.
   *
   * @return		null if no info available, otherwise short string
   */
  @Override
  public String getQuickInfo() {
    return QuickInfoHelper.toString(this, "outputArray", (m_OutputArray ? "as array" : "one by one"));
  }

  /**
   * Returns the class that the consumer accepts.
   *
   * @return		<!-- flow-accepts-start -->weka.core.Instances.class<!-- flow-accepts-end -->
   */
  public Class[] accepts() {
    return new Class[]{Instances.class};
  }

  /**
   * Executes the flow item: collects one double array per configured
   * location from the incoming Instances object (a row, a column by index
   * or the first column whose name matches a regular expression), feeds
   * them into an {@link ArrayHistogram} configured from this actor's options
   * and queues the column names of the resulting spreadsheet as output.
   * <br>
   * Any exception raised while collecting the data or calculating the
   * histogram is turned into an error message; in that case nothing is
   * queued.
   *
   * @return		null if everything is fine, otherwise error message
   */
  @Override
  protected String doExecute() {
    String		result;
    SpreadSheet		sheet;
    Instances		data;
    ArrayHistogram	stat;
    Index		index;
    String		location;
    Pattern		pattern;
    int			pos;
    int			n;

    result = null;
    sheet  = null;
    m_Queue.clear();

    try {
      data = (Instances) m_InputToken.getPayload();

      stat = new ArrayHistogram();
      stat.setBinCalculation(m_BinCalculation);
      stat.setNumBins(m_NumBins);
      stat.setBinWidth(m_BinWidth);
      stat.setNormalize(m_Normalize);
      stat.setUseFixedMinMax(m_UseFixedMinMax);
      stat.setManualMin(m_ManualMin);
      stat.setManualMax(m_ManualMax);
      stat.setDisplayRanges(true);
      stat.setNumDecimals(m_NumDecimals);

      for (BaseString loc: m_Locations) {
        location = loc.stringValue();
        switch (m_DataType) {
          case ROW_BY_INDEX:
            index = new Index(location);
            index.setMax(data.numInstances());
            pos = index.getIntIndex();
            // no valid row can be negative: fail with a message naming the
            // location instead of an anonymous out-of-bounds error
            if (pos < 0)
              throw new IllegalStateException(
                "Invalid row index '" + location + "' (#instances: " + data.numInstances() + ")");
            stat.add(StatUtils.toNumberArray(data.instance(pos).toDoubleArray()));
            break;

          case COLUMN_BY_INDEX:
            index = new WekaAttributeIndex(location);
            ((WekaAttributeIndex) index).setData(data);
            pos = index.getIntIndex();
            // no valid attribute can be negative: fail explicitly rather than
            // risk reading from a non-existent attribute
            if (pos < 0)
              throw new IllegalStateException(
                "Invalid attribute index '" + location + "' (#attributes: " + data.numAttributes() + ")");
            stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(pos)));
            break;

          case COLUMN_BY_REGEXP:
            // compile once per location rather than once per attribute
            pattern = Pattern.compile(location);
            for (n = 0; n < data.numAttributes(); n++) {
              if (pattern.matcher(data.attribute(n).name()).matches()) {
                stat.add(StatUtils.toNumberArray(data.attributeToDoubleArray(n)));
                // only the first matching attribute is used for a location
                break;
              }
            }
            break;

          default:
            throw new IllegalStateException("Unhandled data type: " + m_DataType);
        }
      }

      sheet = stat.calculate().toSpreadSheet();
    }
    catch (Exception e) {
      // sheet is still null at this point, so nothing gets queued below
      result = handleException("Error generating the ranges: ", e);
    }

    if (sheet != null) {
      // the ranges are taken from the column names of the generated spreadsheet
      for (n = 0; n < sheet.getColumnCount(); n++)
        m_Queue.add(sheet.getColumnName(n));
    }

    return result;
  }
}