Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * WekaExperimentGenerator.java * Copyright (C) 2010-2015 University of Waikato, Hamilton, New Zealand */ package adams.flow.sink; import adams.core.QuickInfoHelper; import adams.core.io.PlaceholderFile; import weka.classifiers.Classifier; import weka.experiment.CSVResultListener; import weka.experiment.ClassifierSplitEvaluator; import weka.experiment.CrossValidationResultProducer; import weka.experiment.Experiment; import weka.experiment.InstancesResultListener; import weka.experiment.PropertyNode; import weka.experiment.RandomSplitResultProducer; import weka.experiment.RegressionSplitEvaluator; import weka.experiment.SplitEvaluator; import javax.swing.DefaultListModel; import java.beans.IntrospectionException; import java.beans.PropertyDescriptor; import java.io.File; /** <!-- globalinfo-start --> * Generates an experiment setup that can be used in conjunction with the Experiment transformer actor. * <br><br> <!-- globalinfo-end --> * <!-- flow-summary-start --> * Input/output:<br> * - accepts:<br> * weka.classifiers.Classifier[]<br> * <br><br> <!-- flow-summary-end --> * <!-- options-start --> * Valid options are: <br><br> * * <pre>-D <int> (property: debugLevel) * The greater the number the more additional info the scheme may output to * the console (0 = off). * default: 0 * minimum: 0 * </pre> * * <pre>-name <java.lang.String> (property: name) * The name of the actor. * default: ExperimentGenerator * </pre> * * <pre>-annotation <adams.core.base.BaseText> (property: annotations) * The annotations to attach to this actor. * default: * </pre> * * <pre>-skip (property: skip) * If set to true, transformation is skipped and the input token is just forwarded * as it is. * </pre> * * <pre>-exp-type <CLASSIFICATION|REGRESSION> (property: experimentType) * The type of experiment to perform. * default: CLASSIFICATION * </pre> * * <pre>-eval-type <CROSS_VALIDATION|TRAIN_TEST_SPLIT_RANDOMIZED|TRAIN_TEST_SPLIT_ORDER_PRESERVED> (property: evaluationType) * The type of evaluation to perform. * default: CROSS_VALIDATION * </pre> * * <pre>-runs <int> (property: runs) * The number of runs to perform. * default: 10 * minimum: 1 * </pre> * * <pre>-folds <int> (property: folds) * The number of folds to use in cross-validation. * default: 10 * minimum: 2 * </pre> * * <pre>-split <double> (property: splitPercentage) * The percentage to use in train/test splits. * default: 66.0 * minimum: 1.0E-4 * maximum: 99.9999 * </pre> * * <pre>-result-format <ARFF|CSV> (property: resultFormat) * The data format the experimental results are stored in. * default: ARFF * </pre> * * <pre>-result-file <adams.core.io.PlaceholderFile> (property: resultFile) * The file to store the experimental results in. * default: . * </pre> * * <pre>-output <adams.core.io.PlaceholderFile> (property: outputFile) * The file to store the experiment setup in (the extension determines the * type). * default: . * </pre> * <!-- options-end --> * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public class WekaExperimentGenerator extends AbstractSink { /** for serialization. */ private static final long serialVersionUID = -586416108746115363L; /** * The experiment type. * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public enum ExperimentType { /** classification. */ CLASSIFICATION, /** regression. */ REGRESSION } /** * The evaluation type. * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public enum EvaluationType { /** cross-validation. */ CROSS_VALIDATION, /** train/test split randomized. */ TRAIN_TEST_SPLIT_RANDOMIZED, /** train/test split order preserved. */ TRAIN_TEST_SPLIT_ORDER_PRESERVED } /** * The data format the experiment data is stored in. * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision$ */ public enum ResultFormat { /** ARFF. */ ARFF, /** CSV. */ CSV } /** the type of experiment. */ protected ExperimentType m_ExperimentType; /** the type of evaluation. */ protected EvaluationType m_EvaluationType; /** the number of runs to perform. */ protected int m_Runs; /** the number of folds to use (only cross-validation). */ protected int m_Folds; /** the split-percentage to use (only train/test splits). */ protected double m_SplitPercentage; /** the result format. */ protected ResultFormat m_ResultFormat; /** the file to store the result in. */ protected PlaceholderFile m_ResultFile; /** the file to store the experiment in. */ protected PlaceholderFile m_OutputFile; /** * Returns a string describing the object. * * @return a description suitable for displaying in the gui */ @Override public String globalInfo() { return "Generates an experiment setup that can be used in conjunction with " + "the Experiment transformer actor."; } /** * Adds options to the internal list of options. */ @Override public void defineOptions() { super.defineOptions(); m_OptionManager.add("exp-type", "experimentType", ExperimentType.CLASSIFICATION); m_OptionManager.add("eval-type", "evaluationType", EvaluationType.CROSS_VALIDATION); m_OptionManager.add("runs", "runs", 10, 1, null); m_OptionManager.add("folds", "folds", 10, 2, null); m_OptionManager.add("split", "splitPercentage", 66.0, 0.0001, 99.9999); m_OptionManager.add("result-format", "resultFormat", ResultFormat.ARFF); m_OptionManager.add("result-file", "resultFile", new PlaceholderFile(".")); m_OptionManager.add("output", "outputFile", new PlaceholderFile(".")); } /** * Returns a quick info about the actor, which will be displayed in the GUI. * * @return null if no info available, otherwise short string */ @Override public String getQuickInfo() { return QuickInfoHelper.toString(this, "outputFile", m_OutputFile, "Output: "); } /** * Sets the type of experiment to perform. * * @param value the type */ public void setExperimentType(ExperimentType value) { m_ExperimentType = value; reset(); } /** * Returns the type of experiment to perform. * * @return the type */ public ExperimentType getExperimentType() { return m_ExperimentType; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String experimentTypeTipText() { return "The type of experiment to perform."; } /** * Sets the type of evaluation to perform. * * @param value the type */ public void setEvaluationType(EvaluationType value) { m_EvaluationType = value; reset(); } /** * Returns the type of evaluation to perform. * * @return the type */ public EvaluationType getEvaluationType() { return m_EvaluationType; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String evaluationTypeTipText() { return "The type of evaluation to perform."; } /** * Sets the number of runs to perform. * * @param value the runs */ public void setRuns(int value) { if (value >= 1) { m_Runs = value; reset(); } else { getLogger().severe("At least 1 run must be performed, provided: " + value); } } /** * Returns the number of runs to perform. * * @return the runs */ public int getRuns() { return m_Runs; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String runsTipText() { return "The number of runs to perform."; } /** * Sets the number of folds to use (only CV). * * @param value the folds */ public void setFolds(int value) { if (value > 1) { m_Folds = value; reset(); } else { getLogger().severe("At least 2 folds are necessary, provided: " + value); } } /** * Returns the number of folds to use (only CV). * * @return the folds */ public int getFolds() { return m_Folds; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String foldsTipText() { return "The number of folds to use in cross-validation."; } /** * Sets the split percentage (only train/test splits). * * @param value the percentage (0-100) */ public void setSplitPercentage(double value) { if ((value > 0) && (value < 100)) { m_SplitPercentage = value; reset(); } else { getLogger().severe("Split percentage must be 0 < x < 100, provided: " + value); } } /** * Returns the split percentage (only train/test splits). * * @return the data format */ public double getSplitPercentage() { return m_SplitPercentage; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String splitPercentageTipText() { return "The percentage to use in train/test splits."; } /** * Sets the data format the results are stored in. * * @param value the format */ public void setResultFormat(ResultFormat value) { m_ResultFormat = value; reset(); } /** * Returns the data format the results are stored in. * * @return the format */ public ResultFormat getResultFormat() { return m_ResultFormat; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String resultFormatTipText() { return "The data format the experimental results are stored in."; } /** * Sets the file to store the experimental results in. * * @param value the file */ public void setResultFile(PlaceholderFile value) { m_ResultFile = value; reset(); } /** * Returns the file to store the experimental results in. * * @return the file */ public PlaceholderFile getResultFile() { return m_ResultFile; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String resultFileTipText() { return "The file to store the experimental results in."; } /** * Sets the file to store the experiment setup in (the extensions determines the type). * * @param value the file */ public void setOutputFile(PlaceholderFile value) { m_OutputFile = value; reset(); } /** * Returns the file to store the experiment in. * * @return the file */ public PlaceholderFile getOutputFile() { return m_OutputFile; } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the GUI or for listing the options. */ public String outputFileTipText() { return "The file to store the experiment setup in (the extension determines the type)."; } /** * Returns the class that the consumer accepts. * * @return <!-- flow-accepts-start -->weka.classifiers.Classifier[].class<!-- flow-accepts-end --> */ public Class[] accepts() { return new Class[] { weka.classifiers.Classifier[].class }; } /** * Executes the flow item. * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Experiment exp; SplitEvaluator se; Classifier sec; CrossValidationResultProducer cvrp; RandomSplitResultProducer rsrp; PropertyNode[] propertyPath; DefaultListModel model; InstancesResultListener irl; CSVResultListener crl; result = null; if (m_ResultFile.isDirectory()) result = "Result file points to a directory: " + m_ResultFile; else if (m_OutputFile.isDirectory()) result = "Output file points to a directory: " + m_OutputFile; if (result == null) { exp = new Experiment(); exp.setPropertyArray(new Classifier[0]); exp.setUsePropertyIterator(true); // classification or regression se = null; sec = null; if (m_ExperimentType == ExperimentType.CLASSIFICATION) { se = new ClassifierSplitEvaluator(); sec = ((ClassifierSplitEvaluator) se).getClassifier(); } else if (m_ExperimentType == ExperimentType.REGRESSION) { se = new RegressionSplitEvaluator(); sec = ((RegressionSplitEvaluator) se).getClassifier(); } else { throw new IllegalStateException("Unhandled experiment type: " + m_ExperimentType); } // crossvalidation or train/test split if (m_EvaluationType == EvaluationType.CROSS_VALIDATION) { cvrp = new CrossValidationResultProducer(); cvrp.setNumFolds(m_Folds); cvrp.setSplitEvaluator(se); propertyPath = new PropertyNode[2]; try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", CrossValidationResultProducer.class), CrossValidationResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(cvrp); exp.setPropertyPath(propertyPath); } else if ((m_EvaluationType == EvaluationType.TRAIN_TEST_SPLIT_RANDOMIZED) || (m_EvaluationType == EvaluationType.TRAIN_TEST_SPLIT_ORDER_PRESERVED)) { rsrp = new RandomSplitResultProducer(); rsrp.setRandomizeData(m_EvaluationType == EvaluationType.TRAIN_TEST_SPLIT_RANDOMIZED); rsrp.setTrainPercent(m_SplitPercentage); rsrp.setSplitEvaluator(se); propertyPath = new PropertyNode[2]; try { propertyPath[0] = new PropertyNode(se, new PropertyDescriptor("splitEvaluator", RandomSplitResultProducer.class), RandomSplitResultProducer.class); propertyPath[1] = new PropertyNode(sec, new PropertyDescriptor("classifier", se.getClass()), se.getClass()); } catch (IntrospectionException e) { e.printStackTrace(); } exp.setResultProducer(rsrp); exp.setPropertyPath(propertyPath); } else { throw new IllegalStateException("Unhandled evaluation type: " + m_EvaluationType); } // runs exp.setRunLower(1); exp.setRunUpper(m_Runs); // classifier exp.setPropertyArray((Classifier[]) m_InputToken.getPayload()); // datasets (empty for the template) model = new DefaultListModel(); exp.setDatasets(model); // result if (m_ResultFormat == ResultFormat.ARFF) { irl = new InstancesResultListener(); irl.setOutputFile(new File(m_ResultFile.getAbsolutePath())); exp.setResultListener(irl); } else if (m_ResultFormat == ResultFormat.CSV) { crl = new CSVResultListener(); crl.setOutputFile(new File(m_ResultFile.getAbsolutePath())); exp.setResultListener(crl); } else { throw new IllegalStateException("Unhandled result format: " + m_ResultFormat); } // save template try { Experiment.write(m_OutputFile.getAbsolutePath(), exp); } catch (Exception e) { result = handleException("Failed to save experiment to '" + m_OutputFile + "': ", e); } } return result; } }