Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * OneRAttributeEval.java * Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand * */ package weka.attributeSelection; import java.util.Enumeration; import java.util.Random; import java.util.Vector; import weka.classifiers.AbstractClassifier; import weka.classifiers.Classifier; import weka.classifiers.Evaluation; import weka.core.Capabilities; import weka.core.Capabilities.Capability; import weka.core.Instances; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RevisionUtils; import weka.core.Utils; import weka.filters.Filter; import weka.filters.unsupervised.attribute.Remove; /** * <!-- globalinfo-start --> OneRAttributeEval :<br/> * <br/> * Evaluates the worth of an attribute by using the OneR classifier.<br/> * <p/> * <!-- globalinfo-end --> * * <!-- options-start --> Valid options are: * <p/> * * <pre> * -S <seed> * Random number seed for cross validation * (default = 1) * </pre> * * <pre> * -F <folds> * Number of folds for cross validation * (default = 10) * </pre> * * <pre> * -D * Use training data for evaluation rather than cross validaton * </pre> * * <pre> * -B <minimum bucket size> * Minimum number of objects in a bucket * (passed on to OneR, default = 6) * </pre> * * <!-- options-end --> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision$ */ public class OneRAttributeEval extends ASEvaluation implements AttributeEvaluator, OptionHandler { /** for serialization */ static final long serialVersionUID = 4386514823886856980L; /** The training instances */ private Instances m_trainInstances; /** Random number seed */ private int m_randomSeed; /** Number of folds for cross validation */ private int m_folds; /** Use training data to evaluate merit rather than x-val */ private boolean m_evalUsingTrainingData; /** Passed on to OneR */ private int m_minBucketSize; /** * Returns a string describing this attribute evaluator * * @return a description of the evaluator suitable for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "OneRAttributeEval :\n\nEvaluates the worth of an attribute by " + "using the OneR classifier.\n"; } /** * Returns a string for this option suitable for display in the gui as a tip * text * * @return a string describing this option */ public String seedTipText() { return "Set the seed for use in cross validation."; } /** * Set the random number seed for cross validation * * @param seed the seed to use */ public void setSeed(int seed) { m_randomSeed = seed; } /** * Get the random number seed * * @return an <code>int</code> value */ public int getSeed() { return m_randomSeed; } /** * Returns a string for this option suitable for display in the gui as a tip * text * * @return a string describing this option */ public String foldsTipText() { return "Set the number of folds for cross validation."; } /** * Set the number of folds to use for cross validation * * @param folds the number of folds */ public void setFolds(int folds) { m_folds = folds; if (m_folds < 2) { m_folds = 2; } } /** * Get the number of folds used for cross validation * * @return the number of folds */ public int getFolds() { return m_folds; } /** * Returns a string for this option suitable for display in the gui as a tip * text * * @return a string describing this option */ public String evalUsingTrainingDataTipText() { return "Use the training data to evaluate attributes rather than " + "cross validation."; } /** * Use the training data to evaluate attributes rather than cross validation * * @param e true if training data is to be used for evaluation */ public void setEvalUsingTrainingData(boolean e) { m_evalUsingTrainingData = e; } /** * Returns a string for this option suitable for display in the gui as a tip * text * * @return a string describing this option */ public String minimumBucketSizeTipText() { return "The minimum number of objects in a bucket " + "(passed to OneR)."; } /** * Set the minumum bucket size used by OneR * * @param minB the minimum bucket size to use */ public void setMinimumBucketSize(int minB) { m_minBucketSize = minB; } /** * Get the minimum bucket size used by oneR * * @return the minimum bucket size used */ public int getMinimumBucketSize() { return m_minBucketSize; } /** * Returns true if the training data is to be used for evaluation * * @return true if training data is to be used for evaluation */ public boolean getEvalUsingTrainingData() { return m_evalUsingTrainingData; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ @Override public Enumeration<Option> listOptions() { Vector<Option> newVector = new Vector<Option>(4); newVector.addElement( new Option("\tRandom number seed for cross validation\n" + "\t(default = 1)", "S", 1, "-S <seed>")); newVector.addElement( new Option("\tNumber of folds for cross validation\n" + "\t(default = 10)", "F", 1, "-F <folds>")); newVector.addElement( new Option("\tUse training data for evaluation rather than cross validaton", "D", 0, "-D")); newVector.addElement( new Option("\tMinimum number of objects in a bucket\n" + "\t(passed on to " + "OneR, default = 6)", "B", 1, "-B <minimum bucket size>")); return newVector.elements(); } /** * Parses a given list of options. * <p/> * * <!-- options-start --> Valid options are: * <p/> * * <pre> * -S <seed> * Random number seed for cross validation * (default = 1) * </pre> * * <pre> * -F <folds> * Number of folds for cross validation * (default = 10) * </pre> * * <pre> * -D * Use training data for evaluation rather than cross validaton * </pre> * * <pre> * -B <minimum bucket size> * Minimum number of objects in a bucket * (passed on to OneR, default = 6) * </pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { String temp = Utils.getOption('S', options); if (temp.length() != 0) { setSeed(Integer.parseInt(temp)); } temp = Utils.getOption('F', options); if (temp.length() != 0) { setFolds(Integer.parseInt(temp)); } temp = Utils.getOption('B', options); if (temp.length() != 0) { setMinimumBucketSize(Integer.parseInt(temp)); } setEvalUsingTrainingData(Utils.getFlag('D', options)); Utils.checkForRemainingOptions(options); } /** * returns the current setup. * * @return the options of the current setup */ @Override public String[] getOptions() { Vector<String> options = new Vector<String>(); if (getEvalUsingTrainingData()) { options.add("-D"); } options.add("-S"); options.add("" + getSeed()); options.add("-F"); options.add("" + getFolds()); options.add("-B"); options.add("" + getMinimumBucketSize()); return options.toArray(new String[0]); } /** * Constructor */ public OneRAttributeEval() { resetOptions(); } /** * Returns the capabilities of this evaluator. * * @return the capabilities of this evaluator * @see Capabilities */ @Override public Capabilities getCapabilities() { Capabilities result = super.getCapabilities(); result.disableAll(); // attributes result.enable(Capability.NOMINAL_ATTRIBUTES); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enable(Capability.DATE_ATTRIBUTES); result.enable(Capability.MISSING_VALUES); // class result.enable(Capability.NOMINAL_CLASS); result.enable(Capability.MISSING_CLASS_VALUES); return result; } /** * Initializes a OneRAttribute attribute evaluator. Discretizes all attributes * that are numeric. * * @param data set of instances serving as training data * @throws Exception if the evaluator has not been generated successfully */ @Override public void buildEvaluator(Instances data) throws Exception { // can evaluator handle data? getCapabilities().testWithFail(data); m_trainInstances = data; } /** * rests to defaults. */ protected void resetOptions() { m_trainInstances = null; m_randomSeed = 1; m_folds = 10; m_evalUsingTrainingData = false; m_minBucketSize = 6; // default used by OneR } /** * evaluates an individual attribute by measuring the amount of information * gained about the class given the attribute. * * @param attribute the index of the attribute to be evaluated * @throws Exception if the attribute could not be evaluated */ @Override public double evaluateAttribute(int attribute) throws Exception { int[] featArray = new int[2]; // feat + class double errorRate; Evaluation o_Evaluation; Remove delTransform = new Remove(); delTransform.setInvertSelection(true); // copy the instances Instances trainCopy = new Instances(m_trainInstances); featArray[0] = attribute; featArray[1] = trainCopy.classIndex(); delTransform.setAttributeIndicesArray(featArray); delTransform.setInputFormat(trainCopy); trainCopy = Filter.useFilter(trainCopy, delTransform); o_Evaluation = new Evaluation(trainCopy); String[] oneROpts = { "-B", "" + getMinimumBucketSize() }; Classifier oneR = AbstractClassifier.forName("weka.classifiers.rules.OneR", oneROpts); if (m_evalUsingTrainingData) { oneR.buildClassifier(trainCopy); o_Evaluation.evaluateModel(oneR, trainCopy); } else { /* * o_Evaluation.crossValidateModel("weka.classifiers.rules.OneR", * trainCopy, 10, null, new Random(m_randomSeed)); */ o_Evaluation.crossValidateModel(oneR, trainCopy, m_folds, new Random(m_randomSeed)); } errorRate = o_Evaluation.errorRate(); return (1 - errorRate) * 100.0; } /** * Return a description of the evaluator * * @return description as a string */ @Override public String toString() { StringBuffer text = new StringBuffer(); if (m_trainInstances == null) { text.append("\tOneR feature evaluator has not been built yet"); } else { text.append("\tOneR feature evaluator.\n\n"); text.append("\tUsing "); if (m_evalUsingTrainingData) { text.append("training data for evaluation of attributes."); } else { text.append("" + getFolds() + " fold cross validation for evaluating " + "attributes."); } text.append("\n\tMinimum bucket size for OneR: " + getMinimumBucketSize()); } text.append("\n"); return text.toString(); } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision$"); } @Override public int[] postProcess(int[] attributeSet) { // save memory m_trainInstances = new Instances(m_trainInstances, 0); return attributeSet; } // ============ // Test method. // ============ /** * Main method for testing this class. * * @param args the options */ public static void main(String[] args) { runEvaluator(new OneRAttributeEval(), args); } }