Java tutorial
/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * CheckClassifier.java * Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand * */ package weka.classifiers; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; import java.util.Random; import java.util.Vector; import weka.core.Attribute; import weka.core.CheckScheme; import weka.core.Instance; import weka.core.Instances; import weka.core.MultiInstanceCapabilitiesHandler; import weka.core.Option; import weka.core.OptionHandler; import weka.core.RevisionUtils; import weka.core.SerializationHelper; import weka.core.TestInstances; import weka.core.Utils; import weka.core.WeightedInstancesHandler; /** * Class for examining the capabilities and finding problems with classifiers. * If you implement a classifier using the WEKA.libraries, you should run the * checks on it to ensure robustness and correct operation. Passing all the * tests of this object does not mean bugs in the classifier don't exist, but * this will help find some common ones. 
* <p/> * * Typical usage: * <p/> * <code>java weka.classifiers.CheckClassifier -W classifier_name * classifier_options </code> * <p/> * * CheckClassifier reports on the following: * <ul> * <li>Classifier abilities * <ul> * <li>Possible command line options to the classifier</li> * <li>Whether the classifier can predict nominal, numeric, string, date or * relational class attributes. Warnings will be displayed if performance is * worse than ZeroR</li> * <li>Whether the classifier can be trained incrementally</li> * <li>Whether the classifier can handle numeric predictor attributes</li> * <li>Whether the classifier can handle nominal predictor attributes</li> * <li>Whether the classifier can handle string predictor attributes</li> * <li>Whether the classifier can handle date predictor attributes</li> * <li>Whether the classifier can handle relational predictor attributes</li> * <li>Whether the classifier can handle multi-instance data</li> * <li>Whether the classifier can handle missing predictor values</li> * <li>Whether the classifier can handle missing class values</li> * <li>Whether a nominal classifier only handles 2 class problems</li> * <li>Whether the classifier can handle instance weights</li> * </ul> * </li> * <li>Correct functioning * <ul> * <li>Correct initialisation during buildClassifier (i.e. 
no result changes * when buildClassifier is called repeatedly)</li> * <li>Whether incremental training produces the same results as during * non-incremental training (which may or may not be OK)</li> * <li>Whether the classifier alters the data passed to it (number of instances, * instance order, instance weights, etc)</li> * <li>Whether the toString() method works correctly before the classifier has * been built.</li> * </ul> * </li> * <li>Degenerate cases * <ul> * <li>building classifier with zero training instances</li> * <li>all but one predictor attribute values missing</li> * <li>all predictor attribute values missing</li> * <li>all but one class values missing</li> * <li>all class values missing</li> * </ul> * </li> * </ul> * Running CheckClassifier with the debug option set will output the training * and test datasets for any failed tests. * <p/> * * The <code>weka.classifiers.AbstractClassifierTest</code> uses this class to * test all the classifiers. Any changes here have to be checked in that * abstract test class, too. * <p/> * * <!-- options-start --> Valid options are: * <p/> * * <pre> * -D * Turn on debugging output. * </pre> * * <pre> * -S * Silent mode - prints nothing to stdout. * </pre> * * <pre> * -N <num> * The number of instances in the datasets (default 20). * </pre> * * <pre> * -nominal <num> * The number of nominal attributes (default 2). * </pre> * * <pre> * -nominal-values <num> * The number of values for nominal attributes (default 1). * </pre> * * <pre> * -numeric <num> * The number of numeric attributes (default 1). * </pre> * * <pre> * -string <num> * The number of string attributes (default 1). * </pre> * * <pre> * -date <num> * The number of date attributes (default 1). * </pre> * * <pre> * -relational <num> * The number of relational attributes (default 1). * </pre> * * <pre> * -num-instances-relational <num> * The number of instances in relational/bag attributes (default 10). 
* </pre> * * <pre> * -words <comma-separated-list> * The words to use in string attributes. * </pre> * * <pre> * -word-separators <chars> * The word separators to use in string attributes. * </pre> * * <pre> * -W * Full name of the classifier analysed. * eg: weka.classifiers.bayes.NaiveBayes * (default weka.classifiers.rules.ZeroR) * </pre> * * <pre> * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> * * <pre> * -D * If set, classifier is run in debug mode and * may output additional info to the console * </pre> * * <!-- options-end --> * * Options after -- are passed to the designated classifier. * <p/> * * @author Len Trigg (trigg@cs.waikato.ac.nz) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision$ * @see TestInstances */ public class CheckClassifier extends CheckScheme { /* * Note about test methods: - methods return array of booleans - first index: * success or not - second index: acceptable or not (e.g., Exception is OK) - * in case the performance is worse than that of ZeroR both indices are true * * FracPete (fracpete at waikato dot ac dot nz) */ /*** The classifier to be examined */ protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR(); /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. 
*/
  @Override
  public Enumeration<Option> listOptions() {
    // start from the options of the superclass
    Vector<Option> options =
      new Vector<Option>(Collections.list(super.listOptions()));

    String classifierHelp = "\tFull name of the classifier analysed.\n"
      + "\teg: weka.classifiers.bayes.NaiveBayes\n"
      + "\t(default weka.classifiers.rules.ZeroR)";
    options.add(new Option(classifierHelp, "W", 1, "-W"));

    // "null instanceof X" is false, so instanceof alone also covers the
    // null check
    if (m_Classifier instanceof OptionHandler) {
      OptionHandler handler = (OptionHandler) m_Classifier;
      String header = "\nOptions specific to classifier "
        + m_Classifier.getClass().getName() + ":";
      options.add(new Option("", "", 0, header));
      options.addAll(Collections.list(handler.listOptions()));
    }

    return options.elements();
  }

  /**
   * Parses a given list of options.
   * 
   * <!-- options-start --> Valid options are:
   * <p/>
   * 
   * <pre>
   * -D
   *  Turn on debugging output.
   * </pre>
   * 
   * <pre>
   * -S
   *  Silent mode - prints nothing to stdout.
   * </pre>
   * 
   * <pre>
   * -N <num>
   *  The number of instances in the datasets (default 20).
   * </pre>
   * 
   * <pre>
   * -nominal <num>
   *  The number of nominal attributes (default 2).
   * </pre>
   * 
   * <pre>
   * -nominal-values <num>
   *  The number of values for nominal attributes (default 1).
   * </pre>
   * 
   * <pre>
   * -numeric <num>
   *  The number of numeric attributes (default 1).
   * </pre>
   * 
   * <pre>
   * -string <num>
   *  The number of string attributes (default 1).
   * </pre>
   * 
   * <pre>
   * -date <num>
   *  The number of date attributes (default 1).
   * </pre>
   * 
   * <pre>
   * -relational <num>
   *  The number of relational attributes (default 1).
   * </pre>
   * 
   * <pre>
   * -num-instances-relational <num>
   *  The number of instances in relational/bag attributes (default 10).
   * </pre>
   * 
   * <pre>
   * -words <comma-separated-list>
   *  The words to use in string attributes.
   * </pre>
   * 
   * <pre>
   * -word-separators <chars>
   *  The word separators to use in string attributes.
   * </pre>
   * 
   * <pre>
   * -W
   *  Full name of the classifier analysed.
* eg: weka.classifiers.bayes.NaiveBayes * (default weka.classifiers.rules.ZeroR) * </pre> * * <pre> * Options specific to classifier weka.classifiers.rules.ZeroR: * </pre> * * <pre> * -D * If set, classifier is run in debug mode and * may output additional info to the console * </pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ @Override public void setOptions(String[] options) throws Exception { String tmpStr; super.setOptions(options); tmpStr = Utils.getOption('W', options); if (tmpStr.length() == 0) { tmpStr = weka.classifiers.rules.ZeroR.class.getName(); } setClassifier((Classifier) forName("weka.classifiers", Classifier.class, tmpStr, Utils.partitionOptions(options))); } /** * Gets the current settings of the CheckClassifier. * * @return an array of strings suitable for passing to setOptions */ @Override public String[] getOptions() { Vector<String> result; String[] options; result = new Vector<String>(); Collections.addAll(result, super.getOptions()); if (getClassifier() != null) { result.add("-W"); result.add(getClassifier().getClass().getName()); } if ((m_Classifier != null) && (m_Classifier instanceof OptionHandler)) { options = ((OptionHandler) m_Classifier).getOptions(); if (options.length > 0) { result.add("--"); Collections.addAll(result, options); } } return result.toArray(new String[result.size()]); } /** * Begin the tests, reporting results to System.out */ @Override public void doTests() { if (getClassifier() == null) { println("\n=== No classifier set ==="); return; } println("\n=== Check on Classifier: " + getClassifier().getClass().getName() + " ===\n"); // Start tests m_ClasspathProblems = false; println("--> Checking for interfaces"); canTakeOptions(); boolean updateableClassifier = updateableClassifier()[0]; boolean weightedInstancesHandler = weightedInstancesHandler()[0]; boolean multiInstanceHandler = multiInstanceHandler()[0]; println("--> 
Classifier tests"); declaresSerialVersionUID(); testToString(); testsPerClassType(Attribute.NOMINAL, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.NUMERIC, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.DATE, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.STRING, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); testsPerClassType(Attribute.RELATIONAL, updateableClassifier, weightedInstancesHandler, multiInstanceHandler); } /** * Set the classifier for boosting. * * @param newClassifier the Classifier to use. */ public void setClassifier(Classifier newClassifier) { m_Classifier = newClassifier; } /** * Get the classifier used as the classifier * * @return the classifier used as the classifier */ public Classifier getClassifier() { return m_Classifier; } /** * Run a battery of tests for a given class attribute type * * @param classType true if the class attribute should be numeric * @param updateable true if the classifier is updateable * @param weighted true if the classifier says it handles weights * @param multiInstance true if the classifier is a multi-instance classifier */ protected void testsPerClassType(int classType, boolean updateable, boolean weighted, boolean multiInstance) { boolean PNom = canPredict(true, false, false, false, false, multiInstance, classType)[0]; boolean PNum = canPredict(false, true, false, false, false, multiInstance, classType)[0]; boolean PStr = canPredict(false, false, true, false, false, multiInstance, classType)[0]; boolean PDat = canPredict(false, false, false, true, false, multiInstance, classType)[0]; boolean PRel; if (!multiInstance) { PRel = canPredict(false, false, false, false, true, multiInstance, classType)[0]; } else { PRel = false; } if (PNom || PNum || PStr || PDat || PRel) { if (weighted) { instanceWeights(PNom, PNum, PStr, PDat, PRel, 
multiInstance, classType); } canHandleOnlyClass(PNom, PNum, PStr, PDat, PRel, classType); if (classType == Attribute.NOMINAL) { canHandleNClasses(PNom, PNum, PStr, PDat, PRel, multiInstance, 4); } if (!multiInstance) { canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 0); canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 1); } canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 20)[0]; if (handleMissingPredictors) { canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 100); } boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 20)[0]; if (handleMissingClass) { canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 100); } correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, handleMissingPredictors, handleMissingClass); doesntUseTestClassVal(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); if (updateable) { updatingEquality(PNom, PNum, PStr, PDat, PRel, multiInstance, classType); } } } /** * Checks whether the scheme's toString() method works even though the * classifies hasn't been built yet. * * @return index 0 is true if the toString() method works fine */ protected boolean[] testToString() { boolean[] result = new boolean[2]; print("toString..."); try { Classifier copy = m_Classifier.getClass().newInstance(); copy.toString(); result[0] = true; println("yes"); } catch (Exception e) { result[0] = false; println("no"); if (m_Debug) { println("\n=== Full report ==="); e.printStackTrace(); println("\n"); } } return result; } /** * tests for a serialVersionUID. Fails in case the scheme doesn't declare a * UID. 
* * @return index 0 is true if the scheme declares a UID */ protected boolean[] declaresSerialVersionUID() { boolean[] result = new boolean[2]; print("serialVersionUID..."); result[0] = !SerializationHelper.needsUID(m_Classifier.getClass()); if (result[0]) { println("yes"); } else { println("no"); } return result; } /** * Checks whether the scheme can take command line options. * * @return index 0 is true if the classifier can take options */ protected boolean[] canTakeOptions() { boolean[] result = new boolean[2]; print("options..."); if (m_Classifier instanceof OptionHandler) { println("yes"); if (m_Debug) { println("\n=== Full report ==="); Enumeration<Option> enu = ((OptionHandler) m_Classifier).listOptions(); while (enu.hasMoreElements()) { Option option = enu.nextElement(); print(option.synopsis() + "\n" + option.description() + "\n"); } println("\n"); } result[0] = true; } else { println("no"); result[0] = false; } return result; } /** * Checks whether the scheme can build models incrementally. * * @return index 0 is true if the classifier can train incrementally */ protected boolean[] updateableClassifier() { boolean[] result = new boolean[2]; print("updateable classifier..."); if (m_Classifier instanceof UpdateableClassifier) { println("yes"); result[0] = true; } else { println("no"); result[0] = false; } return result; } /** * Checks whether the scheme says it can handle instance weights. * * @return true if the classifier handles instance weights */ protected boolean[] weightedInstancesHandler() { boolean[] result = new boolean[2]; print("weighted instances classifier..."); if (m_Classifier instanceof WeightedInstancesHandler) { println("yes"); result[0] = true; } else { println("no"); result[0] = false; } return result; } /** * Checks whether the scheme handles multi-instance data. 
* * @return true if the classifier handles multi-instance data */ protected boolean[] multiInstanceHandler() { boolean[] result = new boolean[2]; print("multi-instance classifier..."); if (m_Classifier instanceof MultiInstanceCapabilitiesHandler) { println("yes"); result[0] = true; } else { println("no"); result[0] = false; } return result; } /** * Checks basic prediction of the scheme, for simple non-troublesome datasets. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NOMINAL, NUMERIC, etc.) * @return index 0 is true if the test was passed, index 1 is true if test was * acceptable */ protected boolean[] canPredict(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) { print("basic predict"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); ArrayList<String> accepts = new ArrayList<String>(); accepts.add("unary"); accepts.add("binary"); accepts.add("nominal"); accepts.add("numeric"); accepts.add("string"); accepts.add("date"); accepts.add("relational"); accepts.add("multi-instance"); accepts.add("not in classpath"); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean predictorMissing = false, classMissing = false; return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, 
accepts); } /** * Checks whether the scheme can handle data that contains only the class * attribute. If a scheme cannot build a proper model with that data, it * should default back to a ZeroR model. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param classType the class type (NOMINAL, NUMERIC, etc.) * @return index 0 is true if the test was passed, index 1 is true if test was * acceptable */ protected boolean[] canHandleOnlyClass(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, int classType) { print("only class in data"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, false, classType); print("..."); ArrayList<String> accepts = new ArrayList<String>(); accepts.add("class"); accepts.add("zeror"); int numTrain = getNumInstances(), numTest = getNumInstances(), missingLevel = 0; boolean predictorMissing = false, classMissing = false; return runBasicTest(false, false, false, false, false, false, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest, 2, accepts); } /** * Checks whether nominal schemes can handle more than two classes. If a * scheme is only designed for two-class problems it should throw an * appropriate exception for multi-class problems. 
* * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param numClasses the number of classes to test * @return index 0 is true if the test was passed, index 1 is true if test was * acceptable */ protected boolean[] canHandleNClasses(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int numClasses) { print("more than two class problems"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL); print("..."); ArrayList<String> accepts = new ArrayList<String>(); accepts.add("number"); accepts.add("class"); int numTrain = getNumInstances(), numTest = getNumInstances(), missingLevel = 0; boolean predictorMissing = false, classMissing = false; return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether the scheme can handle class attributes as Nth attribute. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param classIndex the index of the class attribute (0-based, -1 means last * attribute) * @return index 0 is true if the test was passed, index 1 is true if test was * acceptable * @see TestInstances#CLASS_IS_LAST */ protected boolean[] canHandleClassAsNthAttribute(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType, int classIndex) { if (classIndex == TestInstances.CLASS_IS_LAST) { print("class attribute as last attribute"); } else { print("class attribute as " + (classIndex + 1) + ". attribute"); } printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); ArrayList<String> accepts = new ArrayList<String>(); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean predictorMissing = false, classMissing = false; return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType, classIndex, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether the scheme can handle zero training instances. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @return index 0 is true if the test was passed, index 1 is true if test was * acceptable */ protected boolean[] canHandleZeroTraining(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) { print("handle zero training instances"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); ArrayList<String> accepts = new ArrayList<String>(); accepts.add("train"); accepts.add("value"); int numTrain = 0, numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean predictorMissing = false, classMissing = false; return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether the scheme correctly initialises models when buildClassifier * is called. This test calls buildClassifier with one training dataset and * records performance on a test set. buildClassifier is then called on a * training set with different structure, and then again with the original * training set. The performance on the test set is compared with the original * results and any performance difference noted as incorrect build * initialisation. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @return index 0 is true if the test was passed, index 1 is true if the * scheme performs worse than ZeroR, but without error (index 0 is * false) */ protected boolean[] correctBuildInitialisation(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) { boolean[] result = new boolean[2]; print("correct initialisation during buildClassifier"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean predictorMissing = false, classMissing = false; Instances train1 = null; Instances test1 = null; Instances train2 = null; Instances test2 = null; Classifier classifier = null; Evaluation evaluation1A = null; Evaluation evaluation1B = null; Evaluation evaluation2 = null; boolean built = false; int stage = 0; try { // Make two sets of train/test splits with different // numbers of attributes train1 = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); train2 = makeTestDataset(84, numTrain, nominalPredictor ? getNumNominal() + 1 : 0, numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); test1 = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); test2 = makeTestDataset(48, numTest, nominalPredictor ? 
getNumNominal() + 1 : 0, numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); if (missingLevel > 0) { addMissing(train1, missingLevel, predictorMissing, classMissing); addMissing(test1, Math.min(missingLevel, 50), predictorMissing, classMissing); addMissing(train2, missingLevel, predictorMissing, classMissing); addMissing(test2, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifier = AbstractClassifier.makeCopies(getClassifier(), 1)[0]; evaluation1A = new Evaluation(train1); evaluation1B = new Evaluation(train1); evaluation2 = new Evaluation(train2); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { stage = 0; classifier.buildClassifier(train1); built = true; if (!testWRTZeroR(classifier, evaluation1A, train1, test1)[0]) { throw new Exception("Scheme performs worse than ZeroR"); } stage = 1; built = false; classifier.buildClassifier(train2); built = true; if (!testWRTZeroR(classifier, evaluation2, train2, test2)[0]) { throw new Exception("Scheme performs worse than ZeroR"); } stage = 2; built = false; classifier.buildClassifier(train1); built = true; if (!testWRTZeroR(classifier, evaluation1B, train1, test1)[0]) { throw new Exception("Scheme performs worse than ZeroR"); } stage = 3; if (!evaluation1A.equals(evaluation1B)) { if (m_Debug) { println("\n=== Full report ===\n" + evaluation1A.toSummaryString("\nFirst buildClassifier()", true) + "\n\n"); println(evaluation1B.toSummaryString("\nSecond buildClassifier()", true) + "\n\n"); } throw new Exception("Results differ between buildClassifier calls"); } println("yes"); result[0] = true; } catch (Exception ex) { String msg = ex.getMessage().toLowerCase(); if (msg.indexOf("worse than zeror") >= 0) { println("warning: performs worse than ZeroR"); // result[0] = (stage < 1); // result[1] = (stage < 1); 
result[0] = true; result[1] = true; } else { println("no"); result[0] = false; } if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } switch (stage) { case 0: print(" of dataset 1"); break; case 1: print(" of dataset 2"); break; case 2: print(" of dataset 1 (2nd build)"); break; case 3: print(", comparing results from builds of dataset 1"); break; } println(": " + ex.getMessage() + "\n"); println("here are the datasets:\n"); println("=== Train1 Dataset ===\n" + train1.toString() + "\n"); println("=== Test1 Dataset ===\n" + test1.toString() + "\n\n"); println("=== Train2 Dataset ===\n" + train2.toString() + "\n"); println("=== Test2 Dataset ===\n" + test2.toString() + "\n\n"); } } return result; } /** * Checks basic missing value handling of the scheme. If the missing values * cause an exception to be thrown by the scheme, this will be recorded. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param predictorMissing true if the missing values may be in the predictors * @param classMissing true if the missing values may be in the class * @param missingLevel the percentage of missing values * @return index 0 is true if the test was passed, index 1 is true if test was * acceptable */ protected boolean[] canHandleMissing(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType, boolean predictorMissing, boolean classMissing, int missingLevel) { if (missingLevel == 100) { print("100% "); } print("missing"); if (predictorMissing) { print(" predictor"); if (classMissing) { print(" and"); } } if (classMissing) { print(" class"); } print(" values"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); ArrayList<String> accepts = new ArrayList<String>(); accepts.add("missing"); accepts.add("value"); accepts.add("train"); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2; return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Checks whether an updateable scheme produces the same model when trained * incrementally as when batch trained. The model itself cannot be compared, * so we compare the evaluation on test data for both models. It is possible * to get a false positive on this test (likelihood depends on the * classifier). 
* * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @return index 0 is true if the test was passed */ protected boolean[] updatingEquality(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) { print("incremental training produces the same results" + " as batch training"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean predictorMissing = false, classMissing = false; boolean[] result = new boolean[2]; Instances train = null; Instances test = null; Classifier[] classifiers = null; Evaluation evaluationB = null; Evaluation evaluationI = null; boolean built = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? 
getNumRelational() : 0, numClasses, classType, multiInstance); if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing, classMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifiers = AbstractClassifier.makeCopies(getClassifier(), 2); evaluationB = new Evaluation(train); evaluationI = new Evaluation(train); classifiers[0].buildClassifier(train); testWRTZeroR(classifiers[0], evaluationB, train, test); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { classifiers[1].buildClassifier(new Instances(train, 0)); for (int i = 0; i < train.numInstances(); i++) { ((UpdateableClassifier) classifiers[1]).updateClassifier(train.instance(i)); } built = true; testWRTZeroR(classifiers[1], evaluationI, train, test); if (!evaluationB.equals(evaluationI)) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); println("Results differ between batch and " + "incrementally built models.\n" + "Depending on the classifier, this may be OK"); println("Here are the results:\n"); println(evaluationB.toSummaryString("\nbatch built results\n", true)); println(evaluationI.toSummaryString("\nincrementally built results\n", true)); println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } else { println("yes"); result[0] = true; } } catch (Exception ex) { result[0] = false; print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); } return result; } /** * Checks whether the classifier erroneously uses the class value of test * instances (if provided). Runs the classifier with test instance class * values set to missing and compares with results when test instance class * values are left intact. 
* * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @return index 0 is true if the test was passed */ protected boolean[] doesntUseTestClassVal(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) { print("classifier ignores test instance class vals"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); int numTrain = 2 * getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean predictorMissing = false, classMissing = false; boolean[] result = new boolean[2]; Instances train = null; Instances test = null; Classifier[] classifiers = null; boolean evalFail = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() + 1 : 0, numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() + 1 : 0, numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? 
getNumRelational() : 0, numClasses, classType, multiInstance); if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing, classMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifiers = AbstractClassifier.makeCopies(getClassifier(), 2); classifiers[0].buildClassifier(train); classifiers[1].buildClassifier(train); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { // Now set test values to missing when predicting for (int i = 0; i < test.numInstances(); i++) { Instance testInst = test.instance(i); Instance classMissingInst = (Instance) testInst.copy(); classMissingInst.setDataset(test); classMissingInst.setClassMissing(); double[] dist0 = classifiers[0].distributionForInstance(testInst); double[] dist1 = classifiers[1].distributionForInstance(classMissingInst); for (int j = 0; j < dist0.length; j++) { // ignore, if both are NaNs if (Double.isNaN(dist0[j]) && Double.isNaN(dist1[j])) { if (getDebug()) { System.out.println("Both predictions are NaN!"); } continue; } // distribution different? if (dist0[j] != dist1[j]) { throw new Exception("Prediction different for instance " + (i + 1)); } } } println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); if (evalFail) { println("Results differ between non-missing and " + "missing test class values."); } else { print("Problem during testing"); println(": " + ex.getMessage() + "\n"); } println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Train Weights ===\n"); for (int i = 0; i < train.numInstances(); i++) { println(" " + (i + 1) + " " + train.instance(i).weight()); } println("=== Test Dataset ===\n" + test.toString() + "\n\n"); println("(test weights all 1.0\n"); } } return result; } /** * Checks whether the classifier can handle instance weights. 
This test * compares the classifier performance on two datasets that are identical * except for the training weights. If the results change, then the classifier * must be using the weights. It may be possible to get a false positive from * this test if the weight changes aren't significant enough to induce a * change in classifier performance (but the weights are chosen to minimize * the likelihood of this). * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @return index 0 true if the test was passed */ protected boolean[] instanceWeights(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) { print("classifier uses instance weights"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); int numTrain = 2 * getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0; boolean predictorMissing = false, classMissing = false; boolean[] result = new boolean[2]; Instances train = null; Instances test = null; Classifier[] classifiers = null; Evaluation evaluationB = null; Evaluation evaluationI = null; boolean built = false; boolean evalFail = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() + 1 : 0, numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? 
getNumRelational() : 0, numClasses, classType, multiInstance); test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() + 1 : 0, numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing, classMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifiers = AbstractClassifier.makeCopies(getClassifier(), 2); evaluationB = new Evaluation(train); evaluationI = new Evaluation(train); classifiers[0].buildClassifier(train); testWRTZeroR(classifiers[0], evaluationB, train, test); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { // Now modify instance weights and re-built/test for (int i = 0; i < train.numInstances(); i++) { train.instance(i).setWeight(0); } Random random = new Random(1); for (int i = 0; i < train.numInstances() / 2; i++) { int inst = random.nextInt(train.numInstances()); int weight = random.nextInt(10) + 1; train.instance(inst).setWeight(weight); } classifiers[1].buildClassifier(train); built = true; testWRTZeroR(classifiers[1], evaluationI, train, test); if (evaluationB.equals(evaluationI)) { // println("no"); evalFail = true; throw new Exception("evalFail"); } println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); if (evalFail) { println("Results don't differ between non-weighted and " + "weighted instance models."); println("Here are the results:\n"); println(evaluationB.toSummaryString("\nboth methods\n", true)); } else { print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); } println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); 
println("=== Train Weights ===\n"); for (int i = 0; i < train.numInstances(); i++) { println(" " + (i + 1) + " " + train.instance(i).weight()); } println("=== Test Dataset ===\n" + test.toString() + "\n\n"); println("(test weights all 1.0\n"); } } return result; } /** * Checks whether the scheme alters the training dataset during training. If * the scheme needs to modify the training data it should take a copy of the * training data. Currently checks for changes to header structure, number of * instances, order of instances, instance weights. * * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @param predictorMissing true if we know the classifier can handle (at * least) moderate missing predictor values * @param classMissing true if we know the classifier can handle (at least) * moderate missing class values * @return index 0 is true if the test was passed */ protected boolean[] datasetIntegrity(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType, boolean predictorMissing, boolean classMissing) { print("classifier doesn't alter original datasets"); printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType); print("..."); int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 20; boolean[] result = new boolean[2]; Instances train = null; Instances test = null; Classifier classifier = null; Evaluation evaluation = null; boolean built = false; try 
{ train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, multiInstance); if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing, classMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifier = AbstractClassifier.makeCopies(getClassifier(), 1)[0]; evaluation = new Evaluation(train); } catch (Exception ex) { throw new Error("Error setting up for tests: " + ex.getMessage()); } try { Instances trainCopy = new Instances(train); Instances testCopy = new Instances(test); classifier.buildClassifier(trainCopy); compareDatasets(train, trainCopy); built = true; testWRTZeroR(classifier, evaluation, trainCopy, testCopy); compareDatasets(test, testCopy); println("yes"); result[0] = true; } catch (Exception ex) { println("no"); result[0] = false; if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); println("Here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } return result; } /** * Runs a text on the datasets with the given characteristics. 
* * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @param missingLevel the percentage of missing values * @param predictorMissing true if the missing values may be in the predictors * @param classMissing true if the missing values may be in the class * @param numTrain the number of instances in the training set * @param numTest the number of instaces in the test set * @param numClasses the number of classes * @param accepts the acceptable string in an exception * @return index 0 is true if the test was passed, index 1 is true if test was * acceptable */ protected boolean[] runBasicTest(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType, int missingLevel, boolean predictorMissing, boolean classMissing, int numTrain, int numTest, int numClasses, ArrayList<String> accepts) { return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType, TestInstances.CLASS_IS_LAST, missingLevel, predictorMissing, classMissing, numTrain, numTest, numClasses, accepts); } /** * Runs a text on the datasets with the given characteristics. 
* * @param nominalPredictor if true use nominal predictor attributes * @param numericPredictor if true use numeric predictor attributes * @param stringPredictor if true use string predictor attributes * @param datePredictor if true use date predictor attributes * @param relationalPredictor if true use relational predictor attributes * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) * @param classIndex the attribute index of the class * @param missingLevel the percentage of missing values * @param predictorMissing true if the missing values may be in the predictors * @param classMissing true if the missing values may be in the class * @param numTrain the number of instances in the training set * @param numTest the number of instaces in the test set * @param numClasses the number of classes * @param accepts the acceptable string in an exception * @return index 0 is true if the test was passed, index 1 is true if test was * acceptable */ protected boolean[] runBasicTest(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType, int classIndex, int missingLevel, boolean predictorMissing, boolean classMissing, int numTrain, int numTest, int numClasses, ArrayList<String> accepts) { boolean[] result = new boolean[2]; Instances train = null; Instances test = null; Classifier classifier = null; Evaluation evaluation = null; boolean built = false; try { train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, classIndex, multiInstance); test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0, numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0, datePredictor ? 
getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses, classType, classIndex, multiInstance); if (missingLevel > 0) { addMissing(train, missingLevel, predictorMissing, classMissing); addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing); } classifier = AbstractClassifier.makeCopies(getClassifier(), 1)[0]; evaluation = new Evaluation(train); } catch (Exception ex) { ex.printStackTrace(); throw new Error("Error setting up for tests: " + ex.getMessage()); } try { classifier.buildClassifier(train); built = true; if (!testWRTZeroR(classifier, evaluation, train, test)[0]) { result[0] = true; result[1] = true; throw new Exception("Scheme performs worse than ZeroR"); } println("yes"); result[0] = true; } catch (Exception ex) { boolean acceptable = false; String msg; if (ex.getMessage() == null) { msg = ""; } else { msg = ex.getMessage().toLowerCase(); } if (msg.indexOf("not in classpath") > -1) { m_ClasspathProblems = true; } if (msg.indexOf("worse than zeror") >= 0) { println("warning: performs worse than ZeroR"); result[0] = true; result[1] = true; } else { for (int i = 0; i < accepts.size(); i++) { if (msg.indexOf(accepts.get(i)) >= 0) { acceptable = true; } } println("no" + (acceptable ? 
" (OK error message)" : "")); result[1] = acceptable; } if (m_Debug) { println("\n=== Full Report ==="); print("Problem during"); if (built) { print(" testing"); } else { print(" training"); } println(": " + ex.getMessage() + "\n"); if (!acceptable) { if (accepts.size() > 0) { print("Error message doesn't mention "); for (int i = 0; i < accepts.size(); i++) { if (i != 0) { print(" or "); } print('"' + accepts.get(i) + '"'); } } println("here are the datasets:\n"); println("=== Train Dataset ===\n" + train.toString() + "\n"); println("=== Test Dataset ===\n" + test.toString() + "\n\n"); } } } return result; } /** * Determine whether the scheme performs worse than ZeroR during testing * * @param classifier the pre-trained classifier * @param evaluation the classifier evaluation object * @param train the training data * @param test the test data * @return index 0 is true if the scheme performs better than ZeroR * @throws Exception if there was a problem during the scheme's testing */ protected boolean[] testWRTZeroR(Classifier classifier, Evaluation evaluation, Instances train, Instances test) throws Exception { boolean[] result = new boolean[2]; evaluation.evaluateModel(classifier, test); try { // Tested OK, compare with ZeroR Classifier zeroR = new weka.classifiers.rules.ZeroR(); zeroR.buildClassifier(train); Evaluation zeroREval = new Evaluation(train); zeroREval.evaluateModel(zeroR, test); result[0] = Utils.grOrEq(zeroREval.errorRate(), evaluation.errorRate()); } catch (Exception ex) { throw new Error("Problem determining ZeroR performance: " + ex.getMessage()); } return result; } /** * Make a simple set of instances, which can later be modified for use in * specific tests. 
* * @param seed the random number seed * @param numInstances the number of instances to generate * @param numNominal the number of nominal attributes * @param numNumeric the number of numeric attributes * @param numString the number of string attributes * @param numDate the number of date attributes * @param numRelational the number of relational attributes * @param numClasses the number of classes (if nominal class) * @param classType the class type (NUMERIC, NOMINAL, etc.) * @param multiInstance whether the dataset should a multi-instance dataset * @return the test dataset * @throws Exception if the dataset couldn't be generated * @see #process(Instances) */ protected Instances makeTestDataset(int seed, int numInstances, int numNominal, int numNumeric, int numString, int numDate, int numRelational, int numClasses, int classType, boolean multiInstance) throws Exception { return makeTestDataset(seed, numInstances, numNominal, numNumeric, numString, numDate, numRelational, numClasses, classType, TestInstances.CLASS_IS_LAST, multiInstance); } /** * Make a simple set of instances with variable position of the class * attribute, which can later be modified for use in specific tests. * * @param seed the random number seed * @param numInstances the number of instances to generate * @param numNominal the number of nominal attributes * @param numNumeric the number of numeric attributes * @param numString the number of string attributes * @param numDate the number of date attributes * @param numRelational the number of relational attributes * @param numClasses the number of classes (if nominal class) * @param classType the class type (NUMERIC, NOMINAL, etc.) 
* @param classIndex the index of the class (0-based, -1 as last) * @param multiInstance whether the dataset should a multi-instance dataset * @return the test dataset * @throws Exception if the dataset couldn't be generated * @see TestInstances#CLASS_IS_LAST * @see #process(Instances) */ protected Instances makeTestDataset(int seed, int numInstances, int numNominal, int numNumeric, int numString, int numDate, int numRelational, int numClasses, int classType, int classIndex, boolean multiInstance) throws Exception { TestInstances dataset = new TestInstances(); dataset.setSeed(seed); dataset.setNumInstances(numInstances); dataset.setNumNominal(numNominal); dataset.setNumNumeric(numNumeric); dataset.setNumString(numString); dataset.setNumDate(numDate); dataset.setNumRelational(numRelational); dataset.setNumClasses(numClasses); dataset.setClassType(classType); dataset.setClassIndex(classIndex); dataset.setNumClasses(numClasses); dataset.setMultiInstance(multiInstance); dataset.setWords(getWords()); dataset.setWordSeparators(getWordSeparators()); return process(dataset.generate()); } /** * Print out a short summary string for the dataset characteristics * * @param nominalPredictor true if nominal predictor attributes are present * @param numericPredictor true if numeric predictor attributes are present * @param stringPredictor true if string predictor attributes are present * @param datePredictor true if date predictor attributes are present * @param relationalPredictor true if relational predictor attributes are * present * @param multiInstance whether multi-instance is needed * @param classType the class type (NUMERIC, NOMINAL, etc.) 
*/ protected void printAttributeSummary(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) { String str = ""; if (numericPredictor) { str += " numeric"; } if (nominalPredictor) { if (str.length() > 0) { str += " &"; } str += " nominal"; } if (stringPredictor) { if (str.length() > 0) { str += " &"; } str += " string"; } if (datePredictor) { if (str.length() > 0) { str += " &"; } str += " date"; } if (relationalPredictor) { if (str.length() > 0) { str += " &"; } str += " relational"; } str += " predictors)"; switch (classType) { case Attribute.NUMERIC: str = " (numeric class," + str; break; case Attribute.NOMINAL: str = " (nominal class," + str; break; case Attribute.STRING: str = " (string class," + str; break; case Attribute.DATE: str = " (date class," + str; break; case Attribute.RELATIONAL: str = " (relational class," + str; break; } print(str); } /** * Returns the revision string. * * @return the revision */ @Override public String getRevision() { return RevisionUtils.extract("$Revision$"); } /** * Test method for this class * * @param args the commandline parameters */ public static void main(String[] args) { runCheck(new CheckClassifier(), args); } }