weka.classifiers.CheckClassifier.java Source code

Java tutorial

Introduction

Here is the source code for weka.classifiers.CheckClassifier.java

Source

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    CheckClassifier.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.Attribute;
import weka.core.CheckScheme;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializationHelper;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Class for examining the capabilities and finding problems with classifiers.
 * If you implement a classifier using the WEKA libraries, you should run the
 * checks on it to ensure robustness and correct operation. Passing all the
 * tests of this object does not mean bugs in the classifier don't exist, but
 * this will help find some common ones.
 * <p/>
 * 
 * Typical usage:
 * <p/>
 * <code>java weka.classifiers.CheckClassifier -W classifier_name
 * classifier_options </code>
 * <p/>
 * 
 * CheckClassifier reports on the following:
 * <ul>
 * <li>Classifier abilities
 * <ul>
 * <li>Possible command line options to the classifier</li>
 * <li>Whether the classifier can predict nominal, numeric, string, date or
 * relational class attributes. Warnings will be displayed if performance is
 * worse than ZeroR</li>
 * <li>Whether the classifier can be trained incrementally</li>
 * <li>Whether the classifier can handle numeric predictor attributes</li>
 * <li>Whether the classifier can handle nominal predictor attributes</li>
 * <li>Whether the classifier can handle string predictor attributes</li>
 * <li>Whether the classifier can handle date predictor attributes</li>
 * <li>Whether the classifier can handle relational predictor attributes</li>
 * <li>Whether the classifier can handle multi-instance data</li>
 * <li>Whether the classifier can handle missing predictor values</li>
 * <li>Whether the classifier can handle missing class values</li>
 * <li>Whether a nominal classifier only handles 2 class problems</li>
 * <li>Whether the classifier can handle instance weights</li>
 * </ul>
 * </li>
 * <li>Correct functioning
 * <ul>
 * <li>Correct initialisation during buildClassifier (i.e. no result changes
 * when buildClassifier called repeatedly)</li>
 * <li>Whether incremental training produces the same results as during
 * non-incremental training (which may or may not be OK)</li>
 * <li>Whether the classifier alters the data passed to it (number of instances,
 * instance order, instance weights, etc)</li>
 * <li>Whether the toString() method works correctly before the classifier has
 * been built.</li>
 * </ul>
 * </li>
 * <li>Degenerate cases
 * <ul>
 * <li>building classifier with zero training instances</li>
 * <li>all but one predictor attribute values missing</li>
 * <li>all predictor attribute values missing</li>
 * <li>all but one class values missing</li>
 * <li>all class values missing</li>
 * </ul>
 * </li>
 * </ul>
 * Running CheckClassifier with the debug option set will output the training
 * and test datasets for any failed tests.
 * <p/>
 * 
 * The <code>weka.classifiers.AbstractClassifierTest</code> uses this class to
 * test all the classifiers. Any changes here have to be checked in that
 * abstract test class, too.
 * <p/>
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -D
 *  Turn on debugging output.
 * </pre>
 * 
 * <pre>
 * -S
 *  Silent mode - prints nothing to stdout.
 * </pre>
 * 
 * <pre>
 * -N &lt;num&gt;
 *  The number of instances in the datasets (default 20).
 * </pre>
 * 
 * <pre>
 * -nominal &lt;num&gt;
 *  The number of nominal attributes (default 2).
 * </pre>
 * 
 * <pre>
 * -nominal-values &lt;num&gt;
 *  The number of values for nominal attributes (default 1).
 * </pre>
 * 
 * <pre>
 * -numeric &lt;num&gt;
 *  The number of numeric attributes (default 1).
 * </pre>
 * 
 * <pre>
 * -string &lt;num&gt;
 *  The number of string attributes (default 1).
 * </pre>
 * 
 * <pre>
 * -date &lt;num&gt;
 *  The number of date attributes (default 1).
 * </pre>
 * 
 * <pre>
 * -relational &lt;num&gt;
 *  The number of relational attributes (default 1).
 * </pre>
 * 
 * <pre>
 * -num-instances-relational &lt;num&gt;
 *  The number of instances in relational/bag attributes (default 10).
 * </pre>
 * 
 * <pre>
 * -words &lt;comma-separated-list&gt;
 *  The words to use in string attributes.
 * </pre>
 * 
 * <pre>
 * -word-separators &lt;chars&gt;
 *  The word separators to use in string attributes.
 * </pre>
 * 
 * <pre>
 * -W
 *  Full name of the classifier analysed.
 *  eg: weka.classifiers.bayes.NaiveBayes
 *  (default weka.classifiers.rules.ZeroR)
 * </pre>
 * 
 * <pre>
 * Options specific to classifier weka.classifiers.rules.ZeroR:
 * </pre>
 * 
 * <pre>
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * Options after -- are passed to the designated classifier.
 * <p/>
 * 
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 * @see TestInstances
 */
public class CheckClassifier extends CheckScheme {

    /*
     * Note about test methods: - methods return array of booleans - first index:
     * success or not - second index: acceptable or not (e.g., Exception is OK) -
     * in case the performance is worse than that of ZeroR both indices are true
     * 
     * FracPete (fracpete at waikato dot ac dot nz)
     */

    /** The classifier to be examined; initialised to a new ZeroR instance. */
    protected Classifier m_Classifier = new weka.classifiers.rules.ZeroR();

    /**
     * Lists the options understood by this checker: everything the superclass
     * reports, the -W option naming the classifier, and - when the current
     * classifier is an OptionHandler - a header entry followed by that
     * classifier's own options.
     *
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {
        // start from the options the superclass already knows about
        Vector<Option> options = new Vector<Option>(Collections.list(super.listOptions()));

        String classifierHelp = "\tFull name of the classifier analysed.\n" + "\teg: weka.classifiers.bayes.NaiveBayes\n"
                + "\t(default weka.classifiers.rules.ZeroR)";
        options.add(new Option(classifierHelp, "W", 1, "-W"));

        // instanceof is false for null, so no separate null check is required
        if (m_Classifier instanceof OptionHandler) {
            String header = "\nOptions specific to classifier " + m_Classifier.getClass().getName() + ":";
            options.add(new Option("", "", 0, header));

            OptionHandler handler = (OptionHandler) m_Classifier;
            options.addAll(Collections.list(handler.listOptions()));
        }

        return options.elements();
    }

    /**
     * Parses a given list of options.
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -D
     *  Turn on debugging output.
     * </pre>
     * 
     * <pre>
     * -S
     *  Silent mode - prints nothing to stdout.
     * </pre>
     * 
     * <pre>
     * -N &lt;num&gt;
     *  The number of instances in the datasets (default 20).
     * </pre>
     * 
     * <pre>
     * -nominal &lt;num&gt;
     *  The number of nominal attributes (default 2).
     * </pre>
     * 
     * <pre>
     * -nominal-values &lt;num&gt;
     *  The number of values for nominal attributes (default 1).
     * </pre>
     * 
     * <pre>
     * -numeric &lt;num&gt;
     *  The number of numeric attributes (default 1).
     * </pre>
     * 
     * <pre>
     * -string &lt;num&gt;
     *  The number of string attributes (default 1).
     * </pre>
     * 
     * <pre>
     * -date &lt;num&gt;
     *  The number of date attributes (default 1).
     * </pre>
     * 
     * <pre>
     * -relational &lt;num&gt;
     *  The number of relational attributes (default 1).
     * </pre>
     * 
     * <pre>
     * -num-instances-relational &lt;num&gt;
     *  The number of instances in relational/bag attributes (default 10).
     * </pre>
     * 
     * <pre>
     * -words &lt;comma-separated-list&gt;
     *  The words to use in string attributes.
     * </pre>
     * 
     * <pre>
     * -word-separators &lt;chars&gt;
     *  The word separators to use in string attributes.
     * </pre>
     * 
     * <pre>
     * -W
     *  Full name of the classifier analysed.
     *  eg: weka.classifiers.bayes.NaiveBayes
     *  (default weka.classifiers.rules.ZeroR)
     * </pre>
     * 
     * <pre>
     * Options specific to classifier weka.classifiers.rules.ZeroR:
     * </pre>
     * 
     * <pre>
     * -D
     *  If set, classifier is run in debug mode and
     *  may output additional info to the console
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        // the superclass handles its own options first
        super.setOptions(options);

        // -W names the classifier to check; an empty value falls back to ZeroR
        String requested = Utils.getOption('W', options);
        String className = (requested.length() == 0) ? weka.classifiers.rules.ZeroR.class.getName() : requested;

        // the partitioned options (those after "--", per the class documentation)
        // are handed to the classifier being instantiated
        String[] classifierOptions = Utils.partitionOptions(options);
        Classifier instantiated = (Classifier) forName("weka.classifiers", Classifier.class, className,
                classifierOptions);
        setClassifier(instantiated);
    }

    /**
     * Builds the option array describing the current configuration: the
     * superclass options, then -W followed by the classifier's class name (if a
     * classifier is set), then - after a "--" separator - the classifier's own
     * options when it is an OptionHandler that reports at least one option.
     *
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {
        ArrayList<String> collected = new ArrayList<String>();
        Collections.addAll(collected, super.getOptions());

        if (getClassifier() != null) {
            collected.add("-W");
            collected.add(getClassifier().getClass().getName());
        }

        // instanceof already yields false for a null classifier
        if (m_Classifier instanceof OptionHandler) {
            String[] classifierOptions = ((OptionHandler) m_Classifier).getOptions();
            if (classifierOptions.length > 0) {
                collected.add("--");
                Collections.addAll(collected, classifierOptions);
            }
        }

        return collected.toArray(new String[collected.size()]);
    }

    /**
     * Runs the whole check: reports which interfaces the classifier implements,
     * runs the general classifier tests, and then runs the per-class-type test
     * battery for nominal, numeric, date, string and relational class
     * attributes (in that order). Does nothing but print a notice when no
     * classifier is set.
     */
    @Override
    public void doTests() {

        if (getClassifier() == null) {
            println("\n=== No classifier set ===");
            return;
        }
        println("\n=== Check on Classifier: " + getClassifier().getClass().getName() + " ===\n");

        // reset the flag before any test runs
        m_ClasspathProblems = false;

        println("--> Checking for interfaces");
        canTakeOptions();
        boolean isUpdateable = updateableClassifier()[0];
        boolean handlesWeights = weightedInstancesHandler()[0];
        boolean handlesMultiInstance = multiInstanceHandler()[0];

        println("--> Classifier tests");
        declaresSerialVersionUID();
        testToString();

        // same battery for every class attribute type, in the original order
        int[] classTypes = { Attribute.NOMINAL, Attribute.NUMERIC, Attribute.DATE, Attribute.STRING,
                Attribute.RELATIONAL };
        for (int classType : classTypes) {
            testsPerClassType(classType, isUpdateable, handlesWeights, handlesMultiInstance);
        }
    }

    /**
     * Sets the classifier that the checks are run against.
     *
     * @param newClassifier the Classifier to use.
     */
    public void setClassifier(Classifier newClassifier) {
        this.m_Classifier = newClassifier;
    }

    /**
     * Returns the classifier that the checks are run against.
     *
     * @return the classifier currently being examined
     */
    public Classifier getClassifier() {
        return this.m_Classifier;
    }

    /**
     * Runs a battery of tests for a given class attribute type. Basic prediction
     * is probed once per predictor type first; the remaining tests only run if
     * at least one of those probes succeeded, and they use exactly the predictor
     * types that succeeded.
     *
     * @param classType the type of the class attribute (one of the Attribute
     *          type constants, e.g. Attribute.NOMINAL)
     * @param updateable true if the classifier is updateable
     * @param weighted true if the classifier says it handles weights
     * @param multiInstance true if the classifier is a multi-instance classifier
     */
    protected void testsPerClassType(int classType, boolean updateable, boolean weighted, boolean multiInstance) {

        boolean nominalOk = canPredict(true, false, false, false, false, multiInstance, classType)[0];
        boolean numericOk = canPredict(false, true, false, false, false, multiInstance, classType)[0];
        boolean stringOk = canPredict(false, false, true, false, false, multiInstance, classType)[0];
        boolean dateOk = canPredict(false, false, false, true, false, multiInstance, classType)[0];
        // relational predictors are not probed for multi-instance classifiers;
        // the short-circuit keeps canPredict from being called in that case
        boolean relationalOk = !multiInstance
                && canPredict(false, false, false, false, true, multiInstance, classType)[0];

        // nothing further to test if no predictor type works at all
        if (!(nominalOk || numericOk || stringOk || dateOk || relationalOk)) {
            return;
        }

        if (weighted) {
            instanceWeights(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType);
        }

        canHandleOnlyClass(nominalOk, numericOk, stringOk, dateOk, relationalOk, classType);

        if (classType == Attribute.NOMINAL) {
            canHandleNClasses(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, 4);
        }

        if (!multiInstance) {
            // class attribute as first and as second attribute
            for (int classIndex = 0; classIndex <= 1; classIndex++) {
                canHandleClassAsNthAttribute(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance,
                        classType, classIndex);
            }
        }

        canHandleZeroTraining(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType);

        // missing predictor values: level 100 is only tried if level 20 worked
        boolean missingPredictorsOk = canHandleMissing(nominalOk, numericOk, stringOk, dateOk, relationalOk,
                multiInstance, classType, true, false, 20)[0];
        if (missingPredictorsOk) {
            canHandleMissing(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType, true,
                    false, 100);
        }

        // missing class values: level 100 is only tried if level 20 worked
        boolean missingClassOk = canHandleMissing(nominalOk, numericOk, stringOk, dateOk, relationalOk,
                multiInstance, classType, false, true, 20)[0];
        if (missingClassOk) {
            canHandleMissing(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType, false,
                    true, 100);
        }

        correctBuildInitialisation(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType);
        datasetIntegrity(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType,
                missingPredictorsOk, missingClassOk);
        doesntUseTestClassVal(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType);

        if (updateable) {
            updatingEquality(nominalOk, numericOk, stringOk, dateOk, relationalOk, multiInstance, classType);
        }
    }

    /**
     * Checks whether the scheme's toString() method works even though the
     * classifier hasn't been built yet. A fresh instance of the classifier's
     * class is created through its no-argument constructor and toString() is
     * called on it; any exception counts as a failure.
     *
     * @return index 0 is true if the toString() method works fine; index 1 is
     *         never set by this test
     */
    protected boolean[] testToString() {
        boolean[] result = new boolean[2];

        print("toString...");

        try {
            // Class.newInstance() is deprecated: it rethrows whatever the
            // constructor throws without wrapping it. Going through the
            // Constructor object wraps constructor failures in an
            // InvocationTargetException, which the catch below reports as "no".
            Classifier copy = m_Classifier.getClass().getDeclaredConstructor().newInstance();
            copy.toString();
            result[0] = true;
            println("yes");
        } catch (Exception e) {
            result[0] = false;
            println("no");
            if (m_Debug) {
                println("\n=== Full report ===");
                e.printStackTrace();
                println("\n");
            }
        }

        return result;
    }

    /**
     * Tests for a serialVersionUID. Fails in case
     * SerializationHelper.needsUID reports that the classifier's class still
     * needs one.
     *
     * @return index 0 is true if the scheme declares a UID
     */
    protected boolean[] declaresSerialVersionUID() {
        print("serialVersionUID...");

        boolean declaresUid = !SerializationHelper.needsUID(m_Classifier.getClass());
        println(declaresUid ? "yes" : "no");

        // index 1 is not used by this test and stays false
        return new boolean[] { declaresUid, false };
    }

    /**
     * Checks whether the scheme can take command line options, i.e. whether it
     * is an OptionHandler. In debug mode the synopsis and description of every
     * option are printed as well.
     *
     * @return index 0 is true if the classifier can take options
     */
    protected boolean[] canTakeOptions() {

        boolean[] result = new boolean[2];

        print("options...");
        if (!(m_Classifier instanceof OptionHandler)) {
            println("no");
            result[0] = false;
            return result;
        }

        println("yes");
        if (m_Debug) {
            println("\n=== Full report ===");
            for (Enumeration<Option> opts = ((OptionHandler) m_Classifier).listOptions(); opts.hasMoreElements();) {
                Option current = opts.nextElement();
                print(current.synopsis() + "\n" + current.description() + "\n");
            }
            println("\n");
        }
        result[0] = true;

        return result;
    }

    /**
     * Checks whether the scheme can build models incrementally, i.e. whether it
     * implements UpdateableClassifier.
     *
     * @return index 0 is true if the classifier can train incrementally
     */
    protected boolean[] updateableClassifier() {

        boolean[] result = new boolean[2];

        print("updateable classifier...");
        result[0] = (m_Classifier instanceof UpdateableClassifier);
        println(result[0] ? "yes" : "no");

        return result;
    }

    /**
     * Checks whether the scheme says it can handle instance weights, i.e.
     * whether it implements WeightedInstancesHandler.
     *
     * @return index 0 is true if the classifier handles instance weights
     */
    protected boolean[] weightedInstancesHandler() {

        boolean[] result = new boolean[2];

        print("weighted instances classifier...");
        result[0] = (m_Classifier instanceof WeightedInstancesHandler);
        println(result[0] ? "yes" : "no");

        return result;
    }

    /**
     * Checks whether the scheme handles multi-instance data, i.e. whether it
     * implements MultiInstanceCapabilitiesHandler.
     *
     * @return index 0 is true if the classifier handles multi-instance data
     */
    protected boolean[] multiInstanceHandler() {
        boolean[] result = new boolean[2];

        print("multi-instance classifier...");
        result[0] = (m_Classifier instanceof MultiInstanceCapabilitiesHandler);
        println(result[0] ? "yes" : "no");

        return result;
    }

    /**
     * Checks basic prediction of the scheme, for simple non-troublesome
     * datasets: two classes, no missing values, and the configured number of
     * instances for both training and test data.
     *
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NOMINAL, NUMERIC, etc.)
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     */
    protected boolean[] canPredict(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor,
            boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) {

        print("basic predict");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");

        // keywords handed to runBasicTest as its "accepts" list - presumably
        // used there to decide whether a failure is acceptable (confirm in
        // runBasicTest)
        ArrayList<String> accepts = new ArrayList<String>();
        Collections.addAll(accepts, "unary", "binary", "nominal", "numeric", "string", "date", "relational",
                "multi-instance", "not in classpath");

        final int numTrain = getNumInstances();
        final int numTest = getNumInstances();
        final int numClasses = 2;
        final int missingLevel = 0;
        final boolean predictorMissing = false;
        final boolean classMissing = false;

        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor,
                multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest,
                numClasses, accepts);
    }

    /**
     * Checks whether the scheme can handle data that contains only the class
     * attribute. If a scheme cannot build a proper model with that data, it
     * should default back to a ZeroR model. The predictor flags are only used
     * for the printed attribute summary; the test itself is run without any
     * predictor attributes and without multi-instance data.
     *
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param classType the class type (NOMINAL, NUMERIC, etc.)
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     */
    protected boolean[] canHandleOnlyClass(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, int classType) {

        print("only class in data");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, false, classType);
        print("...");

        ArrayList<String> accepts = new ArrayList<String>();
        Collections.addAll(accepts, "class", "zeror");

        final int numTrain = getNumInstances();
        final int numTest = getNumInstances();
        final int numClasses = 2;
        final int missingLevel = 0;
        final boolean predictorMissing = false;
        final boolean classMissing = false;

        // every predictor flag and the multi-instance flag are off on purpose:
        // the dataset is meant to contain the class attribute only
        return runBasicTest(false, false, false, false, false, false, classType, missingLevel, predictorMissing,
                classMissing, numTrain, numTest, numClasses, accepts);
    }

    /**
     * Checks whether nominal schemes can handle more than two classes. If a
     * scheme is only designed for two-class problems it should throw an
     * appropriate exception for multi-class problems. The class attribute is
     * always nominal in this test.
     *
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param numClasses the number of classes to test
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     */
    protected boolean[] canHandleNClasses(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int numClasses) {

        final int classType = Attribute.NOMINAL;

        print("more than two class problems");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");

        ArrayList<String> accepts = new ArrayList<String>();
        Collections.addAll(accepts, "number", "class");

        final int numTrain = getNumInstances();
        final int numTest = getNumInstances();
        final int missingLevel = 0;
        final boolean predictorMissing = false;
        final boolean classMissing = false;

        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor,
                multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest,
                numClasses, accepts);
    }

    /**
     * Checks whether the scheme can handle the class attribute at a given
     * position in the dataset. The test uses two classes, no missing values and
     * an empty "accepts" list.
     *
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @param classIndex the index of the class attribute (0-based, -1 means last
     *          attribute)
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     * @see TestInstances#CLASS_IS_LAST
     */
    protected boolean[] canHandleClassAsNthAttribute(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType, int classIndex) {

        // the printed position is 1-based, unless the class is the last attribute
        String heading = (classIndex == TestInstances.CLASS_IS_LAST) ? "class attribute as last attribute"
                : "class attribute as " + (classIndex + 1) + ". attribute";
        print(heading);
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");

        // no keywords are registered for this test
        ArrayList<String> accepts = new ArrayList<String>();

        final int numTrain = getNumInstances();
        final int numTest = getNumInstances();
        final int numClasses = 2;
        final int missingLevel = 0;
        final boolean predictorMissing = false;
        final boolean classMissing = false;

        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor,
                multiInstance, classType, classIndex, missingLevel, predictorMissing, classMissing, numTrain,
                numTest, numClasses, accepts);
    }

    /**
     * Checks whether the scheme can handle zero training instances. The training
     * set is empty while the test set has the configured number of instances.
     *
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     */
    protected boolean[] canHandleZeroTraining(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType) {

        print("handle zero training instances");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");

        ArrayList<String> accepts = new ArrayList<String>();
        Collections.addAll(accepts, "train", "value");

        final int numTrain = 0; // the point of the test: nothing to train on
        final int numTest = getNumInstances();
        final int numClasses = 2;
        final int missingLevel = 0;
        final boolean predictorMissing = false;
        final boolean classMissing = false;

        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor,
                multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest,
                numClasses, accepts);
    }

    /**
     * Checks whether the scheme correctly initialises models when buildClassifier
     * is called. This test calls buildClassifier with one training dataset and
     * records performance on a test set. buildClassifier is then called on a
     * training set with different structure, and then again with the original
     * training set. The performance on the test set is compared with the original
     * results and any performance difference noted as incorrect build
     * initialisation.
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @return index 0 is true if the test was passed, index 1 is true if the
     *         scheme performs worse than ZeroR, but without error (index 0 is
     *         false)
     */
    protected boolean[] correctBuildInitialisation(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType) {

        boolean[] result = new boolean[2];

        print("correct initialisation during buildClassifier");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");
        int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 0;
        boolean predictorMissing = false, classMissing = false;

        Instances train1 = null;
        Instances test1 = null;
        Instances train2 = null;
        Instances test2 = null;
        Classifier classifier = null;
        Evaluation evaluation1A = null;
        Evaluation evaluation1B = null;
        Evaluation evaluation2 = null;
        boolean built = false;
        int stage = 0;
        try {

            // Make two sets of train/test splits with different
            // numbers of attributes
            train1 = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            train2 = makeTestDataset(84, numTrain, nominalPredictor ? getNumNominal() + 1 : 0,
                    numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            test1 = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            test2 = makeTestDataset(48, numTest, nominalPredictor ? getNumNominal() + 1 : 0,
                    numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            if (missingLevel > 0) {
                addMissing(train1, missingLevel, predictorMissing, classMissing);
                addMissing(test1, Math.min(missingLevel, 50), predictorMissing, classMissing);
                addMissing(train2, missingLevel, predictorMissing, classMissing);
                addMissing(test2, Math.min(missingLevel, 50), predictorMissing, classMissing);
            }

            classifier = AbstractClassifier.makeCopies(getClassifier(), 1)[0];
            evaluation1A = new Evaluation(train1);
            evaluation1B = new Evaluation(train1);
            evaluation2 = new Evaluation(train2);
        } catch (Exception ex) {
            throw new Error("Error setting up for tests: " + ex.getMessage());
        }
        try {
            stage = 0;
            classifier.buildClassifier(train1);
            built = true;
            if (!testWRTZeroR(classifier, evaluation1A, train1, test1)[0]) {
                throw new Exception("Scheme performs worse than ZeroR");
            }

            stage = 1;
            built = false;
            classifier.buildClassifier(train2);
            built = true;
            if (!testWRTZeroR(classifier, evaluation2, train2, test2)[0]) {
                throw new Exception("Scheme performs worse than ZeroR");
            }

            stage = 2;
            built = false;
            classifier.buildClassifier(train1);
            built = true;
            if (!testWRTZeroR(classifier, evaluation1B, train1, test1)[0]) {
                throw new Exception("Scheme performs worse than ZeroR");
            }

            stage = 3;
            if (!evaluation1A.equals(evaluation1B)) {
                if (m_Debug) {
                    println("\n=== Full report ===\n"
                            + evaluation1A.toSummaryString("\nFirst buildClassifier()", true) + "\n\n");
                    println(evaluation1B.toSummaryString("\nSecond buildClassifier()", true) + "\n\n");
                }
                throw new Exception("Results differ between buildClassifier calls");
            }
            println("yes");
            result[0] = true;

        } catch (Exception ex) {
            String msg = ex.getMessage().toLowerCase();
            if (msg.indexOf("worse than zeror") >= 0) {
                println("warning: performs worse than ZeroR");
                // result[0] = (stage < 1);
                // result[1] = (stage < 1);
                result[0] = true;
                result[1] = true;
            } else {
                println("no");
                result[0] = false;
            }
            if (m_Debug) {
                println("\n=== Full Report ===");
                print("Problem during");
                if (built) {
                    print(" testing");
                } else {
                    print(" training");
                }
                switch (stage) {
                case 0:
                    print(" of dataset 1");
                    break;
                case 1:
                    print(" of dataset 2");
                    break;
                case 2:
                    print(" of dataset 1 (2nd build)");
                    break;
                case 3:
                    print(", comparing results from builds of dataset 1");
                    break;
                }
                println(": " + ex.getMessage() + "\n");
                println("here are the datasets:\n");
                println("=== Train1 Dataset ===\n" + train1.toString() + "\n");
                println("=== Test1 Dataset ===\n" + test1.toString() + "\n\n");
                println("=== Train2 Dataset ===\n" + train2.toString() + "\n");
                println("=== Test2 Dataset ===\n" + test2.toString() + "\n\n");
            }
        }

        return result;
    }

    /**
     * Tests the scheme's basic handling of missing values. The test banner is
     * printed first and the actual work is delegated to
     * {@code runBasicTest}; an exception thrown by the scheme because of the
     * missing values is recorded there.
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @param predictorMissing true if the missing values may be in the predictors
     * @param classMissing true if the missing values may be in the class
     * @param missingLevel the percentage of missing values
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     */
    protected boolean[] canHandleMissing(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType, boolean predictorMissing, boolean classMissing, int missingLevel) {

        // Banner, e.g. "100% missing predictor and class values".
        if (missingLevel == 100) {
            print("100% ");
        }
        print("missing");
        if (predictorMissing) {
            print(" predictor");
        }
        if (predictorMissing && classMissing) {
            print(" and");
        }
        if (classMissing) {
            print(" class");
        }
        print(" values");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");

        // Words that make an exception message count as an acceptable failure.
        ArrayList<String> accepts = new ArrayList<String>();
        Collections.addAll(accepts, "missing", "value", "train");

        int numTrain = getNumInstances();
        int numTest = getNumInstances();
        int numClasses = 2;

        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor,
                multiInstance, classType, missingLevel, predictorMissing, classMissing, numTrain, numTest,
                numClasses, accepts);
    }

    /**
     * Checks whether an updateable scheme produces the same model when trained
     * incrementally as when batch trained. The model itself cannot be compared,
     * so we compare the evaluation on test data for both models. It is possible
     * to get a false positive on this test (likelihood depends on the
     * classifier).
     * <p/>
     * The batch-trained reference model is built as part of the set-up; any
     * failure there is treated as a set-up error and rethrown as an
     * {@link Error} with the original exception attached as its cause.
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @return index 0 is true if the test was passed
     */
    protected boolean[] updatingEquality(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType) {

        print("incremental training produces the same results" + " as batch training");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");
        int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2;

        boolean[] result = new boolean[2];
        Instances train = null;
        Instances test = null;
        Classifier[] classifiers = null;
        Evaluation evaluationB = null;
        Evaluation evaluationI = null;
        boolean built = false;
        try {
            train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            classifiers = AbstractClassifier.makeCopies(getClassifier(), 2);
            evaluationB = new Evaluation(train);
            evaluationI = new Evaluation(train);
            // Batch-trained reference model.
            classifiers[0].buildClassifier(train);
            testWRTZeroR(classifiers[0], evaluationB, train, test);
        } catch (Exception ex) {
            // Keep the original exception as the cause so its stack trace survives.
            throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
        }
        try {
            // Initialise on the empty header, then feed the instances one by one.
            classifiers[1].buildClassifier(new Instances(train, 0));
            for (int i = 0; i < train.numInstances(); i++) {
                ((UpdateableClassifier) classifiers[1]).updateClassifier(train.instance(i));
            }
            built = true;
            testWRTZeroR(classifiers[1], evaluationI, train, test);
            if (!evaluationB.equals(evaluationI)) {
                println("no");
                result[0] = false;

                if (m_Debug) {
                    println("\n=== Full Report ===");
                    println("Results differ between batch and " + "incrementally built models.\n"
                            + "Depending on the classifier, this may be OK");
                    println("Here are the results:\n");
                    println(evaluationB.toSummaryString("\nbatch built results\n", true));
                    println(evaluationI.toSummaryString("\nincrementally built results\n", true));
                    println("Here are the datasets:\n");
                    println("=== Train Dataset ===\n" + train.toString() + "\n");
                    println("=== Test Dataset ===\n" + test.toString() + "\n\n");
                }
            } else {
                println("yes");
                result[0] = true;
            }
        } catch (Exception ex) {
            // Terminate the "..." banner with a verdict, as the other checks do,
            // and only emit the detailed report when debugging is enabled.
            println("no");
            result[0] = false;

            if (m_Debug) {
                println("\n=== Full Report ===");
                print("Problem during");
                if (built) {
                    print(" testing");
                } else {
                    print(" training");
                }
                println(": " + ex.getMessage() + "\n");
                println("Here are the datasets:\n");
                println("=== Train Dataset ===\n" + train.toString() + "\n");
                println("=== Test Dataset ===\n" + test.toString() + "\n\n");
            }
        }

        return result;
    }

    /**
     * Checks whether the classifier erroneously uses the class value of test
     * instances (if provided). Two copies of the scheme are trained on the same
     * data; one predicts each test instance as is, the other predicts a copy of
     * that instance whose class value has been set to missing. The test passes
     * if both class distributions agree for every test instance (entries that
     * are NaN in both distributions are treated as equal).
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @return index 0 is true if the test was passed
     */
    protected boolean[] doesntUseTestClassVal(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType) {

        print("classifier ignores test instance class vals");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");
        int numTrain = 2 * getNumInstances(), numTest = getNumInstances(), numClasses = 2;

        boolean[] result = new boolean[2];
        Instances train = null;
        Instances test = null;
        Classifier[] classifiers = null;
        // Set when the failure is a genuine prediction mismatch rather than an
        // exception thrown by the scheme while predicting.
        boolean evalFail = false;
        try {
            train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() + 1 : 0,
                    numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() + 1 : 0,
                    numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            classifiers = AbstractClassifier.makeCopies(getClassifier(), 2);
            classifiers[0].buildClassifier(train);
            classifiers[1].buildClassifier(train);
        } catch (Exception ex) {
            // Keep the original exception as the cause so its stack trace survives.
            throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
        }
        try {

            // Now set test values to missing when predicting
            for (int i = 0; i < test.numInstances(); i++) {
                Instance testInst = test.instance(i);
                Instance classMissingInst = (Instance) testInst.copy();
                classMissingInst.setDataset(test);
                classMissingInst.setClassMissing();
                double[] dist0 = classifiers[0].distributionForInstance(testInst);
                double[] dist1 = classifiers[1].distributionForInstance(classMissingInst);
                // Distributions of different size can never match; check this up
                // front instead of ignoring surplus entries or running off the end.
                if (dist0.length != dist1.length) {
                    evalFail = true;
                    throw new Exception("Prediction different for instance " + (i + 1));
                }
                for (int j = 0; j < dist0.length; j++) {
                    // ignore, if both are NaNs
                    if (Double.isNaN(dist0[j]) && Double.isNaN(dist1[j])) {
                        if (getDebug()) {
                            println("Both predictions are NaN!");
                        }
                        continue;
                    }
                    // distribution different?
                    if (dist0[j] != dist1[j]) {
                        evalFail = true;
                        throw new Exception("Prediction different for instance " + (i + 1));
                    }
                }
            }

            println("yes");
            result[0] = true;
        } catch (Exception ex) {
            println("no");
            result[0] = false;

            if (m_Debug) {
                println("\n=== Full Report ===");

                if (evalFail) {
                    println("Results differ between non-missing and " + "missing test class values.");
                    println(ex.getMessage() + "\n");
                } else {
                    print("Problem during testing");
                    println(": " + ex.getMessage() + "\n");
                }
                println("Here are the datasets:\n");
                println("=== Train Dataset ===\n" + train.toString() + "\n");
                println("=== Train Weights ===\n");
                for (int i = 0; i < train.numInstances(); i++) {
                    println(" " + (i + 1) + "    " + train.instance(i).weight());
                }
                println("=== Test Dataset ===\n" + test.toString() + "\n\n");
                println("(test weights all 1.0\n");
            }
        }

        return result;
    }

    /**
     * Checks whether the classifier can handle instance weights. This test
     * compares the classifier performance on two datasets that are identical
     * except for the training weights. If the results change, then the classifier
     * must be using the weights. The test may wrongly report that weights are
     * ignored if the weight changes aren't significant enough to induce a
     * change in classifier performance (but the weights are chosen to minimize
     * the likelihood of this).
     * <p/>
     * The reference model on the unmodified weights is built as part of the
     * set-up; any failure there is rethrown as an {@link Error} with the
     * original exception attached as its cause.
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @return index 0 true if the test was passed
     */
    protected boolean[] instanceWeights(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor,
            boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType) {

        print("classifier uses instance weights");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");
        int numTrain = 2 * getNumInstances(), numTest = getNumInstances(), numClasses = 2;

        boolean[] result = new boolean[2];
        Instances train = null;
        Instances test = null;
        Classifier[] classifiers = null;
        Evaluation evaluationB = null;
        Evaluation evaluationI = null;
        boolean built = false;
        boolean evalFail = false;
        try {
            train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() + 1 : 0,
                    numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() + 1 : 0,
                    numericPredictor ? getNumNumeric() + 1 : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            classifiers = AbstractClassifier.makeCopies(getClassifier(), 2);
            evaluationB = new Evaluation(train);
            evaluationI = new Evaluation(train);
            // Reference model trained with the weights as generated.
            classifiers[0].buildClassifier(train);
            testWRTZeroR(classifiers[0], evaluationB, train, test);
        } catch (Exception ex) {
            // Keep the original exception as the cause so its stack trace survives.
            throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
        }
        try {

            // Now modify instance weights and rebuild/test: zero every weight,
            // then give random instances a weight in 1..10 (fixed seed, so the
            // run is reproducible).
            for (int i = 0; i < train.numInstances(); i++) {
                train.instance(i).setWeight(0);
            }
            Random random = new Random(1);
            for (int i = 0; i < train.numInstances() / 2; i++) {
                int inst = random.nextInt(train.numInstances());
                int weight = random.nextInt(10) + 1;
                train.instance(inst).setWeight(weight);
            }
            classifiers[1].buildClassifier(train);
            built = true;
            testWRTZeroR(classifiers[1], evaluationI, train, test);
            if (evaluationB.equals(evaluationI)) {
                // Identical results: the weights apparently had no influence.
                evalFail = true;
                throw new Exception("evalFail");
            }

            println("yes");
            result[0] = true;
        } catch (Exception ex) {
            println("no");
            result[0] = false;

            if (m_Debug) {
                println("\n=== Full Report ===");

                if (evalFail) {
                    println("Results don't differ between non-weighted and " + "weighted instance models.");
                    println("Here are the results:\n");
                    println(evaluationB.toSummaryString("\nboth methods\n", true));
                } else {
                    print("Problem during");
                    if (built) {
                        print(" testing");
                    } else {
                        print(" training");
                    }
                    println(": " + ex.getMessage() + "\n");
                }
                println("Here are the datasets:\n");
                println("=== Train Dataset ===\n" + train.toString() + "\n");
                println("=== Train Weights ===\n");
                for (int i = 0; i < train.numInstances(); i++) {
                    println(" " + (i + 1) + "    " + train.instance(i).weight());
                }
                println("=== Test Dataset ===\n" + test.toString() + "\n\n");
                println("(test weights all 1.0\n");
            }
        }

        return result;
    }

    /**
     * Checks whether the scheme alters the training dataset during training. If
     * the scheme needs to modify the training data it should take a copy of the
     * training data. Currently checks for changes to header structure, number of
     * instances, order of instances, instance weights.
     * <p/>
     * The scheme is trained and evaluated on copies of the generated datasets;
     * afterwards each copy is compared with its untouched original via
     * {@code compareDatasets}.
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @param predictorMissing true if we know the classifier can handle (at
     *          least) moderate missing predictor values
     * @param classMissing true if we know the classifier can handle (at least)
     *          moderate missing class values
     * @return index 0 is true if the test was passed
     */
    protected boolean[] datasetIntegrity(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType, boolean predictorMissing, boolean classMissing) {

        print("classifier doesn't alter original datasets");
        printAttributeSummary(nominalPredictor, numericPredictor, stringPredictor, datePredictor,
                relationalPredictor, multiInstance, classType);
        print("...");
        int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, missingLevel = 20;

        boolean[] result = new boolean[2];
        Instances train = null;
        Instances test = null;
        Classifier classifier = null;
        Evaluation evaluation = null;
        boolean built = false;
        try {
            train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, multiInstance);
            // The missing level is a fixed, positive value here, so missing
            // values are always requested (subject to the two flags).
            addMissing(train, missingLevel, predictorMissing, classMissing);
            addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing);
            classifier = AbstractClassifier.makeCopies(getClassifier(), 1)[0];
            evaluation = new Evaluation(train);
        } catch (Exception ex) {
            // Keep the original exception as the cause so its stack trace survives.
            throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
        }
        try {
            // Work on copies; the originals serve as the reference afterwards.
            Instances trainCopy = new Instances(train);
            Instances testCopy = new Instances(test);
            classifier.buildClassifier(trainCopy);
            compareDatasets(train, trainCopy);
            built = true;
            testWRTZeroR(classifier, evaluation, trainCopy, testCopy);
            compareDatasets(test, testCopy);

            println("yes");
            result[0] = true;
        } catch (Exception ex) {
            println("no");
            result[0] = false;

            if (m_Debug) {
                println("\n=== Full Report ===");
                print("Problem during");
                if (built) {
                    print(" testing");
                } else {
                    print(" training");
                }
                println(": " + ex.getMessage() + "\n");
                println("Here are the datasets:\n");
                println("=== Train Dataset ===\n" + train.toString() + "\n");
                println("=== Test Dataset ===\n" + test.toString() + "\n\n");
            }
        }

        return result;
    }

    /**
     * Runs a test on the datasets with the given characteristics, using the
     * last attribute as the class attribute.
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @param missingLevel the percentage of missing values
     * @param predictorMissing true if the missing values may be in the predictors
     * @param classMissing true if the missing values may be in the class
     * @param numTrain the number of instances in the training set
     * @param numTest the number of instances in the test set
     * @param numClasses the number of classes
     * @param accepts the acceptable string in an exception
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     */
    protected boolean[] runBasicTest(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor,
            boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType,
            int missingLevel, boolean predictorMissing, boolean classMissing, int numTrain, int numTest,
            int numClasses, ArrayList<String> accepts) {

        // No class position was requested, so delegate with "class is last".
        final int classIndex = TestInstances.CLASS_IS_LAST;

        return runBasicTest(nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor,
                multiInstance, classType, classIndex, missingLevel, predictorMissing, classMissing, numTrain,
                numTest, numClasses, accepts);
    }

    /**
     * Runs a test on the datasets with the given characteristics: generates a
     * train and a test set, optionally adds missing values, trains a copy of the
     * scheme and evaluates it against ZeroR.
     * <p/>
     * A scheme that merely performs worse than ZeroR is reported as a warning
     * and still counts as passed. Any other exception fails the test; the
     * failure is flagged as acceptable if the (lower-cased) exception message
     * contains one of the strings in <code>accepts</code>.
     * 
     * @param nominalPredictor if true use nominal predictor attributes
     * @param numericPredictor if true use numeric predictor attributes
     * @param stringPredictor if true use string predictor attributes
     * @param datePredictor if true use date predictor attributes
     * @param relationalPredictor if true use relational predictor attributes
     * @param multiInstance whether multi-instance is needed
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @param classIndex the attribute index of the class
     * @param missingLevel the percentage of missing values
     * @param predictorMissing true if the missing values may be in the predictors
     * @param classMissing true if the missing values may be in the class
     * @param numTrain the number of instances in the training set
     * @param numTest the number of instances in the test set
     * @param numClasses the number of classes
     * @param accepts the acceptable string in an exception
     * @return index 0 is true if the test was passed, index 1 is true if test was
     *         acceptable
     */
    protected boolean[] runBasicTest(boolean nominalPredictor, boolean numericPredictor, boolean stringPredictor,
            boolean datePredictor, boolean relationalPredictor, boolean multiInstance, int classType,
            int classIndex, int missingLevel, boolean predictorMissing, boolean classMissing, int numTrain,
            int numTest, int numClasses, ArrayList<String> accepts) {

        boolean[] result = new boolean[2];
        Instances train = null;
        Instances test = null;
        Classifier classifier = null;
        Evaluation evaluation = null;
        boolean built = false;
        try {
            train = makeTestDataset(42, numTrain, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, classIndex, multiInstance);
            test = makeTestDataset(24, numTest, nominalPredictor ? getNumNominal() : 0,
                    numericPredictor ? getNumNumeric() : 0, stringPredictor ? getNumString() : 0,
                    datePredictor ? getNumDate() : 0, relationalPredictor ? getNumRelational() : 0, numClasses,
                    classType, classIndex, multiInstance);
            if (missingLevel > 0) {
                addMissing(train, missingLevel, predictorMissing, classMissing);
                addMissing(test, Math.min(missingLevel, 50), predictorMissing, classMissing);
            }
            classifier = AbstractClassifier.makeCopies(getClassifier(), 1)[0];
            evaluation = new Evaluation(train);
        } catch (Exception ex) {
            // Chain the original exception instead of dumping its stack trace
            // here: the trace travels with the Error to whoever handles it.
            throw new Error("Error setting up for tests: " + ex.getMessage(), ex);
        }
        try {
            classifier.buildClassifier(train);
            built = true;
            if (!testWRTZeroR(classifier, evaluation, train, test)[0]) {
                // Handled in the catch block below, which sets the result flags.
                throw new Exception("Scheme performs worse than ZeroR");
            }

            println("yes");
            result[0] = true;
        } catch (Exception ex) {
            String msg = (ex.getMessage() == null) ? "" : ex.getMessage().toLowerCase();
            if (msg.indexOf("not in classpath") > -1) {
                m_ClasspathProblems = true;
            }

            boolean worseThanZeroR = msg.indexOf("worse than zeror") >= 0;
            boolean acceptable = false;
            if (worseThanZeroR) {
                // Poor performance is only a warning; the test still passes.
                println("warning: performs worse than ZeroR");
                result[0] = true;
                result[1] = true;
            } else {
                for (String accept : accepts) {
                    if (msg.indexOf(accept) >= 0) {
                        acceptable = true;
                    }
                }

                println("no" + (acceptable ? " (OK error message)" : ""));
                result[1] = acceptable;
            }

            if (m_Debug) {
                println("\n=== Full Report ===");
                print("Problem during");
                if (built) {
                    print(" testing");
                } else {
                    print(" training");
                }
                println(": " + ex.getMessage() + "\n");
                if (!acceptable) {
                    // The hint about expected words only makes sense for a real
                    // error message, not for the ZeroR warning raised above.
                    if (!worseThanZeroR && accepts.size() > 0) {
                        print("Error message doesn't mention ");
                        for (int i = 0; i < accepts.size(); i++) {
                            if (i != 0) {
                                print(" or ");
                            }
                            print('"' + accepts.get(i) + '"');
                        }
                        // Finish the line so the next heading starts on its own line.
                        println("");
                    }
                    println("here are the datasets:\n");
                    println("=== Train Dataset ===\n" + train.toString() + "\n");
                    println("=== Test Dataset ===\n" + test.toString() + "\n\n");
                }
            }
        }

        return result;
    }

    /**
     * Evaluates the trained scheme on the test data and compares its error
     * rate with that of a ZeroR model built on the same training data.
     * <p/>
     * The scheme is evaluated outside the guarded section: an exception raised
     * while testing the scheme is propagated unchanged, whereas any exception
     * raised while building or evaluating ZeroR is rethrown as an
     * {@link Error}.
     * 
     * @param classifier the pre-trained classifier
     * @param evaluation the classifier evaluation object
     * @param train the training data
     * @param test the test data
     * @return a two-element array; index 0 holds the result of
     *         <code>Utils.grOrEq(zeroRErrorRate, schemeErrorRate)</code>, i.e.
     *         it is true if the scheme performs at least as well as ZeroR;
     *         index 1 is not set by this method
     * @throws Exception if there was a problem during the scheme's testing
     */
    protected boolean[] testWRTZeroR(Classifier classifier, Evaluation evaluation, Instances train, Instances test)
            throws Exception {

        boolean[] result = new boolean[2];

        // problems of the scheme under test are left to propagate to the caller
        evaluation.evaluateModel(classifier, test);
        try {

            // Tested OK, compare with ZeroR
            Classifier zeroR = new weka.classifiers.rules.ZeroR();
            zeroR.buildClassifier(train);
            Evaluation zeroREval = new Evaluation(train);
            zeroREval.evaluateModel(zeroR, test);
            result[0] = Utils.grOrEq(zeroREval.errorRate(), evaluation.errorRate());
        } catch (Exception ex) {
            // chain the original exception so its stack trace is not lost
            throw new Error("Problem determining ZeroR performance: " + ex.getMessage(), ex);
        }

        return result;
    }

    /**
     * Generates a simple dataset with the class attribute in the last
     * position. The result can later be modified for use in specific tests.
     * This is a convenience overload that forwards to the variant taking an
     * explicit class index.
     * 
     * @param seed the random number seed
     * @param numInstances the number of instances to generate
     * @param numNominal the number of nominal attributes
     * @param numNumeric the number of numeric attributes
     * @param numString the number of string attributes
     * @param numDate the number of date attributes
     * @param numRelational the number of relational attributes
     * @param numClasses the number of classes (if nominal class)
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @param multiInstance whether the dataset should be a multi-instance
     *          dataset
     * @return the test dataset
     * @throws Exception if the dataset couldn't be generated
     * @see TestInstances#CLASS_IS_LAST
     * @see #process(Instances)
     */
    protected Instances makeTestDataset(int seed, int numInstances, int numNominal, int numNumeric, int numString,
            int numDate, int numRelational, int numClasses, int classType, boolean multiInstance) throws Exception {

        // no class index given by the caller: fall back to "class is last"
        final int lastPosition = TestInstances.CLASS_IS_LAST;

        final Instances generated = makeTestDataset(seed, numInstances, numNominal, numNumeric, numString, numDate,
                numRelational, numClasses, classType, lastPosition, multiInstance);
        return generated;
    }

    /**
     * Generates a simple dataset in which the position of the class attribute
     * can be chosen. The result can later be modified for use in specific
     * tests. The dataset is produced by a {@link TestInstances} generator
     * configured from the arguments and from this object's word and word
     * separator settings, and is then passed through
     * {@link #process(Instances)}.
     * 
     * @param seed the random number seed
     * @param numInstances the number of instances to generate
     * @param numNominal the number of nominal attributes
     * @param numNumeric the number of numeric attributes
     * @param numString the number of string attributes
     * @param numDate the number of date attributes
     * @param numRelational the number of relational attributes
     * @param numClasses the number of classes (if nominal class)
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     * @param classIndex the index of the class (0-based, -1 as last)
     * @param multiInstance whether the dataset should be a multi-instance
     *          dataset
     * @return the test dataset
     * @throws Exception if the dataset couldn't be generated
     * @see TestInstances#CLASS_IS_LAST
     * @see #process(Instances)
     */
    protected Instances makeTestDataset(int seed, int numInstances, int numNominal, int numNumeric, int numString,
            int numDate, int numRelational, int numClasses, int classType, int classIndex, boolean multiInstance)
            throws Exception {

        final TestInstances generator = new TestInstances();

        // seed and size
        generator.setSeed(seed);
        generator.setNumInstances(numInstances);

        // predictor attributes per type
        generator.setNumNominal(numNominal);
        generator.setNumNumeric(numNumeric);
        generator.setNumString(numString);
        generator.setNumDate(numDate);
        generator.setNumRelational(numRelational);

        // class attribute
        generator.setNumClasses(numClasses);
        generator.setClassType(classType);
        generator.setClassIndex(classIndex);
        // NOTE(review): setNumClasses is deliberately kept a second time to
        // preserve the existing call sequence; it looks redundant - confirm
        // against TestInstances before removing
        generator.setNumClasses(numClasses);

        // remaining generator settings
        generator.setMultiInstance(multiInstance);
        generator.setWords(getWords());
        generator.setWordSeparators(getWordSeparators());

        final Instances raw = generator.generate();
        return process(raw);
    }

    /**
     * Prints a short, parenthesised summary of the dataset characteristics,
     * e.g. <code> (nominal class, numeric &amp; nominal predictors)</code>.
     * Predictor types are listed in the fixed order numeric, nominal, string,
     * date, relational. If the class type is none of the types handled below,
     * only the predictor part is printed.
     * 
     * @param nominalPredictor true if nominal predictor attributes are present
     * @param numericPredictor true if numeric predictor attributes are present
     * @param stringPredictor true if string predictor attributes are present
     * @param datePredictor true if date predictor attributes are present
     * @param relationalPredictor true if relational predictor attributes are
     *          present
     * @param multiInstance whether multi-instance is needed (not used by this
     *          method)
     * @param classType the class type (NUMERIC, NOMINAL, etc.)
     */
    protected void printAttributeSummary(boolean nominalPredictor, boolean numericPredictor,
            boolean stringPredictor, boolean datePredictor, boolean relationalPredictor, boolean multiInstance,
            int classType) {

        // presence flags and their labels, in output order
        final boolean[] present = { numericPredictor, nominalPredictor, stringPredictor, datePredictor,
                relationalPredictor };
        final String[] labels = { " numeric", " nominal", " string", " date", " relational" };

        final StringBuilder predictors = new StringBuilder();
        for (int i = 0; i < present.length; i++) {
            if (!present[i]) {
                continue;
            }
            // separate every label from the one before it
            if (predictors.length() > 0) {
                predictors.append(" &");
            }
            predictors.append(labels[i]);
        }
        predictors.append(" predictors)");

        final String classPrefix;
        switch (classType) {
        case Attribute.NUMERIC:
            classPrefix = " (numeric class,";
            break;
        case Attribute.NOMINAL:
            classPrefix = " (nominal class,";
            break;
        case Attribute.STRING:
            classPrefix = " (string class,";
            break;
        case Attribute.DATE:
            classPrefix = " (date class,";
            break;
        case Attribute.RELATIONAL:
            classPrefix = " (relational class,";
            break;
        default:
            // unhandled class type: nothing is prepended
            classPrefix = "";
            break;
        }

        print(classPrefix + predictors.toString());
    }

    /**
     * Returns the revision string, as extracted by
     * {@link RevisionUtils#extract(String)} from the <code>$Revision$</code>
     * literal.
     * 
     * @return the revision
     */
    @Override
    public String getRevision() {
        final String keyword = "$Revision$";
        return RevisionUtils.extract(keyword);
    }

    /**
     * Command-line entry point: creates a new {@link CheckClassifier} and
     * hands it, together with the arguments, to <code>runCheck</code>.
     * 
     * @param args the commandline parameters
     */
    public static void main(String[] args) {
        final CheckClassifier checker = new CheckClassifier();
        runCheck(checker, args);
    }
}