Example usage for weka.core Instances readInstance

List of usage examples for weka.core Instances readInstance

Introduction

In this page you can find the example usage for weka.core Instances readInstance.

Prototype

@Deprecated
public boolean readInstance(Reader reader) throws IOException 

Source Link

Document

Reads a single instance from the reader and appends it to the dataset.

Usage

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Evaluates a classifier with the options given in an array of
 * strings. <p>//from   ww w  . j  av a2  s  . c o  m
 *
 * Valid options are: <p>
 *
 * -t name of training file <br>
 * Name of the file with the training data. (required) <p>
 *
 * -T name of test file <br>
 * Name of the file with the test data. If missing a cross-validation
 * is performed. <p>
 *
 * -c class index <br>
 * Index of the class attribute (1, 2, ...; default: last). <p>
 *
 * -x number of folds <br>
 * The number of folds for the cross-validation (default: 10). <p>
 *
 * -s random number seed <br>
 * Random number seed for the cross-validation (default: 1). <p>
 *
 * -m file with cost matrix <br>
 * The name of a file containing a cost matrix. <p>
 *
 * -l name of model input file <br>
 * Loads classifier from the given file. <p>
 *
 * -d name of model output file <br>
 * Saves classifier built from the training data into the given file. <p>
 *
 * -v <br>
 * Outputs no statistics for the training data. <p>
 *
 * -o <br>
 * Outputs statistics only, not the classifier. <p>
 *
 * -i <br>
 * Outputs detailed information-retrieval statistics per class. <p>
 *
 * -k <br>
 * Outputs information-theoretic statistics. <p>
 *
 * -p <br>
 * Outputs predictions for test instances (and nothing else). <p>
 *
 * -r <br>
 * Outputs cumulative margin distribution (and nothing else). <p>
 *
 * -g <br>
 * Only for classifiers that implement "Graphable." Outputs
 * the graph representation of the classifier (and nothing
 * else). <p>
 *
 * @param classifier machine learning classifier
 * @param options the array of string containing the options
 * @exception Exception if model could not be evaluated successfully
 * @return a string describing the results */
public static String[] evaluateModel(Classifier classifier, String trainFileName, String objectOutputFileName)
        throws Exception {

    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    String testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName,
            attributeRangeString;
    boolean IRstatistics = false, noOutput = false, printClassifications = false, trainStatistics = true,
            printMargins = false, printComplexityStatistics = false, printGraph = false,
            classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    BufferedReader trainReader = null, testReader = null;
    ObjectInputStream objectInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;

    try {

        String[] options = null;

        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.length() != 0) {
            classIndex = Integer.parseInt(classIndexString);
        }
        //  trainFileName = Utils.getOption('t', options);

        objectInputFileName = Utils.getOption('l', options);
        //   objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test " + "file given.");
            }
        } else if ((objectInputFileName.length() != 0)
                && ((!(classifier instanceof UpdateableClassifier)) || (testFileName.length() == 0))) {
            throw new Exception("Classifier not incremental, or no " + "test file provided: can't "
                    + "use both train and model file.");
        }
        try {
            if (trainFileName.length() != 0) {
                trainReader = new BufferedReader(new FileReader(trainFileName));
            }
            if (testFileName.length() != 0) {
                testReader = new BufferedReader(new FileReader(testFileName));
            }
            if (objectInputFileName.length() != 0) {
                InputStream is = new FileInputStream(objectInputFileName);
                if (objectInputFileName.endsWith(".gz")) {
                    is = new GZIPInputStream(is);
                }
                objectInputStream = new ObjectInputStream(is);
            }
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        if (testFileName.length() != 0) {
            template = test = new Instances(testReader, 1);
            if (classIndex != -1) {
                test.setClassIndex(classIndex - 1);
            } else {
                test.setClassIndex(test.numAttributes() - 1);
            }
            if (classIndex > test.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
        }
        if (trainFileName.length() != 0) {
            if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)) {
                train = new Instances(trainReader, 1);
            } else {
                train = new Instances(trainReader);
            }
            template = train;
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            if ((testFileName.length() != 0) && !test.equalHeaders(train)) {
                throw new IllegalArgumentException("Train and test file not compatible!");
            }
            if (classIndex > train.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
            //train = new Instances(train);
        }
        if (template == null) {
            throw new Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());

        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.length() != 0);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClassifications = true;
            if (!attributeRangeString.equals("0")) {
                attributesToOutput = new Range(attributeRangeString);
            }
        }

        // If a model file is given, we can't process
        // scheme-specific options
        if (objectInputFileName.length() != 0) {
            Utils.checkForRemainingOptions(options);
        } else {

            // Set options for classifier
            if (classifier instanceof OptionHandler) {
                /* for (int i = 0; i < options.length; i++) {
                if (options[i].length() != 0) {
                    if (schemeOptionsText == null) {
                        schemeOptionsText = new StringBuffer();
                    }
                    if (options[i].indexOf(' ') != -1) {
                        schemeOptionsText.append('"' + options[i] + "\" ");
                    } else {
                        schemeOptionsText.append(options[i] + " ");
                    }
                }
                 }
                 */
                ((OptionHandler) classifier).setOptions(options);
            }
        }
        Utils.checkForRemainingOptions(options);

    } catch (Exception e) {
        throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier));
    }

    // Setup up evaluation objects
    EvaluationInternal trainingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);
    EvaluationInternal testingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);

    if (objectInputFileName.length() != 0) {

        // Load classifier from file
        classifier = (Classifier) objectInputStream.readObject();
        objectInputStream.close();
    }

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0) && (costMatrix == null)
            && (trainFileName.length() != 0)) {

        // Build classifier incrementally
        trainingEvaluation.setPriors(train);
        testingEvaluation.setPriors(train);
        trainTimeStart = System.currentTimeMillis();
        if (objectInputFileName.length() == 0) {
            classifier.buildClassifier(train);
        }
        while (train.readInstance(trainReader)) {

            trainingEvaluation.updatePriors(train.instance(0));
            testingEvaluation.updatePriors(train.instance(0));
            ((UpdateableClassifier) classifier).updateClassifier(train.instance(0));
            train.delete(0);
        }
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        trainReader.close();
    } else if (objectInputFileName.length() == 0) {

        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.setPriors(tempTrain);
        testingEvaluation.setPriors(tempTrain);
        trainTimeStart = System.currentTimeMillis();
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        OutputStream os = new FileOutputStream(objectOutputFileName);
        if (objectOutputFileName.endsWith(".gz")) {
            os = new GZIPOutputStream(os);
        }
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
        objectOutputStream.writeObject(classifier);
        objectOutputStream.flush();
        objectOutputStream.close();
    }

    /*   // If classifier is drawable output string describing graph
       if ((classifier instanceof Drawable)
    && (printGraph)) {
    return ((Drawable) classifier).graph();
       }
            
       // Output the classifier as equivalent source
       if ((classifier instanceof Sourcable)
    && (printSource)) {
    return wekaStaticWrapper((Sourcable) classifier, sourceClass);
       }
            
       // Output test instance predictions only
       if (printClassifications) {
    return printClassifications(classifier, new Instances(template, 0),
                                testFileName, classIndex, attributesToOutput);
       }
       */

    // Output model
    if (!(noOutput || printMargins)) {
        if (classifier instanceof OptionHandler) {
            if (schemeOptionsText != null) {
                text.append("\nOptions: " + schemeOptionsText);
                text.append("\n");
            }
        }
        text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
        text.append("\n=== Evaluation Cost Matrix ===\n\n").append(costMatrix.toString());
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.length() != 0)) {

        if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)
                && (costMatrix == null)) {

            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            trainReader = new BufferedReader(new FileReader(trainFileName));

            // Incremental testing
            train = new Instances(trainReader, 1);
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            testTimeStart = System.currentTimeMillis();
            while (train.readInstance(trainReader)) {

                trainingEvaluation.evaluateModelOnce((Classifier) classifier, train.instance(0));
                train.delete(0);
            }
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
            trainReader.close();
        } else {
            testTimeStart = System.currentTimeMillis();
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
        }

        // Print the results of the training evaluation
        //  if (printMargins) {
        //      return trainingEvaluation.toCumulativeMarginDistributionString();
        //   } else {
        text.append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2)
                + " seconds");
        text.append("\nTime taken to test model on training data: "
                + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
        text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n",
                printComplexityStatistics));
        if (template.classAttribute().isNominal()) {
            if (classStatistics) {
                text.append("\n\n" + trainingEvaluation.toClassDetailsString());
            }
            text.append("\n\n" + trainingEvaluation.toMatrixString());
        }

        //  }
    }

    // Compute proper error estimates
    if (testFileName.length() != 0) {

        // Testing is on the supplied test data
        while (test.readInstance(testReader)) {

            testingEvaluation.evaluateModelOnce((Classifier) classifier, test.instance(0));
            test.delete(0);
        }
        testReader.close();

        text.append("\n\n"
                + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
    } else if (trainFileName.length() != 0) {

        // Testing is via cross-validation on training data
        Random random = new Random(seed);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().isNumeric()) {
            text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
                    printComplexityStatistics));
        } else {
            text.append("\n\n\n" + testingEvaluation
                    .toSummaryString("=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
        }
    }
    if (template.classAttribute().isNominal()) {
        if (classStatistics) {
            text.append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.append("\n\n" + testingEvaluation.toMatrixString());
    }

    String result = "\t" + Utils.doubleToString(trainingEvaluation.pctCorrect(), 12, 4) + " %";
    result += "       " + Utils.doubleToString(testingEvaluation.pctCorrect(), 12, 4) + " %";

    String[] returnString = { text.toString(), result };
    return returnString;
}

From source file:GClass.EvaluationInternal.java

License:Open Source License

/**
 * Prints the predictions for the given dataset into a String variable.
 *///w  w  w  .j  a  v  a2 s .  com
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() != 0) {
        BufferedReader testReader = null;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = ((Classifier) classifier).classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.close();
    }
    return text.toString();
}

From source file:moa.streams.clustering.FileStream.java

License:Apache License

/**
 * @param ignoredAttributes Attributes that will be ignored
 * @return A list with min/max and diff=max-min values per attribute of the arff file 
 *///from ww w.  ja v a  2  s  .co m
protected ArrayList<Double[]> readMinMaxDiffValues(HashSet<Integer> ignoredAttributes) {
    ArrayList<Double[]> valuesMinMaxDiff = null;

    if (ignoredAttributes == null)
        ignoredAttributes = new HashSet<Integer>();

    try {
        InputStream fileStream = new FileInputStream(arffFileOption.getFile());
        InputStreamProgressMonitor fileProgressMonitor = new InputStreamProgressMonitor(fileStream);
        Reader fileReader = new BufferedReader(new InputStreamReader(fileProgressMonitor));
        Instances instances = new Instances(fileReader, 1);

        valuesMinMaxDiff = new ArrayList<Double[]>();
        for (int i = 0; i < instances.numAttributes() - ignoredAttributes.size(); i++) {
            Double[] values = { Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0.0 };
            valuesMinMaxDiff.add(values);
        }

        System.out.print("Reading arff file for normalization...");
        int counter = 0;
        while (instances.readInstance(fileReader)) {
            Instance instance = instances.instance(0);
            int a = 0;
            for (int i = 0; i < instances.numAttributes(); i++) {
                if (!ignoredAttributes.contains(i)) {
                    double value = instance.value(i);
                    if (value < valuesMinMaxDiff.get(a)[0])
                        valuesMinMaxDiff.get(a)[0] = value;
                    if (value > valuesMinMaxDiff.get(a)[1])
                        valuesMinMaxDiff.get(a)[1] = value;
                    a++;
                }
            }
            instances.delete();

            //show some progress
            counter++;
            if (counter >= 10000) {
                counter = 0;
                System.out.print(".");
            }
        }
        if (fileReader != null) {
            fileReader.close();
            fileReader = null;
        }
        System.out.println("done!");

        for (int i = 0; i < valuesMinMaxDiff.size(); i++) {
            valuesMinMaxDiff.get(i)[2] = valuesMinMaxDiff.get(i)[1] - valuesMinMaxDiff.get(i)[0];
        }

        return valuesMinMaxDiff;
    } catch (IOException ioe) {
        throw new RuntimeException("ArffFileStream failed to read instance from stream.", ioe);
    }
}