Example usage for weka.classifiers.trees J48 J48

List of usage examples for weka.classifiers.trees J48 J48

Introduction

In this page you can find the example usage for weka.classifiers.trees J48 J48.

Prototype

J48

Source Link

Usage

From source file:cs.man.ac.uk.classifiers.GetAUC.java

License:Open Source License

/**
 * Computes the AUC for the supplied learner.
 * @return the AUC as a double value./*from w ww.j  a v  a2 s  . co  m*/
 */
@SuppressWarnings("unused")
private static double validate5x2CV() {
    try {
        // other options
        int runs = 5;
        int folds = 2;
        double AUC_SUM = 0;

        // perform cross-validation
        for (int i = 0; i < runs; i++) {
            // randomize data
            int seed = i + 1;
            Random rand = new Random(seed);
            Instances randData = new Instances(data);
            randData.randomize(rand);

            if (randData.classAttribute().isNominal()) {
                System.out.println("Stratifying...");
                randData.stratify(folds);
            }

            Evaluation eval = new Evaluation(randData);

            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                // the above code is used by the StratifiedRemoveFolds filter, the
                // code below by the Explorer/Experimenter:
                // Instances train = randData.trainCV(folds, n, rand);

                // build and evaluate classifier
                String[] options = { "-U", "-A" };
                J48 classifier = new J48();
                //HTree classifier = new HTree();

                classifier.setOptions(options);
                classifier.buildClassifier(train);
                eval.evaluateModel(classifier, test);

                // generate curve
                ThresholdCurve tc = new ThresholdCurve();
                int classIndex = 0;
                Instances result = tc.getCurve(eval.predictions(), classIndex);

                // plot curve
                vmc = new ThresholdVisualizePanel();
                AUC_SUM += ThresholdCurve.getROCArea(result);
                System.out.println("AUC: " + ThresholdCurve.getROCArea(result) + " \tAUC SUM: " + AUC_SUM);
            }
        }

        return AUC_SUM / ((double) runs * (double) folds);
    } catch (Exception e) {
        System.out.println("Exception validating data!");
        return 0;
    }
}

From source file:cs.man.ac.uk.mvc.ClassifierBuilder.java

License:Open Source License

/**
 * Builds and tests the classifier specified by the algorithm variable.
 * Note if no unlabelled data is in the test set, then meta data can be set to null.
 * @return confusion matrix describing binary classification outcomes.
 *//*from   w w w  .  j a  v  a2 s.  c  o  m*/
public int[][] test() {
    switch (algorithm) {
    case Classifiers.J48:
        return stdloadAndTest(new StandardAlgorithmTester(this.outputFile, "J48", this.verbose, new J48()));
    case Classifiers.MLP:
        return stdloadAndTest(
                new StandardAlgorithmTester(this.outputFile, "MLP", this.verbose, new MultilayerPerceptron()));
    case Classifiers.NB:
        return stdloadAndTest(
                new StandardAlgorithmTester(this.outputFile, "NB", this.verbose, new NaiveBayes()));
    case Classifiers.SVM:
        return stdloadAndTest(new StandardAlgorithmTester(this.outputFile, "SVM", this.verbose, new SMO()));
    case Classifiers.HTREE:
        return streamloadAndTest(
                new StreamAlgorithmTester(this.outputFile, "HTREE", this.verbose, new HoeffdingTree()));
    case Classifiers.GHVFDT:
        return streamloadAndTest(
                new StreamAlgorithmTester(this.outputFile, "GHVFDT", this.verbose, new GHVFDT()));
    case Classifiers.PNB:
        return streamloadAndTest(new StreamAlgorithmTester(this.outputFile, "PNB", this.verbose, new PNB()));
    case Classifiers.OCVFDT:
        return streamloadAndTest(
                new StreamAlgorithmTester(this.outputFile, "OCVFDT", this.verbose, new OCVFDT()));
    default:
        int[][] confusion_matrix = { { 0, 0 }, { 0, 0 } };
        return confusion_matrix;
    }
}

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsEnsembleNew(String trainPath, String testPath, String resultPath) {
    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    /**//from w  w  w . j  av  a2 s.  c  om
     * The ensemble classifiers. This is a heterogeneous ensemble.
     */
    J48 learner1 = new J48();
    SMO learner2 = new SMO();
    NaiveBayes learner3 = new NaiveBayes();
    MultilayerPerceptron learner5 = new MultilayerPerceptron();

    System.out.println("Training Ensemble.");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());

        learner1.buildClassifier(data);
        learner2.buildClassifier(data);
        learner3.buildClassifier(data);
        learner5.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s).");
    } catch (IOException e) {
        System.out.println("Could not train Ensemble classifier IOException on training data file.");
    } catch (Exception e) {
        System.out.println("Could not train Ensemble classifier Exception building model.");
    }

    try {
        String line = "";

        // Read the file and display it line by line. 
        BufferedReader in = null;

        // Read in and store each positive prediction in the tree map.
        try {
            //open stream to file
            in = new BufferedReader(new FileReader(testPath));

            while ((line = in.readLine()) != null) {
                if (line.toLowerCase().contains("@data"))
                    break;
            }
        } catch (Exception e) {
        }

        // A different ARFF loader used here (compared to above) as
        // the ARFF file may be extremely large. In which case the whole
        // file cannot be read in. Instead it is read in incrementally.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));

        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Ensemble Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;
            line = in.readLine();

            double classification1 = learner1.classifyInstance(current);
            double classification2 = learner2.classifyInstance(current);
            double classification3 = learner3.classifyInstance(current);
            double classification5 = learner5.classifyInstance(current);

            // All classifiers must agree. This is a very primitive ensemble strategy!
            if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) {
                if (line != null) {
                    //System.out.println("Instance: "+instanceNumber+"\t"+line);
                    //System.in.read();
                }
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        in.close();

        System.out.println("Test set instances: " + instanceNumber);

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s).");
    } catch (Exception e) {
        System.out.println("Could not test Ensemble classifier due to an error.");
    }
}

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsEnsembleStream(String trainPath, String testPath, String resultPath) {
    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    /**/*from   ww  w. ja  v  a  2  s . co  m*/
     * The ensemble classifiers. This is a heterogeneous ensemble.
     */
    J48 learner1 = new J48();
    SMO learner2 = new SMO();
    NaiveBayes learner3 = new NaiveBayes();
    MultilayerPerceptron learner5 = new MultilayerPerceptron();

    System.out.println("Training Ensemble.");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());

        learner1.buildClassifier(data);
        learner2.buildClassifier(data);
        learner3.buildClassifier(data);
        learner5.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s).");
    } catch (IOException e) {
        System.out.println("Could not train Ensemble classifier IOException on training data file.");
    } catch (Exception e) {
        System.out.println("Could not train Ensemble classifier Exception building model.");
    }

    try {
        // A different ARFF loader used here (compared to above) as
        // the ARFF file may be extremely large. In which case the whole
        // file cannot be read in. Instead it is read in incrementally.
        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));

        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("Ensemble Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;

            double classification1 = learner1.classifyInstance(current);
            double classification2 = learner2.classifyInstance(current);
            double classification3 = learner3.classifyInstance(current);
            double classification5 = learner5.classifyInstance(current);

            // All classifiers must agree. This is a very primitive ensemble strategy!
            if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) {
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        System.out.println("Test set instances: " + instanceNumber);

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s).");
    } catch (Exception e) {
        System.out.println("Could not test Ensemble classifier due to an error.");
    }
}

From source file:cs.man.ac.uk.predict.Predictor.java

License:Open Source License

public static void makePredictionsJ48(String trainPath, String testPath, String resultPath) {
    /**//from  w  w w .j  a v a  2s  .  c  om
     * The decision tree classifier.
     */
    J48 learner = new J48();

    System.out.println("Training set: " + trainPath);
    System.out.println("Test set: " + testPath);

    System.out.println("Training J48");
    long startTime = System.nanoTime();
    try {
        BufferedReader reader = new BufferedReader(new FileReader(trainPath));
        Instances data = new Instances(reader);
        data.setClassIndex(data.numAttributes() - 1);
        System.out.println("Training data length: " + data.numInstances());
        learner.buildClassifier(data);

        long endTime = System.nanoTime();
        long nanoseconds = endTime - startTime;
        double seconds = (double) nanoseconds / 1000000000.0;
        System.out.println("Training J48 completed in " + nanoseconds + " (ns) or " + seconds + " (s)");
    } catch (IOException e) {
        System.out.println("Could not train J48 classifier IOException on training data file");
    } catch (Exception e) {
        System.out.println("Could not train J48 classifier Exception building model");
    }

    try {
        // Prepare data for testing
        //BufferedReader reader = new BufferedReader( new FileReader(testPath));
        //Instances data = new Instances(reader);
        //data.setClassIndex(data.numAttributes() - 1);

        ArffLoader loader = new ArffLoader();
        loader.setFile(new File(testPath));
        Instances data = loader.getStructure();
        data.setClassIndex(data.numAttributes() - 1);

        System.out.println("J48 Classifier is ready.");
        System.out.println("Testing on all instances avaialable.");
        System.out.println("Test set instances: " + data.numInstances());

        startTime = System.nanoTime();

        int instanceNumber = 0;

        // label instances
        Instance current;

        //for (int i = 0; i < data.numInstances(); i++) 
        while ((current = loader.getNextInstance(data)) != null) {
            instanceNumber += 1;

            //double classification = learner.classifyInstance(data.instance(i));
            double classification = learner.classifyInstance(current);
            //String instanceClass= Double.toString(data.instance(i).classValue());

            if (classification == 1)// Predicted positive, actually negative
            {
                Writer.append(resultPath, instanceNumber + "\n");
            }
        }

        long endTime = System.nanoTime();
        long duration = endTime - startTime;
        double seconds = (double) duration / 1000000000.0;

        System.out.println("Testing J48 completed in " + duration + " (ns) or " + seconds + " (s)");
    } catch (Exception e) {
        System.out.println("Could not test J48 classifier due to an error");
    }
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile1(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial1.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES//from  ww  w.jav a2  s  .  c o m
    Attribute attr[] = new Attribute[50];

    //numeric
    attr[0] = new Attribute("Autosentiment");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[1] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);

    // Add Instances
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(2);
        for (int j = 0; j < 2; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 1)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(1);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_autosentiment.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);

    // Comment out if not needed
    //Read classifier back
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Evaluate resample if needed
    //dataset = dataset.resample(new Random(42));
    //split to 70:30 learn and test set
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(1);
    test.setClassIndex(1);

    //Evaluate
    Evaluation eval = new Evaluation(dataset); //trainset
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile2(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial2.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES// w  w  w . ja  v a  2  s . com
    Attribute attr[] = new Attribute[50];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[3] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);

    // Add Instances
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(4);
        for (int j = 0; j < 4; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 3)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(3);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwords.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);

    // Comment out if not needed
    //Read classifier back
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Evaluate resample if needed
    //dataset = dataset.resample(new Random(42));
    //split to 70:30 learn and test set
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(3);
    test.setClassIndex(3);

    //Evaluate
    Evaluation eval = new Evaluation(dataset); //trainset
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile3(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial3.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES//from   w ww. j  a va  2  s .c  om
    Attribute attr[] = new Attribute[50];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[7] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);

    // Add Instances
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(8);
        for (int j = 0; j < 8; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 7)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(7);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndpos.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);

    // Comment out if not needed
    //Read classifier back
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Evaluate resample if needed
    //dataset = dataset.resample(new Random(42));
    //split to 70:30 learn and test set
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(7);
    test.setClassIndex(7);

    //Evaluate
    Evaluation eval = new Evaluation(dataset); //trainset
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile4(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial4.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES//from  ww  w .  j  a  v  a2 s .c  om
    Attribute attr[] = new Attribute[50];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[10] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);

    // Add Instances
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(11);
        for (int j = 0; j < 11; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 10)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(10);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_asAndpolarwordsAndposAnddep.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);

    // Comment out if not needed
    //Read classifier back
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Evaluate resample if needed
    //dataset = dataset.resample(new Random(42));
    //split to 70:30 learn and test set
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(10);
    test.setClassIndex(10);

    //Evaluate
    Evaluation eval = new Evaluation(dataset); //trainset
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}

From source file:csav2.Weka_additive.java

public void createTrainingFeatureFile5(String input) throws Exception {
    String file = "Classifier\\featurefile_additive_trial5.arff";
    ArffLoader loader = new ArffLoader();

    //ATTRIBUTES/*from  w w w  .  ja  v  a 2 s.  c  o  m*/
    Attribute attr[] = new Attribute[50];

    //numeric
    attr[0] = new Attribute("Autosentiment");
    attr[1] = new Attribute("PositiveMatch");
    attr[2] = new Attribute("NegativeMatch");
    attr[3] = new Attribute("FW");
    attr[4] = new Attribute("JJ");
    attr[5] = new Attribute("RB");
    attr[6] = new Attribute("RB_JJ");
    attr[7] = new Attribute("amod");
    attr[8] = new Attribute("acomp");
    attr[9] = new Attribute("advmod");
    attr[10] = new Attribute("BLPos");
    attr[11] = new Attribute("BLNeg");

    //class
    FastVector classValue = new FastVector(3);
    classValue.addElement("p");
    classValue.addElement("n");
    classValue.addElement("o");
    attr[12] = new Attribute("answer", classValue);

    FastVector attrs = new FastVector();
    attrs.addElement(attr[0]);
    attrs.addElement(attr[1]);
    attrs.addElement(attr[2]);
    attrs.addElement(attr[3]);
    attrs.addElement(attr[4]);
    attrs.addElement(attr[5]);
    attrs.addElement(attr[6]);
    attrs.addElement(attr[7]);
    attrs.addElement(attr[8]);
    attrs.addElement(attr[9]);
    attrs.addElement(attr[10]);
    attrs.addElement(attr[11]);
    attrs.addElement(attr[12]);

    // Add Instances
    Instances dataset = new Instances("my_dataset", attrs, 0);

    if (new File(file).isFile()) {
        loader.setFile(new File(file));
        dataset = loader.getDataSet();
    }

    System.out.println("-----------------------------------------");
    System.out.println(input);
    System.out.println("-----------------------------------------");

    StringTokenizer tokenizer = new StringTokenizer(input);

    while (tokenizer.hasMoreTokens()) {
        Instance example = new Instance(13);
        for (int j = 0; j < 13; j++) {
            String st = tokenizer.nextToken();
            System.out.println(j + " " + st);
            if (j == 0)
                example.setValue(attr[j], Float.parseFloat(st));
            else if (j == 12)
                example.setValue(attr[j], st);
            else
                example.setValue(attr[j], Integer.parseInt(st));
        }
        dataset.add(example);
    }

    //Save dataset
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataset);
    saver.setFile(new File(file));
    saver.writeBatch();

    //Read dataset
    loader.setFile(new File(file));
    dataset = loader.getDataSet();

    //Build classifier
    dataset.setClassIndex(12);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);

    //Save classifier
    String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndbl.model";
    OutputStream os = new FileOutputStream(file1);
    ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
    objectOutputStream.writeObject(classifier);

    // Comment out if not needed
    //Read classifier back
    InputStream is = new FileInputStream(file1);
    ObjectInputStream objectInputStream = new ObjectInputStream(is);
    classifier = (Classifier) objectInputStream.readObject();
    objectInputStream.close();

    //Evaluate resample if needed
    //dataset = dataset.resample(new Random(42));
    //split to 70:30 learn and test set
    double percent = 70.0;
    int trainSize = (int) Math.round(dataset.numInstances() * percent / 100);
    int testSize = dataset.numInstances() - trainSize;
    Instances train = new Instances(dataset, 0, trainSize);
    Instances test = new Instances(dataset, trainSize, testSize);
    train.setClassIndex(12);
    test.setClassIndex(12);

    //Evaluate
    Evaluation eval = new Evaluation(dataset); //trainset
    eval.crossValidateModel(classifier, dataset, 10, new Random(1));
    System.out.println("EVALUATION:\n" + eval.toSummaryString());
    System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure());
    System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision());
    System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall());
}