List of usage examples for weka.classifiers.trees J48 J48
J48
From source file:cs.man.ac.uk.classifiers.GetAUC.java
License:Open Source License
/** * Computes the AUC for the supplied learner. * @return the AUC as a double value./*from w ww.j a v a2 s . co m*/ */ @SuppressWarnings("unused") private static double validate5x2CV() { try { // other options int runs = 5; int folds = 2; double AUC_SUM = 0; // perform cross-validation for (int i = 0; i < runs; i++) { // randomize data int seed = i + 1; Random rand = new Random(seed); Instances randData = new Instances(data); randData.randomize(rand); if (randData.classAttribute().isNominal()) { System.out.println("Stratifying..."); randData.stratify(folds); } Evaluation eval = new Evaluation(randData); for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); // the above code is used by the StratifiedRemoveFolds filter, the // code below by the Explorer/Experimenter: // Instances train = randData.trainCV(folds, n, rand); // build and evaluate classifier String[] options = { "-U", "-A" }; J48 classifier = new J48(); //HTree classifier = new HTree(); classifier.setOptions(options); classifier.buildClassifier(train); eval.evaluateModel(classifier, test); // generate curve ThresholdCurve tc = new ThresholdCurve(); int classIndex = 0; Instances result = tc.getCurve(eval.predictions(), classIndex); // plot curve vmc = new ThresholdVisualizePanel(); AUC_SUM += ThresholdCurve.getROCArea(result); System.out.println("AUC: " + ThresholdCurve.getROCArea(result) + " \tAUC SUM: " + AUC_SUM); } } return AUC_SUM / ((double) runs * (double) folds); } catch (Exception e) { System.out.println("Exception validating data!"); return 0; } }
From source file:cs.man.ac.uk.mvc.ClassifierBuilder.java
License:Open Source License
/** * Builds and tests the classifier specified by the algorithm variable. * Note if no unlabelled data is in the test set, then meta data can be set to null. * @return confusion matrix describing binary classification outcomes. *//*from w w w . j a v a2 s. c o m*/ public int[][] test() { switch (algorithm) { case Classifiers.J48: return stdloadAndTest(new StandardAlgorithmTester(this.outputFile, "J48", this.verbose, new J48())); case Classifiers.MLP: return stdloadAndTest( new StandardAlgorithmTester(this.outputFile, "MLP", this.verbose, new MultilayerPerceptron())); case Classifiers.NB: return stdloadAndTest( new StandardAlgorithmTester(this.outputFile, "NB", this.verbose, new NaiveBayes())); case Classifiers.SVM: return stdloadAndTest(new StandardAlgorithmTester(this.outputFile, "SVM", this.verbose, new SMO())); case Classifiers.HTREE: return streamloadAndTest( new StreamAlgorithmTester(this.outputFile, "HTREE", this.verbose, new HoeffdingTree())); case Classifiers.GHVFDT: return streamloadAndTest( new StreamAlgorithmTester(this.outputFile, "GHVFDT", this.verbose, new GHVFDT())); case Classifiers.PNB: return streamloadAndTest(new StreamAlgorithmTester(this.outputFile, "PNB", this.verbose, new PNB())); case Classifiers.OCVFDT: return streamloadAndTest( new StreamAlgorithmTester(this.outputFile, "OCVFDT", this.verbose, new OCVFDT())); default: int[][] confusion_matrix = { { 0, 0 }, { 0, 0 } }; return confusion_matrix; } }
From source file:cs.man.ac.uk.predict.Predictor.java
License:Open Source License
public static void makePredictionsEnsembleNew(String trainPath, String testPath, String resultPath) { System.out.println("Training set: " + trainPath); System.out.println("Test set: " + testPath); /**//from w w w . j av a2 s. c om * The ensemble classifiers. This is a heterogeneous ensemble. */ J48 learner1 = new J48(); SMO learner2 = new SMO(); NaiveBayes learner3 = new NaiveBayes(); MultilayerPerceptron learner5 = new MultilayerPerceptron(); System.out.println("Training Ensemble."); long startTime = System.nanoTime(); try { BufferedReader reader = new BufferedReader(new FileReader(trainPath)); Instances data = new Instances(reader); data.setClassIndex(data.numAttributes() - 1); System.out.println("Training data length: " + data.numInstances()); learner1.buildClassifier(data); learner2.buildClassifier(data); learner3.buildClassifier(data); learner5.buildClassifier(data); long endTime = System.nanoTime(); long nanoseconds = endTime - startTime; double seconds = (double) nanoseconds / 1000000000.0; System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s)."); } catch (IOException e) { System.out.println("Could not train Ensemble classifier IOException on training data file."); } catch (Exception e) { System.out.println("Could not train Ensemble classifier Exception building model."); } try { String line = ""; // Read the file and display it line by line. BufferedReader in = null; // Read in and store each positive prediction in the tree map. try { //open stream to file in = new BufferedReader(new FileReader(testPath)); while ((line = in.readLine()) != null) { if (line.toLowerCase().contains("@data")) break; } } catch (Exception e) { } // A different ARFF loader used here (compared to above) as // the ARFF file may be extremely large. In which case the whole // file cannot be read in. Instead it is read in incrementally. ArffLoader loader = new ArffLoader(); loader.setFile(new File(testPath)); Instances data = loader.getStructure(); data.setClassIndex(data.numAttributes() - 1); System.out.println("Ensemble Classifier is ready."); System.out.println("Testing on all instances avaialable."); startTime = System.nanoTime(); int instanceNumber = 0; // label instances Instance current; while ((current = loader.getNextInstance(data)) != null) { instanceNumber += 1; line = in.readLine(); double classification1 = learner1.classifyInstance(current); double classification2 = learner2.classifyInstance(current); double classification3 = learner3.classifyInstance(current); double classification5 = learner5.classifyInstance(current); // All classifiers must agree. This is a very primitive ensemble strategy! if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) { if (line != null) { //System.out.println("Instance: "+instanceNumber+"\t"+line); //System.in.read(); } Writer.append(resultPath, instanceNumber + "\n"); } } in.close(); System.out.println("Test set instances: " + instanceNumber); long endTime = System.nanoTime(); long duration = endTime - startTime; double seconds = (double) duration / 1000000000.0; System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s)."); } catch (Exception e) { System.out.println("Could not test Ensemble classifier due to an error."); } }
From source file:cs.man.ac.uk.predict.Predictor.java
License:Open Source License
public static void makePredictionsEnsembleStream(String trainPath, String testPath, String resultPath) { System.out.println("Training set: " + trainPath); System.out.println("Test set: " + testPath); /**/*from ww w. ja v a 2 s . co m*/ * The ensemble classifiers. This is a heterogeneous ensemble. */ J48 learner1 = new J48(); SMO learner2 = new SMO(); NaiveBayes learner3 = new NaiveBayes(); MultilayerPerceptron learner5 = new MultilayerPerceptron(); System.out.println("Training Ensemble."); long startTime = System.nanoTime(); try { BufferedReader reader = new BufferedReader(new FileReader(trainPath)); Instances data = new Instances(reader); data.setClassIndex(data.numAttributes() - 1); System.out.println("Training data length: " + data.numInstances()); learner1.buildClassifier(data); learner2.buildClassifier(data); learner3.buildClassifier(data); learner5.buildClassifier(data); long endTime = System.nanoTime(); long nanoseconds = endTime - startTime; double seconds = (double) nanoseconds / 1000000000.0; System.out.println("Training Ensemble completed in " + nanoseconds + " (ns) or " + seconds + " (s)."); } catch (IOException e) { System.out.println("Could not train Ensemble classifier IOException on training data file."); } catch (Exception e) { System.out.println("Could not train Ensemble classifier Exception building model."); } try { // A different ARFF loader used here (compared to above) as // the ARFF file may be extremely large. In which case the whole // file cannot be read in. Instead it is read in incrementally. ArffLoader loader = new ArffLoader(); loader.setFile(new File(testPath)); Instances data = loader.getStructure(); data.setClassIndex(data.numAttributes() - 1); System.out.println("Ensemble Classifier is ready."); System.out.println("Testing on all instances avaialable."); startTime = System.nanoTime(); int instanceNumber = 0; // label instances Instance current; while ((current = loader.getNextInstance(data)) != null) { instanceNumber += 1; double classification1 = learner1.classifyInstance(current); double classification2 = learner2.classifyInstance(current); double classification3 = learner3.classifyInstance(current); double classification5 = learner5.classifyInstance(current); // All classifiers must agree. This is a very primitive ensemble strategy! if (classification1 == 1 && classification2 == 1 && classification3 == 1 && classification5 == 1) { Writer.append(resultPath, instanceNumber + "\n"); } } System.out.println("Test set instances: " + instanceNumber); long endTime = System.nanoTime(); long duration = endTime - startTime; double seconds = (double) duration / 1000000000.0; System.out.println("Testing Ensemble completed in " + duration + " (ns) or " + seconds + " (s)."); } catch (Exception e) { System.out.println("Could not test Ensemble classifier due to an error."); } }
From source file:cs.man.ac.uk.predict.Predictor.java
License:Open Source License
public static void makePredictionsJ48(String trainPath, String testPath, String resultPath) { /**//from w w w .j a v a 2s . c om * The decision tree classifier. */ J48 learner = new J48(); System.out.println("Training set: " + trainPath); System.out.println("Test set: " + testPath); System.out.println("Training J48"); long startTime = System.nanoTime(); try { BufferedReader reader = new BufferedReader(new FileReader(trainPath)); Instances data = new Instances(reader); data.setClassIndex(data.numAttributes() - 1); System.out.println("Training data length: " + data.numInstances()); learner.buildClassifier(data); long endTime = System.nanoTime(); long nanoseconds = endTime - startTime; double seconds = (double) nanoseconds / 1000000000.0; System.out.println("Training J48 completed in " + nanoseconds + " (ns) or " + seconds + " (s)"); } catch (IOException e) { System.out.println("Could not train J48 classifier IOException on training data file"); } catch (Exception e) { System.out.println("Could not train J48 classifier Exception building model"); } try { // Prepare data for testing //BufferedReader reader = new BufferedReader( new FileReader(testPath)); //Instances data = new Instances(reader); //data.setClassIndex(data.numAttributes() - 1); ArffLoader loader = new ArffLoader(); loader.setFile(new File(testPath)); Instances data = loader.getStructure(); data.setClassIndex(data.numAttributes() - 1); System.out.println("J48 Classifier is ready."); System.out.println("Testing on all instances avaialable."); System.out.println("Test set instances: " + data.numInstances()); startTime = System.nanoTime(); int instanceNumber = 0; // label instances Instance current; //for (int i = 0; i < data.numInstances(); i++) while ((current = loader.getNextInstance(data)) != null) { instanceNumber += 1; //double classification = learner.classifyInstance(data.instance(i)); double classification = learner.classifyInstance(current); //String instanceClass= Double.toString(data.instance(i).classValue()); if (classification == 1)// Predicted positive, actually negative { Writer.append(resultPath, instanceNumber + "\n"); } } long endTime = System.nanoTime(); long duration = endTime - startTime; double seconds = (double) duration / 1000000000.0; System.out.println("Testing J48 completed in " + duration + " (ns) or " + seconds + " (s)"); } catch (Exception e) { System.out.println("Could not test J48 classifier due to an error"); } }
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile1(String input) throws Exception { String file = "Classifier\\featurefile_additive_trial1.arff"; ArffLoader loader = new ArffLoader(); //ATTRIBUTES//from ww w.jav a2 s . c o m Attribute attr[] = new Attribute[50]; //numeric attr[0] = new Attribute("Autosentiment"); //class FastVector classValue = new FastVector(3); classValue.addElement("p"); classValue.addElement("n"); classValue.addElement("o"); attr[1] = new Attribute("answer", classValue); FastVector attrs = new FastVector(); attrs.addElement(attr[0]); attrs.addElement(attr[1]); // Add Instances Instances dataset = new Instances("my_dataset", attrs, 0); if (new File(file).isFile()) { loader.setFile(new File(file)); dataset = loader.getDataSet(); } System.out.println("-----------------------------------------"); System.out.println(input); System.out.println("-----------------------------------------"); StringTokenizer tokenizer = new StringTokenizer(input); while (tokenizer.hasMoreTokens()) { Instance example = new Instance(2); for (int j = 0; j < 2; j++) { String st = tokenizer.nextToken(); System.out.println(j + " " + st); if (j == 0) example.setValue(attr[j], Float.parseFloat(st)); else if (j == 1) example.setValue(attr[j], st); else example.setValue(attr[j], Integer.parseInt(st)); } dataset.add(example); } //Save dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataset); saver.setFile(new File(file)); saver.writeBatch(); //Read dataset loader.setFile(new File(file)); dataset = loader.getDataSet(); //Build classifier dataset.setClassIndex(1); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //Save classifier String file1 = "Classifier\\classifier_add_autosentiment.model"; OutputStream os = new FileOutputStream(file1); ObjectOutputStream objectOutputStream = new ObjectOutputStream(os); objectOutputStream.writeObject(classifier); // Comment out if not needed //Read classifier back InputStream is = new FileInputStream(file1); ObjectInputStream objectInputStream = new ObjectInputStream(is); classifier = (Classifier) objectInputStream.readObject(); objectInputStream.close(); //Evaluate resample if needed //dataset = dataset.resample(new Random(42)); //split to 70:30 learn and test set double percent = 70.0; int trainSize = (int) Math.round(dataset.numInstances() * percent / 100); int testSize = dataset.numInstances() - trainSize; Instances train = new Instances(dataset, 0, trainSize); Instances test = new Instances(dataset, trainSize, testSize); train.setClassIndex(1); test.setClassIndex(1); //Evaluate Evaluation eval = new Evaluation(dataset); //trainset eval.crossValidateModel(classifier, dataset, 10, new Random(1)); System.out.println("EVALUATION:\n" + eval.toSummaryString()); System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure()); System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision()); System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall()); }
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile2(String input) throws Exception { String file = "Classifier\\featurefile_additive_trial2.arff"; ArffLoader loader = new ArffLoader(); //ATTRIBUTES// w w w . ja v a 2 s . com Attribute attr[] = new Attribute[50]; //numeric attr[0] = new Attribute("Autosentiment"); attr[1] = new Attribute("PositiveMatch"); attr[2] = new Attribute("NegativeMatch"); //class FastVector classValue = new FastVector(3); classValue.addElement("p"); classValue.addElement("n"); classValue.addElement("o"); attr[3] = new Attribute("answer", classValue); FastVector attrs = new FastVector(); attrs.addElement(attr[0]); attrs.addElement(attr[1]); attrs.addElement(attr[2]); attrs.addElement(attr[3]); // Add Instances Instances dataset = new Instances("my_dataset", attrs, 0); if (new File(file).isFile()) { loader.setFile(new File(file)); dataset = loader.getDataSet(); } System.out.println("-----------------------------------------"); System.out.println(input); System.out.println("-----------------------------------------"); StringTokenizer tokenizer = new StringTokenizer(input); while (tokenizer.hasMoreTokens()) { Instance example = new Instance(4); for (int j = 0; j < 4; j++) { String st = tokenizer.nextToken(); System.out.println(j + " " + st); if (j == 0) example.setValue(attr[j], Float.parseFloat(st)); else if (j == 3) example.setValue(attr[j], st); else example.setValue(attr[j], Integer.parseInt(st)); } dataset.add(example); } //Save dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataset); saver.setFile(new File(file)); saver.writeBatch(); //Read dataset loader.setFile(new File(file)); dataset = loader.getDataSet(); //Build classifier dataset.setClassIndex(3); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //Save classifier String file1 = "Classifier\\classifier_add_asAndpolarwords.model"; OutputStream os = new FileOutputStream(file1); ObjectOutputStream objectOutputStream = new ObjectOutputStream(os); objectOutputStream.writeObject(classifier); // Comment out if not needed //Read classifier back InputStream is = new FileInputStream(file1); ObjectInputStream objectInputStream = new ObjectInputStream(is); classifier = (Classifier) objectInputStream.readObject(); objectInputStream.close(); //Evaluate resample if needed //dataset = dataset.resample(new Random(42)); //split to 70:30 learn and test set double percent = 70.0; int trainSize = (int) Math.round(dataset.numInstances() * percent / 100); int testSize = dataset.numInstances() - trainSize; Instances train = new Instances(dataset, 0, trainSize); Instances test = new Instances(dataset, trainSize, testSize); train.setClassIndex(3); test.setClassIndex(3); //Evaluate Evaluation eval = new Evaluation(dataset); //trainset eval.crossValidateModel(classifier, dataset, 10, new Random(1)); System.out.println("EVALUATION:\n" + eval.toSummaryString()); System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure()); System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision()); System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall()); }
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile3(String input) throws Exception { String file = "Classifier\\featurefile_additive_trial3.arff"; ArffLoader loader = new ArffLoader(); //ATTRIBUTES//from w ww. j a va 2 s .c om Attribute attr[] = new Attribute[50]; //numeric attr[0] = new Attribute("Autosentiment"); attr[1] = new Attribute("PositiveMatch"); attr[2] = new Attribute("NegativeMatch"); attr[3] = new Attribute("FW"); attr[4] = new Attribute("JJ"); attr[5] = new Attribute("RB"); attr[6] = new Attribute("RB_JJ"); //class FastVector classValue = new FastVector(3); classValue.addElement("p"); classValue.addElement("n"); classValue.addElement("o"); attr[7] = new Attribute("answer", classValue); FastVector attrs = new FastVector(); attrs.addElement(attr[0]); attrs.addElement(attr[1]); attrs.addElement(attr[2]); attrs.addElement(attr[3]); attrs.addElement(attr[4]); attrs.addElement(attr[5]); attrs.addElement(attr[6]); attrs.addElement(attr[7]); // Add Instances Instances dataset = new Instances("my_dataset", attrs, 0); if (new File(file).isFile()) { loader.setFile(new File(file)); dataset = loader.getDataSet(); } System.out.println("-----------------------------------------"); System.out.println(input); System.out.println("-----------------------------------------"); StringTokenizer tokenizer = new StringTokenizer(input); while (tokenizer.hasMoreTokens()) { Instance example = new Instance(8); for (int j = 0; j < 8; j++) { String st = tokenizer.nextToken(); System.out.println(j + " " + st); if (j == 0) example.setValue(attr[j], Float.parseFloat(st)); else if (j == 7) example.setValue(attr[j], st); else example.setValue(attr[j], Integer.parseInt(st)); } dataset.add(example); } //Save dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataset); saver.setFile(new File(file)); saver.writeBatch(); //Read dataset loader.setFile(new File(file)); dataset = loader.getDataSet(); //Build classifier dataset.setClassIndex(7); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //Save classifier String file1 = "Classifier\\classifier_add_asAndpolarwordsAndpos.model"; OutputStream os = new FileOutputStream(file1); ObjectOutputStream objectOutputStream = new ObjectOutputStream(os); objectOutputStream.writeObject(classifier); // Comment out if not needed //Read classifier back InputStream is = new FileInputStream(file1); ObjectInputStream objectInputStream = new ObjectInputStream(is); classifier = (Classifier) objectInputStream.readObject(); objectInputStream.close(); //Evaluate resample if needed //dataset = dataset.resample(new Random(42)); //split to 70:30 learn and test set double percent = 70.0; int trainSize = (int) Math.round(dataset.numInstances() * percent / 100); int testSize = dataset.numInstances() - trainSize; Instances train = new Instances(dataset, 0, trainSize); Instances test = new Instances(dataset, trainSize, testSize); train.setClassIndex(7); test.setClassIndex(7); //Evaluate Evaluation eval = new Evaluation(dataset); //trainset eval.crossValidateModel(classifier, dataset, 10, new Random(1)); System.out.println("EVALUATION:\n" + eval.toSummaryString()); System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure()); System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision()); System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall()); }
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile4(String input) throws Exception { String file = "Classifier\\featurefile_additive_trial4.arff"; ArffLoader loader = new ArffLoader(); //ATTRIBUTES//from ww w . j a v a2 s .c om Attribute attr[] = new Attribute[50]; //numeric attr[0] = new Attribute("Autosentiment"); attr[1] = new Attribute("PositiveMatch"); attr[2] = new Attribute("NegativeMatch"); attr[3] = new Attribute("FW"); attr[4] = new Attribute("JJ"); attr[5] = new Attribute("RB"); attr[6] = new Attribute("RB_JJ"); attr[7] = new Attribute("amod"); attr[8] = new Attribute("acomp"); attr[9] = new Attribute("advmod"); //class FastVector classValue = new FastVector(3); classValue.addElement("p"); classValue.addElement("n"); classValue.addElement("o"); attr[10] = new Attribute("answer", classValue); FastVector attrs = new FastVector(); attrs.addElement(attr[0]); attrs.addElement(attr[1]); attrs.addElement(attr[2]); attrs.addElement(attr[3]); attrs.addElement(attr[4]); attrs.addElement(attr[5]); attrs.addElement(attr[6]); attrs.addElement(attr[7]); attrs.addElement(attr[8]); attrs.addElement(attr[9]); attrs.addElement(attr[10]); // Add Instances Instances dataset = new Instances("my_dataset", attrs, 0); if (new File(file).isFile()) { loader.setFile(new File(file)); dataset = loader.getDataSet(); } System.out.println("-----------------------------------------"); System.out.println(input); System.out.println("-----------------------------------------"); StringTokenizer tokenizer = new StringTokenizer(input); while (tokenizer.hasMoreTokens()) { Instance example = new Instance(11); for (int j = 0; j < 11; j++) { String st = tokenizer.nextToken(); System.out.println(j + " " + st); if (j == 0) example.setValue(attr[j], Float.parseFloat(st)); else if (j == 10) example.setValue(attr[j], st); else example.setValue(attr[j], Integer.parseInt(st)); } dataset.add(example); } //Save dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataset); saver.setFile(new File(file)); saver.writeBatch(); //Read dataset loader.setFile(new File(file)); dataset = loader.getDataSet(); //Build classifier dataset.setClassIndex(10); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //Save classifier String file1 = "Classifier\\classifier_asAndpolarwordsAndposAnddep.model"; OutputStream os = new FileOutputStream(file1); ObjectOutputStream objectOutputStream = new ObjectOutputStream(os); objectOutputStream.writeObject(classifier); // Comment out if not needed //Read classifier back InputStream is = new FileInputStream(file1); ObjectInputStream objectInputStream = new ObjectInputStream(is); classifier = (Classifier) objectInputStream.readObject(); objectInputStream.close(); //Evaluate resample if needed //dataset = dataset.resample(new Random(42)); //split to 70:30 learn and test set double percent = 70.0; int trainSize = (int) Math.round(dataset.numInstances() * percent / 100); int testSize = dataset.numInstances() - trainSize; Instances train = new Instances(dataset, 0, trainSize); Instances test = new Instances(dataset, trainSize, testSize); train.setClassIndex(10); test.setClassIndex(10); //Evaluate Evaluation eval = new Evaluation(dataset); //trainset eval.crossValidateModel(classifier, dataset, 10, new Random(1)); System.out.println("EVALUATION:\n" + eval.toSummaryString()); System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure()); System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision()); System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall()); }
From source file:csav2.Weka_additive.java
public void createTrainingFeatureFile5(String input) throws Exception { String file = "Classifier\\featurefile_additive_trial5.arff"; ArffLoader loader = new ArffLoader(); //ATTRIBUTES/*from w w w . ja v a 2 s. c o m*/ Attribute attr[] = new Attribute[50]; //numeric attr[0] = new Attribute("Autosentiment"); attr[1] = new Attribute("PositiveMatch"); attr[2] = new Attribute("NegativeMatch"); attr[3] = new Attribute("FW"); attr[4] = new Attribute("JJ"); attr[5] = new Attribute("RB"); attr[6] = new Attribute("RB_JJ"); attr[7] = new Attribute("amod"); attr[8] = new Attribute("acomp"); attr[9] = new Attribute("advmod"); attr[10] = new Attribute("BLPos"); attr[11] = new Attribute("BLNeg"); //class FastVector classValue = new FastVector(3); classValue.addElement("p"); classValue.addElement("n"); classValue.addElement("o"); attr[12] = new Attribute("answer", classValue); FastVector attrs = new FastVector(); attrs.addElement(attr[0]); attrs.addElement(attr[1]); attrs.addElement(attr[2]); attrs.addElement(attr[3]); attrs.addElement(attr[4]); attrs.addElement(attr[5]); attrs.addElement(attr[6]); attrs.addElement(attr[7]); attrs.addElement(attr[8]); attrs.addElement(attr[9]); attrs.addElement(attr[10]); attrs.addElement(attr[11]); attrs.addElement(attr[12]); // Add Instances Instances dataset = new Instances("my_dataset", attrs, 0); if (new File(file).isFile()) { loader.setFile(new File(file)); dataset = loader.getDataSet(); } System.out.println("-----------------------------------------"); System.out.println(input); System.out.println("-----------------------------------------"); StringTokenizer tokenizer = new StringTokenizer(input); while (tokenizer.hasMoreTokens()) { Instance example = new Instance(13); for (int j = 0; j < 13; j++) { String st = tokenizer.nextToken(); System.out.println(j + " " + st); if (j == 0) example.setValue(attr[j], Float.parseFloat(st)); else if (j == 12) example.setValue(attr[j], st); else example.setValue(attr[j], Integer.parseInt(st)); } dataset.add(example); } //Save dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataset); saver.setFile(new File(file)); saver.writeBatch(); //Read dataset loader.setFile(new File(file)); dataset = loader.getDataSet(); //Build classifier dataset.setClassIndex(12); Classifier classifier = new J48(); classifier.buildClassifier(dataset); //Save classifier String file1 = "Classifier\\classifier_add_asAndpolarwordsAndposAnddepAndbl.model"; OutputStream os = new FileOutputStream(file1); ObjectOutputStream objectOutputStream = new ObjectOutputStream(os); objectOutputStream.writeObject(classifier); // Comment out if not needed //Read classifier back InputStream is = new FileInputStream(file1); ObjectInputStream objectInputStream = new ObjectInputStream(is); classifier = (Classifier) objectInputStream.readObject(); objectInputStream.close(); //Evaluate resample if needed //dataset = dataset.resample(new Random(42)); //split to 70:30 learn and test set double percent = 70.0; int trainSize = (int) Math.round(dataset.numInstances() * percent / 100); int testSize = dataset.numInstances() - trainSize; Instances train = new Instances(dataset, 0, trainSize); Instances test = new Instances(dataset, trainSize, testSize); train.setClassIndex(12); test.setClassIndex(12); //Evaluate Evaluation eval = new Evaluation(dataset); //trainset eval.crossValidateModel(classifier, dataset, 10, new Random(1)); System.out.println("EVALUATION:\n" + eval.toSummaryString()); System.out.println("WEIGHTED MEASURE:\n" + eval.weightedFMeasure()); System.out.println("WEIGHTED PRECISION:\n" + eval.weightedPrecision()); System.out.println("WEIGHTED RECALL:\n" + eval.weightedRecall()); }