List of usage examples for weka.classifiers.Evaluation.toSummaryString()
@Override
public String toSummaryString()
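For orientation before the full examples below, here is a minimal, self-contained sketch of the typical call pattern. The file name "data.arff" is a hypothetical placeholder, and NaiveBayes stands in for any Weka classifier; the examples that follow use the same Evaluation/toSummaryString flow inside larger pipelines.

import java.util.Random;

import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ToSummaryStringDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("data.arff"); // hypothetical dataset path
        data.setClassIndex(data.numAttributes() - 1);  // class is the last attribute
        Evaluation eval = new Evaluation(data);        // statistics header taken from the data
        eval.crossValidateModel(new NaiveBayes(), data, 10, new Random(1)); // 10-fold CV
        System.out.println(eval.toSummaryString());    // accuracy, kappa, error measures
    }
}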
From source file:de.tudarmstadt.ukp.alignment.framework.combined.WekaMachineLearning.java
License:Apache License
/**
 * This method creates a serialized WEKA model file from an .arff file containing the annotated gold standard
 *
 * @param gs_arff the annotated gold standard in an .arff file
 * @param model output file for the model
 * @param output_eval if true, the evaluation of the trained classifier is printed (10-fold cross validation)
 * @throws Exception
 */
public static void createModelFromGoldstandard(String gs_arff, String model, boolean output_eval)
        throws Exception {
    DataSource source = new DataSource(gs_arff);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // remove ID attribute

    // Standard classifier; BNs proved most robust, but of course other classifiers are possible
    BayesNet bn = new BayesNet();

    // meta-classifier
    FilteredClassifier fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(bn);
    fc.buildClassifier(data); // build classifier

    SerializationHelper.write(model, fc);
    if (output_eval) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(fc, data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
        System.out.println(eval.toMatrixString());
        System.out.println(eval.toClassDetailsString());
    }
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifier(WekaClassifier wekaClassifier, Dataset trainDataset, Dataset testDataset)
        throws Exception {
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the train instances and get the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + trainDataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff" });
    Instances train = DataSource.read(MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff");
    train.setClassIndex(train.numAttributes() - 1);

    // Add IDs to the test instances and get the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + testDataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff" });
    Instances test = DataSource.read(MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff");
    test.setClassIndex(test.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    test.randomize(random);

    // Apply log filter
    // Filter logFilter = new LogFilter();
    // logFilter.setInputFormat(train);
    // train = Filter.useFilter(train, logFilter);
    // logFilter.setInputFormat(test);
    // test = Filter.useFilter(test, logFilter);

    // Copy the classifier
    Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

    // Instantiate the FilteredClassifier
    FilteredClassifier filteredClassifier = new FilteredClassifier();
    filteredClassifier.setFilter(removeIDFilter);
    filteredClassifier.setClassifier(classifier);

    // Build the classifier
    filteredClassifier.buildClassifier(train);

    // Prepare the output buffer
    AbstractOutput output = new PlainText();
    output.setBuffer(new StringBuffer());
    output.setHeader(test);
    output.setAttributes("first");

    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(filteredClassifier, test, output);

    // Convert predictions to CSV
    // Format: inst#, actual, predicted, error, probability, (ID)
    String[] scores = new String[new Double(eval.numInstances()).intValue()];
    double[] probabilities = new double[new Double(eval.numInstances()).intValue()];
    for (String line : output.getBuffer().toString().split("\n")) {
        String[] linesplit = line.split("\\s+");

        // If there's been an error, the length of linesplit is 6, otherwise 5,
        // due to the error flag "+"
        int id;
        String expectedValue, classification;
        double probability;

        if (line.contains("+")) {
            id = Integer.parseInt(linesplit[6].substring(1, linesplit[6].length() - 1));
            expectedValue = linesplit[2].substring(2);
            classification = linesplit[3].substring(2);
            probability = Double.parseDouble(linesplit[5]);
        } else {
            id = Integer.parseInt(linesplit[5].substring(1, linesplit[5].length() - 1));
            expectedValue = linesplit[2].substring(2);
            classification = linesplit[3].substring(2);
            probability = Double.parseDouble(linesplit[4]);
        }

        scores[id - 1] = classification;
        probabilities[id - 1] = probability;
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".csv"), sb.toString());

    // Output probabilities
    sb = new StringBuilder();
    for (Double probability : probabilities)
        sb.append(probability.toString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".probabilities.csv"),
            sb.toString());

    // Output predictions
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".predictions.txt"),
            output.getBuffer().toString());

    // Output meta information
    sb = new StringBuilder();
    sb.append(classifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/"
            + wekaClassifier.toString() + "/" + testDataset.toString() + ".meta.txt"), sb.toString());
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(classifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier

        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores
    String[] scores = new String[predictedData.numInstances()];

    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score.toString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);

    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.MekaProvider.java
License:Apache License
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    List<String> labelNames = ClassifierProvider.labelNames(Y);
    labelNames.stream().map(attr -> new Attribute(attr, Arrays.asList("y", "n")))
            .forEachOrdered(attributes::add);
    List<String> featureNames = ClassifierProvider.featureNames(X);
    featureNames.stream().map(Attribute::new).forEachOrdered(attributes::add);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, 0);
    datasetSchema.setClassIndex(labelNames.size());
    // add instances
    // due to the limitation of the interface definition, X, Y should be reorganized
    SetMultimap<Map<String, Double>, String> XY = HashMultimap.create();
    IntStream.range(0, X.size()).forEach(i -> XY.put(X.get(i), Y.get(i)));
    Instances trainingInstances = new Instances(datasetSchema, XY.size());
    for (Map.Entry<Map<String, Double>, Collection<String>> entry : XY.asMap().entrySet()) {
        Set<String> y = ImmutableSet.copyOf(entry.getValue());
        Map<String, Double> x = entry.getKey();
        SparseInstance instance = new SparseInstance(labelNames.size() + x.size());
        for (String labelName : labelNames) {
            instance.setValue(datasetSchema.attribute(labelName), y.contains(labelName) ? "y" : "n");
        }
        for (Map.Entry<String, Double> e : x.entrySet()) {
            instance.setValue(datasetSchema.attribute(e.getKey()), e.getValue());
        }
        trainingInstances.add(instance);
    }
    // training
    try {
        classifier = (MultiLabelClassifier) AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider.java
License:Apache License
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    ClassifierProvider.featureNames(X).stream().map(Attribute::new).forEachOrdered(attributes::add);
    Attribute label = new Attribute("__label__", ClassifierProvider.labelNames(Y));
    attributes.add(label);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, X.size());
    datasetSchema.setClass(label);
    // add instances
    Instances trainingInstances = new Instances(datasetSchema, X.size());
    if (balanceWeight) {
        Multiset<String> labelCounts = HashMultiset.create(Y);
        double maxCount = labelCounts.entrySet().stream().mapToInt(Multiset.Entry::getCount).max()
                .orElseThrow(AnalysisEngineProcessException::new);
        for (int i = 0; i < X.size(); i++) {
            String y = Y.get(i);
            double weight = maxCount / labelCounts.count(y);
            trainingInstances.add(newInstance(X.get(i), y, weight, trainingInstances));
        }
    } else {
        for (int i = 0; i < X.size(); i++) {
            trainingInstances.add(newInstance(X.get(i), Y.get(i), 1.0, trainingInstances));
        }
    }
    // training
    try {
        classifier = AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // write model and dataset schema
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // backup training dataset as arff file
    if (datasetExportFile != null) {
        try {
            ArffSaver saver = new ArffSaver();
            saver.setInstances(trainingInstances);
            saver.setFile(datasetExportFile);
            saver.writeBatch();
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
From source file:edu.teco.context.recognition.WekaManager.java
License:Apache License
public void testClassification() {
    // set class attribute (last attribute)
    testingData.setClassIndex(testingData.numAttributes() - 1);

    if (FrameworkContext.INFO)
        Log.i("WekaData", "Testing data:\n" + testingData.toString());

    // Test the model
    Evaluation eTest;
    try {
        eTest = new Evaluation(trainingData);
        eTest.evaluateModel(classifier, testingData);

        if (FrameworkContext.INFO)
            Log.i("WekaData", "\nClass detail:\n\n" + eTest.toClassDetailsString());

        // Print the result à la Weka explorer:
        String strSummary = eTest.toSummaryString();
        if (FrameworkContext.INFO)
            Log.i("WekaData", "----- Summary -----\n" + strSummary);

        // print the confusion matrix
        if (FrameworkContext.INFO)
            Log.i("WekaData", "----- Confusion Matrix -----\n" + eTest.toMatrixString());

        // print class details
        if (FrameworkContext.INFO)
            Log.i("WekaData", "----- Class Detail -----\n" + eTest.toClassDetailsString());

        notifyTestCalculated(strSummary);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/**
 * Prints the results stored in an Evaluation object to standard output
 * (summary, class results and confusion matrix)
 *
 * @param eval the Evaluation object whose results are printed
 * @throws Exception
 */
public void printClassifierResults(Evaluation eval) throws Exception {
    // Print the result à la Weka explorer:
    String strSummary = eval.toSummaryString();
    System.out.println(strSummary);

    // Print per class results
    String resPerClass = eval.toClassDetailsString();
    System.out.println(resPerClass);

    // Get the confusion matrix
    String cMatrix = eval.toMatrixString();
    System.out.println(cMatrix);

    System.out.println();
}
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.WekaWrapper.java
License:Apache License
public static String evaluateDataset(Classifier classifier, DataSet trainingDS, DataSet validationDS) {
    Instances training_ds = convertDataSetToInstances(trainingDS);
    Instances validation_ds = convertDataSetToInstances(validationDS);
    String summary;

    try {
        // Evaluate the dataset with Weka and return a summary
        Evaluation evaluation = new Evaluation(training_ds);
        evaluation.evaluateModel(classifier, validation_ds);
        summary = evaluation.toSummaryString();
    } catch (Exception e) {
        logger.error("Error while evaluating Dataset", e);
        throw new WekaWrapperException("Error while evaluating Dataset", e);
    }

    return summary;
}
From source file:ffnn.FFNN.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    FFNNTubesAI cls;
    Scanner scan = new Scanner(System.in);
    System.out.print("new / read? (n/r)");
    if (scan.next().equals("n")) {
        cls = new FFNNTubesAI();
    } else {
        cls = (FFNNTubesAI) TucilWeka.readModel();
    }
    int temp;
    Instances data = TucilWeka.readDataSet("C:\\Program Files\\Weka-3-8\\data\\Team.arff");
    // Display the attribute options
    for (int i = 0; i < data.numAttributes(); i++) {
        System.out.println(i + ". " + data.attribute(i));
    }
    System.out.print("Class Index : ");
    temp = scan.nextInt();
    data.setClassIndex(temp);
    data = preprocess(data);
    System.out.println(data);
    System.out.print("full train? (y/n)");
    if (scan.next().equals("y")) {
        try {
            cls.buildClassifier(data);
        } catch (Exception ex) {
            Logger.getLogger(FFNNTubesAI.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    int fold = 10;
    // FFNNTubesAI.printMatrix(cls.weight1, cls.input_layer + 1, cls.hidden_layer);
    // FFNNTubesAI.printMatrix(cls.weight2, cls.hidden_layer, cls.output_layer);
    // FFNNTubesAI.printMatrix(cls.bias2, 1, cls.output_layer);
    Evaluation eval = new Evaluation(data);
    System.out.print("eval/10-fold? (e/f)");
    if (scan.next().equals("e")) {
        eval.evaluateModel(cls, data);
    } else {
        eval.crossValidateModel(cls, data, fold, new Random(1));
    }
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
    System.out.println(eval.toClassDetailsString());
}
From source file:FFNN.MultiplePerceptron.java
public static void main(String args[]) throws Exception {
    // System.out.println("enter number of layers (0/1):");
    // Scanner input = new Scanner(System.in);
    // int layer = input.nextInt();
    // System.out.println("enter learning rate");
    // double rate = input.nextDouble();
    // int hidden = 0;
    // if (layer == 1) {
    //     System.out.println("enter number of neurons in the hidden layer");
    //     hidden = input.nextInt();
    // }
    //
    // System.out.print("Enter file name: ");
    // String filename = input.next();

    ConverterUtils.DataSource source = new ConverterUtils.DataSource(
            "D:\\Program Files\\Weka-3-8\\data\\iris.arff");
    Instances train = source.getDataSet();
    // Normalize nm = new Normalize();
    // nm.setInputFormat(train);
    // train = Filter.useFilter(train, nm);

    for (int i = 0; i < train.numAttributes(); i++)
        System.out.println(i + ". " + train.attribute(i).name());
    System.out.print("Enter the class index: ");
    // int classIdx = input.nextInt();
    train.setClassIndex(train.numAttributes() - 1);

    MultiplePerceptron mlp = new MultiplePerceptron(10000, 1, 13, train);
    mlp.buildClassifier(train);

    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(mlp, train);
    System.out.println(eval.toSummaryString());
    // System.out.println(eval.toMatrixString());
}