List of usage examples for the weka.classifiers.Evaluation constructor
public Evaluation(Instances data) throws Exception
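The Instances argument defines the evaluation context (attribute structure, class index, and class priors); the examples below all follow the same pattern of constructing an Evaluation from a reference dataset and then calling evaluateModel or crossValidateModel. A minimal sketch of that pattern, assuming a hypothetical "train.arff" file whose class is the last attribute (file name and classifier choice are illustrative, not from any source file below):

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EvaluationExample {
    public static void main(String[] args) throws Exception {
        // "train.arff" is a hypothetical dataset used only for illustration
        Instances data = DataSource.read("train.arff");
        data.setClassIndex(data.numAttributes() - 1); // class is the last attribute
        // the dataset passed here establishes the evaluation context
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}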
From source file:edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.WekaProvider.java
License:Apache License
@Override
public void train(List<Map<String, Double>> X, List<String> Y, boolean crossValidation)
        throws AnalysisEngineProcessException {
    // create attribute (including label) info
    ArrayList<Attribute> attributes = new ArrayList<>();
    ClassifierProvider.featureNames(X).stream().map(Attribute::new).forEachOrdered(attributes::add);
    Attribute label = new Attribute("__label__", ClassifierProvider.labelNames(Y));
    attributes.add(label);
    String name = Files.getNameWithoutExtension(modelFile.getName());
    datasetSchema = new Instances(name, attributes, X.size());
    datasetSchema.setClass(label);
    // add instances
    Instances trainingInstances = new Instances(datasetSchema, X.size());
    if (balanceWeight) {
        Multiset<String> labelCounts = HashMultiset.create(Y);
        double maxCount = labelCounts.entrySet().stream().mapToInt(Multiset.Entry::getCount).max()
                .orElseThrow(AnalysisEngineProcessException::new);
        for (int i = 0; i < X.size(); i++) {
            String y = Y.get(i);
            double weight = maxCount / labelCounts.count(y);
            trainingInstances.add(newInstance(X.get(i), y, weight, trainingInstances));
        }
    } else {
        for (int i = 0; i < X.size(); i++) {
            trainingInstances.add(newInstance(X.get(i), Y.get(i), 1.0, trainingInstances));
        }
    }
    // training
    try {
        classifier = AbstractClassifier.forName(classifierName, options);
        classifier.buildClassifier(trainingInstances);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // write model and dataset schema
    try {
        SerializationHelper.write(modelFile.getAbsolutePath(), classifier);
        SerializationHelper.write(datasetSchemaFile.getAbsolutePath(), datasetSchema);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
    // back up the training dataset as an ARFF file
    if (datasetExportFile != null) {
        try {
            ArffSaver saver = new ArffSaver();
            saver.setInstances(trainingInstances);
            saver.setFile(datasetExportFile);
            saver.writeBatch();
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
    if (crossValidation) {
        try {
            Evaluation eval = new Evaluation(trainingInstances);
            Random rand = new Random();
            eval.crossValidateModel(classifier, trainingInstances, 10, rand);
            LOG.debug(eval.toSummaryString());
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }
}
From source file:edu.teco.context.recognition.WekaManager.java
License:Apache License
public void testClassification() {
    // set class attribute (last attribute)
    testingData.setClassIndex(testingData.numAttributes() - 1);
    if (FrameworkContext.INFO)
        Log.i("WekaData", "Testing data:\n" + testingData.toString());
    // test the model
    Evaluation eTest;
    try {
        eTest = new Evaluation(trainingData);
        eTest.evaluateModel(classifier, testingData);
        if (FrameworkContext.INFO)
            Log.i("WekaData", "\nClass detail:\n\n" + eTest.toClassDetailsString());
        // print the result as in the Weka explorer
        String strSummary = eTest.toSummaryString();
        if (FrameworkContext.INFO)
            Log.i("WekaData", "----- Summary -----\n" + strSummary);
        // print the confusion matrix
        if (FrameworkContext.INFO)
            Log.i("WekaData", "----- Confusion Matrix -----\n" + eTest.toMatrixString());
        // print class details
        if (FrameworkContext.INFO)
            Log.i("WekaData", "----- Class Detail -----\n" + eTest.toClassDetailsString());
        notifyTestCalculated(strSummary);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
public void crossValidate(int foldNum) throws Exception {
    System.out.println("WekaWrapper: " + foldNum + "-fold cross validation over train data.");
    System.err.println("WekaWrapper: " + foldNum + "-fold cross validation over train data.");
    Evaluation eTest = new Evaluation(traindata);
    eTest.crossValidateModel(this.MLclass, traindata, foldNum, new Random(1)); // seed = 1
    /* It remains for the future to inspect the random generation.
     * Using the same seed over a specific sequence of instances seems to generate
     * the same randomization, so for the same sequence of instances,
     * fold generation is always the same. */
    //eTest.crossValidateModel(this.MLclass, traindata, foldNum, new Random((int) (Math.random() * traindata.numInstances())));
    printClassifierResults(eTest);
}
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/**
 * Loads the model stored in the given file and evaluates it against the current test data.
 * The method exits with an error if no test data is present.
 *
 * @param modelPath
 * @throws Exception
 */
public void testModel(String modelPath) throws Exception {
    if ((testdata == null) || testdata.isEmpty()) {
        System.err.println("WekaWrapper: testModel() - no test data available, model won't be evaluated");
        System.exit(9);
    }
    // check model file
    if (!FileUtilsElh.checkFile(modelPath)) {
        System.err.println("WekaWrapper: testModel() - model couldn't be loaded");
        System.exit(8);
    }
    // deserialize model
    this.MLclass = (Classifier) weka.core.SerializationHelper.readAll(modelPath)[0];
    System.err.println("WekaWrapper: testModel() - Classifier ready.");
    Evaluation eTest = new Evaluation(this.testdata);
    eTest.evaluateModel(this.MLclass, this.testdata);
    System.err.println("WekaWrapper: testModel() - Test ready.");
    printClassifierResults(eTest);
}
From source file:elh.eus.absa.WekaWrapper.java
License:Open Source License
/**
 * Trains the current classifier with the current training data and tests it with the current test data.
 *
 * If no test data is currently available, the train data is split in two parts (train 90% / test 10%).
 *
 * @throws Exception
 */
public void trainTest() throws Exception {
    if ((testdata == null) || testdata.isEmpty()) {
        System.err.println(
                "WekaWrapper: trainTest() - test data is empty. Train data will be divided in two (90% train / 10% test)");
        //traindata.randomize(new Random((int) (Math.random() * traindata.numInstances())));
        /* It remains for the future to inspect the random generation.
         * Using the same seed over a specific sequence of instances seems to generate
         * the same randomization, so for the same sequence of instances,
         * fold generation is always the same. */
        traindata.randomize(new Random(1));
        Instances trainset90 = traindata.trainCV(10, 9);
        Instances testset10 = traindata.testCV(10, 9);
        setTestdata(testset10);
        setTraindata(trainset90);
    }
    // train the classifier
    this.MLclass.buildClassifier(this.traindata);
    System.err.println(" Classifier ready.");
    Evaluation eTest = new Evaluation(this.testdata);
    eTest.evaluateModel(this.MLclass, this.testdata);
    System.err.println("WekaWrapper: trainTest() - Test ready.");
    printClassifierResults(eTest);
}
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/**
 * n-fold cross validation without noise
 *
 * @param classifier
 * @param dataset
 * @param folds
 * @return
 */
public Stats crossValidate(Classifier classifier, Instances dataset, int folds) {
    // randomize order of instances
    Instances randDataset = new Instances(dataset);
    randDataset.randomize(RandomizationManager.randomGenerator);
    // cross-validation
    Evaluation eval = null;
    try {
        eval = new Evaluation(randDataset);
    } catch (Exception e) {
        e.printStackTrace();
    }
    for (int n = 0; n < folds; n++) {
        Instances test = randDataset.testCV(folds, n);
        Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator);
        // build and evaluate classifier
        Classifier clsCopy;
        try {
            clsCopy = Classifier.makeCopy(classifier);
            clsCopy.buildClassifier(train);
            eval.evaluateModel(clsCopy, test);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // output evaluation for the n-fold cross validation
    Double precision = eval.precision(Settings.classificationChoice);
    Double recall = eval.recall(Settings.classificationChoice);
    Double fmeasure = eval.fMeasure(Settings.classificationChoice);
    Double classificationTP = eval.numTruePositives(Settings.classificationChoice);
    Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice);
    Double classificationFP = eval.numFalsePositives(Settings.classificationChoice);
    Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice);
    Double kappa = eval.kappa();

    return new Stats(classificationTP, classificationTN, classificationFP, classificationFN, kappa, precision,
            recall, fmeasure);
}
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/**
 * n-fold cross validation with noise (independent fp and fn)
 *
 * @param classifier
 * @param dataset
 * @param folds
 * @return
 */
public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds,
        BigDecimal fpPercentage, BigDecimal fnPercentage) {
    // noise manager
    NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager();
    // randomize order of instances
    Instances randDataset = new Instances(dataset);
    randDataset.randomize(RandomizationManager.randomGenerator);
    // cross-validation
    Evaluation eval = null;
    try {
        eval = new Evaluation(randDataset);
    } catch (Exception e) {
        e.printStackTrace();
    }
    for (int n = 0; n < folds; n++) {
        Instances test = randDataset.testCV(folds, n);
        Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator);
        // copy instances of the train set so the original is not modified
        Instances noisyTrain = new Instances(train);
        // inject the given level of noise into the copied train set
        noiseInjectionManager.addNoiseToDataset(noisyTrain, fpPercentage, fnPercentage);
        // build and evaluate classifier
        Classifier clsCopy;
        try {
            clsCopy = Classifier.makeCopy(classifier);
            // train the model using the noisy train set
            clsCopy.buildClassifier(noisyTrain);
            eval.evaluateModel(clsCopy, test);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // output evaluation for the n-fold cross validation
    Double precision = eval.precision(Settings.classificationChoice);
    Double recall = eval.recall(Settings.classificationChoice);
    Double fmeasure = eval.fMeasure(Settings.classificationChoice);
    Double classificationTP = eval.numTruePositives(Settings.classificationChoice);
    Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice);
    Double classificationFP = eval.numFalsePositives(Settings.classificationChoice);
    Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice);
    Double kappa = eval.kappa();

    return new Stats(classificationTP, classificationTN, classificationFP, classificationFN, kappa, precision,
            recall, fmeasure);
}
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/**
 * n-fold cross validation with noise (combined fp and fn)
 *
 * @param classifier
 * @param dataset
 * @param folds
 * @return
 */
public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds,
        BigDecimal combinedFpFnPercentage) {
    // noise manager
    NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager();
    // randomize order of instances
    Instances randDataset = new Instances(dataset);
    randDataset.randomize(RandomizationManager.randomGenerator);
    // cross-validation
    Evaluation eval = null;
    try {
        eval = new Evaluation(randDataset);
    } catch (Exception e) {
        e.printStackTrace();
    }
    for (int n = 0; n < folds; n++) {
        Instances test = randDataset.testCV(folds, n);
        Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator);
        // copy instances of the train set so the original is not modified
        Instances noisyTrain = new Instances(train);
        // inject the given level of noise into the copied train set
        noiseInjectionManager.addNoiseToDataset(noisyTrain, combinedFpFnPercentage);
        // build and evaluate classifier
        Classifier clsCopy;
        try {
            clsCopy = Classifier.makeCopy(classifier);
            // train the model using the noisy train set
            clsCopy.buildClassifier(noisyTrain);
            eval.evaluateModel(clsCopy, test);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // output evaluation for the n-fold cross validation
    Double precision = eval.precision(Settings.classificationChoice);
    Double recall = eval.recall(Settings.classificationChoice);
    Double fmeasure = eval.fMeasure(Settings.classificationChoice);
    Double classificationTP = eval.numTruePositives(Settings.classificationChoice);
    Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice);
    Double classificationFP = eval.numFalsePositives(Settings.classificationChoice);
    Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice);
    Double kappa = eval.kappa();

    return new Stats(classificationTP, classificationTN, classificationFP, classificationFN, kappa, precision,
            recall, fmeasure);
}
From source file:epsi.i5.datamining.Weka.java
public void excutionAlgo() throws FileNotFoundException, IOException, Exception {
    BufferedReader reader = new BufferedReader(new FileReader("src/epsi/i5/data/" + fileOne + ".arff"));
    Instances data = new Instances(reader);
    reader.close();
    //System.out.println(data.attribute(0));
    data.setClass(data.attribute(0));

    NaiveBayes NB = new NaiveBayes();
    NB.buildClassifier(data);
    Evaluation naiveBayes = new Evaluation(data);
    naiveBayes.crossValidateModel(NB, data, 10, new Random(1));
    naiveBayes.evaluateModel(NB, data);
    //System.out.println(test.confusionMatrix() + "1");
    //System.out.println(test.correct() + "2");
    System.out.println("*****************************");
    System.out.println("******** Naive Bayes ********");
    System.out.println(naiveBayes.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(naiveBayes.pctCorrect());
    System.out.println("");

    J48 j = new J48();
    j.buildClassifier(data);
    Evaluation jeval = new Evaluation(data);
    jeval.crossValidateModel(j, data, 10, new Random(1));
    jeval.evaluateModel(j, data);
    System.out.println("*****************************");
    System.out.println("************ J48 ************");
    System.out.println(jeval.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(jeval.pctCorrect());
    System.out.println("");

    DecisionTable DT = new DecisionTable();
    DT.buildClassifier(data);
    Evaluation decisionTable = new Evaluation(data);
    decisionTable.crossValidateModel(DT, data, 10, new Random(1));
    decisionTable.evaluateModel(DT, data);
    System.out.println("*****************************");
    System.out.println("******* DecisionTable *******");
    System.out.println(decisionTable.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(decisionTable.pctCorrect());
    System.out.println("");

    OneR OR = new OneR();
    OR.buildClassifier(data);
    Evaluation oneR = new Evaluation(data);
    oneR.crossValidateModel(OR, data, 10, new Random(1));
    oneR.evaluateModel(OR, data);
    System.out.println("*****************************");
    System.out.println("************ OneR ***********");
    System.out.println(oneR.toMatrixString());
    System.out.println("*****************************");
    System.out.println("**** Pourcentage Correct ****");
    System.out.println(oneR.pctCorrect());

    // Polarity
    data.setClass(data.attribute(1));
    System.out.println("");

    M5Rules MR = new M5Rules();
    MR.buildClassifier(data);
    Evaluation m5rules = new Evaluation(data);
    m5rules.crossValidateModel(MR, data, 10, new Random(1));
    m5rules.evaluateModel(MR, data);
    System.out.println("*****************************");
    System.out.println("********** M5Rules **********");
    System.out.println(m5rules.correlationCoefficient());
    System.out.println("");

    LinearRegression LR = new LinearRegression();
    LR.buildClassifier(data);
    Evaluation linearR = new Evaluation(data);
    linearR.crossValidateModel(LR, data, 10, new Random(1));
    linearR.evaluateModel(LR, data);
    System.out.println("*****************************");
    System.out.println("********** linearR **********");
    System.out.println(linearR.correlationCoefficient());
}
From source file:es.bsc.autonomic.powermodeller.tools.classifiers.WekaWrapper.java
License:Apache License
public static String evaluateDataset(Classifier classifier, DataSet trainingDS, DataSet validationDS) {
    Instances training_ds = convertDataSetToInstances(trainingDS);
    Instances validation_ds = convertDataSetToInstances(validationDS);
    String summary;
    try {
        // evaluate dataset with Weka and return a summary
        Evaluation evaluation = new Evaluation(training_ds);
        evaluation.evaluateModel(classifier, validation_ds);
        summary = evaluation.toSummaryString();
    } catch (Exception e) {
        logger.error("Error while evaluating Dataset", e);
        throw new WekaWrapperException("Error while evaluating Dataset", e);
    }
    return summary;
}