List of usage examples for weka.core.Instances.testCV
public Instances testCV(int numFolds, int numFold)
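Instances.testCV(numFolds, numFold) returns the test partition for fold numFold of a numFolds-fold cross-validation on the dataset; its counterpart trainCV returns the complementary training partition. Both methods partition the dataset in its current order, so callers randomize (and, for a nominal class, stratify) first. For orientation, here is a minimal sketch of the standard loop the examples below all build on; the dataset path and the choice of J48 are placeholder assumptions, not taken from any example on this page:

import java.util.Random;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class TestCVSketch {
    public static void main(String[] args) throws Exception {
        // hypothetical ARFF path; replace with a real dataset
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        int numFolds = 10;
        Random rand = new Random(1);
        data.randomize(rand);
        data.stratify(numFolds); // only meaningful for a nominal class

        Evaluation eval = new Evaluation(data);
        for (int fold = 0; fold < numFolds; fold++) {
            // trainCV and testCV return complementary partitions of the data
            Instances train = data.trainCV(numFolds, fold, rand);
            Instances test = data.testCV(numFolds, fold);
            Classifier cls = new J48();
            cls.buildClassifier(train);
            eval.evaluateModel(cls, test);
        }
        System.out.println(eval.toSummaryString());
    }
}

Passing the Random to trainCV shuffles the resulting training set before the classifier is built; testCV has no such overload, so the test fold always comes straight from the stratified order.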
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/**
 * n-fold cross validation with noise (independent fp and fn).
 *
 * @param classifier   the classifier to evaluate
 * @param dataset      the dataset to cross-validate on
 * @param folds        the number of folds
 * @param fpPercentage the percentage of false-positive noise to inject
 * @param fnPercentage the percentage of false-negative noise to inject
 * @return the evaluation statistics
 */
public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds,
        BigDecimal fpPercentage, BigDecimal fnPercentage) {

    // noise manager
    NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager();

    // randomize the order of instances
    Instances randDataset = new Instances(dataset);
    randDataset.randomize(RandomizationManager.randomGenerator);

    // cross-validation
    Evaluation eval = null;
    try {
        eval = new Evaluation(randDataset);
    } catch (Exception e) {
        e.printStackTrace();
    }

    for (int n = 0; n < folds; n++) {
        Instances test = randDataset.testCV(folds, n);
        Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator);

        // copy the train set so the original is not modified
        Instances noisyTrain = new Instances(train);
        // inject the requested level of noise into the copied train set
        noiseInjectionManager.addNoiseToDataset(noisyTrain, fpPercentage, fnPercentage);

        // build and evaluate the classifier
        Classifier clsCopy;
        try {
            clsCopy = Classifier.makeCopy(classifier);
            // train the model on the noisy train set, test on the clean fold
            clsCopy.buildClassifier(noisyTrain);
            eval.evaluateModel(clsCopy, test);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // collect the evaluation results for the n-fold cross validation
    Double precision = eval.precision(Settings.classificationChoice);
    Double recall = eval.recall(Settings.classificationChoice);
    Double fmeasure = eval.fMeasure(Settings.classificationChoice);
    Double classificationTP = eval.numTruePositives(Settings.classificationChoice);
    Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice);
    Double classificationFP = eval.numFalsePositives(Settings.classificationChoice);
    Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice);
    Double kappa = eval.kappa();

    return new Stats(classificationTP, classificationTN, classificationFP, classificationFN,
            kappa, precision, recall, fmeasure);
}
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/**
 * n-fold cross validation with noise (combined fp and fn).
 *
 * @param classifier             the classifier to evaluate
 * @param dataset                the dataset to cross-validate on
 * @param folds                  the number of folds
 * @param combinedFpFnPercentage the combined percentage of fp and fn noise to inject
 * @return the evaluation statistics
 */
public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds,
        BigDecimal combinedFpFnPercentage) {

    // noise manager
    NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager();

    // randomize the order of instances
    Instances randDataset = new Instances(dataset);
    randDataset.randomize(RandomizationManager.randomGenerator);

    // cross-validation
    Evaluation eval = null;
    try {
        eval = new Evaluation(randDataset);
    } catch (Exception e) {
        e.printStackTrace();
    }

    for (int n = 0; n < folds; n++) {
        Instances test = randDataset.testCV(folds, n);
        Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator);

        // copy the train set so the original is not modified
        Instances noisyTrain = new Instances(train);
        // inject the requested level of noise into the copied train set
        noiseInjectionManager.addNoiseToDataset(noisyTrain, combinedFpFnPercentage);

        // build and evaluate the classifier
        Classifier clsCopy;
        try {
            clsCopy = Classifier.makeCopy(classifier);
            // train the model on the noisy train set, test on the clean fold
            clsCopy.buildClassifier(noisyTrain);
            eval.evaluateModel(clsCopy, test);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // collect the evaluation results for the n-fold cross validation
    Double precision = eval.precision(Settings.classificationChoice);
    Double recall = eval.recall(Settings.classificationChoice);
    Double fmeasure = eval.fMeasure(Settings.classificationChoice);
    Double classificationTP = eval.numTruePositives(Settings.classificationChoice);
    Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice);
    Double classificationFP = eval.numFalsePositives(Settings.classificationChoice);
    Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice);
    Double kappa = eval.kappa();

    return new Stats(classificationTP, classificationTN, classificationFP, classificationFN,
            kappa, precision, recall, fmeasure);
}
From source file:es.upm.dit.gsi.barmas.dataset.utils.DatasetSplitter.java
License:Open Source License
/**
 * Splits the original dataset into a test set, a centralized train set, and
 * per-agent train sets for each fold.
 *
 * @param folds               the number of folds
 * @param minAgents           the minimum number of agents
 * @param maxAgents           the maximum number of agents
 * @param originalDatasetPath path to the original CSV dataset
 * @param outputDir           the output directory
 * @param scenario            the scenario name
 * @param logger              the logger
 */
public void splitDataset(int folds, int minAgents, int maxAgents, String originalDatasetPath,
        String outputDir, String scenario, Logger logger) {

    int ratioint = (int) ((1 / (double) folds) * 100);
    double roundedratio = ((double) ratioint) / 100;

    // Look for essentials
    List<String[]> essentials = this.getEssentials(originalDatasetPath, logger);

    for (int fold = 0; fold < folds; fold++) {
        String outputDirWithRatio = outputDir + "/" + roundedratio + "testRatio/iteration-" + fold;
        File dir = new File(outputDirWithRatio);
        if (!dir.exists() || !dir.isDirectory()) {
            dir.mkdirs();
        }

        logger.finer("--> splitDataset()");
        logger.fine("Creating experiment.info...");

        try {
            Instances originalData = this.getDataFromCSV(originalDatasetPath);
            originalData.randomize(new Random());
            originalData.stratify(folds);

            // Test dataset
            Instances testData = originalData.testCV(folds, fold);
            CSVSaver saver = new CSVSaver();
            ArffSaver arffsaver = new ArffSaver();
            File file = new File(outputDirWithRatio + File.separator + "test-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(testData);
                saver.setFile(file);
                saver.writeBatch();
            }

            file = new File(outputDirWithRatio + File.separator + "test-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(testData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
            }

            // Bayes central dataset
            Instances trainData = originalData.trainCV(folds, fold);
            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.csv");
            if (!file.exists()) {
                saver.resetOptions();
                saver.setInstances(trainData);
                saver.setFile(file);
                saver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.csv"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            file = new File(outputDirWithRatio + File.separator + "bayes-central-dataset.arff");
            if (!file.exists()) {
                arffsaver.resetOptions();
                arffsaver.setInstances(trainData);
                arffsaver.setFile(file);
                arffsaver.writeBatch();
                this.copyFileUsingApacheCommonsIO(file,
                        new File(outputDirWithRatio + File.separator + "bayes-central-dataset-noEssentials.arff"),
                        logger);
                CsvWriter w = new CsvWriter(new FileWriter(file, true), ',');
                for (String[] essential : essentials) {
                    w.writeRecord(essential);
                }
                w.close();
            }

            // Agent datasets
            CsvReader csvreader = new CsvReader(new FileReader(new File(originalDatasetPath)));
            csvreader.readHeaders();
            String[] headers = csvreader.getHeaders();
            csvreader.close();

            for (int agents = minAgents; agents <= maxAgents; agents++) {
                this.createExperimentInfoFile(folds, agents, originalDatasetPath, outputDirWithRatio,
                        scenario, logger);
                HashMap<String, CsvWriter> writers = new HashMap<String, CsvWriter>();
                String agentsDatasetsDir = outputDirWithRatio + File.separator + agents + "agents";
                HashMap<String, CsvWriter> arffWriters = new HashMap<String, CsvWriter>();
                File f = new File(agentsDatasetsDir);
                if (!f.isDirectory()) {
                    f.mkdirs();
                }

                Instances copy = new Instances(trainData);
                copy.delete();

                for (int i = 0; i < agents; i++) {
                    String fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.csv";
                    file = new File(fileName);
                    if (!file.exists()) {
                        CsvWriter writer = new CsvWriter(new FileWriter(fileName), ',');
                        writer.writeRecord(headers);
                        writers.put("AGENT" + i, writer);
                    }
                    fileName = agentsDatasetsDir + File.separator + "agent-" + i + "-dataset.arff";
                    file = new File(fileName);
                    if (!file.exists()) {
                        arffsaver.resetOptions();
                        arffsaver.setInstances(copy);
                        arffsaver.setFile(new File(fileName));
                        arffsaver.writeBatch();
                        CsvWriter arffwriter = new CsvWriter(new FileWriter(fileName, true), ',');
                        arffWriters.put("AGENT" + i, arffwriter);
                    }
                    logger.fine("AGENT" + i + " dataset created in csv and arff formats.");
                }

                // Append essentials to all agent datasets
                for (String[] essential : essentials) {
                    for (CsvWriter wr : writers.values()) {
                        wr.writeRecord(essential);
                    }
                    for (CsvWriter arffwr : arffWriters.values()) {
                        arffwr.writeRecord(essential);
                    }
                }

                // Deal the training instances out to the agents round-robin
                int agentCounter = 0;
                for (int j = 0; j < trainData.numInstances(); j++) {
                    Instance instance = trainData.instance(j);
                    CsvWriter writer = writers.get("AGENT" + agentCounter);
                    CsvWriter arffwriter = arffWriters.get("AGENT" + agentCounter);
                    String[] row = new String[instance.numAttributes()];
                    for (int a = 0; a < instance.numAttributes(); a++) {
                        row[a] = instance.stringValue(a);
                    }
                    if (writer != null) {
                        writer.writeRecord(row);
                    }
                    if (arffwriter != null) {
                        arffwriter.writeRecord(row);
                    }
                    agentCounter++;
                    if (agentCounter == agents) {
                        agentCounter = 0;
                    }
                }

                for (CsvWriter wr : writers.values()) {
                    wr.close();
                }
                for (CsvWriter arffwr : arffWriters.values()) {
                    arffwr.close();
                }
            }
        } catch (Exception e) {
            logger.severe("Exception while splitting dataset. ->");
            logger.severe(e.getMessage());
            System.exit(1);
        }
        logger.finest("Dataset for fold " + fold + " created.");
    }
    logger.finer("<-- splitDataset()");
}
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Performs a (stratified if class is nominal) cross-validation
 * for a classifier on a set of instances.
 *
 * @param classifier the classifier with any options set
 * @param data the data on which the cross-validation is to be performed
 * @param numFolds the number of folds for the cross-validation
 * @param random random number generator for randomization
 * @exception Exception if a classifier could not be generated
 *            successfully or the class is not defined
 */
public void crossValidateModel(Classifier classifier, Instances data, int numFolds, Random random)
        throws Exception {

    // Make a copy of the data so we can reorder it
    data = new Instances(data);
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(numFolds);
    }

    // Do the folds
    for (int i = 0; i < numFolds; i++) {
        Instances train = data.trainCV(numFolds, i, random);
        setPriors(train);
        Classifier copiedClassifier = Classifier.makeCopy(classifier);
        copiedClassifier.buildClassifier(train);
        Instances test = data.testCV(numFolds, i);
        evaluateModel(copiedClassifier, test);
    }
    m_NumFolds = numFolds;
}
From source file:gr.auth.ee.lcs.ArffTrainTestLoader.java
License:Open Source License
/**
 * Load instances into the global train store and create a test set.
 *
 * @param filename
 *            the .arff filename to be used
 * @param generateTestSet
 *            true if a test set is going to be generated
 * @throws IOException
 *             if the input file is not found
 */
public final void loadInstances(final String filename, final boolean generateTestSet) throws IOException {
    // Open .arff
    final Instances set = InstancesUtility.openInstance(filename);
    if (set.classIndex() < 0) {
        set.setClassIndex(set.numAttributes() - 1);
    }
    set.randomize(new Random());

    if (generateTestSet) {
        final int numOfFolds = (int) SettingsLoader.getNumericSetting("NumberOfFolds", 10);
        final int fold = (int) Math.floor(Math.random() * numOfFolds);
        trainSet = set.trainCV(numOfFolds, fold);
        testSet = set.testCV(numOfFolds, fold);
    } else {
        trainSet = set;
    }

    myLcs.instances = InstancesUtility.convertIntancesToDouble(trainSet);
    myLcs.labelCardinality = InstancesUtility.getLabelCardinality(trainSet);
}
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {

    // other options
    int folds = 10;

    // randomize data
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Instances predictedData = null;
    Evaluation eval = new Evaluation(randData);
    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // the above code is used by the StratifiedRemoveFolds filter; the
        // Explorer/Experimenter uses this instead:
        // Instances train = randData.trainCV(folds, n, rand);

        // locate the class attribute ("isBuggy") and its positive value index
        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }
        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }

        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // build and evaluate classifier
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);

        // add predictions
        // AddClassification filter = new AddClassification();
        // filter.setClassifier(pClassifier);
        // filter.setOutputClassification(true);
        // filter.setOutputDistribution(true);
        // filter.setOutputErrorFlag(true);
        // filter.setInputFormat(train);
        // Filter.useFilter(train, filter);
        // Instances pred = Filter.useFilter(test, filter);
        // if (predictedData == null)
        //     predictedData = new Instances(pred, 0);
        //
        // for (int j = 0; j < pred.numInstances(); j++)
        //     predictedData.add(pred.instance(j));
    }

    double accuracy = (eval.numTruePositives(positiveValueIndexOfClassFeature)
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature))
            / (eval.numTruePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalsePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalseNegatives(positiveValueIndexOfClassFeature)
                    + eval.numTrueNegatives(positiveValueIndexOfClassFeature));

    double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature)
            * eval.recall(positiveValueIndexOfClassFeature))
            / (eval.precision(positiveValueIndexOfClassFeature)
                    + eval.recall(positiveValueIndexOfClassFeature)));

    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    PrintWriter pw1 = new PrintWriter(wekaOutput);
    pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature));

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";"
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";"
            + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n");
}
From source file:j48.NBTreeNoSplit.java
License:Open Source License
/**
 * Utility method for fast 5-fold cross validation of a naive bayes model.
 *
 * @param fullModel a <code>NaiveBayesUpdateable</code> value
 * @param trainingSet an <code>Instances</code> value
 * @param r a <code>Random</code> value
 * @return a <code>double</code> value
 * @exception Exception if an error occurs
 */
public static double crossValidate(NaiveBayesUpdateable fullModel, Instances trainingSet, Random r)
        throws Exception {

    // make some copies for fast evaluation of 5-fold xval
    Classifier[] copies = Classifier.makeCopies(fullModel, 5);
    Evaluation eval = new Evaluation(trainingSet);

    // make some splits
    for (int j = 0; j < 5; j++) {
        Instances test = trainingSet.testCV(5, j);
        // "unlearn" these test instances: updating the full model with a
        // negated instance weight subtracts their counts, which is cheaper
        // than retraining on each training fold from scratch
        for (int k = 0; k < test.numInstances(); k++) {
            test.instance(k).setWeight(-test.instance(k).weight());
            ((NaiveBayesUpdateable) copies[j]).updateClassifier(test.instance(k));
            // reset the weight back to its original value
            test.instance(k).setWeight(-test.instance(k).weight());
        }
        eval.evaluateModel(copies[j], test);
    }
    return eval.incorrect();
}
From source file:j48.PruneableClassifierTree.java
License:Open Source License
/**
 * Method for building a pruneable classifier tree.
 *
 * @param data the data to build the tree from
 * @throws Exception if the tree can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can the classifier tree handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_seed);
    data.stratify(numSets);
    // grow the tree on the first numSets - 1 folds and hold out the last
    // fold for pruning
    buildTree(data.trainCV(numSets, numSets - 1, random), data.testCV(numSets, numSets - 1), !m_cleanup);
    if (pruneTheTree) {
        prune();
    }
    if (m_cleanup) {
        cleanup(new Instances(data, 0));
    }
}
From source file:jjj.asap.sas.ensemble.impl.CrossValidatedEnsemble.java
License:Open Source License
@Override
public StrongLearner build(int essaySet, String ensembleName, List<WeakLearner> learners) {

    // can't handle the empty case
    if (learners.isEmpty()) {
        return this.ensemble.build(essaySet, ensembleName, learners);
    }

    // create a dummy dataset
    DatasetBuilder builder = new DatasetBuilder();
    builder.addVariable("id");
    builder.addNominalVariable("class", Contest.getRubrics(essaySet));
    Instances dummy = builder.getDataset("dummy");

    // add data
    Map<Double, Double> groundTruth = Contest.getGoldStandard(essaySet);
    for (double id : learners.get(0).getPreds().keySet()) {
        dummy.add(new DenseInstance(1.0, new double[] { id, groundTruth.get(id) }));
    }

    // stratify
    dummy.sort(0);
    dummy.randomize(new Random(1));
    dummy.setClassIndex(1);
    dummy.stratify(nFolds);

    // now evaluate each fold
    Map<Double, Double> preds = new HashMap<Double, Double>();
    for (int k = 0; k < nFolds; k++) {
        Instances train = dummy.trainCV(nFolds, k);
        Instances test = dummy.testCV(nFolds, k);

        // drop the held-out ids from each weak learner's predictions
        List<WeakLearner> cvLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : learners) {
            WeakLearner copy = learner.copyOf();
            for (int i = 0; i < test.numInstances(); i++) {
                copy.getPreds().remove(test.instance(i).value(0));
                copy.getProbs().remove(test.instance(i).value(0));
            }
            cvLeaners.add(copy);
        }

        // train on this fold
        StrongLearner cv = this.ensemble.build(essaySet, ensembleName, cvLeaners);

        // restore the held-out predictions for testing
        List<WeakLearner> testLeaners = new ArrayList<WeakLearner>();
        for (WeakLearner learner : cv.getLearners()) {
            WeakLearner copy = learner.copyOf();
            copy.getPreds().clear();
            copy.getProbs().clear();
            WeakLearner source = find(copy.getName(), learners);
            for (int i = 0; i < test.numInstances(); i++) {
                double id = test.instance(i).value(0);
                copy.getPreds().put(id, source.getPreds().get(id));
                copy.getProbs().put(id, source.getProbs().get(id));
            }
            testLeaners.add(copy);
        }
        preds.putAll(this.ensemble.classify(essaySet, ensembleName, testLeaners, cv.getContext()));
    }

    // now prepare the final result
    StrongLearner strong = this.ensemble.build(essaySet, ensembleName, learners);
    double trainingError = strong.getKappa();
    double cvError = Calc.kappa(essaySet, preds, groundTruth);
    // Job.log(essaySet + "-" + ensembleName, "XVAL: training error = " + trainingError + " cv error = " + cvError);
    strong.setKappa(cvError);
    return strong;
}
From source file:liac.igmn.evaluation.Evaluator.java
License:Open Source License
public void crossValidation(IGMN model, Dataset dataset, int numFolds, int runs, boolean randomize) {
    confusionMatrix = new ConfusionMatrix(dataset.getClassesNames());
    Instances instances = dataset.getWekaDataset();
    int seed = 1;

    for (int run = 0; run < runs; run++) {
        if (randomize) {
            instances.randomize(new Random(seed));
            seed += 1;
        }

        if (verbose)
            System.out.println("RUN: " + (run + 1));

        for (int n = 0; n < numFolds; n++) {
            Instances train = instances.trainCV(numFolds, n);
            Instances test = instances.testCV(numFolds, n);
            SimpleMatrix trainData = MatrixUtil.instancesToMatrix(train);
            SimpleMatrix testData = MatrixUtil.instancesToMatrix(test);

            model.reset();
            if (verbose)
                System.out.println("TRAINING FOLD: " + (n + 1));
            model.train(trainData);

            if (verbose)
                System.out.println("TESTING...");
            // split the test matrix into input rows and target rows
            SimpleMatrix testInputs = testData.extractMatrix(0, dataset.getInputSize(), 0, SimpleMatrix.END);
            SimpleMatrix testTargets = testData.extractMatrix(dataset.getInputSize(),
                    dataset.getNumAttributes(), 0, SimpleMatrix.END);

            for (int i = 0; i < testInputs.numCols(); i++) {
                SimpleMatrix y = model.classify(testInputs.extractVector(false, i));
                SimpleMatrix target = testTargets.extractVector(false, i);
                int tInd = MatrixUtil.maxElementIndex(target);
                int yInd = MatrixUtil.maxElementIndex(y);
                confusionMatrix.addPrediction(tInd, yInd);
            }
        }
    }
    // average the confusion matrix over the runs
    confusionMatrix.set(confusionMatrix.divide(runs));
}