List of usage examples for weka.core.Instances.randomize
public void randomize(Random random)
From source file:development.SpectralTransformComparison.java
public void run() { // Set up the int nosCases = 400; int[] nosCasesPerClass = { nosCases / 2, nosCases / 2 }; int runs = 50; int minParas = 2; int maxParas = 10; ArrayList<String> names = new ArrayList<>(); Random rand = new Random(); c = ACFDomainClassification.setSingleClassifiers(names); int length = m; try {/*from w ww. ja va 2s . co m*/ int nosTrans = 3; Instances[] train = new Instances[nosTrans]; Instances[] test = new Instances[nosTrans]; double[][] sum = new double[train.length][c.length]; double[][] sumSq = new double[train.length][c.length]; PowerSpectrum ps = new PowerSpectrum(); PowerCepstrum pc = new PowerCepstrum(); pc.useFFT(); FFT fft = new FFT(); OutFile of = new OutFile(path + "mean_" + m + ".csv"); OutFile of2 = new OutFile(path + "sd_" + m + ".csv"); System.out.println(" Running length =" + m); of.writeLine("classifier,PS,PC,FFT"); of2.writeLine("classifier,PS,PC,FFT"); for (int i = 0; i < runs; i++) { //Generate data AND SET NOISE LEVEL c = ACFDomainClassification.setSingleClassifiers(names); if (i % 10 == 0) System.out.println(" m =" + m + " performing run =" + i); train = new Instances[nosTrans]; test = new Instances[nosTrans]; //Change to simulate sin waves. 
Instances rawTrain = SimulatePowerSpectrum.generateFFTDataSet(minParas, maxParas, length, nosCasesPerClass, true); rawTrain.randomize(rand); Instances rawTest = new Instances(rawTrain, 0); for (int k = 0; k < nosCases / 2; k++) { Instance r = rawTrain.remove(0); rawTest.add(r); } //Generate transforms train[0] = ps.process(rawTrain); train[1] = pc.process(rawTrain); train[2] = fft.process(rawTrain); test[0] = ps.process(rawTest); test[1] = pc.process(rawTest); test[2] = fft.process(rawTest); //Measure classification accuracy for (int j = 0; j < test.length; j++) { for (int k = 0; k < c.length; k++) { double a = ClassifierTools.singleTrainTestSplitAccuracy(c[k], train[j], test[j]); sum[j][k] += a; sumSq[j][k] += a * a; } } } DecimalFormat df = new DecimalFormat("###.###"); System.out.print("\n m=" + length); for (int j = 0; j < c.length; j++) { of.writeString(names.get(j) + ","); of2.writeString(names.get(j) + ","); for (int i = 0; i < test.length; i++) { sum[i][j] /= runs; sumSq[i][j] = sumSq[i][j] / runs - sum[i][j] * sum[i][j]; System.out.print("," + df.format(sum[i][j]) + " (" + df.format(sumSq[i][j]) + ")"); of.writeString(df.format(sum[i][j]) + ","); of2.writeString(df.format(sumSq[i][j]) + ","); } of.writeString("\n"); of2.writeString("\n"); } } catch (Exception e) { System.out.println(" Error =" + e); e.printStackTrace(); System.exit(0); } }
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifier(WekaClassifier wekaClassifier, Dataset trainDataset, Dataset testDataset) throws Exception { Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the train instances and get the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + trainDataset.toString() + ".arff", "-o", MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff" }); Instances train = DataSource.read(MODELS_DIR + "/" + trainDataset.toString() + "-plusIDs.arff"); train.setClassIndex(train.numAttributes() - 1); // Add IDs to the test instances and get the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + testDataset.toString() + ".arff", "-o", MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff" }); Instances test = DataSource.read(MODELS_DIR + "/" + testDataset.toString() + "-plusIDs.arff"); test.setClassIndex(test.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data test.randomize(random); // Apply log filter // Filter logFilter = new LogFilter(); // logFilter.setInputFormat(train); // train = Filter.useFilter(train, logFilter); // logFilter.setInputFormat(test); // test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Prepare the output buffer AbstractOutput output = new PlainText(); output.setBuffer(new StringBuffer()); output.setHeader(test);// w w w.java 2 s.c o m output.setAttributes("first"); Evaluation eval = new Evaluation(train); 
eval.evaluateModel(filteredClassifier, test, output); // Convert predictions to CSV // Format: inst#, actual, predicted, error, probability, (ID) String[] scores = new String[new Double(eval.numInstances()).intValue()]; double[] probabilities = new double[new Double(eval.numInstances()).intValue()]; for (String line : output.getBuffer().toString().split("\n")) { String[] linesplit = line.split("\\s+"); // If there's been an error, the length of linesplit is 6, otherwise 5, // due to the error flag "+" int id; String expectedValue, classification; double probability; if (line.contains("+")) { id = Integer.parseInt(linesplit[6].substring(1, linesplit[6].length() - 1)); expectedValue = linesplit[2].substring(2); classification = linesplit[3].substring(2); probability = Double.parseDouble(linesplit[5]); } else { id = Integer.parseInt(linesplit[5].substring(1, linesplit[5].length() - 1)); expectedValue = linesplit[2].substring(2); classification = linesplit[3].substring(2); probability = Double.parseDouble(linesplit[4]); } scores[id - 1] = classification; probabilities[id - 1] = probability; } System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); // Output classifications StringBuilder sb = new StringBuilder(); for (String score : scores) sb.append(score.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".csv"), sb.toString()); // Output probabilities sb = new StringBuilder(); for (Double probability : probabilities) sb.append(probability.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".probabilities.csv"), sb.toString()); // Output predictions FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".predictions.txt"), 
output.getBuffer().toString()); // Output meta information sb = new StringBuilder(); sb.append(classifier.toString() + LF); sb.append(eval.toSummaryString() + LF); sb.append(eval.toMatrixString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + testDataset.toString() + "/" + wekaClassifier.toString() + "/" + testDataset.toString() + ".meta.txt"), sb.toString()); }
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception { // Set parameters int folds = 10; Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" }); Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff"); data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random); // Perform cross-validation Instances predictedData = null;//from www .j av a2s . co m Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter // Filter logFilter = new LogFilter(); // logFilter.setInputFormat(train); // train = Filter.useFilter(train, logFilter); // logFilter.setInputFormat(test); // test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(filteredClassifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances 
pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) predictedData = new Instances(pred, 0); for (int j = 0; j < pred.numInstances(); j++) predictedData.add(pred.instance(j)); } System.out.println(eval.toSummaryString()); System.out.println(eval.toMatrixString()); // Prepare output scores String[] scores = new String[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; String value = predInst.stringValue(predInst.attribute(valueIdx)); scores[id] = value; } // Output classifications StringBuilder sb = new StringBuilder(); for (String score : scores) sb.append(score.toString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"), sb.toString()); // Output prediction arff DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".predicted.arff", predictedData); // Output meta information sb = new StringBuilder(); sb.append(baseClassifier.toString() + LF); sb.append(eval.toSummaryString() + LF); sb.append(eval.toMatrixString() + LF); FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"), sb.toString()); }
From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); Instances data = DataSource.read( MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"); data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random); // Perform cross-validation Instances predictedData = null;//from ww w. ja v a2 s . c o m Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); 
filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }
From source file:dkpro.similarity.experiments.sts2013baseline.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception { for (Dataset dataset : datasets) { // Set parameters int folds = 10; Classifier baseClassifier = new LinearRegression(); // Set up the random number generator long seed = new Date().getTime(); Random random = new Random(seed); // Add IDs to the instances AddID.main(new String[] { "-i", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o", MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" }); String location = MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff"; Instances data = DataSource.read(location); if (data == null) { throw new IOException("Could not load data from: " + location); }/* w w w. j a va2 s .c o m*/ data.setClassIndex(data.numAttributes() - 1); // Instantiate the Remove filter Remove removeIDFilter = new Remove(); removeIDFilter.setAttributeIndices("first"); // Randomize the data data.randomize(random); // Perform cross-validation Instances predictedData = null; Evaluation eval = new Evaluation(data); for (int n = 0; n < folds; n++) { Instances train = data.trainCV(folds, n, random); Instances test = data.testCV(folds, n); // Apply log filter Filter logFilter = new LogFilter(); logFilter.setInputFormat(train); train = Filter.useFilter(train, logFilter); logFilter.setInputFormat(test); test = Filter.useFilter(test, logFilter); // Copy the classifier Classifier classifier = AbstractClassifier.makeCopy(baseClassifier); // Instantiate the FilteredClassifier FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(removeIDFilter); filteredClassifier.setClassifier(classifier); // Build the classifier filteredClassifier.buildClassifier(train); // Evaluate eval.evaluateModel(classifier, test); // Add predictions AddClassification filter = new AddClassification(); filter.setClassifier(classifier); 
filter.setOutputClassification(true); filter.setOutputDistribution(false); filter.setOutputErrorFlag(true); filter.setInputFormat(train); Filter.useFilter(train, filter); // trains the classifier Instances pred = Filter.useFilter(test, filter); // performs predictions on test set if (predictedData == null) { predictedData = new Instances(pred, 0); } for (int j = 0; j < pred.numInstances(); j++) { predictedData.add(pred.instance(j)); } } // Prepare output scores double[] scores = new double[predictedData.numInstances()]; for (Instance predInst : predictedData) { int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1; int valueIdx = predictedData.numAttributes() - 2; double value = predInst.value(predInst.attribute(valueIdx)); scores[id] = value; // Limit to interval [0;5] if (scores[id] > 5.0) { scores[id] = 5.0; } if (scores[id] < 0.0) { scores[id] = 0.0; } } // Output StringBuilder sb = new StringBuilder(); for (Double score : scores) { sb.append(score.toString() + LF); } FileUtils.writeStringToFile( new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"), sb.toString()); } }
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
/** * @param args//ww w . j a v a2 s .c o m * @throws IOException * @throws FileNotFoundException */ public static void main(final String[] args) throws FileNotFoundException, IOException { final String experiment_file = args[0]; final File root_directory; if (args.length > 1) { root_directory = new File(args[1]); } else { root_directory = new File("."); } final CsvConfigurationParser csv_config = new CsvConfigurationParser(new FileReader(experiment_file)); final String experiment_name = FilenameUtils.getBaseName(experiment_file); final File expr_directory = new File(root_directory, experiment_name); expr_directory.mkdirs(); final Csv.Writer csv = new Csv.Writer( new PrintStream(new FileOutputStream(new File(expr_directory, "results.csv")))); final String[] parameter_headers = new String[] { "kpca.kernel", "kpca.rbf.sigma", "kpca.random_forest.Ntrees", "kpca.random_forest.max_depth", "kpca.Nbases", "multiclass.classifier", "multiclass.random_forest.Ntrees", "multiclass.random_forest.max_depth", "pairwise_classifier.max_branching", "training.label_noise" }; csv.cell("domain").cell("abstraction"); for (final String p : parameter_headers) { csv.cell(p); } csv.cell("Ntrain").cell("Ntest").cell("ami.mean").cell("ami.variance").cell("ami.confidence").newline(); for (int expr = 0; expr < csv_config.size(); ++expr) { try { final KeyValueStore expr_config = csv_config.get(expr); final Configuration config = new Configuration(root_directory.getPath(), expr_directory.getName(), expr_config); System.out.println("[Loading '" + config.training_data_single + "']"); final Instances single = WekaUtil .readLabeledDataset(new File(root_directory, config.training_data_single + ".arff")); final Instances train = new Instances(single, 0); final int[] idx = Fn.range(0, single.size()); int instance_counter = 0; Fn.shuffle(config.rng, idx); final int Ntrain = config.getInt("Ntrain_games"); // TODO: Rename? 
final double label_noise = config.getDouble("training.label_noise"); final int Nlabels = train.classAttribute().numValues(); assert (Nlabels > 0); for (int i = 0; i < Ntrain; ++i) { final Instance inst = single.get(idx[instance_counter++]); if (label_noise > 0 && config.rng.nextDouble() < label_noise) { int noisy_label = 0; do { noisy_label = config.rng.nextInt(Nlabels); } while (noisy_label == (int) inst.classValue()); System.out.println("Noisy label (" + inst.classValue() + " -> " + noisy_label + ")"); inst.setClassValue(noisy_label); } train.add(inst); inst.setDataset(train); } final Fn.Function2<Boolean, Instance, Instance> plausible_p = createPlausiblePredicate(config); final int Ntest = config.Ntest_games; int Ntest_added = 0; final ArrayList<Instances> tests = new ArrayList<Instances>(); while (instance_counter < single.size() && Ntest_added < Ntest) { final Instance inst = single.get(idx[instance_counter++]); boolean found = false; for (final Instances test : tests) { // Note that 'plausible_p' should be transitive if (plausible_p.apply(inst, test.get(0))) { WekaUtil.addInstance(test, inst); if (test.size() == 30) { Ntest_added += test.size(); } else if (test.size() > 30) { Ntest_added += 1; } found = true; break; } } if (!found) { final Instances test = new Instances(single, 0); WekaUtil.addInstance(test, inst); tests.add(test); } } final Iterator<Instances> test_itr = tests.iterator(); while (test_itr.hasNext()) { if (test_itr.next().size() < 30) { test_itr.remove(); } } System.out.println("=== tests.size() = " + tests.size()); System.out.println("=== Ntest_added = " + Ntest_added); System.out.println("[Training]"); final Evaluator evaluator = createEvaluator(config, train); // final Instances transformed_test = evaluator.prepareInstances( test ); System.out.println("[Evaluating]"); final int Nxval = evaluator.isSensitiveToOrdering() ? 
10 : 1; final MeanVarianceAccumulator ami = new MeanVarianceAccumulator(); final MeanVarianceAccumulator errors = new MeanVarianceAccumulator(); final MeanVarianceAccumulator relative_error = new MeanVarianceAccumulator(); int c = 0; for (int xval = 0; xval < Nxval; ++xval) { for (final Instances test : tests) { // TODO: Debugging WekaUtil.writeDataset(new File(config.root_directory), "test_" + (c++), test); // transformed_test.randomize( new RandomAdaptor( config.rng ) ); // final ClusterContingencyTable ct = evaluator.evaluate( transformed_test ); test.randomize(new RandomAdaptor(config.rng)); final ClusterContingencyTable ct = evaluator.evaluate(test); System.out.println(ct); int Nerrors = 0; final MeanVarianceAccumulator mv = new MeanVarianceAccumulator(); for (int i = 0; i < ct.R; ++i) { final int max = Fn.max(ct.n[i]); Nerrors += (ct.a[i] - max); mv.add(((double) ct.a[i]) / ct.N * Nerrors / ct.a[i]); } errors.add(Nerrors); relative_error.add(mv.mean()); System.out.println("exemplar: " + test.get(0)); System.out.println("Nerrors = " + Nerrors); final PrintStream ct_out = new PrintStream( new FileOutputStream(new File(expr_directory, "ct_" + expr + "_" + xval + ".csv"))); ct.writeCsv(ct_out); ct_out.close(); final double ct_ami = ct.adjustedMutualInformation_max(); if (Double.isNaN(ct_ami)) { System.out.println("! ct_ami = NaN"); } else { ami.add(ct_ami); } System.out.println(); } } System.out.println("errors = " + errors.mean() + " (" + errors.confidence() + ")"); System.out.println( "relative_error = " + relative_error.mean() + " (" + relative_error.confidence() + ")"); System.out.println("AMI_max = " + ami.mean() + " (" + ami.confidence() + ")"); csv.cell(config.domain).cell(config.get("abstraction.discovery")); for (final String p : parameter_headers) { csv.cell(config.get(p)); } csv.cell(Ntrain).cell(Ntest).cell(ami.mean()).cell(ami.variance()).cell(ami.confidence()).newline(); } catch (final Exception ex) { ex.printStackTrace(); } } }
From source file:entity.DifficultyResamplingManager.java
License:Open Source License
/** * called by generateResampledSubdataset * // w w w.ja v a 2 s. c om * @param originalDataset * @param subdatasetDimensions * @return */ private Instances generateResampledSubdataset(Instances originalDataset, SubdatasetDimensions subdatasetDimensions) { // creates an empty dataset Instances resampledSubdataset = new Instances(originalDataset); resampledSubdataset.delete(); // randomize dataset instances order originalDataset.randomize(RandomizationManager.randomGenerator); // calc number of positives to insert int positivesToInsert = subdatasetDimensions.getP(); if (verbose) System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] positivesToInsert = " + positivesToInsert); // calc number of negatives to insert int negativesToInsert = subdatasetDimensions.getN(); // iterates over the original dataset instances for (int i = 0; i < originalDataset.numInstances(); i++) { // if instance is positive and more are needed in the new dataset, inserts into new dataset if ((positivesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex()) .equals(Settings.buggyLabel))) { resampledSubdataset.add(originalDataset.instance(i)); positivesToInsert--; } // if instance is negative and more are needed in the new dataset, inserts into new dataset if ((negativesToInsert > 0) && (originalDataset.instance(i).stringValue(originalDataset.classIndex()) .equals(Settings.nonbuggyLabel))) { resampledSubdataset.add(originalDataset.instance(i)); negativesToInsert--; } } if (verbose) System.out.println("[DifficultyResamplingManager, generateResampledSubdataset] resampling terminato: " + this.printDatasetInfo(resampledSubdataset)); return resampledSubdataset; }
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/** * n fold cross validation without noise * //from w w w . j a v a 2 s . c o m * @param classifier * @param dataset * @param folds * @return */ public Stats crossValidate(Classifier classifier, Instances dataset, int folds) { // randomizes order of instances Instances randDataset = new Instances(dataset); randDataset.randomize(RandomizationManager.randomGenerator); // cross-validation Evaluation eval = null; try { eval = new Evaluation(randDataset); } catch (Exception e) { e.printStackTrace(); } for (int n = 0; n < folds; n++) { Instances test = randDataset.testCV(folds, n); Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator); // build and evaluate classifier Classifier clsCopy; try { clsCopy = Classifier.makeCopy(classifier); clsCopy.buildClassifier(train); eval.evaluateModel(clsCopy, test); } catch (Exception e) { e.printStackTrace(); } } // output evaluation for the nfold cross validation Double precision = eval.precision(Settings.classificationChoice); Double recall = eval.recall(Settings.classificationChoice); Double fmeasure = eval.fMeasure(Settings.classificationChoice); Double classificationTP = eval.numTruePositives(Settings.classificationChoice); Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice); Double classificationFP = eval.numFalsePositives(Settings.classificationChoice); Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice); Double kappa = eval.kappa(); return new Stats(classificationTP, classificationTN, classificationFP, classificationFN, kappa, precision, recall, fmeasure); }
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/** * n fold cross validation with noise (independent fp and fn) * /*from www. ja v a 2 s. c o m*/ * @param classifier * @param dataset * @param folds * @return */ public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds, BigDecimal fpPercentage, BigDecimal fnPercentage) { // noise manager NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager(); // randomizes order of instances Instances randDataset = new Instances(dataset); randDataset.randomize(RandomizationManager.randomGenerator); // cross-validation Evaluation eval = null; try { eval = new Evaluation(randDataset); } catch (Exception e) { e.printStackTrace(); } for (int n = 0; n < folds; n++) { Instances test = randDataset.testCV(folds, n); Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator); // copies instances of train set to not modify the original Instances noisyTrain = new Instances(train); // injects level of noise in the copied train set noiseInjectionManager.addNoiseToDataset(noisyTrain, fpPercentage, fnPercentage); // build and evaluate classifier Classifier clsCopy; try { clsCopy = Classifier.makeCopy(classifier); // trains the model using a noisy train set clsCopy.buildClassifier(noisyTrain); eval.evaluateModel(clsCopy, test); } catch (Exception e) { e.printStackTrace(); } } // output evaluation for the nfold cross validation Double precision = eval.precision(Settings.classificationChoice); Double recall = eval.recall(Settings.classificationChoice); Double fmeasure = eval.fMeasure(Settings.classificationChoice); Double classificationTP = eval.numTruePositives(Settings.classificationChoice); Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice); Double classificationFP = eval.numFalsePositives(Settings.classificationChoice); Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice); Double kappa = eval.kappa(); return new Stats(classificationTP, classificationTN, 
classificationFP, classificationFN, kappa, precision, recall, fmeasure); }
From source file:entity.NfoldCrossValidationManager.java
License:Open Source License
/** * n fold cross validation with noise (combined fp and fn) * //from w w w . j a v a2s . com * @param classifier * @param dataset * @param folds * @return */ public Stats crossValidateWithNoise(Classifier classifier, Instances dataset, int folds, BigDecimal combinedFpFnPercentage) { // noise manager NoiseInjectionManager noiseInjectionManager = new NoiseInjectionManager(); // randomizes order of instances Instances randDataset = new Instances(dataset); randDataset.randomize(RandomizationManager.randomGenerator); // cross-validation Evaluation eval = null; try { eval = new Evaluation(randDataset); } catch (Exception e) { e.printStackTrace(); } for (int n = 0; n < folds; n++) { Instances test = randDataset.testCV(folds, n); Instances train = randDataset.trainCV(folds, n, RandomizationManager.randomGenerator); // copies instances of train set to not modify the original Instances noisyTrain = new Instances(train); // injects level of noise in the copied train set noiseInjectionManager.addNoiseToDataset(noisyTrain, combinedFpFnPercentage); // build and evaluate classifier Classifier clsCopy; try { clsCopy = Classifier.makeCopy(classifier); // trains the model using a noisy train set clsCopy.buildClassifier(noisyTrain); eval.evaluateModel(clsCopy, test); } catch (Exception e) { e.printStackTrace(); } } // output evaluation for the nfold cross validation Double precision = eval.precision(Settings.classificationChoice); Double recall = eval.recall(Settings.classificationChoice); Double fmeasure = eval.fMeasure(Settings.classificationChoice); Double classificationTP = eval.numTruePositives(Settings.classificationChoice); Double classificationTN = eval.numTrueNegatives(Settings.classificationChoice); Double classificationFP = eval.numFalsePositives(Settings.classificationChoice); Double classificationFN = eval.numFalseNegatives(Settings.classificationChoice); Double kappa = eval.kappa(); return new Stats(classificationTP, classificationTN, classificationFP, 
classificationFN, kappa, precision, recall, fmeasure); }