List of usage examples for the weka.core.Instances copy constructor
public Instances(Instances dataset, int capacity)
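Before the project examples below, here is a minimal self-contained sketch of what this constructor does: it copies only the header (the attribute structure) of an existing dataset into a new, empty Instances object, pre-sized to the given capacity. The file name "iris.arff" is a placeholder.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InstancesCopyDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff"); // placeholder ARFF path
        // Empty dataset sharing data's attribute structure, with room for
        // half of the original rows before any internal resizing.
        Instances subset = new Instances(data, data.numInstances() / 2);
        for (int i = 0; i < data.numInstances() / 2; i++) {
            subset.add(data.instance(i)); // add(...) copies each instance in
        }
        System.out.println(subset.numInstances() + " of " + data.numInstances() + " instances copied");
    }
}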
From source file:de.tudarmstadt.ukp.similarity.experiments.coling2012.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });

    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0); // empty copy of the prediction header
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    // Prepare output classification
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = (int) predInst.value(predInst.attribute(0)) - 1; // avoids the deprecated new Double(...)
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score + LF);
    FileUtils.writeStringToFile(
            new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/output.csv"),
            sb.toString());
}
From source file:decisiontree.MyC45.java
/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att) {
    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
        splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        splitData[(int) inst.value(att)].add(inst);
    }
    for (int i = 0; i < splitData.length; i++) {
        splitData[i].compactify();
    }
    return splitData;
}
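The cast (int) inst.value(att) in the loop above works because Weka stores a nominal value as the index of that value within the attribute's definition. A minimal sketch, where the attribute name "outlook" and label "sunny" are hypothetical:

// Nominal values are doubles holding the index into the attribute's value list.
Attribute att = data.attribute("outlook");   // hypothetical nominal attribute
Instance inst = data.instance(0);
int idx = (int) inst.value(att);             // e.g. 0, 1, or 2
System.out.println(att.value(idx));          // prints the label, e.g. "sunny"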
From source file:decisiontree.MyC45.java
/**
 * Splits a dataset according to the values of a numeric attribute.
 *
 * @param data the data which is to be split
 * @param att the attribute to be used for splitting
 * @param threshold the split point
 * @return the sets of instances produced by the split
 */
private Instances[] splitData(Instances data, Attribute att, double threshold) {
    Instances[] splitData = new Instances[2];
    for (int i = 0; i < 2; i++) {
        splitData[i] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        // Note: the attribute value is overwritten with the threshold (or 0)
        // before the instance is added to the corresponding branch.
        if (inst.value(att) >= threshold) {
            inst.setValue(att, threshold);
            splitData[1].add(inst);
        } else {
            inst.setValue(att, 0);
            splitData[0].add(inst);
        }
    }
    for (int i = 0; i < splitData.length; i++) {
        splitData[i].compactify();
    }
    return splitData;
}
From source file:decisiontree.MyID3.java
private Instances[] splitData(Instances data, Attribute att) {
    Instances[] splitData = new Instances[att.numValues()];
    for (int j = 0; j < att.numValues(); j++) {
        splitData[j] = new Instances(data, data.numInstances());
    }
    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        splitData[(int) inst.value(att)].add(inst);
    }
    for (Instances split : splitData) {
        split.compactify();
    }
    return splitData;
}
From source file:development.CrossValidateShapelets.java
public static Instances randomise(Instances train, int[] pos) {
    // Generate a random permutation into pos
    Random r = new Random();
    for (int i = 0; i < pos.length; i++)
        pos[i] = i;
    for (int i = 0; i < pos.length; i++) {
        int p1 = r.nextInt(pos.length);
        int p2 = r.nextInt(pos.length);
        int temp = pos[p1];
        pos[p1] = pos[p2];
        pos[p2] = temp;
    }
    Instances newD = new Instances(train, 0);
    for (int i = 0; i < pos.length; i++)
        newD.add(train.instance(pos[i]));
    return newD;
}
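Passing the pos array in, rather than allocating it inside the method, lets the caller recover the permutation that was applied. A hedged usage sketch, where "train.arff" is a placeholder path:

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

Instances train = DataSource.read("train.arff");   // placeholder ARFF path
int[] pos = new int[train.numInstances()];
Instances shuffled = CrossValidateShapelets.randomise(train, pos);
// pos[i] now holds the original index of shuffled's i-th instance, so the
// same reordering can be replayed on any parallel per-instance structure.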
From source file:development.SpectralTransformComparison.java
public void run() {
    // Set up the experiment parameters
    int nosCases = 400;
    int[] nosCasesPerClass = { nosCases / 2, nosCases / 2 };
    int runs = 50;
    int minParas = 2;
    int maxParas = 10;
    ArrayList<String> names = new ArrayList<>();
    Random rand = new Random();
    c = ACFDomainClassification.setSingleClassifiers(names);
    int length = m;
    try {
        int nosTrans = 3;
        Instances[] train = new Instances[nosTrans];
        Instances[] test = new Instances[nosTrans];
        double[][] sum = new double[train.length][c.length];
        double[][] sumSq = new double[train.length][c.length];
        PowerSpectrum ps = new PowerSpectrum();
        PowerCepstrum pc = new PowerCepstrum();
        pc.useFFT();
        FFT fft = new FFT();
        OutFile of = new OutFile(path + "mean_" + m + ".csv");
        OutFile of2 = new OutFile(path + "sd_" + m + ".csv");
        System.out.println(" Running length =" + m);
        of.writeLine("classifier,PS,PC,FFT");
        of2.writeLine("classifier,PS,PC,FFT");
        for (int i = 0; i < runs; i++) {
            // Generate data AND SET NOISE LEVEL
            c = ACFDomainClassification.setSingleClassifiers(names);
            if (i % 10 == 0)
                System.out.println(" m =" + m + " performing run =" + i);
            train = new Instances[nosTrans];
            test = new Instances[nosTrans];
            // Change to simulate sin waves.
            Instances rawTrain = SimulatePowerSpectrum.generateFFTDataSet(minParas, maxParas, length,
                    nosCasesPerClass, true);
            rawTrain.randomize(rand);
            Instances rawTest = new Instances(rawTrain, 0);
            for (int k = 0; k < nosCases / 2; k++) {
                Instance r = rawTrain.remove(0);
                rawTest.add(r);
            }
            // Generate transforms
            train[0] = ps.process(rawTrain);
            train[1] = pc.process(rawTrain);
            train[2] = fft.process(rawTrain);
            test[0] = ps.process(rawTest);
            test[1] = pc.process(rawTest);
            test[2] = fft.process(rawTest);
            // Measure classification accuracy
            for (int j = 0; j < test.length; j++) {
                for (int k = 0; k < c.length; k++) {
                    double a = ClassifierTools.singleTrainTestSplitAccuracy(c[k], train[j], test[j]);
                    sum[j][k] += a;
                    sumSq[j][k] += a * a;
                }
            }
        }
        DecimalFormat df = new DecimalFormat("###.###");
        System.out.print("\n m=" + length);
        for (int j = 0; j < c.length; j++) {
            of.writeString(names.get(j) + ",");
            of2.writeString(names.get(j) + ",");
            for (int i = 0; i < test.length; i++) {
                sum[i][j] /= runs;
                sumSq[i][j] = sumSq[i][j] / runs - sum[i][j] * sum[i][j];
                System.out.print("," + df.format(sum[i][j]) + " (" + df.format(sumSq[i][j]) + ")");
                of.writeString(df.format(sum[i][j]) + ",");
                of2.writeString(df.format(sumSq[i][j]) + ",");
            }
            of.writeString("\n");
            of2.writeString("\n");
        }
    } catch (Exception e) {
        System.out.println(" Error =" + e);
        e.printStackTrace();
        System.exit(0);
    }
}
From source file:dewaweebtreeclassifier.Sujeong.java
public Instances[] splitInstancesOnAttribute(Instances data, Attribute attr) {
    Instances[] splitInstances = new Instances[attr.numValues()];
    for (int i = 0; i < attr.numValues(); i++) {
        splitInstances[i] = new Instances(data, data.numInstances());
    }
    Enumeration enumInstance = data.enumerateInstances();
    while (enumInstance.hasMoreElements()) {
        Instance instance = (Instance) enumInstance.nextElement();
        splitInstances[(int) instance.value(attr)].add(instance);
    }
    for (int i = 0; i < attr.numValues(); i++) {
        splitInstances[i].compactify();
    }
    return splitInstances;
}
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/**
 * Splits a dataset according to the values of a nominal attribute.
 *
 * @param data the data which is to be split
 * @param attr the attribute to be used for splitting
 * @return the sets of instances produced by the split
 */
public Instances[] splitInstancesOnAttribute(Instances data, Attribute attr) {
    Instances[] splitInstances = new Instances[attr.numValues()];
    for (int i = 0; i < attr.numValues(); i++) {
        splitInstances[i] = new Instances(data, data.numInstances());
    }
    Enumeration enumInstance = data.enumerateInstances();
    while (enumInstance.hasMoreElements()) {
        Instance instance = (Instance) enumInstance.nextElement();
        splitInstances[(int) instance.value(attr)].add(instance);
    }
    for (int i = 0; i < attr.numValues(); i++) {
        splitInstances[i].compactify();
    }
    return splitInstances;
}
From source file:distributed.core.DistributedUtils.java
License:Open Source License
public static Instances makeHeaderWithSummaryAtts(Instances denormalized, boolean treatZerosAsMissing) {
    Instances header = new Instances(denormalized, 0);
    for (int i = 0; i < denormalized.numAttributes(); i++) {
        AttributeStats stats = denormalized.attributeStats(i);
        if (denormalized.attribute(i).isNumeric()) {
            NumericStats ns = new NumericStats(denormalized.attribute(i).name());
            if (!treatZerosAsMissing) {
                ns.getStats()[ArffSummaryNumericMetric.MIN.ordinal()] = stats.numericStats.min;
                ns.getStats()[ArffSummaryNumericMetric.MAX.ordinal()] = stats.numericStats.max;
                ns.getStats()[ArffSummaryNumericMetric.COUNT.ordinal()] = stats.numericStats.count;
                ns.getStats()[ArffSummaryNumericMetric.SUM.ordinal()] = stats.numericStats.sum;
                ns.getStats()[ArffSummaryNumericMetric.SUMSQ.ordinal()] = stats.numericStats.sumSq;
                ns.getStats()[ArffSummaryNumericMetric.MISSING.ordinal()] = stats.missingCount;
                ns.computeDerived();
            } else {
                ns = getNumericAttributeStatsSparse(denormalized, i);
            }
            Attribute newAtt = ns.makeAttribute();
            header.insertAttributeAt(newAtt, header.numAttributes());
        } else if (denormalized.attribute(i).isNominal()) {
            NominalStats nom = new NominalStats(denormalized.attribute(i).name());
            nom.setNumMissing(stats.missingCount);
            double[] labelFreqs = stats.nominalWeights;
            for (int j = 0; j < denormalized.attribute(i).numValues(); j++) {
                nom.add(denormalized.attribute(i).value(j), labelFreqs[j]);
            }
            Attribute newAtt = nom.makeAttribute();
            header.insertAttributeAt(newAtt, header.numAttributes());
        }
    }
    return header;
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });

    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);

    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions; use the filtered classifier so the ID attribute is
        // removed before prediction, matching the evaluation above
        AddClassification filter = new AddClassification();
        filter.setClassifier(filteredClassifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0); // empty copy of the prediction header
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }

    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = (int) predInst.value(predInst.attribute(0)) - 1; // avoids the deprecated new Double(...)
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString()
            + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString()
            + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}