List of usage examples for weka.core.Instances.delete(int)
public void delete(int index)
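The examples below come from real projects; before them, here is a minimal, self-contained sketch of the call itself. Since delete(int) shifts every later instance one position to the left, loops that delete while iterating either walk backwards through the indices or compensate the counter, and both patterns appear in the examples on this page. The file name "iris.arff" is only a placeholder for any ARFF dataset.

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DeleteExample {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is just a placeholder for any ARFF dataset
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // delete every instance with a missing value in attribute 0;
        // iterate backwards so that delete(i) does not shift indices that are still to be visited
        for (int i = data.numInstances() - 1; i >= 0; i--) {
            Instance inst = data.instance(i);
            if (inst.isMissing(0)) {
                data.delete(i);
            }
        }
        System.out.println("Remaining instances: " + data.numInstances());
    }
}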
From source file:core.ClusterEvaluationEX.java
License:Open Source License
public Instances DeleteNoise(Instances data) {
    noise = data.stringFreeStructure();
    for (int i = 0; i < data.numInstances(); i++) {
        if (data.instance(i).value(1) == -1) {
            // move the noise instance into the separate noise dataset
            noise.add(data.instance(i));
            data.delete(i);
            // stay at the same index: delete(i) shifted the remaining instances one position left
            i--;
        }
    }
    return data;
}
From source file:de.ugoe.cs.cpdp.dataprocessing.CLAMIProcessor.java
License:Apache License
/**
 * <p>
 * Applies the CLAMI processor to the data. The test data is also required, in order to
 * guarantee a consistent metric set.
 * </p>
 *
 * @param testdata
 *            test data; the data is not modified, only metrics are dropped
 * @param data
 *            data to which the CLAMI processor is applied
 */
public void applyCLAMI(Instances testdata, Instances data) {
    // first determine medians
    double[] medians = new double[data.numAttributes()];
    // get medians
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex()) {
            medians[j] = data.kthSmallestValue(j, (data.numInstances() + 1) >> 1);
        }
    }

    // now determine cluster number for each instance
    double[] clusterNumber = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
        int countHighValues = 0;
        Instance currentInstance = data.get(i);
        for (int j = 0; j < data.numAttributes(); j++) {
            if (j != data.classIndex()) {
                if (currentInstance.value(j) > medians[j]) {
                    countHighValues++;
                }
            }
        }
        clusterNumber[i] = countHighValues;
    }

    // determine median of cluster number
    Median m = new Median();
    double medianClusterNumber = m.evaluate(clusterNumber);

    // now we filter the metrics
    int[] numMetricViolations = new int[data.numAttributes()];
    for (int j = 0; j < data.numAttributes(); j++) {
        int currentViolations = 0;
        for (int i = 0; i < data.numInstances(); i++) {
            Instance currentInstance = data.get(i);
            if (j != data.classIndex()) {
                if (clusterNumber[i] > medianClusterNumber) {
                    // "buggy"
                    if (currentInstance.value(j) <= medians[j]) {
                        currentViolations++;
                    }
                } else {
                    // "not buggy"
                    if (currentInstance.value(j) > medians[j]) {
                        currentViolations++;
                    }
                }
            }
        }
        numMetricViolations[j] = currentViolations;
    }

    SortedSet<Integer> distinctViolationCounts = new TreeSet<>();
    for (int currentViolations : numMetricViolations) {
        distinctViolationCounts.add(currentViolations);
    }
    Iterator<Integer> violationCountInterator = distinctViolationCounts.iterator();

    int violationCutoff = violationCountInterator.next();
    // now we filter the data;
    // this is first tried with the metrics with fewest violations. if no buggy/bugfree
    // instances remain, this is repeated with the next metrics with second fewest violations,
    // and so on.
    // this part is a bit unclear from the description in the paper, but I confirmed with the
    // author that this is how they implemented it
    boolean[] cleanInstances = new boolean[data.numInstances()];
    int numCleanBuggyInstances = 0;
    int numCleanBugfreeInstances = 0;
    do {
        violationCutoff = violationCountInterator.next();
        cleanInstances = new boolean[data.numInstances()];
        numCleanBuggyInstances = 0;
        numCleanBugfreeInstances = 0;
        for (int i = 0; i < data.numInstances(); i++) {
            int currentViolations = 0;
            Instance currentInstance = data.get(i);
            for (int j = 0; j < data.numAttributes(); j++) {
                if (j != data.classIndex() && numMetricViolations[j] == violationCutoff) {
                    if (clusterNumber[i] > medianClusterNumber) {
                        // "buggy"
                        if (currentInstance.value(j) <= medians[j]) {
                            currentViolations++;
                        }
                    } else {
                        // "not buggy"
                        if (currentInstance.value(j) > medians[j]) {
                            currentViolations++;
                        }
                    }
                }
            }
            if (currentViolations == 0) {
                cleanInstances[i] = true;
                if (clusterNumber[i] > medianClusterNumber) {
                    numCleanBuggyInstances++;
                } else {
                    numCleanBugfreeInstances++;
                }
            } else {
                cleanInstances[i] = false;
            }
        }
    } while (numCleanBuggyInstances == 0 || numCleanBugfreeInstances == 0);

    // output some interesting information to provide insights into the CLAMI model
    Console.traceln(Level.FINE, "Selected Metrics and Median-threshold: ");
    for (int j = 0; j < data.numAttributes(); j++) {
        if (j != data.classIndex() && numMetricViolations[j] == violationCutoff) {
            Console.traceln(Level.FINE, "\t" + data.attribute(j).name() + ": " + medians[j]);
        }
    }

    // finally modify the instances
    // drop the metrics (also from the testdata)
    for (int j = data.numAttributes() - 1; j >= 0; j--) {
        if (j != data.classIndex() && numMetricViolations[j] != violationCutoff) {
            data.deleteAttributeAt(j);
            testdata.deleteAttributeAt(j);
        }
    }

    // drop the unclean instances
    for (int i = data.numInstances() - 1; i >= 0; i--) {
        if (!cleanInstances[i]) {
            data.delete(i);
        } else {
            // set the classification
            if (clusterNumber[i] > medianClusterNumber) {
                data.get(i).setClassValue(1.0d);
            } else {
                data.get(i).setClassValue(0.0d);
            }
        }
    }
}
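A minimal, hypothetical calling sketch for the processor above: the ARFF file names are placeholders, both datasets must share the same attributes with a binary class attribute set as the class index, and a default constructor is assumed, as for the other CrossPare processors.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClamiDriver {
    public static void main(String[] args) throws Exception {
        // placeholders: any two ARFF files with identical attributes and a binary class
        Instances traindata = DataSource.read("train.arff");
        Instances testdata = DataSource.read("test.arff");
        traindata.setClassIndex(traindata.numAttributes() - 1);
        testdata.setClassIndex(testdata.numAttributes() - 1);

        // assumes a no-argument constructor, as for the other CrossPare processors
        new CLAMIProcessor().applyCLAMI(testdata, traindata);
    }
}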
From source file:de.ugoe.cs.cpdp.dataprocessing.NominalAttributeFilter.java
License:Apache License
@Override
public void apply(Instances testdata, Instances traindata) {
    int indexOfConfidenceAttribute = -1;

    // Find index of the named confidence attribute to filter for
    for (int i = 0; i < traindata.numAttributes(); i++) {
        if (traindata.attribute(i).name().equals(nominalAttributeName)) {
            indexOfConfidenceAttribute = i;
        }
    }

    // if it was not found return
    if (indexOfConfidenceAttribute == -1) {
        return;
    }

    // Find index of nominal values
    Attribute confidenceAttribute = traindata.attribute(indexOfConfidenceAttribute);
    ArrayList<Object> nominalValuesOfConfidenceAttribute =
        Collections.list(confidenceAttribute.enumerateValues());
    ArrayList<Double> indexOfnominalAttributeValues = new ArrayList<Double>();

    for (int k = 0; k < nominalValuesOfConfidenceAttribute.size(); k++) {
        for (String attributeValue : nominalAttributeValues) {
            if (((String) nominalValuesOfConfidenceAttribute.get(k)).equals(attributeValue)) {
                indexOfnominalAttributeValues.add((double) k);
            }
        }
    }

    // Go through all instances and check if nominal attribute equals
    for (int j = traindata.numInstances() - 1; j >= 0; j--) {
        Instance wekaInstance = traindata.get(j);

        // delete all instances where the nominal attribute has the value of one of the parameters
        if (indexOfnominalAttributeValues.contains(wekaInstance.value(indexOfConfidenceAttribute))) {
            traindata.delete(j);
        }
    }
}
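The loop above implements the removal by hand with delete(int). If only the value-based filtering is needed, Weka's RemoveWithValues filter can do the same job. The helper below is a sketch, not part of NominalAttributeFilter; to my understanding the filter's default matching sense removes the matching instances, so check setInvertSelection if the opposite behavior is wanted.

import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.instance.RemoveWithValues;

public class NominalValueRemoval {

    // Hypothetical helper: drops the instances whose nominal attribute (1-based index attIndex)
    // takes one of the labels in labelIndices, e.g. "1,3".
    public static Instances removeByNominalValue(Instances data, int attIndex, String labelIndices)
            throws Exception {
        RemoveWithValues filter = new RemoveWithValues();
        filter.setAttributeIndex(String.valueOf(attIndex)); // 1-based attribute index
        filter.setNominalIndices(labelIndices);             // 1-based label indices to remove
        filter.setInputFormat(data);
        return Filter.useFilter(data, filter);
    }
}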
From source file:de.ugoe.cs.cpdp.dataselection.SynonymOutlierRemoval.java
License:Apache License
/**
 * <p>
 * Applies the synonym outlier removal.
 * </p>
 *
 * @param traindata
 *            data from which the outliers are removed.
 */
public void applySynonymRemoval(Instances traindata) {
    double minDistance[][] = new double[traindata.size()][traindata.numAttributes() - 1];
    double minDistanceAttribute[] = new double[traindata.numAttributes() - 1];
    double distance;
    for (int j = 0; j < minDistanceAttribute.length; j++) {
        minDistanceAttribute[j] = Double.MAX_VALUE;
    }
    // determine, per instance and attribute, the minimal distance to any other instance
    // (start at 0 so the distances are computed for every instance, not only the last one)
    for (int i1 = 0; i1 < traindata.size(); i1++) {
        int k = 0;
        for (int j = 0; j < traindata.numAttributes(); j++) {
            if (j != traindata.classIndex()) {
                minDistance[i1][k] = Double.MAX_VALUE;
                for (int i2 = 0; i2 < traindata.size(); i2++) {
                    if (i1 != i2) {
                        distance = Math.abs(traindata.get(i1).value(j) - traindata.get(i2).value(j));
                        if (distance < minDistance[i1][k]) {
                            minDistance[i1][k] = distance;
                        }
                        if (distance < minDistanceAttribute[k]) {
                            minDistanceAttribute[k] = distance;
                        }
                    }
                }
                k++;
            }
        }
    }
    // keep only instances whose minimal distance reaches the global per-attribute minimum for at
    // least one attribute; iterate backwards because delete(i) shifts the subsequent indices
    // (the loop bound uses minDistance[i].length, which excludes the class attribute)
    for (int i = traindata.size() - 1; i >= 0; i--) {
        boolean hasClosest = false;
        for (int j = 0; !hasClosest && j < minDistance[i].length; j++) {
            hasClosest = minDistance[i][j] <= minDistanceAttribute[j];
        }
        if (!hasClosest) {
            traindata.delete(i);
        }
    }
}
From source file:de.ugoe.cs.cpdp.execution.WithinProjectOrderedSplitExperiment.java
License:Apache License
/**
 * Executes the experiment with the steps as described in the class comment.
 *
 * @see Runnable#run()
 */
@Override
public void run() {
    final List<SoftwareVersion> versions = new LinkedList<>();

    for (IVersionLoader loader : config.getLoaders()) {
        versions.addAll(loader.load());
    }

    for (IVersionFilter filter : config.getVersionFilters()) {
        filter.apply(versions);
    }

    boolean writeHeader = true;
    int versionCount = 1;
    int testVersionCount = 0;
    int numTrainers = 0;

    for (SoftwareVersion testVersion : versions) {
        if (isVersion(testVersion, config.getTestVersionFilters())) {
            testVersionCount++;
        }
    }

    numTrainers += config.getSetWiseTrainers().size();
    numTrainers += config.getSetWiseTestdataAwareTrainers().size();
    numTrainers += config.getTrainers().size();
    numTrainers += config.getTestAwareTrainers().size();

    // sort versions
    Collections.sort(versions);

    for (SoftwareVersion testVersion : versions) {
        if (isVersion(testVersion, config.getTestVersionFilters())) {
            Console.traceln(Level.INFO,
                            String.format("[%s] [%02d/%02d] %s: starting",
                                          config.getExperimentName(), versionCount,
                                          testVersionCount, testVersion.getVersion()));
            int numResultsAvailable = resultsAvailable(testVersion);
            if (numResultsAvailable >= numTrainers * config.getRepetitions()) {
                Console.traceln(Level.INFO,
                                String.format("[%s] [%02d/%02d] %s: results already available; skipped",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion()));
                versionCount++;
                continue;
            }

            // Setup testdata and training data
            Instances testdata = testVersion.getInstances();
            List<Double> efforts = testVersion.getEfforts();

            // now split data into parts
            double percentage = 0.5; // 0.5 as default value
            String param = config.getExecutionStrategyParameters();
            if (config.getExecutionStrategyParameters() != null) {
                try {
                    percentage = Double.parseDouble(param);
                }
                catch (NumberFormatException e) {
                    throw new RuntimeException(
                        "invalid execution strategy parameter, must be numeric: " + param);
                }
            }
            int initialTestSize = testdata.size();
            Instances traindata = new Instances(testdata);
            for (int i = initialTestSize - 1; i >= 0; i--) {
                if ((((double) i) / initialTestSize) < percentage) {
                    testdata.delete(i);
                    if (efforts != null) {
                        efforts.remove(i);
                    }
                }
                else {
                    traindata.delete(i);
                }
            }

            for (IProcessesingStrategy processor : config.getPreProcessors()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying preprocessor %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              processor.getClass().getName()));
                processor.apply(testdata, traindata);
            }

            for (IPointWiseDataselectionStrategy dataselector : config.getPointWiseSelectors()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying pointwise selection %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              dataselector.getClass().getName()));
                traindata = dataselector.apply(testdata, traindata);
            }

            for (IProcessesingStrategy processor : config.getPostProcessors()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying setwise postprocessor %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              processor.getClass().getName()));
                processor.apply(testdata, traindata);
            }

            for (ITrainingStrategy trainer : config.getTrainers()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying trainer %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              trainer.getName()));
                trainer.apply(traindata);
            }

            for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying trainer %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              trainer.getName()));
                trainer.apply(testdata, traindata);
            }

            File resultsDir = new File(config.getResultsPath());
            if (!resultsDir.exists()) {
                resultsDir.mkdir();
            }

            for (IEvaluationStrategy evaluator : config.getEvaluators()) {
                Console.traceln(Level.FINE,
                                String.format("[%s] [%02d/%02d] %s: applying evaluator %s",
                                              config.getExperimentName(), versionCount,
                                              testVersionCount, testVersion.getVersion(),
                                              evaluator.getClass().getName()));
                List<ITrainer> allTrainers = new LinkedList<>();
                for (ITrainingStrategy trainer : config.getTrainers()) {
                    allTrainers.add(trainer);
                }
                for (ITestAwareTrainingStrategy trainer : config.getTestAwareTrainers()) {
                    allTrainers.add(trainer);
                }
                if (writeHeader) {
                    evaluator.setParameter(config.getResultsPath() + "/" +
                                           config.getExperimentName() + ".csv");
                }
                evaluator.apply(testdata, traindata, allTrainers, efforts, writeHeader,
                                config.getResultStorages());
                writeHeader = false;
            }

            Console.traceln(Level.INFO,
                            String.format("[%s] [%02d/%02d] %s: finished",
                                          config.getExperimentName(), versionCount,
                                          testVersionCount, testVersion.getVersion()));
            versionCount++;
        }
    }
}
From source file:de.unidue.langtech.grading.tc.LearningCurveTask.java
License:Open Source License
@Override
public void execute(TaskContext aContext) throws Exception {
    boolean multiLabel = false;

    for (Integer numberInstances : NUMBER_OF_TRAINING_INSTANCES) {
        for (int iteration = 0; iteration < ITERATIONS; iteration++) {
            File arffFileTrain = new File(
                aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TRAINING_DATA, AccessMode.READONLY)
                        .getPath() + "/" + TRAINING_DATA_FILENAME);
            File arffFileTest = new File(
                aContext.getStorageLocation(TEST_TASK_INPUT_KEY_TEST_DATA, AccessMode.READONLY)
                        .getPath() + "/" + TRAINING_DATA_FILENAME);

            Instances trainData = TaskUtils.getInstances(arffFileTrain, multiLabel);
            Instances testData = TaskUtils.getInstances(arffFileTest, multiLabel);

            if (numberInstances > trainData.size()) {
                continue;
            }

            Classifier cl = AbstractClassifier.forName(classificationArguments.get(0),
                classificationArguments.subList(1, classificationArguments.size()).toArray(new String[0]));

            Instances copyTestData = new Instances(testData);
            trainData = WekaUtils.removeOutcomeId(trainData, multiLabel);
            testData = WekaUtils.removeOutcomeId(testData, multiLabel);

            Random generator = new Random();
            generator.setSeed(System.nanoTime());
            trainData.randomize(generator);

            // remove fraction of training data that should not be used for training
            for (int i = trainData.size() - 1; i >= numberInstances; i--) {
                trainData.delete(i);
            }

            // file to hold prediction results
            File evalOutput = new File(
                aContext.getStorageLocation(TEST_TASK_OUTPUT_KEY, AccessMode.READWRITE).getPath() + "/"
                        + EVALUATION_DATA_FILENAME + "_" + numberInstances + "_" + iteration);

            // train the classifier on the train set split - not necessary in multilabel setup, but
            // in single label setup
            cl.buildClassifier(trainData);

            weka.core.SerializationHelper.write(evalOutput.getAbsolutePath(),
                WekaUtils.getEvaluationSinglelabel(cl, trainData, testData));
            testData = WekaUtils.getPredictionInstancesSingleLabel(testData, cl);
            testData = WekaUtils.addOutcomeId(testData, copyTestData, false);

            // // Write out the predictions
            // DataSink.write(aContext.getStorageLocation(TEST_TASK_OUTPUT_KEY, AccessMode.READWRITE)
            //         .getAbsolutePath() + "/" + PREDICTIONS_FILENAME + "_" + trainPercent, testData);
        }
    }
}
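Weka also offers a way to keep only the first n instances without calling delete(int) in a loop: the block-copy constructor Instances(Instances source, int first, int toCopy). The helper below is only a sketch of that alternative, not part of LearningCurveTask.

import weka.core.Instances;

public class KeepFirstN {

    // Sketch of an alternative to the delete() loop above: the copy constructor
    // Instances(Instances source, int first, int toCopy) copies a block of instances,
    // so keeping the first n (already randomized) instances needs no index bookkeeping.
    public static Instances keepFirst(Instances data, int n) {
        int toCopy = Math.min(n, data.numInstances());
        return new Instances(data, 0, toCopy);
    }
}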
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Take a certain number of a set of instances.
 *
 * @param instances
 * @param numInstances the number of instances to keep
 * @return a reduced set of instances according to the given number to keep
 */
public static Instances trimInstances(Instances instances, int numInstances) {
    Instances trimmedInstances = new Instances(instances);
    for (int i = trimmedInstances.numInstances() - 1; i >= numInstances; i--) {
        trimmedInstances.delete(i);
    }
    return trimmedInstances;
}
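A minimal usage sketch for trimInstances; the tiny single-attribute dataset is made up purely for illustration.

import java.util.ArrayList;

import edu.umbc.cs.maple.utils.WekaUtils;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class TrimDemo {
    public static void main(String[] args) {
        // build a tiny single-attribute dataset with five rows (purely illustrative)
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        Instances data = new Instances("demo", attrs, 5);
        for (int i = 0; i < 5; i++) {
            DenseInstance inst = new DenseInstance(1);
            inst.setValue(0, i);
            data.add(inst);
        }
        // keep only the first two instances
        Instances firstTwo = WekaUtils.trimInstances(data, 2);
        System.out.println(firstTwo.numInstances()); // prints 2
    }
}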
From source file:edu.umbc.cs.maple.utils.WekaUtils.java
License:Open Source License
/**
 * Extract a particular subset of the instances.
 *
 * @param instances
 * @param startIdx the start instance index
 * @param numInstancesToRetrieve the number of instances to retrieve
 * @return the specified subset of the instances.
 */
public static Instances subsetInstances(Instances instances, int startIdx, int numInstancesToRetrieve) {
    double possibleNumInstancesToRetrieve = instances.numInstances() - startIdx;
    if (numInstancesToRetrieve > possibleNumInstancesToRetrieve) {
        throw new IllegalArgumentException(
            "Cannot retrieve more than " + possibleNumInstancesToRetrieve + " instances.");
    }

    int endIdx = startIdx + numInstancesToRetrieve - 1;

    // delete all instance indices outside of [startIdx, endIdx]
    Instances subset = new Instances(instances);
    for (int i = subset.numInstances() - 1; i >= 0; i--) {
        if (i < startIdx || i > endIdx) {
            subset.delete(i);
        }
    }

    return subset;
}
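For comparison, the same subset can be obtained with the block-copy constructor Instances(Instances source, int first, int toCopy). The variant below is a sketch under that assumption, not the original WekaUtils helper.

import weka.core.Instances;

public class SubsetAlternative {

    // Sketch of an equivalent implementation using the block-copy constructor instead of
    // deleting everything outside [startIdx, endIdx]; behaves the same for valid arguments.
    public static Instances subsetInstances(Instances instances, int startIdx, int numInstancesToRetrieve) {
        if (numInstancesToRetrieve > instances.numInstances() - startIdx) {
            throw new IllegalArgumentException(
                "Cannot retrieve more than " + (instances.numInstances() - startIdx) + " instances.");
        }
        return new Instances(instances, startIdx, numInstancesToRetrieve);
    }
}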
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Evaluates a classifier with the options given in an array of strings. <p>
 *
 * Valid options are: <p>
 *
 * -t name of training file <br>
 * Name of the file with the training data. (required) <p>
 *
 * -T name of test file <br>
 * Name of the file with the test data. If missing a cross-validation is performed. <p>
 *
 * -c class index <br>
 * Index of the class attribute (1, 2, ...; default: last). <p>
 *
 * -x number of folds <br>
 * The number of folds for the cross-validation (default: 10). <p>
 *
 * -s random number seed <br>
 * Random number seed for the cross-validation (default: 1). <p>
 *
 * -m file with cost matrix <br>
 * The name of a file containing a cost matrix. <p>
 *
 * -l name of model input file <br>
 * Loads classifier from the given file. <p>
 *
 * -d name of model output file <br>
 * Saves classifier built from the training data into the given file. <p>
 *
 * -v <br>
 * Outputs no statistics for the training data. <p>
 *
 * -o <br>
 * Outputs statistics only, not the classifier. <p>
 *
 * -i <br>
 * Outputs detailed information-retrieval statistics per class. <p>
 *
 * -k <br>
 * Outputs information-theoretic statistics. <p>
 *
 * -p <br>
 * Outputs predictions for test instances (and nothing else). <p>
 *
 * -r <br>
 * Outputs cumulative margin distribution (and nothing else). <p>
 *
 * -g <br>
 * Only for classifiers that implement "Graphable." Outputs the graph representation of the
 * classifier (and nothing else). <p>
 *
 * @param classifier machine learning classifier
 * @param options the array of string containing the options
 * @exception Exception if model could not be evaluated successfully
 * @return a string describing the results
 */
public static String[] evaluateModel(Classifier classifier, String trainFileName,
        String objectOutputFileName) throws Exception {

    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    String testFileName, sourceClass, classIndexString, seedString, foldsString,
            objectInputFileName, attributeRangeString;
    boolean IRstatistics = false, noOutput = false, printClassifications = false,
            trainStatistics = true, printMargins = false, printComplexityStatistics = false,
            printGraph = false, classStatistics = false, printSource = false;
    StringBuffer text = new StringBuffer();
    BufferedReader trainReader = null, testReader = null;
    ObjectInputStream objectInputStream = null;
    CostMatrix costMatrix = null;
    StringBuffer schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;

    try {
        String[] options = null;

        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.length() != 0) {
            classIndex = Integer.parseInt(classIndexString);
        }
        // trainFileName = Utils.getOption('t', options);
        objectInputFileName = Utils.getOption('l', options);
        // objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        if (trainFileName.length() == 0) {
            if (objectInputFileName.length() == 0) {
                throw new Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.length() == 0) {
                throw new Exception("No training file and no test " + "file given.");
            }
        } else if ((objectInputFileName.length() != 0)
                && ((!(classifier instanceof UpdateableClassifier)) || (testFileName.length() == 0))) {
            throw new Exception("Classifier not incremental, or no " + "test file provided: can't "
                    + "use both train and model file.");
        }
        try {
            if (trainFileName.length() != 0) {
                trainReader = new BufferedReader(new FileReader(trainFileName));
            }
            if (testFileName.length() != 0) {
                testReader = new BufferedReader(new FileReader(testFileName));
            }
            if (objectInputFileName.length() != 0) {
                InputStream is = new FileInputStream(objectInputFileName);
                if (objectInputFileName.endsWith(".gz")) {
                    is = new GZIPInputStream(is);
                }
                objectInputStream = new ObjectInputStream(is);
            }
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        if (testFileName.length() != 0) {
            template = test = new Instances(testReader, 1);
            if (classIndex != -1) {
                test.setClassIndex(classIndex - 1);
            } else {
                test.setClassIndex(test.numAttributes() - 1);
            }
            if (classIndex > test.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
        }
        if (trainFileName.length() != 0) {
            if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)) {
                train = new Instances(trainReader, 1);
            } else {
                train = new Instances(trainReader);
            }
            template = train;
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            if ((testFileName.length() != 0) && !test.equalHeaders(train)) {
                throw new IllegalArgumentException("Train and test file not compatible!");
            }
            if (classIndex > train.numAttributes()) {
                throw new Exception("Index of class attribute too large.");
            }
            //train = new Instances(train);
        }
        if (template == null) {
            throw new Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.length() != 0) {
            seed = Integer.parseInt(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.length() != 0) {
            folds = Integer.parseInt(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());

        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.length() != 0);

        // Check -p option
        try {
            attributeRangeString = Utils.getOption('p', options);
        } catch (Exception e) {
            throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. "
                    + "It now expects a parameter specifying a range of attributes "
                    + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.length() != 0) {
            printClassifications = true;
            if (!attributeRangeString.equals("0")) {
                attributesToOutput = new Range(attributeRangeString);
            }
        }

        // If a model file is given, we can't process scheme-specific options
        if (objectInputFileName.length() != 0) {
            Utils.checkForRemainingOptions(options);
        } else {
            // Set options for classifier
            if (classifier instanceof OptionHandler) {
                /*
                for (int i = 0; i < options.length; i++) {
                    if (options[i].length() != 0) {
                        if (schemeOptionsText == null) {
                            schemeOptionsText = new StringBuffer();
                        }
                        if (options[i].indexOf(' ') != -1) {
                            schemeOptionsText.append('"' + options[i] + "\" ");
                        } else {
                            schemeOptionsText.append(options[i] + " ");
                        }
                    }
                }
                */
                ((OptionHandler) classifier).setOptions(options);
            }
        }
        Utils.checkForRemainingOptions(options);
    } catch (Exception e) {
        throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier));
    }

    // Setup up evaluation objects
    EvaluationInternal trainingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);
    EvaluationInternal testingEvaluation = new EvaluationInternal(new Instances(template, 0), costMatrix);

    if (objectInputFileName.length() != 0) {
        // Load classifier from file
        classifier = (Classifier) objectInputStream.readObject();
        objectInputStream.close();
    }

    // Build the classifier if no object file provided
    if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)
            && (costMatrix == null) && (trainFileName.length() != 0)) {
        // Build classifier incrementally
        trainingEvaluation.setPriors(train);
        testingEvaluation.setPriors(train);
        trainTimeStart = System.currentTimeMillis();
        if (objectInputFileName.length() == 0) {
            classifier.buildClassifier(train);
        }
        while (train.readInstance(trainReader)) {
            trainingEvaluation.updatePriors(train.instance(0));
            testingEvaluation.updatePriors(train.instance(0));
            ((UpdateableClassifier) classifier).updateClassifier(train.instance(0));
            train.delete(0);
        }
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
        trainReader.close();
    } else if (objectInputFileName.length() == 0) {
        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.setPriors(tempTrain);
        testingEvaluation.setPriors(tempTrain);
        trainTimeStart = System.currentTimeMillis();
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
    }

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.length() != 0) {
        OutputStream os = new FileOutputStream(objectOutputFileName);
        if (objectOutputFileName.endsWith(".gz")) {
            os = new GZIPOutputStream(os);
        }
        ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
        objectOutputStream.writeObject(classifier);
        objectOutputStream.flush();
        objectOutputStream.close();
    }

    /*
    // If classifier is drawable output string describing graph
    if ((classifier instanceof Drawable) && (printGraph)) {
        return ((Drawable) classifier).graph();
    }

    // Output the classifier as equivalent source
    if ((classifier instanceof Sourcable) && (printSource)) {
        return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }

    // Output test instance predictions only
    if (printClassifications) {
        return printClassifications(classifier, new Instances(template, 0), testFileName,
                classIndex, attributesToOutput);
    }
    */

    // Output model
    if (!(noOutput || printMargins)) {
        if (classifier instanceof OptionHandler) {
            if (schemeOptionsText != null) {
                text.append("\nOptions: " + schemeOptionsText);
                text.append("\n");
            }
        }
        text.append("\n" + classifier.toString() + "\n");
    }

    if (!printMargins && (costMatrix != null)) {
        text.append("\n=== Evaluation Cost Matrix ===\n\n").append(costMatrix.toString());
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.length() != 0)) {
        if ((classifier instanceof UpdateableClassifier) && (testFileName.length() != 0)
                && (costMatrix == null)) {
            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            trainReader = new BufferedReader(new FileReader(trainFileName));

            // Incremental testing
            train = new Instances(trainReader, 1);
            if (classIndex != -1) {
                train.setClassIndex(classIndex - 1);
            } else {
                train.setClassIndex(train.numAttributes() - 1);
            }
            testTimeStart = System.currentTimeMillis();
            while (train.readInstance(trainReader)) {
                trainingEvaluation.evaluateModelOnce((Classifier) classifier, train.instance(0));
                train.delete(0);
            }
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
            trainReader.close();
        } else {
            testTimeStart = System.currentTimeMillis();
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = System.currentTimeMillis() - testTimeStart;
        }

        // Print the results of the training evaluation
        // if (printMargins) {
        //     return trainingEvaluation.toCumulativeMarginDistributionString();
        // } else {
        text.append("\nTime taken to build model: "
                + Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
        text.append("\nTime taken to test model on training data: "
                + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
        text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n",
                printComplexityStatistics));
        if (template.classAttribute().isNominal()) {
            if (classStatistics) {
                text.append("\n\n" + trainingEvaluation.toClassDetailsString());
            }
            text.append("\n\n" + trainingEvaluation.toMatrixString());
        }
        // }
    }

    // Compute proper error estimates
    if (testFileName.length() != 0) {
        // Testing is on the supplied test data
        while (test.readInstance(testReader)) {
            testingEvaluation.evaluateModelOnce((Classifier) classifier, test.instance(0));
            test.delete(0);
        }
        testReader.close();

        text.append("\n\n" + testingEvaluation.toSummaryString("=== Error on test data ===\n",
                printComplexityStatistics));
    } else if (trainFileName.length() != 0) {
        // Testing is via cross-validation on training data
        Random random = new Random(seed);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().isNumeric()) {
            text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
                    printComplexityStatistics));
        } else {
            text.append("\n\n\n" + testingEvaluation.toSummaryString(
                    "=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
        }
    }

    if (template.classAttribute().isNominal()) {
        if (classStatistics) {
            text.append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.append("\n\n" + testingEvaluation.toMatrixString());
    }

    String result = "\t" + Utils.doubleToString(trainingEvaluation.pctCorrect(), 12, 4) + " %";
    result += " " + Utils.doubleToString(testingEvaluation.pctCorrect(), 12, 4) + " %";

    String[] returnString = { text.toString(), result };
    return returnString;
}
From source file:GClass.EvaluationInternal.java
License:Open Source License
/**
 * Prints the predictions for the given dataset into a String variable.
 */
protected static String printClassifications(Classifier classifier, Instances train, String testFileName,
        int classIndex, Range attributesToOutput) throws Exception {

    StringBuffer text = new StringBuffer();
    if (testFileName.length() != 0) {
        BufferedReader testReader = null;
        try {
            testReader = new BufferedReader(new FileReader(testFileName));
        } catch (Exception e) {
            throw new Exception("Can't open file " + e.getMessage() + '.');
        }
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1) {
            test.setClassIndex(classIndex - 1);
        } else {
            test.setClassIndex(test.numAttributes() - 1);
        }
        int i = 0;
        while (test.readInstance(testReader)) {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.setDataset(test);
            double predValue = ((Classifier) classifier).classifyInstance(withMissing);
            if (test.classAttribute().isNumeric()) {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing()) {
                    text.append("missing");
                } else {
                    text.append(instance.classValue());
                }
                text.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            } else {
                if (Instance.isMissingValue(predValue)) {
                    text.append(i + " missing ");
                } else {
                    text.append(i + " " + test.classAttribute().value((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue)) {
                    text.append("missing ");
                } else {
                    text.append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.append(instance.toString(instance.classIndex()) + " "
                        + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.close();
    }
    return text.toString();
}