List of usage examples for weka.core Instances classAttribute
public Attribute classAttribute()
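Before the collected examples below, here is a minimal self-contained sketch of the basic call pattern; the ARFF path is a placeholder and not part of any example on this page:

import java.io.FileReader;
import weka.core.Attribute;
import weka.core.Instances;

public class ClassAttributeDemo {
    public static void main(String[] args) throws Exception {
        // Load a dataset and declare the last attribute as the class.
        Instances data = new Instances(new FileReader("iris.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        // classAttribute() returns the Attribute at the class index; it throws
        // UnassignedClassException if no class index has been set beforehand.
        Attribute classAttr = data.classAttribute();
        System.out.println("Class attribute: " + classAttr.name());
        System.out.println("Nominal class:   " + classAttr.isNominal());
    }
}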
From source file: cotraining.copy.Evaluation_D.java
License: Open Source License
/**
 * Performs a (stratified if class is nominal) cross-validation
 * for a classifier on a set of instances. Now performs
 * a deep copy of the classifier before each call to
 * buildClassifier() (just in case the classifier is not
 * initialized properly).
 *
 * @param classifier the classifier with any options set.
 * @param data the data on which the cross-validation is to be
 *          performed
 * @param numFolds the number of folds for the cross-validation
 * @param random random number generator for randomization
 * @param forPredictionsPrinting varargs parameter that, if supplied, is
 *          expected to hold a StringBuffer to print predictions to,
 *          a Range of attributes to output and a Boolean (true if the
 *          distribution is to be printed)
 * @throws Exception if a classifier could not be generated
 *           successfully or the class is not defined
 */
public void crossValidateModel(Classifier classifier, Instances data, int numFolds, Random random,
        Object... forPredictionsPrinting) throws Exception {
    // Make a copy of the data we can reorder
    data = new Instances(data);
    data.randomize(random);
    if (data.classAttribute().isNominal()) {
        data.stratify(numFolds);
    }

    // We assume that the first element is a StringBuffer, the second a Range (attributes
    // to output) and the third a Boolean (whether or not to output a distribution instead
    // of just a classification)
    if (forPredictionsPrinting.length > 0) {
        // print the header first
        StringBuffer buff = (StringBuffer) forPredictionsPrinting[0];
        Range attsToOutput = (Range) forPredictionsPrinting[1];
        boolean printDist = ((Boolean) forPredictionsPrinting[2]).booleanValue();
        printClassificationsHeader(data, attsToOutput, printDist, buff);
    }

    // Do the folds
    for (int i = 0; i < numFolds; i++) {
        Instances train = data.trainCV(numFolds, i, random);
        setPriors(train);
        Classifier copiedClassifier = Classifier.makeCopy(classifier);
        copiedClassifier.buildClassifier(train);
        Instances test = data.testCV(numFolds, i);
        evaluateModel(copiedClassifier, test, forPredictionsPrinting);
    }
    m_NumFolds = numFolds;
}
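A sketch of how the three-element varargs contract documented above might be exercised. The Evaluation_D constructor is an assumption here, taken to mirror weka.classifiers.Evaluation(Instances), which this class copies; the ARFF path is a placeholder:

import java.io.FileReader;
import java.util.Random;
import cotraining.copy.Evaluation_D;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.Range;

public class CrossValidateDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("labor.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        StringBuffer predictions = new StringBuffer();
        Range attsToOutput = new Range("first"); // echo the first attribute with each prediction
        Boolean printDistribution = Boolean.TRUE;

        // Assumption: Evaluation_D is constructed like weka.classifiers.Evaluation.
        Evaluation_D eval = new Evaluation_D(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1),
                predictions, attsToOutput, printDistribution);
        System.out.println(predictions);
    }
}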
From source file: cotraining.copy.Evaluation_D.java
License: Open Source License
/**
 * Prints the header for the predictions output into a supplied StringBuffer
 *
 * @param test structure of the test set to print predictions for
 * @param attributesToOutput indices of the attributes to output
 * @param printDistribution prints the complete distribution for nominal
 *          attributes, not just the predicted value
 * @param text the StringBuffer to print to
 */
protected static void printClassificationsHeader(Instances test, Range attributesToOutput,
        boolean printDistribution, StringBuffer text) {
    // print header
    if (test.classAttribute().isNominal()) {
        if (printDistribution) {
            text.append(" inst#     actual  predicted error distribution");
        } else {
            text.append(" inst#     actual  predicted error prediction");
        }
    } else {
        text.append(" inst#     actual  predicted      error");
    }
    if (attributesToOutput != null) {
        attributesToOutput.setUpper(test.numAttributes() - 1);
        text.append(" (");
        boolean first = true;
        for (int i = 0; i < test.numAttributes(); i++) {
            if (i == test.classIndex()) {
                continue;
            }
            if (attributesToOutput.isInRange(i)) {
                if (!first) {
                    text.append(",");
                }
                text.append(test.attribute(i).name());
                first = false;
            }
        }
        text.append(")");
    }
    text.append("\n");
}
From source file: cs.man.ac.uk.classifiers.GetAUC.java
License: Open Source License
/**
 * Computes the AUC for the supplied stream learner.
 *
 * @return the AUC as a double value.
 */
private static double validate5x2CVStream() {
    try {
        // Other options
        int runs = 5;
        int folds = 2;
        double AUC_SUM = 0;

        // perform cross-validation
        for (int i = 0; i < runs; i++) {
            // randomize data
            int seed = i + 1;
            Random rand = new Random(seed);
            Instances randData = new Instances(data);
            randData.randomize(rand);

            if (randData.classAttribute().isNominal()) {
                System.out.println("Stratifying...");
                randData.stratify(folds);
            }

            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                Distribution testDistribution = new Distribution(test);

                ArffSaver trainSaver = new ArffSaver();
                trainSaver.setInstances(train);
                trainSaver.setFile(new File(trainPath));
                trainSaver.writeBatch();

                ArffSaver testSaver = new ArffSaver();
                testSaver.setInstances(test);

                double[][] dist = testDistribution.matrix();
                int negativeClassSize = (int) dist[0][0];
                int positiveClassSize = (int) dist[0][1];
                double balance = (double) positiveClassSize / (double) negativeClassSize;

                // [Test-n-Set-n]_[+]_[-]_[K]_[L]
                String tempTestPath = testPath.replace(".arff",
                        "_" + positiveClassSize + "_" + negativeClassSize + "_" + balance + "_1.0.arff");

                testSaver.setFile(new File(tempTestPath));
                testSaver.writeBatch();

                ARFFFile file = new ARFFFile(tempTestPath, CLASS_INDEX, new DebugLogger(false));
                file.createMetaData();

                HoeffdingTreeTester streamClassifier = new HoeffdingTreeTester(trainPath, tempTestPath,
                        CLASS_INDEX, new String[] { "0", "1" }, new DebugLogger(true));

                streamClassifier.train();

                System.in.read(); // blocks until input is received (debugging pause in the original source)

                //AUC_SUM += streamClassifier.getROCExternalData("", (int) testDistribution.perClass(1),
                //        (int) testDistribution.perClass(0));
                streamClassifier.testStatic(homeDirectory + "/FuckSakeTest.txt");

                String[] files = Common.getFilePaths(scratch);
                for (int j = 0; j < files.length; j++) {
                    Common.fileDelete(files[j]);
                }
            }
        }

        return AUC_SUM / ((double) runs * (double) folds);
    } catch (Exception e) {
        System.out.println("Exception validating data!");
        e.printStackTrace();
        return 0;
    }
}
From source file: cs.man.ac.uk.classifiers.GetAUC.java
License: Open Source License
/**
 * Computes the AUC for the supplied learner.
 *
 * @return the AUC as a double value.
 */
@SuppressWarnings("unused")
private static double validate5x2CV() {
    try {
        // other options
        int runs = 5;
        int folds = 2;
        double AUC_SUM = 0;

        // perform cross-validation
        for (int i = 0; i < runs; i++) {
            // randomize data
            int seed = i + 1;
            Random rand = new Random(seed);
            Instances randData = new Instances(data);
            randData.randomize(rand);

            if (randData.classAttribute().isNominal()) {
                System.out.println("Stratifying...");
                randData.stratify(folds);
            }

            Evaluation eval = new Evaluation(randData);

            for (int n = 0; n < folds; n++) {
                Instances train = randData.trainCV(folds, n);
                Instances test = randData.testCV(folds, n);

                // the above code is used by the StratifiedRemoveFolds filter, the
                // code below by the Explorer/Experimenter:
                // Instances train = randData.trainCV(folds, n, rand);

                // build and evaluate classifier
                String[] options = { "-U", "-A" };
                J48 classifier = new J48();
                //HTree classifier = new HTree();

                classifier.setOptions(options);
                classifier.buildClassifier(train);

                eval.evaluateModel(classifier, test);

                // generate curve
                ThresholdCurve tc = new ThresholdCurve();
                int classIndex = 0;
                Instances result = tc.getCurve(eval.predictions(), classIndex);

                // plot curve
                vmc = new ThresholdVisualizePanel();
                AUC_SUM += ThresholdCurve.getROCArea(result);
                System.out.println("AUC: " + ThresholdCurve.getROCArea(result) + " \tAUC SUM: " + AUC_SUM);
            }
        }

        return AUC_SUM / ((double) runs * (double) folds);
    } catch (Exception e) {
        System.out.println("Exception validating data!");
        return 0;
    }
}
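For comparison, stock Weka can report the same quantity without building the curve by hand. A minimal sketch using weka.classifiers.Evaluation.areaUnderROC(int); the dataset path is a placeholder:

import java.io.FileReader;
import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;

public class AucDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("data.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1));

        // areaUnderROC(classIndex) yields the value the example above derives
        // via ThresholdCurve.getROCArea(tc.getCurve(eval.predictions(), 0)).
        System.out.println("AUC (class 0): " + eval.areaUnderROC(0));
    }
}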
From source file: de.fub.maps.project.detector.model.inference.processhandler.InferenceDataProcessHandler.java
License: Open Source License
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    HashSet<TrackSegment> inferenceDataSet = getInferenceDataSet();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        Instances unlabeledInstances = new Instances("Unlabeld Tracks",
                new ArrayList<Attribute>(attributeList), 0); //NOI18N
        unlabeledInstances.setClassIndex(0);

        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (TrackSegment segment : inferenceDataSet) {
            Instance instance = getInstance(segment);
            unlabeledInstances.add(instance);
            segmentList.add(segment);
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);

                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);

                // get class label
                String value = unlabeledInstances.classAttribute().value((int) classifyed);

                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }

                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // update result set of the inferenceModel
        for (Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }

            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list length == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file: de.fub.maps.project.detector.model.inference.processhandler.SpecialInferenceDataProcessHandler.java
License: Open Source License
@Override
protected void handle() {
    clearResults();
    Classifier classifier = getInferenceModel().getClassifier();
    Collection<Attribute> attributeList = getInferenceModel().getAttributes();
    if (!attributeList.isEmpty()) {
        Set<String> keySet = getInferenceModel().getInput().getTrainingsSet().keySet();
        setClassesToView(keySet);

        Instances unlabeledInstances = new Instances("Unlabeld Tracks",
                new ArrayList<Attribute>(attributeList), 0); //NOI18N
        unlabeledInstances.setClassIndex(0);

        ArrayList<TrackSegment> segmentList = new ArrayList<TrackSegment>();
        for (Entry<String, HashSet<TrackSegment>> entry : getInferenceModel().getInput().getTrainingsSet()
                .entrySet()) {
            for (TrackSegment segment : entry.getValue()) {
                segment.setLabel(entry.getKey());
                Instance instance = getInstance(segment);
                unlabeledInstances.add(instance);
                segmentList.add(segment);
            }
        }

        // create copy
        Instances labeledInstances = new Instances(unlabeledInstances);

        for (int index = 0; index < labeledInstances.numInstances(); index++) {
            try {
                Instance instance = labeledInstances.instance(index);

                // classify instance
                double classifyed = classifier.classifyInstance(instance);
                instance.setClassValue(classifyed);

                // get class label
                String value = unlabeledInstances.classAttribute().value((int) classifyed);

                if (index < segmentList.size()) {
                    instanceToTrackSegmentMap.put(instance, segmentList.get(index));
                }

                // put label and instance to result map
                put(value, instance);
            } catch (Exception ex) {
                Exceptions.printStackTrace(ex);
            }
        }

        // update view
        updateVisualRepresentation();

        // update result set of the inferenceModel
        for (Map.Entry<String, List<Instance>> entry : resultMap.entrySet()) {
            HashSet<TrackSegment> trackSegmentList = new HashSet<TrackSegment>();
            for (Instance instance : entry.getValue()) {
                TrackSegment trackSegment = instanceToTrackSegmentMap.get(instance);
                if (trackSegment != null) {
                    trackSegmentList.add(trackSegment);
                }
            }

            // only those classes are put into the result data set, which are not empty
            if (!trackSegmentList.isEmpty()) {
                getInferenceModel().getResult().put(entry.getKey(), trackSegmentList);
            }
        }
    } else {
        throw new InferenceModelClassifyException(MessageFormat
                .format("No attributes available. Attribute list length == {0}", attributeList.size()));
    }
    resultMap.clear();
    instanceToTrackSegmentMap.clear();
}
From source file: de.tudarmstadt.ukp.dkpro.spelling.experiments.hoo2012.featureextraction.AllFeaturesExtractor.java
License: Apache License
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    Collection<Token> tokens = JCasUtil.select(jcas, Token.class);

    if (isTest) {
        Instances trainData = null;
        Classifier cl = null;
        try {
            trainData = getInstances(trainingArff);
            cl = getClassifier();

            // SpreadSubsample spread = new SpreadSubsample();
            // spread.setDistributionSpread(1.0);
            //
            // FilteredClassifier fc = new FilteredClassifier();
            // fc.setFilter(spread);
            // fc.setClassifier(cl);

            cl.buildClassifier(trainData);
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                List<String> classValues = new ArrayList<String>();
                for (Enumeration e = trainData.classAttribute().enumerateValues(); e.hasMoreElements();) {
                    classValues.add(e.nextElement().toString());
                }

                // build classifier from training arff and classify
                try {
                    weka.core.Instance wekaInstance = CleartkInstanceConverter.toWekaInstance(instance,
                            classValues);
                    System.out.println(wekaInstance);
                    double prediction = cl.classifyInstance(wekaInstance);

                    // prediction is the index in the class labels, not the class label itself!
                    String outcome = trainData.classAttribute().value(new Double(prediction).intValue());

                    if (!tokenString.equals(outcome)) {
                        SpellingAnomaly ann = new SpellingAnomaly(jcas, token.getBegin(), token.getEnd());
                        ann.setCategory(errorClass);
                        ann.setSuggestions(SpellingUtils.getSuggestedActionArray(jcas, outcome));
                        ann.addToIndexes();
                    }
                } catch (Exception e) {
                    throw new AnalysisEngineProcessException(e);
                }
            }
        }
    } else {
        for (Token token : tokens) {
            String tokenString = token.getCoveredText();
            if (tokenString.length() > 0 && confusionSet.contains(tokenString)) {
                Instance<String> instance = new Instance<String>();
                for (SimpleFeatureExtractor featExt : featureExtractors) {
                    instance.addAll(featExt.extract(jcas, token));
                }
                instance.setOutcome(tokenString);

                // we also need to add a negative example
                // choose it randomly from the confusion set without the actual token
                // TODO implement negative examples

                this.dataWriter.write(instance);
            }
        }
    }
}
From source file: de.ugoe.cs.cpdp.dataprocessing.MORPH.java
License: Apache License
/**
 * <p>
 * Applies MORPH to a single instance
 * </p>
 *
 * @param instance
 *            instance that is morphed
 * @param data
 *            data based on which the instance is morphed
 */
public void morphInstance(Instance instance, Instances data) {
    Instance nearestUnlikeNeighbor = getNearestUnlikeNeighbor(instance, data);
    if (nearestUnlikeNeighbor == null) {
        throw new RuntimeException(
                "could not find nearest unlike neighbor within the data: " + data.relationName());
    }
    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
            double randVal = rand.nextDouble() * (beta - alpha) + alpha;
            instance.setValue(j,
                    instance.value(j) + randVal * (instance.value(j) - nearestUnlikeNeighbor.value(j)));
        }
    }
}
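A sketch of driving the per-instance method over a whole training set. The MORPH default constructor and its alpha/beta defaults are assumptions, as is the placeholder ARFF path:

import java.io.FileReader;
import de.ugoe.cs.cpdp.dataprocessing.MORPH;
import weka.core.Instances;

public class MorphDemo {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("defects.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        MORPH morph = new MORPH(); // assumption: default constructor with default alpha/beta
        // Morph every instance in place against the full dataset.
        for (int i = 0; i < data.numInstances(); i++) {
            morph.morphInstance(data.instance(i), data);
        }
    }
}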
From source file: de.ugoe.cs.cpdp.dataprocessing.MORPH.java
License: Apache License
/**
 * <p>
 * Determines the nearest unlike neighbor of an instance.
 * </p>
 *
 * @param instance
 *            instance to which the nearest unlike neighbor is determined
 * @param data
 *            data where the nearest unlike neighbor is determined from
 * @return nearest unlike instance
 */
public Instance getNearestUnlikeNeighbor(Instance instance, Instances data) {
    Instance nearestUnlikeNeighbor = null;

    double[] instanceVector = new double[data.numAttributes() - 1];
    int tmp = 0;
    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
            // tmp must be incremented here; otherwise every numeric value
            // would overwrite the first vector component
            instanceVector[tmp++] = instance.value(j);
        }
    }

    double minDistance = Double.MAX_VALUE;
    for (int i = 0; i < data.numInstances(); i++) {
        if (instance.classValue() != data.instance(i).classValue()) {
            double[] otherVector = new double[data.numAttributes() - 1];
            tmp = 0;
            for (int j = 0; j < data.numAttributes(); j++) {
                if (data.attribute(j) != data.classAttribute() && data.attribute(j).isNumeric()) {
                    otherVector[tmp++] = data.instance(i).value(j);
                }
            }
            double distance = MathArrays.distance(instanceVector, otherVector);
            if (distance < minDistance) {
                minDistance = distance;
                nearestUnlikeNeighbor = data.instance(i);
            }
        }
    }
    return nearestUnlikeNeighbor;
}
From source file: de.ugoe.cs.cpdp.dataselection.CLIFF.java
License: Apache License
/**
 * <p>
 * Applies the CLIFF relevancy filter to the data.
 * </p>
 *
 * @param data
 *            the data
 * @return CLIFF-filtered data
 */
protected Instances applyCLIFF(Instances data) {
    final double[][] powerAttributes = new double[data.size()][data.numAttributes()];
    final double[] powerEntity = new double[data.size()];

    final int[] counts = data.attributeStats(data.classIndex()).nominalCounts;
    final double probDefect = data.numInstances() / (double) counts[1];

    for (int j = 0; j < data.numAttributes(); j++) {
        if (data.attribute(j) != data.classAttribute()) {
            final double[] ranges = getRanges(data, j);
            final double[] probDefectRange = getRangeProbabilities(data, j, ranges);

            for (int i = 0; i < data.numInstances(); i++) {
                final double value = data.instance(i).value(j);
                final int range = determineRange(ranges, value);
                double probClass, probNotClass, probRangeClass, probRangeNotClass;
                if (data.instance(i).classValue() == 1) {
                    probClass = probDefect;
                    probNotClass = 1.0 - probDefect;
                    probRangeClass = probDefectRange[range];
                    probRangeNotClass = 1.0 - probDefectRange[range];
                } else {
                    probClass = 1.0 - probDefect;
                    probNotClass = probDefect;
                    probRangeClass = 1.0 - probDefectRange[range];
                    probRangeNotClass = probDefectRange[range];
                }
                powerAttributes[i][j] = Math.pow(probRangeClass, 2.0)
                        / (probRangeClass * probClass + probRangeNotClass * probNotClass);
            }
        }
    }

    for (int i = 0; i < data.numInstances(); i++) {
        powerEntity[i] = 1.0;
        for (int j = 0; j < data.numAttributes(); j++) {
            // skip the class attribute; its column is never assigned a power
            // value and would otherwise zero out the whole product
            if (data.attribute(j) != data.classAttribute()) {
                powerEntity[i] *= powerAttributes[i][j];
            }
        }
    }

    double[] sortedPower = powerEntity.clone();
    Arrays.sort(sortedPower);
    double cutOff = sortedPower[(int) (data.numInstances() * (1 - percentage))];

    final Instances selected = new Instances(data);
    selected.delete();
    for (int i = 0; i < data.numInstances(); i++) {
        if (powerEntity[i] >= cutOff) {
            selected.add(data.instance(i));
        }
    }
    return selected;
}
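Since applyCLIFF is protected, a caller outside the package would need a subclass. A minimal sketch under that assumption; the percentage field is taken at whatever default the class provides, and the ARFF path is a placeholder:

import java.io.FileReader;
import de.ugoe.cs.cpdp.dataselection.CLIFF;
import weka.core.Instances;

public class CliffDemo extends CLIFF {
    public static void main(String[] args) throws Exception {
        Instances data = new Instances(new FileReader("defects.arff")); // placeholder path
        data.setClassIndex(data.numAttributes() - 1);

        // Reach the protected applyCLIFF through this subclass.
        Instances filtered = new CliffDemo().applyCLIFF(data);
        System.out.println("Kept " + filtered.numInstances() + " of "
                + data.numInstances() + " instances");
    }
}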