List of usage examples for the weka.core.Instance method dataset()
/**
 * Accessor returning an {@code Instances} object for this instance.
 *
 * NOTE(review): the usage examples on this page call it to read header-level information
 * (relation name, attribute list, class attribute, number of classes) and to seed new
 * {@code Instances} buffers. Whether the result may be {@code null} is not visible here;
 * confirm against the weka.core.Instance documentation.
 */
public Instances dataset();
From source file:moa.classifiers.rules.AbstractAMRules.java
License:Apache License
protected void VerboseToConsole(Instance inst) { if (VerbosityOption.getValue() >= 5) { System.out.println();// w ww. j a v a2s .co m System.out.println("I) Dataset: " + inst.dataset().relationName()); if (!this.unorderedRulesOption.isSet()) { System.out.println("I) Method Ordered"); } else { System.out.println("I) Method Unordered"); } } }
From source file:moa.classifiers.trees.ePTTD.java
License:Creative Commons License
@Override public void trainOnInstanceImpl(Instance inst) { // TODO Auto-generated method stub if (inst.weight() > 0.0) { this.trainingWeightSeenByModel += inst.weight(); }/*from ww w.j a v a2 s .c o m*/ if (!isClassificationEnabled) { if (instancesBuffer == null) { //this.instancesBuffer = new Instances(inst.dataset()); this.instancesBuffer = new Instances(inst.dataset(), 0); } instancesBuffer.add(inst); if (instancesBuffer.size() == widthInitOption.getValue()) { //Build first time Classifier checkOptionsIntegity(); this.ePTTDintern.buildClassifier(instancesBuffer); isClassificationEnabled = true; } return; } ePTTDintern.updateClassifier(inst); }
From source file:moa.classifiers.WEKAClassifier.java
License:Open Source License
@Override public void trainOnInstanceImpl(Instance inst) { try {//w w w. jav a2s . c o m if (numberInstances == 0) { this.instancesBuffer = new Instances(inst.dataset()); if (classifier instanceof UpdateableClassifier) { classifier.buildClassifier(instancesBuffer); this.isClassificationEnabled = true; } else { this.isBufferStoring = true; } } numberInstances++; if (classifier instanceof UpdateableClassifier) { if (numberInstances > 0) { ((UpdateableClassifier) classifier).updateClassifier(inst); } } else { if (numberInstances == widthInitOption.getValue()) { //Build first time Classifier buildClassifier(); isClassificationEnabled = true; //Continue to store instances if (sampleFrequencyOption.getValue() != 0) { isBufferStoring = true; } } if (widthOption.getValue() == 0) { //Used from SingleClassifierDrift if (isBufferStoring == true) { instancesBuffer.add(inst); } } else { //Used form WekaClassifier without using SingleClassifierDrift int numInstances = numberInstances % sampleFrequencyOption.getValue(); if (sampleFrequencyOption.getValue() == 0) { numInstances = numberInstances; } if (numInstances == 0) { //Begin to store instances isBufferStoring = true; } if (isBufferStoring == true && numInstances <= widthOption.getValue()) { //Store instances instancesBuffer.add(inst); } if (numInstances == widthOption.getValue()) { //Build Classifier buildClassifier(); isClassificationEnabled = true; this.instancesBuffer = new Instances(inst.dataset()); } } } } catch (Exception e) { System.err.println("Training: " + e.getMessage()); } }
From source file:moa.cluster.Riffle.java
License:Apache License
/** * Use outlier-criteria (selectable strategy) for determining if a data point is an outlier (unlikely member) of cluster * * @param x Instance for comparison to see if it is an outlier to this cluster * @return true if x is an outlier w.r.t. this cluster *///from ww w . jav a 2 s . c o m public final boolean isOutlier(Instance x) { boolean ret; double p = this.getInclusionProbability(x); switch (this.outlierDefinitionStrategyOption.getChosenIndex()) { case 0: //Use Chauvenet's Criteria to determine outlier standing of the data point for this cluster. ret = (p < getChauvenetLimit()); break; case 1: // use Perceptron double[] v = embeddedClassifier.getVotesForInstance(x); try { weka.core.Utils.normalize(v); } catch (Exception e) { } int oIdx = x.dataset().classAttribute().indexOfValue(AbstractNovelClassClassifier.OUTLIER_LABEL_STR); double po = (v.length > oIdx) ? v[oIdx] : 0; if (po <= 0) { v[oIdx] = 0; } int h = weka.core.Utils.maxIndex(v); double ph = v[h]; double margin = (po - ph); ret = (po > ph) && (margin > (2.0 / v.length)); break; case 2: // 2.5 sigma ret = (p < weka.core.FastStats.normalProbability(2.5)); break; case 3: // 3 sigma ret = (p < weka.core.FastStats.normalProbability(3)); break; case 4: // 6 sigma ret = (p < weka.core.FastStats.normalProbability(6)); break; case 5: // cheat ret = p > 0.5; break; default: ret = p < weka.core.FastStats.normalProbability(2.5); } return ret; }
From source file:moa.cluster.Riffle.java
License:Apache License
/** * Sanity check and initialization of dynamic fields * * @param x// w ww . j a v a 2s . c o m */ protected final void safeInit(Instance x) { if (this.embeddedLearnerOption.getValueAsCLIString().contains("Majority class")) { this.excludeOutlierVoting = true; } if (centroid == null) { centroid = x.toDoubleArray(); } if (this.instances == null) { prepareEmbeddedClassifier(); ArrayList<Attribute> attribs = new ArrayList<>(); this.symbolFrequencies = new double[x.dataset().numAttributes()][]; for (int i = 0; i < x.dataset().numAttributes(); ++i) { Attribute a = (Attribute) x.dataset().attribute(i).copy(); if (i == x.classIndex()) { a.setWeight(0.0); } else { a.setWeight(1.0); } switch (a.type()) { case Attribute.STRING: case Attribute.NOMINAL: //UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i)); this.symbolFrequencies[i] = new double[a.numValues()]; break; case Attribute.NUMERIC: case Attribute.RELATIONAL: case Attribute.DATE: default: // UnsafeUtils.setAttributeRange(a, x.value(i), x.value(i)); this.symbolFrequencies[i] = null; } attribs.add(a); } this.instances = new Instances("ClusterData", attribs, 1); this.instances.setClassIndex(x.classIndex()); } // else { // for (int i = 0; i < x.dataset().numAttributes() && i < this.header.numAttributes(); ++i) { // double val = x.value(i); // Attribute a = this.header.attribute(i); // // expand range as necessary // if (val < a.getLowerNumericBound() || val > a.getUpperNumericBound()){ // UnsafeUtils.setAttributeRange(a, Math.min(val,a.getLowerNumericBound()), Math.max(val,a.getUpperNumericBound())); // } // // increase frequency counts if new string value is encountered // if (a.type() == Attribute.STRING && (val >= Math.max(this.symbolFrequencies[i].length, a.numValues()))) { // double newArray[] = new double[Math.max(this.symbolFrequencies[i].length, a.numValues())]; // Arrays.fill(newArray, 0); // for(int j = 0; j <= this.symbolFrequencies[i].length; j++) { // newArray[j] = this.symbolFrequencies[i][j]; // } // 
this.symbolFrequencies[i] = newArray; // } // } // } if (this.variances == null) { this.variances = new double[x.numAttributes()]; Arrays.fill(this.variances, 1); } if (this.entropies == null) { this.entropies = new double[x.numAttributes()]; Arrays.fill(this.entropies, 0); } if (this.labelFrequencies == null) { this.labelFrequencies = new double[x.numClasses()]; Arrays.fill(this.labelFrequencies, 0); } if (this.gtLabelFrequencies == null) { this.gtLabelFrequencies = new double[x.numClasses()]; Arrays.fill(this.gtLabelFrequencies, 0); } if (this.rho == null) { this.rho = new double[x.numAttributes()]; Arrays.fill(this.rho, 0); } }
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/** * Use inclusion probability to discover the cluster "nearest" the provided instance * Uses main object's outlier container/*from w w w.ja v a 2 s .co m*/ * @param x instance in question * @return sorted set of clusters, ordered by inc */ protected final NearestInstanceTuple[] findNearestOutliers(Instance x) { NearestInstanceTuple[] ret = new NearestInstanceTuple[potentialNovels.size()]; double[] xVals = x.toDoubleArray(); int idx = 0; for (Instance n : potentialNovels) { double distance = VectorDistances.distance(xVals, n.toDoubleArray(), x.dataset(), this.distanceStrategyOption.getChosenIndex()); NearestInstanceTuple nit = new NearestInstanceTuple(n, distance); ret[idx++] = nit; } // end for Arrays.parallelSort(ret); return ret; }
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Fallback initialization for when this class is not driven by the
 * moa.tasks.EvaluateNonStationaryDynamicStream task.
 *
 * <p>Each piece of state is created only if it is still {@code null}:
 * the universal cluster (seeded from {@code x}, unit variances, zero weight, recomputed, with the
 * best-probability accumulators reset), the known-label counters together with the attribute
 * count, and the augmented header derived from the instance's dataset.
 *
 * @param x instance used as the template for the initialization
 */
public final void safeInit(Instance x) {
    if (this.universalCluster == null) {
        final Riffle cluster = new Riffle(x);
        this.universalCluster = cluster;

        cluster.distanceStrategyOption.setChosenIndex(this.distanceStrategyOption.getChosenIndex());

        final double[] unitVariances = new double[x.numAttributes()];
        Arrays.fill(unitVariances, 1.0);
        cluster.setVariances(unitVariances);

        cluster.setWeight(0);
        cluster.recompute();

        bestProbabilitySums = 0;
        bestProbabilityCount = 0;
    }

    if (this.knownLabels == null) {
        // A freshly allocated int[] is already all zeros.
        this.knownLabels = new int[x.numClasses()];
        this.numAttributes = x.numAttributes();
    }

    if (this.header == null) {
        this.header = AbstractNovelClassClassifier.augmentInstances(x.dataset());
    }
}
From source file:moa.evaluation.BasicClassificationPerformanceEvaluator.java
License:Open Source License
@Override public void addResult(Instance inst, double[] classVotes) { double weight = inst.weight(); int trueClass = (int) inst.classValue(); if (weight > 0.0) { if (this.weightObserved == 0) { reset(inst.dataset().numClasses()); }//from w w w . ja va 2 s . co m this.weightObserved += weight; int predictedClass = Utils.maxIndex(classVotes); if (predictedClass == trueClass) { this.weightCorrect += weight; } this.rowKappa[predictedClass] += weight; this.columnKappa[trueClass] += weight; } if (this.lastSeenClass == trueClass) { this.weightCorrectNoChangeClassifier += weight; } this.lastSeenClass = trueClass; }
From source file:moa.evaluation.BasicClassificationScoringEvaluator.java
License:Open Source License
@Override public void addResult(Instance inst, double[] classVotes) { double weight = inst.weight(); int trueClass = (int) inst.classValue(); if (weight > 0.0) { if (this.weightObserved == 0) { reset(inst.dataset().numClasses()); }/* w w w . ja v a 2s . c o m*/ this.weightObserved += weight; //MSE Calculus int predictedClass = Utils.maxIndex(classVotes); if (predictedClass == trueClass) { this.weightCorrect += weight; } double[] normalized = normalize(classVotes); double vote = 0; if (normalized.length > 0) { vote = trueClass < normalized.length ? normalized[trueClass] : 0; } if (Double.compare(vote, Double.NaN) == 0) { int countNaN = 0; for (int i = 0; i < classVotes.length; ++i) { if (Double.compare(normalized[i], Double.NaN) == 0) { countNaN++; } } vote = 1; if (countNaN > 1 && classVotes.length > 1) { vote = 1.0 / countNaN; } } this.mse += 1 - vote; this.saw++; this.rowKappa[predictedClass] += weight; this.columnKappa[trueClass] += weight; } }
From source file:moa.evaluation.ClassificationWithNovelClassPerformanceEvaluator.java
License:Open Source License
/** * /*from w ww .j a va2 s . c o m*/ * Note that for novel class testing, an addition class value is added to the known classes. T * This extra "Label" represents a prediction of "Novel Class". This approach allows for * algorithms that do not have novel class prediction capabilities to still function, * as this method first bounds checks to see if the prediction array includes the added label * * @param inst instance under test * @param classVotes prediction table for this instance */ @Override public void addResult(Instance inst, double[] classVotes) { if (header == null) { header = AbstractNovelClassClassifier.augmentInstances(inst.dataset()); this.novelClassLabel = header.classAttribute() .indexOfValue(AbstractNovelClassClassifier.NOVEL_LABEL_STR); this.outlierLabel = header.classAttribute() .indexOfValue(AbstractNovelClassClassifier.OUTLIER_LABEL_STR); this.rowKappa = new double[header.numClasses()]; Arrays.fill(this.rowKappa, 0.0); this.columnKappa = new double[header.numClasses()]; Arrays.fill(this.columnKappa, 0.0); this.knownTrueLabels = new int[header.numClasses()]; Arrays.fill(knownTrueLabels, 0); this.observedLabels = new int[header.numClasses()]; Arrays.fill(observedLabels, 0); } final int trueClass = (int) inst.classValue(); if (classVotes == null) { this.knownTrueLabels[trueClass]++; return; } final double[] labelsOnlyVotes = Arrays.copyOf(classVotes, inst.numClasses()); if (labelsOnlyVotes.length > this.novelClassLabel) { labelsOnlyVotes[novelClassLabel] = 0; } if (labelsOnlyVotes.length > this.outlierLabel) { labelsOnlyVotes[outlierLabel] = 0; } final double totalVoteQty = weka.core.Utils.sum(labelsOnlyVotes); final int predictedClass = weka.core.Utils.maxIndex(labelsOnlyVotes); // Don't count the special extended indexes for novel and outlier final boolean isMarkedOutlier = (weka.core.Utils.maxIndex(classVotes) == this.outlierLabel); if (predictedClass < inst.numClasses() && labelsOnlyVotes[predictedClass] > 0.0) { // Only if there is SOME vote 
(non-zero) this.observedLabels[predictedClass]++; // If we predict it, then it can't be novel! } //final boolean isTrueNovel = !(this.observedLabels[(int)trueClass] > observationsUntilNotNovelOption.getValue()); boolean predictedNovel = ((classVotes.length > this.novelClassLabel) && (classVotes[this.novelClassLabel] > 0));// this.thresholdOfNoveltyOption.getValue())); final boolean isVoteOutlier = (totalVoteQty <= (weka.core.Utils.SMALL * 10.0)); final boolean correctLabelPrediction = (predictedClass == trueClass); switch (this.outlierHandlingStrategyOption.getChosenIndex()) { case 0: // use anyway // keep on trucking... break; case 1: // ignore marked if (isMarkedOutlier) { return; } break; case 2: // ignore no vote if (isVoteOutlier) { return; } break; case 3: // ignore iff marked AND no vote if (isVoteOutlier && isMarkedOutlier) { return; } break; case 4: // ignore pure OR marked if (isVoteOutlier || isMarkedOutlier) { return; } break; case 5: // mark as novel predictedNovel = predictedNovel || isMarkedOutlier; break; default: break; } this.numberOfInstancesSeen++; this.weightObserved += inst.weight(); // /!\ IS THIS RIGHT??? //final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption.getValue()) && (this.observedLabels[trueClass] < observationsUntilNotNovelOption.getValue()); final boolean isTrueNovel = (this.knownTrueLabels[trueClass] < this.maxUnobservationsUntilNotNovelOption .getValue()); // 8x different mutually exclusive options (i.e. 
3-bits) if ((!predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Should be most common this.novelClassDetectionTrueNegative++; this.weightCorrect++; } if ((predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Rare if ever this.novelClassDetectionTruePositive++; this.weightCorrect++; assert false : "Paradox 1 - true novel, but predicted the right label"; } if ((predictedNovel) && (!isTrueNovel) && (correctLabelPrediction)) { // Error due to overly restrictive models this.novelClassDetectionFalsePositive++; if (this.goodIsGoodOption.isSet()) { this.weightCorrect++; } } if ((!predictedNovel) && (isTrueNovel) && (correctLabelPrediction)) { // Should never happen? Framework was wrong here, so TN this.novelClassDetectionTrueNegative++; this.weightCorrect++; assert false : "Paradox 2 - true novel, but predicted the right label"; } if ((predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // Should be most common when x is novel this.novelClassDetectionTruePositive++; this.weightCorrect++; } if ((predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Probably an Outlier case this.novelClassDetectionFalsePositive++; if (this.outlierHandlingStrategyOption.getChosenIndex() > 0) { this.weightCorrect++; } } if ((!predictedNovel) && (isTrueNovel) && (!correctLabelPrediction)) { // NCD failure FN this.novelClassDetectionFalseNegative++; } if ((!predictedNovel) && (!isTrueNovel) && (!correctLabelPrediction)) { // Correct NCD, but bad h(x) prediction this.novelClassDetectionTrueNegative++; } this.rowKappa[predictedClass]++; this.columnKappa[trueClass]++; this.knownTrueLabels[trueClass] += inst.weight(); }