List of usage examples for weka.core.Instance.weight()
public double weight();
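Before the longer examples, here is a minimal, self-contained sketch of the call being indexed here: reading and changing an instance's weight. The attribute names and values are invented for illustration; the DenseInstance(weight, values) constructor, weight(), and setWeight() are standard weka.core API (assuming a Weka 3.7-era class layout).

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class InstanceWeightDemo {
    public static void main(String[] args) {
        // Hypothetical two-attribute numeric dataset, used only to host the instance
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x1"));
        attrs.add(new Attribute("x2"));
        Instances data = new Instances("demo", attrs, 0);

        // DenseInstance(weight, values) sets the weight at construction time
        Instance inst = new DenseInstance(1.0, new double[] { 0.5, 2.0 });
        inst.setDataset(data);
        System.out.println(inst.weight());  // prints 1.0, the weight given above

        // Weights can be changed later, e.g. to down-weight a noisy example
        inst.setWeight(0.25);
        System.out.println(inst.weight());  // prints 0.25
    }
}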
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * This is not your grandpa's E-M algorithm... it has multiple mini-steps,
 * but "The e1-m1-e2-m2-e3-m3-Algorithm" is a mouthful, so we just call it *-Means Clustering
 * {Pronounced "Any-means (necessary) clustering"}
 * @param D
 * @param subclusters
 * @param maxK
 * @return score at the end of the process
 */
protected final double EMStep(List<ClusterPointPair> D, Collection<Riffle> subclusters, int maxK) {
    double ret = 0;
    // Clear the palette
    for (Riffle c : subclusters) {
        if (c.instances == null) {
            c.instances = c.getHeader();
        }
        c.instances.clear();
        c.cleanTallies();
    }
    // Assign the X's to the nearest clusters (Maximization step 1)
    for (ClusterPointPair cxp : D) {
        if (this.potentialNovels.contains(cxp.x)) { // could also be if cxp.c == null, but this is safer
            continue; // ignore the outliers for a moment
        }
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, cxp.x);
        // double ds[] = new double[nearestClusters.length];
        // int foo = 0;
        // for (NearestClusterTuple gnarf : nearestClusters) {
        //     ds[foo++] = gnarf.getDistance();
        // }
        cxp.c = nearestClusters[0].getCluster();
        nearestClusters[0].getCluster().instances.add(cxp.x);
        if (cxp.x.weight() > 0.99) {
            nearestClusters[0].getCluster().addLabeling((int) cxp.x.classValue(), cxp.x.weight());
        }
    }
    // Find new radius (Expectation step)
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    // Remove empty clusters to make room for splits (Expectation-ish)
    Iterator<Riffle> cIter = subclusters.iterator();
    while (cIter.hasNext()) {
        Riffle rc = cIter.next();
        if (rc.instances.size() < 1) {
            cIter.remove();
        }
    }
    // Are we full?
    if (subclusters.size() < maxK) {
        // Fix bad clusters (Maximization step 2 - breaking up noisy clusters)
        Riffle sortedClusters[] = new Riffle[subclusters.size()];
        int tmpIdx = 0;
        for (Riffle tmpRfl : subclusters) {
            if (tmpIdx >= sortedClusters.length) {
                break;
            }
            sortedClusters[tmpIdx] = tmpRfl;
            tmpIdx++;
        }
        Arrays.sort(sortedClusters, new Comparator<Riffle>() {
            @Override
            public int compare(Riffle first, Riffle second) {
                if (first == null) {
                    return 1;
                }
                if (second == null) {
                    return -1;
                }
                double[] votes1 = first.getVotes().clone();
                double[] votes2 = second.getVotes().clone();
                double total1 = weka.core.Utils.sum(votes1);
                double total2 = weka.core.Utils.sum(votes2);
                Arrays.sort(votes1);
                Arrays.sort(votes2);
                double penultimate1 = 1e-16 + ((votes1.length > 1) ? votes1[votes1.length - 2] : 0);
                double penultimate2 = 1e-16 + ((votes2.length > 1) ? votes2[votes2.length - 2] : 0);
                // this is equivalent to purity - margin... yes, really... gotta love math
                double score1 = (total1 > 0) ? first.size() * penultimate1 / total1 : 0;
                double score2 = (total2 > 0) ? second.size() * penultimate2 / total2 : 0;
                return Double.compare(score2, score1);
            }
        }); // end anonymous sort
        for (int cIdx = 0; cIdx < sortedClusters.length && subclusters.size() < maxK; cIdx++) {
            Riffle splitMe = sortedClusters[cIdx];
            if (splitMe.getPurity() > 0.9) {
                continue;
            }
            double[] votes = splitMe.getVotes();
            final double totalVotes = weka.core.Utils.sum(votes);
            final double critVotes = 1.0 / (votes.length * 2);
            if (totalVotes < 2) {
                continue;
            }
            ArrayList<Riffle> splitSet = new ArrayList<>(votes.length);
            int numberOfNewClusters = 0;
            for (int lblIdx = 0; lblIdx < votes.length; ++lblIdx) {
                double labelVote = votes[lblIdx] / totalVotes;
                if (labelVote >= critVotes) {
                    splitSet.add(this.createNewCluster(splitMe.toInstance()));
                    numberOfNewClusters++;
                } else {
                    splitSet.add(null);
                }
            }
            if (numberOfNewClusters < 2) {
                continue;
            }
            Instances extras = new Instances(splitMe.getHeader());
            for (Instance x : splitMe.instances) {
                if (x.weight() > 0.999) {
                    Riffle myHopefulCluster = splitSet.get((int) x.classValue());
                    if (myHopefulCluster != null) {
                        myHopefulCluster.instances.add(x);
                        myHopefulCluster.addLabeling((int) x.classValue(), x.weight());
                    } else {
                        extras.add(x);
                    }
                } else {
                    extras.add(x);
                }
            }
            LinkedList<Riffle> goodSet = new LinkedList<>();
            for (Riffle rfc : splitSet) {
                if (rfc == null) {
                    continue;
                }
                rfc.recomputeAll();
                goodSet.add(rfc);
                subclusters.add(rfc);
            }
            for (Instance x : extras) {
                final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(goodSet, x);
                nearestClusters[0].getCluster().instances.add(x);
            }
            subclusters.remove(splitMe);
        }
    }
    // The penultimate Expectation step
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    // See if any outliers should actually be consumed by a cluster now... (Maximization step 3)
    Iterator<Instance> xIter = potentialNovels.iterator();
    while (xIter.hasNext()) {
        Instance xOut = xIter.next();
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, xOut);
        if (nearestClusters == null || nearestClusters.length < 1) {
            continue;
        }
        Riffle c = nearestClusters[0].getCluster();
        double d = nearestClusters[0].getDistance();
        if (d > c.getRadius()) {
            // Welcome home, wayward tuple!
            c.instances.add(xOut);
            xIter.remove();
        }
    }
    // And the final Expectation step
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    return ret;
}
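A recurring idiom in the method above is using weight() as a label-confidence flag: only instances whose weight exceeds 0.99 are allowed to vote their class value into a cluster's label tally. The sketch below isolates that idiom; the LabelTally class and its method names are hypothetical, and only the threshold convention is taken from the code above.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import weka.core.Instance;

// Hypothetical helper illustrating the "weight as label confidence" convention:
// instances with weight > 0.99 are treated as labeled and vote for their class.
class LabelTally {
    private final Map<Integer, Double> votes = new HashMap<>();

    void addAll(List<Instance> members) {
        for (Instance x : members) {
            if (x.weight() > 0.99) { // effectively "is this point labeled?"
                int label = (int) x.classValue();
                votes.merge(label, x.weight(), Double::sum);
            }
        }
    }

    Map<Integer, Double> getVotes() {
        return votes;
    }
}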
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Handle training instance that is an outlier to our current model
 * @param x Data instance
 * @param ncx nearest cluster (and distance)
 * @return cluster (if created) or null (if a total outlier)
 */
private Riffle trainOnOutlierInstance(Instance x, NearestClusterTuple ncx) {
    Riffle ret = null;
    final boolean belowClusterLimit = (clusters.size() < this.maximumNumberOfClusterSizeOption.getValue());
    final NearestInstanceTuple[] nearestOutliers = findNearestOutliers(x);
    final int q = this.minimumClusterSizeOption.getValue();
    double qDout = 0;
    double qDmin = 0;
    if (nearestOutliers.length > q) {
        for (int i = 0; i < nearestOutliers.length && i < q; ++i) {
            qDout += nearestOutliers[i].d / (double) q;
        }
        final NearestInstanceTuple[] nearestClusterInstances = findNearestNeighbors(ncx.getCluster().instances, x);
        for (int i = 0; i < nearestClusterInstances.length && i < q; ++i) {
            qDmin += nearestClusterInstances[i].d / (double) Math.min(q, nearestOutliers.length);
        }
    }
    final double qNSC = (nearestOutliers.length >= q && (qDout > 0 || qDmin > 0))
            ? (qDmin - qDout) / Math.max(qDmin, qDout)
            : -1.5;
    final boolean necessaryCriteriaForNewCluster = (qNSC > 0) && (nearestOutliers.length > q);
    if (necessaryCriteriaForNewCluster) {
        // X has a critical mass of friendly outcasts, so make a new club
        Riffle newCluster = this.createNewCluster(x);
        ret = newCluster;
        // Make new cluster up to radius of nearest cluster, but no more than 2q instances
        for (int i = 0; i < nearestOutliers.length && i < (q); ++i) {
            if (nearestOutliers[i].d > ncx.getCluster().getRadius()) {
                break;
            }
            newCluster.addInstance(nearestOutliers[i].x);
            newCluster.instances.add(nearestOutliers[i].x);
            newCluster.trainEmbeddedClassifier(nearestOutliers[i].x);
        }
        for (Instance otherPts : ncx.getCluster().instances) {
            if (this.clustererRandom.nextDouble() < 0.5 && otherPts.weight() > 0.99) {
                newCluster.trainEmbeddedClassifier(otherPts);
            }
        } // end for(x)
        // If at limit, prune the worst cluster to make room for this new one
        if (!belowClusterLimit) {
            double worstWeight = Double.MAX_VALUE;
            Riffle worstCluster = null;
            for (Riffle rfc : clusters) {
                if (rfc.getWeight() < worstWeight) {
                    worstWeight = rfc.getWeight();
                    worstCluster = rfc;
                }
            }
            if (worstCluster != null) {
                clusters.remove(worstCluster);
            }
        }
        newCluster.recomputeAll();
        this.clusters.add(newCluster);
    }
    return ret;
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Train on data instance
 *
 * @param x instance to train on
 */
@Override
public final void trainOnInstanceImpl(Instance x) {
    safeInit(x);
    assert (x != null) : "Sieve::trainOnInstanceImpl() Training on a null instance!";
    int y = (int) x.classValue();
    if ((y > 0) && (y < knownLabels.length)) {
        knownLabels[y] += x.weight();
    }
    this.instancesSeen++;
    this.weightsSeen += x.weight();
    this.universalCluster.addInstance(x);
    final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(this.clusters, x);
    if (nearestClusters.length < 1) { // handles weird corner case
        Riffle firstCluster = this.createNewCluster(x);
        clusters.add(firstCluster);
        System.err.println("Sieve::trainOnInstanceImpl() - no other clusters found!");
    } else {
        // Everyone takes a weight hit, and we will reward the best later...
        for (NearestClusterTuple nct : nearestClusters) {
            nct.getCluster().penalize();
        }
        NearestClusterTuple ncx = nearestClusters[0]; // for code convenience
        ClusterPointPair cxp = new ClusterPointPair(x, ncx.getCluster()); // we may change this later in the function
        if (ncx.getDistance() > ncx.getCluster().getRadius()) { // outlier
            // Hang out with the outcasts and see if you can start your own clique
            cxp.c = null;
            if (!onlyCreateNewClusterAtResyncOption.isSet()) {
                cxp.c = trainOnOutlierInstance(x, ncx);
            }
            if (cxp.c == null) {
                this.potentialNovels.add(x); // or just wait patiently for a friend to sit next to you
            }
        } else { // end if(isRadialOutlier)
            // Or join an existing club if you are in the "IN" crowd...
            Riffle nc = ncx.getCluster();
            nc.reward();
            nc.trainEmbeddedClassifier(x);
            nc.addInstance(x);
        } // end else (not outlier)
        // Randomly (based on distance) cross-train other models
        for (int i = 0; i < nearestClusters.length; ++i) {
            double pTrain = ((double) nearestClusters.length - i) / (2.0 * nearestClusters.length);
            if (this.clustererRandom.nextDouble() < pTrain) {
                nearestClusters[i].getCluster().trainEmbeddedClassifier(x);
            }
        } // end for(i)
        hopperCache.addLast(cxp);
    } // corner case safety
    periodicResync();
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/** Temporary function for algorithm analysis */
private void debugMetrics(double qNSC, double qDout, double qDmin, double dist, double rawTally, Instance x, Riffle c) {
    if (this.logMetaRecordsOption.isSet()) {
        try {
            int groundTruth = (int) x.classValue();
            boolean isTrueNovel = (groundTruth > 0) && (groundTruth < knownLabels.length)
                    && (knownLabels[groundTruth] < (this.minimumClusterSizeOption.getValue()));
            String ncCSVLine = "" + universalCluster.size() + "," + universalCluster.getRadius() + "," + rawTally
                    + "," + c.getPurity() + "," + c.size() + "," + c.getWeight() + "," + c.getRadius() + "," + dist
                    + "," + (c.isOutlier(x) ? 1 : 0) + "," + x.weight() + "," + qDmin + "," + qDout + "," + qNSC
                    + "," + isTrueNovel;
            ncCSVwriter.write(ncCSVLine);
            ncCSVwriter.newLine();
            ncCSVwriter.flush();
        } catch (IOException fileIoExcption) {
            System.err.println("Could not write NC CSV line: " + fileIoExcption.toString());
        }
    }
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/** @return training accuracy */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 1;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);
    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Instance x : thisCluster.getHeader()) {
            Instance pseudoPt = makePerceptronInstance(thisCluster, x);
            for (Riffle thatCluster : this.clusters) {
                double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    for (Instance x : this.outlierPerceptronTrainingSet) {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    }
    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) { // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs
        // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // Adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);
        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons
    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}
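The heart of trainPerceptron() is an AdaBoost-style reweighting loop driven entirely by weight() and setWeight(): correctly classified instances have their weight multiplied by err/(1 - err), and all weights are then renormalized. Below is a compact sketch of just that step, assuming a Weka version in which Instances is iterable; the BoostReweighter class and the Predicate standing in for the weak learner are hypothetical, and degenerate error rates of exactly 0 or 1 are not handled here.

import java.util.function.Predicate;
import weka.core.Instance;
import weka.core.Instances;

// Sketch of the boosting-style reweighting step above, factored out for clarity.
// The Predicate stands in for "does the current weak learner classify x correctly?";
// it is a placeholder, not part of the MOA/Weka API.
final class BoostReweighter {
    static void reweight(Instances trainingSet, Predicate<Instance> correctlyClassifies) {
        // Error of the weak learner = total weight of misclassified instances
        double error = 0;
        for (Instance x : trainingSet) {
            if (!correctlyClassifies.test(x)) {
                error += x.weight();
            }
        }
        // Shrink the weights of instances the learner already gets right...
        double weightSum = 0;
        for (Instance x : trainingSet) {
            double w = x.weight();
            if (correctlyClassifies.test(x)) {
                w *= error / (1.0 - error);
                x.setWeight(w);
            }
            weightSum += w;
        }
        // ...then renormalize so the weights again sum to 1
        for (Instance x : trainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
    }
}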
From source file:moa.clusterers.AbstractClusterer.java
License:Open Source License
public void trainOnInstance(Instance inst) {
    if (inst.weight() > 0.0) {
        this.trainingWeightSeenByModel += inst.weight();
        trainOnInstanceImpl(inst);
    }
}
From source file:moa.evaluation.BasicClassificationPerformanceEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] classVotes) {
    double weight = inst.weight();
    int trueClass = (int) inst.classValue();
    if (weight > 0.0) {
        if (this.weightObserved == 0) {
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += weight;
        int predictedClass = Utils.maxIndex(classVotes);
        if (predictedClass == trueClass) {
            this.weightCorrect += weight;
        }
        this.rowKappa[predictedClass] += weight;
        this.columnKappa[trueClass] += weight;
    }
    if (this.lastSeenClass == trueClass) {
        this.weightCorrectNoChangeClassifier += weight;
    }
    this.lastSeenClass = trueClass;
}
From source file:moa.evaluation.BasicClassificationScoringEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] classVotes) {
    double weight = inst.weight();
    int trueClass = (int) inst.classValue();
    if (weight > 0.0) {
        if (this.weightObserved == 0) {
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += weight;
        // MSE calculation
        int predictedClass = Utils.maxIndex(classVotes);
        if (predictedClass == trueClass) {
            this.weightCorrect += weight;
        }
        double[] normalized = normalize(classVotes);
        double vote = 0;
        if (normalized.length > 0) {
            vote = trueClass < normalized.length ? normalized[trueClass] : 0;
        }
        if (Double.compare(vote, Double.NaN) == 0) {
            int countNaN = 0;
            for (int i = 0; i < classVotes.length; ++i) {
                if (Double.compare(normalized[i], Double.NaN) == 0) {
                    countNaN++;
                }
            }
            vote = 1;
            if (countNaN > 1 && classVotes.length > 1) {
                vote = 1.0 / countNaN;
            }
        }
        this.mse += 1 - vote;
        this.saw++;
        this.rowKappa[predictedClass] += weight;
        this.columnKappa[trueClass] += weight;
    }
}
From source file:moa.evaluation.BasicConceptDriftPerformanceEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] classVotes) {
    // classVotes[0] -> is Change
    // classVotes[1] -> is in Warning Zone
    // classVotes[2] -> delay
    // classVotes[3] -> estimation
    this.inputValues = inst.value(2);
    if (inst.weight() > 0.0 && classVotes.length == 4) {
        if (inst.numAttributes() > 1) {
            // if there is ground truth, we monitor delay
            this.delay++;
        }
        this.weightObserved += inst.weight();
        if (classVotes[0] == 1.0) {
            // Change detected
            // System.out.println("Change detected with delay " + this.delay);
            this.numberDetections += inst.weight();
            if (this.hasChangeOccurred == true) {
                this.totalDelay += this.delay - classVotes[2];
                this.numberDetectionsOccurred += inst.weight();
                this.hasChangeOccurred = false;
            }
        }
        if (this.hasChangeOccurred && classVotes[1] == 1.0) {
            // Warning detected
            // System.out.println("Warning detected at " + getTotalWeightObserved());
            if (this.isWarningZone == false) {
                this.numberWarnings += inst.weight();
                this.isWarningZone = true;
            }
        } else {
            this.isWarningZone = false;
        }
        if (inst.numAttributes() > 1) {
            if (inst.value(inst.numAttributes() - 2) == 1.0) { // Attribute 1: ground-truth change
                this.numberChanges += inst.weight();
                this.delay = 0;
                this.hasChangeOccurred = true;
            }
        }
        // Compute prediction error
        if (classVotes.length > 1) {
            this.errorPrediction += Math.abs(classVotes[3] - inst.value(0));
        }
    }
}
From source file:moa.evaluation.BasicRegressionPerformanceEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] prediction) {
    if (inst.weight() > 0.0) {
        this.weightObserved += inst.weight();
        if (prediction.length > 0) {
            this.squareError += (inst.classValue() - prediction[0]) * (inst.classValue() - prediction[0]);
            this.averageError += Math.abs(inst.classValue() - prediction[0]);
        }
    }
}