List of usage examples for weka.core Instance weight
public double weight();
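Instance.weight() returns the example's weight (1.0 by default), which learners use to scale how much the instance counts during training; setWeight(double) changes it. A minimal, self-contained sketch of reading and setting the weight, assuming the weka.core API of WEKA 3.7+ (where DenseInstance is the concrete class); the attribute and relation names are illustrative only:

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class WeightExample {
    public static void main(String[] args) {
        // Tiny single-attribute dataset (names are illustrative)
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x"));
        Instances data = new Instances("demo", attrs, 0);

        // DenseInstance(weight, attributeValues) sets the weight at construction time
        Instance inst = new DenseInstance(2.0, new double[] { 3.14 });
        inst.setDataset(data);
        System.out.println(inst.weight());   // prints 2.0

        inst.setWeight(0.5);                 // weights can be changed later
        System.out.println(inst.weight());   // prints 0.5
    }
}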
From source file:moa.classifiers.trees.ePTTD.java
License:Creative Commons License
@Override
public void trainOnInstanceImpl(Instance inst) {
    // TODO Auto-generated method stub
    if (inst.weight() > 0.0) {
        this.trainingWeightSeenByModel += inst.weight();
    }
    if (!isClassificationEnabled) {
        if (instancesBuffer == null) {
            //this.instancesBuffer = new Instances(inst.dataset());
            this.instancesBuffer = new Instances(inst.dataset(), 0);
        }
        instancesBuffer.add(inst);
        if (instancesBuffer.size() == widthInitOption.getValue()) {
            // Build first time Classifier
            checkOptionsIntegity();
            this.ePTTDintern.buildClassifier(instancesBuffer);
            isClassificationEnabled = true;
        }
        return;
    }
    ePTTDintern.updateClassifier(inst);
}
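The guard on inst.weight() > 0.0 above reflects a convention used throughout the MOA snippets on this page (see the Riffle methods below): an instance with weight zero is treated as non-training/unlabeled data and contributes nothing to trainingWeightSeenByModel. A short fragment illustrating that convention, reusing the weka.core imports from the sketch above (the values are hypothetical):

// Weight zero marks a non-training instance; only positive weights accumulate
Instance labeled = new DenseInstance(1.0, new double[] { 0.7 });
Instance unlabeled = new DenseInstance(1.0, new double[] { 0.2 });
unlabeled.setWeight(0.0);                       // mark as non-training/unlabeled

double trainingWeightSeen = 0.0;
for (Instance inst : new Instance[] { labeled, unlabeled }) {
    if (inst.weight() > 0.0) {                  // same guard as trainOnInstanceImpl above
        trainingWeightSeen += inst.weight();
    }
}
System.out.println(trainingWeightSeen);         // 1.0 - only the labeled instance counts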
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Create a new cluster from an exemplar data point
 * @param x
 */
public Riffle(Instance x) {
    safeInit(x);
    this.numLabeledPoints = (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] += x.weight();
    this.gtLabelFrequencies[(int) x.classValue()]++;
    for (int i = 0; (i < this.symbolFrequencies.length) && (i < x.numAttributes()); ++i) {
        double value = x.value(i);
        if (this.symbolFrequencies[i] == null) {
            if ((this.parentClusterer != null) && (this.parentClusterer.getUniverse() != null)) {
                this.variances[i] = this.parentClusterer.getUniverse().variances[i];
            } else {
                this.variances[i] = this.initialStandardDeviationOption.getValue();
            }
        } else {
            this.variances[i] = 1;
            this.symbolFrequencies[i][(int) value]++;
        }
    }
    this.numTotalPoints = 1;
    this.setGroundTruth(x.classValue());
    this.setCenter(x.toDoubleArray());
    this.setWeight(x.weight());
    this.setRadius(this.initialStandardDeviationOption.getValue());
    this.runningSumOfSquares = 0.0;
    this.setId(autoindex.getAndIncrement());
}
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Add a data point instance to this cluster
 *
 * @param x
 */
final public void addInstance(Instance x) {
    safeInit(x);
    this.numTotalPoints++;
    this.numLabeledPoints += (x.weight() > 0.9999) ? 1 : 0;
    this.labelFrequencies[(int) x.classValue()] += x.weight(); // non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]++; // For non-decision metrics only
    // Select strategy for on-line *-means (Any means)
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.addInstanceGrimson(x);
        break;
    case 1:
        this.addInstanceViaShephard(x);
        break;
    case 2:
        this.instances.add(x);
        return;
    default:
        System.err.println("Invalid addInstance strategy");
    }
    recompute();
}
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Inverse process of adding an instance
 *
 * @param x
 */
final public void removeInstance(Instance x) {
    safeInit(x);
    this.numLabeledPoints -= (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] -= x.weight(); // non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]--; // For non-decision metrics only
    this.numTotalPoints--;
    // Select strategy for on-line *-means
    switch (updateStrategyOption.getChosenIndex()) {
    case 0:
        this.removeInstanceGrimson(x);
        break;
    case 1:
        this.removeInstanceViaShephard(x);
        break;
    case 2:
        this.instances.remove(x);
        return;
    default:
        System.err.println("Invalid removeInstance strategy");
    }
    recompute();
}
From source file:moa.cluster.Riffle.java
License:Apache License
/**
 * Set pre-computed information fields
 * @return
 */
public final double recomputeAll() {
    if (this.instances != null) {
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // Pre-populate universal cluster with data points
                if (x == null) {
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            } // for
            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid); // temporary - start with standard gaussian, gets updated below
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible,
            // so we use the 2-Pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        double delta = (this.symbolFrequencies[i] == null) ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if (!instances.isEmpty())
        recompute();
    } // end if (instances != null)
    return getRadius() * getEntropy();
}
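The per-dimension variance above is the compensated two-pass form var = (sum(delta^2) - (sum(delta))^2 / n) / (n - 1), with delta the deviation from the centroid. A standalone fragment of that arithmetic with hypothetical deviations, for comparison with the clusterVariance update:

// Compensated two-pass sample variance, as computed per dimension above (hypothetical data)
double[] deltas = { 0.4, -1.1, 0.7, 0.2 };      // deviations from the centroid in one dimension
double n = deltas.length;
double sum = 0, sumSq = 0;
for (double d : deltas) {
    sum += d;                                   // corresponds to cep[i]
    sumSq += d * d;                             // corresponds to clusterVariance[i] before correction
}
double variance = (sumSq - sum * sum / n) / (n - 1);
System.out.println(variance);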
From source file:moa.clusterer.FeS2.java
License:Apache License
@Override
public void trainOnInstance(Instance inst) {
    this.trainingWeightSeenByModel += inst.weight();
    trainOnInstanceImpl(inst);
}
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * @param x
 * @param c
 */
private void addToCluster(Instance x, Riffle c) {
    c.reward();
    c.addInstance(x);
    // If configured, use the prediction as a reduced-weight training label
    // WARNING: This often becomes divergent and error-inducing if done with bad weights
    double fauxWeight = this.hypothesisWeightOption.getValue();
    if ((x.weight() == 0) && (fauxWeight > 0)) {
        int fauxClass = weka.core.Utils.maxIndex(this.getVotesForInstance(x));
        c.addLabeling(fauxClass, fauxWeight);
    }
}
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * @param x instance to train on
 */
@Override
public void trainOnInstanceImpl(Instance x) {
    safeInit(x);
    assert (x != null) : "FeS2::trainOnInstanceImpl() Training on a null instance!";
    int classValue = (int) x.classValue();
    boolean isNewLabel = (!knownLabels.contains(classValue)) && (x.weight() > 0);
    if ((x.weight() > 0)) {
        this.knownLabels.add(classValue);
    }
    this.universalCluster.addInstance(x);
    // Find nearest Cluster
    final SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(this.clusters, x);
    assert !nearestClusters.isEmpty() : "Cluster set for probability matching is empty";
    // Compute some base metrics we need to know:
    double maxRadius = 0;
    double avgRadius = 0;
    boolean unanimousOutlier = true;
    double weightTotal = 0;
    double minWeight = Double.MAX_VALUE;
    for (NearestClusterTuple nct : nearestClusters) {
        unanimousOutlier = unanimousOutlier && nct.getCluster().isOutlier(x);
        maxRadius = Math.max(maxRadius, nct.getCluster().getRadius());
        avgRadius += nct.getCluster().getRadius();
    }
    avgRadius /= nearestClusters.size();
    // Update weights
    for (NearestClusterTuple nct : nearestClusters) {
        Riffle c = nct.getCluster();
        c.penalize(); // unilaterally reduce weights
        int clusterMajorityClass = weka.core.Utils.maxIndex(c.getVotes());
        // increase weights for matches (define 'match' criteria by strategy parameter)
        switch (this.positiveClusterFeedbackStrategyOption.getChosenIndex()) {
        case 0: // only the closest
            if (!unanimousOutlier && c == nearestClusters.last().getCluster()) {
                addToCluster(x, c);
            }
            break;
        case 1: // All label matches
            // This ternary condition is very important for results
            int hypothesisClass = (x.weight() > 0) ? classValue
                    : weka.core.Utils.maxIndex(this.getVotesForInstance(x));
            if (clusterMajorityClass == hypothesisClass) {
                addToCluster(x, c);
            }
            break;
        case 2: // All proximity matches
            if (!nct.getCluster().isOutlier(x)) {
                addToCluster(x, c);
            }
            break;
        default:
            break;
        } // end switch
        weightTotal += c.getWeight();
        minWeight = Math.min(minWeight, c.getWeight());
    }
    // Sort by (weight / sigma)
    Riffle[] sortedClusters = new Riffle[clusters.size()];
    int i = 0;
    for (Riffle c : clusters) {
        sortedClusters[i++] = c;
    }
    // Kudos to Java 8 and lambda expressions for making this a one-liner:
    Arrays.parallelSort(sortedClusters,
            (Riffle a, Riffle b) -> Double.compare(a.getWeight() / Math.max(a.getRadius(), 1e-96),
                    b.getWeight() / Math.max(b.getRadius(), 1e-96)));
    boolean atClusterCapacity = (this.clusters.size() >= Math.min(
            this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
            this.maximumNumberOfClusterSizeOption.getValue()));
    // * * *
    // Results show that when average P(x|k) < Chauvenet, no new clusters, and vice versa
    // (which is the opposite of the expected behavior)
    // * * *
    boolean universalOutlier = this.universalCluster.isOutlier(x);
    if (isNewLabel) {
        newLabelCount++;
    }
    if (universalOutlier) {
        universalOutlierCount++;
    }
    if (unanimousOutlier) {
        unanimousOutlierCount++;
    }
    // If we have no matches at all, then the weakest cluster is replaced by a new one
    // with a high variance and low weight
    //if (isNewLabel || (unanimousOutlier && universalOutlier)) {
    if (isNewLabel || unanimousOutlier) {
        Riffle weakestLink = sortedClusters[sortedClusters.length - 1]; // get last one
        Riffle novelCluster = this.createNewCluster(x);
        //novelCluster.setRadius((avgRadius + maxRadius) / 2.0); // Set to half-way between average and max radius
        novelCluster.setWeight(weightTotal / nearestClusters.size()); // <---- Validate this ------
        weightTotal += novelCluster.getWeight(); // update for new normalization factor
        // You are the weakest link... Goodbye
        if (atClusterCapacity) {
            weightTotal -= weakestLink.getWeight(); // update for new normalization factor
            this.clusters.remove(weakestLink);
        }
        // Everyone please welcome our newest contestant...
        clusters.add(novelCluster);
    }
    // Normalize Weights and Update variance estimates for singleton clusters
    double[] universeVariance = universalCluster.getVariances();
    double[] initialVariance = new double[universeVariance.length];
    for (int j = 0; j < initialVariance.length; ++j) {
        initialVariance[j] = universeVariance[j] * 0.85;
    }
    if (weightTotal <= 0) {
        weightTotal = 1;
    }
    for (Riffle c : this.clusters) {
        if (c.size() < 2) {
            c.setVariances(initialVariance);
        }
        c.setWeight(c.getWeight() / weightTotal);
    }
}
From source file:moa.clusterer.FeS2.java
License:Apache License
/**
 * @return training accuracy
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 10;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);
    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Riffle thatCluster : this.clusters) {
            double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
            for (Instance x : thatCluster.getHeader()) {
                Instance pseudoPt = makePerceptronInstance(thisCluster, x);
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    this.outlierPerceptronTrainingSet.parallelStream().forEach((x) -> {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    });
    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) {
                    // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs
        // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);
        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons
    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}
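The instance reweighting above follows the usual AdaBoost scheme: correctly classified instances have their weight multiplied by err / (1 - err), all weights are renormalized, and the new perceptron joins the ensemble with vote weight log((1 - err) / err). A standalone fragment of that arithmetic with hypothetical weights and error:

// AdaBoost-style reweighting, mirroring the loops above (hypothetical values)
double[] w = { 0.25, 0.25, 0.25, 0.25 };        // instance weights, as held via Instance.setWeight(...)
boolean[] correct = { true, true, false, true };
double err = 0.25;                              // sum of weights of misclassified instances
double sum = 0;
for (int i = 0; i < w.length; i++) {
    if (correct[i]) {
        w[i] *= err / (1.0 - err);              // shrink weights of correctly classified instances
    }
    sum += w[i];
}
for (int i = 0; i < w.length; i++) {
    w[i] /= sum;                                // renormalize so the weights sum to 1
}
double learnerWeight = Math.log((1 - err) / err);  // ensemble vote weight for this perceptron
System.out.println(learnerWeight);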
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
@Override
public final void trainOnInstance(Instance inst) {
    this.trainingWeightSeenByModel += inst.weight();
    trainOnInstanceImpl(inst);
}