List of usage examples for weka.core.Instance.weight()
public double weight();
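Before the longer examples, here is a minimal, self-contained sketch of the call being indexed here: reading and changing an instance's weight. The attribute names and values are invented for illustration; the DenseInstance(weight, values) constructor, weight(), and setWeight() are standard weka.core API (assuming a Weka 3.7-era class layout).

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class InstanceWeightDemo {
    public static void main(String[] args) {
        // Hypothetical two-attribute numeric dataset, used only to host the instance
        ArrayList<Attribute> attrs = new ArrayList<>();
        attrs.add(new Attribute("x1"));
        attrs.add(new Attribute("x2"));
        Instances data = new Instances("demo", attrs, 0);

        // DenseInstance(weight, values) sets the weight at construction time
        Instance inst = new DenseInstance(1.0, new double[] { 0.5, 2.0 });
        inst.setDataset(data);
        System.out.println(inst.weight());  // prints 1.0, the weight given above

        // Weights can be changed later, e.g. to down-weight a noisy example
        inst.setWeight(0.25);
        System.out.println(inst.weight());  // prints 0.25
    }
}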
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * This is not your grandpa's E-M algorithm... it has multiple mini-steps,
 * but "The e1-m1-e2-m2-e3-m3-Algorithm" is a mouthful, so we just call it *-Means Clustering
 * {Pronounced "Any-means (necessary) clustering"}
 * @param D
 * @param subclusters
 * @param maxK
 * @return score at the end of the process
 */
protected final double EMStep(List<ClusterPointPair> D, Collection<Riffle> subclusters, int maxK) {
    double ret = 0;
    // Clear the palette
    for (Riffle c : subclusters) {
        if (c.instances == null) {
            c.instances = c.getHeader();
        }
        c.instances.clear();
        c.cleanTallies();
    }
    // Assign the X's to the nearest clusters (Maximization step 1)
    for (ClusterPointPair cxp : D) {
        if (this.potentialNovels.contains(cxp.x)) { // could also be if cxp.c == null, but this is safer
            continue; // ignore the outliers for a moment
        }
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, cxp.x);
        // double ds[] = new double[nearestClusters.length];
        // int foo = 0;
        // for (NearestClusterTuple gnarf : nearestClusters) {
        //     ds[foo++] = gnarf.getDistance();
        // }
        cxp.c = nearestClusters[0].getCluster();
        nearestClusters[0].getCluster().instances.add(cxp.x);
        if (cxp.x.weight() > 0.99) {
            nearestClusters[0].getCluster().addLabeling((int) cxp.x.classValue(), cxp.x.weight());
        }
    }
    // Find new radius (Expectation step)
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    // Remove empty clusters to make room for splits (Expectation-ish)
    Iterator<Riffle> cIter = subclusters.iterator();
    while (cIter.hasNext()) {
        Riffle rc = cIter.next();
        if (rc.instances.size() < 1) {
            cIter.remove();
        }
    }
    // Are we full?
    if (subclusters.size() < maxK) {
        // Fix bad clusters (Maximization step 2 - breaking up noisy clusters)
        Riffle sortedClusters[] = new Riffle[subclusters.size()];
        int tmpIdx = 0;
        for (Riffle tmpRfl : subclusters) {
            if (tmpIdx >= sortedClusters.length) {
                break;
            }
            sortedClusters[tmpIdx] = tmpRfl;
            tmpIdx++;
        }
        Arrays.sort(sortedClusters, new Comparator<Riffle>() {
            @Override
            public int compare(Riffle first, Riffle second) {
                if (first == null) {
                    return 1;
                }
                if (second == null) {
                    return -1;
                }
                double[] votes1 = first.getVotes().clone();
                double[] votes2 = second.getVotes().clone();
                double total1 = weka.core.Utils.sum(votes1);
                double total2 = weka.core.Utils.sum(votes2);
                Arrays.sort(votes1);
                Arrays.sort(votes2);
                double penultimate1 = 1e-16 + ((votes1.length > 1) ? votes1[votes1.length - 2] : 0);
                double penultimate2 = 1e-16 + ((votes2.length > 1) ? votes2[votes2.length - 2] : 0);
                // this is equivalent to purity - margin... yes, really... gotta love math
                double score1 = (total1 > 0) ? first.size() * penultimate1 / total1 : 0;
                double score2 = (total2 > 0) ? second.size() * penultimate2 / total2 : 0;
                return Double.compare(score2, score1);
            }
        }); // end anonymous sort
        for (int cIdx = 0; cIdx < sortedClusters.length && subclusters.size() < maxK; cIdx++) {
            Riffle splitMe = sortedClusters[cIdx];
            if (splitMe.getPurity() > 0.9) {
                continue;
            }
            double[] votes = splitMe.getVotes();
            final double totalVotes = weka.core.Utils.sum(votes);
            final double critVotes = 1.0 / (votes.length * 2);
            if (totalVotes < 2) {
                continue;
            }
            ArrayList<Riffle> splitSet = new ArrayList<>(votes.length);
            int numberOfNewClusters = 0;
            for (int lblIdx = 0; lblIdx < votes.length; ++lblIdx) {
                double labelVote = votes[lblIdx] / totalVotes;
                if (labelVote >= critVotes) {
                    splitSet.add(this.createNewCluster(splitMe.toInstance()));
                    numberOfNewClusters++;
                } else {
                    splitSet.add(null);
                }
            }
            if (numberOfNewClusters < 2) {
                continue;
            }
            Instances extras = new Instances(splitMe.getHeader());
            for (Instance x : splitMe.instances) {
                if (x.weight() > 0.999) {
                    Riffle myHopefulCluster = splitSet.get((int) x.classValue());
                    if (myHopefulCluster != null) {
                        myHopefulCluster.instances.add(x);
                        myHopefulCluster.addLabeling((int) x.classValue(), x.weight());
                    } else {
                        extras.add(x);
                    }
                } else {
                    extras.add(x);
                }
            }
            LinkedList<Riffle> goodSet = new LinkedList<>();
            for (Riffle rfc : splitSet) {
                if (rfc == null) {
                    continue;
                }
                rfc.recomputeAll();
                goodSet.add(rfc);
                subclusters.add(rfc);
            }
            for (Instance x : extras) {
                final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(goodSet, x);
                nearestClusters[0].getCluster().instances.add(x);
            }
            subclusters.remove(splitMe);
        }
    }
    // The penultimate Expectation step
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    // See if any outliers should actually be consumed by a cluster now... (Maximization step 3)
    Iterator<Instance> xIter = potentialNovels.iterator();
    while (xIter.hasNext()) {
        Instance xOut = xIter.next();
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, xOut);
        if (nearestClusters == null || nearestClusters.length < 1) {
            continue;
        }
        Riffle c = nearestClusters[0].getCluster();
        double d = nearestClusters[0].getDistance();
        if (d > c.getRadius()) {
            // Welcome home, wayward tuple!
            c.instances.add(xOut);
            xIter.remove();
        }
    }
    // And the final Expectation step
    ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    return ret;
}
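A recurring idiom in the method above is using weight() as a label-confidence flag: only instances whose weight exceeds 0.99 are allowed to vote their class value into a cluster's label tally. The sketch below isolates that idiom; the LabelTally class and its method names are hypothetical, and only the threshold convention is taken from the code above.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import weka.core.Instance;

// Hypothetical helper illustrating the "weight as label confidence" convention:
// instances with weight > 0.99 are treated as labeled and vote for their class.
class LabelTally {
    private final Map<Integer, Double> votes = new HashMap<>();

    void addAll(List<Instance> members) {
        for (Instance x : members) {
            if (x.weight() > 0.99) { // effectively "is this point labeled?"
                int label = (int) x.classValue();
                votes.merge(label, x.weight(), Double::sum);
            }
        }
    }

    Map<Integer, Double> getVotes() {
        return votes;
    }
}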
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Handle training instance that is an outlier to our current model
 * @param x Data instance
 * @param ncx nearest cluster (and distance)
 * @return cluster (if created) or null (if a total outlier)
 */
private Riffle trainOnOutlierInstance(Instance x, NearestClusterTuple ncx) {
    Riffle ret = null;
    final boolean belowClusterLimit = (clusters.size() < this.maximumNumberOfClusterSizeOption.getValue());
    final NearestInstanceTuple[] nearestOutliers = findNearestOutliers(x);
    final int q = this.minimumClusterSizeOption.getValue();
    double qDout = 0;
    double qDmin = 0;
    if (nearestOutliers.length > q) {
        for (int i = 0; i < nearestOutliers.length && i < q; ++i) {
            qDout += nearestOutliers[i].d / (double) q;
        }
        final NearestInstanceTuple[] nearestClusterInstances = findNearestNeighbors(ncx.getCluster().instances, x);
        for (int i = 0; i < nearestClusterInstances.length && i < q; ++i) {
            qDmin += nearestClusterInstances[i].d / (double) Math.min(q, nearestOutliers.length);
        }
    }
    final double qNSC = (nearestOutliers.length >= q && (qDout > 0 || qDmin > 0))
            ? (qDmin - qDout) / Math.max(qDmin, qDout)
            : -1.5;
    final boolean necessaryCriteriaForNewCluster = (qNSC > 0) && (nearestOutliers.length > q);
    if (necessaryCriteriaForNewCluster) {
        // X has a critical mass of friendly outcasts, so make a new club
        Riffle newCluster = this.createNewCluster(x);
        ret = newCluster;
        // Make new cluster up to radius of nearest cluster, but no more than 2q instances
        for (int i = 0; i < nearestOutliers.length && i < (q); ++i) {
            if (nearestOutliers[i].d > ncx.getCluster().getRadius()) {
                break;
            }
            newCluster.addInstance(nearestOutliers[i].x);
            newCluster.instances.add(nearestOutliers[i].x);
            newCluster.trainEmbeddedClassifier(nearestOutliers[i].x);
        }
        for (Instance otherPts : ncx.getCluster().instances) {
            if (this.clustererRandom.nextDouble() < 0.5 && otherPts.weight() > 0.99) {
                newCluster.trainEmbeddedClassifier(otherPts);
            }
        } // end for(x)
        // If at limit, prune the worst cluster to make room for this new one
        if (!belowClusterLimit) {
            double worstWeight = Double.MAX_VALUE;
            Riffle worstCluster = null;
            for (Riffle rfc : clusters) {
                if (rfc.getWeight() < worstWeight) {
                    worstWeight = rfc.getWeight();
                    worstCluster = rfc;
                }
            }
            if (worstCluster != null) {
                clusters.remove(worstCluster);
            }
        }
        newCluster.recomputeAll();
        this.clusters.add(newCluster);
    }
    return ret;
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/**
 * Train on data instance
 *
 * @param x instance to train on
 */
@Override
public final void trainOnInstanceImpl(Instance x) {
    safeInit(x);
    assert (x != null) : "Sieve::trainOnInstanceImpl() Training on a null instance!";
    int y = (int) x.classValue();
    if ((y > 0) && (y < knownLabels.length)) {
        knownLabels[y] += x.weight();
    }
    this.instancesSeen++;
    this.weightsSeen += x.weight();
    this.universalCluster.addInstance(x);
    final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(this.clusters, x);
    if (nearestClusters.length < 1) { // handles weird corner case
        Riffle firstCluster = this.createNewCluster(x);
        clusters.add(firstCluster);
        System.err.println("Sieve::trainOnInstanceImpl() - no other clusters found!");
    } else {
        // Everyone takes a weight hit, and we will reward the best later...
        for (NearestClusterTuple nct : nearestClusters) {
            nct.getCluster().penalize();
        }
        NearestClusterTuple ncx = nearestClusters[0]; // for code convenience
        ClusterPointPair cxp = new ClusterPointPair(x, ncx.getCluster()); // we may change this later in the function
        if (ncx.getDistance() > ncx.getCluster().getRadius()) { // outlier
            // Hang out with the outcasts and see if you can start your own clique
            cxp.c = null;
            if (!onlyCreateNewClusterAtResyncOption.isSet()) {
                cxp.c = trainOnOutlierInstance(x, ncx);
            }
            if (cxp.c == null) {
                this.potentialNovels.add(x); // or just wait patiently for a friend to sit next to you
            }
        } else { // end if(isRadialOutlier)
            // Or join an existing club if you are in the "IN" crowd...
            Riffle nc = ncx.getCluster();
            nc.reward();
            nc.trainEmbeddedClassifier(x);
            nc.addInstance(x);
        } // end else (not outlier)
        // Randomly (based on distance) cross-train other models
        for (int i = 0; i < nearestClusters.length; ++i) {
            double pTrain = ((double) nearestClusters.length - i) / (2.0 * nearestClusters.length);
            if (this.clustererRandom.nextDouble() < pTrain) {
                nearestClusters[i].getCluster().trainEmbeddedClassifier(x);
            }
        } // end for(i)
        hopperCache.addLast(cxp);
    } // corner case safety
    periodicResync();
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/** Temporary function for algorithm analysis */
private void debugMetrics(double qNSC, double qDout, double qDmin, double dist, double rawTally, Instance x, Riffle c) {
    if (this.logMetaRecordsOption.isSet()) {
        try {
            int groundTruth = (int) x.classValue();
            boolean isTrueNovel = (groundTruth > 0) && (groundTruth < knownLabels.length)
                    && (knownLabels[groundTruth] < (this.minimumClusterSizeOption.getValue()));
            String ncCSVLine = "" + universalCluster.size() + "," + universalCluster.getRadius() + "," + rawTally
                    + "," + c.getPurity() + "," + c.size() + "," + c.getWeight() + "," + c.getRadius() + "," + dist
                    + "," + (c.isOutlier(x) ? 1 : 0) + "," + x.weight() + "," + qDmin + "," + qDout + "," + qNSC
                    + "," + isTrueNovel;
            ncCSVwriter.write(ncCSVLine);
            ncCSVwriter.newLine();
            ncCSVwriter.flush();
        } catch (IOException fileIoExcption) {
            System.err.println("Could not write NC CSV line: " + fileIoExcption.toString());
        }
    }
}
From source file:moa.clusterer.outliers.Sieve.java
License:Apache License
/** @return training accuracy */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 1;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);
    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Instance x : thisCluster.getHeader()) {
            Instance pseudoPt = makePerceptronInstance(thisCluster, x);
            for (Riffle thatCluster : this.clusters) {
                double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    for (Instance x : this.outlierPerceptronTrainingSet) {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    }
    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) { // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs
        // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // Adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);
        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons
    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}
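The heart of trainPerceptron() is an AdaBoost-style reweighting loop driven entirely by weight() and setWeight(): correctly classified instances have their weight multiplied by err/(1 - err), and all weights are then renormalized. Below is a compact sketch of just that step, assuming a Weka version in which Instances is iterable; the BoostReweighter class and the Predicate standing in for the weak learner are hypothetical, and degenerate error rates of exactly 0 or 1 are not handled here.

import java.util.function.Predicate;
import weka.core.Instance;
import weka.core.Instances;

// Sketch of the boosting-style reweighting step above, factored out for clarity.
// The Predicate stands in for "does the current weak learner classify x correctly?";
// it is a placeholder, not part of the MOA/Weka API.
final class BoostReweighter {
    static void reweight(Instances trainingSet, Predicate<Instance> correctlyClassifies) {
        // Error of the weak learner = total weight of misclassified instances
        double error = 0;
        for (Instance x : trainingSet) {
            if (!correctlyClassifies.test(x)) {
                error += x.weight();
            }
        }
        // Shrink the weights of instances the learner already gets right...
        double weightSum = 0;
        for (Instance x : trainingSet) {
            double w = x.weight();
            if (correctlyClassifies.test(x)) {
                w *= error / (1.0 - error);
                x.setWeight(w);
            }
            weightSum += w;
        }
        // ...then renormalize so the weights again sum to 1
        for (Instance x : trainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
    }
}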
From source file:moa.clusterers.AbstractClusterer.java
License:Open Source License
public void trainOnInstance(Instance inst) {
    if (inst.weight() > 0.0) {
        this.trainingWeightSeenByModel += inst.weight();
        trainOnInstanceImpl(inst);
    }
}
From source file:moa.evaluation.BasicClassificationPerformanceEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] classVotes) {
    double weight = inst.weight();
    int trueClass = (int) inst.classValue();
    if (weight > 0.0) {
        if (this.weightObserved == 0) {
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += weight;
        int predictedClass = Utils.maxIndex(classVotes);
        if (predictedClass == trueClass) {
            this.weightCorrect += weight;
        }
        this.rowKappa[predictedClass] += weight;
        this.columnKappa[trueClass] += weight;
    }
    if (this.lastSeenClass == trueClass) {
        this.weightCorrectNoChangeClassifier += weight;
    }
    this.lastSeenClass = trueClass;
}
From source file:moa.evaluation.BasicClassificationScoringEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] classVotes) {
    double weight = inst.weight();
    int trueClass = (int) inst.classValue();
    if (weight > 0.0) {
        if (this.weightObserved == 0) {
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += weight;
        // MSE calculation
        int predictedClass = Utils.maxIndex(classVotes);
        if (predictedClass == trueClass) {
            this.weightCorrect += weight;
        }
        double[] normalized = normalize(classVotes);
        double vote = 0;
        if (normalized.length > 0) {
            vote = trueClass < normalized.length ? normalized[trueClass] : 0;
        }
        if (Double.compare(vote, Double.NaN) == 0) {
            int countNaN = 0;
            for (int i = 0; i < classVotes.length; ++i) {
                if (Double.compare(normalized[i], Double.NaN) == 0) {
                    countNaN++;
                }
            }
            vote = 1;
            if (countNaN > 1 && classVotes.length > 1) {
                vote = 1.0 / countNaN;
            }
        }
        this.mse += 1 - vote;
        this.saw++;
        this.rowKappa[predictedClass] += weight;
        this.columnKappa[trueClass] += weight;
    }
}
From source file:moa.evaluation.BasicConceptDriftPerformanceEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] classVotes) {
    // classVotes[0] -> is Change
    // classVotes[1] -> is in Warning Zone
    // classVotes[2] -> delay
    // classVotes[3] -> estimation
    this.inputValues = inst.value(2);
    if (inst.weight() > 0.0 && classVotes.length == 4) {
        if (inst.numAttributes() > 1) {
            // if there is ground truth, we monitor delay
            this.delay++;
        }
        this.weightObserved += inst.weight();
        if (classVotes[0] == 1.0) {
            // Change detected
            // System.out.println("Change detected with delay " + this.delay);
            this.numberDetections += inst.weight();
            if (this.hasChangeOccurred == true) {
                this.totalDelay += this.delay - classVotes[2];
                this.numberDetectionsOccurred += inst.weight();
                this.hasChangeOccurred = false;
            }
        }
        if (this.hasChangeOccurred && classVotes[1] == 1.0) {
            // Warning detected
            // System.out.println("Warning detected at " + getTotalWeightObserved());
            if (this.isWarningZone == false) {
                this.numberWarnings += inst.weight();
                this.isWarningZone = true;
            }
        } else {
            this.isWarningZone = false;
        }
        if (inst.numAttributes() > 1) {
            if (inst.value(inst.numAttributes() - 2) == 1.0) { // Attribute 1: ground-truth change
                this.numberChanges += inst.weight();
                this.delay = 0;
                this.hasChangeOccurred = true;
            }
        }
        // Compute prediction error
        if (classVotes.length > 1) {
            this.errorPrediction += Math.abs(classVotes[3] - inst.value(0));
        }
    }
}
From source file:moa.evaluation.BasicRegressionPerformanceEvaluator.java
License:Open Source License
@Override
public void addResult(Instance inst, double[] prediction) {
    if (inst.weight() > 0.0) {
        this.weightObserved += inst.weight();
        if (prediction.length > 0) {
            this.squareError += (inst.classValue() - prediction[0]) * (inst.classValue() - prediction[0]);
            this.averageError += Math.abs(inst.classValue() - prediction[0]);
        }
    }
}