Example usage for weka.core Instance weight

List of usage examples for weka.core Instance weight

Introduction

On this page you can find example usages of the weka.core Instance weight method.

Prototype

public double weight();

Source Link

Document

Returns the instance's weight.

Usage

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Runs one pass of the multi-stage clustering refinement over the given sub-clusters.
 * <p>
 * This is not a textbook E-M algorithm: it interleaves several assignment and re-estimation
 * mini-steps ("e1-m1-e2-m2-e3-m3"), so it is simply called *-Means clustering
 * (pronounced "Any-means (necessary) clustering"). The stages are:
 * <ol>
 * <li>reset every cluster's instance list and tallies;</li>
 * <li>assign each point of {@code D} that is not a potential novel point to its top-ranked
 *     cluster, recording a label vote for points whose weight exceeds 0.99;</li>
 * <li>recompute all clusters and drop those left without instances;</li>
 * <li>while fewer than {@code maxK} clusters exist, split impure clusters by label;</li>
 * <li>recompute, then move potential novel points that now fall within the radius of their
 *     top-ranked cluster into that cluster;</li>
 * <li>recompute once more and sum the per-cluster results.</li>
 * </ol>
 *
 * @param D point/cluster pairs to (re)assign; the {@code c} field of every assigned pair is updated
 * @param subclusters clusters to refine; modified in place (clusters may be removed and added)
 * @param maxK cluster count at which splitting stops
 * @return sum of {@code recomputeAll()} over all clusters after the final stage
 */
protected final double EMStep(List<ClusterPointPair> D, Collection<Riffle> subclusters, int maxK) {
    // Clear the palette: every cluster starts the pass with no instances and no tallies.
    for (Riffle c : subclusters) {
        if (c.instances == null) {
            c.instances = c.getHeader();
        }
        c.instances.clear();
        c.cleanTallies();
    }

    // Assign by X's to nearest clusters (Maximization step 1)
    for (ClusterPointPair cxp : D) {
        if (this.potentialNovels.contains(cxp.x)) { // could also be if cxp.c == null, but this is safer
            continue; // ignore the outliers for a moment
        }
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, cxp.x);
        final Riffle nearest = nearestClusters[0].getCluster();
        cxp.c = nearest;
        nearest.instances.add(cxp.x);
        // Only (nearly) fully weighted points contribute a label vote.
        if (cxp.x.weight() > 0.99) {
            nearest.addLabeling((int) cxp.x.classValue(), cxp.x.weight());
        }
    }

    // Find new radius (Expectation step). The returned scores are not needed yet;
    // only the score of the final recomputation is reported to the caller.
    for (Riffle c : subclusters) {
        c.recomputeAll();
    }

    // Remove empty clusters to make room for splits (Expectation-ish)
    Iterator<Riffle> cIter = subclusters.iterator();
    while (cIter.hasNext()) {
        Riffle rc = cIter.next();
        if (rc.instances.size() < 1) {
            cIter.remove();
        }
    }

    // Are we full?
    if (subclusters.size() < maxK) {
        // Fix bad clusters (Maximization step 2 - breaking up noisy clusters).
        // Work on a snapshot so that subclusters can be modified while candidates are visited.
        Riffle[] sortedClusters = subclusters.toArray(new Riffle[subclusters.size()]);
        Arrays.sort(sortedClusters, new Comparator<Riffle>() {
            @Override
            public int compare(Riffle first, Riffle second) {
                if (first == null) {
                    return 1;
                }
                if (second == null) {
                    return -1;
                }
                double[] votes1 = first.getVotes().clone();
                double[] votes2 = second.getVotes().clone();
                double total1 = weka.core.Utils.sum(votes1);
                double total2 = weka.core.Utils.sum(votes2);
                Arrays.sort(votes1);
                Arrays.sort(votes2);
                // Second-largest vote count (plus a tiny epsilon), i.e. the strongest competing label.
                double pentultimate1 = 1e-16 + ((votes1.length > 1) ? votes1[votes1.length - 2] : 0);
                double pentultimate2 = 1e-16 + ((votes2.length > 1) ? votes2[votes2.length - 2] : 0);
                // this is equiv to purity - margin... yea... really... it's awesome... gotta love math...
                double score1 = (total1 > 0) ? first.size() * pentultimate1 / total1 : 0;
                double score2 = (total2 > 0) ? second.size() * pentultimate2 / total2 : 0;
                // Descending order: the highest-scoring clusters are tried for splitting first.
                return Double.compare(score2, score1);
            }
        }); // end Anon sort
        for (int cIdx = 0; cIdx < sortedClusters.length && subclusters.size() < maxK; cIdx++) {
            Riffle splitMe = sortedClusters[cIdx];
            if (splitMe.getPurity() > 0.9) {
                continue; // pure enough, leave it alone
            }
            double[] votes = splitMe.getVotes();
            final double totalVotes = weka.core.Utils.sum(votes);
            // A label needs at least this share of the votes to receive its own new cluster.
            final double critVotes = 1.0 / (votes.length * 2);
            if (totalVotes < 2) {
                continue;
            }
            // splitSet is indexed by label; null marks labels that do not get a new cluster.
            ArrayList<Riffle> splitSet = new ArrayList<>(votes.length);
            int numberOfNewClusters = 0;
            for (int lblIdx = 0; lblIdx < votes.length; ++lblIdx) {
                double labelVote = votes[lblIdx] / totalVotes;
                if (labelVote >= critVotes) {
                    splitSet.add(this.createNewCluster(splitMe.toInstance()));
                    numberOfNewClusters++;
                } else {
                    splitSet.add(null);
                }
            }
            if (numberOfNewClusters < 2) {
                continue; // nothing to split into
            }
            // Fully weighted points go to the cluster of their own label; everything else
            // is collected in extras and assigned by proximity afterwards.
            Instances extras = new Instances(splitMe.getHeader());
            for (Instance x : splitMe.instances) {
                if (x.weight() > 0.999) {
                    Riffle myHopefulCluster = splitSet.get((int) x.classValue());
                    if (myHopefulCluster != null) {
                        myHopefulCluster.instances.add(x);
                        myHopefulCluster.addLabeling((int) x.classValue(), x.weight());
                    } else {
                        extras.add(x);
                    }
                } else {
                    extras.add(x);
                }
            }
            LinkedList<Riffle> goodSet = new LinkedList<>();
            for (Riffle rfc : splitSet) {
                if (rfc == null) {
                    continue;
                }
                rfc.recomputeAll();
                goodSet.add(rfc);
                subclusters.add(rfc);
            }
            for (Instance x : extras) {
                final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(goodSet, x);
                nearestClusters[0].getCluster().instances.add(x);
            }
            subclusters.remove(splitMe);
        }
    }

    // The penultimate Expectation step (score discarded, see above)
    for (Riffle c : subclusters) {
        c.recomputeAll();
    }

    // See if any outliers should actually be consumed by a cluster now... (Maximization step 3)
    Iterator<Instance> xIter = potentialNovels.iterator();
    while (xIter.hasNext()) {
        Instance xOut = xIter.next();
        final NearestClusterTuple[] nearestClusters = findMostLikelyClusters(subclusters, xOut);
        if (nearestClusters == null || nearestClusters.length < 1) {
            continue;
        }
        Riffle c = nearestClusters[0].getCluster();
        double d = nearestClusters[0].getDistance();
        // A point is an outlier when its distance exceeds the cluster radius, so it is
        // adopted only once it lies within the radius. (The test used to be inverted and
        // adopted exactly the points that were still outside.)
        if (d <= c.getRadius()) { // Welcome home wayward tuple!
            c.instances.add(xOut);
            xIter.remove();
        }
    }

    // And the final Expectation step: this is the score that gets reported.
    double ret = 0;
    for (Riffle c : subclusters) {
        ret += c.recomputeAll();
    }
    return ret;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Handles a training instance that lies outside the radius of its nearest cluster.
 * <p>
 * Compares the mean distance from {@code x} to its nearest fellow outliers with the mean
 * distance to the nearest members of the closest cluster (both over at most {@code q}
 * neighbours, where {@code q} is the minimum-cluster-size option). When the resulting score is
 * positive and more than {@code q} outliers are available, a new cluster is created around
 * {@code x}; if the model is already at its cluster limit, the lowest-weight cluster is
 * removed to make room.
 *
 * @param x   data instance
 * @param ncx nearest cluster (and distance)
 * @return the newly created cluster, or {@code null} if {@code x} remains a plain outlier
 */
private Riffle trainOnOutlierInstance(Instance x, NearestClusterTuple ncx) {
    // Captured up front, before this method changes the cluster collection.
    final boolean atClusterLimit = clusters.size() >= this.maximumNumberOfClusterSizeOption.getValue();
    final NearestInstanceTuple[] outlierNeighbors = findNearestOutliers(x);
    final int minSize = this.minimumClusterSizeOption.getValue();
    final int outlierCount = outlierNeighbors.length;
    final boolean enoughOutliers = outlierCount > minSize;

    double meanOutlierDist = 0;
    double meanMemberDist = 0;
    if (enoughOutliers) {
        final int outlierLimit = Math.min(outlierCount, minSize);
        for (int i = 0; i < outlierLimit; ++i) {
            meanOutlierDist += outlierNeighbors[i].d / (double) minSize;
        }
        final NearestInstanceTuple[] memberNeighbors = findNearestNeighbors(ncx.getCluster().instances, x);
        final double memberDivisor = (double) Math.min(minSize, outlierCount);
        final int memberLimit = Math.min(memberNeighbors.length, minSize);
        for (int i = 0; i < memberLimit; ++i) {
            meanMemberDist += memberNeighbors[i].d / memberDivisor;
        }
    }

    // Positive when the outlier neighbourhood is tighter than the nearest cluster's
    // neighbourhood; -1.5 is the "no evidence" fallback.
    final double qNSC;
    if (outlierCount >= minSize && (meanOutlierDist > 0 || meanMemberDist > 0)) {
        qNSC = (meanMemberDist - meanOutlierDist) / Math.max(meanMemberDist, meanOutlierDist);
    } else {
        qNSC = -1.5;
    }

    if (!(qNSC > 0) || !enoughOutliers) {
        return null; // not enough friendly outcasts to start a new club
    }

    final Riffle newCluster = this.createNewCluster(x);

    // Seed the new cluster with up to q nearest outliers, stopping at the first one that
    // lies farther away than the radius of the nearest existing cluster.
    final int seedLimit = Math.min(outlierCount, minSize);
    for (int i = 0; i < seedLimit; ++i) {
        final NearestInstanceTuple neighbor = outlierNeighbors[i];
        if (neighbor.d > ncx.getCluster().getRadius()) {
            break;
        }
        newCluster.addInstance(neighbor.x);
        newCluster.instances.add(neighbor.x);
        newCluster.trainEmbeddedClassifier(neighbor.x);
    }

    // Cross-train the new cluster's classifier on a random half of the nearest cluster's
    // (nearly) fully weighted members. The coin is flipped for every member.
    for (Instance member : ncx.getCluster().instances) {
        final boolean coinFlip = this.clustererRandom.nextDouble() < 0.5;
        if (coinFlip && member.weight() > 0.99) {
            newCluster.trainEmbeddedClassifier(member);
        }
    }

    // If at limit, prune the worst (lowest-weight) cluster to make room for the new one.
    if (atClusterLimit) {
        Riffle weakest = null;
        double lowestWeight = Double.MAX_VALUE;
        for (Riffle candidate : clusters) {
            final double candidateWeight = candidate.getWeight();
            if (candidateWeight < lowestWeight) {
                lowestWeight = candidateWeight;
                weakest = candidate;
            }
        }
        if (weakest != null) {
            clusters.remove(weakest);
        }
    }

    newCluster.recomputeAll();
    this.clusters.add(newCluster);
    return newCluster;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Trains the model on a single data instance.
 * <p>
 * Updates the global counters, ranks the existing clusters for {@code x}, and then either
 * treats {@code x} as an outlier (distance to the top-ranked cluster exceeds that cluster's
 * radius) or adds it to that cluster. Afterwards the embedded classifiers of the ranked
 * clusters are randomly cross-trained and a periodic resync is triggered.
 *
 * @param x instance to train on
 */
@Override
public final void trainOnInstanceImpl(Instance x) {
    safeInit(x);
    assert (x != null) : "Sieve::trainOnInstanceImpl() Training on a null instance!";
    final int label = (int) x.classValue();
    // NOTE(review): label 0 is excluded by the "> 0" test below - confirm this is intended.
    if ((label > 0) && (label < knownLabels.length)) {
        knownLabels[label] += x.weight();
    }
    this.instancesSeen++;
    this.weightsSeen += x.weight();
    this.universalCluster.addInstance(x);

    final NearestClusterTuple[] ranked = findMostLikelyClusters(this.clusters, x);
    if (ranked.length < 1) {
        // Weird corner case: no candidate cluster at all, so start the very first one.
        final Riffle firstCluster = this.createNewCluster(x);
        clusters.add(firstCluster);
        System.err.println("Sieve::trainOnInstanceImpl() - no other clusters found!");
        periodicResync();
        return;
    }

    // Everyone takes a weight hit, and we will reward the best later...
    for (NearestClusterTuple candidate : ranked) {
        candidate.getCluster().penalize();
    }

    final NearestClusterTuple best = ranked[0];
    final Riffle bestCluster = best.getCluster();
    // The pair's cluster may be replaced below when x turns out to be an outlier.
    final ClusterPointPair pairing = new ClusterPointPair(x, bestCluster);

    final boolean isRadialOutlier = best.getDistance() > bestCluster.getRadius();
    if (isRadialOutlier) {
        // Hang out with the outcasts and see if you can start your own clique
        pairing.c = onlyCreateNewClusterAtResyncOption.isSet() ? null : trainOnOutlierInstance(x, best);
        if (pairing.c == null) {
            // ...or just wait patiently for a friend to sit next to you
            this.potentialNovels.add(x);
        }
    } else {
        // Or join an existing club if you are in the "IN" crowd...
        bestCluster.reward();
        bestCluster.trainEmbeddedClassifier(x);
        bestCluster.addInstance(x);
    }

    // Randomly cross-train other models: the training probability falls linearly with
    // rank, from 1/2 for the top-ranked cluster down to 1/(2n) for the last one.
    final int candidateCount = ranked.length;
    for (int rank = 0; rank < candidateCount; ++rank) {
        final double pTrain = ((double) candidateCount - rank) / (2.0 * candidateCount);
        if (this.clustererRandom.nextDouble() < pTrain) {
            ranked[rank].getCluster().trainEmbeddedClassifier(x);
        }
    }
    hopperCache.addLast(pairing);
    periodicResync();
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Temporary function for algorithm analysis: appends one CSV record describing how instance
 * {@code x} relates to cluster {@code c} to the meta-record writer.
 * <p>
 * Does nothing unless the log-meta-records option is set. I/O failures are reported on
 * standard error and otherwise ignored.
 */
private void debugMetrics(double qNSC, double qDout, double qDmin, double dist, double rawTally, Instance x,
        Riffle c) {
    if (!this.logMetaRecordsOption.isSet()) {
        return;
    }
    try {
        final int trueLabel = (int) x.classValue();
        final boolean labelInRange = (trueLabel > 0) && (trueLabel < knownLabels.length);
        // "True novel": a known-range label that has accumulated less weight than the minimum cluster size.
        final boolean isTrueNovel = labelInRange
                && (knownLabels[trueLabel] < (this.minimumClusterSizeOption.getValue()));

        final StringBuilder row = new StringBuilder();
        row.append(universalCluster.size()).append(',');
        row.append(universalCluster.getRadius()).append(',');
        row.append(rawTally).append(',');
        row.append(c.getPurity()).append(',');
        row.append(c.size()).append(',');
        row.append(c.getWeight()).append(',');
        row.append(c.getRadius()).append(',');
        row.append(dist).append(',');
        row.append(c.isOutlier(x) ? 1 : 0).append(',');
        row.append(x.weight()).append(',');
        row.append(qDmin).append(',');
        row.append(qDout).append(',');
        row.append(qNSC).append(',');
        row.append(isTrueNovel);

        ncCSVwriter.write(row.toString());
        ncCSVwriter.newLine();
        ncCSVwriter.flush();
    } catch (IOException fileIoExcption) {
        System.err.println("Could not write NC CSV line: " + fileIoExcption.toString());
    }
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Trains the outlier-detection perceptron ensemble from the current clustering.
 * <p>
 * Builds a temporary training set with one pseudo-instance per (cluster instance, cluster)
 * combination, labelled MEMBER when both clusters are the same and OUTLIER otherwise, then
 * trains each perceptron on a weighted subsample and re-weights the training set in a
 * boosting-style update. The training set is cleared again before returning.
 * <p>
 * The weighted error of each learner is clamped into {@code [SMALL, 1 - SMALL]} before it is
 * used. Without the clamp, a learner with zero error got an infinite ensemble weight and
 * turned every instance weight into NaN, and an error of one caused a division by zero.
 *
 * @return training accuracy: the fraction of training instances whose class value equals the
 *         ensemble's vote, or 0 when the training set is empty
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering
    final int epochs = 20;
    final int numberOfPerceptrons = 1;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);

    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Instance x : thisCluster.getHeader()) {
            Instance pseudoPt = makePerceptronInstance(thisCluster, x);
            for (Riffle thatCluster : this.clusters) {
                double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    // Start from uniform weights that sum to one.
    for (Instance x : this.outlierPerceptronTrainingSet) {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    }

    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) { // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } // end epochs

        // Evaluate weak learner: total weight of the misclassified instances.
        double errorFunctionSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // Keep the error strictly inside (0, 1) so the ratios below stay finite.
        final double boundedError = Math.min(Math.max(errorFunctionSum, weka.core.Utils.SMALL),
                1.0 - weka.core.Utils.SMALL);
        final double correctScale = boundedError / (1.0 - boundedError);

        // Adjust training weights: correctly classified instances are scaled down.
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= correctScale;
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize (skipped when there is no weight to distribute, to avoid 0/0)
        if (weightSum > 0) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                x.setWeight(x.weight() / weightSum);
            }
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1.0 - boundedError) / boundedError);

        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons

    // Check training error
    double accuracySum = 0;
    double accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}

From source file:moa.clusterers.AbstractClusterer.java

License:Open Source License

/**
 * Trains the clusterer on the given instance, provided the instance carries positive weight.
 * Instances whose weight is not greater than zero are ignored.
 *
 * @param inst the instance to train on
 */
public void trainOnInstance(Instance inst) {
    final double instanceWeight = inst.weight();
    if (!(instanceWeight > 0.0)) {
        return;
    }
    this.trainingWeightSeenByModel += instanceWeight;
    trainOnInstanceImpl(inst);
}

From source file:moa.evaluation.BasicClassificationPerformanceEvaluator.java

License:Open Source License

/**
 * Records one classification result.
 * <p>
 * For positively weighted instances the observed weight, the weight of correct predictions and
 * both kappa tallies are updated; the predicted class is the index of the largest vote. The
 * "no-change" baseline (predict the previously seen class) is updated for every instance.
 *
 * @param inst       the instance that was classified
 * @param classVotes the votes per class produced for {@code inst}
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    final double instanceWeight = inst.weight();
    final int actualClass = (int) inst.classValue();

    if (instanceWeight > 0.0) {
        // Statistics are (re)initialised when nothing has been observed so far.
        if (this.weightObserved == 0) {
            reset(inst.dataset().numClasses());
        }
        this.weightObserved += instanceWeight;

        final int predictedClass = Utils.maxIndex(classVotes);
        final boolean hit = (predictedClass == actualClass);
        if (hit) {
            this.weightCorrect += instanceWeight;
        }
        this.rowKappa[predictedClass] += instanceWeight;
        this.columnKappa[actualClass] += instanceWeight;
    }

    // Baseline classifier that always predicts the class of the previous instance.
    final boolean sameAsPrevious = (this.lastSeenClass == actualClass);
    if (sameAsPrevious) {
        this.weightCorrectNoChangeClassifier += instanceWeight;
    }
    this.lastSeenClass = actualClass;
}

From source file:moa.evaluation.BasicClassificationScoringEvaluator.java

License:Open Source License

/**
 * Records one classification result, including an error term based on the normalized vote
 * given to the true class.
 * <p>
 * Instances whose weight is not greater than zero are ignored.
 *
 * @param inst       the instance that was classified
 * @param classVotes the votes per class produced for {@code inst}
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    final double instanceWeight = inst.weight();
    final int actualClass = (int) inst.classValue();
    if (!(instanceWeight > 0.0)) {
        return;
    }

    // Statistics are (re)initialised when nothing has been observed so far.
    if (this.weightObserved == 0) {
        reset(inst.dataset().numClasses());
    }
    this.weightObserved += instanceWeight;

    // Accuracy bookkeeping: the predicted class is the index of the largest vote.
    final int predictedClass = Utils.maxIndex(classVotes);
    if (predictedClass == actualClass) {
        this.weightCorrect += instanceWeight;
    }

    // Error bookkeeping: normalized vote for the true class (0 when there is no such vote).
    final double[] distribution = normalize(classVotes);
    final boolean hasVoteForActual = (distribution.length > 0) && (actualClass < distribution.length);
    double vote = hasVoteForActual ? distribution[actualClass] : 0;

    if (Double.isNaN(vote)) {
        // The normalized vote is undefined: fall back to an even share among the NaN entries.
        int nanEntries = 0;
        for (int i = 0; i < classVotes.length; ++i) {
            if (Double.isNaN(distribution[i])) {
                nanEntries++;
            }
        }
        if (nanEntries > 1 && classVotes.length > 1) {
            vote = 1.0 / nanEntries;
        } else {
            vote = 1;
        }
    }
    this.mse += 1 - vote;

    this.saw++;

    this.rowKappa[predictedClass] += instanceWeight;
    this.columnKappa[actualClass] += instanceWeight;
}

From source file:moa.evaluation.BasicConceptDriftPerformanceEvaluator.java

License:Open Source License

/**
 * Records one concept-drift detection result.
 * <p>
 * {@code classVotes} is expected to hold exactly four entries:
 * <ul>
 * <li>{@code [0]} is change (1.0 when a change was detected)</li>
 * <li>{@code [1]} is in warning zone (1.0 when in the warning zone)</li>
 * <li>{@code [2]} delay</li>
 * <li>{@code [3]} estimation</li>
 * </ul>
 * The input value (attribute 2) is stored for every call; all other statistics are updated
 * only for positively weighted instances that come with four votes.
 *
 * @param inst       the observed instance
 * @param classVotes the detector output as described above
 */
@Override
public void addResult(Instance inst, double[] classVotes) {
    this.inputValues = inst.value(2);

    final double instanceWeight = inst.weight();
    if (!(instanceWeight > 0.0) || classVotes.length != 4) {
        return;
    }

    // If there is ground truth we monitor the delay since the last true change.
    final boolean hasGroundTruth = inst.numAttributes() > 1;
    if (hasGroundTruth) {
        this.delay++;
    }
    this.weightObserved += instanceWeight;

    // Change detected
    if (classVotes[0] == 1.0) {
        this.numberDetections += instanceWeight;
        if (this.hasChangeOccurred) {
            // First detection after a true change: account for its delay, then consume the change.
            this.totalDelay += this.delay - classVotes[2];
            this.numberDetectionsOccurred += instanceWeight;
            this.hasChangeOccurred = false;
        }
    }

    // Warning detected: count only the transition into the warning zone.
    if (this.hasChangeOccurred && classVotes[1] == 1.0) {
        if (!this.isWarningZone) {
            this.numberWarnings += instanceWeight;
            this.isWarningZone = true;
        }
    } else {
        this.isWarningZone = false;
    }

    // Ground truth change: flagged by the second-to-last attribute being 1.0.
    if (hasGroundTruth && inst.value(inst.numAttributes() - 2) == 1.0) {
        this.numberChanges += instanceWeight;
        this.delay = 0;
        this.hasChangeOccurred = true;
    }

    // Compute error prediction: absolute gap between the estimation and attribute 0.
    this.errorPrediction += Math.abs(classVotes[3] - inst.value(0));
}

From source file:moa.evaluation.BasicRegressionPerformanceEvaluator.java

License:Open Source License

/**
 * Records one regression result.
 * <p>
 * For positively weighted instances the observed weight is increased and, when a prediction
 * is present, the squared and absolute errors of {@code prediction[0]} against the
 * instance's class value are accumulated.
 *
 * @param inst       the instance that was predicted
 * @param prediction the prediction; only the first element is used
 */
@Override
public void addResult(Instance inst, double[] prediction) {
    final double instanceWeight = inst.weight();
    if (!(instanceWeight > 0.0)) {
        return;
    }
    this.weightObserved += instanceWeight;
    if (prediction.length == 0) {
        return;
    }
    final double residual = inst.classValue() - prediction[0];
    this.squareError += residual * residual;
    this.averageError += Math.abs(residual);
}