Example usage of weka.core.Instance.weight()

Introduction

On this page you can find example usages of the weka.core.Instance.weight() method.

Prototype

public double weight();

Source Link

Document

Returns the instance's weight.

Usage

From source file:moa.classifiers.trees.ePTTD.java

License:Creative Commons License

/**
 * Consumes one training instance.
 * <p>
 * Positive instance weights are added to {@code trainingWeightSeenByModel}. Until classification
 * is enabled, instances are collected in {@code instancesBuffer}; once the buffer size equals
 * {@code widthInitOption.getValue()} the internal classifier is built from the buffer and
 * classification is switched on. Afterwards every instance is passed straight to
 * {@code ePTTDintern.updateClassifier}.
 *
 * @param inst the instance to train on
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    final double weight = inst.weight();
    if (weight > 0.0) {
        this.trainingWeightSeenByModel += weight;
    }

    // Steady state: the classifier already exists, so just update it.
    if (isClassificationEnabled) {
        ePTTDintern.updateClassifier(inst);
        return;
    }

    // Warm-up: buffer instances until enough have been seen to build the classifier.
    if (instancesBuffer == null) {
        instancesBuffer = new Instances(inst.dataset(), 0);
    }
    instancesBuffer.add(inst);

    final boolean bufferReady = instancesBuffer.size() == widthInitOption.getValue();
    if (bufferReady) {
        // Build the classifier for the first time.
        checkOptionsIntegity();
        ePTTDintern.buildClassifier(instancesBuffer);
        isClassificationEnabled = true;
    }
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Create a new cluster from an exemplar data point
 * @param x /*w  w  w.j a v a 2s .  c  o  m*/
 */
public Riffle(Instance x) {
    safeInit(x);
    this.numLabeledPoints = (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] += x.weight();
    this.gtLabelFrequencies[(int) x.classValue()]++;
    for (int i = 0; (i < this.symbolFrequencies.length) && (i < x.numAttributes()); ++i) {
        double value = x.value(i);
        if (this.symbolFrequencies[i] == null) {
            if ((this.parentClusterer != null) && (this.parentClusterer.getUniverse() != null)) {
                this.variances[i] = this.parentClusterer.getUniverse().variances[i];
            } else {
                this.variances[i] = this.initialStandardDeviationOption.getValue();
            }
        } else {
            this.variances[i] = 1;
            this.symbolFrequencies[i][(int) value]++;
        }
    }
    this.numTotalPoints = 1;
    this.setGroundTruth(x.classValue());
    this.setCenter(x.toDoubleArray());
    this.setWeight(x.weight());
    this.setRadius(this.initialStandardDeviationOption.getValue());
    this.runningSumOfSquares = 0.0;
    this.setId(autoindex.getAndIncrement());
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Adds a data point instance to this cluster.
 * <p>
 * Point and label counters are updated first; the instance is then handed to the update
 * strategy selected by {@code updateStrategyOption}. Strategy 2 only stores the instance and
 * returns without calling {@code recompute()}.
 *
 * @param x the instance to add
 */
final public void addInstance(Instance x) {
    safeInit(x);
    this.numTotalPoints++;

    final double weight = x.weight();
    final int labelIndex = (int) x.classValue();
    if (weight > 0.9999) {
        this.numLabeledPoints++;
    }
    this.labelFrequencies[labelIndex] += weight; // non-training data has a weight of zero
    this.gtLabelFrequencies[labelIndex]++; // for non-decision metrics only

    // Select strategy for on-line *-means (any means)
    final int strategy = updateStrategyOption.getChosenIndex();
    if (strategy == 2) {
        // Buffer-only strategy: keep the raw instance, skip the recompute step.
        this.instances.add(x);
        return;
    }
    if (strategy == 0) {
        this.addInstanceGrimson(x);
    } else if (strategy == 1) {
        this.addInstanceViaShephard(x);
    } else {
        System.err.println("Invalid addInstance strategy");
    }
    recompute();
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Removes a data point instance from this cluster (the counterpart of adding one).
 * <p>
 * NOTE(review): this method subtracts {@code ceil(weight)} from the labeled-point counter,
 * whereas {@code addInstance} increments it only when {@code weight > 0.9999}; for fractional
 * weights the two are not exact inverses - confirm this is intended.
 *
 * @param x the instance to remove
 */
final public void removeInstance(Instance x) {
    safeInit(x);

    final double weight = x.weight();
    final int labelIndex = (int) x.classValue();
    this.numLabeledPoints -= (int) Math.ceil(weight);
    this.labelFrequencies[labelIndex] -= weight; // non-training data has a weight of zero
    this.gtLabelFrequencies[labelIndex]--; // for non-decision metrics only
    this.numTotalPoints--;

    // Select strategy for on-line *-means
    final int strategy = updateStrategyOption.getChosenIndex();
    if (strategy == 2) {
        // Buffer-only strategy: drop the raw instance, skip the recompute step.
        this.instances.remove(x);
        return;
    }
    if (strategy == 0) {
        this.removeInstanceGrimson(x);
    } else if (strategy == 1) {
        this.removeInstanceViaShephard(x);
    } else {
        System.err.println("Invalid removeInstance strategy");
    }
    recompute();
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Rebuilds this cluster's pre-computed summary fields from the stored instances.
 * <p>
 * When {@code instances} is non-null, the label-frequency arrays and point counters are reset
 * and re-accumulated, the centroid is re-estimated (using the most frequent symbol for every
 * attribute that has a symbol-frequency table), per-dimension variances are recomputed when
 * more than one instance is stored, and {@code recompute()} is invoked. When {@code instances}
 * is null the recomputation is skipped entirely.
 *
 * @return the product of {@code getRadius()} and {@code getEntropy()}
 */
public final double recomputeAll() {
    if (this.instances != null) {
        // Reset label statistics and counters; they are re-accumulated from the instances below.
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            // Scale the existing centroid by 1/(size + 1); each instance then contributes
            // val/(size + 1) in the loop below, so the previous centroid is weighted like one
            // additional point. NOTE(review): 'centroid' is not declared in this method -
            // presumably a field of the enclosing class; confirm.
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            // Clear the symbol counts of every attribute that has a frequency table.
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // Accumulate per-instance statistics
                if (x == null) {
                    // NOTE(review): the message names Sieve::MaximizationStep() although this
                    // method is recomputeAll() - looks like a copy/paste leftover; confirm.
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                // NOTE(review): if numLabeledPoints is an int, this compound assignment
                // silently truncates fractional weights - confirm the intended rounding rule.
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    // Only the upper bound of val is checked before it is used as an index.
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            } // for

            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid); // temporary - start with standard gaussian, gets updated below
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                // cep[i] accumulates the sum of deltas; clusterVariance[i] the sum of squared deltas.
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        // Attributes without a symbol table use the signed distance to the
                        // centroid; attributes with one use 1 when the value equals the
                        // centroid (within 1e-32) and 1e-20 otherwise.
                        // NOTE(review): that 1 / 1e-20 mapping looks inverted - confirm intent.
                        double delta = (this.symbolFrequencies[i] == null) ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                // Corrected sample variance: (sum(d^2) - (sum(d))^2 / n) / (n - 1)
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if(!instances.empty)
        recompute();
    } // end if(!instances null)
    return getRadius() * getEntropy();
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Adds the instance's weight to {@code trainingWeightSeenByModel} and then delegates the actual
 * training to {@code trainOnInstanceImpl}.
 *
 * @param inst the instance to train on
 */
@Override
public void trainOnInstance(Instance inst) {
    final double instanceWeight = inst.weight();
    this.trainingWeightSeenByModel += instanceWeight;
    trainOnInstanceImpl(inst);
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Rewards the given cluster and adds the instance to it. If the instance has zero weight and
 * {@code hypothesisWeightOption} is positive, the model's own prediction for the instance is
 * additionally registered on the cluster as a reduced-weight label.
 *
 * @param x the instance being assigned
 * @param c the cluster receiving the instance
 */
private void addToCluster(Instance x, Riffle c) {
    c.reward();
    c.addInstance(x);

    // If configured, use prediction as a reduced-weight training
    // WARNING: This often becomes divergent and error-inducing if done with bad weights
    final double fauxWeight = this.hypothesisWeightOption.getValue();
    if ((x.weight() != 0) || !(fauxWeight > 0)) {
        return;
    }
    final int fauxClass = weka.core.Utils.maxIndex(this.getVotesForInstance(x));
    c.addLabeling(fauxClass, fauxWeight);
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Trains the clusterer on a single instance.
 * <p>
 * The steps, in order: register the label (for instances with positive weight), add the
 * instance to the universal cluster, find the most likely clusters, penalize each of them and
 * re-add the instance to those selected by {@code positiveClusterFeedbackStrategyOption},
 * optionally create a new cluster (new label or unanimous outlier), and finally normalize the
 * cluster weights.
 *
 * @param x instance to train on
 */
@Override
public void trainOnInstanceImpl(Instance x) {
    // NOTE(review): safeInit(x) runs before the null assertion below - confirm it tolerates null.
    safeInit(x);
    assert (x != null) : "FeS2::trainOnInstanceImpl() Training on a null instance!";
    int classValue = (int) x.classValue();
    // Must be evaluated before the label is inserted into knownLabels just below.
    boolean isNewLabel = (!knownLabels.contains(classValue)) && (x.weight() > 0);
    if ((x.weight() > 0)) {
        this.knownLabels.add(classValue);
    }
    this.universalCluster.addInstance(x);
    // Find nearest Cluster
    final SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(this.clusters, x);
    assert !nearestClusters.isEmpty() : "Cluster set for probability matching is empty";

    // Compute some base metrics we need to know:
    // NOTE(review): maxRadius and avgRadius are only referenced by a commented-out line further
    // down; in the live code of this method they are computed but never read.
    double maxRadius = 0;
    double avgRadius = 0;
    boolean unanimousOutlier = true;
    double weightTotal = 0;
    double minWeight = Double.MAX_VALUE;
    for (NearestClusterTuple nct : nearestClusters) {
        unanimousOutlier = unanimousOutlier && nct.getCluster().isOutlier(x);
        maxRadius = Math.max(maxRadius, nct.getCluster().getRadius());
        avgRadius += nct.getCluster().getRadius();
    }
    avgRadius /= nearestClusters.size();

    // Update weights
    for (NearestClusterTuple nct : nearestClusters) {
        Riffle c = nct.getCluster();
        c.penalize(); // unilaterally reduce weights
        int clusterMajorityClass = weka.core.Utils.maxIndex(c.getVotes());
        // increase weights for matches (define 'match' criteria by strategy parameter)
        switch (this.positiveClusterFeedbackStrategyOption.getChosenIndex()) {
        case 0: // only the closest
            // "Closest" is taken to be the last element of the sorted set.
            if (!unanimousOutlier && c == nearestClusters.last().getCluster()) {
                addToCluster(x, c);
            }
            break;
        case 1: // All label matches
            // This ternary condition is very important for results
            int hypothesisClass = (x.weight() > 0) ? classValue
                    : weka.core.Utils.maxIndex(this.getVotesForInstance(x));
            if (clusterMajorityClass == hypothesisClass) {
                addToCluster(x, c);
            }
            break;
        case 2: // All proximity matches
            if (!nct.getCluster().isOutlier(x)) {
                addToCluster(x, c);
            }
            break;
        default:
            break;
        } //end switch
        weightTotal += c.getWeight();
        // NOTE(review): minWeight is updated here but not read anywhere else in this method.
        minWeight = Math.min(minWeight, c.getWeight());
    }

    // Sort by (weight / sigma), ascending; the radius is floored at 1e-96 to avoid division by zero
    Riffle[] sortedClusters = new Riffle[clusters.size()];
    int i = 0;
    for (Riffle c : clusters) {
        sortedClusters[i++] = c;
    }
    // Kudos to Java 8 and lambda expressions for making this a one-liner:
    Arrays.parallelSort(sortedClusters,
            (Riffle a, Riffle b) -> Double.compare(a.getWeight() / Math.max(a.getRadius(), 1e-96),
                    b.getWeight() / Math.max(b.getRadius(), 1e-96)));
    // Capacity is the smaller of (clusters per label * known labels) and the configured maximum.
    boolean atClusterCapacity = (this.clusters.size() >= Math.min(
            this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
            this.maximumNumberOfClusterSizeOption.getValue()));

    // * * *
    //
    // Results show that when average P(x|k) < Chauvenet, no new clusters, and vice versa (which is opposite of expected behavior)
    //
    // * * *
    boolean universalOutlier = this.universalCluster.isOutlier(x);
    if (isNewLabel) {
        newLabelCount++;
    }
    if (universalOutlier) {
        universalOutlierCount++;
    }
    if (unanimousOutlier) {
        unanimousOutlierCount++;
    }
    // If we have no matches at all, then the weakest cluster is replaced by a new one with a high variance and low weight
    //if (isNewLabel || (unanimousOutlier && universalOutlier)) {   
    if (isNewLabel || unanimousOutlier) {
        // NOTE(review): the array is sorted ascending by weight/radius, so the last element has
        // the LARGEST ratio; confirm that this is really the "weakest" cluster. Also throws if
        // 'clusters' is empty.
        Riffle weakestLink = sortedClusters[sortedClusters.length - 1]; // get last one
        Riffle novelCluster = this.createNewCluster(x);
        //novelCluster.setRadius((avgRadius + maxRadius) / 2.0); // Set to half-way between average and max radius
        novelCluster.setWeight(weightTotal / nearestClusters.size()); // <---- Validate this ------
        weightTotal += novelCluster.getWeight(); // update for new normalization factor
        // You are the weakest link... Goodbye
        if (atClusterCapacity) {
            weightTotal -= weakestLink.getWeight(); // update for new normalization factor
            this.clusters.remove(weakestLink);
        }
        // Everyone please welcome our newest contestant...
        clusters.add(novelCluster);
    }

    // Normalize Weights and Update variance estimates for singleton clusters
    double[] universeVariance = universalCluster.getVariances();
    double[] initialVariance = new double[universeVariance.length];
    for (int j = 0; j < initialVariance.length; ++j) {
        initialVariance[j] = universeVariance[j] * 0.85;
    }
    // Guard against dividing by a non-positive normalization factor.
    if (weightTotal <= 0) {
        weightTotal = 1;
    }
    for (Riffle c : this.clusters) {
        if (c.size() < 2) {
            c.setVariances(initialVariance);
        }
        c.setWeight(c.getWeight() / weightTotal);
    }
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * @return training accuracy/*from   www .  ja v a  2 s.c o  m*/
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering 
    final int epochs = 20;
    final int numberOfPerceptrons = 10;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);

    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Riffle thatCluster : this.clusters) {
            double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
            for (Instance x : thatCluster.getHeader()) {
                Instance pseudoPt = makePerceptronInstance(thisCluster, x);
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    this.outlierPerceptronTrainingSet.parallelStream().forEach((x) -> {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    });

    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) { // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } //end epochs
          // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);

        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons

    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Adds the instance's weight to {@code trainingWeightSeenByModel}, then forwards the instance
 * to {@code trainOnInstanceImpl}.
 *
 * @param inst the instance to train on
 */
@Override
public final void trainOnInstance(Instance inst) {
    final double instanceWeight = inst.weight();
    this.trainingWeightSeenByModel += instanceWeight;
    trainOnInstanceImpl(inst);
}