Example usage for weka.core Instance weight

List of usage examples for weka.core Instance weight

Introduction

On this page you can find usage examples for the weight() method of weka.core.Instance.

Prototype

public double weight();

Source Link

Document

Returns the instance's weight.

Usage

From source file:moa.classifiers.trees.ePTTD.java

License:Creative Commons License

/**
 * Trains the model on a single instance.
 *
 * <p>A positive instance weight is added to {@code trainingWeightSeenByModel}. Until
 * classification has been enabled, instances are only collected in {@code instancesBuffer};
 * when the buffer size equals the value of {@code widthInitOption}, the internal classifier is
 * built from the buffer and classification is enabled. From then on each instance is handed
 * straight to the internal classifier's {@code updateClassifier}.
 *
 * @param inst the training instance
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    final double instanceWeight = inst.weight();
    if (instanceWeight > 0.0) {
        this.trainingWeightSeenByModel += instanceWeight;
    }

    // Steady state: the internal classifier already exists, so just update it.
    if (isClassificationEnabled) {
        ePTTDintern.updateClassifier(inst);
        return;
    }

    // Warm-up: lazily create the buffer from the instance's dataset, then collect the instance.
    if (instancesBuffer == null) {
        this.instancesBuffer = new Instances(inst.dataset(), 0);
    }
    instancesBuffer.add(inst);

    // Build the classifier for the first time once the buffer holds exactly the configured width.
    final boolean bufferFull = instancesBuffer.size() == widthInitOption.getValue();
    if (bufferFull) {
        checkOptionsIntegity();
        this.ePTTDintern.buildClassifier(instancesBuffer);
        isClassificationEnabled = true;
    }
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Create a new cluster from an exemplar data point.
 *
 * <p>Label frequencies, per-attribute variances, center, weight and radius are all seeded from
 * {@code x}, and the cluster starts with a total point count of one.
 *
 * @param x the exemplar instance the cluster is created from
 */
public Riffle(Instance x) {
    safeInit(x);
    this.numLabeledPoints = (int) Math.ceil(x.weight());
    this.labelFrequencies[(int) x.classValue()] += x.weight();
    this.gtLabelFrequencies[(int) x.classValue()]++;
    // Walk only the attributes covered by both the frequency tables and the instance.
    for (int i = 0; (i < this.symbolFrequencies.length) && (i < x.numAttributes()); ++i) {
        double value = x.value(i);
        if (this.symbolFrequencies[i] == null) {
            // No symbol table for this attribute: borrow the variance of the parent clusterer's
            // universe cluster when one is available, otherwise use the configured initial value.
            if ((this.parentClusterer != null) && (this.parentClusterer.getUniverse() != null)) {
                this.variances[i] = this.parentClusterer.getUniverse().variances[i];
            } else {
                this.variances[i] = this.initialStandardDeviationOption.getValue();
            }
        } else {
            // Attribute has a symbol table: unit variance, and count the exemplar's symbol.
            this.variances[i] = 1;
            this.symbolFrequencies[i][(int) value]++;
        }
    }
    this.numTotalPoints = 1;
    this.setGroundTruth(x.classValue());
    this.setCenter(x.toDoubleArray());
    this.setWeight(x.weight());
    this.setRadius(this.initialStandardDeviationOption.getValue());
    this.runningSumOfSquares = 0.0;
    this.setId(autoindex.getAndIncrement());
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Add a data point instance to this cluster.
 *
 * <p>Point and label counters are updated first; the configured update strategy then decides how
 * the instance is folded into the cluster. Strategy 2 only stores the instance and returns
 * without calling {@code recompute()}.
 *
 * @param x the instance to add
 */
final public void addInstance(Instance x) {
    safeInit(x);
    this.numTotalPoints++;
    // Only instances with (practically) full weight count as labeled points.
    if (x.weight() > 0.9999) {
        this.numLabeledPoints += 1;
    }
    this.labelFrequencies[(int) x.classValue()] += x.weight(); // non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]++; // for non-decision metrics only

    // Select strategy for on-line *-means (any means).
    final int strategy = updateStrategyOption.getChosenIndex();
    if (strategy == 2) {
        this.instances.add(x);
        return;
    }
    if (strategy == 0) {
        this.addInstanceGrimson(x);
    } else if (strategy == 1) {
        this.addInstanceViaShephard(x);
    } else {
        System.err.println("Invalid addInstance strategy");
    }
    recompute();
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Inverse process of adding an instance: counters are decremented and the configured update
 * strategy removes the instance's contribution. Strategy 2 only drops the stored instance and
 * returns without calling {@code recompute()}.
 *
 * <p>NOTE(review): the labeled-point count is reduced by {@code ceil(weight)} here, whereas
 * {@code addInstance} increments it only when {@code weight > 0.9999}. The two rules differ for
 * fractional weights and for weights above one; confirm which is intended.
 *
 * @param x the instance to remove
 */
final public void removeInstance(Instance x) {
    safeInit(x);
    final int labeledDelta = (int) Math.ceil(x.weight());
    this.numLabeledPoints -= labeledDelta;
    this.labelFrequencies[(int) x.classValue()] -= x.weight(); // non-training data has a weight of zero
    this.gtLabelFrequencies[(int) x.classValue()]--; // for non-decision metrics only
    this.numTotalPoints--;

    // Select strategy for on-line *-means.
    final int strategy = updateStrategyOption.getChosenIndex();
    if (strategy == 2) {
        this.instances.remove(x);
        return;
    }
    if (strategy == 0) {
        this.removeInstanceGrimson(x);
    } else if (strategy == 1) {
        this.removeInstanceViaShephard(x);
    } else {
        System.err.println("Invalid removeInstance strategy");
    }
    recompute();
}

From source file:moa.cluster.Riffle.java

License:Apache License

/**
 * Rebuild the pre-computed information fields from the stored instances.
 *
 * <p>When {@code instances} is non-null, the label counters and point counts are reset and, if
 * the collection is not empty, the symbol frequencies, the centroid and (for more than one
 * instance) the per-attribute variances are recomputed from the stored instances.
 * {@code recompute()} is then called, including when the collection is empty. When
 * {@code instances} is null nothing is modified.
 *
 * @return the product of {@code getRadius()} and {@code getEntropy()}
 */
public final double recomputeAll() {
    if (this.instances != null) {
        // Reset all label statistics; they are rebuilt from the stored instances below.
        Arrays.fill(this.gtLabelFrequencies, 0);
        Arrays.fill(this.labelFrequencies, 0);
        this.numTotalPoints = instances.size();
        this.numLabeledPoints = 0;
        if (!this.instances.isEmpty()) {
            // double[] clusterCentroid = this.getCenter();
            double[] clusterVariance = this.getVariances();
            // Scale the existing centroid by 1/(n+1); together with the val/(n+1) contributions
            // added per instance below, the old centroid acts as one extra sample in the mean.
            for (int i = 0; i < centroid.length; ++i) {
                centroid[i] /= (double) this.instances.size() + 1.0;
            }
            // Clear the symbol counts of every attribute that has a frequency table.
            for (double[] sf : this.symbolFrequencies) {
                if (sf != null) {
                    Arrays.fill(sf, 0);
                }
            }
            for (Instance x : this.instances) { // Pre-populate universal cluster with data points
                if (x == null) {
                    System.out.println("Sieve::MaximizationStep() - x is NULL!");
                    continue;
                }
                this.gtLabelFrequencies[(int) x.classValue()]++;
                this.labelFrequencies[(int) x.classValue()] += x.weight();
                // NOTE(review): compound assignment narrows implicitly; if numLabeledPoints is an
                // integer field, fractional weights are truncated here - confirm.
                this.numLabeledPoints += x.weight();
                double[] xValues = x.toDoubleArray();
                for (int i = 0; i < xValues.length; ++i) {
                    double val = xValues[i];
                    centroid[i] += val / ((double) this.instances.size() + 1.0);
                    // Only an upper bound is checked before val is used as a symbol index.
                    if ((this.symbolFrequencies[i] != null) && (val < this.symbolFrequencies[i].length)) {
                        this.symbolFrequencies[i][(int) val]++;
                    }
                }
            } // for

            // Set 'centroid' to 'mode' (most frequent symbol) for nominal data:
            for (int i = 0; i < this.symbolFrequencies.length; ++i) {
                if (this.symbolFrequencies[i] != null) {
                    centroid[i] = weka.core.Utils.maxIndex(this.symbolFrequencies[i]);
                }
            }
            setCenter(centroid); // temporary - start with standard gaussian, gets updated below
            // The cluster class uses an incremental heuristic, but we want to start out as pure as possible, so
            // we use the 2-Pass method for computing sample variance (per dimension)
            double n = instances.size();
            if (n > 1) {
                // cep accumulates the sum of deviations per attribute; it is used below as the
                // correction term of the two-pass variance formula.
                double[] cep = new double[centroid.length];
                Arrays.fill(cep, 0);
                Arrays.fill(clusterVariance, 0);
                for (Instance x : this.instances) {
                    if (x == null) {
                        System.out.println("Riffle::recompute() - x is null!");
                        continue;
                    }
                    double[] xValues = x.toDoubleArray();
                    for (int i = 0; i < xValues.length; ++i) {
                        // Attributes without a symbol table use the plain difference to the centroid.
                        // Attributes with one use 1 when the value equals the centroid (within 1e-32)
                        // and 1e-20 otherwise.
                        // NOTE(review): a match yielding the larger delta looks inverted - confirm.
                        double delta = (this.symbolFrequencies[i] == null) ? centroid[i] - xValues[i]
                                : (Math.abs(centroid[i] - xValues[i]) < 1e-32) ? 1 : 1e-20;
                        cep[i] += delta;
                        clusterVariance[i] += delta * delta; // Statistical Variance
                    }
                }
                // Sample variance per attribute: (sum(delta^2) - (sum(delta))^2 / n) / (n - 1).
                for (int i = 0; i < clusterVariance.length; ++i) {
                    clusterVariance[i] = (clusterVariance[i] - cep[i] * cep[i] / n) / (n - 1);
                }
                setVariances(clusterVariance);
            } // end if (enough data for variance)
        } // end if(!instances.empty)
        recompute();
    } // end if(!instances null)
    return getRadius() * getEntropy();
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Adds the instance's weight to {@code trainingWeightSeenByModel} (unconditionally, whatever its
 * sign) and then delegates the actual training to {@code trainOnInstanceImpl}.
 *
 * @param inst the training instance
 */
@Override
public void trainOnInstance(Instance inst) {
    final double instanceWeight = inst.weight();
    this.trainingWeightSeenByModel += instanceWeight;
    this.trainOnInstanceImpl(inst);
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Rewards the given cluster and adds the instance to it.
 *
 * <p>If the instance has a weight of exactly zero and the hypothesis weight option is positive,
 * the class with the most votes from {@code getVotesForInstance} is additionally recorded on the
 * cluster as a labeling with that reduced weight.
 *
 * @param x the instance to add
 * @param c the cluster receiving the instance
 */
private void addToCluster(Instance x, Riffle c) {
    c.reward();
    c.addInstance(x);

    // If configured, use the prediction as a reduced-weight training label.
    // WARNING: This often becomes divergent and error-inducing if done with bad weights.
    final double fauxWeight = this.hypothesisWeightOption.getValue();
    final boolean hasZeroWeight = x.weight() == 0;
    final boolean pseudoLabelingEnabled = fauxWeight > 0;
    if (!(hasZeroWeight && pseudoLabelingEnabled)) {
        return;
    }
    final double[] votes = this.getVotesForInstance(x);
    final int fauxClass = weka.core.Utils.maxIndex(votes);
    c.addLabeling(fauxClass, fauxWeight);
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * Train the clusterer on one instance.
 *
 * <p>Steps, in order: register the label (only for instances with positive weight), add the
 * instance to the universal cluster, penalize every candidate cluster and add the instance to
 * the clusters selected by the positive-feedback strategy, optionally create a new cluster
 * (evicting one when at capacity), and finally normalize all cluster weights.
 *
 * @param x instance to train on
 */
@Override
public void trainOnInstanceImpl(Instance x) {
    safeInit(x);
    // NOTE(review): this null check runs after x was already passed to safeInit(x) - confirm
    // safeInit tolerates null, otherwise the assert can never fire first.
    assert (x != null) : "FeS2::trainOnInstanceImpl() Training on a null instance!";
    int classValue = (int) x.classValue();
    // A label is "new" only when it is unseen AND the instance carries positive weight.
    boolean isNewLabel = (!knownLabels.contains(classValue)) && (x.weight() > 0);
    if ((x.weight() > 0)) {
        this.knownLabels.add(classValue);
    }
    this.universalCluster.addInstance(x);
    // Find nearest Cluster
    final SortedSet<NearestClusterTuple> nearestClusters = findMostLikelyClusters(this.clusters, x);
    assert !nearestClusters.isEmpty() : "Cluster set for probability matching is empty";

    // Compute some base metrics we need to know:
    // NOTE(review): maxRadius, avgRadius and minWeight are written but never read by the active
    // code in this method (only by the commented-out setRadius line further down).
    double maxRadius = 0;
    double avgRadius = 0;
    boolean unanimousOutlier = true;
    double weightTotal = 0;
    double minWeight = Double.MAX_VALUE;
    for (NearestClusterTuple nct : nearestClusters) {
        // Stays true only if every candidate cluster considers x an outlier.
        unanimousOutlier = unanimousOutlier && nct.getCluster().isOutlier(x);
        maxRadius = Math.max(maxRadius, nct.getCluster().getRadius());
        avgRadius += nct.getCluster().getRadius();
    }
    avgRadius /= nearestClusters.size();

    // Update weights
    for (NearestClusterTuple nct : nearestClusters) {
        Riffle c = nct.getCluster();
        c.penalize(); // unilaterally reduce weights
        int clusterMajorityClass = weka.core.Utils.maxIndex(c.getVotes());
        // increase weights for matches (define 'match' criteria by strategy parameter)
        switch (this.positiveClusterFeedbackStrategyOption.getChosenIndex()) {
        case 0: // only the closest
            // The last element of the sorted set is treated as the closest cluster.
            if (!unanimousOutlier && c == nearestClusters.last().getCluster()) {
                addToCluster(x, c);
            }
            break;
        case 1: // All label matches
            // This ternary condition is very important for results
            // Weighted instances use their own class; zero-weight ones use the predicted class.
            int hypothesisClass = (x.weight() > 0) ? classValue
                    : weka.core.Utils.maxIndex(this.getVotesForInstance(x));
            if (clusterMajorityClass == hypothesisClass) {
                addToCluster(x, c);
            }
            break;
        case 2: // All proximity matches
            if (!nct.getCluster().isOutlier(x)) {
                addToCluster(x, c);
            }
            break;
        default:
            break;
        } //end switch
        // Weights are read after penalize/addToCluster, so the total reflects the updated values.
        weightTotal += c.getWeight();
        minWeight = Math.min(minWeight, c.getWeight());
    }

    // Sort by (weight / sigma)
    Riffle[] sortedClusters = new Riffle[clusters.size()];
    int i = 0;
    for (Riffle c : clusters) {
        sortedClusters[i++] = c;
    }
    // Kudos to Java 8 and lambda expressions for making this a one-liner:
    // Ascending order of weight / max(radius, 1e-96); the floor avoids division by zero.
    Arrays.parallelSort(sortedClusters,
            (Riffle a, Riffle b) -> Double.compare(a.getWeight() / Math.max(a.getRadius(), 1e-96),
                    b.getWeight() / Math.max(b.getRadius(), 1e-96)));
    // Capacity is the smaller of (clusters per label * known labels) and the configured maximum.
    boolean atClusterCapacity = (this.clusters.size() >= Math.min(
            this.clustersPerLabelOption.getValue() * this.knownLabels.size(),
            this.maximumNumberOfClusterSizeOption.getValue()));

    // * * *
    //
    // Results show that when average P(x|k) < Chauvenet, no new clusters, and vice versa (which is opposite of expected behavior)
    //
    // * * *
    // The counters below are only incremented here; universalOutlier does not affect the
    // cluster-creation decision in the active code.
    boolean universalOutlier = this.universalCluster.isOutlier(x);
    if (isNewLabel) {
        newLabelCount++;
    }
    if (universalOutlier) {
        universalOutlierCount++;
    }
    if (unanimousOutlier) {
        unanimousOutlierCount++;
    }
    // If we have no matches at all, then the weakest cluster is replaced by a new one with a high variance and low weight
    //if (isNewLabel || (unanimousOutlier && universalOutlier)) {   
    if (isNewLabel || unanimousOutlier) {
        // NOTE(review): the array is sorted ascending by weight/radius, so the last element has
        // the LARGEST ratio, yet it is the one named "weakestLink" and evicted - confirm the
        // intended end of the array. Also, an empty cluster collection makes this index -1.
        Riffle weakestLink = sortedClusters[sortedClusters.length - 1]; // get last one
        Riffle novelCluster = this.createNewCluster(x);
        //novelCluster.setRadius((avgRadius + maxRadius) / 2.0); // Set to half-way between average and max radius
        // The new cluster starts with the average weight of the candidate clusters.
        novelCluster.setWeight(weightTotal / nearestClusters.size()); // <---- Validate this ------
        weightTotal += novelCluster.getWeight(); // update for new normalization factor
        // You are the weakest link... Goodbye
        if (atClusterCapacity) {
            weightTotal -= weakestLink.getWeight(); // update for new normalization factor
            this.clusters.remove(weakestLink);
        }
        // Everyone please welcome our newest contestant...
        clusters.add(novelCluster);
    }

    // Normalize Weights and Update variance estimates for singleton clusters
    double[] universeVariance = universalCluster.getVariances();
    double[] initialVariance = new double[universeVariance.length];
    // Clusters with fewer than two points get 85% of the universal cluster's variance.
    for (int j = 0; j < initialVariance.length; ++j) {
        initialVariance[j] = universeVariance[j] * 0.85;
    }
    // Fall back to a divisor of 1 so the normalization below never divides by zero or a negative.
    if (weightTotal <= 0) {
        weightTotal = 1;
    }
    for (Riffle c : this.clusters) {
        if (c.size() < 2) {
            c.setVariances(initialVariance);
        }
        c.setWeight(c.getWeight() / weightTotal);
    }
}

From source file:moa.clusterer.FeS2.java

License:Apache License

/**
 * @return training accuracy/*from   www .  ja v a  2 s.c o  m*/
 */
private double trainPerceptron() {
    // Train the perceptron from warmup phase clustering 
    final int epochs = 20;
    final int numberOfPerceptrons = 10;
    final int MEMBER = 0;
    final int OUTLIER = 1;
    double accuracySum = 0;
    double accuracyCount = 0;
    this.outlierPerceptronTrainingSet.clear();
    Random rng = new Random(this.randomSeed);

    // Generate training set
    for (Riffle thisCluster : this.clusters) {
        for (Riffle thatCluster : this.clusters) {
            double groundTruth = (thisCluster == thatCluster) ? MEMBER : OUTLIER;
            for (Instance x : thatCluster.getHeader()) {
                Instance pseudoPt = makePerceptronInstance(thisCluster, x);
                pseudoPt.setClassValue(groundTruth);
                this.outlierPerceptronTrainingSet.add(pseudoPt);
            }
        }
    }
    this.outlierPerceptronTrainingSet.parallelStream().forEach((x) -> {
        x.setWeight(1.0 / this.outlierPerceptronTrainingSet.numInstances());
    });

    // Boost it
    this.perceptrons = new Perceptron[numberOfPerceptrons];
    this.pweights = new double[numberOfPerceptrons];
    for (int perceptronIdx = 0; perceptronIdx < numberOfPerceptrons; ++perceptronIdx) {
        // Discover new weak learner
        Perceptron candidatePerceptron = new Perceptron();
        candidatePerceptron.prepareForUse();
        candidatePerceptron.learningRatioOption.setValue(rng.nextDouble() * 0.9 + 0.1);
        for (int epoch = 0; epoch < epochs; epoch++) {
            for (Instance x : this.outlierPerceptronTrainingSet) {
                if ((rng.nextDouble() / this.outlierPerceptronTrainingSet.numInstances()) < x.weight()) { // weighted subsampling
                    candidatePerceptron.trainOnInstance(x);
                }
            }
        } //end epochs
          // Evaluate weak learner
        double errorFunctionSum = 0;
        double weightSum = 0;
        for (Instance x : this.outlierPerceptronTrainingSet) {
            if (!candidatePerceptron.correctlyClassifies(x)) {
                errorFunctionSum += x.weight();
            }
        }
        // adjust training weights
        for (Instance x : this.outlierPerceptronTrainingSet) {
            double newWeight = x.weight();
            if (candidatePerceptron.correctlyClassifies(x)) {
                newWeight *= errorFunctionSum / (1.0 - errorFunctionSum);
                if (Double.isNaN(newWeight)) {
                    newWeight = weka.core.Utils.SMALL;
                }
                x.setWeight(newWeight);
            }
            weightSum += newWeight;
        }
        // Normalize
        for (Instance x : this.outlierPerceptronTrainingSet) {
            x.setWeight(x.weight() / weightSum);
        }
        // Add to ensemble
        double newPerceptronWeight = Math.log((1 - errorFunctionSum) / errorFunctionSum);

        this.perceptrons[perceptronIdx] = candidatePerceptron;
        this.pweights[perceptronIdx] = newPerceptronWeight;
    } // end numPerceptrons

    // Check training error
    accuracySum = 0;
    accuracyCount = 0;
    for (Instance x : this.outlierPerceptronTrainingSet) {
        if (this.getPerceptronVotesForOutlierStatus(x) == x.classValue()) {
            accuracySum++;
        }
        accuracyCount++;
    }
    double trainingAccuracy = (accuracyCount > 0) ? (accuracySum / accuracyCount) : 0.0;
    this.outlierPerceptronTrainingSet.clear();
    return trainingAccuracy;
}

From source file:moa.clusterer.outliers.Sieve.java

License:Apache License

/**
 * Adds the instance's weight to {@code trainingWeightSeenByModel} and then delegates the actual
 * training to {@code trainOnInstanceImpl}.
 *
 * @param inst the training instance
 */
@Override
public final void trainOnInstance(Instance inst) {
    final double instanceWeight = inst.weight();
    this.trainingWeightSeenByModel += instanceWeight;
    this.trainOnInstanceImpl(inst);
}