Example usage for weka.core Instance weight

List of usage examples for weka.core Instance weight

Introduction

On this page you can find example usages of weka.core Instance.weight.

Prototype

public double weight();

Source Link

Document

Returns the instance's weight.

Usage

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Sets the class prior probabilities from the training data.
 *
 * <p>For a nominal class, each prior starts at 1 (Laplace-style smoothing)
 * and accumulates the weight of every training instance of that class. For a
 * numeric class, the buffered class values/weights and error estimators are
 * reset and refilled from the training data.
 *
 * @param train the training instances used to determine the prior
 *          probabilities
 * @throws Exception if the class attribute of the instances is not set
 */
public void setPriors(Instances train) throws Exception {
    m_NoPriors = false;

    if (m_ClassIsNominal) {
        // Smoothed counts: every class starts with a pseudo-count of 1.
        for (int c = 0; c < m_NumClasses; c++) {
            m_ClassPriors[c] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;

        for (int idx = 0; idx < train.numInstances(); idx++) {
            Instance inst = train.instance(idx);
            if (!inst.classIsMissing()) {
                m_ClassPriors[(int) inst.classValue()] += inst.weight();
                m_ClassPriorsSum += inst.weight();
            }
        }
    } else {
        // Numeric class: clear the buffered statistics and estimators, then
        // re-buffer every non-missing class value with its instance weight.
        m_NumTrainClassVals = 0;
        m_TrainClassVals = null;
        m_TrainClassWeights = null;
        m_PriorErrorEstimator = null;
        m_ErrorEstimator = null;

        for (int idx = 0; idx < train.numInstances(); idx++) {
            Instance inst = train.instance(idx);
            if (!inst.classIsMissing()) {
                addNumericTrainClass(inst.classValue(), inst.weight());
            }
        }
    }
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Updates the class prior probabilities (when incrementally training)
 * /*w ww  . j  ava  2 s. co  m*/
 * @param instance the new training instance seen
 * @throws Exception if the class of the instance is not set
 */
public void updatePriors(Instance instance) throws Exception {
    if (!instance.classIsMissing()) {
        if (!m_ClassIsNominal) {
            if (!instance.classIsMissing()) {
                addNumericTrainClass(instance.classValue(), instance.weight());
            }
        } else {
            m_ClassPriors[(int) instance.classValue()] += instance.weight();
            m_ClassPriorsSum += instance.weight();
        }
    }
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Updates all the statistics about a classifier's performance for the current
 * test instance.
 *
 * @param predictedDistribution the probabilities assigned to each class
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not set
 */
protected void updateStatsForClassifier(double[] predictedDistribution, Instance instance) throws Exception {

    int actualClass = (int) instance.classValue();

    if (!instance.classIsMissing()) {
        updateMargins(predictedDistribution, actualClass, instance.weight());

        // Determine the predicted class (doesn't detect multiple
        // classifications)
        int predictedClass = -1;
        double bestProb = 0.0;
        for (int i = 0; i < m_NumClasses; i++) {
            if (predictedDistribution[i] > bestProb) {
                predictedClass = i;
                bestProb = predictedDistribution[i];
            }
        }

        // Total (weighted) number of instances with a class value seen so far.
        m_WithClass += instance.weight();

        // Determine misclassification cost
        if (m_CostMatrix != null) {
            if (predictedClass < 0) {
                // For missing predictions, we assume the worst possible cost.
                // This is pretty harsh.
                // Perhaps we could take the negative of the cost of a correct
                // prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
                // although often this will be zero
                m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance);
            } else {
                m_TotalCost += instance.weight()
                        * m_CostMatrix.getElement(actualClass, predictedClass, instance);
            }
        }

        // Update counts when no class was predicted
        if (predictedClass < 0) {
            m_Unclassified += instance.weight();
            return;
        }

        // KB-information: reward predictions that beat the class prior,
        // penalize those that don't. Probabilities are floored at MIN_SF_PROB
        // so the log2 calls below stay finite.
        double predictedProb = Math.max(MIN_SF_PROB, predictedDistribution[actualClass]);
        double priorProb = Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum);
        if (predictedProb >= priorProb) {
            m_SumKBInfo += (Utils.log2(predictedProb) - Utils.log2(priorProb)) * instance.weight();
        } else {
            m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * instance.weight();
        }

        // Weighted entropy of the scheme's prediction and of the prior.
        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

        updateNumericScores(predictedDistribution, makeDistribution(instance.classValue()), instance.weight());

        // Update other stats
        m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
        if (predictedClass != actualClass) {
            m_Incorrect += instance.weight();
        } else {
            m_Correct += instance.weight();
        }
    } else {
        // Missing class value: only the missing-class tally is updated.
        m_MissingClass += instance.weight();
    }
}

From source file:bme.mace.logicdomain.Evaluation.java

License:Open Source License

/**
 * Updates all the statistics about a predictor's performance for the current
 * test instance.
 *
 * @param predictedValue the numeric value the classifier predicts
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not set
 */
protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception {

    if (!instance.classIsMissing()) {

        // Update stats
        m_WithClass += instance.weight();
        if (Instance.isMissingValue(predictedValue)) {
            m_Unclassified += instance.weight();
            return;
        }
        // Weighted sums and sums of squares/cross-products, used later to
        // compute correlation and error statistics.
        m_SumClass += instance.weight() * instance.classValue();
        m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue();
        m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue;
        m_SumPredicted += instance.weight() * predictedValue;
        m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue;

        // Lazily initialize the error estimator from the buffered training
        // class values on first use.
        if (m_ErrorEstimator == null) {
            setNumericPriorsFromBuffer();
        }
        // Probabilities are floored at MIN_SF_PROB so the log2 calls below
        // stay finite.
        double predictedProb = Math.max(m_ErrorEstimator.getProbability(predictedValue - instance.classValue()),
                MIN_SF_PROB);
        double priorProb = Math.max(m_PriorErrorEstimator.getProbability(instance.classValue()), MIN_SF_PROB);

        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
        // Fold this prediction's residual into the error estimator.
        m_ErrorEstimator.addValue(predictedValue - instance.classValue(), instance.weight());

        updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()),
                instance.weight());

    } else {
        // Missing class value: only the missing-class tally is updated.
        m_MissingClass += instance.weight();
    }
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for nominal attribute and nominal class
 * and returns value.
 *
 * <p>Side effects: sets {@code m_SplitPoint} to the winning attribute value
 * and replaces {@code m_Distribution} with the best class distribution
 * (rows: [0] = matching value, [1] = other values, [2] = missing).
 * All counts are weighted by instance weight.
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNominal(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal;
    // counts[v][c]: weight of class c among instances with attribute value v;
    // the extra last row collects instances with a missing attribute value.
    double[][] counts = new double[m_Instances.attribute(index).numValues() + 1][m_Instances.numClasses()];
    double[] sumCounts = new double[m_Instances.numClasses()];
    double[][] bestDist = new double[3][m_Instances.numClasses()];
    int numMissing = 0;

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (inst.isMissing(index)) {
            numMissing++;
            counts[m_Instances.attribute(index).numValues()][(int) inst.classValue()] += inst.weight();
        } else {
            counts[(int) inst.value(index)][(int) inst.classValue()] += inst.weight();
        }
    }

    // Compute sum of counts
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            sumCounts[j] += counts[i][j];
        }
    }

    // Make split counts for each possible split and evaluate
    System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0, m_Distribution[2], 0,
            m_Instances.numClasses());
    // Try each "value i vs. the rest" split and keep the one with the lowest
    // entropy conditioned on the split.
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
        for (int j = 0; j < m_Instances.numClasses(); j++) {
            m_Distribution[0][j] = counts[i][j];
            m_Distribution[1][j] = sumCounts[j] - counts[i][j];
        }
        currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
        if (currVal < bestVal) {
            bestVal = currVal;
            m_SplitPoint = (double) i;
            for (int j = 0; j < 3; j++) {
                System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses());
            }
        }
    }

    // No missing values in training data.
    if (numMissing == 0) {
        System.arraycopy(sumCounts, 0, bestDist[2], 0, m_Instances.numClasses());
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for nominal attribute and numeric class
 * and returns value.
 *
 * <p>Side effects: sets {@code m_SplitPoint} to the winning attribute value
 * and replaces {@code m_Distribution} with the weighted-mean class value per
 * branch (rows: [0] = matching value, [1] = other values, [2] = missing).
 * All sums are weighted by instance weight.
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNumeric(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal;
    // Per-attribute-value accumulators of weighted class sums, squared sums,
    // and weights (missing attribute values are tracked separately below).
    double[] sumsSquaresPerValue = new double[m_Instances.attribute(index).numValues()],
            sumsPerValue = new double[m_Instances.attribute(index).numValues()],
            weightsPerValue = new double[m_Instances.attribute(index).numValues()];
    double totalSumSquaresW = 0, totalSumW = 0, totalSumOfWeightsW = 0, totalSumOfWeights = 0, totalSum = 0;
    double[] sumsSquares = new double[3], sumOfWeights = new double[3];
    double[][] bestDist = new double[3][1];

    // Compute counts for all the values
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (inst.isMissing(index)) {
            m_Distribution[2][0] += inst.classValue() * inst.weight();
            sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[2] += inst.weight();
        } else {
            weightsPerValue[(int) inst.value(index)] += inst.weight();
            sumsPerValue[(int) inst.value(index)] += inst.classValue() * inst.weight();
            sumsSquaresPerValue[(int) inst.value(index)] += inst.classValue() * inst.classValue()
                    * inst.weight();
        }
        totalSumOfWeights += inst.weight();
        totalSum += inst.classValue() * inst.weight();
    }

    // Check if the total weight is zero
    if (totalSumOfWeights <= 0) {
        return bestVal;
    }

    // Compute sum of counts without missing ones
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
        totalSumOfWeightsW += weightsPerValue[i];
        totalSumSquaresW += sumsSquaresPerValue[i];
        totalSumW += sumsPerValue[i];
    }

    // Make split counts for each possible split and evaluate
    // ("value i vs. the rest", scored by weighted variance).
    for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {

        m_Distribution[0][0] = sumsPerValue[i];
        sumsSquares[0] = sumsSquaresPerValue[i];
        sumOfWeights[0] = weightsPerValue[i];
        m_Distribution[1][0] = totalSumW - sumsPerValue[i];
        sumsSquares[1] = totalSumSquaresW - sumsSquaresPerValue[i];
        sumOfWeights[1] = totalSumOfWeightsW - weightsPerValue[i];

        currVal = variance(m_Distribution, sumsSquares, sumOfWeights);

        if (currVal < bestVal) {
            bestVal = currVal;
            m_SplitPoint = (double) i;
            for (int j = 0; j < 3; j++) {
                if (sumOfWeights[j] > 0) {
                    // Branch prediction: weighted mean of the class values.
                    bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j];
                } else {
                    // Empty branch: fall back to the global weighted mean.
                    bestDist[j][0] = totalSum / totalSumOfWeights;
                }
            }
        }
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for numeric attribute and nominal class
 * and returns value.
 *
 * <p>Side effects: sorts {@code m_Instances} by the given attribute, sets
 * {@code m_SplitPoint} to the winning cut point, and replaces
 * {@code m_Distribution} with the best class distribution
 * (rows: [0] = below cut, [1] = at/above cut, [2] = missing).
 * All counts are weighted by instance weight.
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNumericNominal(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal, currCutPoint;
    int numMissing = 0;
    double[] sum = new double[m_Instances.numClasses()];
    double[][] bestDist = new double[3][m_Instances.numClasses()];

    // Compute counts for all the values
    // (initially everything with a known value sits in the "above" row).
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (!inst.isMissing(index)) {
            m_Distribution[1][(int) inst.classValue()] += inst.weight();
        } else {
            m_Distribution[2][(int) inst.classValue()] += inst.weight();
            numMissing++;
        }
    }
    System.arraycopy(m_Distribution[1], 0, sum, 0, m_Instances.numClasses());

    // Save current distribution as best distribution
    for (int j = 0; j < 3; j++) {
        System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses());
    }

    // Sort instances
    // (missing values sort to the end, so they are skipped by the loop bound).
    m_Instances.sort(index);

    // Make split counts for each possible split and evaluate
    // by sweeping each instance from the "above" row into the "below" row.
    for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) {
        Instance inst = m_Instances.instance(i);
        Instance instPlusOne = m_Instances.instance(i + 1);
        m_Distribution[0][(int) inst.classValue()] += inst.weight();
        m_Distribution[1][(int) inst.classValue()] -= inst.weight();
        // Only evaluate between distinct attribute values.
        if (inst.value(index) < instPlusOne.value(index)) {
            currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0;
            currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
            if (currVal < bestVal) {
                m_SplitPoint = currCutPoint;
                bestVal = currVal;
                for (int j = 0; j < 3; j++) {
                    System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses());
                }
            }
        }
    }

    // No missing values in training data.
    if (numMissing == 0) {
        System.arraycopy(sum, 0, bestDist[2], 0, m_Instances.numClasses());
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boosting.classifiers.DecisionStumpWritable.java

License:Open Source License

/**
 * Finds best split for numeric attribute and numeric class
 * and returns value.
 *
 * <p>Side effects: sorts {@code m_Instances} by the given attribute, sets
 * {@code m_SplitPoint} to the winning cut point, and replaces
 * {@code m_Distribution} with the weighted-mean class value per branch
 * (rows: [0] = below cut, [1] = at/above cut, [2] = missing).
 * All sums are weighted by instance weight.
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNumericNumeric(int index) throws Exception {

    double bestVal = Double.MAX_VALUE, currVal, currCutPoint;
    int numMissing = 0;
    double[] sumsSquares = new double[3], sumOfWeights = new double[3];
    double[][] bestDist = new double[3][1];
    double totalSum = 0, totalSumOfWeights = 0;

    // Compute counts for all the values
    // (initially everything with a known value sits in the "above" row).
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (!inst.isMissing(index)) {
            m_Distribution[1][0] += inst.classValue() * inst.weight();
            sumsSquares[1] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[1] += inst.weight();
        } else {
            m_Distribution[2][0] += inst.classValue() * inst.weight();
            sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight();
            sumOfWeights[2] += inst.weight();
            numMissing++;
        }
        totalSumOfWeights += inst.weight();
        totalSum += inst.classValue() * inst.weight();
    }

    // Check if the total weight is zero
    if (totalSumOfWeights <= 0) {
        return bestVal;
    }

    // Sort instances
    // (missing values sort to the end, so they are skipped by the loop bound).
    m_Instances.sort(index);

    // Make split counts for each possible split and evaluate
    // by sweeping each instance from the "above" sums into the "below" sums.
    for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) {
        Instance inst = m_Instances.instance(i);
        Instance instPlusOne = m_Instances.instance(i + 1);
        m_Distribution[0][0] += inst.classValue() * inst.weight();
        sumsSquares[0] += inst.classValue() * inst.classValue() * inst.weight();
        sumOfWeights[0] += inst.weight();
        m_Distribution[1][0] -= inst.classValue() * inst.weight();
        sumsSquares[1] -= inst.classValue() * inst.classValue() * inst.weight();
        sumOfWeights[1] -= inst.weight();
        // Only evaluate between distinct attribute values.
        if (inst.value(index) < instPlusOne.value(index)) {
            currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0;
            currVal = variance(m_Distribution, sumsSquares, sumOfWeights);
            if (currVal < bestVal) {
                m_SplitPoint = currCutPoint;
                bestVal = currVal;
                for (int j = 0; j < 3; j++) {
                    if (sumOfWeights[j] > 0) {
                        // Branch prediction: weighted mean of the class values.
                        bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j];
                    } else {
                        // Empty branch: fall back to the global weighted mean.
                        bestDist[j][0] = totalSum / totalSumOfWeights;
                    }
                }
            }
        }
    }

    m_Distribution = bestDist;
    return bestVal;
}

From source file:boostingPL.boosting.AdaBoost.java

License:Open Source License

/**
 * Runs one AdaBoost round: trains the t-th weak classifier, computes its
 * weighted error, derives its vote weight, and reweights the training
 * instances so misclassified ones count more in the next round.
 *
 * @param t the boosting round (0-based); rounds past numIterations are ignored
 * @throws Exception if the weak learner fails or the weighted error is >= 0.5
 */
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    if (e >= 0.5) {
        // A learner no better than chance would get a non-positive vote.
        // Fix: the message previously said "> 0.5" although the check is >=.
        System.out.println("AdaBoost Error: error rate = " + e + ", >= 0.5");
        throw new Exception("error rate >= 0.5");
    }

    if (e == 0.0) {
        e = 0.0001; // guard against log(1/0) and division by zero below
    }
    // Vote weight: 0.5 * ln((1 - e) / e). Math.log is already the natural
    // log, so the original division by Math.log(Math.E) was a no-op.
    cweights[t] = 0.5 * Math.log((1 - e) / e);
    System.out.println("Round = " + t + "\t ErrorRate = " + e + "\t\t Weights = " + cweights[t]);

    // Reweight: misclassified instances are scaled by 1/(2e), correctly
    // classified ones by 1/(2(1-e)), keeping the total weight constant.
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() / (2 * e));
        } else {
            inst.setWeight(inst.weight() / (2 * (1 - e)));
        }
    }
}

From source file:boostingPL.boosting.SAMME.java

License:Open Source License

/**
 * Runs one SAMME (multi-class AdaBoost) round: trains the t-th weak
 * classifier, computes its weighted error, derives its vote weight, boosts
 * the weight of misclassified instances, and renormalizes all weights.
 *
 * @param t the boosting round (0-based); rounds past numIterations are ignored
 * @throws Exception if the weak learner fails or the weighted error reaches
 *           the random-guessing bound 1 - 1/numClasses
 */
public void run(int t) throws Exception {
    if (t >= numIterations) {
        return;
    }

    classifiers[t] = ClassifierWritable.newInstance("DecisionStump");
    classifiers[t].buildClassifier(insts);

    double e = weightError(t);
    final int numClasses = insts.classAttribute().numValues();
    // SAMME only requires the weak learner to beat random guessing over
    // numClasses classes, i.e. e < 1 - 1/numClasses.
    double maxe = 1 - 1.0 / numClasses;
    if (e >= maxe) {
        // Fix: the message previously used ">" although the check is >=.
        System.out.println("SAMME Error: error rate = " + e + ", >= " + maxe);
        throw new Exception("error rate >= " + maxe);
    }

    if (e == 0.0) {
        e = 0.0001; // don't let e == 0 (would make the log below infinite)
    }
    // Multi-class vote weight: ln((1-e)/e) + ln(numClasses - 1).
    cweights[t] = Math.log((1 - e) / e) + Math.log(numClasses - 1);
    System.out.println("Round = " + t + "\tErrorRate = " + e + "\tCWeight = " + cweights[t]);

    // Boost the weight of every misclassified instance by exp(cweight).
    double expCWeight = Math.exp(cweights[t]);
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        if (classifiers[t].classifyInstance(inst) != inst.classValue()) {
            inst.setWeight(inst.weight() * expCWeight);
        }
    }

    // Renormalize so the instance weights sum to 1.
    double weightSum = insts.sumOfWeights();
    for (int i = 0; i < insts.numInstances(); i++) {
        Instance inst = insts.instance(i);
        inst.setWeight(inst.weight() / weightSum);
    }

}