List of usage examples for the weka.core.Instance method weight()
public double weight();
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/**
 * Resets the class priors and recomputes them from a training set.
 *
 * <p>For a nominal class every prior starts at 1 and each instance with a known
 * class adds its weight to its class's entry and to the running total. For a
 * numeric class the buffered training class values and both error estimators
 * are discarded, and every instance with a known class is handed to
 * {@code addNumericTrainClass}.
 *
 * @param train the training instances used to determine the prior
 *        probabilities
 * @throws Exception if the class attribute of the instances is not set
 */
public void setPriors(Instances train) throws Exception {
    m_NoPriors = false;

    if (m_ClassIsNominal) {
        // Start every class at 1 so that no prior is ever zero.
        for (int c = 0; c < m_NumClasses; c++) {
            m_ClassPriors[c] = 1;
        }
        m_ClassPriorsSum = m_NumClasses;

        for (int n = 0; n < train.numInstances(); n++) {
            Instance current = train.instance(n);
            if (current.classIsMissing()) {
                continue;
            }
            m_ClassPriors[(int) current.classValue()] += current.weight();
            m_ClassPriorsSum += current.weight();
        }
        return;
    }

    // Numeric class: forget everything collected so far, then re-buffer.
    m_NumTrainClassVals = 0;
    m_TrainClassVals = null;
    m_TrainClassWeights = null;
    m_PriorErrorEstimator = null;
    m_ErrorEstimator = null;

    for (int n = 0; n < train.numInstances(); n++) {
        Instance current = train.instance(n);
        if (current.classIsMissing()) {
            continue;
        }
        addNumericTrainClass(current.classValue(), current.weight());
    }
}
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Updates the class prior probabilities (when incrementally training) * /*w ww . j ava 2 s. co m*/ * @param instance the new training instance seen * @throws Exception if the class of the instance is not set */ public void updatePriors(Instance instance) throws Exception { if (!instance.classIsMissing()) { if (!m_ClassIsNominal) { if (!instance.classIsMissing()) { addNumericTrainClass(instance.classValue(), instance.weight()); } } else { m_ClassPriors[(int) instance.classValue()] += instance.weight(); m_ClassPriorsSum += instance.weight(); } } }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Updates all the statistics about a classifiers performance for the current * test instance.//from w w w. jav a 2 s . c o m * * @param predictedDistribution the probabilities assigned to each class * @param instance the instance to be classified * @throws Exception if the class of the instance is not set */ protected void updateStatsForClassifier(double[] predictedDistribution, Instance instance) throws Exception { int actualClass = (int) instance.classValue(); if (!instance.classIsMissing()) { updateMargins(predictedDistribution, actualClass, instance.weight()); // Determine the predicted class (doesn't detect multiple // classifications) int predictedClass = -1; double bestProb = 0.0; for (int i = 0; i < m_NumClasses; i++) { if (predictedDistribution[i] > bestProb) { predictedClass = i; bestProb = predictedDistribution[i]; } } m_WithClass += instance.weight(); // Determine misclassification cost if (m_CostMatrix != null) { if (predictedClass < 0) { // For missing predictions, we assume the worst possible cost. // This is pretty harsh. 
// Perhaps we could take the negative of the cost of a correct // prediction (-m_CostMatrix.getElement(actualClass,actualClass)), // although often this will be zero m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance); } else { m_TotalCost += instance.weight() * m_CostMatrix.getElement(actualClass, predictedClass, instance); } } // Update counts when no class was predicted if (predictedClass < 0) { m_Unclassified += instance.weight(); return; } double predictedProb = Math.max(MIN_SF_PROB, predictedDistribution[actualClass]); double priorProb = Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum); if (predictedProb >= priorProb) { m_SumKBInfo += (Utils.log2(predictedProb) - Utils.log2(priorProb)) * instance.weight(); } else { m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * instance.weight(); } m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight(); m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight(); updateNumericScores(predictedDistribution, makeDistribution(instance.classValue()), instance.weight()); // Update other stats m_ConfusionMatrix[actualClass][predictedClass] += instance.weight(); if (predictedClass != actualClass) { m_Incorrect += instance.weight(); } else { m_Correct += instance.weight(); } } else { m_MissingClass += instance.weight(); } }
From source file:bme.mace.logicdomain.Evaluation.java
License:Open Source License
/** * Updates all the statistics about a predictors performance for the current * test instance./*from w w w . j a va2 s . co m*/ * * @param predictedValue the numeric value the classifier predicts * @param instance the instance to be classified * @throws Exception if the class of the instance is not set */ protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception { if (!instance.classIsMissing()) { // Update stats m_WithClass += instance.weight(); if (Instance.isMissingValue(predictedValue)) { m_Unclassified += instance.weight(); return; } m_SumClass += instance.weight() * instance.classValue(); m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue(); m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue; m_SumPredicted += instance.weight() * predictedValue; m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue; if (m_ErrorEstimator == null) { setNumericPriorsFromBuffer(); } double predictedProb = Math.max(m_ErrorEstimator.getProbability(predictedValue - instance.classValue()), MIN_SF_PROB); double priorProb = Math.max(m_PriorErrorEstimator.getProbability(instance.classValue()), MIN_SF_PROB); m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight(); m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight(); m_ErrorEstimator.addValue(predictedValue - instance.classValue(), instance.weight()); updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()), instance.weight()); } else { m_MissingClass += instance.weight(); } }
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/** * Finds best split for nominal attribute and nominal class * and returns value.//from w ww .jav a 2 s . c o m * * @param index attribute index * @return value of criterion for the best split * @throws Exception if something goes wrong */ private double findSplitNominalNominal(int index) throws Exception { double bestVal = Double.MAX_VALUE, currVal; double[][] counts = new double[m_Instances.attribute(index).numValues() + 1][m_Instances.numClasses()]; double[] sumCounts = new double[m_Instances.numClasses()]; double[][] bestDist = new double[3][m_Instances.numClasses()]; int numMissing = 0; // Compute counts for all the values for (int i = 0; i < m_Instances.numInstances(); i++) { Instance inst = m_Instances.instance(i); if (inst.isMissing(index)) { numMissing++; counts[m_Instances.attribute(index).numValues()][(int) inst.classValue()] += inst.weight(); } else { counts[(int) inst.value(index)][(int) inst.classValue()] += inst.weight(); } } // Compute sum of counts for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) { for (int j = 0; j < m_Instances.numClasses(); j++) { sumCounts[j] += counts[i][j]; } } // Make split counts for each possible split and evaluate System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0, m_Distribution[2], 0, m_Instances.numClasses()); for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) { for (int j = 0; j < m_Instances.numClasses(); j++) { m_Distribution[0][j] = counts[i][j]; m_Distribution[1][j] = sumCounts[j] - counts[i][j]; } currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution); if (currVal < bestVal) { bestVal = currVal; m_SplitPoint = (double) i; for (int j = 0; j < 3; j++) { System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, m_Instances.numClasses()); } } } // No missing values in training data. if (numMissing == 0) { System.arraycopy(sumCounts, 0, bestDist[2], 0, m_Instances.numClasses()); } m_Distribution = bestDist; return bestVal; }
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/** * Finds best split for nominal attribute and numeric class * and returns value./*from w ww . j a v a 2s . c o m*/ * * @param index attribute index * @return value of criterion for the best split * @throws Exception if something goes wrong */ private double findSplitNominalNumeric(int index) throws Exception { double bestVal = Double.MAX_VALUE, currVal; double[] sumsSquaresPerValue = new double[m_Instances.attribute(index).numValues()], sumsPerValue = new double[m_Instances.attribute(index).numValues()], weightsPerValue = new double[m_Instances.attribute(index).numValues()]; double totalSumSquaresW = 0, totalSumW = 0, totalSumOfWeightsW = 0, totalSumOfWeights = 0, totalSum = 0; double[] sumsSquares = new double[3], sumOfWeights = new double[3]; double[][] bestDist = new double[3][1]; // Compute counts for all the values for (int i = 0; i < m_Instances.numInstances(); i++) { Instance inst = m_Instances.instance(i); if (inst.isMissing(index)) { m_Distribution[2][0] += inst.classValue() * inst.weight(); sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight(); sumOfWeights[2] += inst.weight(); } else { weightsPerValue[(int) inst.value(index)] += inst.weight(); sumsPerValue[(int) inst.value(index)] += inst.classValue() * inst.weight(); sumsSquaresPerValue[(int) inst.value(index)] += inst.classValue() * inst.classValue() * inst.weight(); } totalSumOfWeights += inst.weight(); totalSum += inst.classValue() * inst.weight(); } // Check if the total weight is zero if (totalSumOfWeights <= 0) { return bestVal; } // Compute sum of counts without missing ones for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) { totalSumOfWeightsW += weightsPerValue[i]; totalSumSquaresW += sumsSquaresPerValue[i]; totalSumW += sumsPerValue[i]; } // Make split counts for each possible split and evaluate for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) { m_Distribution[0][0] = sumsPerValue[i]; sumsSquares[0] = sumsSquaresPerValue[i]; 
sumOfWeights[0] = weightsPerValue[i]; m_Distribution[1][0] = totalSumW - sumsPerValue[i]; sumsSquares[1] = totalSumSquaresW - sumsSquaresPerValue[i]; sumOfWeights[1] = totalSumOfWeightsW - weightsPerValue[i]; currVal = variance(m_Distribution, sumsSquares, sumOfWeights); if (currVal < bestVal) { bestVal = currVal; m_SplitPoint = (double) i; for (int j = 0; j < 3; j++) { if (sumOfWeights[j] > 0) { bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j]; } else { bestDist[j][0] = totalSum / totalSumOfWeights; } } } } m_Distribution = bestDist; return bestVal; }
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/**
 * Finds best split for numeric attribute and nominal class
 * and returns value.
 *
 * <p>The instances are sorted on the attribute and swept from the smallest
 * value upwards; a cut point halfway between two neighbouring instances is
 * evaluated whenever their attribute values differ. {@code m_Distribution}
 * holds the weighted class counts below the cut (row 0), above the cut
 * (row 1) and for missing attribute values (row 2). On return
 * {@code m_Distribution} is replaced by the distribution of the best split and
 * {@code m_SplitPoint} holds the best cut point, if any candidate improved on
 * {@code Double.MAX_VALUE}.
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNumericNominal(int index) throws Exception {
    final int numClasses = m_Instances.numClasses();

    double bestVal = Double.MAX_VALUE;
    double currVal;
    double currCutPoint;
    int numMissing = 0;
    double[] sum = new double[numClasses];
    double[][] bestDist = new double[3][numClasses];

    // Compute counts for all the values
    // NOTE(review): m_Distribution is accumulated into without being reset
    // here - presumably the caller supplies a zeroed array; confirm.
    for (int i = 0; i < m_Instances.numInstances(); i++) {
        Instance inst = m_Instances.instance(i);
        if (!inst.isMissing(index)) {
            m_Distribution[1][(int) inst.classValue()] += inst.weight();
        } else {
            m_Distribution[2][(int) inst.classValue()] += inst.weight();
            numMissing++;
        }
    }
    System.arraycopy(m_Distribution[1], 0, sum, 0, numClasses);

    // Save current distribution as best distribution
    for (int j = 0; j < 3; j++) {
        System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, numClasses);
    }

    // Sort instances
    m_Instances.sort(index);

    // Make split counts for each possible split and evaluate.
    // NOTE(review): the loop bound skips the last numMissing instances, so it
    // relies on sort(index) placing missing values at the end - confirm.
    for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) {
        Instance inst = m_Instances.instance(i);
        Instance instPlusOne = m_Instances.instance(i + 1);

        // Move the current instance from the "above" row to the "below" row.
        m_Distribution[0][(int) inst.classValue()] += inst.weight();
        m_Distribution[1][(int) inst.classValue()] -= inst.weight();

        if (inst.value(index) < instPlusOne.value(index)) {
            currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0;
            currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
            if (currVal < bestVal) {
                m_SplitPoint = currCutPoint;
                bestVal = currVal;
                for (int j = 0; j < 3; j++) {
                    System.arraycopy(m_Distribution[j], 0, bestDist[j], 0, numClasses);
                }
            }
        }
    }

    // No missing values in training data: use the overall class counts for
    // the missing-value row.
    if (numMissing == 0) {
        System.arraycopy(sum, 0, bestDist[2], 0, numClasses);
    }

    m_Distribution = bestDist;
    return bestVal;
}
From source file:boosting.classifiers.DecisionStumpWritable.java
License:Open Source License
/** * Finds best split for numeric attribute and numeric class * and returns value.//from ww w . j a v a2 s . c o m * * @param index attribute index * @return value of criterion for the best split * @throws Exception if something goes wrong */ private double findSplitNumericNumeric(int index) throws Exception { double bestVal = Double.MAX_VALUE, currVal, currCutPoint; int numMissing = 0; double[] sumsSquares = new double[3], sumOfWeights = new double[3]; double[][] bestDist = new double[3][1]; double totalSum = 0, totalSumOfWeights = 0; // Compute counts for all the values for (int i = 0; i < m_Instances.numInstances(); i++) { Instance inst = m_Instances.instance(i); if (!inst.isMissing(index)) { m_Distribution[1][0] += inst.classValue() * inst.weight(); sumsSquares[1] += inst.classValue() * inst.classValue() * inst.weight(); sumOfWeights[1] += inst.weight(); } else { m_Distribution[2][0] += inst.classValue() * inst.weight(); sumsSquares[2] += inst.classValue() * inst.classValue() * inst.weight(); sumOfWeights[2] += inst.weight(); numMissing++; } totalSumOfWeights += inst.weight(); totalSum += inst.classValue() * inst.weight(); } // Check if the total weight is zero if (totalSumOfWeights <= 0) { return bestVal; } // Sort instances m_Instances.sort(index); // Make split counts for each possible split and evaluate for (int i = 0; i < m_Instances.numInstances() - (numMissing + 1); i++) { Instance inst = m_Instances.instance(i); Instance instPlusOne = m_Instances.instance(i + 1); m_Distribution[0][0] += inst.classValue() * inst.weight(); sumsSquares[0] += inst.classValue() * inst.classValue() * inst.weight(); sumOfWeights[0] += inst.weight(); m_Distribution[1][0] -= inst.classValue() * inst.weight(); sumsSquares[1] -= inst.classValue() * inst.classValue() * inst.weight(); sumOfWeights[1] -= inst.weight(); if (inst.value(index) < instPlusOne.value(index)) { currCutPoint = (inst.value(index) + instPlusOne.value(index)) / 2.0; currVal = variance(m_Distribution, 
sumsSquares, sumOfWeights); if (currVal < bestVal) { m_SplitPoint = currCutPoint; bestVal = currVal; for (int j = 0; j < 3; j++) { if (sumOfWeights[j] > 0) { bestDist[j][0] = m_Distribution[j][0] / sumOfWeights[j]; } else { bestDist[j][0] = totalSum / totalSumOfWeights; } } } } } m_Distribution = bestDist; return bestVal; }
From source file:boostingPL.boosting.AdaBoost.java
License:Open Source License
public void run(int t) throws Exception { if (t >= numIterations) { return;// w ww. ja v a2 s . c o m } classifiers[t] = ClassifierWritable.newInstance("DecisionStump"); //classifiers[t] = ClassifiersHelper.newInstance("C4.5"); classifiers[t].buildClassifier(insts); double e = weightError(t); if (e >= 0.5) { System.out.println("AdaBoost Error: error rate = " + e + ", >= 0.5"); throw new Exception("error rate > 0.5"); } if (e == 0.0) { e = 0.0001; // don't let e == 0 } cweights[t] = 0.5 * Math.log((1 - e) / e) / Math.log(Math.E); System.out.println("Round = " + t + "\t ErrorRate = " + e + "\t\t Weights = " + cweights[t]); for (int i = 0; i < insts.numInstances(); i++) { Instance inst = insts.instance(i); if (classifiers[t].classifyInstance(inst) != inst.classValue()) { inst.setWeight(inst.weight() / (2 * e)); } else { inst.setWeight(inst.weight() / (2 * (1 - e))); } } }
From source file:boostingPL.boosting.SAMME.java
License:Open Source License
public void run(int t) throws Exception { if (t >= numIterations) { return;/*w ww . j a v a2s.c o m*/ } classifiers[t] = ClassifierWritable.newInstance("DecisionStump"); classifiers[t].buildClassifier(insts); double e = weightError(t); final int numClasses = insts.classAttribute().numValues(); double maxe = 1 - 1.0 / numClasses; if (e >= maxe) { System.out.println("SAMME Error: error rate = " + e + ", >= " + maxe); throw new Exception("error rate > " + maxe); } if (e == 0.0) { e = 0.0001; // dont let e == 0 } cweights[t] = Math.log((1 - e) / e) + Math.log(numClasses - 1); System.out.println("Round = " + t + "\tErrorRate = " + e + "\tCWeight = " + cweights[t]); double expCWeight = Math.exp(cweights[t]); for (int i = 0; i < insts.numInstances(); i++) { Instance inst = insts.instance(i); if (classifiers[t].classifyInstance(inst) != inst.classValue()) { inst.setWeight(inst.weight() * expCWeight); } } double weightSum = insts.sumOfWeights(); for (int i = 0; i < insts.numInstances(); i++) { Instance inst = insts.instance(i); inst.setWeight(inst.weight() / weightSum); } }