List of usage examples for weka.core.Instance.weight()
public double weight();
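Before the per-project examples below, a minimal sketch of typical use; the class name and the data variable are illustrative only and do not come from any of the listed source files. By default every weka.core.Instance carries a weight of 1.0, which weight-aware filters and classifiers take into account.

import weka.core.Instance;
import weka.core.Instances;

public class InstanceWeightSketch {
    /** Doubles the weight of the first instance in an already-loaded dataset. */
    public static void emphasizeFirst(Instances data) {
        Instance inst = data.instance(0);
        double w = inst.weight();   // reads the current weight (1.0 unless changed)
        inst.setWeight(2.0 * w);    // weight-aware learners count this instance twice as much
    }
}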
From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java
License:Apache License
/**
 * Samoa instance from weka instance.
 *
 * @param inst the inst
 * @return the instance
 */
public Instance samoaInstance(weka.core.Instance inst) {
    Instance samoaInstance;
    if (inst instanceof weka.core.SparseInstance) {
        double[] attributeValues = new double[inst.numValues()];
        int[] indexValues = new int[inst.numValues()];
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != inst.classIndex()) {
                attributeValues[i] = inst.valueSparse(i);
                indexValues[i] = inst.index(i);
            }
        }
        samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes());
    } else {
        samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray());
        //samoaInstance.deleteAttributeAt(inst.classIndex());
    }
    if (this.samoaInstanceInformation == null) {
        this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset());
    }
    samoaInstance.setDataset(samoaInstanceInformation);
    samoaInstance.setClassValue(inst.classValue());
    return samoaInstance;
}
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Evaluates the classifier on a single instance and records the
 * prediction (if the class is nominal).
 *
 * @param classifier machine learning classifier
 * @param instance the test instance to be classified
 * @return the prediction made by the classifier
 * @throws Exception if model could not be evaluated
 * successfully or the data contains string attributes
 */
public double evaluateModelOnceAndRecordPrediction(Classifier classifier, Instance instance) throws Exception {
    Instance classMissing = (Instance) instance.copy();
    double pred = 0;
    classMissing.setDataset(instance.dataset());
    classMissing.setClassMissing();
    if (m_ClassIsNominal) {
        if (m_Predictions == null) {
            m_Predictions = new FastVector();
        }
        double[] dist = classifier.distributionForInstance(classMissing);
        pred = Utils.maxIndex(dist);
        if (dist[(int) pred] <= 0) {
            pred = Instance.missingValue();
        }
        updateStatsForClassifier(dist, instance);
        m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist, instance.weight()));
    } else {
        pred = classifier.classifyInstance(classMissing);
        updateStatsForPredictor(pred, instance);
    }
    return pred;
}
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Updates all the statistics about a classifier's performance for
 * the current test instance.
 *
 * @param predictedDistribution the probabilities assigned to
 * each class
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not
 * set
 */
protected void updateStatsForClassifier(double[] predictedDistribution, Instance instance) throws Exception {
    int actualClass = (int) instance.classValue();
    if (!instance.classIsMissing()) {
        updateMargins(predictedDistribution, actualClass, instance.weight());
        // Determine the predicted class (doesn't detect multiple
        // classifications)
        int predictedClass = -1;
        double bestProb = 0.0;
        for (int i = 0; i < m_NumClasses; i++) {
            if (predictedDistribution[i] > bestProb) {
                predictedClass = i;
                bestProb = predictedDistribution[i];
            }
        }
        m_WithClass += instance.weight();
        // Determine misclassification cost
        if (m_CostMatrix != null) {
            if (predictedClass < 0) {
                // For missing predictions, we assume the worst possible cost.
                // This is pretty harsh.
                // Perhaps we could take the negative of the cost of a correct
                // prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
                // although often this will be zero
                m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance);
            } else {
                m_TotalCost += instance.weight() * m_CostMatrix.getElement(actualClass, predictedClass, instance);
            }
        }
        // Update counts when no class was predicted
        if (predictedClass < 0) {
            m_Unclassified += instance.weight();
            return;
        }
        double predictedProb = Math.max(MIN_SF_PROB, predictedDistribution[actualClass]);
        double priorProb = Math.max(MIN_SF_PROB, m_ClassPriors[actualClass] / m_ClassPriorsSum);
        if (predictedProb >= priorProb) {
            m_SumKBInfo += (Utils.log2(predictedProb) - Utils.log2(priorProb)) * instance.weight();
        } else {
            m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) - Utils.log2(1.0 - priorProb)) * instance.weight();
        }
        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
        updateNumericScores(predictedDistribution, makeDistribution(instance.classValue()), instance.weight());
        // Update other stats
        m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
        if (predictedClass != actualClass) {
            m_Incorrect += instance.weight();
        } else {
            m_Correct += instance.weight();
        }
    } else {
        m_MissingClass += instance.weight();
    }
}
From source file:cotraining.copy.Evaluation_D.java
License:Open Source License
/**
 * Updates all the statistics about a predictor's performance for
 * the current test instance.
 *
 * @param predictedValue the numeric value the classifier predicts
 * @param instance the instance to be classified
 * @throws Exception if the class of the instance is not
 * set
 */
protected void updateStatsForPredictor(double predictedValue, Instance instance) throws Exception {
    if (!instance.classIsMissing()) {
        // Update stats
        m_WithClass += instance.weight();
        if (Instance.isMissingValue(predictedValue)) {
            m_Unclassified += instance.weight();
            return;
        }
        m_SumClass += instance.weight() * instance.classValue();
        m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue();
        m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue;
        m_SumPredicted += instance.weight() * predictedValue;
        m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue;
        if (m_ErrorEstimator == null) {
            setNumericPriorsFromBuffer();
        }
        double predictedProb = Math.max(m_ErrorEstimator.getProbability(predictedValue - instance.classValue()), MIN_SF_PROB);
        double priorProb = Math.max(m_PriorErrorEstimator.getProbability(instance.classValue()), MIN_SF_PROB);
        m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
        m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
        m_ErrorEstimator.addValue(predictedValue - instance.classValue(), instance.weight());
        updateNumericScores(makeDistribution(predictedValue), makeDistribution(instance.classValue()), instance.weight());
    } else {
        m_MissingClass += instance.weight();
    }
}
From source file:data.Bag.java
License:Open Source License
/**
 * Constructor.
 *
 * @param instance
 *            A Weka Instance to be transformed into a Bag.
 * @throws Exception
 *            To be handled in an upper level.
 */
public Bag(Instance instance) throws Exception {
    super(instance);
    m_AttValues = instance.toDoubleArray();
    m_Weight = instance.weight();
    m_Dataset = instance.dataset();
}
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/**
 * Convert a PC-transformed instance back to the original space
 *
 * @param inst the instance to convert
 * @return the processed instance
 * @throws Exception if something goes wrong
 */
private Instance convertInstanceToOriginal(Instance inst) throws Exception {
    double[] newVals = null;
    if (m_hasClass) {
        newVals = new double[m_numAttribs + 1];
    } else {
        newVals = new double[m_numAttribs];
    }
    if (m_hasClass) {
        // class is always appended as the last attribute
        newVals[m_numAttribs] = inst.value(inst.numAttributes() - 1);
    }
    for (int i = 0; i < m_eTranspose[0].length; i++) {
        double tempval = 0.0;
        for (int j = 1; j < m_eTranspose.length; j++) {
            tempval += (m_eTranspose[j][i] * inst.value(j - 1));
        }
        newVals[i] = tempval;
        if (!m_center) {
            newVals[i] *= m_stdDevs[i];
        }
        newVals[i] += m_means[i];
    }
    if (inst instanceof SparseInstance) {
        return new SparseInstance(inst.weight(), newVals);
    } else {
        return new Instance(inst.weight(), newVals);
    }
}
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/**
 * Transform an instance in original (unnormalized) format. Convert back
 * to the original space if requested.
 *
 * @param instance an instance in the original (unnormalized) format
 * @return a transformed instance
 * @throws Exception if instance can't be transformed
 */
public Instance convertInstance(Instance instance) throws Exception {
    if (m_eigenvalues == null) {
        throw new Exception("convertInstance: Principal components not " + "built yet");
    }
    double[] newVals = new double[m_outputNumAtts];
    Instance tempInst = (Instance) instance.copy();
    if (!instance.dataset().equalHeaders(m_trainHeader)) {
        throw new Exception("Can't convert instance: header's don't match: " + "PrincipalComponents\n"
                + "Can't convert instance: header's don't match.");
    }
    m_replaceMissingFilter.input(tempInst);
    m_replaceMissingFilter.batchFinished();
    tempInst = m_replaceMissingFilter.output();
    /*if (m_normalize) {
        m_normalizeFilter.input(tempInst);
        m_normalizeFilter.batchFinished();
        tempInst = m_normalizeFilter.output();
    }*/
    m_nominalToBinFilter.input(tempInst);
    m_nominalToBinFilter.batchFinished();
    tempInst = m_nominalToBinFilter.output();
    if (m_attributeFilter != null) {
        m_attributeFilter.input(tempInst);
        m_attributeFilter.batchFinished();
        tempInst = m_attributeFilter.output();
    }
    if (!m_center) {
        m_standardizeFilter.input(tempInst);
        m_standardizeFilter.batchFinished();
        tempInst = m_standardizeFilter.output();
    } else {
        m_centerFilter.input(tempInst);
        m_centerFilter.batchFinished();
        tempInst = m_centerFilter.output();
    }
    if (m_hasClass) {
        newVals[m_outputNumAtts - 1] = instance.value(instance.classIndex());
    }
    double cumulative = 0;
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        double tempval = 0.0;
        for (int j = 0; j < m_numAttribs; j++) {
            tempval += (m_eigenvectors[j][m_sortedEigens[i]] * tempInst.value(j));
        }
        newVals[m_numAttribs - i - 1] = tempval;
        cumulative += m_eigenvalues[m_sortedEigens[i]];
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }
    if (!m_transBackToOriginal) {
        if (instance instanceof SparseInstance) {
            return new SparseInstance(instance.weight(), newVals);
        } else {
            return new Instance(instance.weight(), newVals);
        }
    } else {
        if (instance instanceof SparseInstance) {
            return convertInstanceToOriginal(new SparseInstance(instance.weight(), newVals));
        } else {
            return convertInstanceToOriginal(new Instance(instance.weight(), newVals));
        }
    }
}
From source file:distributedRedditAnalyser.OzaBoost.java
License:Open Source License
@Override
public void trainOnInstanceImpl(Instance inst) {
    try {
        lock.acquire();
        //Get a new classifier
        Classifier newClassifier = ((Classifier) getPreparedClassOption(this.baseLearnerOption)).copy();
        ensemble.add(new ClassifierInstance(newClassifier));
        //If we have too many classifiers
        while (ensemble.size() > ensembleSizeOption.getValue())
            ensemble.pollFirst();
        double lambda_d = 1.0;
        for (ClassifierInstance c : ensemble) {
            double k = this.pureBoostOption.isSet() ? lambda_d
                    : MiscUtils.poisson(lambda_d, this.classifierRandom);
            if (k > 0.0) {
                Instance weightedInst = (Instance) inst.copy();
                weightedInst.setWeight(inst.weight() * k);
                c.getClassifier().trainOnInstance(weightedInst);
            }
            if (c.getClassifier().correctlyClassifies(inst)) {
                c.setScms(c.getScms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getScms());
            } else {
                c.setSwms(c.getSwms() + lambda_d);
                lambda_d *= this.trainingWeightSeenByModel / (2 * c.getSwms());
            }
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    } finally {
        lock.release();
    }
}
From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java
License:Open Source License
/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();
    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }
    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }
    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst.classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }
    // distribute missing counts if required
    if (m_missing_merge) {
        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();
                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];
                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }
                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }
                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }
                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }
    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i]));
        }
    }
}
From source file:edu.oregonstate.eecs.mcplan.abstraction.EvaluateSimilarityFunction.java
License:Open Source License
public static Instances transformInstances(final Instances src, final CoordinateTransform transform) {
    final ArrayList<Attribute> out_attributes = new ArrayList<Attribute>();
    for (int i = 0; i < transform.outDimension(); ++i) {
        out_attributes.add(new Attribute("x" + i));
    }
    out_attributes.add((Attribute) src.classAttribute().copy());
    final Instances out = new Instances(src.relationName() + "_" + transform.name(), out_attributes, 0);
    for (int i = 0; i < src.size(); ++i) {
        final Instance inst = src.get(i);
        final RealVector flat = new ArrayRealVector(WekaUtil.unlabeledFeatures(inst));
        final RealVector transformed_vector = transform.encode(flat).x;
        final double[] transformed = new double[transformed_vector.getDimension() + 1];
        for (int j = 0; j < transformed_vector.getDimension(); ++j) {
            transformed[j] = transformed_vector.getEntry(j);
        }
        transformed[transformed.length - 1] = inst.classValue();
        final Instance transformed_instance = new DenseInstance(inst.weight(), transformed);
        out.add(transformed_instance);
        transformed_instance.setDataset(out);
    }
    out.setClassIndex(out.numAttributes() - 1);
    return out;
}