List of usage examples for weka.core Instance valueSparse
public double valueSparse(int indexOfIndex);
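valueSparse(i) returns the value stored at internal position i of the instance's attribute-value list, while the companion method index(i) maps that position back to an attribute index; looping over positions 0..numValues()-1 therefore visits only the stored entries, which is what makes the examples below efficient on sparse data. A minimal sketch of the idiom, assuming Weka 3.7+ (where DenseInstance and SparseInstance live in weka.core); the dataset and attribute names are made up for illustration:

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;

public class ValueSparseDemo {
  public static void main(String[] args) {
    // Tiny dataset with three numeric attributes (illustrative only).
    ArrayList<Attribute> atts = new ArrayList<Attribute>();
    atts.add(new Attribute("a0"));
    atts.add(new Attribute("a1"));
    atts.add(new Attribute("a2"));
    Instances data = new Instances("demo", atts, 1);

    // Dense values {0, 5, 0}; the sparse copy keeps only the non-zero entry.
    Instance dense = new DenseInstance(1.0, new double[] { 0.0, 5.0, 0.0 });
    Instance sparse = new SparseInstance(dense);
    sparse.setDataset(data);

    // index(i) -> attribute index, valueSparse(i) -> value at position i.
    for (int i = 0; i < sparse.numValues(); i++) {
      System.out.println("attribute " + sparse.index(i) + " = " + sparse.valueSparse(i));
    }
    // Prints: attribute 1 = 5.0
  }
}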
From source file:ml.dataprocess.CorrelationAttributeEval.java
License:Open Source License
/**
 * Initializes a correlation attribute evaluator. Replaces missing values
 * with means/modes; deletes instances with missing class values.
 * 
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
  data = new Instances(data);
  data.deleteWithMissingClass();

  ReplaceMissingValues rmv = new ReplaceMissingValues();
  rmv.setInputFormat(data);
  data = Filter.useFilter(data, rmv);

  int numClasses = data.classAttribute().numValues();
  int classIndex = data.classIndex();
  int numInstances = data.numInstances();
  m_correlations = new double[data.numAttributes()];
  /*
   * boolean hasNominals = false; boolean hasNumerics = false;
   */
  List<Integer> numericIndexes = new ArrayList<Integer>();
  List<Integer> nominalIndexes = new ArrayList<Integer>();
  if (m_detailedOutput) {
    m_detailedOutputBuff = new StringBuffer();
  }

  // TODO for instance weights (folded into computing weighted correlations)
  // add another dimension just before the last [2] (0 for 0/1 binary vector
  // and 1 for corresponding instance weights for the 1's)
  double[][][] nomAtts = new double[data.numAttributes()][][];
  for (int i = 0; i < data.numAttributes(); i++) {
    if (data.attribute(i).isNominal() && i != classIndex) {
      nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
      Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all 1's
      nominalIndexes.add(i);
    } else if (data.attribute(i).isNumeric() && i != classIndex) {
      numericIndexes.add(i);
    }
  }

  // do the nominal attributes
  if (nominalIndexes.size() > 0) {
    for (int i = 0; i < data.numInstances(); i++) {
      Instance current = data.instance(i);
      for (int j = 0; j < current.numValues(); j++) {
        if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
          // Will need to check for zero in case this isn't a sparse
          // instance (unless we add 1 and subtract 1)
          nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
          nomAtts[current.index(j)][0][i] -= 1;
        }
      }
    }
  }

  if (data.classAttribute().isNumeric()) {
    double[] classVals = data.attributeToDoubleArray(classIndex);

    // do the numeric attributes
    for (Integer i : numericIndexes) {
      double[] numAttVals = data.attributeToDoubleArray(i);
      m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

      if (m_correlations[i] == 1.0) {
        // check for zero variance (useless numeric attribute)
        if (Utils.variance(numAttVals) == 0) {
          m_correlations[i] = 0;
        }
      }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
      // now compute the correlations for the binarized nominal attributes
      for (Integer i : nominalIndexes) {
        double sum = 0;
        double corr = 0;
        double sumCorr = 0;
        double sumForValue = 0;

        if (m_detailedOutput) {
          m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
        }

        for (int j = 0; j < data.attribute(i).numValues(); j++) {
          sumForValue = Utils.sum(nomAtts[i][j]);
          corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

          // useless attribute - all instances have the same value
          if (sumForValue == numInstances || sumForValue == 0) {
            corr = 0;
          }
          if (corr < 0.0) {
            corr = -corr;
          }
          sumCorr += sumForValue * corr;
          sum += sumForValue;

          if (m_detailedOutput) {
            m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
            m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
          }
        }
        m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
      }
    }
  } else {
    // class is nominal
    // TODO extra dimension for storing instance weights too
    double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

    // this is equal to the number of instances for all inst weights = 1
    double[] classValCounts = new double[data.classAttribute().numValues()];

    for (int i = 0; i < data.numInstances(); i++) {
      Instance current = data.instance(i);
      binarizedClasses[(int) current.classValue()][i] = 1;
    }

    for (int i = 0; i < data.classAttribute().numValues(); i++) {
      classValCounts[i] = Utils.sum(binarizedClasses[i]);
    }

    double sumClass = Utils.sum(classValCounts);

    // do numeric attributes first
    if (numericIndexes.size() > 0) {
      for (Integer i : numericIndexes) {
        double[] numAttVals = data.attributeToDoubleArray(i);
        double corr = 0;
        double sumCorr = 0;

        for (int j = 0; j < data.classAttribute().numValues(); j++) {
          corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
          if (corr < 0.0) {
            corr = -corr;
          }
          if (corr == 1.0) {
            // check for zero variance (useless numeric attribute)
            if (Utils.variance(numAttVals) == 0) {
              corr = 0;
            }
          }
          sumCorr += classValCounts[j] * corr;
        }
        m_correlations[i] = sumCorr / sumClass;
      }
    }

    if (nominalIndexes.size() > 0) {
      for (Integer i : nominalIndexes) {
        if (m_detailedOutput) {
          m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
        }

        double sumForAtt = 0;
        double corrForAtt = 0;

        for (int j = 0; j < data.attribute(i).numValues(); j++) {
          double sumForValue = Utils.sum(nomAtts[i][j]);
          double corr = 0;
          double sumCorr = 0;
          double avgCorrForValue = 0;
          sumForAtt += sumForValue;

          for (int k = 0; k < numClasses; k++) {
            // corr between value j and class k
            corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k], binarizedClasses[k].length);

            // useless attribute - all instances have the same value
            if (sumForValue == numInstances || sumForValue == 0) {
              corr = 0;
            }
            if (corr < 0.0) {
              corr = -corr;
            }
            sumCorr += classValCounts[k] * corr;
          }
          avgCorrForValue = sumCorr / sumClass;
          corrForAtt += sumForValue * avgCorrForValue;

          if (m_detailedOutput) {
            m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
            m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
          }
        }
        // the weighted average corr for att i as a whole
        // (weighted by value frequencies)
        m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
      }
    }
  }

  if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
    m_detailedOutputBuff.append("\n");
  }
}
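Note the pairing used here: because the loop runs over current.numValues(), current.index(j) recovers the attribute index, while valueSparse(j), for a nominal attribute, is the double-encoded index of its value and can be cast to int to address the per-value count arrays directly. As the inline comment points out, the same loop also handles dense instances, where numValues() equals numAttributes().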
From source file:ml.engine.LibSVM.java
License:Open Source License
/**
 * Converts an instance into a sparse libsvm array.
 * 
 * @param instance the instance to work on
 * @return the libsvm array
 * @throws Exception if setup of array fails
 */
protected Object instanceToArray(Instance instance) throws Exception {
  int index;
  int count;
  int i;
  Object result;

  // determine number of non-zero attributes
  /*
   * for (i = 0; i < instance.numAttributes(); i++) { if (i ==
   * instance.classIndex()) continue; if (instance.value(i) != 0) count++; }
   */
  count = 0;
  for (i = 0; i < instance.numValues(); i++) {
    if (instance.index(i) == instance.classIndex()) {
      continue;
    }
    if (instance.valueSparse(i) != 0) {
      count++;
    }
  }

  // fill array
  /*
   * result = Array.newInstance(Class.forName(CLASS_SVMNODE), count); index =
   * 0; for (i = 0; i < instance.numAttributes(); i++) { if (i ==
   * instance.classIndex()) continue; if (instance.value(i) == 0) continue;
   * 
   * Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
   * setField(Array.get(result, index), "index", new Integer(i + 1));
   * setField(Array.get(result, index), "value", new
   * Double(instance.value(i))); index++; }
   */
  result = Array.newInstance(Class.forName(CLASS_SVMNODE), count);
  index = 0;
  for (i = 0; i < instance.numValues(); i++) {
    int idx = instance.index(i);
    if (idx == instance.classIndex()) {
      continue;
    }
    if (instance.valueSparse(i) == 0) {
      continue;
    }

    Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
    setField(Array.get(result, index), "index", new Integer(idx + 1));
    setField(Array.get(result, index), "value", new Double(instance.valueSparse(i)));
    index++;
  }

  return result;
}
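The conversion makes two passes over the sparse value list: the first counts entries that are non-zero and not the class attribute (the zero check matters because a dense instance reports every attribute through valueSparse), and the second fills the reflectively allocated svm_node array with 1-based attribute indices, as libsvm expects.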
From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java
License:Open Source License
/**
 * Trains the classifier with the given instance.
 *
 * @param inst the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
  if (this.reset == true) {
    this.m_numClasses = inst.numClasses();
    double laplace = this.laplaceCorrectionOption.getValue();
    int numAttributes = inst.numAttributes();

    m_probOfClass = new double[m_numClasses];
    Arrays.fill(m_probOfClass, laplace);

    m_classTotals = new double[m_numClasses];
    Arrays.fill(m_classTotals, laplace * numAttributes);

    m_wordTotalForClass = new DoubleVector[m_numClasses];
    for (int i = 0; i < m_numClasses; i++) {
      // Arrays.fill(wordTotal, laplace);
      m_wordTotalForClass[i] = new DoubleVector();
    }
    this.reset = false;
  }

  // Update classifier
  int classIndex = inst.classIndex();
  int classValue = (int) inst.value(classIndex);

  double w = inst.weight();
  m_probOfClass[classValue] += w;
  m_classTotals[classValue] += w * totalSize(inst);
  double total = m_classTotals[classValue];

  for (int i = 0; i < inst.numValues(); i++) {
    int index = inst.index(i);
    if (index != classIndex && !inst.isMissing(i)) {
      // m_wordTotalForClass[index][classValue] += w * inst.valueSparse(i);
      double laplaceCorrection = 0.0;
      if (m_wordTotalForClass[classValue].getValue(index) == 0) {
        laplaceCorrection = this.laplaceCorrectionOption.getValue();
      }
      m_wordTotalForClass[classValue].addToValue(index, w * inst.valueSparse(i) + laplaceCorrection);
    }
  }
}
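Since each per-class DoubleVector starts empty, the Laplace correction is applied lazily: the first time a word index is seen for a class, the correction is added along with the weighted count w * inst.valueSparse(i). This avoids allocating a dense array of corrections up front while keeping first-seen counts away from zero.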
From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java
License:Open Source License
/**
 * Calculates the class membership probabilities for the given test instance.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 */
@Override
public double[] getVotesForInstance(Instance instance) {
  if (this.reset == true) {
    return new double[2];
  }
  double[] probOfClassGivenDoc = new double[m_numClasses];
  double totalSize = totalSize(instance);

  for (int i = 0; i < m_numClasses; i++) {
    probOfClassGivenDoc[i] = Math.log(m_probOfClass[i]) - totalSize * Math.log(m_classTotals[i]);
  }

  for (int i = 0; i < instance.numValues(); i++) {
    int index = instance.index(i);
    if (index == instance.classIndex() || instance.isMissing(i)) {
      continue;
    }

    double wordCount = instance.valueSparse(i);
    for (int c = 0; c < m_numClasses; c++) {
      double value = m_wordTotalForClass[c].getValue(index);
      probOfClassGivenDoc[c] += wordCount
          * Math.log(value == 0 ? this.laplaceCorrectionOption.getValue() : value);
    }
  }
  return Utils.logs2probs(probOfClassGivenDoc);
}
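The scores are accumulated in log space and only converted back to a distribution by Utils.logs2probs at the end, which avoids floating-point underflow on long documents; a stored word total of zero falls back to the Laplace correction so that Math.log never receives 0.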
From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java
License:Open Source License
public double totalSize(Instance instance) {
  int classIndex = instance.classIndex();
  double total = 0.0;
  for (int i = 0; i < instance.numValues(); i++) {
    int index = instance.index(i);
    if (index == classIndex || instance.isMissing(i)) {
      continue;
    }
    double count = instance.valueSparse(i);
    if (count >= 0) {
      total += count;
    } else {
      // throw new Exception("Numeric attribute value is not >= 0. " + i + " "
      //     + index + " " + instance.valueSparse(i) + " " + " " + instance);
    }
  }
  return total;
}
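totalSize sums the sparse values of all non-class, non-missing attributes, i.e. the total word count of the document; negative values are silently skipped, with the commented-out exception showing the stricter alternative.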
From source file:moa.classifiers.functions.SGD.java
License:Open Source License
protected static double dotProd(Instance inst1, DoubleVector weights, int classIndex) {
  double result = 0;

  int n1 = inst1.numValues();
  int n2 = weights.numValues();

  for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
    int ind1 = inst1.index(p1);
    int ind2 = p2;
    if (ind1 == ind2) {
      if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
        result += inst1.valueSparse(p1) * weights.getValue(p2);
      }
      p1++;
      p2++;
    } else if (ind1 > ind2) {
      p2++;
    } else {
      p1++;
    }
  }
  return (result);
}
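dotProd is a merge join over two sorted index lists: ind1 walks the instance's stored attribute indices while ind2 (simply p2, since the DoubleVector is addressed densely) walks the weights, advancing whichever side lags. Attribute values the instance does not store are skipped without any multiplication, so the cost is linear in the lengths of the two lists.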
From source file:moa.classifiers.functions.SGD.java
License:Open Source License
/**
 * Trains the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance instance) {
  if (m_weights == null) {
    m_weights = new DoubleVector();
    m_bias = 0.0;
  }

  if (!instance.classIsMissing()) {

    double wx = dotProd(instance, m_weights, instance.classIndex());

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
      y = (instance.classValue() == 0) ? -1 : 1;
      z = y * (wx + m_bias);
    } else {
      y = instance.classValue();
      z = y - (wx + m_bias);
      y = 1;
    }

    // Compute multiplier for weight decay
    double multiplier = 1.0;
    if (m_numInstances == 0) {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
    } else {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    }
    for (int i = 0; i < m_weights.numValues(); i++) {
      m_weights.setValue(i, m_weights.getValue(i) * multiplier);
    }

    // Only need to do the following if the loss is non-zero
    if (m_loss != HINGE || (z < 1)) {

      // Compute Factor for updates
      double factor = m_learningRate * y * dloss(z);

      // Update coefficients for attributes
      int n1 = instance.numValues();
      for (int p1 = 0; p1 < n1; p1++) {
        int indS = instance.index(p1);
        if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
          m_weights.addToValue(indS, factor * instance.valueSparse(p1));
        }
      }

      // update the bias
      m_bias += factor;
    }
    m_t++;
  }
}
From source file:moa.classifiers.functions.SGDMultiClass.java
License:Open Source License
public void trainOnInstanceImpl(Instance instance, int classLabel) {
  if (!instance.classIsMissing()) {

    double wx = dotProd(instance, m_weights[classLabel], instance.classIndex());

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
      y = (instance.classValue() != classLabel) ? -1 : 1;
      z = y * (wx + m_bias[classLabel]);
    } else {
      y = instance.classValue();
      z = y - (wx + m_bias[classLabel]);
      y = 1;
    }

    // Compute multiplier for weight decay
    double multiplier = 1.0;
    if (m_numInstances == 0) {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
    } else {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    }
    for (int i = 0; i < m_weights[classLabel].numValues(); i++) {
      m_weights[classLabel].setValue(i, m_weights[classLabel].getValue(i) * multiplier);
    }

    // Only need to do the following if the loss is non-zero
    if (m_loss != HINGE || (z < 1)) {

      // Compute Factor for updates
      double factor = m_learningRate * y * dloss(z);

      // Update coefficients for attributes
      int n1 = instance.numValues();
      for (int p1 = 0; p1 < n1; p1++) {
        int indS = instance.index(p1);
        if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
          m_weights[classLabel].addToValue(indS, factor * instance.valueSparse(p1));
        }
      }

      // update the bias
      m_bias[classLabel] += factor;
    }
  }
}
From source file:moa.classifiers.functions.SGDOld.java
License:Open Source License
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  double result = 0;

  int n1 = inst1.numValues();
  int n2 = weights.length - 1;

  for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
    int ind1 = inst1.index(p1);
    int ind2 = p2;
    if (ind1 == ind2) {
      if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
        result += inst1.valueSparse(p1) * weights[p2];
      }
      p1++;
      p2++;
    } else if (ind1 > ind2) {
      p2++;
    } else {
      p1++;
    }
  }
  return (result);
}
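This older variant performs the same merge join as SGD's dotProd above, but against a plain double[] whose last slot is reserved for the bias, hence the upper bound of weights.length - 1.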
From source file:moa.classifiers.functions.SGDOld.java
License:Open Source License
/**
 * Trains the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance instance) {
  if (m_weights == null) {
    m_weights = new double[instance.numAttributes() + 1];
  }

  if (!instance.classIsMissing()) {

    double wx = dotProd(instance, m_weights, instance.classIndex());

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
      y = (instance.classValue() == 0) ? -1 : 1;
      z = y * (wx + m_weights[m_weights.length - 1]);
    } else {
      y = instance.classValue();
      z = y - (wx + m_weights[m_weights.length - 1]);
      y = 1;
    }

    // Compute multiplier for weight decay
    double multiplier = 1.0;
    if (m_numInstances == 0) {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
    } else {
      multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    }
    for (int i = 0; i < m_weights.length - 1; i++) {
      m_weights[i] *= multiplier;
    }

    // Only need to do the following if the loss is non-zero
    if (m_loss != HINGE || (z < 1)) {

      // Compute Factor for updates
      double factor = m_learningRate * y * dloss(z);

      // Update coefficients for attributes
      int n1 = instance.numValues();
      for (int p1 = 0; p1 < n1; p1++) {
        int indS = instance.index(p1);
        if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
          m_weights[indS] += factor * instance.valueSparse(p1);
        }
      }

      // update the bias
      m_weights[m_weights.length - 1] += factor;
    }
    m_t++;
  }
}