List of usage examples for weka.core Instance numValues
/**
 * Returns the number of values held by this instance.
 * <p>
 * NOTE(review): presumably this counts only the explicitly stored entries of a
 * sparse instance, i.e. the valid range for positional accessors such as
 * {@code index(i)} and {@code valueSparse(i)} - confirm against the Weka Javadoc.
 *
 * @return the number of values
 */
public int numValues();
From source file:machinelearningq2.BasicNaiveBayesV1.java
/** * * The method distributionForInstance should work out the probabilities of * class membership for a single instance. * * @param instnc// ww w .jav a 2 s .co m * @return * @throws Exception */ @Override public double[] distributionForInstance(Instance instnc) throws Exception { // creates a double array for storing the naive calculations for each class double[] naiveBayes = new double[classValueCounts.length]; // loops through each class and computes the naive bayes for (int c = 0; c < naiveBayes.length; c++) { // stores all conditional probabilities for class membership such: // P(struct=0|crime=1), P(security=1|crime=1), P(area=1|crime=1) // and also it stores the prior probability: P(crime=1) ArrayList<Double> conditionalProbs = new ArrayList<>(); double priorProbability = classValueCounts[c] / countData; conditionalProbs.add(priorProbability); for (int i = 0; i < instnc.numValues() - 1; i++) { double attributeValue = instnc.value(i); DataFound d = new DataFound(attributeValue, c, i); int index = data.indexOf(d); if (index != -1) { double classValueCount = classValueCounts[(int) d.getClassValue()]; conditionalProbs.add(data.get(index).getConditionalProbability((int) classValueCount)); } } System.out.println(conditionalProbs); // compute the naive bayes double total = 1; for (Double x : conditionalProbs) { total *= x; } naiveBayes[c] = total; } prettyPrintProbabilities(naiveBayes); return naiveBayes; }
From source file:machinelearningq2.ExtendedNaiveBayes.java
/** * * The method distributionForInstance should work out the probabilities of * class membership for a single instance. * * @param instnc// w ww .j av a 2 s . com * @return * @throws Exception */ public double[] distributionForDiscrete(Instance instnc) throws Exception { // creates a double array for storing the naive calculations for each class double[] naiveBayes = new double[classValueCounts.length]; // loops through each class and computes the naive bayes for (int c = 0; c < naiveBayes.length; c++) { // stores all conditional probabilities for class membership such: // P(struct=0|crime=1), P(security=1|crime=1), P(area=1|crime=1) // and also it stores the prior probability: P(crime=1) ArrayList<Double> conditionalProbs = new ArrayList<>(); double priorProbability = classValueCounts[c] / countData; conditionalProbs.add(priorProbability); for (int i = 0; i < instnc.numValues() - 1; i++) { double attributeValue = instnc.value(i); DataFound d = new DataFound(attributeValue, c, i); int index = data.indexOf(d); if (index != -1) { double classValueCount = classValueCounts[(int) d.getClassValue()]; conditionalProbs.add(data.get(index).getConditionalProbability((int) classValueCount)); } } // compute the naive bayes double total = 1; for (Double x : conditionalProbs) { total *= x; } naiveBayes[c] = total; } return naiveBayes; }
From source file:ml.dataprocess.CorrelationAttributeEval.java
License:Open Source License
/**
 * Initializes the correlation attribute evaluator. Works on a copy of the data,
 * deletes instances with missing class values and runs the
 * {@code ReplaceMissingValues} filter (means/modes) over the remainder.
 * <p>
 * Afterwards {@code m_correlations[i]} holds the score of attribute {@code i}:
 * <ul>
 * <li>numeric class, numeric attribute: the correlation with the class
 * (reset to 0 when it is exactly 1.0 and the attribute has zero variance);</li>
 * <li>numeric class, nominal attribute: each value is turned into a 0/1 indicator
 * vector and the absolute correlations are averaged, weighted by value frequency;</li>
 * <li>nominal class: the class is turned into one 0/1 indicator vector per class
 * value and the absolute correlations are averaged, weighted by class counts (and,
 * for nominal attributes, additionally by value frequency).</li>
 * </ul>
 * Entries for the class attribute and for attributes that are neither nominal nor
 * numeric are left at 0. When {@code m_detailedOutput} is set, per-value
 * correlations are appended to {@code m_detailedOutputBuff}.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // work on a private copy so the caller's data set is not modified
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];

    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and 1 for corresponding instance weights for the 1's)

    // nomAtts[attribute][value][instance] is the 0/1 indicator for
    // "instance has this value of this nominal attribute"
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            // set zero index for this att to all 1's; corrected per stored value below
            Arrays.fill(nomAtts[i][0], 1.0);
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Adding 1 to the stored value's row and subtracting 1 from row 0
                    // works for both cases: a stored value of 0 leaves row 0 at 1, any
                    // other stored value moves the 1 from row 0 to its own row, and a
                    // value that is not stored at all keeps the pre-filled 1 in row 0.
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances that carry value j
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    // only the magnitude of the correlation is used
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                // average of the per-value correlations, weighted by value frequency
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too

        // binarizedClasses[classValue][instance] is the 0/1 indicator for
        // "instance belongs to this class"
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                // average over the class values, weighted by class counts
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances that carry value j
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k], binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    // correlation of value j averaged over the classes (weighted by class counts)
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    // terminate the detailed output, if any was produced, with a newline
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}
From source file:ml.engine.LibSVM.java
License:Open Source License
/** * returns an instance into a sparse libsvm array * // w w w . java2 s . c om * @param instance the instance to work on * @return the libsvm array * @throws Exception if setup of array fails */ protected Object instanceToArray(Instance instance) throws Exception { int index; int count; int i; Object result; // determine number of non-zero attributes /* * for (i = 0; i < instance.numAttributes(); i++) { if (i == * instance.classIndex()) continue; if (instance.value(i) != 0) count++; } */ count = 0; for (i = 0; i < instance.numValues(); i++) { if (instance.index(i) == instance.classIndex()) { continue; } if (instance.valueSparse(i) != 0) { count++; } } // fill array /* * result = Array.newInstance(Class.forName(CLASS_SVMNODE), count); index = * 0; for (i = 0; i < instance.numAttributes(); i++) { if (i == * instance.classIndex()) continue; if (instance.value(i) == 0) continue; * * Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance()); * setField(Array.get(result, index), "index", new Integer(i + 1)); * setField(Array.get(result, index), "value", new * Double(instance.value(i))); index++; } */ result = Array.newInstance(Class.forName(CLASS_SVMNODE), count); index = 0; for (i = 0; i < instance.numValues(); i++) { int idx = instance.index(i); if (idx == instance.classIndex()) { continue; } if (instance.valueSparse(i) == 0) { continue; } Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance()); setField(Array.get(result, index), "index", new Integer(idx + 1)); setField(Array.get(result, index), "value", new Double(instance.valueSparse(i))); index++; } return result; }
From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java
License:Open Source License
/** * Trains the classifier with the given instance. * * @param instance the new training instance to include in the model *//* w w w . j a v a 2s .c o m*/ @Override public void trainOnInstanceImpl(Instance inst) { if (this.reset == true) { this.m_numClasses = inst.numClasses(); double laplace = this.laplaceCorrectionOption.getValue(); int numAttributes = inst.numAttributes(); m_probOfClass = new double[m_numClasses]; Arrays.fill(m_probOfClass, laplace); m_classTotals = new double[m_numClasses]; Arrays.fill(m_classTotals, laplace * numAttributes); m_wordTotalForClass = new DoubleVector[m_numClasses]; for (int i = 0; i < m_numClasses; i++) { //Arrays.fill(wordTotal, laplace); m_wordTotalForClass[i] = new DoubleVector(); } this.reset = false; } // Update classifier int classIndex = inst.classIndex(); int classValue = (int) inst.value(classIndex); double w = inst.weight(); m_probOfClass[classValue] += w; m_classTotals[classValue] += w * totalSize(inst); double total = m_classTotals[classValue]; for (int i = 0; i < inst.numValues(); i++) { int index = inst.index(i); if (index != classIndex && !inst.isMissing(i)) { //m_wordTotalForClass[index][classValue] += w * inst.valueSparse(i); double laplaceCorrection = 0.0; if (m_wordTotalForClass[classValue].getValue(index) == 0) { laplaceCorrection = this.laplaceCorrectionOption.getValue(); } m_wordTotalForClass[classValue].addToValue(index, w * inst.valueSparse(i) + laplaceCorrection); } } }
From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java
License:Open Source License
/**
 * Calculates the class membership probabilities for the given test instance.
 * <p>
 * For each class a log score is built from {@code log(m_probOfClass[c])}, minus
 * the instance's total size times {@code log(m_classTotals[c])}, plus, for every
 * stored non-class, non-missing value, the value times the log of the word total
 * for that class (the Laplace correction is used when the total is 0). The log
 * scores are converted with {@code Utils.logs2probs}.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution; an all-zero array of length 2
 *         while the model is still in its reset state
 */
@Override
public double[] getVotesForInstance(Instance instance) {
    if (this.reset) {
        // no training instance seen yet
        return new double[2];
    }

    double[] probOfClassGivenDoc = new double[m_numClasses];
    double totalSize = totalSize(instance);

    for (int c = 0; c < m_numClasses; c++) {
        probOfClassGivenDoc[c] = Math.log(m_probOfClass[c]) - totalSize * Math.log(m_classTotals[c]);
    }

    int classIndex = instance.classIndex();
    for (int i = 0; i < instance.numValues(); i++) {
        int index = instance.index(i);
        // i is a position in the sparse representation (as for index(i) and
        // valueSparse(i)), so the missing-check must be positional as well.
        if (index == classIndex || instance.isMissingSparse(i)) {
            continue;
        }

        double wordCount = instance.valueSparse(i);
        for (int c = 0; c < m_numClasses; c++) {
            double value = m_wordTotalForClass[c].getValue(index);
            // words never seen for this class fall back to the Laplace correction
            probOfClassGivenDoc[c] += wordCount
                    * Math.log(value == 0 ? this.laplaceCorrectionOption.getValue() : value);
        }
    }

    return Utils.logs2probs(probOfClassGivenDoc);
}
From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java
License:Open Source License
public double totalSize(Instance instance) { int classIndex = instance.classIndex(); double total = 0.0; for (int i = 0; i < instance.numValues(); i++) { int index = instance.index(i); if (index == classIndex || instance.isMissing(i)) { continue; }//from w w w.j a v a 2 s .c o m double count = instance.valueSparse(i); if (count >= 0) { total += count; } else { //throw new Exception("Numeric attribute value is not >= 0. " + i + " " + index + " " + // instance.valueSparse(i) + " " + " " + instance); } } return total; }
From source file:moa.classifiers.featureselection.OFSL.java
License:Open Source License
@Override public void trainOnInstanceImpl(Instance inst) { double y_t, m_bias_p1, m_bias_p2, m_bias; double[] m_weights_p1, m_weights_p2, m_weights; if (this.weights == null) { this.weights = new double[inst.numValues()]; for (int i = 0; i < this.weights.length; i++) this.weights[i] = 0.0; this.bias = 0.0; }//from w w w .j a v a2s . c om if (inst.classAttribute().isNominal()) { y_t = (inst.classValue() == 0) ? -1 : 1; } else { y_t = inst.classValue(); } double f_t = dot(inst.toDoubleArray(), this.weights); f_t += this.bias; if (y_t * f_t < 0) { m_weights_p1 = scalar_vector(1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue(), this.weights); m_bias_p1 = (1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue()) * this.bias; m_weights_p2 = scalar_vector(this.learningRateOption.getValue() * y_t, inst.toDoubleArray()); m_bias_p2 = this.learningRateOption.getValue() * y_t; m_weights = vector_add(m_weights_p1, m_weights_p2); m_bias = m_bias_p1 + m_bias_p2; m_weights = l2_projection(m_weights, m_bias, this.learningRateOption.getValue()); m_weights = truncate(m_weights, this.numSelectOption.getValue()); for (int i = 0; i < m_weights_p1.length; i++) this.weights[i] = m_weights[i]; this.bias = m_weights[m_weights.length - 1]; } else { this.weights = scalar_vector(1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue(), this.weights); this.bias = (1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue()) * this.bias; } }
From source file:moa.classifiers.featureselection.OFSP.java
License:Open Source License
/**
 * Performs one online update using only a subset of the instance's features.
 * <p>
 * On first use the weights are drawn from a Gaussian and truncated. With
 * probability {@code searchOption} a random set of {@code numSelectOption} feature
 * indices is chosen; otherwise the indices with the largest absolute weights are
 * chosen and all other entries of the working copy {@code x_hat} are zeroed. The
 * prediction uses only the chosen indices. When label and prediction have opposite
 * signs, {@code x_hat} is rescaled, the weights and bias are updated, passed through
 * {@code l2_projection} and {@code truncate}, and written back; the last element of
 * the resulting array becomes the new bias.
 *
 * @param inst the training instance
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    double y_t, f_t, m_bias;
    final int numSelect = this.numSelectOption.getValue();
    int[] indices = new int[numSelect];
    double[] m_weights;

    if (this.weights == null) {
        this.weights = new double[inst.numValues()];
        for (int i = 0; i < this.weights.length; i++) {
            this.weights[i] = this.rand.nextGaussian();
        }
        this.bias = 0.0;
        this.weights = truncate(this.weights, numSelect);
    }

    // label: -1/+1 for a nominal class, the raw class value otherwise
    if (inst.classAttribute().isNominal()) {
        y_t = (inst.classValue() == 0) ? -1 : 1;
    } else {
        y_t = inst.classValue();
    }

    double[] x_t = inst.toDoubleArray();
    double[] x_hat = inst.toDoubleArray();

    if (this.rand.nextDouble() < this.searchOption.getValue()) {
        // explore: pick a random subset of features
        int[] indices_perm = perm(inst.numAttributes());
        for (int i = 0; i < numSelect; i++) {
            indices[i] = indices_perm[i];
        }
    } else {
        // exploit: keep the features whose weights have the largest magnitude
        int[] sorted_indices = bubblesort_index(abs_vector(this.weights));
        for (int i = 0; i < inst.numAttributes() - numSelect; i++) {
            x_hat[sorted_indices[i]] = 0.0;
        }
        for (int i = 0; i < numSelect; i++) {
            indices[i] = sorted_indices[sorted_indices.length - i - 1];
        }
    }

    f_t = 0;
    for (int i = 0; i < numSelect; i++) {
        f_t += this.weights[indices[i]] * x_t[indices[i]];
    }
    f_t += this.bias;

    if (f_t * y_t < 0) {
        // The ratio must be computed in floating point: with int / int it
        // truncated to 0 whenever fewer features are selected than exist, which
        // made the denominator 0 for every zero weight (division by zero).
        final double baseDenom = (double) numSelect / x_hat.length * this.searchOption.getValue();
        for (int i = 0; i < x_hat.length; i++) {
            double denom = baseDenom;
            if (this.weights[i] != 0) {
                // NOTE(review): the published OFS_P update appears to use an
                // indicator (w_i != 0) here rather than the weight's value -
                // confirm which is intended. Left unchanged.
                denom += (1 - this.searchOption.getValue()) * this.weights[i];
            }
            x_hat[i] /= denom;
        }

        m_weights = scalar_vector(y_t * this.stepSizeOption.getValue(), x_hat);
        m_bias = y_t * this.stepSizeOption.getValue() * this.bias;
        m_weights = vector_add(m_weights, this.weights);
        // NOTE(review): this evaluates to 2 * m_bias + bias; by analogy with the
        // weight update, "m_bias += this.bias" may have been intended - confirm.
        // Left unchanged.
        m_bias += m_bias + this.bias;

        m_weights = l2_projection(m_weights, m_bias, this.boundOption.getValue());
        m_weights = truncate(m_weights, numSelect);

        // every element but the last is a weight; the last one is the bias
        for (int i = 0; i < m_weights.length - 1; i++) {
            this.weights[i] = m_weights[i];
        }
        this.bias = m_weights[m_weights.length - 1];
    }
}
From source file:moa.classifiers.functions.SGD.java
License:Open Source License
protected static double dotProd(Instance inst1, DoubleVector weights, int classIndex) { double result = 0; int n1 = inst1.numValues(); int n2 = weights.numValues(); for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) { int ind1 = inst1.index(p1); int ind2 = p2; if (ind1 == ind2) { if (ind1 != classIndex && !inst1.isMissingSparse(p1)) { result += inst1.valueSparse(p1) * weights.getValue(p2); }//from w w w . j a va 2 s .com p1++; p2++; } else if (ind1 > ind2) { p2++; } else { p1++; } } return (result); }