Example usage for weka.core Instance numValues

Introduction

On this page you can find example usage of weka.core Instance numValues.

Prototype

public int numValues();

Source Link

Document

Returns the number of values present in a sparse representation.

Usage

From source file:machinelearningq2.BasicNaiveBayesV1.java

/**
 * Works out the naive Bayes score of every class for a single instance.
 * <p>
 * For each class the prior probability is multiplied by the conditional
 * probability of every non-class attribute value for which a matching
 * {@code DataFound} entry exists in {@code data}; attribute values without a
 * matching entry contribute nothing to the product. The products are
 * returned exactly as computed here.
 *
 * @param instnc the instance to score
 * @return one score per class, indexed like {@code classValueCounts}
 * @throws Exception declared by the overridden method
 */
@Override
public double[] distributionForInstance(Instance instnc) throws Exception {

    // one naive Bayes score per class
    double[] naiveBayes = new double[classValueCounts.length];

    // value(i) is addressed by attribute index, so the loop has to run over
    // numAttributes(); numValues() only counts the values stored in a sparse
    // representation. When no class index is set, fall back to treating the
    // last attribute as the class.
    int classIndex = instnc.classIndex();
    if (classIndex < 0) {
        classIndex = instnc.numAttributes() - 1;
    }

    for (int c = 0; c < naiveBayes.length; c++) {

        // holds the prior, e.g. P(crime=1), followed by the conditionals,
        // e.g. P(struct=0|crime=1), P(security=1|crime=1), P(area=1|crime=1)
        ArrayList<Double> conditionalProbs = new ArrayList<>();
        double priorProbability = classValueCounts[c] / countData;
        conditionalProbs.add(priorProbability);

        for (int i = 0; i < instnc.numAttributes(); i++) {
            if (i == classIndex) {
                continue;
            }
            double attributeValue = instnc.value(i);
            DataFound d = new DataFound(attributeValue, c, i);

            int index = data.indexOf(d);
            if (index != -1) {
                double classValueCount = classValueCounts[(int) d.getClassValue()];
                conditionalProbs.add(data.get(index).getConditionalProbability((int) classValueCount));
            }
        }
        System.out.println(conditionalProbs);

        // multiply prior and conditionals together
        double total = 1;
        for (Double x : conditionalProbs) {
            total *= x;
        }
        naiveBayes[c] = total;
    }
    prettyPrintProbabilities(naiveBayes);
    return naiveBayes;
}

From source file:machinelearningq2.ExtendedNaiveBayes.java

/**
 * Works out the naive Bayes score of every class for a single instance.
 * <p>
 * For each class the prior probability is multiplied by the conditional
 * probability of every non-class attribute value for which a matching
 * {@code DataFound} entry exists in {@code data}; attribute values without a
 * matching entry contribute nothing to the product.
 *
 * @param instnc the instance to score
 * @return one score per class, indexed like {@code classValueCounts}
 * @throws Exception declared for callers; nothing in this body throws it explicitly
 */
public double[] distributionForDiscrete(Instance instnc) throws Exception {

    // one naive Bayes score per class
    double[] naiveBayes = new double[classValueCounts.length];

    // value(i) is addressed by attribute index, so the loop has to run over
    // numAttributes(); numValues() only counts the values stored in a sparse
    // representation. When no class index is set, fall back to treating the
    // last attribute as the class.
    int classIndex = instnc.classIndex();
    if (classIndex < 0) {
        classIndex = instnc.numAttributes() - 1;
    }

    for (int c = 0; c < naiveBayes.length; c++) {

        // start from the prior, e.g. P(crime=1), and fold in each conditional,
        // e.g. P(struct=0|crime=1), P(security=1|crime=1), P(area=1|crime=1)
        double total = classValueCounts[c] / countData;

        for (int i = 0; i < instnc.numAttributes(); i++) {
            if (i == classIndex) {
                continue;
            }
            double attributeValue = instnc.value(i);
            DataFound d = new DataFound(attributeValue, c, i);

            int index = data.indexOf(d);
            if (index != -1) {
                double classValueCount = classValueCounts[(int) d.getClassValue()];
                total *= data.get(index).getConditionalProbability((int) classValueCount);
            }
        }
        naiveBayes[c] = total;
    }
    return naiveBayes;
}

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Initializes a correlation attribute evaluator. Replaces missing
 * values with means/modes; deletes instances with missing class values.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // Overview: works on a private copy of the data, then fills
    // m_correlations with one score per attribute. Numeric attributes are
    // correlated directly; each nominal attribute is expanded into one 0/1
    // indicator vector per value and the absolute correlations of those
    // vectors are averaged, weighted by how often each value occurs. The
    // entry for the class attribute is never assigned.
    data = new Instances(data);
    data.deleteWithMissingClass();

    // fill in the remaining missing values via the ReplaceMissingValues filter
    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    // attribute indices (class excluded) split by type
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)

    // nomAtts[attribute][value][instance] is the 0/1 indicator of "instance
    // has this value for this attribute"; only allocated for nominal
    // non-class attributes
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            // only the values actually stored in the instance are visited;
            // j is a position in the stored values, current.index(j) the
            // attribute index it belongs to
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    // Value 0 starts at 1 for every instance: a stored value v
                    // sets [v] and clears [0]; a stored value of 0 adds and
                    // subtracts 1 on the same cell, leaving it at 1.
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        // class is numeric: correlate against the raw class values
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances that have value j
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    // only the magnitude of the correlation is used
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                // average over the values, weighted by value frequency
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        // binarizedClasses[k][i] is 1 when instance i belongs to class k
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        // NOTE(review): sumClass is 0 when no instances remain, which makes
        // the divisions below produce NaN - confirm callers never pass such data
        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                // absolute correlation with each class indicator, weighted by
                // the class frequency
                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances that have value j
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    // class-frequency-weighted average for value j
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    // terminate the detailed report, if one was written
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:ml.engine.LibSVM.java

License:Open Source License

/**
 * Converts an instance into a sparse libsvm node array.
 * <p>
 * Only the values stored in the instance are visited. The class attribute
 * and values equal to zero are left out. Each remaining value becomes one
 * node whose "index" field is the attribute index plus one and whose
 * "value" field is the attribute value.
 *
 * @param instance the instance to work on
 * @return the libsvm array
 * @throws Exception if setup of array fails
 */
protected Object instanceToArray(Instance instance) throws Exception {
    // resolved once instead of on every loop iteration
    final Class<?> nodeClass = Class.forName(CLASS_SVMNODE);
    final int classIndex = instance.classIndex();
    final int numStored = instance.numValues();

    // first pass: determine the number of non-zero, non-class values
    int count = 0;
    for (int i = 0; i < numStored; i++) {
        if (instance.index(i) != classIndex && instance.valueSparse(i) != 0) {
            count++;
        }
    }

    // second pass: create and populate one node per retained value
    Object result = Array.newInstance(nodeClass, count);
    int next = 0;
    for (int i = 0; i < numStored; i++) {
        int attIndex = instance.index(i);
        double value = instance.valueSparse(i);
        if (attIndex == classIndex || value == 0) {
            continue;
        }

        Object node = nodeClass.getDeclaredConstructor().newInstance();
        setField(node, "index", Integer.valueOf(attIndex + 1));
        setField(node, "value", Double.valueOf(value));
        Array.set(result, next, node);
        next++;
    }

    return result;
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 * <p>
 * On the first call after a reset the per-class statistics are allocated
 * and seeded with the Laplace correction. Afterwards the class prior, the
 * class total and the per-class totals of every stored, non-class,
 * non-missing value are increased by the instance's weighted values.
 *
 * @param inst the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.reset) {
        this.m_numClasses = inst.numClasses();
        double laplace = this.laplaceCorrectionOption.getValue();
        int numAttributes = inst.numAttributes();

        m_probOfClass = new double[m_numClasses];
        Arrays.fill(m_probOfClass, laplace);

        m_classTotals = new double[m_numClasses];
        Arrays.fill(m_classTotals, laplace * numAttributes);

        m_wordTotalForClass = new DoubleVector[m_numClasses];
        for (int i = 0; i < m_numClasses; i++) {
            m_wordTotalForClass[i] = new DoubleVector();
        }
        this.reset = false;
    }

    // Update classifier
    int classIndex = inst.classIndex();
    int classValue = (int) inst.value(classIndex);

    double w = inst.weight();
    m_probOfClass[classValue] += w;
    m_classTotals[classValue] += w * totalSize(inst);

    for (int i = 0; i < inst.numValues(); i++) {
        int index = inst.index(i);
        // i is a position in the stored values, so the missing check must be
        // the sparse variant; isMissing(int) expects an attribute index
        if (index == classIndex || inst.isMissingSparse(i)) {
            continue;
        }

        // the Laplace correction is added once, the first time this
        // attribute is seen for this class (its total is still zero)
        double laplaceCorrection = 0.0;
        if (m_wordTotalForClass[classValue].getValue(index) == 0) {
            laplaceCorrection = this.laplaceCorrectionOption.getValue();
        }
        m_wordTotalForClass[classValue].addToValue(index, w * inst.valueSparse(i) + laplaceCorrection);
    }
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 * <p>
 * Works in log space: each class starts from
 * {@code log(prior) - totalSize * log(classTotal)} and every stored,
 * non-class, non-missing value adds {@code value * log(total)} for that
 * class, where an unseen total of zero is replaced by the Laplace
 * correction. The log scores are converted with {@code Utils.logs2probs}.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution; an all-zero array of
 *         length 2 while the model is still in its reset state
 */
@Override
public double[] getVotesForInstance(Instance instance) {
    if (this.reset) {
        return new double[2];
    }

    double[] probOfClassGivenDoc = new double[m_numClasses];
    double totalSize = totalSize(instance);

    for (int c = 0; c < m_numClasses; c++) {
        probOfClassGivenDoc[c] = Math.log(m_probOfClass[c]) - totalSize * Math.log(m_classTotals[c]);
    }

    int classIndex = instance.classIndex();
    for (int i = 0; i < instance.numValues(); i++) {
        int index = instance.index(i);
        // i is a position in the stored values, so the missing check must be
        // the sparse variant; isMissing(int) expects an attribute index
        if (index == classIndex || instance.isMissingSparse(i)) {
            continue;
        }

        double wordCount = instance.valueSparse(i);
        for (int c = 0; c < m_numClasses; c++) {
            double value = m_wordTotalForClass[c].getValue(index);
            probOfClassGivenDoc[c] += wordCount
                    * Math.log(value == 0 ? this.laplaceCorrectionOption.getValue() : value);
        }
    }

    return Utils.logs2probs(probOfClassGivenDoc);
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Sums the stored attribute values of an instance.
 * <p>
 * The class attribute and missing values are skipped. Values below zero
 * (and NaN) are ignored rather than reported.
 *
 * @param instance the instance whose values are summed
 * @return the sum of all stored, non-class, non-missing values that are
 *         greater than or equal to zero
 */
public double totalSize(Instance instance) {
    int classIndex = instance.classIndex();
    double total = 0.0;
    for (int i = 0; i < instance.numValues(); i++) {
        // i is a position in the stored values, so the missing check must be
        // the sparse variant; isMissing(int) expects an attribute index
        if (instance.index(i) == classIndex || instance.isMissingSparse(i)) {
            continue;
        }
        double count = instance.valueSparse(i);
        // negative values are deliberately skipped, not treated as an error
        if (count >= 0) {
            total += count;
        }
    }
    return total;
}

From source file:moa.classifiers.featureselection.OFSL.java

License:Open Source License

/**
 * Updates the linear model with one training instance.
 * <p>
 * The target is -1/+1 for a nominal class (class value 0 maps to -1) and the
 * raw class value otherwise. The prediction is the dot product of the
 * instance's value array with the weights, plus the bias. When prediction
 * and target disagree in sign, the weights and bias are decayed, moved
 * towards the instance, then passed through {@code l2_projection} and
 * {@code truncate}; otherwise they are only decayed.
 *
 * @param inst the training instance
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.weights == null) {
        // One weight per attribute: the vector is combined with
        // inst.toDoubleArray() below, which has one entry per attribute,
        // whereas numValues() only counts the values stored in a sparse
        // representation. New arrays are already zero-filled.
        this.weights = new double[inst.numAttributes()];
        this.bias = 0.0;
    }

    double y_t;
    if (inst.classAttribute().isNominal()) {
        y_t = (inst.classValue() == 0) ? -1 : 1;
    } else {
        y_t = inst.classValue();
    }

    // NOTE(review): toDoubleArray() presumably includes the class value
    // itself - confirm whether that entry should take part in the prediction
    double f_t = dot(inst.toDoubleArray(), this.weights);
    f_t += this.bias;

    if (y_t * f_t < 0) {
        // decayed copy of the current model
        double[] m_weights_p1 = scalar_vector(
                1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue(), this.weights);
        double m_bias_p1 = (1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue())
                * this.bias;

        // correction towards the instance
        double[] m_weights_p2 = scalar_vector(this.learningRateOption.getValue() * y_t, inst.toDoubleArray());
        double m_bias_p2 = this.learningRateOption.getValue() * y_t;

        double[] m_weights = vector_add(m_weights_p1, m_weights_p2);
        double m_bias = m_bias_p1 + m_bias_p2;

        m_weights = l2_projection(m_weights, m_bias, this.learningRateOption.getValue());
        m_weights = truncate(m_weights, this.numSelectOption.getValue());

        // the projected vector carries the bias in its last slot
        for (int i = 0; i < m_weights_p1.length; i++) {
            this.weights[i] = m_weights[i];
        }
        this.bias = m_weights[m_weights.length - 1];
    } else {
        this.weights = scalar_vector(1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue(),
                this.weights);
        this.bias = (1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue()) * this.bias;
    }
}

From source file:moa.classifiers.featureselection.OFSP.java

License:Open Source License

/**
 * Updates the linear model with one training instance, using only a
 * selected subset of the attributes for the prediction.
 * <p>
 * With probability {@code searchOption} the subset is a random permutation
 * prefix; otherwise it is the attributes with the largest absolute weights,
 * and all other entries of the working copy {@code x_hat} are zeroed. When
 * the prediction and the target disagree in sign, {@code x_hat} is rescaled
 * and added to the weights, and the result is passed through
 * {@code l2_projection} and {@code truncate}.
 *
 * @param inst the training instance
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    final int numSelect = this.numSelectOption.getValue();
    int[] indices = new int[numSelect];

    if (this.weights == null) {
        // One weight per attribute: the vector is indexed with attribute
        // indices below (perm(numAttributes), x_t, x_hat), whereas
        // numValues() only counts the values stored in a sparse representation.
        this.weights = new double[inst.numAttributes()];
        for (int i = 0; i < this.weights.length; i++) {
            this.weights[i] = this.rand.nextGaussian();
        }
        this.bias = 0.0;
        this.weights = truncate(this.weights, numSelect);
    }

    double y_t;
    if (inst.classAttribute().isNominal()) {
        y_t = (inst.classValue() == 0) ? -1 : 1;
    } else {
        y_t = inst.classValue();
    }
    double[] x_t = inst.toDoubleArray();
    double[] x_hat = inst.toDoubleArray();

    if (this.rand.nextDouble() < this.searchOption.getValue()) {
        // exploration: pick numSelect attributes at random
        int[] indices_perm = perm(inst.numAttributes());
        for (int i = 0; i < numSelect; i++) {
            indices[i] = indices_perm[i];
        }
    } else {
        // exploitation: the selected attributes are read from the end of the
        // sorted index array; the entries before them are zeroed in x_hat
        int[] sorted_indices = bubblesort_index(abs_vector(this.weights));

        for (int i = 0; i < inst.numAttributes() - numSelect; i++) {
            x_hat[sorted_indices[i]] = 0.0;
        }
        for (int i = 0; i < numSelect; i++) {
            indices[i] = sorted_indices[sorted_indices.length - i - 1];
        }
    }

    // prediction from the selected attributes only
    double f_t = 0;
    for (int i = 0; i < numSelect; i++) {
        f_t += this.weights[indices[i]] * x_t[indices[i]];
    }
    f_t += this.bias;

    if (f_t * y_t < 0) {
        // The selected fraction must be a floating-point ratio; dividing the
        // two ints directly truncates to 0 whenever numSelect < x_hat.length.
        double selectedFraction = (double) numSelect / x_hat.length;
        for (int i = 0; i < x_hat.length; i++) {
            double denom = selectedFraction * this.searchOption.getValue();
            if (this.weights[i] != 0) {
                denom += (1 - this.searchOption.getValue()) * this.weights[i];
            }
            x_hat[i] /= denom;
        }

        double[] m_weights = scalar_vector(y_t * this.stepSizeOption.getValue(), x_hat);
        m_weights = vector_add(m_weights, this.weights);

        // mirror the weight update (step + current value); the scaled bias
        // must be added to the current bias exactly once
        double m_bias = y_t * this.stepSizeOption.getValue() * this.bias;
        m_bias += this.bias;

        m_weights = l2_projection(m_weights, m_bias, this.boundOption.getValue());
        m_weights = truncate(m_weights, numSelect);

        // the projected vector carries the bias in its last slot
        for (int i = 0; i < m_weights.length - 1; i++) {
            this.weights[i] = m_weights[i];
        }
        this.bias = m_weights[m_weights.length - 1];
    }
}

From source file:moa.classifiers.functions.SGD.java

License:Open Source License

/**
 * Computes the dot product of an instance's stored values with a weight
 * vector, leaving out the class attribute and missing values.
 * <p>
 * The instance is walked by stored-value position and the weights by plain
 * index; the two cursors advance until the attribute index and the weight
 * index line up.
 *
 * @param inst1 the instance supplying the values
 * @param weights the weight vector, addressed by attribute index
 * @param classIndex index of the class attribute, which is excluded
 * @return the accumulated sum of value * weight
 */
protected static double dotProd(Instance inst1, DoubleVector weights, int classIndex) {
    final int storedCount = inst1.numValues();
    final int weightCount = weights.numValues();

    double sum = 0;
    int sparsePos = 0;
    int weightPos = 0;
    while (sparsePos < storedCount && weightPos < weightCount) {
        final int attrIndex = inst1.index(sparsePos);

        if (attrIndex > weightPos) {
            // the weight cursor is behind the attribute: catch up
            weightPos++;
            continue;
        }
        if (attrIndex < weightPos) {
            // the instance cursor is behind the weight cursor: catch up
            sparsePos++;
            continue;
        }

        // cursors are aligned on the same attribute
        if (attrIndex != classIndex && !inst1.isMissingSparse(sparsePos)) {
            sum += inst1.valueSparse(sparsePos) * weights.getValue(weightPos);
        }
        sparsePos++;
        weightPos++;
    }
    return sum;
}