Example usage for weka.core Instance numValues

List of usage examples for weka.core Instance numValues

Introduction

On this page you can find example usages of the numValues() method of weka.core.Instance.

Prototype

public int numValues();

Source Link

Document

Returns the number of values present in a sparse representation.

Usage

From source file:machinelearningq2.BasicNaiveBayesV1.java

/**
 * Computes an unnormalised naive Bayes score for every class for a single
 * instance and prints the result.
 * <p>
 * For each class the score is the prior {@code classValueCounts[c] / countData}
 * multiplied by the value of {@code getConditionalProbability} for every
 * attribute value that has a matching entry in {@code data}. Attribute values
 * without a matching entry contribute no factor.
 *
 * @param instnc the instance to score
 * @return one score per class, indexed like {@code classValueCounts}
 * @throws Exception declared by the overridden method
 */
@Override
public double[] distributionForInstance(Instance instnc) throws Exception {

    double[] naiveBayes = new double[classValueCounts.length];

    // value(i) is addressed by attribute index, so the bound has to be the
    // attribute count. numValues() only counts the stored values and is
    // smaller than that for sparse instances. The last attribute is left out,
    // as before - presumably it is the class attribute (TODO confirm).
    final int attributeLimit = instnc.numAttributes() - 1;

    for (int c = 0; c < naiveBayes.length; c++) {

        // holds the prior P(class=c) followed by the conditional
        // probabilities found for this class, e.g.
        // P(struct=0|crime=1), P(security=1|crime=1), P(area=1|crime=1)
        ArrayList<Double> conditionalProbs = new ArrayList<>();

        // the cast keeps this a floating-point division even if the counters
        // turn out to be integral
        double priorProbability = (double) classValueCounts[c] / countData;
        conditionalProbs.add(priorProbability);

        for (int i = 0; i < attributeLimit; i++) {
            double attributeValue = instnc.value(i);
            DataFound d = new DataFound(attributeValue, c, i);

            int index = data.indexOf(d);
            if (index != -1) {
                double classValueCount = classValueCounts[(int) d.getClassValue()];
                conditionalProbs.add(data.get(index).getConditionalProbability((int) classValueCount));
            }
        }
        System.out.println(conditionalProbs);

        // multiply prior and conditionals together
        double total = 1;
        for (Double x : conditionalProbs) {
            total *= x;
        }
        naiveBayes[c] = total;
    }
    prettyPrintProbabilities(naiveBayes);
    return naiveBayes;
}

From source file:machinelearningq2.ExtendedNaiveBayes.java

/**
 * Computes an unnormalised naive Bayes score for every class for a single
 * instance.
 * <p>
 * For each class the score is the prior {@code classValueCounts[c] / countData}
 * multiplied by the value of {@code getConditionalProbability} for every
 * attribute value that has a matching entry in {@code data}. Attribute values
 * without a matching entry contribute no factor.
 *
 * @param instnc the instance to score
 * @return one score per class, indexed like {@code classValueCounts}
 * @throws Exception kept for callers that already handle it
 */
public double[] distributionForDiscrete(Instance instnc) throws Exception {

    double[] naiveBayes = new double[classValueCounts.length];

    // value(i) is addressed by attribute index, so the bound has to be the
    // attribute count. numValues() only counts the stored values and is
    // smaller than that for sparse instances. The last attribute is left out,
    // as before - presumably it is the class attribute (TODO confirm).
    final int attributeLimit = instnc.numAttributes() - 1;

    for (int c = 0; c < naiveBayes.length; c++) {

        // start from the prior P(class=c); the cast keeps this a
        // floating-point division even if the counters turn out to be integral
        double score = (double) classValueCounts[c] / countData;

        // multiply in every conditional probability that is known, e.g.
        // P(struct=0|crime=1), P(security=1|crime=1), P(area=1|crime=1)
        for (int i = 0; i < attributeLimit; i++) {
            double attributeValue = instnc.value(i);
            DataFound d = new DataFound(attributeValue, c, i);

            int index = data.indexOf(d);
            if (index != -1) {
                double classValueCount = classValueCounts[(int) d.getClassValue()];
                score *= data.get(index).getConditionalProbability((int) classValueCount);
            }
        }
        naiveBayes[c] = score;
    }
    return naiveBayes;
}

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Initializes a correlation attribute evaluator. Replaces missing
 * values with means/modes; Deletes instances with missing class values.
 * <p>
 * After the call {@code m_correlations} holds one entry per attribute.
 * Nominal attributes are binarized (one 0/1 indicator vector per value) and
 * their score is the frequency-weighted average of the absolute per-value
 * correlations. With a nominal class the class is binarized the same way and
 * correlations are averaged over classes, weighted by class counts.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    // continue on a new Instances object built from the argument
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    // indexes of the non-class attributes, split by type
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)
    // nomAtts[attribute][value][instance]: indicator vectors for the nominal
    // attributes; entries for other attributes stay null
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            // only the stored values of the instance are visited
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    // A stored value 0 adds and removes 1 at the same cell, so
                    // the pre-filled 1 in row 0 survives; any other value
                    // moves the 1 from row 0 to its own row.
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances carrying value j
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    // only the magnitude of the correlation is used
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                // average weighted by value frequency; 0 when nothing was counted
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        // binarizedClasses[class][instance] is 1 where the instance has that class
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                // average over classes, weighted by class counts
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    // number of instances carrying value j
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    // class-count weighted average for value j
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (weighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    // terminate the detailed report with a newline when something was written
    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:ml.engine.LibSVM.java

License:Open Source License

/**
 * Converts an instance into a sparse libsvm node array.
 * <p>
 * Only the stored values of the instance are visited. The class attribute
 * and values equal to zero are left out. Each node receives the attribute
 * index shifted by one as its {@code index} field and the attribute value
 * as its {@code value} field.
 *
 * @param instance the instance to work on
 * @return the libsvm array
 * @throws Exception if setup of array fails
 */
protected Object instanceToArray(Instance instance) throws Exception {
    final int classIndex = instance.classIndex();
    final int numValues = instance.numValues();

    // first pass: count the entries that will be emitted so the array can be
    // allocated with its exact size
    int count = 0;
    for (int i = 0; i < numValues; i++) {
        if (instance.index(i) == classIndex) {
            continue;
        }
        if (instance.valueSparse(i) != 0) {
            count++;
        }
    }

    // the node class is only known by name, so resolve it once up front
    // instead of once per element
    final Class<?> nodeClass = Class.forName(CLASS_SVMNODE);
    final Object result = Array.newInstance(nodeClass, count);

    // second pass: create and fill one node per emitted entry
    int next = 0;
    for (int i = 0; i < numValues; i++) {
        int idx = instance.index(i);
        if (idx == classIndex) {
            continue;
        }
        double value = instance.valueSparse(i);
        if (value == 0) {
            continue;
        }

        // Class.newInstance() and the boxing constructors are deprecated
        Object node = nodeClass.getDeclaredConstructor().newInstance();
        setField(node, "index", Integer.valueOf(idx + 1));
        setField(node, "value", Double.valueOf(value));
        Array.set(result, next, node);
        next++;
    }

    return result;
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 * <p>
 * On the first call after a reset the per-class statistics are allocated
 * and seeded with the Laplace correction. Afterwards the class prior count,
 * the class total and the per-class word totals are updated with the
 * weighted values stored in the instance.
 *
 * @param inst the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.reset) {
        this.m_numClasses = inst.numClasses();
        double laplace = this.laplaceCorrectionOption.getValue();
        int numAttributes = inst.numAttributes();

        m_probOfClass = new double[m_numClasses];
        Arrays.fill(m_probOfClass, laplace);

        m_classTotals = new double[m_numClasses];
        Arrays.fill(m_classTotals, laplace * numAttributes);

        m_wordTotalForClass = new DoubleVector[m_numClasses];
        for (int i = 0; i < m_numClasses; i++) {
            m_wordTotalForClass[i] = new DoubleVector();
        }
        this.reset = false;
    }

    // Update classifier
    int classIndex = inst.classIndex();
    int classValue = (int) inst.value(classIndex);

    double w = inst.weight();
    m_probOfClass[classValue] += w;
    m_classTotals[classValue] += w * totalSize(inst);

    for (int i = 0; i < inst.numValues(); i++) {
        int index = inst.index(i);
        // i is a position among the stored values, not an attribute index,
        // so the missing check has to be the sparse variant
        if (index != classIndex && !inst.isMissingSparse(i)) {
            // a word seen for the first time in this class also receives
            // the Laplace correction
            double laplaceCorrection = 0.0;
            if (m_wordTotalForClass[classValue].getValue(index) == 0) {
                laplaceCorrection = this.laplaceCorrectionOption.getValue();
            }
            m_wordTotalForClass[classValue].addToValue(index, w * inst.valueSparse(i) + laplaceCorrection);
        }
    }
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance.
 * <p>
 * While the reset flag is set a zero-filled array of length two is returned.
 * Otherwise log scores are accumulated per class and converted with
 * {@code Utils.logs2probs}.
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 */
@Override
public double[] getVotesForInstance(Instance instance) {
    if (this.reset) {
        return new double[2];
    }

    final int classIndex = instance.classIndex();
    final double totalSize = totalSize(instance);

    // log prior minus the document-length dependent normaliser
    double[] probOfClassGivenDoc = new double[m_numClasses];
    for (int c = 0; c < m_numClasses; c++) {
        probOfClassGivenDoc[c] = Math.log(m_probOfClass[c]) - totalSize * Math.log(m_classTotals[c]);
    }

    for (int i = 0; i < instance.numValues(); i++) {
        int index = instance.index(i);
        // i is a position among the stored values, not an attribute index,
        // so the missing check has to be the sparse variant
        if (index == classIndex || instance.isMissingSparse(i)) {
            continue;
        }

        double wordCount = instance.valueSparse(i);
        for (int c = 0; c < m_numClasses; c++) {
            double value = m_wordTotalForClass[c].getValue(index);
            // a word with no total for this class falls back to the Laplace
            // correction
            probOfClassGivenDoc[c] += wordCount
                    * Math.log(value == 0 ? this.laplaceCorrectionOption.getValue() : value);
        }
    }

    return Utils.logs2probs(probOfClassGivenDoc);
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Sums the non-negative values stored in the instance, leaving out the
 * class attribute and missing values.
 *
 * @param instance the instance whose values are summed
 * @return the sum of the counted values
 */
public double totalSize(Instance instance) {
    int classIndex = instance.classIndex();
    double total = 0.0;
    for (int i = 0; i < instance.numValues(); i++) {
        int index = instance.index(i);
        // i is a position among the stored values, not an attribute index,
        // so the missing check has to be the sparse variant
        if (index == classIndex || instance.isMissingSparse(i)) {
            continue;
        }
        double count = instance.valueSparse(i);
        // negative values are deliberately ignored rather than rejected, so
        // the method keeps its exception-free signature
        if (count >= 0) {
            total += count;
        }
    }
    return total;
}

From source file:moa.classifiers.featureselection.OFSL.java

License:Open Source License

/**
 * Performs one online update of the weight vector and bias.
 * <p>
 * The weights are always shrunk by {@code 1 - stepSize * learningRate}.
 * When the product of target and prediction is negative, a step along the
 * instance vector is added first and the result is passed through
 * {@code l2_projection} and {@code truncate}.
 *
 * @param inst the training instance
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.weights == null) {
        // The weights are combined with inst.toDoubleArray() below, so they
        // are sized by the attribute count. numValues() only counts the
        // stored values and is smaller for sparse instances. A new double
        // array is already zero-filled.
        this.weights = new double[inst.numAttributes()];
        this.bias = 0.0;
    }

    // nominal classes are mapped to -1 (class value 0) or +1 (anything else)
    final double y_t;
    if (inst.classAttribute().isNominal()) {
        y_t = (inst.classValue() == 0) ? -1 : 1;
    } else {
        y_t = inst.classValue();
    }

    // NOTE(review): toDoubleArray() presumably contains the class value too,
    // which would then take part in the prediction - confirm.
    final double[] x_t = inst.toDoubleArray();
    final double f_t = dot(x_t, this.weights) + this.bias;

    final double decay = 1.0 - this.stepSizeOption.getValue() * this.learningRateOption.getValue();

    if (y_t * f_t < 0) {
        double[] shrunk = scalar_vector(decay, this.weights);
        double shrunkBias = decay * this.bias;
        double[] step = scalar_vector(this.learningRateOption.getValue() * y_t, x_t);
        double stepBias = this.learningRateOption.getValue() * y_t;

        double[] updated = vector_add(shrunk, step);
        double updatedBias = shrunkBias + stepBias;

        updated = l2_projection(updated, updatedBias, this.learningRateOption.getValue());
        updated = truncate(updated, this.numSelectOption.getValue());

        for (int i = 0; i < shrunk.length; i++) {
            this.weights[i] = updated[i];
        }
        // NOTE(review): the bias is read from the last slot of the result -
        // presumably l2_projection appends it; confirm.
        this.bias = updated[updated.length - 1];
    } else {
        this.weights = scalar_vector(decay, this.weights);
        this.bias = decay * this.bias;
    }

}

From source file:moa.classifiers.featureselection.OFSP.java

License:Open Source License

/**
 * Performs one online update using only a subset of the attributes.
 * <p>
 * With probability {@code searchOption} the subset is taken from a
 * permutation returned by {@code perm}; otherwise it is taken from the end
 * of the order returned by {@code bubblesort_index} on the absolute
 * weights. When the product of prediction and target is negative the
 * weights are updated, passed through {@code l2_projection} and
 * {@code truncate}, and stored.
 *
 * @param inst the training instance
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    final int numSelect = this.numSelectOption.getValue();
    final int[] indices = new int[numSelect];

    if (this.weights == null) {
        // The weights are indexed like inst.toDoubleArray() below, so they
        // are sized by the attribute count. numValues() only counts the
        // stored values and is smaller for sparse instances.
        this.weights = new double[inst.numAttributes()];
        for (int i = 0; i < this.weights.length; i++) {
            this.weights[i] = this.rand.nextGaussian();
        }
        this.bias = 0.0;
        this.weights = truncate(this.weights, numSelect);
    }

    // nominal classes are mapped to -1 (class value 0) or +1 (anything else)
    final double y_t;
    if (inst.classAttribute().isNominal()) {
        y_t = (inst.classValue() == 0) ? -1 : 1;
    } else {
        y_t = inst.classValue();
    }

    // two separate copies: x_hat is modified below, x_t is not
    double[] x_t = inst.toDoubleArray();
    double[] x_hat = inst.toDoubleArray();

    final double search = this.searchOption.getValue();
    if (this.rand.nextDouble() < search) {
        // take the first numSelect entries of the permutation
        int[] indices_perm = perm(inst.numAttributes());
        for (int i = 0; i < numSelect; i++) {
            indices[i] = indices_perm[i];
        }
    } else {
        int[] sorted_indices = bubblesort_index(abs_vector(this.weights));

        // zero the attributes at the front of the order ...
        for (int i = 0; i < inst.numAttributes() - numSelect; i++) {
            x_hat[sorted_indices[i]] = 0.0;
        }
        // ... and select the numSelect attributes at its end
        for (int i = 0; i < numSelect; i++) {
            indices[i] = sorted_indices[sorted_indices.length - i - 1];
        }
    }

    // prediction over the selected attributes only
    double f_t = 0;
    for (int i = 0; i < numSelect; i++) {
        f_t += this.weights[indices[i]] * x_t[indices[i]];
    }
    f_t += this.bias;

    if (f_t * y_t < 0) {

        // The cast matters: numSelect and x_hat.length are both ints, so the
        // plain quotient was truncated to 0 whenever fewer attributes are
        // selected than exist, leaving a zero denominator for zero weights.
        final double exploreShare = (double) numSelect / x_hat.length * search;
        for (int i = 0; i < x_hat.length; i++) {
            double denom = exploreShare;
            if (this.weights[i] != 0) {
                denom += (1 - search) * this.weights[i];
            }
            x_hat[i] /= denom;
        }

        double[] m_weights = scalar_vector(y_t * this.stepSizeOption.getValue(), x_hat);
        double m_bias = y_t * this.stepSizeOption.getValue() * this.bias;
        m_weights = vector_add(m_weights, this.weights);
        // NOTE(review): this adds m_bias to itself before adding the old
        // bias; the weight update above suggests "m_bias += this.bias" may
        // have been intended. Left unchanged - confirm.
        m_bias += m_bias + this.bias;

        m_weights = l2_projection(m_weights, m_bias, this.boundOption.getValue());
        m_weights = truncate(m_weights, numSelect);

        // all but the last slot are weights, the last slot becomes the bias
        for (int i = 0; i < m_weights.length - 1; i++) {
            this.weights[i] = m_weights[i];
        }
        this.bias = m_weights[m_weights.length - 1];
    }

}

From source file:moa.classifiers.functions.SGD.java

License:Open Source License

/**
 * Computes the dot product between the stored values of an instance and a
 * weight vector.
 * <p>
 * The stored values of {@code inst1} are walked in step with the positions
 * {@code 0 .. weights.numValues() - 1}. A product is added whenever the
 * attribute index of a stored value equals the current weight position,
 * unless that attribute is the class attribute or the value is missing.
 *
 * @param inst1 the instance supplying the values
 * @param weights the weight vector, addressed by attribute index
 * @param classIndex index of the attribute to leave out
 * @return the accumulated sum of products
 */
protected static double dotProd(Instance inst1, DoubleVector weights, int classIndex) {
    final int storedCount = inst1.numValues();
    final int weightCount = weights.numValues();

    double sum = 0;
    int sparsePos = 0;
    int weightPos = 0;

    while (sparsePos < storedCount && weightPos < weightCount) {
        final int attIndex = inst1.index(sparsePos);

        if (attIndex < weightPos) {
            // stored value lies behind the current weight: advance the instance
            sparsePos++;
        } else if (attIndex > weightPos) {
            // no stored value for this weight: advance the weights
            weightPos++;
        } else {
            final boolean skip = attIndex == classIndex || inst1.isMissingSparse(sparsePos);
            if (!skip) {
                sum += inst1.valueSparse(sparsePos) * weights.getValue(weightPos);
            }
            sparsePos++;
            weightPos++;
        }
    }
    return sum;
}