Example usage for weka.core Instance valueSparse

Introduction

In this page you can find the example usage for weka.core Instance valueSparse.

Prototype

public double valueSparse(int indexOfIndex);

Source Link

Document

Returns an instance's attribute value in internal format, given an index in the sparse representation.

Usage

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Initializes an information gain attribute evaluator. Replaces missing
 * values with means/modes; Deletes instances with missing class values.
 * /*w w w.  j a  va  2  s  .  c o  m*/
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (wighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:ml.engine.LibSVM.java

License:Open Source License

/**
 * returns an instance into a sparse libsvm array
 * /*from   w ww. j ava2  s . co m*/
 * @param instance the instance to work on
 * @return the libsvm array
 * @throws Exception if setup of array fails
 */
protected Object instanceToArray(Instance instance) throws Exception {
    int index;
    int count;
    int i;
    Object result;

    // determine number of non-zero attributes
    /*
     * for (i = 0; i < instance.numAttributes(); i++) { if (i ==
     * instance.classIndex()) continue; if (instance.value(i) != 0) count++; }
     */
    count = 0;
    for (i = 0; i < instance.numValues(); i++) {
        if (instance.index(i) == instance.classIndex()) {
            continue;
        }
        if (instance.valueSparse(i) != 0) {
            count++;
        }
    }

    // fill array
    /*
     * result = Array.newInstance(Class.forName(CLASS_SVMNODE), count); index =
     * 0; for (i = 0; i < instance.numAttributes(); i++) { if (i ==
     * instance.classIndex()) continue; if (instance.value(i) == 0) continue;
     * 
     * Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
     * setField(Array.get(result, index), "index", new Integer(i + 1));
     * setField(Array.get(result, index), "value", new
     * Double(instance.value(i))); index++; }
     */

    result = Array.newInstance(Class.forName(CLASS_SVMNODE), count);
    index = 0;
    for (i = 0; i < instance.numValues(); i++) {

        int idx = instance.index(i);
        if (idx == instance.classIndex()) {
            continue;
        }
        if (instance.valueSparse(i) == 0) {
            continue;
        }

        Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
        setField(Array.get(result, index), "index", new Integer(idx + 1));
        setField(Array.get(result, index), "value", new Double(instance.valueSparse(i)));
        index++;
    }

    return result;
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 *//*  w w w  . jav  a 2s . c  o m*/
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.reset == true) {
        this.m_numClasses = inst.numClasses();
        double laplace = this.laplaceCorrectionOption.getValue();
        int numAttributes = inst.numAttributes();

        m_probOfClass = new double[m_numClasses];
        Arrays.fill(m_probOfClass, laplace);

        m_classTotals = new double[m_numClasses];
        Arrays.fill(m_classTotals, laplace * numAttributes);

        m_wordTotalForClass = new DoubleVector[m_numClasses];
        for (int i = 0; i < m_numClasses; i++) {
            //Arrays.fill(wordTotal, laplace);
            m_wordTotalForClass[i] = new DoubleVector();
        }
        this.reset = false;
    }
    // Update classifier
    int classIndex = inst.classIndex();
    int classValue = (int) inst.value(classIndex);

    double w = inst.weight();
    m_probOfClass[classValue] += w;

    m_classTotals[classValue] += w * totalSize(inst);
    double total = m_classTotals[classValue];

    for (int i = 0; i < inst.numValues(); i++) {
        int index = inst.index(i);
        if (index != classIndex && !inst.isMissing(i)) {
            //m_wordTotalForClass[index][classValue] += w * inst.valueSparse(i);
            double laplaceCorrection = 0.0;
            if (m_wordTotalForClass[classValue].getValue(index) == 0) {
                laplaceCorrection = this.laplaceCorrectionOption.getValue();
            }
            m_wordTotalForClass[classValue].addToValue(index, w * inst.valueSparse(i) + laplaceCorrection);
        }
    }
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance.//from   ww  w .  ja  v  a  2s  .c o  m
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 */
@Override
public double[] getVotesForInstance(Instance instance) {
    if (this.reset == true) {
        return new double[2];
    }
    double[] probOfClassGivenDoc = new double[m_numClasses];
    double totalSize = totalSize(instance);

    for (int i = 0; i < m_numClasses; i++) {
        probOfClassGivenDoc[i] = Math.log(m_probOfClass[i]) - totalSize * Math.log(m_classTotals[i]);
    }

    for (int i = 0; i < instance.numValues(); i++) {

        int index = instance.index(i);
        if (index == instance.classIndex() || instance.isMissing(i)) {
            continue;
        }

        double wordCount = instance.valueSparse(i);
        for (int c = 0; c < m_numClasses; c++) {
            double value = m_wordTotalForClass[c].getValue(index);
            probOfClassGivenDoc[c] += wordCount
                    * Math.log(value == 0 ? this.laplaceCorrectionOption.getValue() : value);
        }
    }

    return Utils.logs2probs(probOfClassGivenDoc);
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

public double totalSize(Instance instance) {
    int classIndex = instance.classIndex();
    double total = 0.0;
    for (int i = 0; i < instance.numValues(); i++) {
        int index = instance.index(i);
        if (index == classIndex || instance.isMissing(i)) {
            continue;
        }/*  ww  w .j av a  2s  . c om*/
        double count = instance.valueSparse(i);
        if (count >= 0) {
            total += count;
        } else {
            //throw new Exception("Numeric attribute value is not >= 0. " + i + " " + index + " " +
            //          instance.valueSparse(i) + " " + " " + instance);
        }
    }
    return total;
}

From source file:moa.classifiers.functions.SGD.java

License:Open Source License

protected static double dotProd(Instance inst1, DoubleVector weights, int classIndex) {
    double result = 0;

    int n1 = inst1.numValues();
    int n2 = weights.numValues();

    for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
        int ind1 = inst1.index(p1);
        int ind2 = p2;
        if (ind1 == ind2) {
            if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
                result += inst1.valueSparse(p1) * weights.getValue(p2);
            }//from ww  w. j  a va 2 s .c o m
            p1++;
            p2++;
        } else if (ind1 > ind2) {
            p2++;
        } else {
            p1++;
        }
    }
    return (result);
}

From source file:moa.classifiers.functions.SGD.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 *
 * @param instance    the new training instance to include in the model
 */// w w w .  ja v  a  2  s. co m
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        m_weights = new DoubleVector();
        m_bias = 0.0;
    }

    if (!instance.classIsMissing()) {

        double wx = dotProd(instance, m_weights, instance.classIndex());

        double y;
        double z;
        if (instance.classAttribute().isNominal()) {
            y = (instance.classValue() == 0) ? -1 : 1;
            z = y * (wx + m_bias);
        } else {
            y = instance.classValue();
            z = y - (wx + m_bias);
            y = 1;
        }

        // Compute multiplier for weight decay
        double multiplier = 1.0;
        if (m_numInstances == 0) {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
        } else {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
        }
        for (int i = 0; i < m_weights.numValues(); i++) {
            m_weights.setValue(i, m_weights.getValue(i) * multiplier);
        }

        // Only need to do the following if the loss is non-zero
        if (m_loss != HINGE || (z < 1)) {

            // Compute Factor for updates
            double factor = m_learningRate * y * dloss(z);

            // Update coefficients for attributes
            int n1 = instance.numValues();
            for (int p1 = 0; p1 < n1; p1++) {
                int indS = instance.index(p1);
                if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
                    m_weights.addToValue(indS, factor * instance.valueSparse(p1));
                }
            }

            // update the bias
            m_bias += factor;
        }
        m_t++;
    }
}

From source file:moa.classifiers.functions.SGDMultiClass.java

License:Open Source License

public void trainOnInstanceImpl(Instance instance, int classLabel) {
    if (!instance.classIsMissing()) {

        double wx = dotProd(instance, m_weights[classLabel], instance.classIndex());

        double y;
        double z;
        if (instance.classAttribute().isNominal()) {
            y = (instance.classValue() != classLabel) ? -1 : 1;
            z = y * (wx + m_bias[classLabel]);
        } else {//from   w  w  w  .  ja v  a2s  .  com
            y = instance.classValue();
            z = y - (wx + m_bias[classLabel]);
            y = 1;
        }

        // Compute multiplier for weight decay
        double multiplier = 1.0;
        if (m_numInstances == 0) {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
        } else {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
        }
        for (int i = 0; i < m_weights[classLabel].numValues(); i++) {
            m_weights[classLabel].setValue(i, m_weights[classLabel].getValue(i) * multiplier);
        }

        // Only need to do the following if the loss is non-zero
        if (m_loss != HINGE || (z < 1)) {

            // Compute Factor for updates
            double factor = m_learningRate * y * dloss(z);

            // Update coefficients for attributes
            int n1 = instance.numValues();
            for (int p1 = 0; p1 < n1; p1++) {
                int indS = instance.index(p1);
                if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
                    m_weights[classLabel].addToValue(indS, factor * instance.valueSparse(p1));
                }
            }

            // update the bias
            m_bias[classLabel] += factor;
        }

    }
}

From source file:moa.classifiers.functions.SGDOld.java

License:Open Source License

protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
    double result = 0;

    int n1 = inst1.numValues();
    int n2 = weights.length - 1;

    for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
        int ind1 = inst1.index(p1);
        int ind2 = p2;
        if (ind1 == ind2) {
            if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
                result += inst1.valueSparse(p1) * weights[p2];
            }/*  w  w  w . ja v  a 2 s . co  m*/
            p1++;
            p2++;
        } else if (ind1 > ind2) {
            p2++;
        } else {
            p1++;
        }
    }
    return (result);
}

From source file:moa.classifiers.functions.SGDOld.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 *
 * @param instance    the new training instance to include in the model
 *///  ww w .  j  a  va2 s  .c om
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        m_weights = new double[instance.numAttributes() + 1];
    }

    if (!instance.classIsMissing()) {

        double wx = dotProd(instance, m_weights, instance.classIndex());

        double y;
        double z;
        if (instance.classAttribute().isNominal()) {
            y = (instance.classValue() == 0) ? -1 : 1;
            z = y * (wx + m_weights[m_weights.length - 1]);
        } else {
            y = instance.classValue();
            z = y - (wx + m_weights[m_weights.length - 1]);
            y = 1;
        }

        // Compute multiplier for weight decay
        double multiplier = 1.0;
        if (m_numInstances == 0) {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
        } else {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
        }
        for (int i = 0; i < m_weights.length - 1; i++) {
            m_weights[i] *= multiplier;
        }

        // Only need to do the following if the loss is non-zero
        if (m_loss != HINGE || (z < 1)) {

            // Compute Factor for updates
            double factor = m_learningRate * y * dloss(z);

            // Update coefficients for attributes
            int n1 = instance.numValues();
            for (int p1 = 0; p1 < n1; p1++) {
                int indS = instance.index(p1);
                if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
                    m_weights[indS] += factor * instance.valueSparse(p1);
                }
            }

            // update the bias
            m_weights[m_weights.length - 1] += factor;
        }
        m_t++;
    }
}