Example usage for weka.core Instance valueSparse

List of usage examples for weka.core Instance valueSparse

Introduction

In this page you can find the example usage for weka.core Instance valueSparse.

Prototype

public double valueSparse(int indexOfIndex);

Source Link

Document

Returns an instance's attribute value in internal format, given an index in the sparse representation.

Usage

From source file:ml.dataprocess.CorrelationAttributeEval.java

License:Open Source License

/**
 * Initializes an information gain attribute evaluator. Replaces missing
 * values with means/modes; Deletes instances with missing class values.
 * /*w w w.  j a  va  2  s  .  c o  m*/
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
@Override
public void buildEvaluator(Instances data) throws Exception {
    data = new Instances(data);
    data.deleteWithMissingClass();

    ReplaceMissingValues rmv = new ReplaceMissingValues();
    rmv.setInputFormat(data);
    data = Filter.useFilter(data, rmv);

    int numClasses = data.classAttribute().numValues();
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();
    m_correlations = new double[data.numAttributes()];
    /*
     * boolean hasNominals = false; boolean hasNumerics = false;
     */
    List<Integer> numericIndexes = new ArrayList<Integer>();
    List<Integer> nominalIndexes = new ArrayList<Integer>();
    if (m_detailedOutput) {
        m_detailedOutputBuff = new StringBuffer();
    }

    // TODO for instance weights (folded into computing weighted correlations)
    // add another dimension just before the last [2] (0 for 0/1 binary vector
    // and
    // 1 for corresponding instance weights for the 1's)
    double[][][] nomAtts = new double[data.numAttributes()][][];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (data.attribute(i).isNominal() && i != classIndex) {
            nomAtts[i] = new double[data.attribute(i).numValues()][data.numInstances()];
            Arrays.fill(nomAtts[i][0], 1.0); // set zero index for this att to all
                                             // 1's
            nominalIndexes.add(i);
        } else if (data.attribute(i).isNumeric() && i != classIndex) {
            numericIndexes.add(i);
        }
    }

    // do the nominal attributes
    if (nominalIndexes.size() > 0) {
        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            for (int j = 0; j < current.numValues(); j++) {
                if (current.attribute(current.index(j)).isNominal() && current.index(j) != classIndex) {
                    // Will need to check for zero in case this isn't a sparse
                    // instance (unless we add 1 and subtract 1)
                    nomAtts[current.index(j)][(int) current.valueSparse(j)][i] += 1;
                    nomAtts[current.index(j)][0][i] -= 1;
                }
            }
        }
    }

    if (data.classAttribute().isNumeric()) {
        double[] classVals = data.attributeToDoubleArray(classIndex);

        // do the numeric attributes
        for (Integer i : numericIndexes) {
            double[] numAttVals = data.attributeToDoubleArray(i);
            m_correlations[i] = Utils.correlation(numAttVals, classVals, numAttVals.length);

            if (m_correlations[i] == 1.0) {
                // check for zero variance (useless numeric attribute)
                if (Utils.variance(numAttVals) == 0) {
                    m_correlations[i] = 0;
                }
            }
        }

        // do the nominal attributes
        if (nominalIndexes.size() > 0) {

            // now compute the correlations for the binarized nominal attributes
            for (Integer i : nominalIndexes) {
                double sum = 0;
                double corr = 0;
                double sumCorr = 0;
                double sumForValue = 0;

                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    sumForValue = Utils.sum(nomAtts[i][j]);
                    corr = Utils.correlation(nomAtts[i][j], classVals, classVals.length);

                    // useless attribute - all instances have the same value
                    if (sumForValue == numInstances || sumForValue == 0) {
                        corr = 0;
                    }
                    if (corr < 0.0) {
                        corr = -corr;
                    }
                    sumCorr += sumForValue * corr;
                    sum += sumForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(corr, 6));
                    }
                }
                m_correlations[i] = (sum > 0) ? sumCorr / sum : 0;
            }
        }
    } else {
        // class is nominal
        // TODO extra dimension for storing instance weights too
        double[][] binarizedClasses = new double[data.classAttribute().numValues()][data.numInstances()];

        // this is equal to the number of instances for all inst weights = 1
        double[] classValCounts = new double[data.classAttribute().numValues()];

        for (int i = 0; i < data.numInstances(); i++) {
            Instance current = data.instance(i);
            binarizedClasses[(int) current.classValue()][i] = 1;
        }
        for (int i = 0; i < data.classAttribute().numValues(); i++) {
            classValCounts[i] = Utils.sum(binarizedClasses[i]);
        }

        double sumClass = Utils.sum(classValCounts);

        // do numeric attributes first
        if (numericIndexes.size() > 0) {
            for (Integer i : numericIndexes) {
                double[] numAttVals = data.attributeToDoubleArray(i);
                double corr = 0;
                double sumCorr = 0;

                for (int j = 0; j < data.classAttribute().numValues(); j++) {
                    corr = Utils.correlation(numAttVals, binarizedClasses[j], numAttVals.length);
                    if (corr < 0.0) {
                        corr = -corr;
                    }

                    if (corr == 1.0) {
                        // check for zero variance (useless numeric attribute)
                        if (Utils.variance(numAttVals) == 0) {
                            corr = 0;
                        }
                    }

                    sumCorr += classValCounts[j] * corr;
                }
                m_correlations[i] = sumCorr / sumClass;
            }
        }

        if (nominalIndexes.size() > 0) {
            for (Integer i : nominalIndexes) {
                if (m_detailedOutput) {
                    m_detailedOutputBuff.append("\n\n").append(data.attribute(i).name());
                }

                double sumForAtt = 0;
                double corrForAtt = 0;
                for (int j = 0; j < data.attribute(i).numValues(); j++) {
                    double sumForValue = Utils.sum(nomAtts[i][j]);
                    double corr = 0;
                    double sumCorr = 0;
                    double avgCorrForValue = 0;

                    sumForAtt += sumForValue;
                    for (int k = 0; k < numClasses; k++) {

                        // corr between value j and class k
                        corr = Utils.correlation(nomAtts[i][j], binarizedClasses[k],
                                binarizedClasses[k].length);

                        // useless attribute - all instances have the same value
                        if (sumForValue == numInstances || sumForValue == 0) {
                            corr = 0;
                        }
                        if (corr < 0.0) {
                            corr = -corr;
                        }
                        sumCorr += classValCounts[k] * corr;
                    }
                    avgCorrForValue = sumCorr / sumClass;
                    corrForAtt += sumForValue * avgCorrForValue;

                    if (m_detailedOutput) {
                        m_detailedOutputBuff.append("\n\t").append(data.attribute(i).value(j)).append(": ");
                        m_detailedOutputBuff.append(Utils.doubleToString(avgCorrForValue, 6));
                    }
                }

                // the weighted average corr for att i as
                // a whole (wighted by value frequencies)
                m_correlations[i] = (sumForAtt > 0) ? corrForAtt / sumForAtt : 0;
            }
        }
    }

    if (m_detailedOutputBuff != null && m_detailedOutputBuff.length() > 0) {
        m_detailedOutputBuff.append("\n");
    }
}

From source file:ml.engine.LibSVM.java

License:Open Source License

/**
 * returns an instance into a sparse libsvm array
 * /*from   w ww. j ava2  s . co m*/
 * @param instance the instance to work on
 * @return the libsvm array
 * @throws Exception if setup of array fails
 */
protected Object instanceToArray(Instance instance) throws Exception {
    int index;
    int count;
    int i;
    Object result;

    // determine number of non-zero attributes
    /*
     * for (i = 0; i < instance.numAttributes(); i++) { if (i ==
     * instance.classIndex()) continue; if (instance.value(i) != 0) count++; }
     */
    count = 0;
    for (i = 0; i < instance.numValues(); i++) {
        if (instance.index(i) == instance.classIndex()) {
            continue;
        }
        if (instance.valueSparse(i) != 0) {
            count++;
        }
    }

    // fill array
    /*
     * result = Array.newInstance(Class.forName(CLASS_SVMNODE), count); index =
     * 0; for (i = 0; i < instance.numAttributes(); i++) { if (i ==
     * instance.classIndex()) continue; if (instance.value(i) == 0) continue;
     * 
     * Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
     * setField(Array.get(result, index), "index", new Integer(i + 1));
     * setField(Array.get(result, index), "value", new
     * Double(instance.value(i))); index++; }
     */

    result = Array.newInstance(Class.forName(CLASS_SVMNODE), count);
    index = 0;
    for (i = 0; i < instance.numValues(); i++) {

        int idx = instance.index(i);
        if (idx == instance.classIndex()) {
            continue;
        }
        if (instance.valueSparse(i) == 0) {
            continue;
        }

        Array.set(result, index, Class.forName(CLASS_SVMNODE).newInstance());
        setField(Array.get(result, index), "index", new Integer(idx + 1));
        setField(Array.get(result, index), "value", new Double(instance.valueSparse(i)));
        index++;
    }

    return result;
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 *
 * @param instance the new training instance to include in the model
 *//*  w w w  . jav  a 2s . c  o m*/
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.reset == true) {
        this.m_numClasses = inst.numClasses();
        double laplace = this.laplaceCorrectionOption.getValue();
        int numAttributes = inst.numAttributes();

        m_probOfClass = new double[m_numClasses];
        Arrays.fill(m_probOfClass, laplace);

        m_classTotals = new double[m_numClasses];
        Arrays.fill(m_classTotals, laplace * numAttributes);

        m_wordTotalForClass = new DoubleVector[m_numClasses];
        for (int i = 0; i < m_numClasses; i++) {
            //Arrays.fill(wordTotal, laplace);
            m_wordTotalForClass[i] = new DoubleVector();
        }
        this.reset = false;
    }
    // Update classifier
    int classIndex = inst.classIndex();
    int classValue = (int) inst.value(classIndex);

    double w = inst.weight();
    m_probOfClass[classValue] += w;

    m_classTotals[classValue] += w * totalSize(inst);
    double total = m_classTotals[classValue];

    for (int i = 0; i < inst.numValues(); i++) {
        int index = inst.index(i);
        if (index != classIndex && !inst.isMissing(i)) {
            //m_wordTotalForClass[index][classValue] += w * inst.valueSparse(i);
            double laplaceCorrection = 0.0;
            if (m_wordTotalForClass[classValue].getValue(index) == 0) {
                laplaceCorrection = this.laplaceCorrectionOption.getValue();
            }
            m_wordTotalForClass[classValue].addToValue(index, w * inst.valueSparse(i) + laplaceCorrection);
        }
    }
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance.//from   ww  w .  ja  v  a  2s  .c o  m
 *
 * @param instance the instance to be classified
 * @return predicted class probability distribution
 */
@Override
public double[] getVotesForInstance(Instance instance) {
    if (this.reset == true) {
        return new double[2];
    }
    double[] probOfClassGivenDoc = new double[m_numClasses];
    double totalSize = totalSize(instance);

    for (int i = 0; i < m_numClasses; i++) {
        probOfClassGivenDoc[i] = Math.log(m_probOfClass[i]) - totalSize * Math.log(m_classTotals[i]);
    }

    for (int i = 0; i < instance.numValues(); i++) {

        int index = instance.index(i);
        if (index == instance.classIndex() || instance.isMissing(i)) {
            continue;
        }

        double wordCount = instance.valueSparse(i);
        for (int c = 0; c < m_numClasses; c++) {
            double value = m_wordTotalForClass[c].getValue(index);
            probOfClassGivenDoc[c] += wordCount
                    * Math.log(value == 0 ? this.laplaceCorrectionOption.getValue() : value);
        }
    }

    return Utils.logs2probs(probOfClassGivenDoc);
}

From source file:moa.classifiers.bayes.NaiveBayesMultinomial.java

License:Open Source License

public double totalSize(Instance instance) {
    int classIndex = instance.classIndex();
    double total = 0.0;
    for (int i = 0; i < instance.numValues(); i++) {
        int index = instance.index(i);
        if (index == classIndex || instance.isMissing(i)) {
            continue;
        }/*  ww  w .j av a  2s  . c om*/
        double count = instance.valueSparse(i);
        if (count >= 0) {
            total += count;
        } else {
            //throw new Exception("Numeric attribute value is not >= 0. " + i + " " + index + " " +
            //          instance.valueSparse(i) + " " + " " + instance);
        }
    }
    return total;
}

From source file:moa.classifiers.functions.SGD.java

License:Open Source License

protected static double dotProd(Instance inst1, DoubleVector weights, int classIndex) {
    double result = 0;

    int n1 = inst1.numValues();
    int n2 = weights.numValues();

    for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
        int ind1 = inst1.index(p1);
        int ind2 = p2;
        if (ind1 == ind2) {
            if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
                result += inst1.valueSparse(p1) * weights.getValue(p2);
            }//from ww  w. j  a va 2 s .c o m
            p1++;
            p2++;
        } else if (ind1 > ind2) {
            p2++;
        } else {
            p1++;
        }
    }
    return (result);
}

From source file:moa.classifiers.functions.SGD.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 *
 * @param instance    the new training instance to include in the model
 */// w w w .  ja v  a  2  s. co m
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        m_weights = new DoubleVector();
        m_bias = 0.0;
    }

    if (!instance.classIsMissing()) {

        double wx = dotProd(instance, m_weights, instance.classIndex());

        double y;
        double z;
        if (instance.classAttribute().isNominal()) {
            y = (instance.classValue() == 0) ? -1 : 1;
            z = y * (wx + m_bias);
        } else {
            y = instance.classValue();
            z = y - (wx + m_bias);
            y = 1;
        }

        // Compute multiplier for weight decay
        double multiplier = 1.0;
        if (m_numInstances == 0) {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
        } else {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
        }
        for (int i = 0; i < m_weights.numValues(); i++) {
            m_weights.setValue(i, m_weights.getValue(i) * multiplier);
        }

        // Only need to do the following if the loss is non-zero
        if (m_loss != HINGE || (z < 1)) {

            // Compute Factor for updates
            double factor = m_learningRate * y * dloss(z);

            // Update coefficients for attributes
            int n1 = instance.numValues();
            for (int p1 = 0; p1 < n1; p1++) {
                int indS = instance.index(p1);
                if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
                    m_weights.addToValue(indS, factor * instance.valueSparse(p1));
                }
            }

            // update the bias
            m_bias += factor;
        }
        m_t++;
    }
}

From source file:moa.classifiers.functions.SGDMultiClass.java

License:Open Source License

public void trainOnInstanceImpl(Instance instance, int classLabel) {
    if (!instance.classIsMissing()) {

        double wx = dotProd(instance, m_weights[classLabel], instance.classIndex());

        double y;
        double z;
        if (instance.classAttribute().isNominal()) {
            y = (instance.classValue() != classLabel) ? -1 : 1;
            z = y * (wx + m_bias[classLabel]);
        } else {//from   w  w  w  .  ja v  a2s  .  com
            y = instance.classValue();
            z = y - (wx + m_bias[classLabel]);
            y = 1;
        }

        // Compute multiplier for weight decay
        double multiplier = 1.0;
        if (m_numInstances == 0) {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
        } else {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
        }
        for (int i = 0; i < m_weights[classLabel].numValues(); i++) {
            m_weights[classLabel].setValue(i, m_weights[classLabel].getValue(i) * multiplier);
        }

        // Only need to do the following if the loss is non-zero
        if (m_loss != HINGE || (z < 1)) {

            // Compute Factor for updates
            double factor = m_learningRate * y * dloss(z);

            // Update coefficients for attributes
            int n1 = instance.numValues();
            for (int p1 = 0; p1 < n1; p1++) {
                int indS = instance.index(p1);
                if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
                    m_weights[classLabel].addToValue(indS, factor * instance.valueSparse(p1));
                }
            }

            // update the bias
            m_bias[classLabel] += factor;
        }

    }
}

From source file:moa.classifiers.functions.SGDOld.java

License:Open Source License

protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
    double result = 0;

    int n1 = inst1.numValues();
    int n2 = weights.length - 1;

    for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {
        int ind1 = inst1.index(p1);
        int ind2 = p2;
        if (ind1 == ind2) {
            if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
                result += inst1.valueSparse(p1) * weights[p2];
            }/*  w  w  w . ja v  a 2 s . co  m*/
            p1++;
            p2++;
        } else if (ind1 > ind2) {
            p2++;
        } else {
            p1++;
        }
    }
    return (result);
}

From source file:moa.classifiers.functions.SGDOld.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 *
 * @param instance    the new training instance to include in the model
 *///  ww w .  j  a  va2 s  .c om
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        m_weights = new double[instance.numAttributes() + 1];
    }

    if (!instance.classIsMissing()) {

        double wx = dotProd(instance, m_weights, instance.classIndex());

        double y;
        double z;
        if (instance.classAttribute().isNominal()) {
            y = (instance.classValue() == 0) ? -1 : 1;
            z = y * (wx + m_weights[m_weights.length - 1]);
        } else {
            y = instance.classValue();
            z = y - (wx + m_weights[m_weights.length - 1]);
            y = 1;
        }

        // Compute multiplier for weight decay
        double multiplier = 1.0;
        if (m_numInstances == 0) {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_t;
        } else {
            multiplier = 1.0 - (m_learningRate * m_lambda) / m_numInstances;
        }
        for (int i = 0; i < m_weights.length - 1; i++) {
            m_weights[i] *= multiplier;
        }

        // Only need to do the following if the loss is non-zero
        if (m_loss != HINGE || (z < 1)) {

            // Compute Factor for updates
            double factor = m_learningRate * y * dloss(z);

            // Update coefficients for attributes
            int n1 = instance.numValues();
            for (int p1 = 0; p1 < n1; p1++) {
                int indS = instance.index(p1);
                if (indS != instance.classIndex() && !instance.isMissingSparse(p1)) {
                    m_weights[indS] += factor * instance.valueSparse(p1);
                }
            }

            // update the bias
            m_weights[m_weights.length - 1] += factor;
        }
        m_t++;
    }
}