Example usage for weka.core Instance valueSparse

Introduction

This page collects example usages of the weka.core.Instance method valueSparse.

Prototype

public double valueSparse(int indexOfIndex);

Source Link

Document

Returns an instance's attribute value in internal format, given an index in the sparse representation.

Usage

From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java

License:Open Source License

/**
 * Builds the evaluator by counting, for every attribute other than the class, the number of
 * instances that store a non-zero value for it. The counts are written to {@code m_DFs},
 * indexed by attribute index.
 *
 * <p>When {@code m_missingAsZero} is set, values flagged by {@code Instance.isMissingValue}
 * are not counted. Otherwise only the {@code != 0.0} test is applied, so a missing value is
 * counted whenever its internal representation differs from zero.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the capabilities check rejects the data
 */
public void buildEvaluator(Instances data) throws Exception {

    // Reject data this evaluator cannot handle before touching any state.
    getCapabilities().testWithFail(data);

    final int classIdx = data.classIndex();
    m_DFs = new int[data.numAttributes()];

    for (Enumeration instances = data.enumerateInstances(); instances.hasMoreElements();) {
        Instance current = (Instance) instances.nextElement();
        int storedCount = current.numValues();

        // Walk only the values that are actually stored in the instance.
        for (int pos = 0; pos < storedCount; pos++) {
            int attr = current.index(pos);
            if (attr == classIdx) {
                continue;
            }

            double stored = current.valueSparse(pos);
            if (stored == 0.0) {
                // Explicit zeros never contribute to the frequency.
                continue;
            }
            if (m_missingAsZero && Instance.isMissingValue(stored)) {
                // Missing values are treated like zeros in this mode.
                // (isMissingSparse(pos) would be an alternative way to test this.)
                continue;
            }
            m_DFs[attr]++;
        }
    }
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Tests whether the given instance satisfies every item of this item set.
 *
 * <p>An entry of {@code m_items} greater than -1 is a required value for the attribute at
 * that position; other entries place no constraint on the attribute.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */
public boolean containedBy(Instance instance) {

    if (m_treatZeroAsMissing && instance instanceof weka.core.SparseInstance) {
        // Sparse case: walk the stored values of the instance and the item array in
        // parallel. An attribute that is not stored counts as missing here.
        final int storedCount = instance.numValues();
        final int itemCount = m_items.length;
        int instPos = 0;
        int itemPos = 0;

        while (instPos < storedCount || itemPos < itemCount) {
            // Once the stored values are exhausted no attribute index can match any more.
            int storedAttr = (instPos < storedCount) ? instance.index(instPos) : Integer.MAX_VALUE;
            int required = m_items[itemPos];

            if (required > -1) {
                // A constrained attribute must be stored, non-missing and equal.
                if (itemPos != storedAttr) {
                    return false;
                }
                if (instance.isMissingSparse(instPos)) {
                    return false;
                }
                if (required != (int) instance.valueSparse(instPos)) {
                    return false;
                }
                instPos++;
                itemPos++;
            } else if (itemPos < storedAttr) {
                // Unconstrained attribute that the instance does not store.
                itemPos++;
            } else if (itemPos == storedAttr) {
                // Unconstrained attribute that the instance does store.
                itemPos++;
                instPos++;
            }
        }
        return true;
    }

    // General case: check every constrained attribute directly.
    for (int attr = 0; attr < instance.numAttributes(); attr++) {
        int required = m_items[attr];
        if (required <= -1) {
            continue;
        }
        if (instance.isMissing(attr)) {
            return false;
        }
        int actual = (int) instance.value(attr);
        if (m_treatZeroAsMissing && actual == 0) {
            return false;
        }
        if (required != actual) {
            return false;
        }
    }
    return true;
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Numeric attributes are discretized, or converted to binary
 * attributes when m_Binarize is set.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    // NOTE(review): both values are read BEFORE the data is filtered below and are reused
    // afterwards; this assumes the filters keep the class position and the number of
    // instances unchanged -- confirm.
    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    // Replace numeric attributes: Discretize by default, NumericToBinary when m_Binarize is set.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space for one contingency table per non-class attribute:
    // counts[attribute][attribute value][class value].
    // Each table has one extra row (index numValues) for a missing attribute value and
    // one extra column (index numClasses) for a missing class value.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Total instance weight per class value; the last slot collects instances whose
    // class is missing.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Seed row 0 of every table with the full class distribution, i.e. start from the
    // assumption that every instance has value index 0 for every attribute. The loop
    // below moves weight out of row 0 for each value an instance actually stores.
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts: iterate over the stored values of each instance only
    // (inst.numValues() / inst.index(i)), skipping the class attribute.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // Attribute value present, class missing: use the extra class column.
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // Attribute value missing, class present: use the extra value row.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // Both missing: use the corner cell of the extra row and column.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // Nothing missing: move the weight from row 0 to the observed cell.
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals over the non-missing part of the table only
                // (the extra row and column are excluded by the loop bounds).
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                // Redistribute only when the non-missing part carries some weight; the
                // divisions by sum below rely on that (Utils.gr presumably tests
                // "greater than" -- confirm).
                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row: the missing-value row
                    // is spread over the values in proportion to the row sums.
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column: the missing-class
                    // column is spread over the classes in proportion to the column sums.
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell: the corner cell (both
                    // missing) is spread in proportion to each cell's share of the total.
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table; it has numValues x numClasses cells, so
                    // the extra missing row and column are dropped for this attribute.
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values, one per non-class attribute; the entry at the class
    // index keeps its default of 0.
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Signals that the current batch of input is complete. For the first batch this builds
 * the dictionary, converts all buffered instances, optionally computes the average
 * document length and normalizes, and queues the results for output.
 *
 * @return true if there are instances pending output.
 * @throws IllegalStateException
 *             if no input structure has been defined.
 * @throws Exception
 *             if one of the conversion or normalization steps fails.
 */
public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
        throw new IllegalStateException("No input instance format defined");
    }

    // Work is only needed for the first batch; for later batches input() has
    // already done everything.
    if (!isFirstBatchDone()) {

        // The dictionary is derived from the first batch (the training data).
        determineDictionary();

        // Convert every buffered instance, without document-length normalization.
        FastVector converted = new FastVector();
        int firstCopy = 0;
        int row = 0;
        while (row < m_NumInstances) {
            firstCopy = convertInstancewoDocNorm(getInputFormat().instance(row), converted);
            row++;
        }

        // Average Euclidean length over the attributes at index firstCopy and above.
        if (m_filterType != FILTER_NONE) {
            m_AvgDocLength = 0;
            for (int n = 0; n < converted.size(); n++) {
                Instance doc = (Instance) converted.elementAt(n);
                double squaredLength = 0;
                for (int pos = 0; pos < doc.numValues(); pos++) {
                    if (doc.index(pos) < firstCopy) {
                        continue;
                    }
                    double stored = doc.valueSparse(pos);
                    squaredLength += stored * stored;
                }
                m_AvgDocLength += Math.sqrt(squaredLength);
            }
            m_AvgDocLength /= m_NumInstances;
        }

        // Normalize all converted instances when requested.
        if (m_filterType == FILTER_NORMALIZE_ALL) {
            for (int n = 0; n < converted.size(); n++) {
                normalizeInstance((Instance) converted.elementAt(n), firstCopy);
            }
        }

        // Hand everything over to the output queue.
        for (int n = 0; n < converted.size(); n++) {
            push((Instance) converted.elementAt(n));
        }
    }

    // Drop the buffered input and mark the batch as done.
    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;
    return numPendingOutput() != 0;
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes)./* www .j  a  v  a 2  s . c o m*/
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */
private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Normalizes the values of a SparseInstance in L2 norm
 *
 * @author Sugato Basu/*  w  ww.j  av a 2 s .com*/
 * @param inst SparseInstance to be normalized
 */

public static void normalizeSparseInstance(Instance inst) throws Exception {
    double norm = 0;
    int length = inst.numValues();

    if (!(inst instanceof SparseInstance)) {
        System.err.println("Not SparseInstance, using normalizeInstance function instead");
        normalizeInstance(inst);
    }

    for (int i = 0; i < length; i++) {
        if (inst.index(i) != inst.classIndex()) { // don't normalize the class index
            norm += inst.valueSparse(i) * inst.valueSparse(i);
        }
    }
    norm = Math.sqrt(norm);
    for (int i = 0; i < length; i++) { // don't normalize the class index
        if (inst.index(i) != inst.classIndex()) {
            inst.setValueSparse(i, inst.valueSparse(i) / norm);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** This function divides every attribute value in an instance by
 *  the instance weight -- useful to find the mean of a cluster in
 *  Euclidean space /* ww w  . j a  va  2s.c om*/
 *  @param inst Instance passed in for normalization (destructive update)
 */
public static void normalizeByWeight(Instance inst) {
    double weight = inst.weight();
    if (inst instanceof SparseInstance) {
        for (int i = 0; i < inst.numValues(); i++) {
            inst.setValueSparse(i, inst.valueSparse(i) / weight);
        }
    } else if (!(inst instanceof SparseInstance)) {
        for (int i = 0; i < inst.numAttributes(); i++) {
            inst.setValue(i, inst.value(i) / weight);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/**
 * Returns a new instance whose attribute values are the element-wise sum of the two
 * given instances and whose weight is the sum of their weights. Both instances must be
 * sparse or both must be non-sparse; the result has the same kind.
 *
 * @param inst1 first summand
 * @param inst2 second summand
 * @param m_Instances dataset the new instance is attached to
 * @return the newly created sum instance
 * @throws Exception if the attribute counts differ or exactly one instance is sparse
 */

public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    final int attributeCount = inst1.numAttributes();
    if (attributeCount != inst2.numAttributes()) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }

    final boolean firstSparse = inst1 instanceof SparseInstance;
    final boolean secondSparse = inst2 instanceof SparseInstance;
    if (firstSparse != secondSparse) {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }

    final double combinedWeight = inst1.weight() + inst2.weight();
    // A fresh double array already holds zeros in every slot.
    final double[] sums = new double[attributeCount];

    if (firstSparse) {
        // Scatter the stored values of the first instance, then add those of the second.
        for (int pos = 0; pos < inst1.numValues(); pos++) {
            sums[inst1.index(pos)] = inst1.valueSparse(pos);
        }
        for (int pos = 0; pos < inst2.numValues(); pos++) {
            sums[inst2.index(pos)] += inst2.valueSparse(pos);
        }
    } else {
        for (int attr = 0; attr < attributeCount; attr++) {
            sums[attr] = inst1.value(attr) + inst2.value(attr);
        }
    }

    Instance total = firstSparse
            ? new SparseInstance(combinedWeight, sums)
            : new Instance(combinedWeight, sums);
    total.setDataset(m_Instances);
    return total;
}

From source file:cn.edu.xjtu.dbmine.StringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes)./*from www. j  a v a2  s . c  o m*/
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */

private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}

From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java

License:Apache License

/**
 * Samoa instance from weka instance./*from   w  ww .  j  a  v  a  2s  . c  om*/
 *
 * @param inst the inst
 * @return the instance
 */
public Instance samoaInstance(weka.core.Instance inst) {
    Instance samoaInstance;
    if (inst instanceof weka.core.SparseInstance) {
        double[] attributeValues = new double[inst.numValues()];
        int[] indexValues = new int[inst.numValues()];
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != inst.classIndex()) {
                attributeValues[i] = inst.valueSparse(i);
                indexValues[i] = inst.index(i);
            }
        }
        samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes());
    } else {
        samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray());
        //samoaInstance.deleteAttributeAt(inst.classIndex());
    }
    if (this.samoaInstanceInformation == null) {
        this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset());
    }
    samoaInstance.setDataset(samoaInstanceInformation);
    samoaInstance.setClassValue(inst.classValue());
    return samoaInstance;
}