Example usage for weka.core Instance index

List of usage examples for weka.core Instance index

Introduction

On this page you can find example usage for weka.core.Instance.index(int).

Prototype

public int index(int position);

Document

Returns the index of the attribute stored at the given position in the sparse representation.
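
To make the prototype concrete, here is a minimal sketch of the usual pattern (not taken from any of the source files below; the class name SparseIterationExample is illustrative, and it assumes the instance has its dataset and class attribute set): loop over the stored positions of an Instance, map each position to its attribute index with index(int), and read the corresponding value with valueSparse(int). For a SparseInstance only the stored values are visited; for a dense instance index(i) simply returns i.

import weka.core.Instance;

public class SparseIterationExample {

    /** Prints every stored attribute value of the instance, skipping the class attribute. */
    public static void printStoredValues(Instance inst) {
        for (int pos = 0; pos < inst.numValues(); pos++) {
            int attIndex = inst.index(pos);       // attribute index behind this stored position
            if (attIndex == inst.classIndex()) {
                continue;                         // do not print the class attribute
            }
            double value = inst.valueSparse(pos); // value stored at this position
            System.out.println("attribute " + attIndex + " = " + value);
        }
    }
}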

Usage

From source file:br.com.ufu.lsi.utils.DocumentFrequencyAttributeEval.java

License:Open Source License

/**
 * Initializes the document frequency attribute evaluator by counting, for each
 * non-class attribute, the number of training instances with a non-zero value.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();

    int numAttributes = data.numAttributes();
    m_DFs = new int[numAttributes];
    Enumeration e = data.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance instance = (Instance) e.nextElement();
        int numValues = instance.numValues();
        for (int valueIndex = 0; valueIndex < numValues; valueIndex++) {
            int attIndex = instance.index(valueIndex);
            if (attIndex != classIndex) {
                double value = instance.valueSparse(valueIndex);
                // missing values are treated as 0
                if (m_missingAsZero) {
                    if (!Instance.isMissingValue(value) && value != 0.0) { // could also use isMissingSparse(valueIndex), or, less efficiently, isMissing(attIndex)
                        m_DFs[attIndex]++;
                        //m_DFs[ attIndex ]+=value ;
                    }
                } else {
                    if (value != 0.0) {
                        m_DFs[attIndex]++;
                        //m_DFs[ attIndex ]+=value ;
                    }
                }
            }
        }
    }
}

From source file:cba.ItemSet.java

License:Open Source License

/**
 * Checks if an instance contains an item set.
 *
 * @param instance the instance to be tested
 * @return true if the given instance contains this item set
 */

public boolean containedBy(Instance instance) {

    if (instance instanceof weka.core.SparseInstance && m_treatZeroAsMissing) {
        int numInstVals = instance.numValues();
        int numItemSetVals = m_items.length;

        for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals;) {
            int instIndex = Integer.MAX_VALUE;
            if (p1 < numInstVals) {
                instIndex = instance.index(p1);
            }
            int itemIndex = p2;

            if (m_items[itemIndex] > -1) {
                if (itemIndex != instIndex) {
                    return false;
                } else {
                    if (instance.isMissingSparse(p1)) {
                        return false;
                    }
                    if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
                        return false;
                    }
                }

                p1++;
                p2++;
            } else {
                if (itemIndex < instIndex) {
                    p2++;
                } else if (itemIndex == instIndex) {
                    p2++;
                    p1++;
                }
            }
        }
    } else {
        for (int i = 0; i < instance.numAttributes(); i++)
            if (m_items[i] > -1) {
                if (instance.isMissing(i) || (m_treatZeroAsMissing && (int) instance.value(i) == 0))
                    return false;
                if (m_items[i] != (int) instance.value(i))
                    return false;
            }
    }

    return true;
}

From source file:ChiSquare.ChiSquaredAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all attributes that are numeric.
 *
 * @param data set of instances serving as training data 
 * @throws Exception if the evaluator has not been 
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = ContingencyTables.chiVal(ContingencyTables.reduceMatrix(counts[i]), false);
        }
    }
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Signify that this batch of input to the filter is finished. If the filter
 * requires all instances prior to filtering, output() may now be called to
 * retrieve the filtered instances.
 * 
 * @return true if there are instances pending output.
 * @throws IllegalStateException
 *             if no input structure has been defined.
 */
public boolean batchFinished() throws Exception {

    if (getInputFormat() == null) {
        throw new IllegalStateException("No input instance format defined");
    }

    // We only need to do something in this method
    // if the first batch hasn't been processed. Otherwise
    // input() has already done all the work.
    if (!isFirstBatchDone()) {

        // Determine the dictionary from the first batch (training data)
        determineDictionary();

        // Convert all instances w/o normalization
        FastVector fv = new FastVector();
        int firstCopy = 0;
        for (int i = 0; i < m_NumInstances; i++) {
            firstCopy = convertInstancewoDocNorm(getInputFormat().instance(i), fv);
        }

        // Need to compute average document length if necessary
        if (m_filterType != FILTER_NONE) {
            m_AvgDocLength = 0;
            for (int i = 0; i < fv.size(); i++) {
                Instance inst = (Instance) fv.elementAt(i);
                double docLength = 0;
                for (int j = 0; j < inst.numValues(); j++) {
                    if (inst.index(j) >= firstCopy) {
                        docLength += inst.valueSparse(j) * inst.valueSparse(j);
                    }
                }
                m_AvgDocLength += Math.sqrt(docLength);
            }
            m_AvgDocLength /= m_NumInstances;
        }

        // Perform normalization if necessary.
        if (m_filterType == FILTER_NORMALIZE_ALL) {
            for (int i = 0; i < fv.size(); i++) {
                normalizeInstance((Instance) fv.elementAt(i), firstCopy);
            }
        }

        // Push all instances into the output queue
        for (int i = 0; i < fv.size(); i++) {
            push((Instance) fv.elementAt(i));
        }
    }

    // Flush the input
    flushInput();

    m_NewBatch = true;
    m_FirstBatchDone = true;
    return (numPendingOutput() != 0);
}

From source file:classifier.CustomStringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */
private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Normalizes the values of a SparseInstance in L2 norm
 *
 * @author Sugato Basu
 * @param inst SparseInstance to be normalized
 */

public static void normalizeSparseInstance(Instance inst) throws Exception {
    double norm = 0;
    int length = inst.numValues();

    if (!(inst instanceof SparseInstance)) {
        System.err.println("Not SparseInstance, using normalizeInstance function instead");
        normalizeInstance(inst);
        return; // delegate dense instances entirely to normalizeInstance
    }

    for (int i = 0; i < length; i++) {
        if (inst.index(i) != inst.classIndex()) { // don't normalize the class index
            norm += inst.valueSparse(i) * inst.valueSparse(i);
        }
    }
    norm = Math.sqrt(norm);
    for (int i = 0; i < length; i++) { // don't normalize the class index
        if (inst.index(i) != inst.classIndex()) {
            inst.setValueSparse(i, inst.valueSparse(i) / norm);
        }
    }
}

From source file:cluster.ABC.ClusterUtils.java

License:Open Source License

/** Finds sum of 2 instances (handles sparse and non-sparse)
 */

public static Instance sumInstances(Instance inst1, Instance inst2, Instances m_Instances) throws Exception {
    int numAttributes = inst1.numAttributes();
    if (inst2.numAttributes() != numAttributes) {
        throw new Exception("Error!! inst1 and inst2 should have same number of attributes.");
    }
    double weight1 = inst1.weight(), weight2 = inst2.weight();
    double[] values = new double[numAttributes];

    for (int i = 0; i < numAttributes; i++) {
        values[i] = 0;
    }

    if (inst1 instanceof SparseInstance && inst2 instanceof SparseInstance) {
        for (int i = 0; i < inst1.numValues(); i++) {
            int indexOfIndex = inst1.index(i);
            values[indexOfIndex] = inst1.valueSparse(i);
        }
        for (int i = 0; i < inst2.numValues(); i++) {
            int indexOfIndex = inst2.index(i);
            values[indexOfIndex] += inst2.valueSparse(i);
        }
        SparseInstance newInst = new SparseInstance(weight1 + weight2, values);
        newInst.setDataset(m_Instances);
        return newInst;
    } else if (!(inst1 instanceof SparseInstance) && !(inst2 instanceof SparseInstance)) {
        for (int i = 0; i < numAttributes; i++) {
            values[i] = inst1.value(i) + inst2.value(i);
        }
    } else {
        throw new Exception("Error!! inst1 and inst2 should be both of same type -- sparse or non-sparse");
    }
    Instance newInst = new Instance(weight1 + weight2, values);
    newInst.setDataset(m_Instances);
    return newInst;
}

From source file:cn.edu.xjtu.dbmine.StringToWordVector.java

License:Open Source License

/**
 * Normalizes given instance to average doc length (only the newly
 * constructed attributes).
 * 
 * @param inst
 *            the instance to normalize
 * @param firstCopy
 * @throws Exception
 *             if avg. doc length not set
 */

private void normalizeInstance(Instance inst, int firstCopy) throws Exception {

    double docLength = 0;

    if (m_AvgDocLength < 0) {
        throw new Exception("Average document length not set.");
    }

    // Compute length of document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            docLength += inst.valueSparse(j) * inst.valueSparse(j);
        }
    }
    docLength = Math.sqrt(docLength);

    // Normalize document vector
    for (int j = 0; j < inst.numValues(); j++) {
        if (inst.index(j) >= firstCopy) {
            double val = inst.valueSparse(j) * m_AvgDocLength / docLength;
            inst.setValueSparse(j, val);
            if (val == 0) {
                System.err.println("setting value " + inst.index(j) + " to zero.");
                j--;
            }
        }
    }
}

From source file:com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java

License:Apache License

/**
 * Samoa instance from weka instance.
 *
 * @param inst the inst
 * @return the instance
 */
public Instance samoaInstance(weka.core.Instance inst) {
    Instance samoaInstance;
    if (inst instanceof weka.core.SparseInstance) {
        double[] attributeValues = new double[inst.numValues()];
        int[] indexValues = new int[inst.numValues()];
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != inst.classIndex()) {
                attributeValues[i] = inst.valueSparse(i);
                indexValues[i] = inst.index(i);
            }
        }
        samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes());
    } else {
        samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray());
        //samoaInstance.deleteAttributeAt(inst.classIndex());
    }
    if (this.samoaInstanceInformation == null) {
        this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset());
    }
    samoaInstance.setDataset(samoaInstanceInformation);
    samoaInstance.setClassValue(inst.classValue());
    return samoaInstance;
}

From source file:de.uni_potsdam.hpi.bpt.promnicat.util.WeightedEuclideanDistance.java

License:Open Source License

/**
 * Calculates the distance between two instances. Offers speed up (if the 
 * distance function class in use supports it) in nearest neighbour search by 
 * taking into account the cutOff or maximum distance. Depending on the 
 * distance function class, post processing of the distances by 
 * postProcessDistances(double []) may be required if this function is used.
 *
 * @param first    the first instance
 * @param second    the second instance
 * @param cutOffValue If the distance being calculated becomes larger than 
 *                    cutOffValue then the rest of the calculation is 
 *                    discarded.
 * @param stats    the performance stats object
 * @return       the distance between the two given instances or 
 *          Double.POSITIVE_INFINITY if the distance being 
 *          calculated becomes larger than cutOffValue. 
 */
public double distance(Instance first, Instance second, double cutOffValue, PerformanceStats stats) {
    double distance = 0;
    int firstI, secondI;
    int firstNumValues = first.numValues();
    int secondNumValues = second.numValues();
    int numAttributes = m_Data.numAttributes();
    int classIndex = m_Data.classIndex();
    double weights = 1;

    validate();

    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        weights += first.attribute(p1).weight();
        if (p1 >= firstNumValues)
            firstI = numAttributes;
        else
            firstI = first.index(p1);

        if (p2 >= secondNumValues)
            secondI = numAttributes;
        else
            secondI = second.index(p2);

        if (firstI == classIndex) {
            p1++;
            continue;
        }
        if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) {
            p1++;
            continue;
        }

        if (secondI == classIndex) {
            p2++;
            continue;
        }
        if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) {
            p2++;
            continue;
        }

        double diff;

        if (firstI == secondI) {
            diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2));
            p1++;
            p2++;
        } else if (firstI > secondI) {
            diff = difference(secondI, 0, second.valueSparse(p2));
            p2++;
        } else {
            diff = difference(firstI, first.valueSparse(p1), 0);
            p1++;
        }
        if (stats != null)
            stats.incrCoordCount();

        distance = updateDistance(distance, diff);
        if (distance > cutOffValue)
            return Double.POSITIVE_INFINITY;
    }

    if (weights > 1) {
        return distance / (weights - 1);
    }
    return distance / weights;
}