Example usage for weka.core Instance numValues

List of usage examples for weka.core Instance numValues

Introduction

In this page you can find the example usage for weka.core Instance numValues.

Prototype

public int numValues();

Source Link

Document

Returns the number of values present in a sparse representation.

Usage

From source file: com.yahoo.labs.samoa.instances.WekaToSamoaInstanceConverter.java

License: Apache License

/**
 * Builds a SAMOA instance from a weka instance, preserving the weight and
 * (for sparse instances) the sparse representation.
 *
 * @param inst the weka instance to convert
 * @return the equivalent SAMOA instance
 */
public Instance samoaInstance(weka.core.Instance inst) {
    Instance samoaInstance;
    if (inst instanceof weka.core.SparseInstance) {
        // BUGFIX: the original sized both arrays with inst.numValues() and
        // only wrote slot i when the entry was not the class attribute, so a
        // sparse slot holding the class value left a stale
        // (index 0, value 0.0) pair behind, corrupting attribute 0 of the
        // resulting SparseInstance. Count the non-class entries first and
        // write them compactly instead.
        int numNonClass = 0;
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != inst.classIndex()) {
                numNonClass++;
            }
        }
        double[] attributeValues = new double[numNonClass];
        int[] indexValues = new int[numNonClass];
        int pos = 0;
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != inst.classIndex()) {
                attributeValues[pos] = inst.valueSparse(i);
                indexValues[pos] = inst.index(i);
                pos++;
            }
        }
        samoaInstance = new SparseInstance(inst.weight(), attributeValues, indexValues, inst.numAttributes());
    } else {
        samoaInstance = new DenseInstance(inst.weight(), inst.toDoubleArray());
        //samoaInstance.deleteAttributeAt(inst.classIndex());
    }
    // Lazily derive the SAMOA header information from the weka header the
    // first time an instance is converted.
    if (this.samoaInstanceInformation == null) {
        this.samoaInstanceInformation = this.samoaInstancesInformation(inst.dataset());
    }
    samoaInstance.setDataset(samoaInstanceInformation);
    samoaInstance.setClassValue(inst.classValue());
    return samoaInstance;
}

From source file:de.uni_potsdam.hpi.bpt.promnicat.util.WeightedEuclideanDistance.java

License:Open Source License

/**
 * Calculates the distance between two instances. Offers speed up (if the
 * distance function class in use supports it) in nearest neighbour search by
 * taking into account the cutOff or maximum distance. Depending on the
 * distance function class, post processing of the distances by
 * postProcessDistances(double []) may be required if this function is used.
 *
 * @param first       the first instance
 * @param second      the second instance
 * @param cutOffValue If the distance being calculated becomes larger than
 *                    cutOffValue then the rest of the calculation is
 *                    discarded.
 * @param stats       the performance stats object
 * @return the distance between the two given instances or
 *         Double.POSITIVE_INFINITY if the distance being
 *         calculated becomes larger than cutOffValue.
 */
public double distance(Instance first, Instance second, double cutOffValue, PerformanceStats stats) {
    double distance = 0;
    int firstI, secondI;
    int firstNumValues = first.numValues();
    int secondNumValues = second.numValues();
    int numAttributes = m_Data.numAttributes();
    int classIndex = m_Data.classIndex();
    double weights = 1;

    validate();

    // Merge-walk the two (possibly sparse) value lists in parallel; an
    // exhausted list is represented by the sentinel index numAttributes.
    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        if (p1 >= firstNumValues) {
            firstI = numAttributes;
        } else {
            firstI = first.index(p1);
            // BUGFIX: the weight was previously accumulated before p1 was
            // checked against firstNumValues (ArrayIndexOutOfBounds once the
            // first list was exhausted while the second still had values) and
            // used attribute(p1), which treats the sparse position p1 as an
            // attribute index. attributeSparse(p1) fetches the attribute
            // actually stored at sparse position p1.
            weights += first.attributeSparse(p1).weight();
        }

        if (p2 >= secondNumValues)
            secondI = numAttributes;
        else
            secondI = second.index(p2);

        // The class attribute never contributes to the distance.
        if (firstI == classIndex) {
            p1++;
            continue;
        }
        // Skip attributes deactivated via m_ActiveIndices.
        if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) {
            p1++;
            continue;
        }

        if (secondI == classIndex) {
            p2++;
            continue;
        }
        if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) {
            p2++;
            continue;
        }

        double diff;

        if (firstI == secondI) {
            // Both instances store this attribute explicitly.
            diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2));
            p1++;
            p2++;
        } else if (firstI > secondI) {
            // Attribute secondI is implicitly 0 in the first instance.
            diff = difference(secondI, 0, second.valueSparse(p2));
            p2++;
        } else {
            // Attribute firstI is implicitly 0 in the second instance.
            diff = difference(firstI, first.valueSparse(p1), 0);
            p1++;
        }
        if (stats != null)
            stats.incrCoordCount();

        distance = updateDistance(distance, diff);
        // Early exit for nearest-neighbour search with a known cutoff.
        if (distance > cutOffValue)
            return Double.POSITIVE_INFINITY;
    }

    // Normalize by the accumulated attribute weights (weights starts at 1,
    // hence the -1 correction when anything was accumulated).
    if (weights > 1) {
        return distance / (weights - 1);
    }
    return distance / weights;
}

From source file:edu.columbia.cs.ltrie.sampling.queries.generation.ChiSquaredWithYatesCorrectionAttributeEval.java

License:Open Source License

/**
 * Initializes a chi-squared attribute evaluator.
 * Discretizes all numeric attributes (or binarizes them when m_Binarize is
 * set), builds one contingency table of attribute value vs. class value per
 * attribute, optionally redistributes the counts attributed to missing
 * values, and stores a chi-squared statistic (with Yates' correction applied
 * in chiVal) for every non-class attribute in m_ChiSquareds.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been
 * generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    // Numeric attributes must be made nominal before counting.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters.
    // counts[k][v][c] = total weight of instances with value v of attribute
    // k and class c; the extra row (index numValues) and column (index
    // numClasses) collect weight for missing attribute / class values.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters: temp holds the overall class distribution,
    // with missing-class weight in the last slot.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Seed row 0 (attribute value 0) of every table with the full class
    // distribution; the counting loop below subtracts every observed
    // non-zero value back out, so row 0 ends up holding the counts for the
    // value that sparse representations omit (0).
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts. Only the values actually present in the (possibly
    // sparse) instance are visited; each observed value is added to its own
    // row/column and removed from the row-0 seed.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // Known attribute value, missing class.
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // Missing attribute value, known class.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // Both attribute value and class are missing.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // Both attribute value and class are known.
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required: spread each attribute's
    // missing-value row/column over the table in proportion to the
    // observed marginals.
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table (drops the missing-value
                    // row/column, which is now folded into the real cells)
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute chi-squared values
    m_ChiSquareds = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_ChiSquareds[i] = chiVal(ContingencyTables.reduceMatrix(counts[i]));
        }
    }
}

From source file:expshell.Neural.java

/**
 * Computes this neuron's output for the given instance: a weighted sum of
 * the instance's attribute values (weights[0] acting as a bias with a fixed
 * input of -1) passed through the threshold function. As a side effect,
 * snapshots the instance's values into the {@code values} field.
 */
public double cal(Instance ins) {
    // Bias term: weight 0 is multiplied by a constant input of -1.
    double activation = -weights.get(0);
    for (int w = 1; w < weights.size(); w++) {
        activation += weights.get(w) * ins.value(w - 1);
    }

    // Keep a copy of the instance's attribute values for later inspection.
    List<Double> snapshot = new ArrayList<Double>();
    for (int idx = 0; idx < ins.numValues(); idx++) {
        snapshot.add(ins.value(idx));
    }
    values = snapshot;

    //if (sum > 0)
    //    return 1.0;
    return threshold(activation);
}

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Computes the information gain of a single attribute. Discretizes all
 * numeric attributes (or binarizes them when m_Binarize is set), builds the
 * contingency tables of attribute value vs. class value, optionally
 * redistributes counts for missing values, and returns
 * H(class) - H(class | attribute) for the requested attribute.
 *
 * NOTE(review): all contingency tables are built but only counts[att] is
 * used, and m_InfoGains is reallocated on every call with only slot att
 * filled — presumably intentional for one-off queries, but worth confirming.
 *
 * @param data set of instances serving as training data
 * @param att  index of the attribute to evaluate
 * @return the information gain of attribute att
 * @throws Exception if the evaluator has not been generated successfully
 */
public double computeInfoGain(Instances data, int att) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    // Numeric attributes must be made nominal before counting.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters.
    // counts[k][v][c] = total weight of instances with value v of attribute
    // k and class c; the extra row (numValues) and column (numClasses)
    // collect weight for missing attribute / class values.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters: temp holds the overall class distribution,
    // missing-class weight in the last slot.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Seed row 0 with the full class distribution; observed non-zero
    // values are subtracted back out below, so row 0 ends up counting the
    // implicit value 0 of sparse representations.
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts: visit only the values present in each (possibly sparse)
    // instance, moving each observation out of the row-0 seed.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // Known attribute value, missing class.
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // Missing attribute value, known class.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // Both attribute value and class missing.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // Both attribute value and class known.
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required: spread the missing-value
    // row/column proportionally to the observed marginals.
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table (missing-value row/column
                    // folded into the real cells)
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains: H(class) - H(class | attribute) for att only.
    m_InfoGains = new double[data.numAttributes()];
    m_InfoGains[att] = (ContingencyTables.entropyOverColumns(counts[att])
            - ContingencyTables.entropyConditionedOnRows(counts[att]));

    return m_InfoGains[att];
}

From source file:feature.InfoGainEval.java

License:Open Source License

/**
 * Initializes an information gain attribute evaluator. Discretizes all
 * numeric attributes (or binarizes them when m_Binarize is set), builds one
 * contingency table of attribute value vs. class value per attribute,
 * optionally redistributes counts for missing values, and stores
 * H(class) - H(class | attribute) for every non-class attribute in
 * m_InfoGains.
 *
 * @param data set of instances serving as training data
 * @throws Exception if the evaluator has not been generated successfully
 */
public void buildEvaluator(Instances data) throws Exception {

    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    int classIndex = data.classIndex();
    int numInstances = data.numInstances();

    // Numeric attributes must be made nominal before counting.
    if (!m_Binarize) {
        Discretize disTransform = new Discretize();
        disTransform.setUseBetterEncoding(true);
        disTransform.setInputFormat(data);
        data = Filter.useFilter(data, disTransform);
    } else {
        NumericToBinary binTransform = new NumericToBinary();
        binTransform.setInputFormat(data);
        data = Filter.useFilter(data, binTransform);
    }
    int numClasses = data.attribute(classIndex).numValues();

    // Reserve space and initialize counters.
    // counts[k][v][c] = total weight of instances with value v of attribute
    // k and class c; the extra row (numValues) and column (numClasses)
    // collect weight for missing attribute / class values.
    double[][][] counts = new double[data.numAttributes()][][];
    for (int k = 0; k < data.numAttributes(); k++) {
        if (k != classIndex) {
            int numValues = data.attribute(k).numValues();
            counts[k] = new double[numValues + 1][numClasses + 1];
        }
    }

    // Initialize counters: temp holds the overall class distribution,
    // missing-class weight in the last slot.
    double[] temp = new double[numClasses + 1];
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        if (inst.classIsMissing()) {
            temp[numClasses] += inst.weight();
        } else {
            temp[(int) inst.classValue()] += inst.weight();
        }
    }
    // Seed row 0 with the full class distribution; observed non-zero
    // values are subtracted back out below, so row 0 ends up counting the
    // implicit value 0 of sparse representations.
    for (int k = 0; k < counts.length; k++) {
        if (k != classIndex) {
            for (int i = 0; i < temp.length; i++) {
                counts[k][0][i] = temp[i];
            }
        }
    }

    // Get counts: visit only the values present in each (possibly sparse)
    // instance, moving each observation out of the row-0 seed.
    for (int k = 0; k < numInstances; k++) {
        Instance inst = data.instance(k);
        for (int i = 0; i < inst.numValues(); i++) {
            if (inst.index(i) != classIndex) {
                if (inst.isMissingSparse(i) || inst.classIsMissing()) {
                    if (!inst.isMissingSparse(i)) {
                        // Known attribute value, missing class.
                        counts[inst.index(i)][(int) inst.valueSparse(i)][numClasses] += inst.weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    } else if (!inst.classIsMissing()) {
                        // Missing attribute value, known class.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][(int) inst
                                .classValue()] += inst.weight();
                        counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                    } else {
                        // Both attribute value and class missing.
                        counts[inst.index(i)][data.attribute(inst.index(i)).numValues()][numClasses] += inst
                                .weight();
                        counts[inst.index(i)][0][numClasses] -= inst.weight();
                    }
                } else {
                    // Both attribute value and class known.
                    counts[inst.index(i)][(int) inst.valueSparse(i)][(int) inst.classValue()] += inst.weight();
                    counts[inst.index(i)][0][(int) inst.classValue()] -= inst.weight();
                }
            }
        }
    }

    // distribute missing counts if required: spread the missing-value
    // row/column proportionally to the observed marginals.
    if (m_missing_merge) {

        for (int k = 0; k < data.numAttributes(); k++) {
            if (k != classIndex) {
                int numValues = data.attribute(k).numValues();

                // Compute marginals
                double[] rowSums = new double[numValues];
                double[] columnSums = new double[numClasses];
                double sum = 0;
                for (int i = 0; i < numValues; i++) {
                    for (int j = 0; j < numClasses; j++) {
                        rowSums[i] += counts[k][i][j];
                        columnSums[j] += counts[k][i][j];
                    }
                    sum += rowSums[i];
                }

                if (Utils.gr(sum, 0)) {
                    double[][] additions = new double[numValues][numClasses];

                    // Compute what needs to be added to each row
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            additions[i][j] = (rowSums[i] / sum) * counts[k][numValues][j];
                        }
                    }

                    // Compute what needs to be added to each column
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (columnSums[i] / sum) * counts[k][j][numClasses];
                        }
                    }

                    // Compute what needs to be added to each cell
                    for (int i = 0; i < numClasses; i++) {
                        for (int j = 0; j < numValues; j++) {
                            additions[j][i] += (counts[k][j][i] / sum) * counts[k][numValues][numClasses];
                        }
                    }

                    // Make new contingency table (missing-value row/column
                    // folded into the real cells)
                    double[][] newTable = new double[numValues][numClasses];
                    for (int i = 0; i < numValues; i++) {
                        for (int j = 0; j < numClasses; j++) {
                            newTable[i][j] = counts[k][i][j] + additions[i][j];
                        }
                    }
                    counts[k] = newTable;
                }
            }
        }
    }

    // Compute info gains: H(class) - H(class | attribute) per attribute.
    m_InfoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != classIndex) {
            m_InfoGains[i] = (ContingencyTables.entropyOverColumns(counts[i])
                    - ContingencyTables.entropyConditionedOnRows(counts[i]));
        }
    }
}

From source file:FeatureSelection.ReliefFAttributeEval.java

License:Open Source License

/**
 * Updates the minimum and maximum values for all the attributes based on a
 * new instance./*from  w w w.jav  a2  s . com*/
 *
 * @param instance
 *            the new instance
 */
private void updateMinMax(Instance instance) {
    // for (int j = 0; j < m_numAttribs; j++) {
    try {
        for (int j = 0; j < instance.numValues(); j++) {
            if ((instance.attributeSparse(j).isNumeric()) && (!instance.isMissingSparse(j))) {
                if (Double.isNaN(m_minArray[instance.index(j)])) {
                    m_minArray[instance.index(j)] = instance.valueSparse(j);
                    m_maxArray[instance.index(j)] = instance.valueSparse(j);
                } else {
                    if (instance.valueSparse(j) < m_minArray[instance.index(j)]) {
                        m_minArray[instance.index(j)] = instance.valueSparse(j);
                    } else {
                        if (instance.valueSparse(j) > m_maxArray[instance.index(j)]) {
                            m_maxArray[instance.index(j)] = instance.valueSparse(j);
                        }
                    }
                }
            }
        }
    } catch (Exception ex) {
        System.err.println(ex);
        ex.printStackTrace();
    }
}

From source file:FeatureSelection.ReliefFAttributeEval.java

License:Open Source License

/**
 * Calculates the distance between two instances
 *
 * @param first/*  w ww.j  av  a 2  s . c  o  m*/
 *            the first instance
 * @param second
 *            the second instance
 * @return the distance between the two given instances, between 0 and 1
 */
private double distance(Instance first, Instance second) {

    double distance = 0;
    int firstI, secondI;

    for (int p1 = 0, p2 = 0; p1 < first.numValues() || p2 < second.numValues();) {
        if (p1 >= first.numValues()) {
            firstI = m_trainInstances.numAttributes();
        } else {
            firstI = first.index(p1);
        }
        if (p2 >= second.numValues()) {
            secondI = m_trainInstances.numAttributes();
        } else {
            secondI = second.index(p2);
        }
        if (firstI == m_trainInstances.classIndex()) {
            p1++;
            continue;
        }
        if (secondI == m_trainInstances.classIndex()) {
            p2++;
            continue;
        }
        double diff;
        if (firstI == secondI) {
            diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2));
            p1++;
            p2++;
        } else if (firstI > secondI) {
            diff = difference(secondI, 0, second.valueSparse(p2));
            p2++;
        } else {
            diff = difference(firstI, first.valueSparse(p1), 0);
            p1++;
        }
        // distance += diff * diff;
        distance += diff;
    }

    // return Math.sqrt(distance / m_NumAttributesUsed);
    return distance;
}

From source file:FeatureSelection.ReliefFAttributeEval.java

License:Open Source License

/**
 * Updates the attribute weight statistics (m_ndc, m_ndcda, m_nda) given an
 * instance, for the case where the class is numeric. For each stored
 * nearest neighbour it accumulates the probability of a different
 * prediction (class difference), of a different attribute value, and of
 * both together — weighting neighbours either by distance rank
 * (m_weightsByRank) or uniformly.
 *
 * @param instNum the index of the instance to use when updating weights
 */
private void updateWeightsNumericClass(int instNum) {
    int i, j;
    double temp, temp2;
    int[] tempSorted = null;
    double[] tempDist = null;
    double distNorm = 1.0;
    int firstI, secondI;

    Instance inst = m_trainInstances.instance(instNum);

    // sort nearest neighbours and set up normalization variable
    if (m_weightByDistance) {
        tempDist = new double[m_stored[0]];

        for (j = 0, distNorm = 0; j < m_stored[0]; j++) {
            // copy the distances
            tempDist[j] = m_karray[0][j][0];
            // sum normalizer
            distNorm += m_weightsByRank[j];
        }

        // tempSorted[i] is the position of the i-th closest neighbour.
        tempSorted = Utils.sort(tempDist);
    }

    for (i = 0; i < m_stored[0]; i++) {
        // P diff prediction (class) given nearest instances
        if (m_weightByDistance) {
            temp = difference(m_classIndex, inst.value(m_classIndex),
                    m_trainInstances.instance((int) m_karray[0][tempSorted[i]][1]).value(m_classIndex));
            temp *= (m_weightsByRank[i] / distNorm);
        } else {
            temp = difference(m_classIndex, inst.value(m_classIndex),
                    m_trainInstances.instance((int) m_karray[0][i][1]).value(m_classIndex));
            temp *= (1.0 / (double) m_stored[0]); // equal influence
        }

        m_ndc += temp;

        // cmp is the i-th nearest neighbour (rank-sorted when weighting by
        // distance); m_karray[0][..][1] holds its index in the training set.
        Instance cmp;
        cmp = (m_weightByDistance) ? m_trainInstances.instance((int) m_karray[0][tempSorted[i]][1])
                : m_trainInstances.instance((int) m_karray[0][i][1]);

        double temp_diffP_diffA_givNearest = difference(m_classIndex, inst.value(m_classIndex),
                cmp.value(m_classIndex));
        // now the attributes: merge-walk the two (possibly sparse) value
        // lists; an exhausted list is marked with the sentinel numAttributes.
        for (int p1 = 0, p2 = 0; p1 < inst.numValues() || p2 < cmp.numValues();) {
            if (p1 >= inst.numValues()) {
                firstI = m_trainInstances.numAttributes();
            } else {
                firstI = inst.index(p1);
            }
            if (p2 >= cmp.numValues()) {
                secondI = m_trainInstances.numAttributes();
            } else {
                secondI = cmp.index(p2);
            }
            // the class attribute itself never contributes
            if (firstI == m_trainInstances.classIndex()) {
                p1++;
                continue;
            }
            if (secondI == m_trainInstances.classIndex()) {
                p2++;
                continue;
            }
            temp = 0.0;
            temp2 = 0.0;

            // An attribute stored in only one of the two instances is
            // compared against an implicit value of 0.
            if (firstI == secondI) {
                j = firstI;
                temp = difference(j, inst.valueSparse(p1), cmp.valueSparse(p2));
                p1++;
                p2++;
            } else if (firstI > secondI) {
                j = secondI;
                temp = difference(j, 0, cmp.valueSparse(p2));
                p2++;
            } else {
                j = firstI;
                temp = difference(j, inst.valueSparse(p1), 0);
                p1++;
            }

            temp2 = temp_diffP_diffA_givNearest * temp;
            // P of different prediction and different att value given
            // nearest instances
            if (m_weightByDistance) {
                temp2 *= (m_weightsByRank[i] / distNorm);
            } else {
                temp2 *= (1.0 / (double) m_stored[0]); // equal influence
            }

            m_ndcda[j] += temp2;

            // P of different attribute val given nearest instances
            if (m_weightByDistance) {
                temp *= (m_weightsByRank[i] / distNorm);
            } else {
                temp *= (1.0 / (double) m_stored[0]); // equal influence
            }

            m_nda[j] += temp;
        }
    }
}

From source file:FeatureSelection.ReliefFAttributeEval.java

License:Open Source License

/**
 * update attribute weights given an instance when the class is discrete
 *
 * @param instNum/*from  w w  w. j  a  v a 2  s.co m*/
 *            the index of the instance to use when updating weights
 */
private void updateWeightsDiscreteClass(int instNum) {
    int i, j, k;
    int cl;
    double temp_diff, w_norm = 1.0;
    double[] tempDistClass;
    int[] tempSortedClass = null;
    double distNormClass = 1.0;
    double[] tempDistAtt;
    int[][] tempSortedAtt = null;
    double[] distNormAtt = null;
    int firstI, secondI;

    // store the indexes (sparse instances) of non-zero elements
    Instance inst = m_trainInstances.instance(instNum);

    // get the class of this instance
    cl = (int) m_trainInstances.instance(instNum).value(m_classIndex);

    // sort nearest neighbours and set up normalization variables
    // NOTE: m_karray[c][j] is used below as a pair: [0] = distance (sort key),
    // [1] = index of the stored neighbour instance.
    if (m_weightByDistance) {
        // do class (hits) first
        // sort the distances
        tempDistClass = new double[m_stored[cl]];

        for (j = 0, distNormClass = 0; j < m_stored[cl]; j++) {
            // copy the distances
            tempDistClass[j] = m_karray[cl][j][0];
            // sum normalizer
            distNormClass += m_weightsByRank[j];
        }

        // indices of the hit neighbours ordered by ascending distance, so that
        // rank r gets weight m_weightsByRank[r] below
        tempSortedClass = Utils.sort(tempDistClass);
        // do misses (other classes)
        tempSortedAtt = new int[m_numClasses][1];
        distNormAtt = new double[m_numClasses];

        for (k = 0; k < m_numClasses; k++) {
            if (k != cl) // already done cl
            {
                // sort the distances
                tempDistAtt = new double[m_stored[k]];

                for (j = 0, distNormAtt[k] = 0; j < m_stored[k]; j++) {
                    // copy the distances
                    tempDistAtt[j] = m_karray[k][j][0];
                    // sum normalizer
                    distNormAtt[k] += m_weightsByRank[j];
                }

                tempSortedAtt[k] = Utils.sort(tempDistAtt);
            }
        }
    }

    if (m_numClasses > 2) {
        // the amount of probability space left after removing the
        // probability of this instance's class value
        w_norm = (1.0 - m_classProbs[cl]);
    }

    // do the k nearest hits of the same class
    // (differences to same-class neighbours DECREASE attribute weights)
    for (j = 0, temp_diff = 0.0; j < m_stored[cl]; j++) {
        Instance cmp;
        // when weighting by distance, visit neighbours in sorted (rank) order;
        // otherwise stored order is fine since all get equal influence
        cmp = (m_weightByDistance) ? m_trainInstances.instance((int) m_karray[cl][tempSortedClass[j]][1])
                : m_trainInstances.instance((int) m_karray[cl][j][1]);

        // parallel walk over the two sparse representations: p1 indexes the
        // non-zero values of inst, p2 those of cmp; an exhausted side is given
        // the sentinel index numAttributes() so the other side drains with
        // an implicit 0 for the missing value
        for (int p1 = 0, p2 = 0; p1 < inst.numValues() || p2 < cmp.numValues();) {
            if (p1 >= inst.numValues()) {
                firstI = m_trainInstances.numAttributes();
            } else {
                firstI = inst.index(p1);
            }
            if (p2 >= cmp.numValues()) {
                secondI = m_trainInstances.numAttributes();
            } else {
                secondI = cmp.index(p2);
            }
            // skip the class attribute on either side
            if (firstI == m_trainInstances.classIndex()) {
                p1++;
                continue;
            }
            if (secondI == m_trainInstances.classIndex()) {
                p2++;
                continue;
            }
            if (firstI == secondI) {
                // both instances have a stored value for attribute i
                i = firstI;
                temp_diff = difference(i, inst.valueSparse(p1), cmp.valueSparse(p2));
                p1++;
                p2++;
            } else if (firstI > secondI) {
                // only cmp has a stored value; inst's value is implicitly 0
                i = secondI;
                temp_diff = difference(i, 0, cmp.valueSparse(p2));
                p2++;
            } else {
                // only inst has a stored value; cmp's value is implicitly 0
                i = firstI;
                temp_diff = difference(i, inst.valueSparse(p1), 0);
                p1++;
            }

            // scale the contribution: rank weight normalized over all hit
            // neighbours, or simply 1/numNeighbours for equal influence
            if (m_weightByDistance) {
                temp_diff *= (m_weightsByRank[j] / distNormClass);
            } else {
                if (m_stored[cl] > 0) {
                    temp_diff /= (double) m_stored[cl];
                }
            }
            m_weights[i] -= temp_diff;

        }
    }

    // now do k nearest misses from each of the other classes
    // (differences to other-class neighbours INCREASE attribute weights)
    temp_diff = 0.0;

    for (k = 0; k < m_numClasses; k++) {
        if (k != cl) // already done cl
        {
            for (j = 0; j < m_stored[k]; j++) {
                Instance cmp;
                cmp = (m_weightByDistance)
                        ? m_trainInstances.instance((int) m_karray[k][tempSortedAtt[k][j]][1])
                        : m_trainInstances.instance((int) m_karray[k][j][1]);

                // same parallel sparse walk as in the hits loop above
                for (int p1 = 0, p2 = 0; p1 < inst.numValues() || p2 < cmp.numValues();) {
                    if (p1 >= inst.numValues()) {
                        firstI = m_trainInstances.numAttributes();
                    } else {
                        firstI = inst.index(p1);
                    }
                    if (p2 >= cmp.numValues()) {
                        secondI = m_trainInstances.numAttributes();
                    } else {
                        secondI = cmp.index(p2);
                    }
                    if (firstI == m_trainInstances.classIndex()) {
                        p1++;
                        continue;
                    }
                    if (secondI == m_trainInstances.classIndex()) {
                        p2++;
                        continue;
                    }
                    if (firstI == secondI) {
                        i = firstI;
                        temp_diff = difference(i, inst.valueSparse(p1), cmp.valueSparse(p2));
                        p1++;
                        p2++;
                    } else if (firstI > secondI) {
                        i = secondI;
                        temp_diff = difference(i, 0, cmp.valueSparse(p2));
                        p2++;
                    } else {
                        i = firstI;
                        temp_diff = difference(i, inst.valueSparse(p1), 0);
                        p1++;
                    }

                    if (m_weightByDistance) {
                        temp_diff *= (m_weightsByRank[j] / distNormAtt[k]);
                    } else {
                        if (m_stored[k] > 0) {
                            temp_diff /= (double) m_stored[k];
                        }
                    }
                    if (m_numClasses > 2) {
                        // multi-class: weight each miss class by its prior
                        // probability renormalized over the non-cl classes
                        m_weights[i] += ((m_classProbs[k] / w_norm) * temp_diff);
                    } else {
                        m_weights[i] += temp_diff;
                    }
                }
            }
        }
    }
}