Example usage for weka.core Instance valueSparse

Introduction

This page collects example usages of the weka.core.Instance method valueSparse.

Prototype

public double valueSparse(int indexOfIndex);

Source Link

Document

Returns an instance's attribute value in internal format, given an index in the sparse representation.

Usage

From source file:moa.classifiers.functions.SPegasos.java

License:Open Source License

/**
 * Performs one online update of the linear model with the given instance.
 * <p>
 * The weight array is allocated on the first call with one slot per
 * attribute plus a trailing slot that holds the bias. An instance whose
 * class value is missing changes neither the weights nor the step counter.
 *
 * @param instance the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        m_weights = new double[instance.numAttributes() + 1];
    }
    if (instance.classIsMissing()) {
        return;
    }

    final int classIdx = instance.classIndex();
    final double stepSize = 1.0 / (m_lambda * m_t);
    // Algebraically the same as 1.0 - stepSize * m_lambda.
    final double decay = 1.0 - 1.0 / m_t;
    // Class value 0 is mapped to -1, every other class value to +1.
    final double label = (instance.classValue() == 0) ? -1 : 1;
    final double dot = dotProd(instance, m_weights, classIdx);
    final int biasSlot = m_weights.length - 1;
    final double margin = label * (dot + m_weights[biasSlot]);

    // Shrink every attribute weight; the class slot and the bias are left alone.
    for (int att = 0; att < biasSlot; att++) {
        if (att == classIdx) {
            continue;
        }
        m_weights[att] *= decay;
    }

    // The gradient step is always taken for log loss, otherwise only when the margin is below 1.
    if (m_loss == LOGLOSS || (margin < 1)) {
        final double gradient = dloss(margin);
        final int stored = instance.numValues();
        for (int pos = 0; pos < stored; pos++) {
            final int att = instance.index(pos);
            if (att == classIdx || instance.isMissingSparse(pos)) {
                continue;
            }
            final double delta = stepSize * gradient * (instance.valueSparse(pos) * label);
            m_weights[att] += delta;
        }

        // update the bias
        m_weights[biasSlot] += stepSize * gradient * label;
    }

    // Squared length of the weight vector, ignoring the class slot and the bias.
    double squaredNorm = 0;
    for (int att = 0; att < biasSlot; att++) {
        if (att == classIdx) {
            continue;
        }
        squaredNorm += (m_weights[att] * m_weights[att]);
    }

    // Rescale the weights only when 1 / (lambda * squaredNorm) is below 1.
    double rescale = Math.min(1.0, (1.0 / (m_lambda * squaredNorm)));
    if (rescale < 1.0) {
        rescale = Math.sqrt(rescale);
        for (int att = 0; att < biasSlot; att++) {
            if (att == classIdx) {
                continue;
            }
            m_weights[att] *= rescale;
        }
    }
    m_t++;
}

From source file:moa.classifiers.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 * <p>
 * On the first call after a reset the count tables are (re)allocated and
 * pre-filled with the Laplace correction value. The instance's weight is then
 * added to its class count, and every stored, non-missing, non-class
 * attribute value is added (scaled by the instance weight) to the per-class
 * total of that attribute.
 *
 * @param inst the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.reset) {
        this.m_numClasses = inst.numClasses();
        double laplace = this.laplaceCorrectionOption.getValue();
        int numAttributes = inst.numAttributes();

        m_probOfClass = new double[m_numClasses];
        Arrays.fill(m_probOfClass, laplace);

        m_classTotals = new double[m_numClasses];
        Arrays.fill(m_classTotals, laplace * numAttributes);

        m_wordTotalForClass = new double[numAttributes][m_numClasses];
        for (double[] wordTotal : m_wordTotalForClass) {
            Arrays.fill(wordTotal, laplace);
        }
        this.reset = false;
    }

    // Update classifier
    int classIndex = inst.classIndex();
    // NOTE(review): a missing class value is not checked here before the cast - confirm callers never pass one.
    int classValue = (int) inst.value(classIndex);

    double w = inst.weight();
    m_probOfClass[classValue] += w;
    m_classTotals[classValue] += w * totalSize(inst);

    for (int i = 0; i < inst.numValues(); i++) {
        int index = inst.index(i);
        // i is a position in the sparse representation, so the missing-value
        // test must be the sparse variant; isMissing(i) would treat i as an
        // attribute index and inspect the wrong attribute on sparse instances.
        if (index != classIndex && !inst.isMissingSparse(i)) {
            m_wordTotalForClass[index][classValue] += w * inst.valueSparse(i);
        }
    }
}

From source file:moa.classifiers.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance./*ww  w .  j  a  v a2s .  com*/
 *
 * @param instance    the instance to be classified
 * @return       predicted class probability distribution
 */
@Override
public double[] getVotesForInstance(Instance instance) {
    if (this.reset == true) {
        return new double[2];
    }
    double[] probOfClassGivenDoc = new double[m_numClasses];
    double totalSize = totalSize(instance);

    for (int i = 0; i < m_numClasses; i++) {
        probOfClassGivenDoc[i] = Math.log(m_probOfClass[i]) - totalSize * Math.log(m_classTotals[i]);
    }

    for (int i = 0; i < instance.numValues(); i++) {

        int index = instance.index(i);
        if (index == instance.classIndex() || instance.isMissing(i)) {
            continue;
        }

        double wordCount = instance.valueSparse(i);
        for (int c = 0; c < m_numClasses; c++) {
            probOfClassGivenDoc[c] += wordCount * Math.log(m_wordTotalForClass[index][c]);
        }
    }

    return Utils.logs2probs(probOfClassGivenDoc);
}

From source file:mulan.classifier.meta.ConstrainedKMeans.java

License:Open Source License

/**
 * Calculates the distance between two instances
 *
 * @param first the first instance/*from w  w w. j  a v a 2 s .  c  o  m*/
 * @param second the second instance
 * @return the distance between the two given instances, between 0 and 1
 */
private double distance(Instance first, Instance second) {
    double distance = 0;
    int firstI, secondI;
    for (int p1 = 0, p2 = 0; p1 < first.numValues() || p2 < second.numValues();) {
        if (p1 >= first.numValues()) {
            firstI = m_ClusterCentroids.numAttributes();
        } else {
            firstI = first.index(p1);
        }
        if (p2 >= second.numValues()) {
            secondI = m_ClusterCentroids.numAttributes();
        } else {
            secondI = second.index(p2);
        }
        /*      if (firstI == m_ClusterCentroids.classIndex()) {
        p1++; continue;
        }
        if (secondI == m_ClusterCentroids.classIndex()) {
        p2++; continue;
        } */
        double diff;
        if (firstI == secondI) {
            diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2));
            p1++;
            p2++;
        } else if (firstI > secondI) {
            diff = difference(secondI, 0, second.valueSparse(p2));
            p2++;
        } else {
            diff = difference(firstI, first.valueSparse(p1), 0);
            p1++;
        }
        distance += diff * diff;
    }
    //return Math.sqrt(distance / m_ClusterCentroids.numAttributes());
    return distance;
}

From source file:net.paudan.evosvm.LibLINEAR.java

License:Open Source License

/**
* returns an instance into a sparse liblinear array
*
* @param instance the instance to work on
* @return the liblinear array/* ww  w  .  j a  va2s . c  om*/
* @throws Exception if setup of array fails
*/
protected FeatureNode[] instanceToArray(Instance instance) throws Exception {
    // determine number of non-zero attributes
    int count = 0;

    for (int i = 0; i < instance.numValues(); i++) {
        if (instance.index(i) == instance.classIndex())
            continue;
        if (instance.valueSparse(i) != 0)
            count++;
    }

    if (m_Bias >= 0) {
        count++;
    }

    // fill array
    FeatureNode[] nodes = new FeatureNode[count];
    int index = 0;
    for (int i = 0; i < instance.numValues(); i++) {

        int idx = instance.index(i);
        double val = instance.valueSparse(i);

        if (idx == instance.classIndex())
            continue;
        if (val == 0)
            continue;

        nodes[index] = new FeatureNode(idx + 1, val);
        index++;
    }

    // add bias term
    if (m_Bias >= 0) {
        nodes[index] = new FeatureNode(instance.numAttributes() + 1, m_Bias);
    }

    return nodes;
}

From source file:net.sf.jclal.util.distancefunction.CosineDistance.java

License:Open Source License

/**
 * Calculates the distance between two instances. Offers speed up (if the
 * distance function class in use supports it) in nearest neighbour search
 * by taking into account the cutOff or maximum distance. Depending on the
 * distance function class, post processing of the distances by
 * postProcessDistances(double []) may be required if this function is used.
 *
 * @param first the first instance/* ww  w  .  j  a v a  2  s.  c o  m*/
 * @param second the second instance
 * @param cutOffValue If the distance being calculated becomes larger than
 * cutOffValue then the rest of the calculation is discarded.
 * @param stats the performance stats object
 * @return the distance between the two given instances or
 * Double.POSITIVE_INFINITY if the distance being calculated becomes larger
 * than cutOffValue.
 */
@Override
public double distance(Instance first, Instance second, double cutOffValue, PerformanceStats stats) {
    double distance = 0;
    int firstI, secondI;
    int firstNumValues = first.numValues();
    int secondNumValues = second.numValues();
    int numAttributes = m_Data.numAttributes();
    int classIndex = m_Data.classIndex();

    double norm2First = 0, norm2Second = 0;

    validate();

    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        if (p1 >= firstNumValues) {
            firstI = numAttributes;
        } else {
            firstI = first.index(p1);
        }

        if (p2 >= secondNumValues) {
            secondI = numAttributes;
        } else {
            secondI = second.index(p2);
        }

        if (firstI == classIndex) {
            p1++;
            continue;
        }
        if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) {
            p1++;
            continue;
        }

        if (secondI == classIndex) {
            p2++;
            continue;
        }
        if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) {
            p2++;
            continue;
        }

        double diff;

        if (firstI == secondI) {
            diff = localSimilarity(firstI, first.valueSparse(p1), second.valueSparse(p2));

            norm2First += localSimilarity(firstI, first.valueSparse(p1), first.valueSparse(p1));
            norm2Second += localSimilarity(secondI, second.valueSparse(p2), second.valueSparse(p2));

            p1++;
            p2++;
        } else if (firstI > secondI) {

            diff = localSimilarity(secondI, 0, second.valueSparse(p2));

            norm2Second += localSimilarity(secondI, second.valueSparse(p2), second.valueSparse(p2));

            p2++;
        } else {

            diff = localSimilarity(firstI, first.valueSparse(p1), 0);

            norm2First += localSimilarity(firstI, first.valueSparse(p1), first.valueSparse(p1));

            p1++;
        }
        if (stats != null) {
            stats.incrCoordCount();
        }

        distance = updateDistance(distance, diff);
        if (distance > cutOffValue) {
            return Double.POSITIVE_INFINITY;
        }
    }

    return distance / (Math.sqrt(norm2First * norm2Second));
}

From source file:org.esa.nest.gpf.SGD.java

/**
 * Updates the classifier with the given instance.
 * <p>
 * Instances with a missing class value are ignored. Otherwise every
 * non-bias weight is first multiplied by a decay factor; then, unless the
 * loss is the hinge loss and the loss argument is at least 1, the weights of
 * the stored, non-missing, non-class attributes and the bias are moved by
 * the learning rate times the loss derivative.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the
 * model.
 */
@Override
public void updateClassifier(Instance instance) throws Exception {

    if (instance.classIsMissing()) {
        return;
    }

    final double dot = dotProd(instance, m_weights, instance.classIndex());
    final int biasSlot = m_weights.length - 1;

    final double sign;
    final double lossArg;
    if (instance.classAttribute().isNominal()) {
        // Class value 0 is mapped to -1, every other class value to +1.
        sign = (instance.classValue() == 0) ? -1 : 1;
        lossArg = sign * (dot + m_weights[biasSlot]);
    } else {
        // Non-nominal class: the loss argument is the residual and the sign is fixed to 1.
        lossArg = instance.classValue() - (dot + m_weights[biasSlot]);
        sign = 1;
    }

    // Compute multiplier for weight decay
    final double decay = (m_numInstances == 0)
            ? 1.0 - (m_learningRate * m_lambda) / m_t
            : 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    for (int slot = 0; slot < biasSlot; slot++) {
        m_weights[slot] *= decay;
    }

    // Only need to do the following if the loss is non-zero
    if (m_loss != HINGE || (lossArg < 1)) {

        // Compute Factor for updates
        final double step = m_learningRate * sign * dloss(lossArg);

        // Update coefficients for attributes
        final int stored = instance.numValues();
        for (int pos = 0; pos < stored; pos++) {
            final int att = instance.index(pos);
            if (att == instance.classIndex() || instance.isMissingSparse(pos)) {
                continue;
            }
            m_weights[att] += step * instance.valueSparse(pos);
        }

        // update the bias
        m_weights[biasSlot] += step;
    }
    m_t++;
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Calculates the distance between two instances. Offers speed up (if the 
 * distance function class in use supports it) in nearest neighbour search by 
 * taking into account the cutOff or maximum distance. Depending on the 
 * distance function class, post processing of the distances by 
 * postProcessDistances(float []) may be required if this function is used.
 *
 * @param first    the first instance/*w w w.j a va 2s. c  o m*/
 * @param second    the second instance
 * @param cutOffValue If the distance being calculated becomes larger than 
 *                    cutOffValue then the rest of the calculation is 
 *                    discarded.
 * @param stats    the performance stats object
 * @return       the distance between the two given instances or 
 *          float.POSITIVE_INFINITY if the distance being 
 *          calculated becomes larger than cutOffValue. 
 */
public float distance(Instance first, Instance second, float cutOffValue, PerformanceStats stats) {
    float distance = 0;
    int firstI, secondI;
    int firstNumValues = first.numValues();
    int secondNumValues = second.numValues();
    int numAttributes = m_Data.numAttributes();
    int classIndex = m_Data.classIndex();

    validate();

    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        if (p1 >= firstNumValues)
            firstI = numAttributes;
        else
            firstI = first.index(p1);

        if (p2 >= secondNumValues)
            secondI = numAttributes;
        else
            secondI = second.index(p2);

        if (firstI == classIndex) {
            p1++;
            continue;
        }
        if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) {
            p1++;
            continue;
        }

        if (secondI == classIndex) {
            p2++;
            continue;
        }
        if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) {
            p2++;
            continue;
        }

        float diff;

        if (firstI == secondI) {
            diff = difference(firstI, (float) first.valueSparse(p1), (float) second.valueSparse(p2));
            p1++;
            p2++;
        } else if (firstI > secondI) {
            diff = difference(secondI, 0, (float) second.valueSparse(p2));
            p2++;
        } else {
            diff = difference(firstI, (float) first.valueSparse(p1), 0);
            p1++;
        }
        if (stats != null)
            stats.incrCoordCount();

        distance = updateDistance(distance, diff);
        if (distance > cutOffValue)
            return Float.POSITIVE_INFINITY;
    }

    return distance;
}

From source file:pk.lums.edu.sma.processing.ml.DBSCAN.EuclideanDataObject.java

License:Open Source License

/**
 * Compares two DataObjects in respect to their attribute-values
 * /*www.  j a  va  2s . co m*/
 * @param dataObject
 *            The DataObject, that is compared with this.dataObject; now
 *            assumed to be of the same type and with the same structure
 * @return Returns true, if the DataObjects correspond in each value, else
 *         returns false
 */
public boolean equals(DataObject dataObject) {
    if (this == dataObject)
        return true;

    Instance firstInstance = getInstance();
    Instance secondInstance = dataObject.getInstance();
    int firstNumValues = firstInstance.numValues();
    int secondNumValues = secondInstance.numValues();
    int numAttributes = firstInstance.numAttributes();

    int firstI, secondI;
    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        if (p1 >= firstNumValues) {
            firstI = numAttributes;
        } else {
            firstI = firstInstance.index(p1);
        }

        if (p2 >= secondNumValues) {
            secondI = numAttributes;
        } else {
            secondI = secondInstance.index(p2);
        }

        if (firstI == secondI) {
            if (firstInstance.valueSparse(p1) != secondInstance.valueSparse(p2)) {
                return false;
            }
            p1++;
            p2++;
        } else if (firstI > secondI) {
            if (0 != secondInstance.valueSparse(p2)) {
                return false;
            }
            p2++;
        } else {
            if (0 != firstInstance.valueSparse(p1)) {
                return false;
            }
            p1++;
        }
    }
    return true;
}

From source file:pk.lums.edu.sma.processing.ml.DBSCAN.EuclideanDataObject.java

License:Open Source License

/**
 * Calculates the euclidian-distance between dataObject and this.dataObject.
 * <p>
 * Both instances are walked through their sparse representations in
 * parallel; an attribute stored in only one of them is compared with 0.
 *
 * @param dataObject
 *            The DataObject, that is used for distance-calculation with
 *            this.dataObject; now assumed to be of the same type and with
 *            the same structure
 * @return double-value The euclidian-distance between dataObject and
 *         this.dataObject
 */
public double distance(DataObject dataObject) {
    final Instance mine = getInstance();
    final Instance theirs = dataObject.getInstance();
    final int mineCount = mine.numValues();
    final int theirsCount = theirs.numValues();
    final int attributeCount = mine.numAttributes();

    double sumOfSquares = 0.0;
    int posMine = 0;
    int posTheirs = 0;
    while (posMine < mineCount || posTheirs < theirsCount) {
        // An exhausted instance is given the attribute count as a stand-in index.
        final int attMine = (posMine < mineCount) ? mine.index(posMine) : attributeCount;
        final int attTheirs = (posTheirs < theirsCount) ? theirs.index(posTheirs) : attributeCount;

        final double component;
        if (attMine < attTheirs) {
            // Only this object's instance stores the attribute.
            component = computeDistance(attMine, mine.valueSparse(posMine), 0);
            posMine++;
        } else if (attMine > attTheirs) {
            // Only the other object's instance stores the attribute.
            component = computeDistance(attTheirs, 0, theirs.valueSparse(posTheirs));
            posTheirs++;
        } else {
            component = computeDistance(attMine, mine.valueSparse(posMine), theirs.valueSparse(posTheirs));
            posMine++;
            posTheirs++;
        }
        sumOfSquares += component * component;
    }
    return Math.sqrt(sumOfSquares);
}