Example usage for weka.core Instance valueSparse

List of usage examples for weka.core Instance valueSparse

Introduction

This page collects example usages of the valueSparse method of weka.core.Instance.

Prototype

public double valueSparse(int indexOfIndex);

Source Link

Document

Returns an instance's attribute value in internal format, given an index in the sparse representation.

Usage

From source file:moa.classifiers.functions.SPegasos.java

License:Open Source License

/**
 * Updates the model with a single training instance.
 * <p>
 * The weight vector is allocated on the first call, with one slot per
 * attribute plus a trailing slot for the bias. Instances whose class value is
 * missing are ignored and do not advance the iteration counter.
 *
 * @param instance the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance instance) {

    if (m_weights == null) {
        m_weights = new double[instance.numAttributes() + 1];
    }
    if (instance.classIsMissing()) {
        return;
    }

    final int classIdx = instance.classIndex();
    // the bias lives in the last slot of the weight vector
    final int biasIdx = m_weights.length - 1;

    final double learningRate = 1.0 / (m_lambda * m_t);
    // same as 1 - learningRate * m_lambda, because learningRate = 1 / (m_lambda * m_t)
    final double decay = 1.0 - 1.0 / m_t;
    final double y = (instance.classValue() == 0) ? -1 : 1;
    final double wx = dotProd(instance, m_weights, classIdx);
    final double z = y * (wx + m_weights[biasIdx]);

    // shrink every attribute weight (the class slot and the bias are left alone)
    for (int w = 0; w < biasIdx; w++) {
        if (w != classIdx) {
            m_weights[w] *= decay;
        }
    }

    // gradient step: always for log loss, otherwise only when the margin z is below 1
    if (m_loss == LOGLOSS || z < 1) {
        final double gradient = dloss(z);
        final int stored = instance.numValues();
        for (int pos = 0; pos < stored; pos++) {
            final int attr = instance.index(pos);
            if (attr == classIdx || instance.isMissingSparse(pos)) {
                continue;
            }
            m_weights[attr] += learningRate * gradient * (instance.valueSparse(pos) * y);
        }

        // update the bias
        m_weights[biasIdx] += learningRate * gradient * y;
    }

    // squared L2 norm of the attribute weights (class slot and bias excluded)
    double squaredNorm = 0;
    for (int w = 0; w < biasIdx; w++) {
        if (w != classIdx) {
            squaredNorm += m_weights[w] * m_weights[w];
        }
    }

    // rescale the weights when 1 / (m_lambda * squaredNorm) is below 1
    final double projection = Math.min(1.0, 1.0 / (m_lambda * squaredNorm));
    if (projection < 1.0) {
        final double shrink = Math.sqrt(projection);
        for (int w = 0; w < biasIdx; w++) {
            if (w != classIdx) {
                m_weights[w] *= shrink;
            }
        }
    }
    m_t++;
}

From source file:moa.classifiers.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Trains the classifier with the given instance.
 * <p>
 * On the first call after a reset, the per-class and per-word count tables
 * are allocated and seeded with the Laplace correction. The instance's weight
 * is then added to the count of its class, and the weighted value of every
 * stored, non-missing, non-class attribute is added to the word totals of
 * that class.
 *
 * @param inst the new training instance to include in the model
 */
@Override
public void trainOnInstanceImpl(Instance inst) {
    if (this.reset) {
        this.m_numClasses = inst.numClasses();
        double laplace = this.laplaceCorrectionOption.getValue();
        int numAttributes = inst.numAttributes();

        m_probOfClass = new double[m_numClasses];
        Arrays.fill(m_probOfClass, laplace);

        m_classTotals = new double[m_numClasses];
        Arrays.fill(m_classTotals, laplace * numAttributes);

        m_wordTotalForClass = new double[numAttributes][m_numClasses];
        for (double[] wordTotal : m_wordTotalForClass) {
            Arrays.fill(wordTotal, laplace);
        }
        this.reset = false;
    }

    // Update classifier
    int classIndex = inst.classIndex();
    int classValue = (int) inst.value(classIndex);

    double w = inst.weight();
    m_probOfClass[classValue] += w;
    m_classTotals[classValue] += w * totalSize(inst);

    for (int i = 0; i < inst.numValues(); i++) {
        int index = inst.index(i);
        // i is a position in the sparse representation, not an attribute
        // index, so the missing-value check must use the sparse accessor
        // (isMissing(i) would test attribute number i instead).
        if (index != classIndex && !inst.isMissingSparse(i)) {
            m_wordTotalForClass[index][classValue] += w * inst.valueSparse(i);
        }
    }
}

From source file:moa.classifiers.NaiveBayesMultinomial.java

License:Open Source License

/**
 * Calculates the class membership probabilities for the given test
 * instance./*ww  w .  j  a  v a2s .  com*/
 *
 * @param instance    the instance to be classified
 * @return       predicted class probability distribution
 */
@Override
public double[] getVotesForInstance(Instance instance) {
    if (this.reset == true) {
        return new double[2];
    }
    double[] probOfClassGivenDoc = new double[m_numClasses];
    double totalSize = totalSize(instance);

    for (int i = 0; i < m_numClasses; i++) {
        probOfClassGivenDoc[i] = Math.log(m_probOfClass[i]) - totalSize * Math.log(m_classTotals[i]);
    }

    for (int i = 0; i < instance.numValues(); i++) {

        int index = instance.index(i);
        if (index == instance.classIndex() || instance.isMissing(i)) {
            continue;
        }

        double wordCount = instance.valueSparse(i);
        for (int c = 0; c < m_numClasses; c++) {
            probOfClassGivenDoc[c] += wordCount * Math.log(m_wordTotalForClass[index][c]);
        }
    }

    return Utils.logs2probs(probOfClassGivenDoc);
}

From source file:mulan.classifier.meta.ConstrainedKMeans.java

License:Open Source License

/**
 * Calculates the distance between two instances
 *
 * @param first the first instance/*from w  w w. j  a v a 2 s .  c  o  m*/
 * @param second the second instance
 * @return the distance between the two given instances, between 0 and 1
 */
private double distance(Instance first, Instance second) {
    double distance = 0;
    int firstI, secondI;
    for (int p1 = 0, p2 = 0; p1 < first.numValues() || p2 < second.numValues();) {
        if (p1 >= first.numValues()) {
            firstI = m_ClusterCentroids.numAttributes();
        } else {
            firstI = first.index(p1);
        }
        if (p2 >= second.numValues()) {
            secondI = m_ClusterCentroids.numAttributes();
        } else {
            secondI = second.index(p2);
        }
        /*      if (firstI == m_ClusterCentroids.classIndex()) {
        p1++; continue;
        }
        if (secondI == m_ClusterCentroids.classIndex()) {
        p2++; continue;
        } */
        double diff;
        if (firstI == secondI) {
            diff = difference(firstI, first.valueSparse(p1), second.valueSparse(p2));
            p1++;
            p2++;
        } else if (firstI > secondI) {
            diff = difference(secondI, 0, second.valueSparse(p2));
            p2++;
        } else {
            diff = difference(firstI, first.valueSparse(p1), 0);
            p1++;
        }
        distance += diff * diff;
    }
    //return Math.sqrt(distance / m_ClusterCentroids.numAttributes());
    return distance;
}

From source file:net.paudan.evosvm.LibLINEAR.java

License:Open Source License

/**
* returns an instance into a sparse liblinear array
*
* @param instance the instance to work on
* @return the liblinear array/* ww  w  .  j a  va2s . c  om*/
* @throws Exception if setup of array fails
*/
protected FeatureNode[] instanceToArray(Instance instance) throws Exception {
    // determine number of non-zero attributes
    int count = 0;

    for (int i = 0; i < instance.numValues(); i++) {
        if (instance.index(i) == instance.classIndex())
            continue;
        if (instance.valueSparse(i) != 0)
            count++;
    }

    if (m_Bias >= 0) {
        count++;
    }

    // fill array
    FeatureNode[] nodes = new FeatureNode[count];
    int index = 0;
    for (int i = 0; i < instance.numValues(); i++) {

        int idx = instance.index(i);
        double val = instance.valueSparse(i);

        if (idx == instance.classIndex())
            continue;
        if (val == 0)
            continue;

        nodes[index] = new FeatureNode(idx + 1, val);
        index++;
    }

    // add bias term
    if (m_Bias >= 0) {
        nodes[index] = new FeatureNode(instance.numAttributes() + 1, m_Bias);
    }

    return nodes;
}

From source file:net.sf.jclal.util.distancefunction.CosineDistance.java

License:Open Source License

/**
 * Calculates the distance between two instances. Offers speed up (if the
 * distance function class in use supports it) in nearest neighbour search
 * by taking into account the cutOff or maximum distance. Depending on the
 * distance function class, post processing of the distances by
 * postProcessDistances(double []) may be required if this function is used.
 *
 * @param first the first instance/* ww  w  .  j  a v a  2  s.  c o  m*/
 * @param second the second instance
 * @param cutOffValue If the distance being calculated becomes larger than
 * cutOffValue then the rest of the calculation is discarded.
 * @param stats the performance stats object
 * @return the distance between the two given instances or
 * Double.POSITIVE_INFINITY if the distance being calculated becomes larger
 * than cutOffValue.
 */
@Override
public double distance(Instance first, Instance second, double cutOffValue, PerformanceStats stats) {
    double distance = 0;
    int firstI, secondI;
    int firstNumValues = first.numValues();
    int secondNumValues = second.numValues();
    int numAttributes = m_Data.numAttributes();
    int classIndex = m_Data.classIndex();

    double norm2First = 0, norm2Second = 0;

    validate();

    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        if (p1 >= firstNumValues) {
            firstI = numAttributes;
        } else {
            firstI = first.index(p1);
        }

        if (p2 >= secondNumValues) {
            secondI = numAttributes;
        } else {
            secondI = second.index(p2);
        }

        if (firstI == classIndex) {
            p1++;
            continue;
        }
        if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) {
            p1++;
            continue;
        }

        if (secondI == classIndex) {
            p2++;
            continue;
        }
        if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) {
            p2++;
            continue;
        }

        double diff;

        if (firstI == secondI) {
            diff = localSimilarity(firstI, first.valueSparse(p1), second.valueSparse(p2));

            norm2First += localSimilarity(firstI, first.valueSparse(p1), first.valueSparse(p1));
            norm2Second += localSimilarity(secondI, second.valueSparse(p2), second.valueSparse(p2));

            p1++;
            p2++;
        } else if (firstI > secondI) {

            diff = localSimilarity(secondI, 0, second.valueSparse(p2));

            norm2Second += localSimilarity(secondI, second.valueSparse(p2), second.valueSparse(p2));

            p2++;
        } else {

            diff = localSimilarity(firstI, first.valueSparse(p1), 0);

            norm2First += localSimilarity(firstI, first.valueSparse(p1), first.valueSparse(p1));

            p1++;
        }
        if (stats != null) {
            stats.incrCoordCount();
        }

        distance = updateDistance(distance, diff);
        if (distance > cutOffValue) {
            return Double.POSITIVE_INFINITY;
        }
    }

    return distance / (Math.sqrt(norm2First * norm2Second));
}

From source file:org.esa.nest.gpf.SGD.java

/**
 * Updates the classifier with the given instance.
 * <p>
 * Instances with a missing class value are ignored and do not advance the
 * iteration counter.
 *
 * @param instance the new training instance to include in the model
 * @exception Exception if the instance could not be incorporated in the
 * model.
 */
@Override
public void updateClassifier(Instance instance) throws Exception {

    if (instance.classIsMissing()) {
        return;
    }

    final int classIdx = instance.classIndex();
    // the bias lives in the last slot of the weight vector
    final int biasIdx = m_weights.length - 1;

    final double wx = dotProd(instance, m_weights, classIdx);
    final double output = wx + m_weights[biasIdx];

    double y;
    double z;
    if (instance.classAttribute().isNominal()) {
        // nominal class: class value 0 maps to -1, anything else to +1
        y = (instance.classValue() == 0) ? -1 : 1;
        z = y * output;
    } else {
        // non-nominal class: z is the residual, and y is fixed to 1 for the update below
        z = instance.classValue() - output;
        y = 1;
    }

    // Compute multiplier for weight decay
    final double multiplier = (m_numInstances == 0)
            ? 1.0 - (m_learningRate * m_lambda) / m_t
            : 1.0 - (m_learningRate * m_lambda) / m_numInstances;
    for (int w = 0; w < biasIdx; w++) {
        m_weights[w] *= multiplier;
    }

    // Only need to do the following if the loss is non-zero
    if (m_loss != HINGE || z < 1) {

        // Compute factor for updates
        final double factor = m_learningRate * y * dloss(z);

        // Update coefficients for attributes
        final int stored = instance.numValues();
        for (int pos = 0; pos < stored; pos++) {
            final int attr = instance.index(pos);
            if (attr == classIdx || instance.isMissingSparse(pos)) {
                continue;
            }
            m_weights[attr] += factor * instance.valueSparse(pos);
        }

        // update the bias
        m_weights[biasIdx] += factor;
    }
    m_t++;
}

From source file:org.stream_gpu.float_knn.float_search.NormalizableDistance.java

License:Open Source License

/**
 * Calculates the distance between two instances. Offers speed up (if the 
 * distance function class in use supports it) in nearest neighbour search by 
 * taking into account the cutOff or maximum distance. Depending on the 
 * distance function class, post processing of the distances by 
 * postProcessDistances(float []) may be required if this function is used.
 *
 * @param first    the first instance/*w w w.j a va 2s. c  o m*/
 * @param second    the second instance
 * @param cutOffValue If the distance being calculated becomes larger than 
 *                    cutOffValue then the rest of the calculation is 
 *                    discarded.
 * @param stats    the performance stats object
 * @return       the distance between the two given instances or 
 *          float.POSITIVE_INFINITY if the distance being 
 *          calculated becomes larger than cutOffValue. 
 */
public float distance(Instance first, Instance second, float cutOffValue, PerformanceStats stats) {
    float distance = 0;
    int firstI, secondI;
    int firstNumValues = first.numValues();
    int secondNumValues = second.numValues();
    int numAttributes = m_Data.numAttributes();
    int classIndex = m_Data.classIndex();

    validate();

    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        if (p1 >= firstNumValues)
            firstI = numAttributes;
        else
            firstI = first.index(p1);

        if (p2 >= secondNumValues)
            secondI = numAttributes;
        else
            secondI = second.index(p2);

        if (firstI == classIndex) {
            p1++;
            continue;
        }
        if ((firstI < numAttributes) && !m_ActiveIndices[firstI]) {
            p1++;
            continue;
        }

        if (secondI == classIndex) {
            p2++;
            continue;
        }
        if ((secondI < numAttributes) && !m_ActiveIndices[secondI]) {
            p2++;
            continue;
        }

        float diff;

        if (firstI == secondI) {
            diff = difference(firstI, (float) first.valueSparse(p1), (float) second.valueSparse(p2));
            p1++;
            p2++;
        } else if (firstI > secondI) {
            diff = difference(secondI, 0, (float) second.valueSparse(p2));
            p2++;
        } else {
            diff = difference(firstI, (float) first.valueSparse(p1), 0);
            p1++;
        }
        if (stats != null)
            stats.incrCoordCount();

        distance = updateDistance(distance, diff);
        if (distance > cutOffValue)
            return Float.POSITIVE_INFINITY;
    }

    return distance;
}

From source file:pk.lums.edu.sma.processing.ml.DBSCAN.EuclideanDataObject.java

License:Open Source License

/**
 * Compares two DataObjects in respect to their attribute-values
 * /*www.  j a  va  2s . co m*/
 * @param dataObject
 *            The DataObject, that is compared with this.dataObject; now
 *            assumed to be of the same type and with the same structure
 * @return Returns true, if the DataObjects correspond in each value, else
 *         returns false
 */
public boolean equals(DataObject dataObject) {
    if (this == dataObject)
        return true;

    Instance firstInstance = getInstance();
    Instance secondInstance = dataObject.getInstance();
    int firstNumValues = firstInstance.numValues();
    int secondNumValues = secondInstance.numValues();
    int numAttributes = firstInstance.numAttributes();

    int firstI, secondI;
    for (int p1 = 0, p2 = 0; p1 < firstNumValues || p2 < secondNumValues;) {
        if (p1 >= firstNumValues) {
            firstI = numAttributes;
        } else {
            firstI = firstInstance.index(p1);
        }

        if (p2 >= secondNumValues) {
            secondI = numAttributes;
        } else {
            secondI = secondInstance.index(p2);
        }

        if (firstI == secondI) {
            if (firstInstance.valueSparse(p1) != secondInstance.valueSparse(p2)) {
                return false;
            }
            p1++;
            p2++;
        } else if (firstI > secondI) {
            if (0 != secondInstance.valueSparse(p2)) {
                return false;
            }
            p2++;
        } else {
            if (0 != firstInstance.valueSparse(p1)) {
                return false;
            }
            p1++;
        }
    }
    return true;
}

From source file:pk.lums.edu.sma.processing.ml.DBSCAN.EuclideanDataObject.java

License:Open Source License

/**
 * Calculates the Euclidean distance between dataObject and this.dataObject.
 * <p>
 * Both sparse representations are walked in parallel by attribute index. An
 * attribute stored in only one of the instances is compared against 0 for
 * the other. The per-attribute values returned by {@code computeDistance}
 * are squared and summed, and the square root of that sum is returned.
 *
 * @param dataObject
 *            The DataObject that is used for distance calculation with
 *            this.dataObject; assumed to be of the same type and with
 *            the same structure
 * @return double-value The Euclidean distance between dataObject and
 *         this.dataObject
 */
public double distance(DataObject dataObject) {
    final Instance mine = getInstance();
    final Instance theirs = dataObject.getInstance();
    final int mineCount = mine.numValues();
    final int theirsCount = theirs.numValues();
    final int attrCount = mine.numAttributes();

    double sumOfSquares = 0.0;
    int posMine = 0;
    int posTheirs = 0;
    while (posMine < mineCount || posTheirs < theirsCount) {
        // an exhausted side gets a sentinel index beyond any real attribute
        final int attrMine = (posMine < mineCount) ? mine.index(posMine) : attrCount;
        final int attrTheirs = (posTheirs < theirsCount) ? theirs.index(posTheirs) : attrCount;

        final double component;
        if (attrMine == attrTheirs) {
            // attribute stored in both instances
            component = computeDistance(attrMine, mine.valueSparse(posMine), theirs.valueSparse(posTheirs));
            posMine++;
            posTheirs++;
        } else if (attrMine > attrTheirs) {
            // attribute stored only in the other instance
            component = computeDistance(attrTheirs, 0, theirs.valueSparse(posTheirs));
            posTheirs++;
        } else {
            // attribute stored only in this instance
            component = computeDistance(attrMine, mine.valueSparse(posMine), 0);
            posMine++;
        }
        sumOfSquares += component * component;
    }
    return Math.sqrt(sumOfSquares);
}