Example usage for org.apache.mahout.math Vector times

List of usage examples for org.apache.mahout.math Vector times

Introduction

This page collects example usages of the times method of org.apache.mahout.math.Vector. Several of the examples below pass a scalar multiplier rather than a Vector.

Prototype

Vector times(Vector x);

Source Link

Document

Returns a new vector containing the element-wise product of the recipient and the argument.

Usage

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.classification.LogisticRegCmpTask.java

License:Apache License

/**
 * Runs one iteration over {@code rows}: first counts how many rows have an
 * output whose product with the model's prediction is positive, then applies
 * one parameter update per row.
 *
 * @param iteration index of the current iteration (not read by this method)
 */
@Override
public final void run(final int iteration) {

    // Accuracy pass: a row is a hit when output * prediction is strictly positive.
    posNum = 0;
    negNum = 0;
    for (final Row sample : rows) {
        final double label = sample.getOutput();
        final double score = model.predict(sample.getFeature());
        final boolean hit = label * score > 0;
        if (hit) {
            posNum++;
        } else {
            negNum++;
        }
    }

    // Update pass: step the parameters against (loss gradient + regularization gradient).
    for (final Row sample : rows) {
        final double label = sample.getOutput();
        final Vector features = sample.getFeature();
        final double score = model.predict(features);
        final Vector lossGradient = loss.gradient(features, score, label);
        final Vector totalGradient = lossGradient.plus(regularization.gradient(model));
        final Vector step = totalGradient.times(stepSize);
        model.setParameters(model.getParameters().minus(step));
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java

License:Apache License

/**
 * Rebuilds {@code clusterToStats} from scratch: for every point, computes a
 * posterior for each cluster and adds the point's posterior-weighted
 * statistics to that cluster's entry.
 *
 * @param iteration index of the current iteration (not read by this method)
 */
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int numClusters = clusterSummaries.size();

    for (final Vector point : points) {
        final int dim = point.size();

        // Outer product of the point with itself; only the diagonal is filled
        // in when the covariance is flagged as diagonal.
        final Matrix outProd;
        if (!isCovarianceDiagonal) {
            outProd = point.cross(point);
        } else {
            outProd = new SparseMatrix(dim, dim);
            for (int d = 0; d < dim; d++) {
                outProd.set(d, d, point.get(d) * point.get(d));
            }
        }

        // Per-cluster unnormalized weights and their sum.
        final double[] weights = new double[numClusters];
        double weightSum = 0;
        for (int k = 0; k < numClusters; k++) {
            final ClusterSummary summary = clusterSummaries.get(k);
            final Vector centroid = summary.getCentroid();
            final Matrix covariance = summary.getCovariance();
            final Double prior = summary.getPrior();

            final Vector offset = point.minus(centroid);
            final double scale = prior / Math.sqrt(covariance.determinant());
            final double quadraticForm = offset.dot(inverse(covariance).times(offset));
            weights[k] = scale * Math.exp(quadraticForm / (-2));
            weightSum += weights[k];
        }

        for (int k = 0; k < numClusters; k++) {
            // Fall back to a uniform posterior when every weight is zero.
            final double posterior;
            if (weightSum == 0) {
                posterior = 1.0 / weights.length;
            } else {
                posterior = weights[k] / weightSum;
            }

            final ClusterStats contribution =
                    new ClusterStats(times(outProd, posterior), point.times(posterior), posterior, false);
            if (clusterToStats.containsKey(k)) {
                clusterToStats.get(k).add(contribution);
            } else {
                clusterToStats.put(k, contribution);
            }
        }
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.regression.LinearRegCmpTask.java

License:Apache License

/**
 * Runs one iteration over {@code rows}: first accumulates the loss of the
 * current model into {@code lossSum}, then applies one parameter update per row.
 *
 * @param iteration index of the current iteration (not read by this method)
 */
@Override
public final void run(final int iteration) {

    // Loss pass: sum the per-row loss of the current model.
    lossSum = 0;
    for (final Row sample : rows) {
        final double target = sample.getOutput();
        final double estimate = model.predict(sample.getFeature());
        lossSum += loss.loss(estimate, target);
    }

    // Update pass: step the parameters against (loss gradient + regularization gradient).
    for (final Row sample : rows) {
        final double target = sample.getOutput();
        final Vector features = sample.getFeature();
        final double estimate = model.predict(features);
        final Vector lossGradient = loss.gradient(features, estimate, target);
        final Vector totalGradient = lossGradient.plus(regularization.gradient(model));
        final Vector step = totalGradient.times(stepSize);
        model.setParameters(model.getParameters().minus(step));
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.loss.HingeLoss.java

License:Apache License

/**
 * Returns {@code feature} scaled by {@code 0} when {@code predict * output >= 1}
 * and by {@code -output} otherwise.
 *
 * @param feature feature vector to scale
 * @param predict predicted value
 * @param output  expected output
 * @return the scaled feature vector
 */
@Override
public Vector gradient(final Vector feature, final double predict, final double output) {
    final double scale;
    if (predict * output >= 1) {
        scale = 0;
    } else {
        scale = -output;
    }
    return feature.times(scale);
}

From source file:edu.snu.dolphin.bsp.examples.ml.loss.LogisticLoss.java

License:Apache License

/**
 * Returns {@code feature} scaled by {@code output * (exp(-logSumExp) - 1)},
 * where {@code logSumExp} equals {@code log(1 + exp(-predict * output))}
 * evaluated in a shifted form.
 *
 * @param feature feature vector to scale
 * @param predict predicted value
 * @param output  expected output
 * @return the scaled feature vector
 */
@Override
public Vector gradient(final Vector feature, final double predict, final double output) {

    // http://lingpipe-blog.com/2012/02/16/howprevent-overflow-underflow-logistic-regression/
    final double margin = -predict * output;
    // Subtract the larger exponent (margin or 0) before exponentiating, as in the post linked above.
    final double shift = Math.max(margin, 0);
    final double sumOfExps = Math.exp(-shift) + Math.exp(margin - shift);
    final double logSumExp = shift + Math.log(sumOfExps);
    final double scale = output * (Math.exp(-logSumExp) - 1);
    return feature.times(scale);
}

From source file:edu.snu.dolphin.bsp.examples.ml.loss.SquareLoss.java

License:Apache License

/**
 * Returns {@code feature} scaled by the difference {@code predict - output}.
 *
 * @param feature feature vector to scale
 * @param predict predicted value
 * @param output  expected output
 * @return the scaled feature vector
 */
@Override
public Vector gradient(final Vector feature, final double predict, final double output) {
    final double residual = predict - output;
    return feature.times(residual);
}

From source file:nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java

License:Apache License

/**
 * Combines the (preference value, similarity column) pairs received for one
 * user into a vector of per-item predictions and hands it to
 * {@code writeRecommendedItems}. Nothing is written when {@code values} is empty.
 *
 * @param userID  user the predictions belong to
 * @param values  preference value / similarity column pairs for that user
 * @param context passed through to {@code writeRecommendedItems}
 */
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* per-item running sum for the numerator of the prediction formula */
    Vector numerators = null;
    /* per-item running sum for the denominator of the prediction formula */
    Vector denominators = null;
    /* per-item count of similar items that contributed to the prediction */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable entry : values) {
        Vector simColumn = entry.getSimilarityColumn();
        float prefValue = entry.getPrefValue();

        /* bump the usage count of every item with a non-zero similarity */
        for (Iterator<Vector.Element> nonZeros = simColumn.iterateNonZero(); nonZeros.hasNext();) {
            int idx = nonZeros.next().index();
            numberOfSimilarItemsUsed.setQuick(idx, numberOfSimilarItemsUsed.getQuick(idx) + 1);
        }

        /* the similarity column is used unscaled when the preference equals BOOLEAN_PREF_VALUE */
        boolean isBooleanPref = prefValue == BOOLEAN_PREF_VALUE;
        if (numerators == null) {
            numerators = isBooleanPref ? simColumn.clone() : simColumn.times(prefValue);
        } else {
            Vector weighted = isBooleanPref ? simColumn : simColumn.times(prefValue);
            numerators = numerators.plus(weighted);
        }

        /*
         * Must stay after the numerator update: the result of assign is discarded,
         * so this relies on simColumn being modified in place.
         */
        simColumn.assign(ABSOLUTE_VALUES);
        if (denominators == null) {
            denominators = simColumn;
        } else {
            denominators = denominators.plus(simColumn);
        }
    }

    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (Iterator<Vector.Element> nonZeros = numerators.iterateNonZero(); nonZeros.hasNext();) {
        Vector.Element element = nonZeros.next();
        int idx = element.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (numberOfSimilarItemsUsed.getQuick(idx) > 1) {
            /* normalized prediction: numerator over denominator */
            recommendationVector.setQuick(idx, element.get() / denominators.getQuick(idx));
        }
    }
    writeRecommendedItems(userID, recommendationVector, context);
}

From source file:org.trustedanalytics.atk.giraph.algorithms.als.AlternatingLeastSquaresComputation.java

License:Apache License

/**
 * Runs one superstep for a single vertex.
 *
 * <ul>
 *   <li>Superstep 0: calls {@code initialize(vertex)} and halts.</li>
 *   <li>Every {@code 2 * learningCurveOutputInterval} supersteps: accumulates squared
 *       prediction errors per edge type from the incoming messages and feeds them
 *       to the aggregators.</li>
 *   <li>While {@code step < maxSupersteps}: recomputes the vertex vector by solving a
 *       linear system built from the TRAIN messages, optionally recomputes the bias,
 *       and sends a message along every edge.</li>
 * </ul>
 * The vertex always votes to halt at the end.
 *
 * @param vertex   vertex being computed; its value's vector and bias are read and updated
 * @param messages messages received by this vertex; iterated more than once below
 * @throws IOException as declared by the overridden method
 */
@Override
public void compute(Vertex<CFVertexId, VertexData4CFWritable, EdgeData4CFWritable> vertex,
        Iterable<MessageData4CFWritable> messages) throws IOException {
    long step = getSuperstep();
    if (step == 0) {
        initialize(vertex);
        vertex.voteToHalt();
        return;
    }

    Vector currentValue = vertex.getValue().getVector();
    double currentBias = vertex.getValue().getBias();
    // update aggregators every (2 * interval) super steps
    if ((step % (2 * learningCurveOutputInterval)) == 0) {
        double errorOnTrain = 0d;
        double errorOnValidate = 0d;
        double errorOnTest = 0d;
        int numTrain = 0;
        for (MessageData4CFWritable message : messages) {
            EdgeType et = message.getType();
            double weight = message.getWeight();
            Vector vector = message.getVector();
            double otherBias = message.getBias();
            // prediction = both biases plus the dot product of the two vectors
            double predict = currentBias + otherBias + currentValue.dot(vector);
            double e = weight - predict;
            switch (et) {
            case TRAIN:
                errorOnTrain += e * e;
                numTrain++;
                break;
            case VALIDATE:
                errorOnValidate += e * e;
                break;
            case TEST:
                errorOnTest += e * e;
                break;
            default:
                // NOTE(review): the message reads "Unknown recognized"; probably meant
                // "unrecognized" -- left unchanged because it is a runtime string.
                throw new IllegalArgumentException("Unknown recognized edge type: " + et.toString());
            }
        }
        // training cost = mean squared TRAIN error plus lambda times the squared bias
        // and squared vector norm; stays 0 when there are no TRAIN messages
        double costOnTrain = 0d;
        if (numTrain > 0) {
            costOnTrain = errorOnTrain / numTrain
                    + lambda * (currentBias * currentBias + currentValue.dot(currentValue));
        }
        aggregate(SUM_TRAIN_COST, new DoubleWritable(costOnTrain));
        aggregate(SUM_VALIDATE_ERROR, new DoubleWritable(errorOnValidate));
        aggregate(SUM_TEST_ERROR, new DoubleWritable(errorOnTest));
    }

    // update vertex value
    if (step < maxSupersteps) {
        // xxt records the result of x times x transpose
        Matrix xxt = new DenseMatrix(featureDimension, featureDimension);
        xxt = xxt.assign(0d);
        // xr records the result of x times rating
        Vector xr = currentValue.clone().assign(0d);
        int numTrain = 0;
        // NOTE(review): messages may already have been iterated above; this assumes
        // the Iterable supports repeated iteration -- confirm.
        for (MessageData4CFWritable message : messages) {
            EdgeType et = message.getType();
            if (et == EdgeType.TRAIN) {
                double weight = message.getWeight();
                Vector vector = message.getVector();
                double otherBias = message.getBias();
                xxt = xxt.plus(vector.cross(vector));
                // the rating is taken with both biases subtracted
                xr = xr.plus(vector.times(weight - currentBias - otherBias));
                numTrain++;
            }
        }
        // presumably adds lambda * numTrain on the diagonal of xxt (assumes
        // DiagonalMatrix(value, size) builds a constant diagonal -- TODO confirm)
        xxt = xxt.plus(new DiagonalMatrix(lambda * numTrain, featureDimension));
        // solve xxt * value = xr, with xr placed in a single-column matrix
        Matrix bMatrix = new DenseMatrix(featureDimension, 1).assignColumn(0, xr);
        Vector value = new QRDecomposition(xxt).solve(bMatrix).viewColumn(0);
        vertex.getValue().setVector(value);

        // update vertex bias
        if (biasOn) {
            double bias = computeBias(value, messages);
            vertex.getValue().setBias(bias);
        }

        // send out messages
        for (Edge<CFVertexId, EdgeData4CFWritable> edge : vertex.getEdges()) {
            MessageData4CFWritable newMessage = new MessageData4CFWritable(vertex.getValue(), edge.getValue());
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
    }

    vertex.voteToHalt();
}

From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java

License:Apache License

/**
 * Compute gradient/*from   www .  ja v a 2 s .  c om*/
 *
 * @param bias of type double
 * @param value of type Vector
 * @param messages of type Iterable
 * @return gradient of type Vector
 */
private Vector computeGradient(double bias, Vector value, Iterable<MessageData4CFWritable> messages) {
    Vector xr = value.clone().assign(0d);
    int numTrain = 0;
    for (MessageData4CFWritable message : messages) {
        EdgeType et = message.getType();
        if (et == EdgeType.TRAIN) {
            double weight = message.getWeight();
            Vector vector = message.getVector();
            double otherBias = message.getBias();
            double predict = bias + otherBias + value.dot(vector);
            double e = predict - weight;
            xr = xr.plus(vector.times(e));
            numTrain++;
        }
    }
    Vector gradient = value.clone().assign(0d);
    if (numTrain > 0) {
        gradient = xr.divide(numTrain).plus(value.times(lambda));
    }
    return gradient;
}

From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java

License:Apache License

/**
 * Runs one superstep for a single vertex.
 *
 * <ul>
 *   <li>Superstep 0: calls {@code initialize(vertex)} and halts.</li>
 *   <li>Every {@code 2 * learningCurveOutputInterval} supersteps: accumulates squared
 *       prediction errors per edge type from the incoming messages and feeds them
 *       to the aggregators.</li>
 *   <li>While {@code step < maxSupersteps}: runs {@code numCGDIters} iterations that
 *       update the vertex's vector, gradient and conjugate, optionally recomputes the
 *       bias, and sends a message along every edge.</li>
 * </ul>
 * The vertex always votes to halt at the end.
 *
 * @param vertex   vertex being computed; its value's vector, gradient, conjugate and
 *                 bias are read and updated
 * @param messages messages received by this vertex; iterated more than once below
 * @throws IOException as declared by the overridden method
 */
@Override
public void compute(Vertex<CFVertexId, VertexData4CGDWritable, EdgeData4CFWritable> vertex,
        Iterable<MessageData4CFWritable> messages) throws IOException {
    long step = getSuperstep();
    if (step == 0) {
        initialize(vertex);
        vertex.voteToHalt();
        return;
    }

    Vector currentValue = vertex.getValue().getVector();
    double currentBias = vertex.getValue().getBias();
    // update aggregators every (2 * interval) super steps
    if ((step % (2 * learningCurveOutputInterval)) == 0) {
        double errorOnTrain = 0d;
        double errorOnValidate = 0d;
        double errorOnTest = 0d;
        int numTrain = 0;
        for (MessageData4CFWritable message : messages) {
            EdgeType et = message.getType();
            double weight = message.getWeight();
            Vector vector = message.getVector();
            double otherBias = message.getBias();
            // prediction = both biases plus the dot product of the two vectors
            double predict = currentBias + otherBias + currentValue.dot(vector);
            double e = weight - predict;
            switch (et) {
            case TRAIN:
                errorOnTrain += e * e;
                numTrain++;
                break;
            case VALIDATE:
                errorOnValidate += e * e;
                break;
            case TEST:
                errorOnTest += e * e;
                break;
            default:
                // NOTE(review): the message reads "Unknown recognized"; probably meant
                // "unrecognized" -- left unchanged because it is a runtime string.
                throw new IllegalArgumentException("Unknown recognized edge type: " + et.toString());
            }
        }
        // training cost = mean squared TRAIN error plus lambda times the squared bias
        // and squared vector norm; stays 0 when there are no TRAIN messages
        double costOnTrain = 0d;
        if (numTrain > 0) {
            costOnTrain = errorOnTrain / numTrain
                    + lambda * (currentBias * currentBias + currentValue.dot(currentValue));
        }
        aggregate(SUM_TRAIN_COST, new DoubleWritable(costOnTrain));
        aggregate(SUM_VALIDATE_ERROR, new DoubleWritable(errorOnValidate));
        aggregate(SUM_TEST_ERROR, new DoubleWritable(errorOnTest));
    }

    if (step < maxSupersteps) {
        // implement CGD iterations
        Vector value0 = vertex.getValue().getVector();
        Vector gradient0 = vertex.getValue().getGradient();
        Vector conjugate0 = vertex.getValue().getConjugate();
        double bias0 = vertex.getValue().getBias();
        // NOTE(review): messages is passed to the helpers on every iteration and may
        // already have been iterated above; this assumes the Iterable supports
        // repeated iteration -- confirm.
        for (int i = 0; i < numCGDIters; i++) {
            // step size along the current conjugate direction
            double alpha = computeAlpha(gradient0, conjugate0, messages);
            Vector value = value0.plus(conjugate0.times(alpha));
            // gradient at the moved value; bias0 is not changed inside this loop
            Vector gradient = computeGradient(bias0, value, messages);
            double beta = computeBeta(gradient0, conjugate0, gradient);
            // next direction: previous direction scaled by beta, minus the new gradient
            Vector conjugate = conjugate0.times(beta).minus(gradient);
            value0 = value;
            gradient0 = gradient;
            conjugate0 = conjugate;
        }
        // update vertex values
        vertex.getValue().setVector(value0);
        vertex.getValue().setConjugate(conjugate0);
        vertex.getValue().setGradient(gradient0);

        // update vertex bias
        if (biasOn) {
            double bias = computeBias(value0, messages);
            vertex.getValue().setBias(bias);
        }

        // send out messages
        for (Edge<CFVertexId, EdgeData4CFWritable> edge : vertex.getEdges()) {
            MessageData4CFWritable newMessage = new MessageData4CFWritable(vertex.getValue(), edge.getValue());
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
    }

    vertex.voteToHalt();
}