List of usage examples for org.apache.mahout.math Vector times
Vector times(Vector x);
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.classification.LogisticRegCmpTask.java
License:Apache License
@Override public final void run(final int iteration) { // measure accuracy posNum = 0;/*from w w w.j a va 2s . c o m*/ negNum = 0; for (final Row row : rows) { final double output = row.getOutput(); final double predict = model.predict(row.getFeature()); if (output * predict > 0) { posNum++; } else { negNum++; } } // optimize for (final Row row : rows) { final double output = row.getOutput(); final Vector input = row.getFeature(); final Vector gradient = loss.gradient(input, model.predict(input), output) .plus(regularization.gradient(model)); model.setParameters(model.getParameters().minus(gradient.times(stepSize))); } }
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java
License:Apache License
@Override public void run(final int iteration) { clusterToStats = new HashMap<>(); final int numClusters = clusterSummaries.size(); // Compute the partial statistics of each cluster for (final Vector vector : points) { final int dimension = vector.size(); Matrix outProd = null;// w ww . j a v a2 s .c om if (isCovarianceDiagonal) { outProd = new SparseMatrix(dimension, dimension); for (int j = 0; j < dimension; j++) { outProd.set(j, j, vector.get(j) * vector.get(j)); } } else { outProd = vector.cross(vector); } double denominator = 0; final double[] numerators = new double[numClusters]; for (int i = 0; i < numClusters; i++) { final ClusterSummary clusterSummary = clusterSummaries.get(i); final Vector centroid = clusterSummary.getCentroid(); final Matrix covariance = clusterSummary.getCovariance(); final Double prior = clusterSummary.getPrior(); final Vector differ = vector.minus(centroid); numerators[i] = prior / Math.sqrt(covariance.determinant()) * Math.exp(differ.dot(inverse(covariance).times(differ)) / (-2)); denominator += numerators[i]; } for (int i = 0; i < numClusters; i++) { final double posterior = denominator == 0 ? 1.0 / numerators.length : numerators[i] / denominator; if (!clusterToStats.containsKey(i)) { clusterToStats.put(i, new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false)); } else { clusterToStats.get(i).add( new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false)); } } } }
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.regression.LinearRegCmpTask.java
License:Apache License
@Override public final void run(final int iteration) { // measure loss lossSum = 0;/*w w w. jav a2 s . c om*/ for (final Row row : rows) { final double output = row.getOutput(); final double predict = model.predict(row.getFeature()); lossSum += loss.loss(predict, output); } // optimize for (final Row row : rows) { final double output = row.getOutput(); final Vector input = row.getFeature(); final Vector gradient = loss.gradient(input, model.predict(input), output) .plus(regularization.gradient(model)); model.setParameters(model.getParameters().minus(gradient.times(stepSize))); } }
From source file:edu.snu.dolphin.bsp.examples.ml.loss.HingeLoss.java
License:Apache License
/**
 * Returns the feature vector scaled by the hinge-loss factor.
 *
 * @param feature feature vector to scale
 * @param predict predicted value
 * @param output  expected value
 * @return {@code feature} times 0 when {@code predict * output >= 1},
 *         otherwise {@code feature} times {@code -output}
 */
@Override
public Vector gradient(final Vector feature, final double predict, final double output) {
  final double scale;
  if (predict * output >= 1) {
    // At or beyond the margin: the scale factor is zero.
    scale = 0;
  } else {
    scale = -output;
  }
  return feature.times(scale);
}
From source file:edu.snu.dolphin.bsp.examples.ml.loss.LogisticLoss.java
License:Apache License
@Override public Vector gradient(final Vector feature, final double predict, final double output) { // http://lingpipe-blog.com/2012/02/16/howprevent-overflow-underflow-logistic-regression/ final double exponent = -predict * output; final double maxExponent = Math.max(exponent, 0); final double logSumExp = maxExponent + Math.log(Math.exp(-maxExponent) + Math.exp(exponent - maxExponent)); return feature.times(output * (Math.exp(-logSumExp) - 1)); }
From source file:edu.snu.dolphin.bsp.examples.ml.loss.SquareLoss.java
License:Apache License
/**
 * Returns the feature vector scaled by the residual {@code predict - output}.
 *
 * @param feature feature vector to scale
 * @param predict predicted value
 * @param output  expected value
 * @return {@code feature} times {@code (predict - output)}
 */
@Override
public Vector gradient(final Vector feature, final double predict, final double output) {
  final double residual = predict - output;
  return feature.times(residual);
}
From source file:nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java
License:Apache License
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values, Context context) throws IOException, InterruptedException { /* each entry here is the sum in the numerator of the prediction formula */ Vector numerators = null;//w ww. j a v a 2 s . co m /* each entry here is the sum in the denominator of the prediction formula */ Vector denominators = null; /* each entry here is the number of similar items used in the prediction formula */ Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100); for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) { Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn(); float prefValue = prefAndSimilarityColumn.getPrefValue(); /* count the number of items used for each prediction */ Iterator<Vector.Element> usedItemsIterator = simColumn.iterateNonZero(); while (usedItemsIterator.hasNext()) { int itemIDIndex = usedItemsIterator.next().index(); numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1); } numerators = numerators == null ? prefValue == BOOLEAN_PREF_VALUE ? simColumn.clone() : simColumn.times(prefValue) : numerators.plus(prefValue == BOOLEAN_PREF_VALUE ? simColumn : simColumn.times(prefValue)); simColumn.assign(ABSOLUTE_VALUES); denominators = denominators == null ? 
simColumn : denominators.plus(simColumn); } if (numerators == null) { return; } Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100); Iterator<Vector.Element> iterator = numerators.iterateNonZero(); while (iterator.hasNext()) { Vector.Element element = iterator.next(); int itemIDIndex = element.index(); /* preference estimations must be based on at least 2 datapoints */ if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) { /* compute normalized prediction */ double prediction = element.get() / denominators.getQuick(itemIDIndex); recommendationVector.setQuick(itemIDIndex, prediction); } } writeRecommendedItems(userID, recommendationVector, context); }
From source file:org.trustedanalytics.atk.giraph.algorithms.als.AlternatingLeastSquaresComputation.java
License:Apache License
@Override public void compute(Vertex<CFVertexId, VertexData4CFWritable, EdgeData4CFWritable> vertex, Iterable<MessageData4CFWritable> messages) throws IOException { long step = getSuperstep(); if (step == 0) { initialize(vertex);/*from w w w . ja va 2s . co m*/ vertex.voteToHalt(); return; } Vector currentValue = vertex.getValue().getVector(); double currentBias = vertex.getValue().getBias(); // update aggregators every (2 * interval) super steps if ((step % (2 * learningCurveOutputInterval)) == 0) { double errorOnTrain = 0d; double errorOnValidate = 0d; double errorOnTest = 0d; int numTrain = 0; for (MessageData4CFWritable message : messages) { EdgeType et = message.getType(); double weight = message.getWeight(); Vector vector = message.getVector(); double otherBias = message.getBias(); double predict = currentBias + otherBias + currentValue.dot(vector); double e = weight - predict; switch (et) { case TRAIN: errorOnTrain += e * e; numTrain++; break; case VALIDATE: errorOnValidate += e * e; break; case TEST: errorOnTest += e * e; break; default: throw new IllegalArgumentException("Unknown recognized edge type: " + et.toString()); } } double costOnTrain = 0d; if (numTrain > 0) { costOnTrain = errorOnTrain / numTrain + lambda * (currentBias * currentBias + currentValue.dot(currentValue)); } aggregate(SUM_TRAIN_COST, new DoubleWritable(costOnTrain)); aggregate(SUM_VALIDATE_ERROR, new DoubleWritable(errorOnValidate)); aggregate(SUM_TEST_ERROR, new DoubleWritable(errorOnTest)); } // update vertex value if (step < maxSupersteps) { // xxt records the result of x times x transpose Matrix xxt = new DenseMatrix(featureDimension, featureDimension); xxt = xxt.assign(0d); // xr records the result of x times rating Vector xr = currentValue.clone().assign(0d); int numTrain = 0; for (MessageData4CFWritable message : messages) { EdgeType et = message.getType(); if (et == EdgeType.TRAIN) { double weight = message.getWeight(); Vector vector = message.getVector(); double otherBias = 
message.getBias(); xxt = xxt.plus(vector.cross(vector)); xr = xr.plus(vector.times(weight - currentBias - otherBias)); numTrain++; } } xxt = xxt.plus(new DiagonalMatrix(lambda * numTrain, featureDimension)); Matrix bMatrix = new DenseMatrix(featureDimension, 1).assignColumn(0, xr); Vector value = new QRDecomposition(xxt).solve(bMatrix).viewColumn(0); vertex.getValue().setVector(value); // update vertex bias if (biasOn) { double bias = computeBias(value, messages); vertex.getValue().setBias(bias); } // send out messages for (Edge<CFVertexId, EdgeData4CFWritable> edge : vertex.getEdges()) { MessageData4CFWritable newMessage = new MessageData4CFWritable(vertex.getValue(), edge.getValue()); sendMessage(edge.getTargetVertexId(), newMessage); } } vertex.voteToHalt(); }
From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java
License:Apache License
/** * Compute gradient/*from www . ja v a 2 s . c om*/ * * @param bias of type double * @param value of type Vector * @param messages of type Iterable * @return gradient of type Vector */ private Vector computeGradient(double bias, Vector value, Iterable<MessageData4CFWritable> messages) { Vector xr = value.clone().assign(0d); int numTrain = 0; for (MessageData4CFWritable message : messages) { EdgeType et = message.getType(); if (et == EdgeType.TRAIN) { double weight = message.getWeight(); Vector vector = message.getVector(); double otherBias = message.getBias(); double predict = bias + otherBias + value.dot(vector); double e = predict - weight; xr = xr.plus(vector.times(e)); numTrain++; } } Vector gradient = value.clone().assign(0d); if (numTrain > 0) { gradient = xr.divide(numTrain).plus(value.times(lambda)); } return gradient; }
From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java
License:Apache License
/**
 * Per-vertex step of the conjugate gradient descent computation.
 *
 * <p>Superstep 0 only initializes the vertex. On every later superstep the method
 * optionally reports squared-error aggregates, and, while {@code step < maxSupersteps},
 * runs {@code numCGDIters} conjugate-gradient iterations on the vertex vector, stores the
 * resulting vector, conjugate and gradient, optionally updates the bias, and sends the
 * new vertex value along every edge. The vertex always votes to halt.
 *
 * @param vertex   vertex being computed
 * @param messages messages received by this vertex; iterated more than once
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if a message carries an edge type other than
 *         TRAIN, VALIDATE or TEST while aggregates are being reported
 */
@Override
public void compute(Vertex<CFVertexId, VertexData4CGDWritable, EdgeData4CFWritable> vertex,
    Iterable<MessageData4CFWritable> messages) throws IOException {
  final long step = getSuperstep();
  if (step == 0) {
    initialize(vertex);
    vertex.voteToHalt();
    return;
  }

  final Vector currentValue = vertex.getValue().getVector();
  final double currentBias = vertex.getValue().getBias();

  // update aggregators every (2 * interval) super steps
  if ((step % (2 * learningCurveOutputInterval)) == 0) {
    double trainSquaredError = 0d;
    double validateSquaredError = 0d;
    double testSquaredError = 0d;
    int trainCount = 0;
    for (MessageData4CFWritable msg : messages) {
      final EdgeType edgeType = msg.getType();
      final double weight = msg.getWeight();
      final Vector otherVector = msg.getVector();
      final double otherBias = msg.getBias();
      final double predicted = currentBias + otherBias + currentValue.dot(otherVector);
      final double residual = weight - predicted;
      if (edgeType == EdgeType.TRAIN) {
        trainSquaredError += residual * residual;
        trainCount++;
      } else if (edgeType == EdgeType.VALIDATE) {
        validateSquaredError += residual * residual;
      } else if (edgeType == EdgeType.TEST) {
        testSquaredError += residual * residual;
      } else {
        throw new IllegalArgumentException("Unknown recognized edge type: " + edgeType.toString());
      }
    }

    // Training cost = mean squared error plus the lambda-weighted penalty; stays 0 without TRAIN messages.
    double trainCost = 0d;
    if (trainCount > 0) {
      final double penalty = lambda * (currentBias * currentBias + currentValue.dot(currentValue));
      trainCost = trainSquaredError / trainCount + penalty;
    }
    aggregate(SUM_TRAIN_COST, new DoubleWritable(trainCost));
    aggregate(SUM_VALIDATE_ERROR, new DoubleWritable(validateSquaredError));
    aggregate(SUM_TEST_ERROR, new DoubleWritable(testSquaredError));
  }

  // Past the last superstep: nothing left to update or send.
  if (step >= maxSupersteps) {
    vertex.voteToHalt();
    return;
  }

  // implement CGD iterations, starting from the state stored on the vertex
  Vector value = vertex.getValue().getVector();
  Vector gradient = vertex.getValue().getGradient();
  Vector conjugate = vertex.getValue().getConjugate();
  final double bias = vertex.getValue().getBias();
  for (int iter = 0; iter < numCGDIters; iter++) {
    final double alpha = computeAlpha(gradient, conjugate, messages);
    final Vector nextValue = value.plus(conjugate.times(alpha));
    final Vector nextGradient = computeGradient(bias, nextValue, messages);
    // beta is computed from the previous gradient and conjugate before either is replaced
    final double beta = computeBeta(gradient, conjugate, nextGradient);
    conjugate = conjugate.times(beta).minus(nextGradient);
    value = nextValue;
    gradient = nextGradient;
  }

  // update vertex values
  vertex.getValue().setVector(value);
  vertex.getValue().setConjugate(conjugate);
  vertex.getValue().setGradient(gradient);

  // update vertex bias
  if (biasOn) {
    final double updatedBias = computeBias(value, messages);
    vertex.getValue().setBias(updatedBias);
  }

  // send out messages
  for (Edge<CFVertexId, EdgeData4CFWritable> edge : vertex.getEdges()) {
    final MessageData4CFWritable outgoing =
        new MessageData4CFWritable(vertex.getValue(), edge.getValue());
    sendMessage(edge.getTargetVertexId(), outgoing);
  }

  vertex.voteToHalt();
}