List of usage examples for org.apache.mahout.math.Vector#clone()
@SuppressWarnings("CloneDoesntDeclareCloneNotSupportedException")
Vector clone();
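As a baseline, clone() produces an independent copy of the receiver, so mutating the copy leaves the original untouched. The snippet below is a minimal, self-contained sketch of that behavior; the class name and vector values are invented for illustration:

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class VectorCloneDemo {
    public static void main(String[] args) {
        Vector original = new DenseVector(new double[] { 1.0, 2.0, 3.0 });
        // clone() returns a new Vector with the same cardinality and values
        Vector copy = original.clone();
        copy.setQuick(0, 99.0);
        // the original is unchanged: prints "1.0 99.0"
        System.out.println(original.get(0) + " " + copy.get(0));
    }
}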
From source file:edu.snu.dolphin.bsp.examples.ml.data.ClusterStats.java
License:Apache License
/**
 * We may select whether to create a deep copy of @member pointSum and @member outProdSum,
 * or just a reference.
 * @param outProdSum
 * @param pointSum
 * @param probSum
 * @param isDeepCopy
 */
public ClusterStats(final Matrix outProdSum, final Vector pointSum, final double probSum,
        final boolean isDeepCopy) {
    if (isDeepCopy) {
        this.outProdSum = outProdSum.clone();
        this.pointSum = pointSum.clone();
    } else {
        this.outProdSum = outProdSum;
        this.pointSum = pointSum;
    }
    this.probSum = probSum;
}
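To see why the isDeepCopy flag matters, the hypothetical caller below (not part of ClusterStats; values invented, org.apache.mahout.math imports assumed as in the first sketch) shows that a plain reference keeps tracking later mutations of the caller's vector, while clone() decouples the snapshot:

// hypothetical illustration, not part of ClusterStats
Vector pointSum = new DenseVector(new double[] { 1.0, 1.0 });
Vector shared = pointSum;           // reference only: both names see later changes
Vector snapshot = pointSum.clone(); // deep copy: an independent buffer
pointSum.setQuick(0, 5.0);
// shared.get(0) == 5.0, snapshot.get(0) == 1.0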
From source file:eu.stratosphere.library.clustering.DistributedOnePassKMeans.BallKMeans.java
License:Apache License
/**
 * Examines the datapoints and updates cluster centers to be the centroid of the nearest datapoints. To
 * compute a new center for cluster c_i, we average all points that are closer than d_i * trimFraction
 * where d_i is
 *
 * d_i = min_j \sqrt ||c_j - c_i||^2
 *
 * By ignoring distant points, the centroids converge more quickly to a good approximation of the
 * optimal k-means solution (given good starting points).
 *
 * @param datapoints the points to cluster.
 */
private void iterativeAssignment(List<? extends WeightedVector> datapoints) {
    DistanceMeasure distanceMeasure = centroids.getDistanceMeasure();
    // closestClusterDistances.get(i) is the distance from the i'th cluster to its closest
    // neighboring cluster.
    List<Double> closestClusterDistances = Lists.newArrayListWithExpectedSize(numClusters);
    // clusterAssignments[i] == j means that the i'th point is assigned to the j'th cluster. When
    // these don't change, we are done.
    // Each point is assigned to the invalid "-1" cluster initially.
    List<Integer> clusterAssignments = Lists.newArrayList(Collections.nCopies(datapoints.size(), -1));

    boolean changed = true;
    for (int i = 0; changed && i < maxNumIterations; i++) {
        changed = false;
        // We compute what the distance between each cluster and its closest neighbor is to set a
        // proportional distance threshold for points that should be involved in calculating the
        // centroid.
        closestClusterDistances.clear();
        for (Vector center : centroids) {
            // If a centroid has no points assigned to it, the clustering failed.
            Vector closestOtherCluster = centroids.searchFirst(center, true).getValue();
            closestClusterDistances.add(distanceMeasure.distance(center, closestOtherCluster));
        }

        // Copies the current cluster centroids to newClusters and sets their weights to 0. This is
        // so we calculate the new centroids as we go through the datapoints.
        List<Centroid> newCentroids = Lists.newArrayList();
        for (Vector centroid : centroids) {
            // need a deep copy because we will mutate these values
            Centroid newCentroid = (Centroid) centroid.clone();
            newCentroid.setWeight(0);
            newCentroids.add(newCentroid);
        }

        // Pass over the datapoints computing new centroids.
        for (int j = 0; j < datapoints.size(); ++j) {
            WeightedVector datapoint = datapoints.get(j);
            // Get the closest cluster this point belongs to.
            WeightedThing<Vector> closestPair = centroids.searchFirst(datapoint, false);
            int closestIndex = ((WeightedVector) closestPair.getValue()).getIndex();
            double closestDistance = closestPair.getWeight();
            // Update its cluster assignment if necessary.
            if (closestIndex != clusterAssignments.get(j)) {
                changed = true;
                clusterAssignments.set(j, closestIndex);
            }
            // Only update if the datapoint is near enough. What this means is that the weight
            // of outliers is NOT taken into account and the final weights of the centroids will
            // reflect this (it will be less or equal to the initial sum of the weights).
            if (closestDistance < trimFraction * closestClusterDistances.get(closestIndex)) {
                newCentroids.get(closestIndex).update(datapoint);
            }
        }

        // Add the new centers back into searcher.
        centroids.clear();
        centroids.addAll(newCentroids);
    }

    if (correctWeights) {
        for (Vector v : centroids) {
            ((Centroid) v).setWeight(0);
        }
        for (WeightedVector datapoint : datapoints) {
            Centroid closestCentroid = (Centroid) centroids.searchFirst(datapoint, false).getValue();
            closestCentroid.setWeight(closestCentroid.getWeight() + datapoint.getWeight());
        }
    }
}
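The "deep copy because we will mutate" step relies on cloning a Centroid (org.apache.mahout.math.Centroid extends WeightedVector) and then resetting the copy's weight. A rough sketch of just that pattern, assuming Centroid's three-argument (key, vector, weight) constructor and invented values:

// hypothetical values for illustration
Centroid centroid = new Centroid(0, new DenseVector(new double[] { 2.0, 4.0 }), 3.0);
// copy before mutating, so the searcher's current centroid is not disturbed
Centroid scratch = (Centroid) centroid.clone();
scratch.setWeight(0);
// centroid.getWeight() is still 3.0; scratch accumulates the next iteration's points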
From source file:hadoop.api.AggregateAndRecommendReducer.java
License:Apache License
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* each entry here is the sum in the numerator of the prediction formula */
    Vector numerators = null;
    /* each entry here is the sum in the denominator of the prediction formula */
    Vector denominators = null;
    /* each entry here is the number of similar items used in the prediction formula */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
        Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn();
        float prefValue = prefAndSimilarityColumn.getPrefValue();
        /* count the number of items used for each prediction */
        for (Element e : simColumn.nonZeroes()) {
            int itemIDIndex = e.index();
            numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1);
        }

        if (denominators == null) {
            denominators = simColumn.clone();
        } else {
            denominators.assign(simColumn, Functions.PLUS_ABS);
        }

        if (numerators == null) {
            numerators = simColumn.clone();
            if (prefValue != BOOLEAN_PREF_VALUE) {
                numerators.assign(Functions.MULT, prefValue);
            }
        } else {
            if (prefValue != BOOLEAN_PREF_VALUE) {
                simColumn.assign(Functions.MULT, prefValue);
            }
            numerators.assign(simColumn, Functions.PLUS);
        }
    }

    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (Element element : numerators.nonZeroes()) {
        int itemIDIndex = element.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {
            /* compute normalized prediction */
            double prediction = element.get() / denominators.getQuick(itemIDIndex);
            recommendationVector.setQuick(itemIDIndex, prediction);
        }
    }

    writeRecommendedItems(userID, recommendationVector, context);
}
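The accumulator idiom in this reducer (clone the first similarity column, fold later columns in with assign) can be exercised in isolation. A small sketch with made-up similarity values; the MapReduce harness and BOOLEAN_PREF_VALUE are omitted, and RandomAccessSparseVector plus Functions come from org.apache.mahout.math and org.apache.mahout.math.function:

Vector col1 = new RandomAccessSparseVector(10, 4);
col1.setQuick(2, 0.5);
col1.setQuick(7, -0.25);
Vector col2 = new RandomAccessSparseVector(10, 4);
col2.setQuick(2, 0.75);

// the first column seeds the accumulator; clone() so the writable's vector is not mutated
Vector denominators = col1.clone();
// later columns are folded in by absolute value, mirroring Functions.PLUS_ABS above
denominators.assign(col2, Functions.PLUS_ABS);
// denominators now holds {2: 1.25, 7: 0.25}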
From source file:nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java
License:Apache License
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* each entry here is the sum in the numerator of the prediction formula */
    Vector numerators = null;
    /* each entry here is the sum in the denominator of the prediction formula */
    Vector denominators = null;
    /* each entry here is the number of similar items used in the prediction formula */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable prefAndSimilarityColumn : values) {
        Vector simColumn = prefAndSimilarityColumn.getSimilarityColumn();
        float prefValue = prefAndSimilarityColumn.getPrefValue();
        /* count the number of items used for each prediction */
        Iterator<Vector.Element> usedItemsIterator = simColumn.iterateNonZero();
        while (usedItemsIterator.hasNext()) {
            int itemIDIndex = usedItemsIterator.next().index();
            numberOfSimilarItemsUsed.setQuick(itemIDIndex, numberOfSimilarItemsUsed.getQuick(itemIDIndex) + 1);
        }

        numerators = numerators == null
                ? prefValue == BOOLEAN_PREF_VALUE ? simColumn.clone() : simColumn.times(prefValue)
                : numerators.plus(prefValue == BOOLEAN_PREF_VALUE ? simColumn : simColumn.times(prefValue));

        simColumn.assign(ABSOLUTE_VALUES);
        denominators = denominators == null ? simColumn : denominators.plus(simColumn);
    }

    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    Iterator<Vector.Element> iterator = numerators.iterateNonZero();
    while (iterator.hasNext()) {
        Vector.Element element = iterator.next();
        int itemIDIndex = element.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {
            /* compute normalized prediction */
            double prediction = element.get() / denominators.getQuick(itemIDIndex);
            recommendationVector.setQuick(itemIDIndex, prediction);
        }
    }

    writeRecommendedItems(userID, recommendationVector, context);
}
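Unlike the previous reducer, this variant accumulates with plus(), which allocates a fresh result vector on every call, while assign() mutates the receiver in place; it also uses the older iterateNonZero() iterator rather than nonZeroes(). A short sketch contrasting the two accumulation styles, with invented vectors and the same imports as above:

Vector a = new DenseVector(new double[] { 1.0, 0.0 });
Vector b = new DenseVector(new double[] { 0.5, 0.5 });

// plus() leaves 'a' untouched and returns a new vector
Vector viaPlus = a.plus(b);          // {1.5, 0.5}; a is still {1.0, 0.0}

// assign() folds 'b' into a clone of 'a' in place
Vector viaAssign = a.clone();
viaAssign.assign(b, Functions.PLUS); // {1.5, 0.5}, again without touching 'a'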
From source file:org.trustedanalytics.atk.giraph.algorithms.als.AlternatingLeastSquaresComputation.java
License:Apache License
@Override
public void compute(Vertex<CFVertexId, VertexData4CFWritable, EdgeData4CFWritable> vertex,
        Iterable<MessageData4CFWritable> messages) throws IOException {
    long step = getSuperstep();
    if (step == 0) {
        initialize(vertex);
        vertex.voteToHalt();
        return;
    }

    Vector currentValue = vertex.getValue().getVector();
    double currentBias = vertex.getValue().getBias();
    // update aggregators every (2 * interval) super steps
    if ((step % (2 * learningCurveOutputInterval)) == 0) {
        double errorOnTrain = 0d;
        double errorOnValidate = 0d;
        double errorOnTest = 0d;
        int numTrain = 0;
        for (MessageData4CFWritable message : messages) {
            EdgeType et = message.getType();
            double weight = message.getWeight();
            Vector vector = message.getVector();
            double otherBias = message.getBias();
            double predict = currentBias + otherBias + currentValue.dot(vector);
            double e = weight - predict;
            switch (et) {
            case TRAIN:
                errorOnTrain += e * e;
                numTrain++;
                break;
            case VALIDATE:
                errorOnValidate += e * e;
                break;
            case TEST:
                errorOnTest += e * e;
                break;
            default:
                throw new IllegalArgumentException("Unknown recognized edge type: " + et.toString());
            }
        }
        double costOnTrain = 0d;
        if (numTrain > 0) {
            costOnTrain = errorOnTrain / numTrain
                    + lambda * (currentBias * currentBias + currentValue.dot(currentValue));
        }
        aggregate(SUM_TRAIN_COST, new DoubleWritable(costOnTrain));
        aggregate(SUM_VALIDATE_ERROR, new DoubleWritable(errorOnValidate));
        aggregate(SUM_TEST_ERROR, new DoubleWritable(errorOnTest));
    }

    // update vertex value
    if (step < maxSupersteps) {
        // xxt records the result of x times x transpose
        Matrix xxt = new DenseMatrix(featureDimension, featureDimension);
        xxt = xxt.assign(0d);
        // xr records the result of x times rating
        Vector xr = currentValue.clone().assign(0d);
        int numTrain = 0;
        for (MessageData4CFWritable message : messages) {
            EdgeType et = message.getType();
            if (et == EdgeType.TRAIN) {
                double weight = message.getWeight();
                Vector vector = message.getVector();
                double otherBias = message.getBias();
                xxt = xxt.plus(vector.cross(vector));
                xr = xr.plus(vector.times(weight - currentBias - otherBias));
                numTrain++;
            }
        }
        xxt = xxt.plus(new DiagonalMatrix(lambda * numTrain, featureDimension));
        Matrix bMatrix = new DenseMatrix(featureDimension, 1).assignColumn(0, xr);
        Vector value = new QRDecomposition(xxt).solve(bMatrix).viewColumn(0);
        vertex.getValue().setVector(value);

        // update vertex bias
        if (biasOn) {
            double bias = computeBias(value, messages);
            vertex.getValue().setBias(bias);
        }

        // send out messages
        for (Edge<CFVertexId, EdgeData4CFWritable> edge : vertex.getEdges()) {
            MessageData4CFWritable newMessage = new MessageData4CFWritable(vertex.getValue(), edge.getValue());
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
    }
    vertex.voteToHalt();
}
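Both this ALS step and the conjugate-gradient code below use currentValue.clone().assign(0d) to obtain a zero vector with the same cardinality as an existing one, without hard-coding featureDimension. A minimal sketch of that idiom, with invented values and the usual org.apache.mahout.math imports:

Vector features = new DenseVector(new double[] { 0.3, -1.2, 0.8 });
// a zero accumulator shaped like 'features'; clone() first so 'features' itself is not zeroed
Vector accumulator = features.clone().assign(0d);
accumulator = accumulator.plus(features.times(2.0));
// accumulator == {0.6, -2.4, 1.6}; features is unchanged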
From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java
License:Apache License
/**
 * Initialize vertex, collect graph statistics and send out messages
 *
 * @param vertex of the graph
 */
private void initialize(Vertex<CFVertexId, VertexData4CGDWritable, EdgeData4CFWritable> vertex) {
    // initialize vertex data: bias, vector, gradient, conjugate
    vertex.getValue().setBias(0d);
    vertex.getValue().setType(vertex.getId().isUser() ? VertexType.User : VertexType.Item);

    double sum = 0d;
    int numTrain = 0;
    for (Edge<CFVertexId, EdgeData4CFWritable> edge : vertex.getEdges()) {
        EdgeType et = edge.getValue().getType();
        if (et == EdgeType.TRAIN) {
            double weight = edge.getValue().getWeight();
            if (weight < minVal || weight > maxVal) {
                throw new IllegalArgumentException(String.format(
                        "Vertex ID: %s has an edge with weight value " + "out of the range of [%f, %f].",
                        vertex.getId().getValue(), minVal, maxVal));
            }
            sum += weight;
            numTrain++;
        }
    }

    Random rand = new Random(vertex.getId().seed());
    double[] values = new double[featureDimension];
    values[0] = 0d;
    if (numTrain > 0) {
        values[0] = sum / numTrain;
    }
    for (int i = 1; i < featureDimension; i++) {
        values[i] = rand.nextDouble() * values[0];
    }
    Vector value = new DenseVector(values);
    vertex.getValue().setVector(value);
    vertex.getValue().setGradient(value.clone().assign(0d));
    vertex.getValue().setConjugate(value.clone().assign(0d));

    // collect graph statistics and send out messages
    VertexType vt = vertex.getValue().getType();
    switch (vt) {
    case User:
        aggregate(SUM_LEFT_VERTICES, new LongWritable(1));
        break;
    case Item:
        aggregate(SUM_RIGHT_VERTICES, new LongWritable(1));
        long numTrainEdges = 0L;
        long numValidateEdges = 0L;
        long numTestEdges = 0L;
        for (Edge<CFVertexId, EdgeData4CFWritable> edge : vertex.getEdges()) {
            EdgeType et = edge.getValue().getType();
            switch (et) {
            case TRAIN:
                numTrainEdges++;
                break;
            case VALIDATE:
                numValidateEdges++;
                break;
            case TEST:
                numTestEdges++;
                break;
            default:
                throw new IllegalArgumentException("Unknown recognized edge type: " + et.toString());
            }
            // send out messages
            MessageData4CFWritable newMessage = new MessageData4CFWritable(vertex.getValue(), edge.getValue());
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
        if (numTrainEdges > 0) {
            aggregate(SUM_TRAIN_EDGES, new LongWritable(numTrainEdges));
        }
        if (numValidateEdges > 0) {
            aggregate(SUM_VALIDATE_EDGES, new LongWritable(numValidateEdges));
        }
        if (numTestEdges > 0) {
            aggregate(SUM_TEST_EDGES, new LongWritable(numTestEdges));
        }
        break;
    default:
        throw new IllegalArgumentException("Unknown recognized vertex type: " + vt.toString());
    }
}
From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java
License:Apache License
/**
 * Compute gradient
 *
 * @param bias of type double
 * @param value of type Vector
 * @param messages of type Iterable
 * @return gradient of type Vector
 */
private Vector computeGradient(double bias, Vector value, Iterable<MessageData4CFWritable> messages) {
    Vector xr = value.clone().assign(0d);
    int numTrain = 0;
    for (MessageData4CFWritable message : messages) {
        EdgeType et = message.getType();
        if (et == EdgeType.TRAIN) {
            double weight = message.getWeight();
            Vector vector = message.getVector();
            double otherBias = message.getBias();
            double predict = bias + otherBias + value.dot(vector);
            double e = predict - weight;
            xr = xr.plus(vector.times(e));
            numTrain++;
        }
    }
    Vector gradient = value.clone().assign(0d);
    if (numTrain > 0) {
        gradient = xr.divide(numTrain).plus(value.times(lambda));
    }
    return gradient;
}
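In vector form this method computes gradient = (1/n) * sum_j x_j * (predict_j - r_j) + lambda * v over the n TRAIN messages. A toy check of the arithmetic with a single message; the lambda, bias and rating values are placeholders and the Giraph message types are left out:

double lambda = 0.1;
double bias = 0.0, otherBias = 0.0, rating = 4.0;
Vector v = new DenseVector(new double[] { 1.0, 2.0 }); // this vertex's factors
Vector x = new DenseVector(new double[] { 0.5, 0.5 }); // the neighbour's factors
double predict = bias + otherBias + v.dot(x);          // 1.5
double e = predict - rating;                           // -2.5
Vector xr = v.clone().assign(0d).plus(x.times(e));     // {-1.25, -1.25}
Vector gradient = xr.divide(1).plus(v.times(lambda));  // {-1.15, -1.05}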
From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java
License:Apache License
/**
 * Initialize vertex
 *
 * @param vertex of the graph
 */
private void initializeVertex(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex) {
    // normalize prior and posterior
    Vector prior = vertex.getValue().getPriorVector();
    Vector posterior = vertex.getValue().getPosteriorVector();
    int nStates = prior.size();
    double sum = 0d;
    for (int i = 0; i < nStates; i++) {
        double v = prior.getQuick(i);
        if (v < 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId() + " has negative prior value.");
        } else if (v < MIN_PRIOR_VALUE) {
            v = MIN_PRIOR_VALUE;
            prior.setQuick(i, v);
        }
        sum += v;
    }
    for (int i = 0; i < nStates; i++) {
        posterior.setQuick(i, prior.getQuick(i) / sum);
        prior.setQuick(i, Math.log(posterior.getQuick(i)));
    }

    // collect graph statistics
    VertexType vt = vertex.getValue().getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    switch (vt) {
    case TRAIN:
        aggregate(SUM_TRAIN_VERTICES, new LongWritable(1));
        break;
    case VALIDATE:
        aggregate(SUM_VALIDATE_VERTICES, new LongWritable(1));
        break;
    case TEST:
        aggregate(SUM_TEST_VERTICES, new LongWritable(1));
        break;
    default:
        throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
    }

    // if it's not a training vertex, use uniform posterior and don't send out messages
    if (vt != VertexType.TRAIN) {
        posterior.assign(1.0 / nStates);
        return;
    }

    // calculate messages
    IdWithVectorMessage newMessage = new IdWithVectorMessage();
    newMessage.setData(vertex.getId().get());
    // calculate initial belief
    Vector belief = prior.clone();
    for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
        double weight = edge.getValue().get();
        if (weight <= 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId()
                    + " has an edge with negative or zero weight value " + weight);
        }
        for (int i = 0; i < nStates; i++) {
            sum = 0d;
            for (int j = 0; j < nStates; j++) {
                double msg = Math.exp(
                        prior.getQuick(j) + edgePotential(Math.abs(i - j) / (double) (nStates - 1), weight));
                if (maxProduct) {
                    sum = sum > msg ? sum : msg;
                } else {
                    sum += msg;
                }
            }
            belief.setQuick(i, sum > 0d ? Math.log(sum) : Double.MIN_VALUE);
        }
        belief = belief.plus(-belief.maxValue());
        // send out messages
        newMessage.setVector(belief);
        sendMessage(edge.getTargetVertexId(), newMessage);
    }
}
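The important clone() here is Vector belief = prior.clone(): the per-edge message is built by repeatedly overwriting belief, while the stored log-prior must stay intact for the next edge. A stripped-down sketch of that pattern with two states and made-up values (the edgePotential term is omitted):

Vector prior = new DenseVector(new double[] { Math.log(0.9), Math.log(0.1) });
Vector belief = prior.clone();                 // scratch copy; prior stays untouched
belief.setQuick(0, belief.getQuick(0) + 0.2);  // stand-in for the message update of state 0
belief = belief.plus(-belief.maxValue());      // shift in log space so the max entry becomes 0
// prior.getQuick(0) is still log(0.9)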
From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java
License:Apache License
@Override
public void compute(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex,
        Iterable<IdWithVectorMessage> messages) throws IOException {
    long step = getSuperstep();
    if (step == 0) {
        initializeVertex(vertex);
        return;
    }

    // collect messages sent to this vertex
    HashMap<Long, Vector> map = new HashMap<Long, Vector>();
    for (IdWithVectorMessage message : messages) {
        map.put(message.getData(), message.getVector());
    }

    // update posterior according to prior and messages
    VertexData4LBPWritable vertexValue = vertex.getValue();
    VertexType vt = vertexValue.getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    Vector prior = vertexValue.getPriorVector();
    double nStates = prior.size();
    if (vt != VertexType.TRAIN) {
        // assign a uniform prior for validate/test vertex
        prior = prior.clone().assign(Math.log(1.0 / nStates));
    }
    // sum of prior and messages
    Vector sumPosterior = prior;
    for (IdWithVectorMessage message : messages) {
        sumPosterior = sumPosterior.plus(message.getVector());
    }
    sumPosterior = sumPosterior.plus(-sumPosterior.maxValue());
    // update posterior if this isn't an anchor vertex
    if (prior.maxValue() < anchorThreshold) {
        // normalize posterior
        Vector posterior = sumPosterior.clone().assign(Functions.EXP);
        posterior = posterior.normalize(1d);
        Vector oldPosterior = vertexValue.getPosteriorVector();
        double delta = posterior.minus(oldPosterior).norm(1d);
        // aggregate deltas
        switch (vt) {
        case TRAIN:
            aggregate(SUM_TRAIN_DELTA, new DoubleWritable(delta));
            break;
        case VALIDATE:
            aggregate(SUM_VALIDATE_DELTA, new DoubleWritable(delta));
            break;
        case TEST:
            aggregate(SUM_TEST_DELTA, new DoubleWritable(delta));
            break;
        default:
            throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
        }
        // update posterior
        vertexValue.setPosteriorVector(posterior);
    }

    if (step < maxSupersteps) {
        // if it's not a training vertex, don't send out messages
        if (vt != VertexType.TRAIN) {
            return;
        }

        IdWithVectorMessage newMessage = new IdWithVectorMessage();
        newMessage.setData(vertex.getId().get());

        // update belief
        Vector belief = prior.clone();
        for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
            double weight = edge.getValue().get();
            long id = edge.getTargetVertexId().get();
            Vector tempVector = sumPosterior;
            if (map.containsKey(id)) {
                tempVector = sumPosterior.minus(map.get(id));
            }
            for (int i = 0; i < nStates; i++) {
                double sum = 0d;
                for (int j = 0; j < nStates; j++) {
                    double msg = Math.exp(
                            tempVector.getQuick(j) + edgePotential(Math.abs(i - j) / (nStates - 1), weight));
                    if (maxProduct) {
                        sum = sum > msg ? sum : msg;
                    } else {
                        sum += msg;
                    }
                }
                belief.setQuick(i, sum > 0d ? Math.log(sum) : Double.MIN_VALUE);
            }
            belief = belief.plus(-belief.maxValue());
            newMessage.setVector(belief);
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
    } else {
        // convert prior back to regular scale before output
        prior = vertexValue.getPriorVector();
        prior = prior.assign(Functions.EXP);
        vertexValue.setPriorVector(prior);
        vertex.voteToHalt();
    }
}
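This superstep leans on clone() twice: once to give validate/test vertices a uniform log-prior without touching the stored prior, and once in sumPosterior.clone().assign(Functions.EXP) so the log-domain sum survives for the message loop while a probability-scale copy is normalized. A compact sketch of that second conversion with invented log values (Functions is org.apache.mahout.math.function.Functions):

Vector sumPosterior = new DenseVector(new double[] { -0.1, -2.3, -4.0 });
// exponentiate a copy, then L1-normalize; sumPosterior itself stays in log space
Vector posterior = sumPosterior.clone().assign(Functions.EXP).normalize(1d);
// posterior sums to 1.0; sumPosterior still holds the original log values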
From source file:org.trustedanalytics.atk.giraph.algorithms.lp.LabelPropagationComputation.java
License:Apache License
/**
 * initialize vertex and edges
 *
 * @param vertex a graph vertex
 */
private void initializeVertexEdges(Vertex<LongWritable, VertexData4LPWritable, DoubleWritable> vertex) {
    // normalize prior and initialize posterior
    VertexData4LPWritable vertexValue = vertex.getValue();
    Vector priorValues = vertexValue.getPriorVector();
    if (null != priorValues) {
        priorValues = priorValues.normalize(1d);
        initialVectorValues = priorValues;
    } else if (initialVectorValues != null) {
        priorValues = initialVectorValues;
        vertexValue.setLabeledStatus(false);
    } else {
        throw new RuntimeException("Vector labels missing from input data for vertex " + vertex.getId()
                + ". Add edge with vertex as first column.");
    }
    vertexValue.setPriorVector(priorValues);
    vertexValue.setPosteriorVector(priorValues.clone());
    vertexValue.setDegree(initializeEdge(vertex));

    // send out messages
    IdWithVectorMessage newMessage = new IdWithVectorMessage(vertex.getId().get(), priorValues);
    sendMessageToAllEdges(vertex, newMessage);
}
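Here clone() separates the label-propagation posterior from the prior: both start as the same normalized distribution, but later supersteps overwrite only the posterior. A small sketch of why the copy matters, with an invented label distribution and the usual org.apache.mahout.math imports:

Vector prior = new DenseVector(new double[] { 2.0, 1.0, 1.0 }).normalize(1d); // {0.5, 0.25, 0.25}
Vector posterior = prior.clone();
// a later propagation step rewrites the posterior in place...
posterior.assign(new double[] { 0.1, 0.6, 0.3 });
// ...while the prior distribution is preserved for the next update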