List of usage examples for org.apache.mahout.math.Vector.setQuick
void setQuick(int index, double value);
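For orientation before the project listings below, a minimal self-contained sketch: setQuick writes a value at an index without the bounds checking that set(int, double) performs, so the caller must guarantee the index is valid.

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class SetQuickSketch {
    public static void main(String[] args) {
        Vector dense = new DenseVector(5);
        dense.setQuick(3, 2.5);           // caller guarantees 0 <= 3 < 5
        Vector sparse = new RandomAccessSparseVector(1000);
        sparse.setQuick(42, 1.0);         // only touched indices are stored
        System.out.println(dense.get(3) + " " + sparse.get(42));
    }
}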
From source file:org.qcri.pca.NormalizeJob.java
static void sparseVectorAssign(Vector mainV, final Vector otherV, DoubleDoubleFunction function) {
    java.util.Vector<IndexValue> newZeroElements = new java.util.Vector<IndexValue>();
    Iterator<Vector.Element> nonZeroElements = mainV.nonZeroes().iterator();
    while (nonZeroElements.hasNext()) {
        Vector.Element e = nonZeroElements.next();
        double res = function.apply(e.get(), otherV.getQuick(e.index()));
        if (res != 0)
            mainV.setQuick(e.index(), res);
        else // don't affect the iterator
            newZeroElements.add(new IndexValue(e.index(), res));
    }
    for (IndexValue iv : newZeroElements)
        mainV.setQuick(iv.index, iv.value);
}
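A hypothetical call site for the helper above (IndexValue is the file's own index/value pair class; Functions.PLUS is the addition DoubleDoubleFunction from org.apache.mahout.math.function): results that become zero are queued and written back with setQuick only after iteration, because zeroing a non-zero element mid-iteration could invalidate the sparse iterator.

Vector main = new RandomAccessSparseVector(10);
main.setQuick(2, 3.0);
main.setQuick(5, 1.5);
Vector other = new RandomAccessSparseVector(10);
other.setQuick(2, -3.0);    // 3.0 + (-3.0) = 0 takes the deferred-write path
sparseVectorAssign(main, other, Functions.PLUS);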
From source file:org.qcri.pca.SPCADriver.java
/**
 * Run PPCA sequentially given a small input Y that fits into memory. This
 * could also be used on data sampled from a distributed matrix.
 *
 * Note: this implementation ignores NaN values by replacing them with 0.
 *
 * @param conf the configuration
 * @param centralY the input matrix
 * @param initVal the initial values for C and ss
 * @param MAX_ROUNDS maximum number of iterations
 * @return the error
 * @throws Exception
 */
double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS)
        throws Exception {
    Matrix centralC = initVal.C;
    double ss = initVal.ss;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    // ignore NaN elements by replacing them with 0
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (Double.isNaN(centralY.getQuick(r, c)))
                centralY.setQuick(r, c, 0);

    // centralize and normalize the input matrix
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    // also normalize the matrix by dividing each element by its column's range
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM iterations: loop while count > 0
    Matrix centralX = null;
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // Sx = inv( eye(d) + CtC/ss );
        Matrix Sx = eye(nPCs).times(ss).plus(centralCtC);
        Sx = inv(Sx);
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(Sx.transpose());
        // XtX = X'*X + ss * Sx;
        Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss));
        // C = (Ye'*X) / XtX;
        Matrix tmpInv = inv(centralXtX);
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + trace(XtX*CtC) - 2*xcty ) / (N*D);
        double norm2 = centralY.clone().assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC));
        // ss3 = sum (X(i,:) * C' * Y(i,:)')
        DenseVector resVector = new DenseVector(nCols);
        double xctyt = 0;
        for (int i = 0; i < nRows; i++) {
            PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector);
            double res = resVector.dot(centralY.viewRow(i));
            xctyt += res;
        }
        ss -= 2 * xctyt;
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(Sx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX
                + " " + traceSumXtX + " " + traceC + " " + traceCtC + " " + 0);

        double objective = ss;
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        log.info(String.format("Objective: %.6f relative change: %.6f", objective, rel_ch));
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Ye is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}
From source file:org.qcri.pca.SPCADriver.java
/**
 * Run PPCA sequentially given a small input Y that fits into memory. This
 * could also be used on data sampled from a distributed matrix.
 *
 * Note: this implementation ignores NaN values by replacing them with 0.
 *
 * @param conf the configuration
 * @param centralY the input matrix
 * @param initVal the initial values for C and ss
 * @param MAX_ROUNDS maximum number of iterations
 * @return the error
 * @throws Exception
 */
double runSequential_JacobVersion(Configuration conf, Matrix centralY, InitialValues initVal,
        final int MAX_ROUNDS) {
    // the current implementation doesn't use the initial ss of initVal
    Matrix centralC = initVal.C;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    // Y = Y - mean(Ye); also normalize the matrix
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (Double.isNaN(centralY.getQuick(r, c)))
                centralY.setQuick(r, c, 0);
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    // -------------------------- initialization
    // CtC = C'*C;
    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    // X = Ye * C * inv(CtC);
    Matrix centralX = centralY.times(centralC).times(inv(centralCtC));
    log.info("tracex= " + PCACommon.trace(centralX));
    // recon = X * C';
    Matrix recon = centralX.times(centralC.transpose());
    log.info("tracerec= " + PCACommon.trace(recon));
    // ss = sum(sum((recon-Ye).^2)) / (N*D-missing);
    double ss = recon.minus(centralY).assign(new DoubleFunction() {
        @Override
        public double apply(double arg1) {
            return arg1 * arg1;
        }
    }).zSum() / (nRows * nCols);
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM iterations: loop while count > 0
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // ------------------ E-step, (co)variances
        // Sx = inv( eye(d) + CtC/ss );
        Matrix centralSx = eye(nPCs).plus(centralCtC.divide(ss));
        centralSx = inv(centralSx);
        // ------------------ E-step, expected value
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(centralSx.divide(ss));
        // ------------------ M-step
        // SumXtX = X'*X;
        Matrix centralSumXtX = centralX.transpose().times(centralX);
        // C = (Ye'*X) / (SumXtX + N*Sx);
        Matrix tmpInv = inv(centralSumXtX.plus(centralSx.times(nRows)));
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + N*sum(sum(CtC.*Sx)) + missing*ss_old ) / (N*D);
        recon = centralX.times(centralC.transpose());
        double error = recon.minus(centralY).assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = error + nRows * dot(centralCtC.clone(), centralSx).zSum();
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(centralSx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralSumXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX
                + " " + traceC + " " + traceCtC + " " + 0);

        // objective = N*D + N*( D*log(ss) + trace(Sx) - log(det(Sx)) )
        //             + trace(SumXtX) - missing*log(ss_old);
        double objective = nRows * nCols
                + nRows * (nCols * Math.log(ss) + PCACommon.trace(centralSx)
                        - Math.log(centralSx.determinant()))
                + PCACommon.trace(centralSumXtX);
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        System.out.printf("Objective: %.6f relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Y is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}
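The dot(...) helper above is also not part of this listing; from the MATLAB comment sum(sum(CtC.*Sx)) it is presumably an element-wise (Hadamard) product that mutates its first argument, which would explain why the call site passes centralCtC.clone(). A sketch under that assumption, using the matrix counterparts of getQuick/setQuick:

// Assumed element-wise product: a[r][c] *= b[r][c], returned for chaining with zSum().
static Matrix dot(Matrix a, Matrix b) {
    for (int r = 0; r < a.numRows(); r++) {
        for (int c = 0; c < a.numCols(); c++) {
            a.setQuick(r, c, a.getQuick(r, c) * b.getQuick(r, c));
        }
    }
    return a;
}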
From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java
License:Apache License
/**
 * Initialize vertex.
 *
 * @param vertex of the graph
 */
private void initializeVertex(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex) {
    // normalize prior and posterior
    Vector prior = vertex.getValue().getPriorVector();
    Vector posterior = vertex.getValue().getPosteriorVector();
    int nStates = prior.size();
    double sum = 0d;
    for (int i = 0; i < nStates; i++) {
        double v = prior.getQuick(i);
        if (v < 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId() + " has negative prior value.");
        } else if (v < MIN_PRIOR_VALUE) {
            v = MIN_PRIOR_VALUE;
            prior.setQuick(i, v);
        }
        sum += v;
    }
    for (int i = 0; i < nStates; i++) {
        posterior.setQuick(i, prior.getQuick(i) / sum);
        prior.setQuick(i, Math.log(posterior.getQuick(i)));
    }

    // collect graph statistics
    VertexType vt = vertex.getValue().getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    switch (vt) {
    case TRAIN:
        aggregate(SUM_TRAIN_VERTICES, new LongWritable(1));
        break;
    case VALIDATE:
        aggregate(SUM_VALIDATE_VERTICES, new LongWritable(1));
        break;
    case TEST:
        aggregate(SUM_TEST_VERTICES, new LongWritable(1));
        break;
    default:
        throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
    }

    // if it's not a training vertex, use a uniform posterior and don't send out messages
    if (vt != VertexType.TRAIN) {
        posterior.assign(1.0 / nStates);
        return;
    }

    // calculate messages
    IdWithVectorMessage newMessage = new IdWithVectorMessage();
    newMessage.setData(vertex.getId().get());

    // calculate initial belief
    Vector belief = prior.clone();
    for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
        double weight = edge.getValue().get();
        if (weight <= 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId()
                    + " has an edge with negative or zero weight value " + weight);
        }
        for (int i = 0; i < nStates; i++) {
            sum = 0d;
            for (int j = 0; j < nStates; j++) {
                double msg = Math.exp(prior.getQuick(j)
                        + edgePotential(Math.abs(i - j) / (double) (nStates - 1), weight));
                if (maxProduct) {
                    sum = sum > msg ? sum : msg;
                } else {
                    sum += msg;
                }
            }
            belief.setQuick(i, sum > 0d ? Math.log(sum) : Double.MIN_VALUE);
        }
        belief = belief.plus(-belief.maxValue());
        // send out messages
        newMessage.setVector(belief);
        sendMessage(edge.getTargetVertexId(), newMessage);
    }
}
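The clamp-normalize-log pattern from initializeVertex, distilled into a standalone snippet (MIN_PRIOR_VALUE is a class constant whose value is not shown here; 0.001 below is only a placeholder):

Vector prior = new DenseVector(new double[] { 0.0, 0.7, 0.3 });
double minPrior = 0.001;    // placeholder for MIN_PRIOR_VALUE
double sum = 0d;
for (int i = 0; i < prior.size(); i++) {
    double v = Math.max(prior.getQuick(i), minPrior);
    prior.setQuick(i, v);   // clamp zero/tiny priors in place
    sum += v;
}
for (int i = 0; i < prior.size(); i++) {
    prior.setQuick(i, Math.log(prior.getQuick(i) / sum));   // store log-probabilities
}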
From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java
License:Apache License
@Override
public void compute(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex,
        Iterable<IdWithVectorMessage> messages) throws IOException {
    long step = getSuperstep();
    if (step == 0) {
        initializeVertex(vertex);
        return;
    }

    // collect messages sent to this vertex
    HashMap<Long, Vector> map = new HashMap<Long, Vector>();
    for (IdWithVectorMessage message : messages) {
        map.put(message.getData(), message.getVector());
    }

    // update posterior according to prior and messages
    VertexData4LBPWritable vertexValue = vertex.getValue();
    VertexType vt = vertexValue.getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    Vector prior = vertexValue.getPriorVector();
    double nStates = prior.size();
    if (vt != VertexType.TRAIN) {
        // assign a uniform prior for validate/test vertices
        prior = prior.clone().assign(Math.log(1.0 / nStates));
    }
    // sum of prior and messages
    Vector sumPosterior = prior;
    for (IdWithVectorMessage message : messages) {
        sumPosterior = sumPosterior.plus(message.getVector());
    }
    sumPosterior = sumPosterior.plus(-sumPosterior.maxValue());

    // update posterior if this isn't an anchor vertex
    if (prior.maxValue() < anchorThreshold) {
        // normalize posterior
        Vector posterior = sumPosterior.clone().assign(Functions.EXP);
        posterior = posterior.normalize(1d);
        Vector oldPosterior = vertexValue.getPosteriorVector();
        double delta = posterior.minus(oldPosterior).norm(1d);
        // aggregate deltas
        switch (vt) {
        case TRAIN:
            aggregate(SUM_TRAIN_DELTA, new DoubleWritable(delta));
            break;
        case VALIDATE:
            aggregate(SUM_VALIDATE_DELTA, new DoubleWritable(delta));
            break;
        case TEST:
            aggregate(SUM_TEST_DELTA, new DoubleWritable(delta));
            break;
        default:
            throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
        }
        // update posterior
        vertexValue.setPosteriorVector(posterior);
    }

    if (step < maxSupersteps) {
        // if it's not a training vertex, don't send out messages
        if (vt != VertexType.TRAIN) {
            return;
        }
        IdWithVectorMessage newMessage = new IdWithVectorMessage();
        newMessage.setData(vertex.getId().get());
        // update belief
        Vector belief = prior.clone();
        for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
            double weight = edge.getValue().get();
            long id = edge.getTargetVertexId().get();
            Vector tempVector = sumPosterior;
            if (map.containsKey(id)) {
                tempVector = sumPosterior.minus(map.get(id));
            }
            for (int i = 0; i < nStates; i++) {
                double sum = 0d;
                for (int j = 0; j < nStates; j++) {
                    double msg = Math.exp(tempVector.getQuick(j)
                            + edgePotential(Math.abs(i - j) / (nStates - 1), weight));
                    if (maxProduct) {
                        sum = sum > msg ? sum : msg;
                    } else {
                        sum += msg;
                    }
                }
                belief.setQuick(i, sum > 0d ? Math.log(sum) : Double.MIN_VALUE);
            }
            belief = belief.plus(-belief.maxValue());
            newMessage.setVector(belief);
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
    } else {
        // convert prior back to regular scale before output
        prior = vertexValue.getPriorVector();
        prior = prior.assign(Functions.EXP);
        vertexValue.setPriorVector(prior);
        vertex.voteToHalt();
    }
}
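Both LBP methods above write each per-state message entry with setQuick after aggregating over source states. The same kernel in isolation, with the edge potential stubbed (the real edgePotential is a private method of the class not shown in this listing; the linear smoothness form below is only an assumption):

// One outgoing message in log space; 'potential' stands in for edgePotential(state, weight).
static Vector computeMessage(Vector logIn, double weight, boolean maxProduct) {
    int nStates = logIn.size();
    Vector msgOut = logIn.like();   // empty vector of the same type and size
    for (int i = 0; i < nStates; i++) {
        double sum = 0d;
        for (int j = 0; j < nStates; j++) {
            double potential = -(Math.abs(i - j) / (double) (nStates - 1)) * weight; // assumed form
            double msg = Math.exp(logIn.getQuick(j) + potential);
            sum = maxProduct ? Math.max(sum, msg) : sum + msg;
        }
        msgOut.setQuick(i, sum > 0d ? Math.log(sum) : Double.MIN_VALUE); // same floor as the listings above
    }
    return msgOut.plus(-msgOut.maxValue());   // shift so the max entry is 0
}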