List of usage examples for org.apache.mahout.math Vector getQuick
double getQuick(int index);
From source file:org.qcri.pca.SPCADriver.java
/** * Run PPCA sequentially given the small input Y which fit into memory This * could be used also on sampled data from a distributed matrix * /*from w ww . j av a 2 s .c om*/ * Note: this implementation ignore NaN values by replacing them with 0 * * @param conf * the configuration * @param centralY * the input matrix * @param initVal * the initial values for C and ss * @param MAX_ROUNDS * maximum number of iterations * @return the error * @throws Exception */ double runSequential_JacobVersion(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS) { Matrix centralC = initVal.C;// the current implementation doesn't use initial ss of // initVal final int nRows = centralY.numRows(); final int nCols = centralY.numCols(); final int nPCs = centralC.numCols(); final float threshold = 0.00001f; log.info("tracec= " + PCACommon.trace(centralC)); // Y = Y - mean(Ye) // Also normalize the matrix for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) if (new Double(centralY.getQuick(r, c)).isNaN()) { centralY.setQuick(r, c, 0); } Vector mean = centralY.aggregateColumns(new VectorFunction() { @Override public double apply(Vector v) { return v.zSum() / nRows; } }); Vector spanVector = new DenseVector(nCols); for (int c = 0; c < nCols; c++) { Vector col = centralY.viewColumn(c); double max = col.maxValue(); double min = col.minValue(); double span = max - min; spanVector.setQuick(c, span); } for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) centralY.set(r, c, (centralY.get(r, c) - mean.get(c)) / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1)); // -------------------------- initialization // CtC = C'*C; Matrix centralCtC = centralC.transpose().times(centralC); log.info("tracectc= " + PCACommon.trace(centralCtC)); log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC))); log.info("traceye= " + PCACommon.trace(centralY)); // X = Ye * C * inv(CtC); Matrix centralX = centralY.times(centralC).times(inv(centralCtC)); log.info("tracex= " + PCACommon.trace(centralX)); // recon = X * C'; Matrix recon = centralX.times(centralC.transpose()); log.info("tracerec= " + PCACommon.trace(recon)); // ss = sum(sum((recon-Ye).^2)) / (N*D-missing); double ss = recon.minus(centralY).assign(new DoubleFunction() { @Override public double apply(double arg1) { return arg1 * arg1; } }).zSum() / (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); int count = 1; // old = Inf; double old = Double.MAX_VALUE; // -------------------------- EM Iterations // while count int round = 0; while (round < MAX_ROUNDS && count > 0) { round++; // ------------------ E-step, (co)variances // Sx = inv( eye(d) + CtC/ss ); Matrix centralSx = eye(nPCs).plus(centralCtC.divide(ss)); centralSx = inv(centralSx); // ------------------ E-step expected value // X = Ye*C*(Sx/ss); centralX = centralY.times(centralC).times(centralSx.divide(ss)); // ------------------ M-step // SumXtX = X'*X; Matrix centralSumXtX = centralX.transpose().times(centralX); // C = (Ye'*X) / (SumXtX + N*Sx ); Matrix tmpInv = inv(centralSumXtX.plus(centralSx.times(nRows))); centralC = centralY.transpose().times(centralX).times(tmpInv); // CtC = C'*C; centralCtC = centralC.transpose().times(centralC); // ss = ( sum(sum( (X*C'-Ye).^2 )) + N*sum(sum(CtC.*Sx)) + // missing*ss_old ) /(N*D); recon = centralX.times(centralC.transpose()); double error = recon.minus(centralY).assign(new DoubleFunction() { @Override public double apply(double arg1) { return arg1 * arg1; } }).zSum(); ss = error + nRows * dot(centralCtC.clone(), centralSx).zSum(); ss /= (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); double traceSx = PCACommon.trace(centralSx); double traceX = PCACommon.trace(centralX); double traceSumXtX = PCACommon.trace(centralSumXtX); double traceC = PCACommon.trace(centralC); double traceCtC = PCACommon.trace(centralCtC); log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " " + traceCtC + " " + 0); // objective = N*D + N*(D*log(ss) +PCACommon.trace(Sx)-log(det(Sx)) ) // +PCACommon.trace(SumXtX) -missing*log(ss_old); double objective = nRows * nCols + nRows * (nCols * Math.log(ss) + PCACommon.trace(centralSx) - Math.log(centralSx.determinant())) + PCACommon.trace(centralSumXtX); double rel_ch = Math.abs(1 - objective / old); old = objective; count++; if (rel_ch < threshold && count > 5) count = 0; System.out.printf("Objective: %.6f relative change: %.6f \n", objective, rel_ch); } double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of Y is: " + norm1Y); Matrix newYerror = centralY.minus(centralX.times(centralC.transpose())); double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of the reconstruction error is: " + norm1Err); initVal.C = centralC; initVal.ss = ss; return norm1Err / norm1Y; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * Subtract a vector from an array//from www .ja v a 2s. c o m * @param res: The minuend and the variable where the difference is stored * @param subtractor: the subtrahend * @return difference array */ public static double[] subtractVectorArray(double[] res, Vector subtractor) { for (int i = 0; i < res.length; i++) { res[i] -= subtractor.getQuick(i); } return res; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * dot Product of vector and array/*from w w w .j a v a2s. com*/ * @param vector * @param arr2 * @return */ public static double dotVectorArray(Vector vector, double[] arr2) { double dotRes = 0; for (int i = 0; i < arr2.length; i++) { dotRes += vector.getQuick(i) * arr2[i]; } return dotRes; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * multiply a dense vector by a matrix//from w w w . j a va 2s .com * @param xm_mahout: result vector * @return */ static Vector denseVectorTimesMatrix(Vector vector, Matrix matrix, Vector xm_mahout) { int nRows = matrix.numRows(); int nCols = matrix.numCols(); for (int c = 0; c < nCols; c++) { double dotres = 0; for (int r = 0; r < nRows; r++) dotres += vector.getQuick(r) * matrix.getQuick(r, c); xm_mahout.set(c, dotres); } return xm_mahout; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * multiply a dense vector by a matrix/*from www . j a v a2 s .c om*/ * @param arr: result array * @return */ static double[] denseVectorTimesMatrix(Vector vector, Matrix matrix, double[] arr) { int nRows = matrix.numRows(); int nCols = matrix.numCols(); for (int c = 0; c < nCols; c++) { double dotres = 0; for (int r = 0; r < nRows; r++) dotres += vector.getQuick(r) * matrix.getQuick(r, c); arr[c] = dotres; } return arr; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * multiply a dense vector by the transpose of a matrix * @param resArray: result array/*from ww w . j a v a 2 s. co m*/ * @return */ static double[] vectorTimesMatrixTranspose(Vector vector, Matrix matrix, double[] resArray) { int nRows = matrix.numRows(); int nCols = matrix.numCols(); for (int r = 0; r < nRows; r++) { double dotres = 0; for (int c = 0; c < nCols; c++) dotres += vector.getQuick(c) * matrix.getQuick(r, c); resArray[r] = dotres; } return resArray; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * Subtract a vector from a sparse vector * @param sparseVector/*from ww w . j a v a 2 s . co m*/ * @param vector * @param nonZeroIndices: the indices of nonzero elements in the sparse vector * @param resArray: result array * @return */ static double[] sparseVectorMinusVector(org.apache.spark.mllib.linalg.Vector sparseVector, Vector vector, double[] resArray, int[] nonZeroIndices) { int index = 0; double value = 0; for (int i = 0; i < nonZeroIndices.length; i++) { index = nonZeroIndices[i]; value = sparseVector.apply(index); resArray[index] = value - vector.getQuick(index); //because the array is already negated } return resArray; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * Subtract two dense vectors// w w w . ja v a2 s. com * @param vector1 * @param vector2 * @param resArray: result array * @return */ static double[] denseVectorMinusVector(org.apache.spark.mllib.linalg.Vector vector1, Vector vector2, double[] resArray) { for (int i = 0; i < vector2.size(); i++) { resArray[i] = vector1.apply(i) - vector2.getQuick(i); } return resArray; }
From source file:org.qcri.sparkpca.PCAUtils.java
/** * computes the outer (tensor) product of two vectors. The result of applying the outer product on a pair of vectors is a matrix * @param yi: sparse vector/*from w w w.jav a 2 s. co m*/ * @param ym: mean vector * @param xi: dense vector * @param xm: mean vector * @param nonZeroIndices: the indices of nonzero elements in the sparse vector * @param resArray: resulting two-dimensional array */ public static void outerProductWithIndices(org.apache.spark.mllib.linalg.Vector yi, Vector ym, double[] xi, Vector xm, double[][] resArray, int[] nonZeroIndices) { // 1. Sum(Yi' x (Xi-Xm)) int xSize = xi.length; double yScale = 0; int i, yRow; for (i = 0; i < nonZeroIndices.length; i++) { yRow = nonZeroIndices[i]; yScale = yi.apply(yRow); for (int xCol = 0; xCol < xSize; xCol++) { double centeredValue = xi[xCol] - xm.getQuick(xCol); resArray[yRow][xCol] += centeredValue * yScale; } } }
From source file:org.qcri.sparkpca.PCAUtils.java
public static void outerProductArrayInput(double[] yi, Vector ym, double[] xi, Vector xm, double[][] resArray) { int ySize = yi.length; int xSize = xi.length; for (int yRow = 0; yRow < ySize; yRow++) { for (int xCol = 0; xCol < xSize; xCol++) { double centeredValue = xi[xCol] - xm.getQuick(xCol); resArray[yRow][xCol] += centeredValue * yi[yRow]; }/* w ww .j a v a2s. com*/ } }