List of usage examples for org.apache.mahout.math.Matrix.viewRow
/**
 * Returns the row at index {@code row} as a {@link Vector}.
 *
 * NOTE(review): the name and the usage examples below (which assign into the
 * returned vector and expect the matrix to change) suggest this is a live view
 * rather than a copy - confirm against the implementing class.
 *
 * @param row index of the requested row
 * @return the vector for that row
 */
Vector viewRow(int row);
From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresImplicitSolver.java
License:Apache License
/** Y' (Cu - I) Y + I */ private Matrix YtransponseCuMinusIYPlusLambdaI(Vector userRatings) { Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!"); /* (Cu -I) Y */ OpenIntObjectHashMap<Vector> CuMinusIY = new OpenIntObjectHashMap<Vector>(); Iterator<Vector.Element> ratings = userRatings.iterateNonZero(); while (ratings.hasNext()) { Vector.Element e = ratings.next(); Vector curYRow = Y.viewRow(e.index()); CuMinusIY.put(e.index(), curYRow.times(confidence(e.get()) - 1)); }//from w w w . j av a 2 s . c o m Matrix YtransponseCuMinusIY = new DenseMatrix(numFeatures, numFeatures); /* Y' (Cu -I) Y by outer products */ ratings = userRatings.iterateNonZero(); while (ratings.hasNext()) { Vector.Element e = ratings.next(); for (Vector.Element feature : Y.viewRow(e.index())) { Vector partial = CuMinusIY.get(e.index()).times(feature.get()); YtransponseCuMinusIY.viewRow(feature.index()).assign(partial, Functions.PLUS); } } /* Y' (Cu - I) Y + I add lambda on the diagonal */ for (int feature = 0; feature < numFeatures; feature++) { YtransponseCuMinusIY.setQuick(feature, feature, YtransponseCuMinusIY.getQuick(feature, feature) + lambda); } return YtransponseCuMinusIY; }
From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.Evaluate.java
License:Apache License
/**
 * Evaluates similarity-based rating prediction against a baseline predictor.
 *
 * Loads an item-item similarity matrix, per-item and per-user biases, then streams
 * the training ratings. Whenever the user id in the training stream changes, the
 * next 10 held-out ratings are read (they must belong to the user just finished)
 * and scored twice: with the baseline {@code mu + userBias + itemBias}, and with
 * that baseline plus a similarity-weighted correction. Running MAE/RMSE for both
 * are printed every 10000 users and once at the end.
 *
 * All input locations and dataset sizes are hard-coded; {@code args} is not read.
 *
 * @param args ignored
 * @throws IOException if one of the input files cannot be read
 */
public static void main(String[] args) throws IOException {

  int numUsers = 1823179;
  int numItems = 136736;
  // Constant term of the baseline estimate (presumably the global mean rating - confirm).
  double mu = 3.157255412010664;

  String distributedSimilarityMatrixPath = "/home/ssc/Desktop/yahoo/similarityMatrix/";
  String itemBiasesFilePath = "/home/ssc/Desktop/yahoo/itemBiases.tsv";
  String userBiasesFilePath = "/home/ssc/Desktop/yahoo/userBiases.tsv";
  String trainingSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/songs.tsv";
  // Fixed: the leading '/' was missing, which made this the only path resolved
  // relative to the working directory instead of being absolute like its siblings.
  String holdoutSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/holdout.tsv";

  Matrix similarities = new SparseRowMatrix(numItems, numItems);

  System.out.println("Reading similarities...");
  int similaritiesRead = 0;
  Configuration conf = new Configuration();
  for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>(
      new Path(distributedSimilarityMatrixPath), PathType.LIST, PathFilters.partFilter(), conf)) {

    int item = pair.getFirst().get();
    Iterator<Vector.Element> elements = pair.getSecond().get().iterateNonZero();

    while (elements.hasNext()) {
      Vector.Element elem = elements.next();
      similarities.setQuick(item, elem.index(), elem.get());
      similaritiesRead++;
    }
  }
  System.out.println("Found " + similaritiesRead + " similarities");

  Pattern sep = Pattern.compile("\t");

  double[] itemBiases = new double[numItems];
  double[] userBiases = new double[numUsers];

  // Bias files are tab-separated: <id> TAB <bias>.
  System.out.println("Reading item biases");
  for (String line : new FileLineIterable(new File(itemBiasesFilePath))) {
    String[] parts = sep.split(line);
    itemBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
  }

  System.out.println("Reading user biases");
  for (String line : new FileLineIterable(new File(userBiasesFilePath))) {
    String[] parts = sep.split(line);
    userBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
  }

  Iterator<Rating> trainRatings = new RatingsIterable(new File(trainingSetPath)).iterator();
  Iterator<Rating> heldOutRatings = new RatingsIterable(new File(holdoutSetPath)).iterator();

  int currentUser = 0;
  // Training ratings (item -> rating) of the user currently being streamed.
  OpenIntDoubleHashMap prefs = new OpenIntDoubleHashMap();

  int usersProcessed = 0;
  RunningAverage rmse = new FullRunningAverage();
  RunningAverage mae = new FullRunningAverage();

  RunningAverage rmseBase = new FullRunningAverage();
  RunningAverage maeBase = new FullRunningAverage();

  // NOTE(review): evaluation is only triggered when the user id changes, so the
  // held-out ratings of the very last user in the training stream are never
  // scored. Confirm whether that is intended.
  while (trainRatings.hasNext()) {
    Rating rating = trainRatings.next();
    if (rating.user() != currentUser) {

      // Exactly 10 held-out ratings are consumed per finished user.
      for (int n = 0; n < 10; n++) {
        Rating heldOutRating = heldOutRatings.next();
        Preconditions.checkState(heldOutRating.user() == currentUser);

        double preference = 0.0;
        double totalSimilarity = 0.0;
        int count = 0;

        Iterator<Vector.Element> similarItems = similarities.viewRow(heldOutRating.item())
            .iterateNonZero();
        while (similarItems.hasNext()) {
          Vector.Element similarity = similarItems.next();
          int similarItem = similarity.index();
          if (prefs.containsKey(similarItem)) {
            // Weight the deviation of the known rating from its own baseline.
            preference += similarity.get() * (prefs.get(similarItem)
                - (mu + userBiases[currentUser] + itemBiases[similarItem]));
            totalSimilarity += Math.abs(similarity.get());
            count++;
          }
        }

        double baselineEstimate = mu + userBiases[currentUser] + itemBiases[heldOutRating.item()];
        double estimate = baselineEstimate;

        // The correction is applied only when more than one rated neighbour was found.
        if (count > 1) {
          estimate += preference / totalSimilarity;
        }

        double baseError = Math.abs(heldOutRating.rating() - baselineEstimate);
        maeBase.addDatum(baseError);
        rmseBase.addDatum(baseError * baseError);

        double error = Math.abs(heldOutRating.rating() - estimate);
        mae.addDatum(error);
        rmse.addDatum(error * error);
      }

      if (++usersProcessed % 10000 == 0) {
        System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
            + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage()
            + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage()));
      }

      currentUser = rating.user();
      prefs.clear();
    }
    prefs.put(rating.item(), rating.rating());
  }

  System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
      + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage()
      + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage()));
}
From source file:edu.indiana.d2i.htrc.skmeans.StreamingKMeansAdapterTest.java
License:Apache License
@Test public static void testCluster() { int dimension = 500; // construct data samplers centered on the corners of a unit cube Matrix mean = new DenseMatrix(8, dimension); List<MultiNormal> rowSamplers = Lists.newArrayList(); for (int i = 0; i < 8; i++) { // mean.viewRow(i).assign( // new double[] { 0.25 * (i & 4), 0.5 * (i & 2), i & 1 }); double[] random = new double[dimension]; for (int j = 0; j < random.length; j++) { random[j] = Math.random(); }/* w w w . j a v a2 s.c o m*/ mean.viewRow(i).assign(random); rowSamplers.add(new MultiNormal(0.01, mean.viewRow(i))); } // sample a bunch of data points Matrix data = new DenseMatrix(10000, dimension); for (MatrixSlice row : data) { row.vector().assign(rowSamplers.get(row.index() % 8).sample()); } // cluster the data long t0 = System.currentTimeMillis(); double cutoff = StreamingKMeansAdapter.estimateCutoff(data, 100); Configuration conf = new Configuration(); conf.setInt(StreamingKMeansConfigKeys.MAXCLUSTER, 1000); conf.setFloat(StreamingKMeansConfigKeys.CUTOFF, (float) cutoff); conf.setClass(StreamingKMeansConfigKeys.DIST_MEASUREMENT, EuclideanDistanceMeasure.class, DistanceMeasure.class); conf.setInt(StreamingKMeansConfigKeys.VECTOR_DIMENSION, dimension); StreamingKMeansAdapter skmeans = new StreamingKMeansAdapter(conf); // for (MatrixSlice row : Iterables.skip(data, 1)) { // skmeans.cluster(row.vector()); // } for (MatrixSlice row : data) { skmeans.cluster(row.vector()); } // validate Searcher r = skmeans.getCentroids(); // StreamingKMeansAdapter skmeans = new StreamingKMeansAdapter(); // Searcher r = skmeans.cluster(data, 1000, centroidFactory); long t1 = System.currentTimeMillis(); assertEquals("Total weight not preserved", totalWeight(data), totalWeight(r), 1e-9); // and verify that each corner of the cube has a centroid very nearby for (MatrixSlice row : mean) { WeightedVector v = r.search(row.vector(), 1).get(0); assertTrue(v.getWeight() < 0.05); } System.out.printf("%.2f for clustering\n%.1f us per row\n", 
(t1 - t0) / 1000.0, (t1 - t0) / 1000.0 / data.rowSize() * 1e6); System.out.println("Done??"); }
From source file:org.qcri.pca.MahoutCompatibilityTest.java
License:Apache License
@Test public void testMAHOUT_1221() { // create a matrix with an unassigned row 0 Matrix matrix = new SparseMatrix(1, 1); Vector view = matrix.viewRow(0); final double value = 1.23; view.assign(value);/*from w w w .j a v a2s .co m*/ // test whether the update in the view is reflected in the matrix assertEquals("Matrix valye", view.getQuick(0), matrix.getQuick(0, 0), EPSILON); }
From source file:org.qcri.pca.PCACommon.java
/** * Convert an in-memory representation of a matrix to a distributed version It * then can be used in distributed jobs/*w w w . j av a 2s.c om*/ * * @param oriMatrix * @return path that contains the matrix files * @throws IOException */ static DistributedRowMatrix toDistributedRowMatrix(Matrix origMatrix, Path outPath, Path tmpPath, String label) throws IOException { Configuration conf = new Configuration(); Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols()); FileSystem fs = FileSystem.get(outputDir.toUri(), conf); if (!fs.exists(outputDir)) { Path outputFile = new Path(outputDir, "singleSliceMatrix"); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class, VectorWritable.class); VectorWritable vectorWritable = new VectorWritable(); try { for (int r = 0; r < origMatrix.numRows(); r++) { Vector vector = origMatrix.viewRow(r); vectorWritable.set(vector); writer.append(new IntWritable(r), vectorWritable); } } finally { writer.close(); } } else { log.warn("----------- Skip matrix " + outputDir + " - already exists"); } DistributedRowMatrix dMatrix = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(), origMatrix.numCols()); dMatrix.setConf(conf); return dMatrix; }
From source file:org.qcri.pca.SPCADriver.java
/** * Run PPCA sequentially given the small input Y which fit into memory This * could be used also on sampled data from a distributed matrix * /*from ww w . j a va 2 s . c om*/ * Note: this implementation ignore NaN values by replacing them with 0 * * @param conf * the configuration * @param centralY * the input matrix * @param initVal * the initial values for C and ss * @param MAX_ROUNDS * maximum number of iterations * @return the error * @throws Exception */ double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS) throws Exception { Matrix centralC = initVal.C; double ss = initVal.ss; final int nRows = centralY.numRows(); final int nCols = centralY.numCols(); final int nPCs = centralC.numCols(); final float threshold = 0.00001f; log.info("tracec= " + PCACommon.trace(centralC)); //ignore NaN elements by replacing them with 0 for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) if (new Double(centralY.getQuick(r, c)).isNaN()) { centralY.setQuick(r, c, 0); } //centralize and normalize the input matrix Vector mean = centralY.aggregateColumns(new VectorFunction() { @Override public double apply(Vector v) { return v.zSum() / nRows; } }); //also normalize the matrix by dividing each element by its columns range Vector spanVector = new DenseVector(nCols); for (int c = 0; c < nCols; c++) { Vector col = centralY.viewColumn(c); double max = col.maxValue(); double min = col.minValue(); double span = max - min; spanVector.setQuick(c, span); } for (int r = 0; r < nRows; r++) for (int c = 0; c < nCols; c++) centralY.set(r, c, (centralY.get(r, c) - mean.get(c)) / (spanVector.getQuick(c) != 0 ? 
spanVector.getQuick(c) : 1)); Matrix centralCtC = centralC.transpose().times(centralC); log.info("tracectc= " + PCACommon.trace(centralCtC)); log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC))); log.info("traceye= " + PCACommon.trace(centralY)); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); int count = 1; // old = Inf; double old = Double.MAX_VALUE; // -------------------------- EM Iterations // while count Matrix centralX = null; int round = 0; while (round < MAX_ROUNDS && count > 0) { round++; // Sx = inv( eye(d) + CtC/ss ); Matrix Sx = eye(nPCs).times(ss).plus(centralCtC); Sx = inv(Sx); // X = Ye*C*(Sx/ss); centralX = centralY.times(centralC).times(Sx.transpose()); // XtX = X'*X + ss * Sx; Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss)); // C = (Ye'*X) / XtX; Matrix tmpInv = inv(centralXtX); centralC = centralY.transpose().times(centralX).times(tmpInv); // CtC = C'*C; centralCtC = centralC.transpose().times(centralC); // ss = ( sum(sum( (X*C'-Ye).^2 )) + trace(XtX*CtC) - 2*xcty ) /(N*D); double norm2 = centralY.clone().assign(new DoubleFunction() { @Override public double apply(double arg1) { return arg1 * arg1; } }).zSum(); ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC)); //ss3 = sum (X(i:0) * C' * Y(i,:)') DenseVector resVector = new DenseVector(nCols); double xctyt = 0; for (int i = 0; i < nRows; i++) { PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector); double res = resVector.dot(centralY.viewRow(i)); xctyt += res; } ss -= 2 * xctyt; ss /= (nRows * nCols); log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss); double traceSx = PCACommon.trace(Sx); double traceX = PCACommon.trace(centralX); double traceSumXtX = PCACommon.trace(centralXtX); double traceC = PCACommon.trace(centralC); double traceCtC = PCACommon.trace(centralCtC); log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " " + traceCtC + " " + 0); double objective = ss; double rel_ch = 
Math.abs(1 - objective / old); old = objective; count++; if (rel_ch < threshold && count > 5) count = 0; log.info("Objective: %.6f relative change: %.6f \n", objective, rel_ch); } double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of Ye is: " + norm1Y); Matrix newYerror = centralY.minus(centralX.times(centralC.transpose())); double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue(); log.info("Norm1 of the reconstruction error is: " + norm1Err); initVal.C = centralC; initVal.ss = ss; return norm1Err / norm1Y; }
From source file:org.qcri.pca.SPCADriver.java
/**
 * Fills {@code sampleMatrix} row by row with rows taken from {@code bigMatrix}.
 *
 * Rows of {@code bigMatrix} are visited in iteration order; a row is copied when
 * {@code PCACommon.pass(SAMPLE_RATE)} returns false for it. Sampling stops when
 * either the source is exhausted or every row of {@code sampleMatrix} has been
 * filled, so trailing rows of {@code sampleMatrix} may be left untouched.
 *
 * @param bigMatrix the matrix to sample from
 * @param sampleMatrix the destination, filled in place
 * @return {@code sampleMatrix}
 */
static <M extends VectorIterable> Matrix sample(M bigMatrix, Matrix sampleMatrix) {
  log.info("Sampling a " + bigMatrix.numRows() + "x" + bigMatrix.numCols() + " into a "
      + sampleMatrix.numRows() + "x" + sampleMatrix.numCols());

  Iterator<MatrixSlice> source = bigMatrix.iterateAll();
  for (int filled = 0; source.hasNext() && filled < sampleMatrix.numRows();) {
    MatrixSlice candidate = source.next();
    if (PCACommon.pass(SAMPLE_RATE)) {
      continue; // this row is not selected
    }
    sampleMatrix.viewRow(filled).assign(candidate.vector());
    filled++;
  }
  return sampleMatrix;
}
From source file:org.qcri.pca.SPCADriver.java
static void writeMatrix(Matrix origMatrix, Path outPath, Path tmpPath, String label) throws IOException { Configuration conf = new Configuration(); Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols()); FileSystem fs = FileSystem.get(outputDir.toUri(), conf); if (!fs.exists(outputDir)) { Path outputFile = new Path(outputDir, "singleSliceMatrix"); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class, VectorWritable.class); VectorWritable vectorWritable = new VectorWritable(); try {//from w w w . j a va 2 s. com for (int r = 0; r < origMatrix.numRows(); r++) { Vector vector = origMatrix.viewRow(r); vectorWritable.set(vector); writer.append(new IntWritable(r), vectorWritable); } } finally { writer.close(); } } else { log.warn("----------- Skip matrix " + outputDir + " - already exists"); } }
From source file:zx.soft.mahout.knn.search.AbstractSearchTest.java
License:Apache License
@Test public void testOrdering() { Matrix queries = new DenseMatrix(100, 20); MultiNormal gen = new MultiNormal(20); for (int i = 0; i < 100; i++) { queries.viewRow(i).assign(gen.sample()); }//from ww w. j av a 2 s .co m Searcher s = getSearch(20); // s.setSearchSize(200); s.addAllMatrixSlices(testData()); for (MatrixSlice query : queries) { List<WeightedThing<Vector>> r = s.search(query.vector(), 200); double x = 0; for (WeightedThing<Vector> thing : r) { assertTrue("Scores must be monotonic increasing", thing.getWeight() > x); x = thing.getWeight(); } } }
From source file:zx.soft.mahout.knn.search.AbstractSearchTest.java
License:Apache License
@Test public void testSmallSearch() { Matrix m = new DenseMatrix(8, 3); for (int i = 0; i < 8; i++) { m.viewRow(i).assign(new double[] { 0.125 * (i & 4), i & 2, i & 1 }); }//w w w . j a v a 2s. com Searcher s = getSearch(3); s.addAllMatrixSlices(m); for (MatrixSlice row : m) { final List<WeightedThing<Vector>> r = s.search(row.vector(), 3); assertEquals(0, r.get(0).getWeight(), 1e-8); assertEquals(0, r.get(1).getWeight(), 0.5); assertEquals(0, r.get(2).getWeight(), 1); } }