Example usage for org.apache.mahout.math Matrix viewRow

List of usage examples for org.apache.mahout.math Matrix viewRow

Introduction

In this page you can find the example usage for org.apache.mahout.math Matrix viewRow.

Prototype

Vector viewRow(int row);

Source Link

Document

Return a reference to a row.

Usage

From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresImplicitSolver.java

License:Apache License

/** Y' (Cu - I) Y +  I */
private Matrix YtransponseCuMinusIYPlusLambdaI(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    /* (Cu -I) Y */
    OpenIntObjectHashMap<Vector> CuMinusIY = new OpenIntObjectHashMap<Vector>();
    Iterator<Vector.Element> ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        Vector curYRow = Y.viewRow(e.index());
        CuMinusIY.put(e.index(), curYRow.times(confidence(e.get()) - 1));
    }//from  w w w  . j av a 2  s  .  c  o m

    Matrix YtransponseCuMinusIY = new DenseMatrix(numFeatures, numFeatures);

    /* Y' (Cu -I) Y by outer products */
    ratings = userRatings.iterateNonZero();
    while (ratings.hasNext()) {
        Vector.Element e = ratings.next();
        for (Vector.Element feature : Y.viewRow(e.index())) {
            Vector partial = CuMinusIY.get(e.index()).times(feature.get());
            YtransponseCuMinusIY.viewRow(feature.index()).assign(partial, Functions.PLUS);
        }
    }

    /* Y' (Cu - I) Y +  I  add lambda on the diagonal */
    for (int feature = 0; feature < numFeatures; feature++) {
        YtransponseCuMinusIY.setQuick(feature, feature,
                YtransponseCuMinusIY.getQuick(feature, feature) + lambda);
    }

    return YtransponseCuMinusIY;
}

From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.Evaluate.java

License:Apache License

public static void main(String[] args) throws IOException {

    int numUsers = 1823179;
    int numItems = 136736;
    double mu = 3.157255412010664;

    String distributedSimilarityMatrixPath = "/home/ssc/Desktop/yahoo/similarityMatrix/";
    String itemBiasesFilePath = "/home/ssc/Desktop/yahoo/itemBiases.tsv";
    String userBiasesFilePath = "/home/ssc/Desktop/yahoo/userBiases.tsv";
    String trainingSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/songs.tsv";
    String holdoutSetPath = "home/ssc/Entwicklung/datasets/yahoo-songs/holdout.tsv";

    Matrix similarities = new SparseRowMatrix(numItems, numItems);

    System.out.println("Reading similarities...");
    int similaritiesRead = 0;
    Configuration conf = new Configuration();
    for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(distributedSimilarityMatrixPath), PathType.LIST, PathFilters.partFilter(), conf)) {

        int item = pair.getFirst().get();
        Iterator<Vector.Element> elements = pair.getSecond().get().iterateNonZero();

        while (elements.hasNext()) {
            Vector.Element elem = elements.next();
            similarities.setQuick(item, elem.index(), elem.get());
            similaritiesRead++;/* www .  j  a v  a  2  s  .c  o  m*/
        }
    }
    System.out.println("Found " + similaritiesRead + " similarities");

    Pattern sep = Pattern.compile("\t");

    double[] itemBiases = new double[numItems];
    double[] userBiases = new double[numUsers];

    System.out.println("Reading item biases");
    for (String line : new FileLineIterable(new File(itemBiasesFilePath))) {
        String[] parts = sep.split(line);
        itemBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    System.out.println("Reading user biases");
    for (String line : new FileLineIterable(new File(userBiasesFilePath))) {
        String[] parts = sep.split(line);
        userBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    Iterator<Rating> trainRatings = new RatingsIterable(new File(trainingSetPath)).iterator();
    Iterator<Rating> heldOutRatings = new RatingsIterable(new File(holdoutSetPath)).iterator();

    int currentUser = 0;
    OpenIntDoubleHashMap prefs = new OpenIntDoubleHashMap();

    int usersProcessed = 0;
    RunningAverage rmse = new FullRunningAverage();
    RunningAverage mae = new FullRunningAverage();

    RunningAverage rmseBase = new FullRunningAverage();
    RunningAverage maeBase = new FullRunningAverage();

    while (trainRatings.hasNext()) {
        Rating rating = trainRatings.next();
        if (rating.user() != currentUser) {

            for (int n = 0; n < 10; n++) {
                Rating heldOutRating = heldOutRatings.next();
                Preconditions.checkState(heldOutRating.user() == currentUser);

                double preference = 0.0;
                double totalSimilarity = 0.0;
                int count = 0;

                Iterator<Vector.Element> similarItems = similarities.viewRow(heldOutRating.item())
                        .iterateNonZero();
                while (similarItems.hasNext()) {
                    Vector.Element similarity = similarItems.next();
                    int similarItem = similarity.index();
                    if (prefs.containsKey(similarItem)) {
                        preference += similarity.get() * (prefs.get(similarItem)
                                - (mu + userBiases[currentUser] + itemBiases[similarItem]));
                        totalSimilarity += Math.abs(similarity.get());
                        count++;

                    }
                }

                double baselineEstimate = mu + userBiases[currentUser] + itemBiases[heldOutRating.item()];
                double estimate = baselineEstimate;

                if (count > 1) {
                    estimate += preference / totalSimilarity;
                }

                double baseError = Math.abs(heldOutRating.rating() - baselineEstimate);
                maeBase.addDatum(baseError);
                rmseBase.addDatum(baseError * baseError);

                double error = Math.abs(heldOutRating.rating() - estimate);
                mae.addDatum(error);
                rmse.addDatum(error * error);

            }

            if (++usersProcessed % 10000 == 0) {
                System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
                        + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage()
                        + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage()));
            }

            currentUser = rating.user();
            prefs.clear();

        }
        prefs.put(rating.item(), rating.rating());

    }

    System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
            + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage() + ", baseline RMSE "
            + Math.sqrt(rmseBase.getAverage()));
}

From source file:edu.indiana.d2i.htrc.skmeans.StreamingKMeansAdapterTest.java

License:Apache License

@Test
public static void testCluster() {
    int dimension = 500;

    // construct data samplers centered on the corners of a unit cube
    Matrix mean = new DenseMatrix(8, dimension);
    List<MultiNormal> rowSamplers = Lists.newArrayList();
    for (int i = 0; i < 8; i++) {
        //         mean.viewRow(i).assign(
        //               new double[] { 0.25 * (i & 4), 0.5 * (i & 2), i & 1 });

        double[] random = new double[dimension];
        for (int j = 0; j < random.length; j++) {
            random[j] = Math.random();
        }/*  w w w . j  a  v a2 s.c o m*/
        mean.viewRow(i).assign(random);
        rowSamplers.add(new MultiNormal(0.01, mean.viewRow(i)));
    }

    // sample a bunch of data points
    Matrix data = new DenseMatrix(10000, dimension);
    for (MatrixSlice row : data) {
        row.vector().assign(rowSamplers.get(row.index() % 8).sample());
    }

    // cluster the data
    long t0 = System.currentTimeMillis();

    double cutoff = StreamingKMeansAdapter.estimateCutoff(data, 100);
    Configuration conf = new Configuration();
    conf.setInt(StreamingKMeansConfigKeys.MAXCLUSTER, 1000);
    conf.setFloat(StreamingKMeansConfigKeys.CUTOFF, (float) cutoff);
    conf.setClass(StreamingKMeansConfigKeys.DIST_MEASUREMENT, EuclideanDistanceMeasure.class,
            DistanceMeasure.class);
    conf.setInt(StreamingKMeansConfigKeys.VECTOR_DIMENSION, dimension);
    StreamingKMeansAdapter skmeans = new StreamingKMeansAdapter(conf);
    // for (MatrixSlice row : Iterables.skip(data, 1)) {
    // skmeans.cluster(row.vector());
    // }
    for (MatrixSlice row : data) {
        skmeans.cluster(row.vector());
    }

    // validate
    Searcher r = skmeans.getCentroids();

    // StreamingKMeansAdapter skmeans = new StreamingKMeansAdapter();
    // Searcher r = skmeans.cluster(data, 1000, centroidFactory);

    long t1 = System.currentTimeMillis();

    assertEquals("Total weight not preserved", totalWeight(data), totalWeight(r), 1e-9);

    // and verify that each corner of the cube has a centroid very nearby
    for (MatrixSlice row : mean) {
        WeightedVector v = r.search(row.vector(), 1).get(0);
        assertTrue(v.getWeight() < 0.05);
    }
    System.out.printf("%.2f for clustering\n%.1f us per row\n", (t1 - t0) / 1000.0,
            (t1 - t0) / 1000.0 / data.rowSize() * 1e6);

    System.out.println("Done??");
}

From source file:org.qcri.pca.MahoutCompatibilityTest.java

License:Apache License

@Test
public void testMAHOUT_1221() {
    // create a matrix with an unassigned row 0
    Matrix matrix = new SparseMatrix(1, 1);
    Vector view = matrix.viewRow(0);
    final double value = 1.23;
    view.assign(value);/*from w w w .j  a v a2s  .co  m*/
    // test whether the update in the view is reflected in the matrix
    assertEquals("Matrix valye", view.getQuick(0), matrix.getQuick(0, 0), EPSILON);
}

From source file:org.qcri.pca.PCACommon.java

/**
 * Convert an in-memory representation of a matrix to a distributed version It
 * then can be used in distributed jobs/*w w  w .  j av  a  2s.c om*/
 * 
 * @param oriMatrix
 * @return path that contains the matrix files
 * @throws IOException
 */
static DistributedRowMatrix toDistributedRowMatrix(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws IOException {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (!fs.exists(outputDir)) {
        Path outputFile = new Path(outputDir, "singleSliceMatrix");
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
                VectorWritable.class);
        VectorWritable vectorWritable = new VectorWritable();
        try {
            for (int r = 0; r < origMatrix.numRows(); r++) {
                Vector vector = origMatrix.viewRow(r);
                vectorWritable.set(vector);
                writer.append(new IntWritable(r), vectorWritable);
            }
        } finally {
            writer.close();
        }
    } else {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    }
    DistributedRowMatrix dMatrix = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(),
            origMatrix.numCols());
    dMatrix.setConf(conf);
    return dMatrix;
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run PPCA sequentially given the small input Y which fit into memory This
 * could be used also on sampled data from a distributed matrix
 * /*from  ww  w  .  j a va  2  s  . c  om*/
 * Note: this implementation ignore NaN values by replacing them with 0
 * 
 * @param conf
 *          the configuration
 * @param centralY
 *          the input matrix
 * @param initVal
 *          the initial values for C and ss
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the error
 * @throws Exception
 */
double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS)
        throws Exception {
    Matrix centralC = initVal.C;
    double ss = initVal.ss;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    //ignore NaN elements by replacing them with 0
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (new Double(centralY.getQuick(r, c)).isNaN()) {
                centralY.setQuick(r, c, 0);
            }

    //centralize and normalize the input matrix
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    //also normalize the matrix by dividing each element by its columns range
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    Matrix centralX = null;
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // Sx = inv( eye(d) + CtC/ss );
        Matrix Sx = eye(nPCs).times(ss).plus(centralCtC);
        Sx = inv(Sx);
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(Sx.transpose());
        // XtX = X'*X + ss * Sx;
        Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss));
        // C = (Ye'*X) / XtX;
        Matrix tmpInv = inv(centralXtX);
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + trace(XtX*CtC) - 2*xcty ) /(N*D);
        double norm2 = centralY.clone().assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC));
        //ss3 = sum (X(i:0) * C' * Y(i,:)')
        DenseVector resVector = new DenseVector(nCols);
        double xctyt = 0;
        for (int i = 0; i < nRows; i++) {
            PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector);
            double res = resVector.dot(centralY.viewRow(i));
            xctyt += res;
        }
        ss -= 2 * xctyt;
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(Sx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        double objective = ss;
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        log.info("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Ye is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.qcri.pca.SPCADriver.java

static <M extends VectorIterable> Matrix sample(M bigMatrix, Matrix sampleMatrix) {
    log.info("Sampling a " + bigMatrix.numRows() + "x" + bigMatrix.numCols() + " into a "
            + sampleMatrix.numRows() + "x" + sampleMatrix.numCols());
    int row = 0;/*from   w w w  .j  a va  2  s .c o m*/
    Iterator<MatrixSlice> sliceIterator = bigMatrix.iterateAll();
    while (sliceIterator.hasNext() && row < sampleMatrix.numRows()) {
        MatrixSlice slice = sliceIterator.next();
        if (!PCACommon.pass(SAMPLE_RATE)) {
            sampleMatrix.viewRow(row).assign(slice.vector());
            row++;
        }
    }
    return sampleMatrix;
}

From source file:org.qcri.pca.SPCADriver.java

static void writeMatrix(Matrix origMatrix, Path outPath, Path tmpPath, String label) throws IOException {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (!fs.exists(outputDir)) {
        Path outputFile = new Path(outputDir, "singleSliceMatrix");
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
                VectorWritable.class);
        VectorWritable vectorWritable = new VectorWritable();
        try {//from  w  w w  . j  a  va  2 s. com
            for (int r = 0; r < origMatrix.numRows(); r++) {
                Vector vector = origMatrix.viewRow(r);
                vectorWritable.set(vector);
                writer.append(new IntWritable(r), vectorWritable);
            }
        } finally {
            writer.close();
        }
    } else {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    }
}

From source file:zx.soft.mahout.knn.search.AbstractSearchTest.java

License:Apache License

@Test
public void testOrdering() {
    Matrix queries = new DenseMatrix(100, 20);
    MultiNormal gen = new MultiNormal(20);
    for (int i = 0; i < 100; i++) {
        queries.viewRow(i).assign(gen.sample());
    }//from  ww  w.  j av a  2  s  .co m

    Searcher s = getSearch(20);
    // s.setSearchSize(200);
    s.addAllMatrixSlices(testData());

    for (MatrixSlice query : queries) {
        List<WeightedThing<Vector>> r = s.search(query.vector(), 200);
        double x = 0;
        for (WeightedThing<Vector> thing : r) {
            assertTrue("Scores must be monotonic increasing", thing.getWeight() > x);
            x = thing.getWeight();
        }
    }
}

From source file:zx.soft.mahout.knn.search.AbstractSearchTest.java

License:Apache License

@Test
public void testSmallSearch() {
    Matrix m = new DenseMatrix(8, 3);
    for (int i = 0; i < 8; i++) {
        m.viewRow(i).assign(new double[] { 0.125 * (i & 4), i & 2, i & 1 });
    }//w w  w .  j a v a  2s.  com

    Searcher s = getSearch(3);
    s.addAllMatrixSlices(m);
    for (MatrixSlice row : m) {
        final List<WeightedThing<Vector>> r = s.search(row.vector(), 3);
        assertEquals(0, r.get(0).getWeight(), 1e-8);
        assertEquals(0, r.get(1).getWeight(), 0.5);
        assertEquals(0, r.get(2).getWeight(), 1);
    }
}