Example usage for org.apache.mahout.math Vector get

List of usage examples for org.apache.mahout.math Vector get

Introduction

On this page you can find example usages of the get method of org.apache.mahout.math.Vector.

Prototype

double get(int index);

Source Link

Document

Returns the value at the given index.

Usage

From source file:org.qcri.pca.CompositeJobTest.java

License:Apache License

/**
 * Checks the reducer output collected by {@code writer}: exactly {@code cols}
 * keys must be present, each key must map to a single vector of size
 * {@code xsize}, and every element of that vector must equal the matching
 * entry of {@code ytx} within {@code EPSILON}.
 *
 * @param writer
 *          the record writer that captured the reducer output
 */
private void verifyYtX(DummyRecordWriter<IntWritable, VectorWritable> writer) {
    Assert.assertEquals("The reducer should output " + cols + " keys!", cols, writer.getKeys().size());
    for (IntWritable rowKey : writer.getKeys()) {
        List<VectorWritable> emitted = writer.getValue(rowKey);
        assertEquals("reducer produces more than one values per key!", 1, emitted.size());
        Vector rowVector = emitted.get(0).get();
        assertEquals("reducer vector size must match the x size!", xsize, rowVector.size());
        // Compare the emitted row element by element against the expected ytx row.
        int col = 0;
        while (col < xsize) {
            Assert.assertEquals("The ytx[" + rowKey.get() + "][" + col + "] is incorrect: ",
                    ytx[rowKey.get()][col], rowVector.get(col), EPSILON);
            col++;
        }
    }
}

From source file:org.qcri.pca.MeanAndSpanJobTest.java

License:Apache License

/**
 * Validates the output of a single mapper collected by {@code writer}.
 * <p>
 * The mapper must emit exactly three keys, each with exactly one (combined)
 * vector:
 * <ul>
 * <li>{@code MeanAndSpanJob.MEANVECTOR}: a vector of size {@code cols + 1}
 * whose element 0 is compared against {@code rows} and whose remaining
 * {@code cols} elements are checked by {@code verifySum};</li>
 * <li>{@code MeanAndSpanJob.MINVECTOR}: a vector of size {@code cols} checked
 * by {@code verifyMin};</li>
 * <li>{@code MeanAndSpanJob.MAXVECTOR}: a vector of size {@code cols} checked
 * by {@code verifyMax}.</li>
 * </ul>
 * Any other key fails the test.
 *
 * @param writer
 *          the record writer that captured the mapper output
 */
private void verifyMapperOutput(DummyRecordWriter<IntWritable, VectorWritable> writer) {
    Assert.assertEquals("Each mapper should output three keys!", 3, writer.getKeys().size());
    for (IntWritable key : writer.getKeys()) {
        List<VectorWritable> list = writer.getValue(key);
        assertEquals("Mapper did not combine the results!", 1, list.size());
        Vector v = list.get(0).get();
        switch (key.get()) {
        case MeanAndSpanJob.MEANVECTOR:
            // assertEquals takes (message, expected, actual): the expected size
            // goes first so a failure reports the values the right way round.
            Assert.assertEquals("MeanVector size does not match!", cols + 1, v.size());
            Assert.assertEquals("MeanVector count does not match!", rows, v.get(0), EPSILON);
            // Element 0 was compared to the row count above; the sums start at index 1.
            verifySum(inputVectors, v.viewPart(1, cols));
            break;
        case MeanAndSpanJob.MINVECTOR:
            Assert.assertEquals("MinVector size does not match!", cols, v.size());
            verifyMin(inputVectors, v);
            break;
        case MeanAndSpanJob.MAXVECTOR:
            Assert.assertEquals("MaxVector size does not match!", cols, v.size());
            verifyMax(inputVectors, v);
            break;
        default:
            Assert.fail("Unknown key from mapper");
        }
    }
}

From source file:org.qcri.pca.MeanAndSpanJobTest.java

License:Apache License

/**
 * Asserts that every entry of {@code spanVec} equals the range (max - min) of
 * the matching column of {@code vectors}, where each value is first passed
 * through {@code NaN2Zero}. Comparison tolerance is {@code EPSILON}.
 *
 * @param vectors
 *          the input rows; the first {@code rows} rows and {@code cols}
 *          columns are read
 * @param spanVec
 *          the span vector under test
 */
private void verifySpan(double[][] vectors, Vector spanVec) {
    for (int col = 0; col < cols; col++) {
        // Seed both extremes with the first row's value.
        double seed = NaN2Zero(vectors[0][col]);
        double highest = seed;
        double lowest = seed;
        for (int row = 0; row < rows; row++) {
            double cleaned = NaN2Zero(vectors[row][col]);
            highest = Math.max(highest, cleaned);
            lowest = Math.min(lowest, cleaned);
        }
        double expectedSpan = highest - lowest;
        Assert.assertEquals("The span is incorrect: column: " + col, expectedSpan, spanVec.get(col), EPSILON);
    }
}

From source file:org.qcri.pca.MeanAndSpanJobTest.java

License:Apache License

/**
 * Asserts that every entry of {@code maxVec} equals the largest value of the
 * matching column of {@code vectors}, where each value is first passed
 * through {@code NaN2Zero}. Comparison tolerance is {@code EPSILON}.
 *
 * @param vectors
 *          the input rows; the first {@code rows} rows and {@code cols}
 *          columns are read
 * @param maxVec
 *          the max vector under test
 */
private void verifyMax(double[][] vectors, Vector maxVec) {
    for (int col = 0; col < cols; col++) {
        // Start from the first row's value and widen as larger values appear.
        double highest = NaN2Zero(vectors[0][col]);
        for (int row = 0; row < rows; row++) {
            highest = Math.max(highest, NaN2Zero(vectors[row][col]));
        }
        Assert.assertEquals("The max is incorrect: column: " + col, highest, maxVec.get(col), EPSILON);
    }
}

From source file:org.qcri.pca.MeanAndSpanJobTest.java

License:Apache License

/**
 * Asserts that every entry of {@code minVec} equals the smallest value of the
 * matching column of {@code vectors}, where each value is first passed
 * through {@code NaN2Zero}. Comparison tolerance is {@code EPSILON}.
 *
 * @param vectors
 *          the input rows; the first {@code rows} rows and {@code cols}
 *          columns are read
 * @param minVec
 *          the min vector under test
 */
private void verifyMin(double[][] vectors, Vector minVec) {
    for (int col = 0; col < cols; col++) {
        // Start from the first row's value and narrow as smaller values appear.
        double lowest = NaN2Zero(vectors[0][col]);
        for (int row = 0; row < rows; row++) {
            lowest = Math.min(lowest, NaN2Zero(vectors[row][col]));
        }
        Assert.assertEquals("The min is incorrect: column: " + col, lowest, minVec.get(col), EPSILON);
    }
}

From source file:org.qcri.pca.MeanAndSpanJobTest.java

License:Apache License

/**
 * Asserts that every entry of {@code sumVec} equals the sum of the matching
 * column of {@code vectors}, where each value is first passed through
 * {@code NaN2Zero}. Comparison tolerance is {@code EPSILON}.
 *
 * @param vectors
 *          the input rows; the first {@code rows} rows and {@code cols}
 *          columns are read
 * @param sumVec
 *          the sum vector under test
 */
private void verifySum(double[][] vectors, Vector sumVec) {
    for (int col = 0; col < cols; col++) {
        double columnTotal = 0;
        for (int row = 0; row < rows; row++) {
            columnTotal += NaN2Zero(vectors[row][col]);
        }
        Assert.assertEquals("The sum is incorrect: column: " + col, columnTotal, sumVec.get(col), EPSILON);
    }
}

From source file:org.qcri.pca.MeanAndSpanJobTest.java

License:Apache License

/**
 * Asserts that every entry of {@code meanVec} equals the column sum of
 * {@code vectors} divided by {@code rows}, where each value is first passed
 * through {@code NaN2Zero}. Comparison tolerance is {@code EPSILON}.
 *
 * @param vectors
 *          the input rows; the first {@code rows} rows and {@code cols}
 *          columns are read
 * @param meanVec
 *          the mean vector under test
 */
private void verifyMean(double[][] vectors, Vector meanVec) {
    for (int col = 0; col < cols; col++) {
        double columnTotal = 0;
        for (int row = 0; row < rows; row++) {
            columnTotal += NaN2Zero(vectors[row][col]);
        }
        double expectedMean = columnTotal / rows;
        Assert.assertEquals("The mean is incorrect: column: " + col, expectedMean, meanVec.get(col), EPSILON);
    }
}

From source file:org.qcri.pca.NormalizeJobTest.java

License:Apache License

/**
 * Checks the mapper output collected by {@code writer}: exactly {@code rows}
 * keys must be present, each key must map to a single vector, and the first
 * {@code cols} elements of that vector must equal the matching row of
 * {@code normalize(inputVectors)} within {@code EPSILON}.
 *
 * @param writer
 *          the record writer that captured the mapper output
 */
private void verifyMapperOutput(DummyRecordWriter<IntWritable, VectorWritable> writer) {
    Assert.assertEquals("The mapper should output " + rows + " keys!", rows, writer.getKeys().size());
    double[][] expected = normalize(inputVectors);
    for (IntWritable rowKey : writer.getKeys()) {
        List<VectorWritable> emitted = writer.getValue(rowKey);
        assertEquals("Mapper produces more than one values per key!", 1, emitted.size());
        Vector rowVector = emitted.get(0).get();
        // The key is used as the row index into the expected matrix.
        int col = 0;
        while (col < cols) {
            Assert.assertEquals("The normalized value is incorrect: ", expected[rowKey.get()][col],
                    rowVector.get(col), EPSILON);
            col++;
        }
    }
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Runs PPCA sequentially on a small input matrix Y that fits into memory.
 * This can also be used on data sampled from a distributed matrix.
 * <p>
 * Note: NaN values are ignored by replacing them with 0.
 * <p>
 * Side effects: {@code centralY} is modified in place (NaN replacement, then
 * mean-centering and division by each column's span), and {@code initVal.C}
 * and {@code initVal.ss} are overwritten with the final values.
 * <p>
 * If {@code MAX_ROUNDS} is not positive the EM loop never runs, the
 * intermediate X matrix stays {@code null}, and computing the reconstruction
 * error throws a {@link NullPointerException}.
 *
 * @param conf
 *          the configuration (not read by this method)
 * @param centralY
 *          the input matrix; modified in place
 * @param initVal
 *          the initial values for C and ss; updated on return
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the ratio of the reconstruction error's norm to the norm of the
 *         normalized input, each taken as the maximum over columns of the
 *         {@code VectorNorm1} aggregate
 * @throws Exception
 */
double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS)
        throws Exception {
    Matrix centralC = initVal.C;
    double ss = initVal.ss;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    // ignore NaN elements by replacing them with 0
    for (int r = 0; r < nRows; r++) {
        for (int c = 0; c < nCols; c++) {
            // Double.isNaN avoids allocating a boxed Double for every element.
            if (Double.isNaN(centralY.getQuick(r, c))) {
                centralY.setQuick(r, c, 0);
            }
        }
    }

    // centralize and normalize the input matrix
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    // also normalize the matrix by dividing each element by its column's range
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++) {
        for (int c = 0; c < nCols; c++) {
            // A zero span (constant column) is replaced by 1 to avoid dividing by zero.
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));
        }
    }

    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    // count doubles as the loop flag: it is reset to 0 once converged.
    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    Matrix centralX = null;
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // Sx = inv( ss * eye(d) + CtC );
        // (algebraically this is inv( eye(d) + CtC/ss ) / ss)
        Matrix Sx = eye(nPCs).times(ss).plus(centralCtC);
        Sx = inv(Sx);
        // X = Ye*C*Sx';
        centralX = centralY.times(centralC).times(Sx.transpose());
        // XtX = X'*X + ss * Sx;
        Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss));
        // C = (Ye'*X) / XtX;
        Matrix tmpInv = inv(centralXtX);
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( Ye.^2 )) + trace(XtX*CtC) - 2*xcty ) /(N*D);
        double norm2 = centralY.clone().assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC));
        // xcty = sum_i ( X(i,:) * C' * Y(i,:)' )
        DenseVector resVector = new DenseVector(nCols);
        double xctyt = 0;
        for (int i = 0; i < nRows; i++) {
            PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector);
            double res = resVector.dot(centralY.viewRow(i));
            xctyt += res;
        }
        ss -= 2 * xctyt;
        // Multiply in double so a large N*D cannot overflow int.
        ss /= ((double) nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(Sx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        double objective = ss;
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        // Stop once the relative change is below the threshold, but only
        // after count has passed 5.
        if (rel_ch < threshold && count > 5)
            count = 0;
        // The message uses printf-style specifiers, so format it explicitly
        // instead of relying on the logger's own placeholder handling.
        log.info(String.format("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch));
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Ye is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Runs PPCA sequentially on a small input matrix Y that fits into memory.
 * This can also be used on data sampled from a distributed matrix.
 * <p>
 * Note: NaN values are ignored by replacing them with 0.
 * <p>
 * Side effects: {@code centralY} is modified in place (NaN replacement, then
 * mean-centering and division by each column's span), and {@code initVal.C}
 * and {@code initVal.ss} are overwritten with the final values. The incoming
 * {@code initVal.ss} is not read; ss is initialized from the reconstruction
 * error instead.
 *
 * @param conf
 *          the configuration (not read by this method)
 * @param centralY
 *          the input matrix; modified in place
 * @param initVal
 *          the initial value for C; C and ss are updated on return
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the ratio of the reconstruction error's norm to the norm of the
 *         normalized input, each taken as the maximum over columns of the
 *         {@code VectorNorm1} aggregate
 */
double runSequential_JacobVersion(Configuration conf, Matrix centralY, InitialValues initVal,
        final int MAX_ROUNDS) {
    // the current implementation doesn't use the initial ss of initVal
    Matrix centralC = initVal.C;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    // ignore NaN elements by replacing them with 0
    for (int r = 0; r < nRows; r++) {
        for (int c = 0; c < nCols; c++) {
            // Double.isNaN avoids allocating a boxed Double for every element.
            if (Double.isNaN(centralY.getQuick(r, c))) {
                centralY.setQuick(r, c, 0);
            }
        }
    }
    // Y = Y - mean(Ye)
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    // Also normalize the matrix by dividing each element by its column's range
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++) {
        for (int c = 0; c < nCols; c++) {
            // A zero span (constant column) is replaced by 1 to avoid dividing by zero.
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));
        }
    }

    // -------------------------- initialization
    // CtC = C'*C;
    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    // X = Ye * C * inv(CtC);
    Matrix centralX = centralY.times(centralC).times(inv(centralCtC));
    log.info("tracex= " + PCACommon.trace(centralX));
    // recon = X * C';
    Matrix recon = centralX.times(centralC.transpose());
    log.info("tracerec= " + PCACommon.trace(recon));
    // ss = sum(sum((recon-Ye).^2)) / (N*D-missing);
    // Multiply in double so a large N*D cannot overflow int.
    double ss = recon.minus(centralY).assign(new DoubleFunction() {
        @Override
        public double apply(double arg1) {
            return arg1 * arg1;
        }
    }).zSum() / ((double) nRows * nCols);
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    // count doubles as the loop flag: it is reset to 0 once converged.
    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // ------------------ E-step, (co)variances
        // Sx = inv( eye(d) + CtC/ss );
        Matrix centralSx = eye(nPCs).plus(centralCtC.divide(ss));
        centralSx = inv(centralSx);
        // ------------------ E-step expected value
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(centralSx.divide(ss));
        // ------------------ M-step
        // SumXtX = X'*X;
        Matrix centralSumXtX = centralX.transpose().times(centralX);
        // C = (Ye'*X) / (SumXtX + N*Sx );
        Matrix tmpInv = inv(centralSumXtX.plus(centralSx.times(nRows)));
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + N*sum(sum(CtC.*Sx)) +
        // missing*ss_old ) /(N*D);
        recon = centralX.times(centralC.transpose());
        double error = recon.minus(centralY).assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        // dot() receives a clone of CtC, presumably because it writes into
        // its first argument -- TODO confirm against dot().
        ss = error + nRows * dot(centralCtC.clone(), centralSx).zSum();
        ss /= ((double) nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(centralSx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralSumXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        // objective = N*D + N*(D*log(ss) +PCACommon.trace(Sx)-log(det(Sx)) )
        // +PCACommon.trace(SumXtX) -missing*log(ss_old);
        double objective = (double) nRows * nCols + nRows
                * (nCols * Math.log(ss) + PCACommon.trace(centralSx) - Math.log(centralSx.determinant()))
                + PCACommon.trace(centralSumXtX);
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        // Stop once the relative change is below the threshold, but only
        // after count has passed 5.
        if (rel_ch < threshold && count > 5)
            count = 0;
        // NOTE(review): this is the only message in the method written to
        // stdout rather than to the logger; kept as-is to preserve output.
        System.out.printf("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Y is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}