Example usage for org.apache.mahout.math Vector setQuick

List of usage examples for org.apache.mahout.math Vector setQuick

Introduction

On this page you can find example usages of the setQuick method of org.apache.mahout.math Vector.

Prototype

void setQuick(int index, double value);

Source Link

Document

Set the value at the given index, without checking bounds.

Usage

From source file:org.qcri.pca.NormalizeJob.java

/**
 * Applies {@code function} to every non-zero element of {@code mainV}, paired with the element
 * of {@code otherV} at the same index, and stores the result back into {@code mainV}.
 *
 * <p>Only the indices returned by {@code mainV.nonZeroes()} are visited. Non-zero results are
 * written immediately; zero results are collected and written only after the iteration has
 * finished, so that they cannot disturb the iterator.
 *
 * @param mainV the vector that is read and updated in place
 * @param otherV the vector supplying the second argument of {@code function}
 * @param function applied as {@code function.apply(mainValue, otherValue)}
 */
static void sparseVectorAssign(Vector mainV, final Vector otherV, DoubleDoubleFunction function) {
    // Fully qualified java.util names: the simple name Vector is already used for the math
    // vector type in this method. A plain ArrayList is enough for this method-local scratch list.
    java.util.List<IndexValue> newZeroElements = new java.util.ArrayList<IndexValue>();
    for (Vector.Element e : mainV.nonZeroes()) {
        double res = function.apply(e.get(), otherV.getQuick(e.index()));
        if (res != 0) {
            mainV.setQuick(e.index(), res);
        } else {
            // Don't affect the iterator: remember the index and write it after the loop.
            newZeroElements.add(new IndexValue(e.index(), res));
        }
    }
    for (IndexValue iv : newZeroElements) {
        mainV.setQuick(iv.index, iv.value);
    }
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run PPCA sequentially given the small input Y which fit into memory This
 * could be used also on sampled data from a distributed matrix
 * /* w w  w  .  j  a  v  a 2 s. c o m*/
 * Note: this implementation ignore NaN values by replacing them with 0
 * 
 * @param conf
 *          the configuration
 * @param centralY
 *          the input matrix
 * @param initVal
 *          the initial values for C and ss
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the error
 * @throws Exception
 */
double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS)
        throws Exception {
    Matrix centralC = initVal.C;
    double ss = initVal.ss;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    //ignore NaN elements by replacing them with 0
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (new Double(centralY.getQuick(r, c)).isNaN()) {
                centralY.setQuick(r, c, 0);
            }

    //centralize and normalize the input matrix
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    //also normalize the matrix by dividing each element by its columns range
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    Matrix centralX = null;
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // Sx = inv( eye(d) + CtC/ss );
        Matrix Sx = eye(nPCs).times(ss).plus(centralCtC);
        Sx = inv(Sx);
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(Sx.transpose());
        // XtX = X'*X + ss * Sx;
        Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss));
        // C = (Ye'*X) / XtX;
        Matrix tmpInv = inv(centralXtX);
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + trace(XtX*CtC) - 2*xcty ) /(N*D);
        double norm2 = centralY.clone().assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC));
        //ss3 = sum (X(i:0) * C' * Y(i,:)')
        DenseVector resVector = new DenseVector(nCols);
        double xctyt = 0;
        for (int i = 0; i < nRows; i++) {
            PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector);
            double res = resVector.dot(centralY.viewRow(i));
            xctyt += res;
        }
        ss -= 2 * xctyt;
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(Sx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        double objective = ss;
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        log.info("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Ye is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run PPCA sequentially given the small input Y which fits into memory. This
 * could also be used on sampled data from a distributed matrix.
 *
 * <p>Note: this implementation ignores NaN values by replacing them with 0.
 * {@code centralY} is modified in place: NaN entries are zeroed, then every
 * entry has its column mean subtracted and is divided by its column's
 * (max - min) span, or by 1 when that span is 0.
 *
 * @param conf
 *          the configuration (not read by this method)
 * @param centralY
 *          the input matrix; modified in place as described above
 * @param initVal
 *          the initial value for C; the initial ss of {@code initVal} is not
 *          used. On return {@code initVal.C} and {@code initVal.ss} hold the
 *          values from the last iteration
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the error, computed as {@code norm1Err / norm1Y}, where both are the
 *         maximum of the per-column {@code VectorNorm1} aggregates (of the
 *         reconstruction error and of the normalized input respectively)
 */
double runSequential_JacobVersion(Configuration conf, Matrix centralY, InitialValues initVal,
        final int MAX_ROUNDS) {
    Matrix centralC = initVal.C;// the current implementation doesn't use initial ss of
    // initVal
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;
    // Computed in double so that large matrices do not overflow int arithmetic.
    final double nCells = (double) nRows * nCols;

    log.info("tracec= " + PCACommon.trace(centralC));
    // Y = Y - mean(Ye)
    // Also normalize the matrix
    for (int r = 0; r < nRows; r++) {
        for (int c = 0; c < nCols; c++) {
            if (Double.isNaN(centralY.getQuick(r, c))) {
                centralY.setQuick(r, c, 0);
            }
        }
    }
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++) {
        for (int c = 0; c < nCols; c++) {
            // A constant column has span 0; divide by 1 instead to avoid division by zero.
            double span = spanVector.getQuick(c);
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c)) / (span != 0 ? span : 1));
        }
    }

    // -------------------------- initialization
    // CtC = C'*C;
    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    // X = Ye * C * inv(CtC);
    Matrix centralX = centralY.times(centralC).times(inv(centralCtC));
    log.info("tracex= " + PCACommon.trace(centralX));
    // recon = X * C';
    Matrix recon = centralX.times(centralC.transpose());
    log.info("tracerec= " + PCACommon.trace(recon));
    // ss = sum(sum((recon-Ye).^2)) / (N*D-missing);
    double ss = recon.minus(centralY).assign(new DoubleFunction() {
        @Override
        public double apply(double arg1) {
            return arg1 * arg1;
        }
    }).zSum() / nCells;
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    // count doubles as the loop flag: it is reset to 0 once the objective has converged.
    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // ------------------ E-step, (co)variances
        // Sx = inv( eye(d) + CtC/ss );
        Matrix centralSx = eye(nPCs).plus(centralCtC.divide(ss));
        centralSx = inv(centralSx);
        // ------------------ E-step expected value
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(centralSx.divide(ss));
        // ------------------ M-step
        // SumXtX = X'*X;
        Matrix centralSumXtX = centralX.transpose().times(centralX);
        // C = (Ye'*X) / (SumXtX + N*Sx );
        Matrix tmpInv = inv(centralSumXtX.plus(centralSx.times(nRows)));
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + N*sum(sum(CtC.*Sx)) +
        // missing*ss_old ) /(N*D);
        recon = centralX.times(centralC.transpose());
        double error = recon.minus(centralY).assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        // dot() receives a clone of CtC, so centralCtC itself is not handed to the helper.
        ss = error + nRows * dot(centralCtC.clone(), centralSx).zSum();
        ss /= nCells;

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(centralSx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralSumXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        // objective = N*D + N*(D*log(ss) +PCACommon.trace(Sx)-log(det(Sx)) )
        // +PCACommon.trace(SumXtX) -missing*log(ss_old);
        double objective = nCells + nRows
                * (nCols * Math.log(ss) + PCACommon.trace(centralSx) - Math.log(centralSx.determinant()))
                + PCACommon.trace(centralSumXtX);
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        // Stop only after a minimum number of rounds, once the relative change is small enough.
        if (rel_ch < threshold && count > 5) {
            count = 0;
        }
        System.out.printf("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Y is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    // Hand the final C and ss back to the caller through initVal.
    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java

License:Apache License

/**
 * Initialize vertex.
 *
 * <p>Clamps prior values below {@code MIN_PRIOR_VALUE} up to that value, stores the normalized
 * prior as the posterior, converts the prior to log scale in place, counts the vertex in the
 * aggregator matching its type and, for training vertices, sends one initial message along
 * every outgoing edge. Non-training vertices get a uniform posterior and send nothing.
 *
 * @param vertex of the graph
 * @throws IllegalArgumentException if a prior value is negative, the vertex type is unknown,
 *         or an edge weight is zero or negative
 */
private void initializeVertex(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex) {
    // normalize prior and posterior
    Vector prior = vertex.getValue().getPriorVector();
    Vector posterior = vertex.getValue().getPosteriorVector();
    int nStates = prior.size();
    double sum = 0d;
    for (int i = 0; i < nStates; i++) {
        double v = prior.getQuick(i);
        if (v < 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId() + " has negative prior value.");
        } else if (v < MIN_PRIOR_VALUE) {
            v = MIN_PRIOR_VALUE;
            prior.setQuick(i, v);
        }
        sum += v;
    }
    for (int i = 0; i < nStates; i++) {
        posterior.setQuick(i, prior.getQuick(i) / sum);
        // From here on the prior vector holds log values.
        prior.setQuick(i, Math.log(posterior.getQuick(i)));
    }
    // collect graph statistics
    VertexType vt = vertex.getValue().getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    switch (vt) {
    case TRAIN:
        aggregate(SUM_TRAIN_VERTICES, new LongWritable(1));
        break;
    case VALIDATE:
        aggregate(SUM_VALIDATE_VERTICES, new LongWritable(1));
        break;
    case TEST:
        aggregate(SUM_TEST_VERTICES, new LongWritable(1));
        break;
    default:
        throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
    }
    // if it's not a training vertex, use uniform posterior and don't send out messages
    if (vt != VertexType.TRAIN) {
        posterior.assign(1.0 / nStates);
        return;
    }
    // calculate messages
    IdWithVectorMessage newMessage = new IdWithVectorMessage();
    newMessage.setData(vertex.getId().get());
    // calculate initial belief
    Vector belief = prior.clone();
    for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
        double weight = edge.getValue().get();
        if (weight <= 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId()
                    + " has an edge with negative or zero weight value " + weight);
        }
        for (int i = 0; i < nStates; i++) {
            double total = 0d;
            for (int j = 0; j < nStates; j++) {
                // Normalized state distance; with a single state the only pair is (0, 0), so
                // use 0 rather than evaluating 0 / 0.
                double distance = nStates > 1 ? Math.abs(i - j) / (double) (nStates - 1) : 0d;
                double msg = Math.exp(prior.getQuick(j) + edgePotential(distance, weight));
                if (maxProduct) {
                    total = total > msg ? total : msg;
                } else {
                    total += msg;
                }
            }
            // Belief values are logs. When the total underflows to 0 its log must be the most
            // negative value; Double.MIN_VALUE is the smallest POSITIVE double (about 4.9e-324)
            // and would make this state look like the most likely one. A finite sentinel is
            // used instead of negative infinity so that the max-shift below cannot yield NaN.
            belief.setQuick(i, total > 0d ? Math.log(total) : -Double.MAX_VALUE);
        }
        // Shift so that the largest log value becomes 0.
        belief = belief.plus(-belief.maxValue());
        // send out messages
        newMessage.setVector(belief);
        sendMessage(edge.getTargetVertexId(), newMessage);
    }
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java

License:Apache License

/**
 * Runs one superstep for a vertex.
 *
 * <p>Superstep 0 only initializes the vertex. In later supersteps the incoming message vectors
 * are added to the (log-scale) prior, the posterior is updated unless the vertex is an anchor
 * (its largest prior value is not below {@code anchorThreshold}), and, while
 * {@code step < maxSupersteps}, training vertices send a new message along every edge.
 * Otherwise the prior is converted back with {@code Functions.EXP} and the vertex votes to halt.
 *
 * @param vertex the vertex being computed
 * @param messages the messages sent to this vertex; iterated exactly once
 * @throws IOException declared by the overridden signature
 * @throws IllegalArgumentException if the vertex type is unknown
 */
@Override
public void compute(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex,
        Iterable<IdWithVectorMessage> messages) throws IOException {
    long step = getSuperstep();
    if (step == 0) {
        initializeVertex(vertex);
        return;
    }

    // update posterior according to prior and messages
    VertexData4LBPWritable vertexValue = vertex.getValue();
    VertexType vt = vertexValue.getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    Vector prior = vertexValue.getPriorVector();
    // Kept as an int: it is a count and a loop bound. Divisions below convert explicitly.
    final int nStates = prior.size();
    if (vt != VertexType.TRAIN) {
        // assign a uniform prior for validate/test vertex
        prior = prior.clone().assign(Math.log(1.0 / nStates));
    }

    // Collect the messages by sender id and accumulate prior + messages in a single pass over
    // the message iterable.
    HashMap<Long, Vector> map = new HashMap<Long, Vector>();
    Vector sumPosterior = prior;
    for (IdWithVectorMessage message : messages) {
        Vector messageVector = message.getVector();
        map.put(message.getData(), messageVector);
        sumPosterior = sumPosterior.plus(messageVector);
    }
    // Shift so that the largest log value becomes 0.
    sumPosterior = sumPosterior.plus(-sumPosterior.maxValue());
    // update posterior if this isn't an anchor vertex
    if (prior.maxValue() < anchorThreshold) {
        // normalize posterior
        Vector posterior = sumPosterior.clone().assign(Functions.EXP);
        posterior = posterior.normalize(1d);
        Vector oldPosterior = vertexValue.getPosteriorVector();
        double delta = posterior.minus(oldPosterior).norm(1d);
        // aggregate deltas
        switch (vt) {
        case TRAIN:
            aggregate(SUM_TRAIN_DELTA, new DoubleWritable(delta));
            break;
        case VALIDATE:
            aggregate(SUM_VALIDATE_DELTA, new DoubleWritable(delta));
            break;
        case TEST:
            aggregate(SUM_TEST_DELTA, new DoubleWritable(delta));
            break;
        default:
            throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
        }
        // update posterior
        vertexValue.setPosteriorVector(posterior);
    }

    if (step < maxSupersteps) {
        // if it's not a training vertex, don't send out messages
        if (vt != VertexType.TRAIN) {
            return;
        }
        IdWithVectorMessage newMessage = new IdWithVectorMessage();
        newMessage.setData(vertex.getId().get());
        // update belief
        Vector belief = prior.clone();
        for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
            double weight = edge.getValue().get();
            long id = edge.getTargetVertexId().get();
            // Exclude what the target itself sent, when a message from it was received.
            Vector received = map.get(id);
            Vector tempVector = received != null ? sumPosterior.minus(received) : sumPosterior;
            for (int i = 0; i < nStates; i++) {
                double sum = 0d;
                for (int j = 0; j < nStates; j++) {
                    double msg = Math.exp(tempVector.getQuick(j)
                            + edgePotential(Math.abs(i - j) / (double) (nStates - 1), weight));
                    if (maxProduct) {
                        sum = sum > msg ? sum : msg;
                    } else {
                        sum += msg;
                    }
                }
                // Belief values are logs. When the sum underflows to 0 its log must be the most
                // negative value; Double.MIN_VALUE is the smallest POSITIVE double (about
                // 4.9e-324) and would make this state look like the most likely one. A finite
                // sentinel is used instead of negative infinity so that the max-shift below
                // cannot yield NaN.
                belief.setQuick(i, sum > 0d ? Math.log(sum) : -Double.MAX_VALUE);
            }
            belief = belief.plus(-belief.maxValue());
            newMessage.setVector(belief);
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
    } else {
        // convert prior back to regular scale before output
        prior = vertexValue.getPriorVector();
        prior = prior.assign(Functions.EXP);
        vertexValue.setPriorVector(prior);
        vertex.voteToHalt();
    }
}