Example usage for org.apache.mahout.math Vector maxValue

Introduction

On this page you can find example usages of the maxValue() method of org.apache.mahout.math.Vector.

Prototype

double maxValue();

Source Link

Usage

From source file:com.memonews.mahout.sentiment.SentimentModelTester.java

License:Apache License

/**
 * Evaluates the serialized model against every file found in the sub-directories of
 * {@code inputFile} and prints the resulting analysis to {@code output}.
 * <p>
 * Each sub-directory name is interned as a category label; every file inside it is
 * encoded, classified, and recorded in a {@code ResultAnalyzer}.
 *
 * @param output writer that receives the textual result analysis
 * @throws IOException if the model cannot be read or a directory cannot be listed
 */
public void run(final PrintWriter output) throws IOException {

    final File base = new File(inputFile);
    // contains the best model
    final OnlineLogisticRegression classifier;
    final FileInputStream modelStream = new FileInputStream(modelFile);
    try {
        classifier = ModelSerializer.readBinary(modelStream, OnlineLogisticRegression.class);
    } finally {
        // Close explicitly so the handle is released even if deserialization fails;
        // closing an already closed FileInputStream is harmless.
        modelStream.close();
    }

    final Dictionary newsGroups = new Dictionary();
    final Multiset<String> overallCounts = HashMultiset.create();

    // File.listFiles() returns null when the path is not a directory or cannot be read.
    final File[] groupDirs = base.listFiles();
    if (groupDirs == null) {
        throw new IOException("Cannot list test directory: " + base);
    }

    final List<File> files = Lists.newArrayList();
    for (final File newsgroup : groupDirs) {
        if (newsgroup.isDirectory()) {
            newsGroups.intern(newsgroup.getName());
            final File[] members = newsgroup.listFiles();
            if (members == null) {
                throw new IOException("Cannot list category directory: " + newsgroup);
            }
            files.addAll(Arrays.asList(members));
        }
    }
    System.out.printf("%d test files\n", files.size());
    final ResultAnalyzer ra = new ResultAnalyzer(newsGroups.values(), "DEFAULT");
    for (final File file : files) {
        // the parent directory name is the expected category of the file
        final String ng = file.getParentFile().getName();

        final int actual = newsGroups.intern(ng);
        final SentimentModelHelper helper = new SentimentModelHelper();
        // no leak type ensures this is a normal vector
        final Vector input = helper.encodeFeatureVector(file, overallCounts);
        final Vector result = classifier.classifyFull(input);
        // predicted category is the index of the highest score; maxValue() is that score
        final int cat = result.maxValueIndex();
        final double score = result.maxValue();
        final double ll = classifier.logLikelihood(actual, input);
        final ClassifierResult cr = new ClassifierResult(newsGroups.values().get(cat), score, ll);
        ra.addInstance(newsGroups.values().get(actual), cr);

    }
    output.printf("%s\n\n", ra.toString());
}

From source file:edu.utsa.sifter.som.SelfOrganizingMap.java

License:Apache License

/**
 * Finds the term whose value differs most between two cells.
 * <p>
 * The difference vector is {@code cell(tID) - cell(uID)}. If its most negative entry has a
 * larger magnitude than its largest entry, the negated index of that minimum is returned;
 * otherwise the index of the maximum is returned.
 * NOTE(review): an index of 0 cannot carry a sign, so the two cases are indistinguishable
 * for term 0 - confirm callers tolerate this.
 *
 * @param tID id of the first cell
 * @param uID id of the second cell
 * @return the (possibly negated) index of the dominant difference
 */
int maxTermDifference(final int tID, final int uID) {
    final Vector delta = getCell(tID).minus(getCell(uID));
    final double largest = delta.maxValue();
    final double smallest = delta.minValue();

    // When smallest is negative its magnitude is -smallest; this also holds if largest is negative.
    final boolean negativeDominates = smallest < 0 && -smallest > largest;
    if (!negativeDominates) {
        return delta.maxValueIndex();
    }
    return -delta.minValueIndex();
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run PPCA sequentially given the small input Y which fit into memory This
 * could be used also on sampled data from a distributed matrix
 * /*  w  w w. ja  v  a2 s .co m*/
 * Note: this implementation ignore NaN values by replacing them with 0
 * 
 * @param conf
 *          the configuration
 * @param centralY
 *          the input matrix
 * @param initVal
 *          the initial values for C and ss
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the error
 * @throws Exception
 */
double runSequential(Configuration conf, Matrix centralY, InitialValues initVal, final int MAX_ROUNDS)
        throws Exception {
    Matrix centralC = initVal.C;
    double ss = initVal.ss;
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    //ignore NaN elements by replacing them with 0
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (new Double(centralY.getQuick(r, c)).isNaN()) {
                centralY.setQuick(r, c, 0);
            }

    //centralize and normalize the input matrix
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    //also normalize the matrix by dividing each element by its columns range
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    Matrix centralX = null;
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // Sx = inv( eye(d) + CtC/ss );
        Matrix Sx = eye(nPCs).times(ss).plus(centralCtC);
        Sx = inv(Sx);
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(Sx.transpose());
        // XtX = X'*X + ss * Sx;
        Matrix centralXtX = centralX.transpose().times(centralX).plus(Sx.times(ss));
        // C = (Ye'*X) / XtX;
        Matrix tmpInv = inv(centralXtX);
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + trace(XtX*CtC) - 2*xcty ) /(N*D);
        double norm2 = centralY.clone().assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = norm2 + PCACommon.trace(centralXtX.times(centralCtC));
        //ss3 = sum (X(i:0) * C' * Y(i,:)')
        DenseVector resVector = new DenseVector(nCols);
        double xctyt = 0;
        for (int i = 0; i < nRows; i++) {
            PCACommon.vectorTimesMatrixTranspose(centralX.viewRow(i), centralC, resVector);
            double res = resVector.dot(centralY.viewRow(i));
            xctyt += res;
        }
        ss -= 2 * xctyt;
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(Sx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        double objective = ss;
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        log.info("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Ye is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.qcri.pca.SPCADriver.java

/**
 * Run PPCA sequentially given the small input Y which fit into memory This
 * could be used also on sampled data from a distributed matrix
 * /*from  w  w  w . ja  v  a  2 s .  c o m*/
 * Note: this implementation ignore NaN values by replacing them with 0
 * 
 * @param conf
 *          the configuration
 * @param centralY
 *          the input matrix
 * @param initVal
 *          the initial values for C and ss
 * @param MAX_ROUNDS
 *          maximum number of iterations
 * @return the error
 * @throws Exception
 */
double runSequential_JacobVersion(Configuration conf, Matrix centralY, InitialValues initVal,
        final int MAX_ROUNDS) {
    Matrix centralC = initVal.C;// the current implementation doesn't use initial ss of
    // initVal
    final int nRows = centralY.numRows();
    final int nCols = centralY.numCols();
    final int nPCs = centralC.numCols();
    final float threshold = 0.00001f;

    log.info("tracec= " + PCACommon.trace(centralC));
    // Y = Y - mean(Ye)
    // Also normalize the matrix
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            if (new Double(centralY.getQuick(r, c)).isNaN()) {
                centralY.setQuick(r, c, 0);
            }
    Vector mean = centralY.aggregateColumns(new VectorFunction() {
        @Override
        public double apply(Vector v) {
            return v.zSum() / nRows;
        }
    });
    Vector spanVector = new DenseVector(nCols);
    for (int c = 0; c < nCols; c++) {
        Vector col = centralY.viewColumn(c);
        double max = col.maxValue();
        double min = col.minValue();
        double span = max - min;
        spanVector.setQuick(c, span);
    }
    for (int r = 0; r < nRows; r++)
        for (int c = 0; c < nCols; c++)
            centralY.set(r, c, (centralY.get(r, c) - mean.get(c))
                    / (spanVector.getQuick(c) != 0 ? spanVector.getQuick(c) : 1));

    // -------------------------- initialization
    // CtC = C'*C;
    Matrix centralCtC = centralC.transpose().times(centralC);
    log.info("tracectc= " + PCACommon.trace(centralCtC));
    log.info("traceinvctc= " + PCACommon.trace(inv(centralCtC)));
    log.info("traceye= " + PCACommon.trace(centralY));
    // X = Ye * C * inv(CtC);
    Matrix centralX = centralY.times(centralC).times(inv(centralCtC));
    log.info("tracex= " + PCACommon.trace(centralX));
    // recon = X * C';
    Matrix recon = centralX.times(centralC.transpose());
    log.info("tracerec= " + PCACommon.trace(recon));
    // ss = sum(sum((recon-Ye).^2)) / (N*D-missing);
    double ss = recon.minus(centralY).assign(new DoubleFunction() {
        @Override
        public double apply(double arg1) {
            return arg1 * arg1;
        }
    }).zSum() / (nRows * nCols);
    log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);

    int count = 1;
    // old = Inf;
    double old = Double.MAX_VALUE;
    // -------------------------- EM Iterations
    // while count
    int round = 0;
    while (round < MAX_ROUNDS && count > 0) {
        round++;
        // ------------------ E-step, (co)variances
        // Sx = inv( eye(d) + CtC/ss );
        Matrix centralSx = eye(nPCs).plus(centralCtC.divide(ss));
        centralSx = inv(centralSx);
        // ------------------ E-step expected value
        // X = Ye*C*(Sx/ss);
        centralX = centralY.times(centralC).times(centralSx.divide(ss));
        // ------------------ M-step
        // SumXtX = X'*X;
        Matrix centralSumXtX = centralX.transpose().times(centralX);
        // C = (Ye'*X) / (SumXtX + N*Sx );
        Matrix tmpInv = inv(centralSumXtX.plus(centralSx.times(nRows)));
        centralC = centralY.transpose().times(centralX).times(tmpInv);
        // CtC = C'*C;
        centralCtC = centralC.transpose().times(centralC);
        // ss = ( sum(sum( (X*C'-Ye).^2 )) + N*sum(sum(CtC.*Sx)) +
        // missing*ss_old ) /(N*D);
        recon = centralX.times(centralC.transpose());
        double error = recon.minus(centralY).assign(new DoubleFunction() {
            @Override
            public double apply(double arg1) {
                return arg1 * arg1;
            }
        }).zSum();
        ss = error + nRows * dot(centralCtC.clone(), centralSx).zSum();
        ss /= (nRows * nCols);

        log.info("SSSSSSSSSSSSSSSSSSSSSSSSSSSS " + ss);
        double traceSx = PCACommon.trace(centralSx);
        double traceX = PCACommon.trace(centralX);
        double traceSumXtX = PCACommon.trace(centralSumXtX);
        double traceC = PCACommon.trace(centralC);
        double traceCtC = PCACommon.trace(centralCtC);
        log.info("TTTTTTTTTTTTTTTTT " + traceSx + " " + traceX + " " + traceSumXtX + " " + traceC + " "
                + traceCtC + " " + 0);

        // objective = N*D + N*(D*log(ss) +PCACommon.trace(Sx)-log(det(Sx)) )
        // +PCACommon.trace(SumXtX) -missing*log(ss_old);
        double objective = nRows * nCols + nRows
                * (nCols * Math.log(ss) + PCACommon.trace(centralSx) - Math.log(centralSx.determinant()))
                + PCACommon.trace(centralSumXtX);
        double rel_ch = Math.abs(1 - objective / old);
        old = objective;
        count++;
        if (rel_ch < threshold && count > 5)
            count = 0;
        System.out.printf("Objective:  %.6f    relative change: %.6f \n", objective, rel_ch);
    }

    double norm1Y = centralY.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of Y is: " + norm1Y);
    Matrix newYerror = centralY.minus(centralX.times(centralC.transpose()));
    double norm1Err = newYerror.aggregateColumns(new VectorNorm1()).maxValue();
    log.info("Norm1 of the reconstruction error is: " + norm1Err);

    initVal.C = centralC;
    initVal.ss = ss;
    return norm1Err / norm1Y;
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java

License:Apache License

/**
 * Initialize vertex/*from  w  ww .j a  v a2  s . c  o  m*/
 *
 * @param vertex of the graph
 */
private void initializeVertex(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex) {
    // normalize prior and posterior
    Vector prior = vertex.getValue().getPriorVector();
    Vector posterior = vertex.getValue().getPosteriorVector();
    int nStates = prior.size();
    double sum = 0d;
    for (int i = 0; i < nStates; i++) {
        double v = prior.getQuick(i);
        if (v < 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId() + " has negative prior value.");
        } else if (v < MIN_PRIOR_VALUE) {
            v = MIN_PRIOR_VALUE;
            prior.setQuick(i, v);
        }
        sum += v;
    }
    for (int i = 0; i < nStates; i++) {
        posterior.setQuick(i, prior.getQuick(i) / sum);
        prior.setQuick(i, Math.log(posterior.getQuick(i)));
    }
    // collect graph statistics
    VertexType vt = vertex.getValue().getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    switch (vt) {
    case TRAIN:
        aggregate(SUM_TRAIN_VERTICES, new LongWritable(1));
        break;
    case VALIDATE:
        aggregate(SUM_VALIDATE_VERTICES, new LongWritable(1));
        break;
    case TEST:
        aggregate(SUM_TEST_VERTICES, new LongWritable(1));
        break;
    default:
        throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
    }
    // if it's not a training vertex, use uniform posterior and don't send out messages
    if (vt != VertexType.TRAIN) {
        posterior.assign(1.0 / nStates);
        return;
    }
    // calculate messages
    IdWithVectorMessage newMessage = new IdWithVectorMessage();
    newMessage.setData(vertex.getId().get());
    // calculate initial belief
    Vector belief = prior.clone();
    for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
        double weight = edge.getValue().get();
        if (weight <= 0d) {
            throw new IllegalArgumentException("Vertex ID: " + vertex.getId()
                    + " has an edge with negative or zero weight value " + weight);
        }
        for (int i = 0; i < nStates; i++) {
            sum = 0d;
            for (int j = 0; j < nStates; j++) {
                double msg = Math.exp(
                        prior.getQuick(j) + edgePotential(Math.abs(i - j) / (double) (nStates - 1), weight));
                if (maxProduct) {
                    sum = sum > msg ? sum : msg;
                } else {
                    sum += msg;
                }
            }
            belief.setQuick(i, sum > 0d ? Math.log(sum) : Double.MIN_VALUE);
        }
        belief = belief.plus(-belief.maxValue());
        // send out messages
        newMessage.setVector(belief);
        sendMessage(edge.getTargetVertexId(), newMessage);
    }
}

From source file:org.trustedanalytics.atk.giraph.algorithms.lbp.LoopyBeliefPropagationComputation.java

License:Apache License

/**
 * Runs one superstep of loopy belief propagation for a vertex.
 * <p>
 * Superstep 0 only initializes the vertex. Later supersteps combine the (log-scale) prior
 * with the incoming messages, update the posterior of non-anchor vertices and aggregate the
 * change, then either send new messages (training vertices, while below
 * {@code maxSupersteps}) or convert the prior back to regular scale and vote to halt.
 *
 * @param vertex the vertex being computed
 * @param messages messages received by this vertex
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if the vertex type is unknown
 */
@Override
public void compute(Vertex<LongWritable, VertexData4LBPWritable, DoubleWritable> vertex,
        Iterable<IdWithVectorMessage> messages) throws IOException {
    long step = getSuperstep();
    if (step == 0) {
        initializeVertex(vertex);
        return;
    }

    // update posterior according to prior and messages
    VertexData4LBPWritable vertexValue = vertex.getValue();
    VertexType vt = vertexValue.getType();
    vt = ignoreVertexType ? VertexType.TRAIN : vt;
    Vector prior = vertexValue.getPriorVector();
    int nStates = prior.size();
    if (vt != VertexType.TRAIN) {
        // assign a uniform prior for validate/test vertex
        prior = prior.clone().assign(Math.log(1.0 / nStates));
    }

    // collect messages sent to this vertex and, in the same pass, add them to the prior
    HashMap<Long, Vector> map = new HashMap<Long, Vector>();
    Vector sumPosterior = prior;
    for (IdWithVectorMessage message : messages) {
        Vector received = message.getVector();
        map.put(message.getData(), received);
        sumPosterior = sumPosterior.plus(received);
    }
    // shift so that the largest log-value is 0
    sumPosterior = sumPosterior.plus(-sumPosterior.maxValue());
    // update posterior if this isn't an anchor vertex
    if (prior.maxValue() < anchorThreshold) {
        // normalize posterior
        Vector posterior = sumPosterior.clone().assign(Functions.EXP);
        posterior = posterior.normalize(1d);
        Vector oldPosterior = vertexValue.getPosteriorVector();
        double delta = posterior.minus(oldPosterior).norm(1d);
        // aggregate deltas
        switch (vt) {
        case TRAIN:
            aggregate(SUM_TRAIN_DELTA, new DoubleWritable(delta));
            break;
        case VALIDATE:
            aggregate(SUM_VALIDATE_DELTA, new DoubleWritable(delta));
            break;
        case TEST:
            aggregate(SUM_TEST_DELTA, new DoubleWritable(delta));
            break;
        default:
            throw new IllegalArgumentException("Unknown vertex type: " + vt.toString());
        }
        // update posterior
        vertexValue.setPosteriorVector(posterior);
    }

    if (step < maxSupersteps) {
        // if it's not a training vertex, don't send out messages
        if (vt != VertexType.TRAIN) {
            return;
        }
        IdWithVectorMessage newMessage = new IdWithVectorMessage();
        newMessage.setData(vertex.getId().get());
        // Log-scale stand-in for a state whose message mass underflowed to zero.
        // Double.MIN_VALUE itself is the smallest POSITIVE double (about 4.9e-324), i.e.
        // roughly log(1); its logarithm (about -744) is the intended "almost impossible" value.
        final double logFloor = Math.log(Double.MIN_VALUE);
        // update belief
        Vector belief = prior.clone();
        for (Edge<LongWritable, DoubleWritable> edge : vertex.getEdges()) {
            double weight = edge.getValue().get();
            long id = edge.getTargetVertexId().get();
            // exclude what the target itself sent, if it sent anything
            Vector fromTarget = map.get(id);
            Vector tempVector = fromTarget != null ? sumPosterior.minus(fromTarget) : sumPosterior;
            for (int i = 0; i < nStates; i++) {
                // max-product keeps the largest term, sum-product adds all terms
                double sum = 0d;
                for (int j = 0; j < nStates; j++) {
                    double msg = Math.exp(tempVector.getQuick(j)
                            + edgePotential(Math.abs(i - j) / (double) (nStates - 1), weight));
                    if (maxProduct) {
                        sum = sum > msg ? sum : msg;
                    } else {
                        sum += msg;
                    }
                }
                belief.setQuick(i, sum > 0d ? Math.log(sum) : logFloor);
            }
            belief = belief.plus(-belief.maxValue());
            newMessage.setVector(belief);
            sendMessage(edge.getTargetVertexId(), newMessage);
        }
    } else {
        // convert prior back to regular scale before output
        prior = vertexValue.getPriorVector();
        prior = prior.assign(Functions.EXP);
        vertexValue.setPriorVector(prior);
        vertex.voteToHalt();
    }
}