Example usage of org.apache.mahout.math.Vector.set

List of usage examples for org.apache.mahout.math.Vector.set

Introduction

This page collects usage examples for the set method of org.apache.mahout.math.Vector.

Prototype

void set(int index, double value);

Source Link

Document

Sets the value at the given index.

Usage

From source file:com.cloudera.science.ml.parallel.fn.SvmLightFnTest.java

License:Open Source License

/**
 * Checks the string produced by {@code fn.map} for a vector built from three literal
 * values and for a vector from {@code Vectors.sparse(10)} that has two entries set.
 */
@Test
public void testVector() throws Exception {
    Vector dense = Vectors.of(1.0, 2.0, 3.0);
    assertEquals("0:1.0 1:2.0 2:3.0", fn.map(dense));

    // Only indices 3 and 6 are assigned; only those two are expected in the output.
    Vector sparse = Vectors.sparse(10);
    sparse.set(3, 7.2);
    sparse.set(6, 12.0);
    assertEquals("3:7.2 6:12.0", fn.map(sparse));
}

From source file:com.cloudera.science.ml.parallel.fn.SvmLightFnTest.java

License:Open Source License

/**
 * Checks the string produced by {@code fn.map} for named vectors: the expected output
 * starts with the vector's name, followed by the {@code index:value} pairs.
 */
@Test
public void testNamedVector() throws Exception {
    Vector namedDense = Vectors.named("foo", 1.0, 2.0, 3.0);
    assertEquals("foo 0:1.0 1:2.0 2:3.0", fn.map(namedDense));

    // Set two entries first, then wrap the vector with the name "bar".
    Vector sparse = Vectors.sparse(10);
    sparse.set(3, 7.2);
    sparse.set(6, 12.0);
    Vector namedSparse = new NamedVector(sparse, "bar");
    assertEquals("bar 3:7.2 6:12.0", fn.map(namedSparse));
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Builds a dense vector that stores, for every slice the matrix iterates over,
 * the 1-norm of that slice's vector at the slice's index.
 *
 * @param m matrix whose slices are summed
 * @return a new vector of size {@code m.numRows()}
 */
private static Vector viewRowSums(Matrix m) {
    Vector rowSums = new DenseVector(m.numRows());
    for (MatrixSlice row : m) {
        double l1Norm = row.vector().norm(1);
        rowSums.set(row.index(), l1Norm);
    }
    return rowSums;
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Creates a {@code numTopics x numTerms} dense matrix together with a dense vector of
 * per-row sums.
 *
 * <p>With a non-null {@code random}, every cell is filled from {@code random.nextDouble()}
 * (row by row, term by term) and each sum is the 1-norm of its row. With a {@code null}
 * {@code random}, the matrix is left as constructed and every sum is set to {@code 1.0}.
 *
 * @param numTopics number of rows
 * @param numTerms number of columns
 * @param random source of cell values, or {@code null} to skip filling
 * @return the matrix paired with its row-sum vector
 */
private static Pair<Matrix, Vector> randomMatrix(int numTopics, int numTerms, Random random) {
    boolean randomize = random != null;
    Matrix counts = new DenseMatrix(numTopics, numTerms);
    Vector sums = new DenseVector(numTopics);

    if (randomize) {
        for (int topic = 0; topic < numTopics; topic++) {
            // Writes go through the row view into the matrix, as in a per-cell viewRow call.
            Vector row = counts.viewRow(topic);
            for (int term = 0; term < numTerms; term++) {
                row.set(term, random.nextDouble());
            }
        }
    }

    for (int topic = 0; topic < numTopics; topic++) {
        double sum;
        if (randomize) {
            sum = counts.viewRow(topic).norm(1);
        } else {
            sum = 1.0;
        }
        sums.set(topic, sum);
    }
    return Pair.of(counts, sums);
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Loads a model matrix and its row sums from one or more sequence files.
 *
 * <p>Each entry's {@code Text} key is parsed as an integer and used as the row index; the
 * entry's vector becomes that row. The number of rows is the largest key seen plus one, and
 * the number of columns is the size of the first vector read.
 *
 * @param conf configuration handed to the sequence-file reader
 * @param modelPaths files to read entries from
 * @return the dense model matrix paired with a vector holding the 1-norm of each loaded row
 * @throws IOException if no entries were found in any of {@code modelPaths}
 */
public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException {
    int maxTopicId = -1;
    int numTerms = -1;
    List<Pair<Integer, Vector>> rows = Lists.newArrayList();
    for (Path modelPath : modelPaths) {
        SequenceFileIterable<Text, VectorWritable> entries =
                new SequenceFileIterable<Text, VectorWritable>(modelPath, true, conf);
        for (Pair<Text, VectorWritable> entry : entries) {
            // The key is text holding the integer topic id.
            int topicId = Integer.parseInt(entry.getFirst().toString());
            Vector termCounts = entry.getSecond().get();
            rows.add(Pair.of(topicId, termCounts));
            maxTopicId = Math.max(maxTopicId, topicId);
            if (numTerms < 0) {
                numTerms = termCounts.size();
            }
        }
    }
    if (rows.isEmpty()) {
        throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it");
    }

    // Topic ids are used as zero-based row indices, so the row count is max id + 1.
    int numTopics = maxTopicId + 1;
    Matrix model = new DenseMatrix(numTopics, numTerms);
    Vector topicSums = new DenseVector(numTopics);
    for (Pair<Integer, Vector> row : rows) {
        Integer topicId = row.getFirst();
        Vector termCounts = row.getSecond();
        model.viewRow(topicId).assign(termCounts);
        topicSums.set(topicId, termCounts.norm(1));
    }
    return Pair.of(model, topicSums);
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Updates {@code docTopicModel} and {@code topics} in place for one document.
 *
 * @param original the document's term vector; only its non-zero entries are visited
 * @param topics topic weights for the document; overwritten with the renormalized result
 * @param docTopicModel topic-by-term matrix used as working storage and output
 */
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
    // Step 1: fill docTopicModel with p(topic|term,document) for every term in the
    // document and every topic, from p(term|topic) and p(topic|doc), then normalize.
    pTopicGivenTerm(original, topics, docTopicModel);
    normalizeByTopic(docTopicModel);

    // Step 2: scale each term's column by the term's weight in the document, giving the
    // weighted distribution of term-topic pairs for this document.
    for (Iterator<Vector.Element> nonZeros = original.iterateNonZero(); nonZeros.hasNext();) {
        Vector.Element termEntry = nonZeros.next();
        int term = termEntry.index();
        double weight = termEntry.get();
        for (int topic = 0; topic < numTopics; topic++) {
            Vector topicRow = docTopicModel.viewRow(topic);
            topicRow.setQuick(term, topicRow.getQuick(term) * weight);
        }
    }

    // Step 3: recompute p(topic|doc) as the 1-norm of each topic's row.
    topics.assign(0.0);
    for (int topic = 0; topic < numTopics; topic++) {
        double rowTotal = docTopicModel.viewRow(topic).norm(1);
        topics.set(topic, rowTotal);
    }

    // Step 4: renormalize so the topic weights sum to 1.
    double total = topics.norm(1);
    topics.assign(Functions.mult(1 / total));
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Computes, for every non-zero term of {@code original}, the sum over topics of
 * {@code (topicTermCounts[x][term] / topicSums[x]) * docTopics[x]}.
 *
 * @param original document term vector; determines which terms are evaluated
 * @param docTopics per-topic weights for the document
 * @return a vector created by {@code original.like()} holding the value for each visited term
 */
public Vector infer(Vector original, Vector docTopics) {
    Vector pTerm = original.like();
    for (Iterator<Vector.Element> nonZeros = original.iterateNonZero(); nonZeros.hasNext();) {
        int term = nonZeros.next().index();
        // p(a) = sum_x (p(a|x) * p(x|i))
        double probability = 0;
        for (int topic = 0; topic < numTopics; topic++) {
            double pTermGivenTopic = topicTermCounts.viewRow(topic).get(term) / topicSums.get(topic);
            probability += pTermGivenTopic * docTopics.get(topic);
        }
        pTerm.set(term, probability);
    }
    return pTerm;
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Adds {@code topicCounts} into the model for a single term.
 *
 * @param termId column of {@code topicTermCounts} to update
 * @param topicCounts amount to add for each topic; also added element-wise to {@code topicSums}
 */
public void update(int termId, Vector topicCounts) {
    for (int topic = 0; topic < numTopics; topic++) {
        Vector topicRow = topicTermCounts.viewRow(topic);
        double updated = topicRow.get(termId) + topicCounts.get(topic);
        topicRow.set(termId, updated);
    }
    topicSums.assign(topicCounts, Functions.PLUS);
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Computes {@code p(topic x|term a, document i)} distributions given input document {@code i}.
 * {@code pTGT[x][a]} is the (un-normalized) {@code p(x|a,i)}, or if docTopics is {@code null},
 * {@code p(a|x)} (also un-normalized)./*from  www .jav a2  s.c  om*/
 *
 * @param document doc-term vector encoding {@code w(term a|document i)}.
 * @param docTopics {@code docTopics[x]} is the overall weight of topic {@code x} in given
 *          document. If {@code null}, a topic weight of {@code 1.0} is used for all topics.
 * @param termTopicDist storage for output {@code p(x|a,i)} distributions.
 */
private void pTopicGivenTerm(Vector document, Vector docTopics, Matrix termTopicDist) {
    // for each topic x
    for (int x = 0; x < numTopics; x++) {
        // get p(topic x | document i), or 1.0 if docTopics is null
        double topicWeight = docTopics == null ? 1.0 : docTopics.get(x);
        // get w(term a | topic x)
        Vector topicTermRow = topicTermCounts.viewRow(x);
        // get \sum_a w(term a | topic x)
        double topicSum = topicSums.get(x);
        // get p(topic x | term a) distribution to update
        Vector termTopicRow = termTopicDist.viewRow(x);

        // for each term a in document i with non-zero weight
        Iterator<Vector.Element> it = document.iterateNonZero();
        while (it.hasNext()) {
            Vector.Element e = it.next();
            int termIndex = e.index();

            // calc un-normalized p(topic x | term a, document i)
            double termTopicLikelihood = (topicTermRow.get(termIndex) + eta) * (topicWeight + alpha)
                    / (topicSum + eta * numTerms);
            termTopicRow.set(termIndex, termTopicLikelihood);
        }
    }
}

From source file:com.elex.dmp.lda.TopicModel.java

License:Apache License

/**
 * Loads a model matrix and its row sums from one or more sequence files.
 *
 * <p>Each entry's {@code Text} key is parsed as an integer and used as the row index; the
 * entry's vector becomes that row. The number of rows is the largest key seen plus one, and
 * the number of columns is the size of the first vector read.
 *
 * @param conf configuration handed to the sequence-file reader
 * @param modelPaths files to read entries from
 * @return the dense model matrix paired with a vector holding the 1-norm of each loaded row
 * @throws IOException if no entries were found in any of {@code modelPaths}
 */
public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException {
    int maxTopicId = -1;
    int numTerms = -1;
    List<Pair<Integer, Vector>> rows = Lists.newArrayList();
    for (Path modelPath : modelPaths) {
        SequenceFileIterable<Text, VectorWritable> entries =
                new SequenceFileIterable<Text, VectorWritable>(modelPath, true, conf);
        for (Pair<Text, VectorWritable> entry : entries) {
            // The key is text holding the integer topic id.
            int topicId = Integer.parseInt(entry.getFirst().toString());
            Vector termCounts = entry.getSecond().get();
            rows.add(Pair.of(topicId, termCounts));
            maxTopicId = Math.max(maxTopicId, topicId);
            if (numTerms < 0) {
                numTerms = termCounts.size();
            }
        }
    }
    if (rows.isEmpty()) {
        throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it");
    }

    // Topic ids are used as zero-based row indices, so the row count is max id + 1.
    int numTopics = maxTopicId + 1;
    Matrix model = new DenseMatrix(numTopics, numTerms);
    Vector topicSums = new DenseVector(numTopics);
    for (Pair<Integer, Vector> row : rows) {
        Integer topicId = row.getFirst();
        Vector termCounts = row.getSecond();
        model.viewRow(topicId).assign(termCounts);
        topicSums.set(topicId, termCounts.norm(1));
    }
    return Pair.of(model, topicSums);
}