List of usage examples for the org.apache.mahout.math.Vector.set method
void set(int index, double value);
From source file:com.cloudera.science.ml.parallel.fn.SvmLightFnTest.java
License:Open Source License
/**
 * Checks the string produced by {@code fn.map} for a vector built from three
 * values and for a sparse vector with two entries set.
 */
@Test
public void testVector() throws Exception {
    // Vector built from explicit values: all three indices appear in the expected string.
    Vector dense = Vectors.of(1.0, 2.0, 3.0);
    assertEquals("0:1.0 1:2.0 2:3.0", fn.map(dense));

    // Sparse vector of cardinality 10: only the entries set below appear in the expected string.
    Vector sparse = Vectors.sparse(10);
    sparse.set(3, 7.2);
    sparse.set(6, 12.0);
    assertEquals("3:7.2 6:12.0", fn.map(sparse));
}
From source file:com.cloudera.science.ml.parallel.fn.SvmLightFnTest.java
License:Open Source License
@Test public void testNamedVector() throws Exception { Vector v = Vectors.named("foo", 1.0, 2.0, 3.0); assertEquals("foo 0:1.0 1:2.0 2:3.0", fn.map(v)); v = Vectors.sparse(10);// w ww . j ava2 s . c om v.set(3, 7.2); v.set(6, 12.0); v = new NamedVector(v, "bar"); assertEquals("bar 3:7.2 6:12.0", fn.map(v)); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
private static Vector viewRowSums(Matrix m) { Vector v = new DenseVector(m.numRows()); for (MatrixSlice slice : m) { v.set(slice.index(), slice.vector().norm(1)); }// ww w. j a v a2s .c o m return v; }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
private static Pair<Matrix, Vector> randomMatrix(int numTopics, int numTerms, Random random) { Matrix topicTermCounts = new DenseMatrix(numTopics, numTerms); Vector topicSums = new DenseVector(numTopics); if (random != null) { for (int x = 0; x < numTopics; x++) { for (int term = 0; term < numTerms; term++) { topicTermCounts.viewRow(x).set(term, random.nextDouble()); }//from w ww. j a v a 2s. c om } } for (int x = 0; x < numTopics; x++) { topicSums.set(x, random == null ? 1.0 : topicTermCounts.viewRow(x).norm(1)); } return Pair.of(topicTermCounts, topicSums); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException { int numTopics = -1; int numTerms = -1; List<Pair<Integer, Vector>> rows = Lists.newArrayList(); for (Path modelPath : modelPaths) { for (Pair<Text, VectorWritable> row : new SequenceFileIterable<Text, VectorWritable>(modelPath, true, conf)) {//from w w w. j a v a 2s.co m rows.add(Pair.of(Integer.parseInt(row.getFirst().toString()), row.getSecond().get()));//keytext numTopics = Math.max(numTopics, Integer.parseInt(row.getFirst().toString()));//keytext if (numTerms < 0) { numTerms = row.getSecond().get().size(); } } } if (rows.isEmpty()) { throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it"); } numTopics++; Matrix model = new DenseMatrix(numTopics, numTerms); Vector topicSums = new DenseVector(numTopics); for (Pair<Integer, Vector> pair : rows) { model.viewRow(pair.getFirst()).assign(pair.getSecond()); topicSums.set(pair.getFirst(), pair.getSecond().norm(1)); } return Pair.of(model, topicSums); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) { // first calculate p(topic|term,document) for all terms in original, and all topics, // using p(term|topic) and p(topic|doc) pTopicGivenTerm(original, topics, docTopicModel); normalizeByTopic(docTopicModel);//from w w w. j a va2s .c o m // now multiply, term-by-term, by the document, to get the weighted distribution of // term-topic pairs from this document. Iterator<Vector.Element> it = original.iterateNonZero(); while (it.hasNext()) { Vector.Element e = it.next(); for (int x = 0; x < numTopics; x++) { Vector docTopicModelRow = docTopicModel.viewRow(x); docTopicModelRow.setQuick(e.index(), docTopicModelRow.getQuick(e.index()) * e.get()); } } // now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm topics.assign(0.0); for (int x = 0; x < numTopics; x++) { topics.set(x, docTopicModel.viewRow(x).norm(1)); } // now renormalize so that sum_x(p(x|doc)) = 1 topics.assign(Functions.mult(1 / topics.norm(1))); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public Vector infer(Vector original, Vector docTopics) { Vector pTerm = original.like(); Iterator<Vector.Element> it = original.iterateNonZero(); while (it.hasNext()) { Vector.Element e = it.next(); int term = e.index(); // p(a) = sum_x (p(a|x) * p(x|i)) double pA = 0; for (int x = 0; x < numTopics; x++) { pA += (topicTermCounts.viewRow(x).get(term) / topicSums.get(x)) * docTopics.get(x); }/*w w w .j av a 2 s.c om*/ pTerm.set(term, pA); } return pTerm; }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public void update(int termId, Vector topicCounts) { for (int x = 0; x < numTopics; x++) { Vector v = topicTermCounts.viewRow(x); v.set(termId, v.get(termId) + topicCounts.get(x)); }// ww w.j av a 2s.com topicSums.assign(topicCounts, Functions.PLUS); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/** * Computes {@code p(topic x|term a, document i)} distributions given input document {@code i}. * {@code pTGT[x][a]} is the (un-normalized) {@code p(x|a,i)}, or if docTopics is {@code null}, * {@code p(a|x)} (also un-normalized)./*from www .jav a2 s.c om*/ * * @param document doc-term vector encoding {@code w(term a|document i)}. * @param docTopics {@code docTopics[x]} is the overall weight of topic {@code x} in given * document. If {@code null}, a topic weight of {@code 1.0} is used for all topics. * @param termTopicDist storage for output {@code p(x|a,i)} distributions. */ private void pTopicGivenTerm(Vector document, Vector docTopics, Matrix termTopicDist) { // for each topic x for (int x = 0; x < numTopics; x++) { // get p(topic x | document i), or 1.0 if docTopics is null double topicWeight = docTopics == null ? 1.0 : docTopics.get(x); // get w(term a | topic x) Vector topicTermRow = topicTermCounts.viewRow(x); // get \sum_a w(term a | topic x) double topicSum = topicSums.get(x); // get p(topic x | term a) distribution to update Vector termTopicRow = termTopicDist.viewRow(x); // for each term a in document i with non-zero weight Iterator<Vector.Element> it = document.iterateNonZero(); while (it.hasNext()) { Vector.Element e = it.next(); int termIndex = e.index(); // calc un-normalized p(topic x | term a, document i) double termTopicLikelihood = (topicTermRow.get(termIndex) + eta) * (topicWeight + alpha) / (topicSum + eta * numTerms); termTopicRow.set(termIndex, termTopicLikelihood); } } }
From source file:com.elex.dmp.lda.TopicModel.java
License:Apache License
public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException { int numTopics = -1; int numTerms = -1; List<Pair<Integer, Vector>> rows = Lists.newArrayList(); for (Path modelPath : modelPaths) { for (Pair<Text, VectorWritable> row : new SequenceFileIterable<Text, VectorWritable>(modelPath, true, conf)) {//from w w w. j a va 2 s . co m rows.add(Pair.of(Integer.parseInt(row.getFirst().toString()), row.getSecond().get()));//keytext numTopics = Math.max(numTopics, Integer.parseInt(row.getFirst().toString()));//keytext if (numTerms < 0) { numTerms = row.getSecond().get().size(); } } } if (rows.isEmpty()) { throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it"); } numTopics++; Matrix model = new DenseMatrix(numTopics, numTerms); Vector topicSums = new DenseVector(numTopics); for (Pair<Integer, Vector> pair : rows) { model.viewRow(pair.getFirst()).assign(pair.getSecond()); topicSums.set(pair.getFirst(), pair.getSecond().norm(1)); } return Pair.of(model, topicSums); }