List of usage examples for the norm method of org.apache.mahout.math.Vector
/**
 * Returns a norm of this vector, selected by {@code power}.
 *
 * <p>Callers typically pass {@code 1} to obtain the total weight of a vector
 * (e.g. to renormalize it so its entries sum to one) and {@code 2} to obtain
 * its Euclidean length.
 *
 * <p>NOTE(review): presumably this is the general p-norm with p = {@code power};
 * confirm the handling of other values (0, infinity, values below 1) against
 * the Mahout Javadoc.
 *
 * @param power selects which norm to compute
 * @return the norm of this vector for the given {@code power}
 */
double norm(double power);
From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java
License:Apache License
public static void orthonormalizeColumns(Matrix mx) { //int n = mx.numCols(); int n = mx.numRows(); for (int c = 0; c < n; c++) { System.out.println("col: " + c); Vector col = mx.viewRow(c); for (int c1 = 0; c1 < c; c1++) { Vector viewC1 = mx.viewRow(c1); col.assign(col.minus(viewC1.times(viewC1.dot(col)))); }/*from w ww . ja v a 2s. c o m*/ final double norm2 = col.norm(2); if (norm2 == 0) { System.out.println("zero"); } col.assign(new DoubleFunction() { @Override public double apply(double x) { return x / norm2; } }); } }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) { // first calculate p(topic|term,document) for all terms in original, and all topics, // using p(term|topic) and p(topic|doc) pTopicGivenTerm(original, topics, docTopicModel); normalizeByTopic(docTopicModel);/*from w w w . j a v a 2 s. co m*/ // now multiply, term-by-term, by the document, to get the weighted distribution of // term-topic pairs from this document. Iterator<Vector.Element> it = original.iterateNonZero(); while (it.hasNext()) { Vector.Element e = it.next(); for (int x = 0; x < numTopics; x++) { Vector docTopicModelRow = docTopicModel.viewRow(x); docTopicModelRow.setQuick(e.index(), docTopicModelRow.getQuick(e.index()) * e.get()); } } // now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm topics.assign(0.0); for (int x = 0; x < numTopics; x++) { topics.set(x, docTopicModel.viewRow(x).norm(1)); } // now renormalize so that sum_x(p(x|doc)) = 1 topics.assign(Functions.mult(1 / topics.norm(1))); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/**
 * Adds {@code docTopicCounts} element-wise into row {@code topic} of
 * {@code topicTermCounts}, and increases {@code topicSums[topic]} by the
 * 1-norm of {@code docTopicCounts}.
 *
 * @param topic          index of the topic row to update
 * @param docTopicCounts the counts to accumulate into that topic
 */
public void updateTopic(int topic, Vector docTopicCounts) {
    Vector countsForTopic = topicTermCounts.viewRow(topic);
    countsForTopic.assign(docTopicCounts, Functions.PLUS);

    double updatedSum = topicSums.get(topic) + docTopicCounts.norm(1);
    topicSums.set(topic, updatedSum);
}
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/**
 * Computes the negated, count-weighted log-probability of a document:
 * {@code -sum_term ( count(term) * log( sum_x p(x|doc) * p(term|x) ) )}.
 *
 * <p>{@code p(x|doc)} is {@code (docTopics[x] + alpha)} divided by
 * {@code |docTopics|_1 + size * alpha}; {@code p(term|x)} is
 * {@code (topicTermCounts[x][term] + eta) / (topicSums[x] + eta * numTerms)}.
 *
 * @param document  the document vector; only its non-zero entries contribute
 * @param docTopics the (unnormalized) topic weights for the document
 * @return the negated sum described above
 */
public double perplexity(Vector document, Vector docTopics) {
    double normalizer = docTopics.norm(1) + (docTopics.size() * alpha);
    double logLikelihood = 0;

    for (Iterator<Vector.Element> terms = document.iterateNonZero(); terms.hasNext();) {
        Vector.Element entry = terms.next();
        int term = entry.index();

        // Probability of this term, marginalized over all topics.
        double termProbability = 0;
        for (int topic = 0; topic < numTopics; topic++) {
            double topicGivenDoc = (docTopics.get(topic) + alpha) / normalizer;
            termProbability += topicGivenDoc * (topicTermCounts.viewRow(topic).get(term) + eta)
                / (topicSums.get(topic) + eta * numTerms);
        }

        logLikelihood += entry.get() * Math.log(termProbability);
    }

    return -logLikelihood;
}
From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java
License:Apache License
private void postInitCorpus() { totalCorpusWeight = 0;//from w ww. j av a2 s.c o m int numNonZero = 0; for (int i = 0; i < numDocuments; i++) { Vector v = corpusWeights.viewRow(i); double norm; if (v != null && (norm = v.norm(1)) != 0) { numNonZero += v.getNumNondefaultElements(); totalCorpusWeight += norm; } } String s = "Initializing corpus with %d docs, %d terms, %d nonzero entries, total termWeight %f"; log.info(String.format(s, numDocuments, numTerms, numNonZero, totalCorpusWeight)); }
From source file:com.elex.dmp.lda.ModelTrainer.java
License:Apache License
public double calculatePerplexity(VectorIterable matrix, VectorIterable docTopicCounts, double testFraction) { Iterator<MatrixSlice> docIterator = matrix.iterator(); Iterator<MatrixSlice> docTopicIterator = docTopicCounts.iterator(); double perplexity = 0; double matrixNorm = 0; while (docIterator.hasNext() && docTopicIterator.hasNext()) { MatrixSlice docSlice = docIterator.next(); MatrixSlice topicSlice = docTopicIterator.next(); int docId = docSlice.index(); Vector document = docSlice.vector(); Vector topicDist = topicSlice.vector(); if (testFraction == 0 || docId % (1 / testFraction) == 0) { trainSync(document, topicDist, false, 10); perplexity += readModel.perplexity(document, topicDist); matrixNorm += document.norm(1); }//w w w . j a v a2 s .c o m } return perplexity / matrixNorm; }
From source file:de.isabeldrostfromm.sof.util.VectorsTest.java
License:Open Source License
@Test @Repeat(iterations = 10)//w w w .j a v a2 s . c om public void testAppendTwo() { Vector vecA = randomVector(); Vector vecB = randomVector(); Vector result = Vectors.append(vecA, vecB); double sum = Math.pow(vecA.norm(2), 2) + Math.pow(vecB.norm(2), 2); double length = Math.sqrt(sum); assertEquals("Appending two vectors should result in a vector of added length.", length, result.norm(2), 0.00001); }
From source file:de.isabeldrostfromm.sof.util.VectorsTest.java
License:Open Source License
@Test @Repeat(iterations = 10)//from w ww . j a va 2s . c o m public void testCreation() { Vector vec = randomVector(); double[] entries = new double[vec.getNumNondefaultElements()]; int index = 0; for (Vector.Element e : vec) { entries[index] = e.get(); index++; } Vector result = Vectors.newSequentialAccessSparseVector(entries); assertEquals("Original vector should have same length as the one created from its entries.", vec.norm(2), result.norm(2), 0.0001); }
From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java
License:Apache License
/**
 * Computes the step size alpha along the conjugate direction.
 *
 * <p>Returns zero when the conjugate vector has zero 1-norm or when no
 * message of type {@code EdgeType.TRAIN} is present. Otherwise the result is
 * {@code -gradient.conjugate / (mean squared prediction + lambda * conjugate.conjugate)},
 * where a prediction is the dot product of the conjugate with a training
 * message's vector.
 *
 * @param gradient  of type Vector
 * @param conjugate of type Vector
 * @param messages  of type Iterable
 * @return alpha of type double
 */
private double computeAlpha(Vector gradient, Vector conjugate,
        Iterable<MessageData4CFWritable> messages) {
    if (conjugate.norm(1d) == 0d) {
        return 0d;
    }

    double sumSquaredPredictions = 0d;
    int trainCount = 0;
    for (MessageData4CFWritable message : messages) {
        if (message.getType() != EdgeType.TRAIN) {
            continue;
        }
        double prediction = conjugate.dot(message.getVector());
        sumSquaredPredictions += prediction * prediction;
        trainCount++;
    }

    if (trainCount <= 0) {
        return 0d;
    }
    return -gradient.dot(conjugate)
        / (sumSquaredPredictions / trainCount + lambda * conjugate.dot(conjugate));
}
From source file:org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java
License:Apache License
/** * Compute beta according to Hestenes-Stiefel formula * * @param gradient of type Vector/*from w ww. ja va 2s. c o m*/ * @param conjugate of type Vector * @param gradientNext of type Vector * @return beta of type double */ private double computeBeta(Vector gradient, Vector conjugate, Vector gradientNext) { double beta = 0d; if (conjugate.norm(1d) == 0d) { return beta; } Vector deltaVector = gradientNext.minus(gradient); beta = -gradientNext.dot(deltaVector) / conjugate.dot(deltaVector); return beta; }