List of usage examples for org.apache.mahout.math Vector getNumNondefaultElements
int getNumNondefaultElements();
From source file:Vectors.java
License:Apache License
public static Vector maybeSample(Vector original, int sampleSize) { if (original.getNumNondefaultElements() <= sampleSize) { return original; }//from w w w . ja va 2 s . c om Vector sample = original.like(); Iterator<Vector.Element> sampledElements = new FixedSizeSamplingIterator<Vector.Element>(sampleSize, original.iterateNonZero()); while (sampledElements.hasNext()) { Vector.Element elem = sampledElements.next(); sample.setQuick(elem.index(), elem.get()); } return sample; }
From source file:Vectors.java
License:Apache License
/**
 * Keeps only the entries of {@code original} that survive a {@code TopK} queue of capacity
 * {@code k} ordered by {@code BY_VALUE}. A vector with at most {@code k} non-default
 * elements is returned as-is.
 *
 * @param k        maximum number of entries to keep
 * @param original vector to filter
 * @return {@code original} itself, or a new vector (from {@code original.like()}) holding
 *         the retained entries
 */
public static Vector topKElements(int k, Vector original) {
  if (original.getNumNondefaultElements() > k) {
    TopK<Vector.Element> queue = new TopK<Vector.Element>(k, BY_VALUE);
    for (Iterator<Vector.Element> it = original.iterateNonZero(); it.hasNext();) {
      Vector.Element current = it.next();
      // Each element is wrapped before being queued — presumably because the iterator
      // may reuse its Element instance; confirm against TemporaryElement.
      queue.offer(new Vectors.TemporaryElement(current));
    }

    Vector result = original.like();
    for (Vector.Element kept : queue.retrieve()) {
      result.setQuick(kept.index(), kept.get());
    }
    return result;
  }
  return original;
}
From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java
License:Apache License
public static void storeSparseColumns(Matrix mat) { int numCols = mat.numCols(); int numRows = mat.numRows(); for (int i = 0; i < numCols; i++) { Vector sparseVect = new RandomAccessSparseVector(numRows); Vector col = mat.viewColumn(i); Iterator<Vector.Element> itr = col.iterateNonZero(); while (itr.hasNext()) { Element elem = itr.next(); if (elem.get() != 0) { System.out.println(elem.get()); sparseVect.set(elem.index(), elem.get()); }//from w w w .jav a 2 s . c o m } System.out.println(sparseVect.getNumNondefaultElements()); mat.assignColumn(i, sparseVect); System.out.println(mat.viewColumn(i).getNumNondefaultElements()); System.exit(1); } }
From source file:com.chimpler.example.eigenface.Helper.java
License:Apache License
public static double[][] readMatrixSequenceFile(String fileName) throws Exception { Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(configuration); Reader matrixReader = new SequenceFile.Reader(fs, new Path(fileName), configuration); List<double[]> rows = new ArrayList<double[]>(); IntWritable key = new IntWritable(); VectorWritable value = new VectorWritable(); while (matrixReader.next(key, value)) { Vector vector = value.get(); double[] row = new double[vector.size()]; for (int i = 0; i < vector.getNumNondefaultElements(); i++) { Element element = vector.getElement(i); row[element.index()] = element.get(); }//from w ww.j a v a2 s .c o m rows.add(row); } return rows.toArray(new double[rows.size()][]); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
/**
 * Renders the non-zero entries of {@code vector} as {@code {label:value,label:value,...}},
 * sorted by value in descending order and truncated to the first 25 entries.
 *
 * <p>An entry's label is {@code dictionary[index]} when {@code dictionary} is non-null,
 * otherwise the index itself. A vector without non-zero entries yields {@code "{}"}.
 *
 * @param vector     vector to render
 * @param dictionary optional index-to-label table; may be {@code null}
 * @return the formatted string, always starting with '{' and ending with '}'
 */
public static String vectorToSortedString(Vector vector, String[] dictionary) {
  List<Pair<String, Double>> vectorValues =
      new ArrayList<Pair<String, Double>>(vector.getNumNondefaultElements());
  Iterator<Vector.Element> it = vector.iterateNonZero();
  while (it.hasNext()) {
    Vector.Element e = it.next();
    String label = dictionary != null ? dictionary[e.index()] : String.valueOf(e.index());
    vectorValues.add(Pair.of(label, e.get()));
  }

  // Largest values first.
  Collections.sort(vectorValues, new Comparator<Pair<String, Double>>() {
    @Override
    public int compare(Pair<String, Double> x, Pair<String, Double> y) {
      return y.getSecond().compareTo(x.getSecond());
    }
  });

  final int maxEntries = 25;
  int limit = Math.min(vectorValues.size(), maxEntries);
  StringBuilder bldr = new StringBuilder(2048);
  bldr.append('{');
  for (int i = 0; i < limit; i++) {
    if (i > 0) {
      bldr.append(',');
    }
    Pair<String, Double> p = vectorValues.get(i);
    bldr.append(p.getFirst());
    bldr.append(':');
    bldr.append(p.getSecond());
  }
  // Close unconditionally so that an empty vector produces "{}" rather than a lone "{".
  bldr.append('}');
  return bldr.toString();
}
From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java
License:Apache License
private void postInitCorpus() { totalCorpusWeight = 0;//from w w w.j a v a 2 s. co m int numNonZero = 0; for (int i = 0; i < numDocuments; i++) { Vector v = corpusWeights.viewRow(i); double norm; if (v != null && (norm = v.norm(1)) != 0) { numNonZero += v.getNumNondefaultElements(); totalCorpusWeight += norm; } } String s = "Initializing corpus with %d docs, %d terms, %d nonzero entries, total termWeight %f"; log.info(String.format(s, numDocuments, numTerms, numNonZero, totalCorpusWeight)); }
From source file:com.elex.dmp.lda.ModelTrainer.java
License:Apache License
public void train(VectorIterable matrix, VectorIterable docTopicCounts, int numDocTopicIters) { start();//from w w w . j av a 2 s .c o m Iterator<MatrixSlice> docIterator = matrix.iterator(); Iterator<MatrixSlice> docTopicIterator = docTopicCounts.iterator(); long startTime = System.nanoTime(); int i = 0; double[] times = new double[100]; Map<Vector, Vector> batch = Maps.newHashMap(); int numTokensInBatch = 0; long batchStart = System.nanoTime(); while (docIterator.hasNext() && docTopicIterator.hasNext()) { i++; Vector document = docIterator.next().vector(); Vector topicDist = docTopicIterator.next().vector(); if (isReadWrite) { if (batch.size() < numTrainThreads) { batch.put(document, topicDist); if (log.isDebugEnabled()) { numTokensInBatch += document.getNumNondefaultElements(); } } else { batchTrain(batch, true, numDocTopicIters); long time = System.nanoTime(); log.debug("trained {} docs with {} tokens, start time {}, end time {}", new Object[] { numTrainThreads, numTokensInBatch, batchStart, time }); batchStart = time; numTokensInBatch = 0; } } else { long start = System.nanoTime(); train(document, topicDist, true, numDocTopicIters); if (log.isDebugEnabled()) { times[i % times.length] = (System.nanoTime() - start) / (1.0e6 * document.getNumNondefaultElements()); if (i % 100 == 0) { long time = System.nanoTime() - startTime; log.debug("trained " + i + " documents in " + (time / 1.0e6) + "ms"); if (i % 500 == 0) { Arrays.sort(times); log.debug("training took median " + times[times.length / 2] + "ms per token-instance"); } } } } } stop(); }
From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java
License:Apache License
@Override protected void reduce(Text key, Iterable<StringTuple> values, Context context) throws IOException, InterruptedException { Iterator<StringTuple> it = values.iterator(); if (!it.hasNext()) { return;/*from w ww .j av a2 s. com*/ } StringTuple value = it.next(); Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size if (maxNGramSize >= 2) { ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize); try { do { String term = sf.getAttribute(CharTermAttribute.class).toString(); if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram int termId = dictionary.get(term); vector.setQuick(termId, vector.getQuick(termId) + 1); } } while (sf.incrementToken()); sf.end(); } finally { Closeables.closeQuietly(sf); } } else { for (String term : value.getEntries()) { if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram int termId = dictionary.get(term); vector.setQuick(termId, vector.getQuick(termId) + 1); } } } if (sequentialAccess) { vector = new SequentialAccessSparseVector(vector); } if (namedVector) { vector = new NamedVector(vector, key.toString()); } // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk. if (vector.getNumNondefaultElements() > 0) { VectorWritable vectorWritable = new VectorWritable(vector); context.write(key, vectorWritable); } else { context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1); } }
From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java
License:Apache License
/**
 * Validates the inputs, assembles the matrices {@code Ai} and {@code Vi} from the feature
 * and rating vectors, and delegates to the two-matrix {@code solve} overload.
 *
 * @param featureVectors feature vectors; must be non-null, non-empty, and as many as the
 *                       rating vector has non-default elements
 * @param ratingVector   ratings; must be non-null with at least one non-default element
 * @param lambda         factor forwarded to the {@code Ai} computation
 * @param numFeatures    feature count forwarded to {@code createMiIi}
 * @return the result of {@code solve(Ai, Vi)}
 */
public static Vector solve(Iterable<Vector> featureVectors, Vector ratingVector, double lambda,
    int numFeatures) {
  Preconditions.checkNotNull(featureVectors, "Feature Vectors cannot be null");
  Preconditions.checkArgument(!Iterables.isEmpty(featureVectors));
  Preconditions.checkNotNull(ratingVector, "Rating Vector cannot be null");

  final int nui = ratingVector.getNumNondefaultElements();
  Preconditions.checkArgument(nui > 0, "Rating Vector cannot be empty");
  Preconditions.checkArgument(Iterables.size(featureVectors) == nui);

  Matrix featureMatrix = createMiIi(featureVectors, numFeatures);
  Matrix ratingMatrix = createRiIiMaybeTransposed(ratingVector);

  // Ai = MiIi * t(MiIi) + lambda * nui * E
  Matrix ai = miTimesMiTransposePlusLambdaTimesNuiTimesE(featureMatrix, lambda, nui);
  // Vi = MiIi * t(R(i,Ii))
  Matrix vi = featureMatrix.times(ratingMatrix);

  // Solve Ai * ui = Vi.
  return solve(ai, vi);
}
From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java
License:Apache License
static Matrix createRiIiMaybeTransposed(Vector ratingVector) { Preconditions.checkArgument(ratingVector.isSequentialAccess(), "Ratings should be iterable in Index or Sequential Order"); double[][] RiIiMaybeTransposed = new double[ratingVector.getNumNondefaultElements()][1]; int index = 0; for (Vector.Element elem : ratingVector.nonZeroes()) { RiIiMaybeTransposed[index++][0] = elem.get(); }// ww w. j a v a 2 s . co m return new DenseMatrix(RiIiMaybeTransposed, true); }