List of usage examples for org.apache.mahout.math.Vector#getQuick
/**
 * Returns the value stored at the given index of this vector.
 *
 * NOTE(review): the "Quick" suffix presumably means the index is not
 * bounds-checked -- confirm against the Mahout Vector Javadoc.
 *
 * @param index position of the element to read
 * @return the element at {@code index}
 */
double getQuick(int index);
From source file:com.scaleunlimited.classify.vectors.BaseNormalizer.java
License:Apache License
/**
 * Logs, at debug level, every term whose frequency relative to the largest
 * frequency exceeds {@code MIN_FREQUENCY_REPORT_RATIO}, ordered from the most
 * to the least frequent.
 *
 * @param docFrequencies vector whose value at index {@code i} is the frequency of term {@code i}
 * @param uniqueTerms    terms, addressed by the same indexes as {@code docFrequencies}
 */
public static void dumpTopTerms(final Vector docFrequencies, List<String> uniqueTerms) {
    int cardinality = docFrequencies.size();

    List<Integer> sortedDocFrequencyIndexes = new ArrayList<Integer>(cardinality);
    for (int i = 0; i < cardinality; i++) {
        sortedDocFrequencyIndexes.add(i);
    }

    // Sort indexes by descending frequency. Double.compare is used instead of
    // casting the difference to int: the cast truncates differences smaller
    // than 1.0 to "equal", which yields an inconsistent ordering and can
    // violate the Comparator contract.
    Collections.sort(sortedDocFrequencyIndexes, new Comparator<Integer>() {
        @Override
        public int compare(Integer o1, Integer o2) {
            return Double.compare(docFrequencies.getQuick(o2), docFrequencies.getQuick(o1));
        }
    });

    double maxFrequency = docFrequencies.getQuick(docFrequencies.maxValueIndex());

    // The builder is local to this call, so no synchronization is needed.
    StringBuilder topTermsReport = new StringBuilder();
    for (int i = 0; i < cardinality; i++) {
        int index = sortedDocFrequencyIndexes.get(i);
        double frequency = docFrequencies.getQuick(index);
        if ((frequency / maxFrequency) > MIN_FREQUENCY_REPORT_RATIO) {
            topTermsReport.append(String.format("'%s'=%d, ", uniqueTerms.get(index), (int) frequency));
        }
    }

    LOGGER.debug(topTermsReport.toString());
}
From source file:com.scaleunlimited.classify.vectors.UnitNormalizer.java
License:Apache License
@Override public void normalize(Vector vector) { double length = Math.sqrt(vector.getLengthSquared()); // Divide each vector coordinate by length, so we wind up with a unit vector. int cardinality = vector.size(); for (int j = 0; j < cardinality; j++) { double curValue = vector.getQuick(j); if (curValue > 0.0) { vector.setQuick(j, curValue / length); }/*ww w . jav a 2s . c om*/ } }
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
public static Vector appendVectors(Vector baseVector, Vector extraVector) { int baseSize = baseVector.size(); Vector result = new RandomAccessSparseVector(baseSize + extraVector.size()); for (int i = 0; i < baseSize; i++) { double value = baseVector.getQuick(i); if (value != 0.0) { result.setQuick(i, value);// w w w .j a va 2 s . com } } for (int i = 0; i < extraVector.size(); i++) { double value = extraVector.getQuick(i); if (value != 0.0) { result.setQuick(baseSize + i, value); } } return result; }
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
/**
 * Returns a vector with the same entries as {@code v} and {@code extraSize}
 * additional trailing positions.
 *
 * @param v         vector to extend
 * @param extraSize number of positions to add; when 0 the input vector itself is returned
 * @return {@code v} when {@code extraSize} is 0, otherwise a new sparse vector
 *         of size {@code v.size() + extraSize}
 */
public static Vector extendVector(Vector v, int extraSize) {
    if (extraSize != 0) {
        final int originalSize = v.size();
        Vector extended = new RandomAccessSparseVector(originalSize + extraSize);

        // Copy only the non-zero entries; the added tail is never written.
        for (int index = 0; index < originalSize; index++) {
            double entry = v.getQuick(index);
            if (entry == 0.0) {
                continue;
            }
            extended.setQuick(index, entry);
        }
        return extended;
    }

    // Nothing to add: hand back the caller's vector unchanged (no copy is made).
    return v;
}
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
/**
 * Renders the non-zero components of a vector as text of the form
 * {@code "index => value, "} after a fixed header.
 *
 * @param v vector to render
 * @return the textual dump
 */
public static String dumpVector(Vector v) {
    StringBuilder dump = new StringBuilder();
    dump.append(String.format("Vector '%s': ", "<unknown>"));

    final int size = v.size();
    for (int index = 0; index < size; index++) {
        double entry = v.getQuick(index);
        if (entry == 0.0) {
            // Zero components are omitted from the dump.
            continue;
        }
        dump.append(String.format("%d => %f, ", index, entry));
    }

    return dump.toString();
}
From source file:com.scaleunlimited.classify.vectors.VectorUtilsTest.java
License:Apache License
/**
 * Appends a 3-element vector (2, 3, 4) to a 2-element vector (0, 1) and checks
 * that the result has 5 elements whose integer values equal their indexes.
 */
@Test
public void testAppend() {
    Vector head = new RandomAccessSparseVector(2);
    head.setQuick(0, 0);
    head.setQuick(1, 1);

    Vector tail = new RandomAccessSparseVector(3);
    tail.setQuick(0, 2);
    tail.setQuick(1, 3);
    tail.setQuick(2, 4);

    Vector appended = VectorUtils.appendVectors(head, tail);

    Assert.assertEquals(5, appended.size());
    for (int i = 0; i < 5; i++) {
        int truncated = (int) appended.getQuick(i);
        Assert.assertEquals(i, truncated);
    }
}
From source file:com.scaleunlimited.classify.vectors.WritableComparableVectorTest.java
License:Apache License
/**
 * Asserts that two vectors have the same size and exactly equal values at
 * every index.
 *
 * @param vector1 the expected vector
 * @param vector2 the actual vector
 */
private void compareVectors(Vector vector1, Vector vector2) {
    Assert.assertEquals(vector1.size(), vector2.size());

    int size = vector1.size();
    for (int i = 0; i < size; i++) {
        // Use the (expected, actual, delta) overload: JUnit 4's two-argument
        // double overload is deprecated and fails unconditionally. A delta of
        // 0.0 keeps the comparison exact.
        Assert.assertEquals(vector1.getQuick(i), vector2.getQuick(i), 0.0);
    }
}
From source file:com.ydy.cf.solver.impl.AlternatingLeastSquaresSolver.java
License:Apache License
private Matrix createMiIi(Iterable<Vector> featureVectors, int numFeatures) { Matrix MiIi = new DenseMatrix(numFeatures, Iterables.size(featureVectors)); int n = 0;//from w w w .j a v a2s . com for (Vector featureVector : featureVectors) { for (int m = 0; m < numFeatures; m++) { MiIi.setQuick(m, n, featureVector.getQuick(m)); } n++; } return MiIi; }
From source file:edu.rosehulman.TFPartialVectorReducer.java
License:Apache License
@Override protected void reduce(Text key, Iterable<StringTuple> values, Context context) throws IOException, InterruptedException { Iterator<StringTuple> it = values.iterator(); if (!it.hasNext()) { return;/*from w w w .jav a 2 s . c o m*/ } StringTuple value = it.next(); Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size if (maxNGramSize >= 2) { ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize); sf.reset(); try { do { String term = sf.getAttribute(CharTermAttribute.class).toString(); if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram int termId = dictionary.get(term); vector.setQuick(termId, vector.getQuick(termId) + 1); } } while (sf.incrementToken()); sf.end(); } finally { Closeables.close(sf, true); } } else { for (String term : value.getEntries()) { if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram int termId = dictionary.get(term); vector.setQuick(termId, vector.getQuick(termId) + 1); } } } if (sequentialAccess) { vector = new SequentialAccessSparseVector(vector); } if (namedVector) { vector = new NamedVector(vector, key.toString()); } // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk. if (vector.getNumNondefaultElements() > 0) { VectorWritable vectorWritable = new VectorWritable(vector); context.write(key, vectorWritable); } else { context.getCounter("TFPartialVectorReducer", "emptyVectorCount").increment(1); } }
From source file:edu.utsa.sifter.som.SelfOrganizingMap.java
License:Apache License
public double computeDistance(final int id, final IntArrayWritable doc) { final Vector cellWeights = getCell(id); final int cardinality = doc.getLength(); final double s1 = cardinality; final double s2 = getS2(id); double s3 = 0.0; double c = 0.0, // Kahan summation algorithm to account for error, c.f. http://en.wikipedia.org/wiki/Kahan_summation_algorithm w, t;/*w w w. j a va2 s.c o m*/ final double f = getFactor(id); final int[] terms = doc.getInts(); for (int i = 0; i < cardinality; ++i) { w = (f * cellWeights.getQuick(terms[i])) - c; t = s3 + w; c = (t - s3) - w; s3 = t; // s3 += f * cellWeights.getQuick(itr.next().index()); } s3 *= -2; final double d = s1 + s2 + s3; if (d < 0) { System.out.println("Negative distance on " + id + " - d = " + d + ", s1 = " + s1 + ", s2 = " + s2 + ", s3 = " + s3 + ", f = " + f); } return d; }