Usage examples for org.apache.mahout.math.Vector#setQuick(int index, double value)
void setQuick(int index, double value);
From source file:com.scaleunlimited.classify.model.HashedFeaturesLibLinearModel.java
License:Apache License
/** * Given a map from term to count, generate a feature array using * _maxFeatureIndex as the max index, based on the hash of the term. * /*from w w w.jav a 2 s .c om*/ * @param terms * @return array of LibLinear features */ private Feature[] getFeatures(Map<String, Integer> terms) { // First create the vector, where each term's index is the hash // of the term, and the value is the term count. Map<Integer, Integer> collisionCount = new HashMap<>(); Vector v = new RandomAccessSparseVector(_maxFeatureIndex); for (String term : terms.keySet()) { int index = calcHashJoaat(term, _maxFeatureIndex); double curValue = v.getQuick(index); if (_averageCollisions && (curValue != 0.0)) { Integer curCollisionCount = collisionCount.get(index); if (curCollisionCount == null) { // Number of values we'll need to divide by collisionCount.put(index, 2); } else { collisionCount.put(index, curCollisionCount + 1); } v.setQuick(index, curValue + terms.get(term)); } else { v.setQuick(index, terms.get(term)); } } // Now adjust the vector for collisions, if needed. if (_averageCollisions && !collisionCount.isEmpty()) { for (Integer index : collisionCount.keySet()) { double curValue = v.getQuick(index); v.setQuick(index, curValue / collisionCount.get(index)); } } // Apply the term vector normalizer. getNormalizer().normalize(v); List<FeatureNode> features = new ArrayList<FeatureNode>(terms.size()); for (Element e : v.nonZeroes()) { features.add(new FeatureNode(e.index() + 1, e.get())); } // We need to sort by increasing index. Collections.sort(features, new Comparator<FeatureNode>() { @Override public int compare(FeatureNode o1, FeatureNode o2) { return o1.index - o2.index; } }); return features.toArray(new FeatureNode[features.size()]); }
From source file:com.scaleunlimited.classify.vectors.UnitNormalizer.java
License:Apache License
@Override public void normalize(Vector vector) { double length = Math.sqrt(vector.getLengthSquared()); // Divide each vector coordinate by length, so we wind up with a unit vector. int cardinality = vector.size(); for (int j = 0; j < cardinality; j++) { double curValue = vector.getQuick(j); if (curValue > 0.0) { vector.setQuick(j, curValue / length); }//from ww w . j a v a2 s .co m } }
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
/** * Create a vector from the (sorted) list of unique terms, and the map of terms/counts * for a document// www. j av a 2s. c o m * * @param terms * @param docTerms * @return vector of term frequencies */ public static Vector makeVector(List<String> terms, Map<String, Integer> docTerms) { Vector result = new RandomAccessSparseVector(terms.size()); for (String term : docTerms.keySet()) { int index = Collections.binarySearch(terms, term); if (index < 0) { // This can happen when we're making a vector for classification // result, since docTerms contains terms from a random doc, but // terms has the terms from cluster generation. } else { int value = docTerms.get(term); result.setQuick(index, value); } } return result; }
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
public static Vector makeVectorDouble(List<String> featuresList, Map<String, Double> featureMap) { Vector result = new RandomAccessSparseVector(featuresList.size()); for (String term : featureMap.keySet()) { int index = Collections.binarySearch(featuresList, term); if (index < 0) { // This can happen when we're making a vector for classification // result, since docTerms contains terms from a random doc, but // terms has the terms from cluster generation. } else {//from w w w . ja v a 2s. co m double value = featureMap.get(term); result.setQuick(index, value); } } return result; }
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
public static Vector makeExtraVector(List<String> terms, Map<String, Integer> docTerms) { List<String> extraTerms = new ArrayList<String>(); for (String term : docTerms.keySet()) { int index = Collections.binarySearch(terms, term); if (index < 0) { extraTerms.add(term);//from ww w . ja v a 2s.c om } } Vector result = new RandomAccessSparseVector(extraTerms.size()); Collections.sort(extraTerms); int index = 0; for (String extraTerm : extraTerms) { int value = docTerms.get(extraTerm); if (value != 0) { result.setQuick(index++, value); } } return result; }
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
/**
 * Concatenates two vectors: the result holds baseVector's values in
 * [0, baseVector.size()) followed by extraVector's values shifted up by
 * baseVector.size().
 *
 * @param baseVector vector whose values occupy the low indexes
 * @param extraVector vector whose values occupy the high indexes
 * @return new sparse vector of size baseVector.size() + extraVector.size()
 */
public static Vector appendVectors(Vector baseVector, Vector extraVector) {
    int baseSize = baseVector.size();
    Vector result = new RandomAccessSparseVector(baseSize + extraVector.size());
    // Iterate only the non-zero entries rather than densely scanning every
    // slot up to the cardinality — same result, but O(nnz) instead of O(n)
    // for sparse inputs (the old loop skipped zeros anyway).
    for (Vector.Element e : baseVector.nonZeroes()) {
        result.setQuick(e.index(), e.get());
    }
    for (Vector.Element e : extraVector.nonZeroes()) {
        result.setQuick(baseSize + e.index(), e.get());
    }
    return result;
}
From source file:com.scaleunlimited.classify.vectors.VectorUtils.java
License:Apache License
/**
 * Returns a copy of the vector with extraSize empty slots appended.
 * Returns the original vector unchanged when extraSize is 0.
 *
 * @param v vector to extend
 * @param extraSize number of additional (zero) slots to append
 * @return vector of size v.size() + extraSize holding v's values
 */
public static Vector extendVector(Vector v, int extraSize) {
    if (extraSize == 0) {
        return v;
    }

    int baseSize = v.size();
    Vector result = new RandomAccessSparseVector(baseSize + extraSize);
    // Copy only the non-zero entries rather than densely scanning every slot
    // up to the cardinality — same result, but O(nnz) instead of O(n) for
    // sparse inputs (the old loop skipped zeros anyway).
    for (Vector.Element e : v.nonZeroes()) {
        result.setQuick(e.index(), e.get());
    }
    return result;
}
From source file:com.scaleunlimited.classify.vectors.VectorUtilsTest.java
License:Apache License
@Test
public void testAppend() {
    // Base vector contributes slots 0..1 (slot 0 holds 0, so it stays empty).
    Vector v1 = new RandomAccessSparseVector(2);
    v1.setQuick(0, 0);
    v1.setQuick(1, 1);

    Vector v2 = new RandomAccessSparseVector(3);
    v2.setQuick(0, 2);
    v2.setQuick(1, 3);
    v2.setQuick(2, 4);

    Vector v3 = VectorUtils.appendVectors(v1, v2);

    // Appending a size-2 and a size-3 vector yields size 5, with v2's values
    // shifted up by v1's size — so slot i holds the value i.
    Assert.assertEquals(5, v3.size());
    for (int i = 0; i < 5; i++) {
        // Use the double-with-delta overload instead of round-tripping
        // through the deprecated new Double(...) constructor.
        Assert.assertEquals((double) i, v3.getQuick(i), 0.0);
    }
}
From source file:com.scaleunlimited.classify.vectors.WritableComparableVectorTest.java
License:Apache License
private Vector makeVector() { Vector vector = new RandomAccessSparseVector(2); vector.setQuick(0, 5); vector.setQuick(1, 10.0);// w w w . j av a 2 s . c om return vector; }
From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java
License:Apache License
private void initializeM(Vector averageRatings) throws IOException { Random random = RandomUtils.getRandom(); FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf()); SequenceFile.Writer writer = null; //MapFile.Writer writer = null; try {// w w w . java 2 s . com //writer = new MapFile.Writer(getConf(), fs, pathToM(-1).toString(), IntWritable.class, VectorWritable.class); writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class); Iterator<Vector.Element> averages = averageRatings.iterateNonZero(); while (averages.hasNext()) { Vector.Element e = averages.next(); Vector row = new DenseVector(numFeatures); row.setQuick(0, e.get()); for (int m = 1; m < numFeatures; m++) { row.setQuick(m, random.nextDouble()); } writer.append(new IntWritable(e.index()), new VectorWritable(row)); } } finally { Closeables.closeQuietly(writer); } }