Example usage for org.apache.mahout.math Vector setQuick

Introduction

This page collects example usages of the org.apache.mahout.math.Vector method setQuick.

Prototype

void setQuick(int index, double value);

Document

Sets the value at the given index, without checking bounds.
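
As a minimal sketch of the prototype above (assuming only Mahout's RandomAccessSparseVector is on the classpath), the snippet below sets a couple of values without bounds checking and reads them back with getQuick:

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class SetQuickExample {

    public static void main(String[] args) {
        // Create a sparse vector with cardinality 10.
        Vector v = new RandomAccessSparseVector(10);

        // setQuick stores a value at the given index without bounds checking,
        // so the caller is responsible for keeping the index valid.
        v.setQuick(3, 1.5);
        v.setQuick(7, 2.0);

        System.out.println(v.getQuick(3)); // 1.5
        System.out.println(v.getQuick(7)); // 2.0
    }
}

Because no bounds check is performed, the caller must guarantee that the index is within the vector's cardinality.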

Usage

From source file:com.scaleunlimited.classify.model.HashedFeaturesLibLinearModel.java

License:Apache License

/**
 * Given a map from term to count, generate a feature array using
 * _maxFeatureIndex as the max index, based on the hash of the term.
 *
 * @param terms
 * @return array of LibLinear features
 */

private Feature[] getFeatures(Map<String, Integer> terms) {

    // First create the vector, where each term's index is the hash
    // of the term, and the value is the term count.
    Map<Integer, Integer> collisionCount = new HashMap<>();
    Vector v = new RandomAccessSparseVector(_maxFeatureIndex);
    for (String term : terms.keySet()) {
        int index = calcHashJoaat(term, _maxFeatureIndex);
        double curValue = v.getQuick(index);
        if (_averageCollisions && (curValue != 0.0)) {
            Integer curCollisionCount = collisionCount.get(index);
            if (curCollisionCount == null) {
                // Number of values we'll need to divide by
                collisionCount.put(index, 2);
            } else {
                collisionCount.put(index, curCollisionCount + 1);
            }

            v.setQuick(index, curValue + terms.get(term));
        } else {
            v.setQuick(index, terms.get(term));
        }
    }

    // Now adjust the vector for collisions, if needed.
    if (_averageCollisions && !collisionCount.isEmpty()) {
        for (Integer index : collisionCount.keySet()) {
            double curValue = v.getQuick(index);
            v.setQuick(index, curValue / collisionCount.get(index));
        }
    }

    // Apply the term vector normalizer.
    getNormalizer().normalize(v);

    List<FeatureNode> features = new ArrayList<FeatureNode>(terms.size());
    for (Element e : v.nonZeroes()) {
        features.add(new FeatureNode(e.index() + 1, e.get()));
    }

    // We need to sort by increasing index.
    Collections.sort(features, new Comparator<FeatureNode>() {

        @Override
        public int compare(FeatureNode o1, FeatureNode o2) {
            return o1.index - o2.index;
        }
    });

    return features.toArray(new FeatureNode[features.size()]);
}
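
The javadoc above describes hashing terms into a fixed feature space and averaging counts that collide on the same index. The following is a hypothetical, standalone sketch of that collision-averaging step using only the Mahout Vector API (the class and helper names are illustrative, not from the original source):

import java.util.HashMap;
import java.util.Map;

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class CollisionAveragingSketch {

    public static void main(String[] args) {
        Vector v = new RandomAccessSparseVector(16);
        Map<Integer, Integer> collisionCount = new HashMap<>();

        // Pretend both "foo" (count 4) and "bar" (count 2) hash to index 5.
        accumulate(v, collisionCount, 5, 4);
        accumulate(v, collisionCount, 5, 2);

        // Average the colliding counts, as in getFeatures() above.
        for (Map.Entry<Integer, Integer> e : collisionCount.entrySet()) {
            int index = e.getKey();
            v.setQuick(index, v.getQuick(index) / e.getValue());
        }

        System.out.println(v.getQuick(5)); // (4 + 2) / 2 = 3.0
    }

    private static void accumulate(Vector v, Map<Integer, Integer> collisionCount, int index, int count) {
        double curValue = v.getQuick(index);
        if (curValue != 0.0) {
            // Track how many values share this slot (2 on the first collision).
            Integer curCollisionCount = collisionCount.get(index);
            collisionCount.put(index, curCollisionCount == null ? 2 : curCollisionCount + 1);
            v.setQuick(index, curValue + count);
        } else {
            v.setQuick(index, count);
        }
    }
}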

From source file:com.scaleunlimited.classify.vectors.UnitNormalizer.java

License:Apache License

@Override
public void normalize(Vector vector) {

    double length = Math.sqrt(vector.getLengthSquared());

    // Divide each vector coordinate by length, so we wind up with a unit vector.
    int cardinality = vector.size();
    for (int j = 0; j < cardinality; j++) {
        double curValue = vector.getQuick(j);
        if (curValue > 0.0) {
            vector.setQuick(j, curValue / length);
        }
    }
}
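
A small usage sketch for the normalizer above, assuming the UnitNormalizer class from this listing is available and has a no-argument constructor: a vector of (3, 4) should come back as the unit vector (0.6, 0.8).

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

import com.scaleunlimited.classify.vectors.UnitNormalizer;

public class UnitNormalizerSketch {

    public static void main(String[] args) {
        Vector v = new RandomAccessSparseVector(2);
        v.setQuick(0, 3.0);
        v.setQuick(1, 4.0);

        // Length is sqrt(3^2 + 4^2) = 5, so each coordinate is divided by 5.
        new UnitNormalizer().normalize(v);

        System.out.println(v.getQuick(0)); // 0.6
        System.out.println(v.getQuick(1)); // 0.8
    }
}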

From source file:com.scaleunlimited.classify.vectors.VectorUtils.java

License:Apache License

/**
 * Create a vector from the (sorted) list of unique terms, and the map of terms/counts
 * for a document.
 * 
 * @param terms
 * @param docTerms
 * @return vector of term frequencies
 */
public static Vector makeVector(List<String> terms, Map<String, Integer> docTerms) {
    Vector result = new RandomAccessSparseVector(terms.size());

    for (String term : docTerms.keySet()) {
        int index = Collections.binarySearch(terms, term);
        if (index < 0) {
            // This can happen when we're making a vector for classification
            // result, since docTerms contains terms from a random doc, but
            // terms has the terms from cluster generation.
        } else {
            int value = docTerms.get(term);
            result.setQuick(index, value);
        }
    }

    return result;
}
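
A hypothetical usage sketch for makeVector, assuming the VectorUtils class from this listing is on the classpath. Note that the term list must already be sorted, since the method locates each term's index with Collections.binarySearch:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.mahout.math.Vector;

import com.scaleunlimited.classify.vectors.VectorUtils;

public class MakeVectorSketch {

    public static void main(String[] args) {
        // The vocabulary must be sorted so binarySearch() can find each term.
        List<String> terms = Arrays.asList("apple", "banana", "cherry");

        Map<String, Integer> docTerms = new HashMap<>();
        docTerms.put("banana", 3);
        docTerms.put("cherry", 1);
        docTerms.put("durian", 2); // Not in the vocabulary, so it is silently dropped.

        Vector v = VectorUtils.makeVector(terms, docTerms);

        System.out.println(v.getQuick(1)); // 3.0 ("banana")
        System.out.println(v.getQuick(2)); // 1.0 ("cherry")
    }
}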

From source file:com.scaleunlimited.classify.vectors.VectorUtils.java

License:Apache License

public static Vector makeVectorDouble(List<String> featuresList, Map<String, Double> featureMap) {
    Vector result = new RandomAccessSparseVector(featuresList.size());

    for (String term : featureMap.keySet()) {
        int index = Collections.binarySearch(featuresList, term);
        if (index < 0) {
            // This can happen when we're making a vector for classification
            // results, since featureMap contains terms from a random doc, but
            // featuresList has the terms from cluster generation.
        } else {
            double value = featureMap.get(term);
            result.setQuick(index, value);
        }
    }
    return result;
}

From source file:com.scaleunlimited.classify.vectors.VectorUtils.java

License:Apache License

public static Vector makeExtraVector(List<String> terms, Map<String, Integer> docTerms) {
    List<String> extraTerms = new ArrayList<String>();

    for (String term : docTerms.keySet()) {
        int index = Collections.binarySearch(terms, term);
        if (index < 0) {
            extraTerms.add(term);
        }
    }

    Vector result = new RandomAccessSparseVector(extraTerms.size());
    Collections.sort(extraTerms);

    int index = 0;
    for (String extraTerm : extraTerms) {
        int value = docTerms.get(extraTerm);
        if (value != 0) {
            result.setQuick(index++, value);
        }
    }

    return result;
}

From source file:com.scaleunlimited.classify.vectors.VectorUtils.java

License:Apache License

public static Vector appendVectors(Vector baseVector, Vector extraVector) {
    int baseSize = baseVector.size();
    Vector result = new RandomAccessSparseVector(baseSize + extraVector.size());

    for (int i = 0; i < baseSize; i++) {
        double value = baseVector.getQuick(i);
        if (value != 0.0) {
            result.setQuick(i, value);
        }
    }

    for (int i = 0; i < extraVector.size(); i++) {
        double value = extraVector.getQuick(i);
        if (value != 0.0) {
            result.setQuick(baseSize + i, value);
        }
    }

    return result;
}

From source file:com.scaleunlimited.classify.vectors.VectorUtils.java

License:Apache License

public static Vector extendVector(Vector v, int extraSize) {
    if (extraSize == 0) {
        return v;
    }

    int baseSize = v.size();
    Vector result = new RandomAccessSparseVector(baseSize + extraSize);
    for (int i = 0; i < baseSize; i++) {
        double value = v.getQuick(i);
        if (value != 0.0) {
            result.setQuick(i, value);
        }
    }

    return result;
}

From source file:com.scaleunlimited.classify.vectors.VectorUtilsTest.java

License:Apache License

@Test
public void testAppend() {
    Vector v1 = new RandomAccessSparseVector(2);
    v1.setQuick(0, 0);
    v1.setQuick(1, 1);

    Vector v2 = new RandomAccessSparseVector(3);
    v2.setQuick(0, 2);
    v2.setQuick(1, 3);
    v2.setQuick(2, 4);

    Vector v3 = VectorUtils.appendVectors(v1, v2);

    Assert.assertEquals(5, v3.size());
    for (int i = 0; i < 5; i++) {
        Assert.assertEquals(i, (int) v3.getQuick(i));
    }
}

From source file:com.scaleunlimited.classify.vectors.WritableComparableVectorTest.java

License:Apache License

private Vector makeVector() {
    Vector vector = new RandomAccessSparseVector(2);
    vector.setQuick(0, 5);
    vector.setQuick(1, 10.0);
    return vector;
}

From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java

License:Apache License

private void initializeM(Vector averageRatings) throws IOException {
    Random random = RandomUtils.getRandom();

    FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
    SequenceFile.Writer writer = null;
    //MapFile.Writer writer = null;
    try {
        //writer = new MapFile.Writer(getConf(), fs, pathToM(-1).toString(), IntWritable.class, VectorWritable.class);
        writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"),
                IntWritable.class, VectorWritable.class);

        Iterator<Vector.Element> averages = averageRatings.iterateNonZero();
        while (averages.hasNext()) {
            Vector.Element e = averages.next();
            Vector row = new DenseVector(numFeatures);
            row.setQuick(0, e.get());
            for (int m = 1; m < numFeatures; m++) {
                row.setQuick(m, random.nextDouble());
            }
            writer.append(new IntWritable(e.index()), new VectorWritable(row));
        }
    } finally {
        Closeables.closeQuietly(writer);
    }
}