List of usage examples for org.apache.mahout.math Vector getQuick
double getQuick(int index);
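Unlike get(int), getQuick reads the element at the given index without bounds checking, which is why it shows up in the tight loops throughout the examples below; the caller must guarantee 0 <= index < size(). A minimal sketch of the pattern (the driver class GetQuickDemo is invented for illustration):

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class GetQuickDemo {
  public static void main(String[] args) {
    Vector v = new DenseVector(new double[] { 1.0, 2.0, 3.0 });
    double sum = 0.0;
    // The loop bound comes from v.size(), so the unchecked access is safe.
    for (int i = 0; i < v.size(); i++) {
      sum += v.getQuick(i);
    }
    System.out.println(sum); // 6.0
  }
}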
From source file:com.cloudera.science.ml.core.vectors.Vectors.java
License:Open Source License
/**
 * Converts the given {@code Vector} into a {@code double[]}.
 *
 * @param v The vector to convert
 * @return The resulting array of values
 */
public static double[] toArray(Vector v) {
  double[] ret = new double[v.size()];
  for (int i = 0; i < ret.length; i++) {
    ret[i] = v.getQuick(i);
  }
  return ret;
}
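A short usage sketch for the helper above (the driver class ToArrayDemo is invented for illustration):

import com.cloudera.science.ml.core.vectors.Vectors;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class ToArrayDemo {
  public static void main(String[] args) {
    Vector dense = new DenseVector(new double[] { 0.5, 1.5, 2.5 });
    double[] values = Vectors.toArray(dense);
    System.out.println(java.util.Arrays.toString(values)); // [0.5, 1.5, 2.5]
  }
}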
From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java
License:Open Source License
private BitSet index(Vector vec) {
  double[] prod = new double[projectionBits];
  if (vec.isDense()) {
    for (int i = 0; i < vec.size(); i++) {
      double v = vec.getQuick(i);
      if (v != 0.0) {
        for (int j = 0; j < projectionBits; j++) {
          prod[j] += v * projection[i + j * dimensions];
        }
      }
    }
  } else {
    Iterator<Vector.Element> iter = vec.iterateNonZero();
    while (iter.hasNext()) {
      Vector.Element e = iter.next();
      for (int j = 0; j < projectionBits; j++) {
        // Accumulate across all non-zero elements, mirroring the dense branch;
        // plain assignment here would overwrite earlier contributions.
        prod[j] += e.get() * projection[e.index() + j * dimensions];
      }
    }
  }
  BitSet bitset = new BitSet(projectionBits);
  for (int i = 0; i < projectionBits; i++) {
    if (prod[i] > 0.0) {
      bitset.set(i);
    }
  }
  return bitset;
}
From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java
License:Open Source License
private static double dot(Vector vec, double[] p) {
  double dot = 0;
  if (vec.isDense()) {
    for (int i = 0; i < p.length; i++) {
      dot += vec.getQuick(i) * p[i];
    }
  } else {
    Iterator<Vector.Element> iter = vec.iterateNonZero();
    while (iter.hasNext()) {
      Vector.Element e = iter.next();
      dot += e.get() * p[e.index()];
    }
  }
  return dot;
}
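For reference, the loop above computes the same value as vec.dot(new DenseVector(p)) from Mahout's own Vector API; the hand-rolled version simply avoids allocating the DenseVector wrapper on every call.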
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
  // First calculate p(topic|term,document) for all terms in original and all topics,
  // using p(term|topic) and p(topic|doc).
  pTopicGivenTerm(original, topics, docTopicModel);
  normalizeByTopic(docTopicModel);
  // Now multiply, term-by-term, by the document, to get the weighted distribution of
  // term-topic pairs from this document.
  Iterator<Vector.Element> it = original.iterateNonZero();
  while (it.hasNext()) {
    Vector.Element e = it.next();
    for (int x = 0; x < numTopics; x++) {
      Vector docTopicModelRow = docTopicModel.viewRow(x);
      docTopicModelRow.setQuick(e.index(), docTopicModelRow.getQuick(e.index()) * e.get());
    }
  }
  // Now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm.
  topics.assign(0.0);
  for (int x = 0; x < numTopics; x++) {
    topics.set(x, docTopicModel.viewRow(x).norm(1));
  }
  // Now renormalize so that sum_x(p(x|doc)) = 1.
  topics.assign(Functions.mult(1 / topics.norm(1)));
}
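In equation form, the steps the comments describe are roughly the following (notation inferred from the comments; n_{w,d} is the count of term w in the document):

p(x \mid w, d) \propto p(w \mid x)\, p(x \mid d)

p(x \mid d) \leftarrow \frac{\sum_{w} n_{w,d}\, p(x \mid w, d)}{\sum_{x'} \sum_{w} n_{w,d}\, p(x' \mid w, d)}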
From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context)
    throws IOException, InterruptedException {
  Iterator<StringTuple> it = values.iterator();
  if (!it.hasNext()) {
    return;
  }
  StringTuple value = it.next();
  Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size
  if (maxNGramSize >= 2) {
    ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);
    try {
      do {
        String term = sf.getAttribute(CharTermAttribute.class).toString();
        if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
          int termId = dictionary.get(term);
          vector.setQuick(termId, vector.getQuick(termId) + 1);
        }
      } while (sf.incrementToken());
      sf.end();
    } finally {
      Closeables.closeQuietly(sf);
    }
  } else {
    for (String term : value.getEntries()) {
      if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram
        int termId = dictionary.get(term);
        vector.setQuick(termId, vector.getQuick(termId) + 1);
      }
    }
  }
  if (sequentialAccess) {
    vector = new SequentialAccessSparseVector(vector);
  }
  if (namedVector) {
    vector = new NamedVector(vector, key.toString());
  }
  // If the vector has no non-zero entries (nothing in the dictionary),
  // don't waste space sending it to disk.
  if (vector.getNumNondefaultElements() > 0) {
    VectorWritable vectorWritable = new VectorWritable(vector);
    context.write(key, vectorWritable);
  } else {
    context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1);
  }
}
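The setQuick(getQuick(...) + 1) pair above is the usual read-modify-write increment for sparse term counting. Newer Mahout releases also expose incrementQuick, which collapses the two calls into one (a sketch, assuming a Mahout version that provides it):

vector.incrementQuick(termId, 1); // same effect as setQuick(termId, getQuick(termId) + 1)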
From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java
License:Open Source License
private static BasicDBList listFromMahoutVector(Vector vec, String prefix, BasicDBObject element) {
  if (vec instanceof NamedVector) {
    element.put(prefix + "Name", ((NamedVector) vec).getName());
  }
  BasicDBList dbl2 = new BasicDBList();
  if (vec.isDense()) {
    int nSize = vec.size();
    dbl2.ensureCapacity(nSize);
    for (int i = 0; i < nSize; ++i) {
      dbl2.add(vec.getQuick(i));
    }
  } else { // sparse, write as a set in the format [{int:double}]
    Iterator<org.apache.mahout.math.Vector.Element> elIt = vec.iterateNonZero();
    while (elIt.hasNext()) {
      BasicDBObject el2 = new BasicDBObject();
      org.apache.mahout.math.Vector.Element el = elIt.next();
      el2.put("k", el.index());
      el2.put("v", el.get());
      dbl2.add(el2);
    }
  }
  return dbl2;
}
From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java
License:Apache License
static Matrix createMiIi(Iterable<Vector> featureVectors, int numFeatures) {
  double[][] MiIi = new double[numFeatures][Iterables.size(featureVectors)];
  int n = 0;
  for (Vector featureVector : featureVectors) {
    for (int m = 0; m < numFeatures; m++) {
      MiIi[m][n] = featureVector.getQuick(m);
    }
    n++;
  }
  return new DenseMatrix(MiIi, true);
}
From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java
License:Apache License
public Matrix getYtransposeY(final OpenIntObjectHashMap<Vector> Y) {
  ExecutorService queue = Executors.newFixedThreadPool(numTrainingThreads);
  if (log.isInfoEnabled()) {
    log.info("Starting the computation of Y'Y");
  }
  long startTime = System.nanoTime();
  final IntArrayList indexes = Y.keys();
  final int numIndexes = indexes.size();
  final double[][] YtY = new double[numFeatures][numFeatures];

  // Compute Y'Y by dot products between the 'columns' of Y
  for (int i = 0; i < numFeatures; i++) {
    for (int j = i; j < numFeatures; j++) {
      final int ii = i;
      final int jj = j;
      queue.execute(new Runnable() {
        @Override
        public void run() {
          double dot = 0;
          for (int k = 0; k < numIndexes; k++) {
            Vector row = Y.get(indexes.getQuick(k));
            dot += row.getQuick(ii) * row.getQuick(jj);
          }
          YtY[ii][jj] = dot;
          if (ii != jj) {
            YtY[jj][ii] = dot;
          }
        }
      });
    }
  }
  queue.shutdown();
  try {
    queue.awaitTermination(1, TimeUnit.DAYS);
  } catch (InterruptedException e) {
    log.error("Error during Y'Y queue shutdown", e);
    throw new RuntimeException("Error during Y'Y queue shutdown");
  }
  if (log.isInfoEnabled()) {
    log.info("Computed Y'Y in " + (System.nanoTime() - startTime) / 1000000.0 + " ms");
  }
  return new DenseMatrix(YtY, true);
}
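Each scheduled task computes one entry of the Gram matrix, (Y'Y)_{ij} = sum_k Y_{ki} * Y_{kj}, as a dot product between columns i and j of Y. Only the upper triangle (j >= i) is submitted to the pool; the mirror entry YtY[jj][ii] is filled in by symmetry, roughly halving the work.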
From source file:com.scaleunlimited.classify.model.HashedFeaturesLibLinearModel.java
License:Apache License
/**
 * Given a map from term to count, generate a feature array using
 * _maxFeatureIndex as the max index, based on the hash of the term.
 *
 * @param terms
 * @return array of LibLinear features
 */
private Feature[] getFeatures(Map<String, Integer> terms) {
  // First create the vector, where each term's index is the hash
  // of the term, and the value is the term count.
  Map<Integer, Integer> collisionCount = new HashMap<>();
  Vector v = new RandomAccessSparseVector(_maxFeatureIndex);
  for (String term : terms.keySet()) {
    int index = calcHashJoaat(term, _maxFeatureIndex);
    double curValue = v.getQuick(index);
    if (_averageCollisions && (curValue != 0.0)) {
      Integer curCollisionCount = collisionCount.get(index);
      if (curCollisionCount == null) {
        collisionCount.put(index, 2); // number of values we'll need to divide by
      } else {
        collisionCount.put(index, curCollisionCount + 1);
      }
      v.setQuick(index, curValue + terms.get(term));
    } else {
      v.setQuick(index, terms.get(term));
    }
  }

  // Now adjust the vector for collisions, if needed.
  if (_averageCollisions && !collisionCount.isEmpty()) {
    for (Integer index : collisionCount.keySet()) {
      double curValue = v.getQuick(index);
      v.setQuick(index, curValue / collisionCount.get(index));
    }
  }

  // Apply the term vector normalizer.
  getNormalizer().normalize(v);

  List<FeatureNode> features = new ArrayList<FeatureNode>(terms.size());
  for (Element e : v.nonZeroes()) {
    features.add(new FeatureNode(e.index() + 1, e.get()));
  }

  // We need to sort by increasing index.
  Collections.sort(features, new Comparator<FeatureNode>() {
    @Override
    public int compare(FeatureNode o1, FeatureNode o2) {
      return o1.index - o2.index;
    }
  });
  return features.toArray(new FeatureNode[features.size()]);
}
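As a concrete example of the averaging branch: with _averageCollisions set, if two terms with counts 3 and 5 hash to the same index, the vector briefly holds 3 + 5 = 8 there, and the adjustment pass divides by the recorded collision count of 2, leaving (3 + 5) / 2 = 4 at that index before normalization.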
From source file:com.scaleunlimited.classify.model.RawFeaturesLibLinearModel.java
License:Apache License
private FeatureNode[] vectorToFeatureNodes(Vector vector) {
  int featureCount = vector.getNumNondefaultElements();
  FeatureNode[] x = new FeatureNode[featureCount];
  int arrayIndex = 0;
  int cardinality = vector.size();
  for (int i = 0; i < cardinality; i++) {
    double value = vector.getQuick(i);
    if (value != 0.0) {
      // (At least) Linear.train assumes that FeatureNode.index is 1-based,
      // and we don't really have to map back to our term indexes, so just add one. YUCK!
      x[arrayIndex++] = new FeatureNode(i + 1, value);
    }
  }
  // Note: this assumes the vector stores no explicit zeros; if it does,
  // getNumNondefaultElements overcounts and the trailing slots of x stay null.
  return x;
}