Example usage for org.apache.mahout.math Vector nonZeroes

List of usage examples for org.apache.mahout.math Vector nonZeroes

Introduction

On this page you can find example usages of the nonZeroes() method of org.apache.mahout.math.Vector.

Prototype

Iterable<Element> nonZeroes();

Source Link

Usage

From source file:edu.rosehulman.mahout.math.VectorWritable.java

License:Apache License

/**
 * Serializes a vector to {@code out}.
 *
 * <p>Layout written, in order: one flag byte (dense / sequential / named / lax precision),
 * the vector size as an unsigned varint, the element payload, and finally the vector's
 * name when it is a {@link NamedVector}.
 *
 * <p>Dense vectors write every element value. Other vectors write the non-zero element
 * count followed by (index, value) pairs; sequential-access vectors store each index as
 * the difference from the previously written index.
 *
 * @param out          destination for the serialized bytes
 * @param vector       the vector to serialize
 * @param laxPrecision when {@code true}, values are written as {@code float} instead of {@code double}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeVector(DataOutput out, Vector vector, boolean laxPrecision) throws IOException {
    boolean dense = vector.isDense();
    boolean sequential = vector.isSequentialAccess();
    boolean named = vector instanceof NamedVector;

    // Assemble the header byte one flag at a time.
    int flags = 0;
    if (dense) {
        flags |= FLAG_DENSE;
    }
    if (sequential) {
        flags |= FLAG_SEQUENTIAL;
    }
    if (named) {
        flags |= FLAG_NAMED;
    }
    if (laxPrecision) {
        flags |= FLAG_LAX_PRECISION;
    }
    out.writeByte(flags);

    Varint.writeUnsignedVarInt(vector.size(), out);

    if (dense) {
        for (Vector.Element entry : vector.all()) {
            double value = entry.get();
            if (laxPrecision) {
                out.writeFloat((float) value);
            } else {
                out.writeDouble(value);
            }
        }
    } else {
        Varint.writeUnsignedVarInt(vector.getNumNonZeroElements(), out);
        // Only consulted for sequential vectors, whose indices are delta-coded.
        int previousIndex = 0;
        for (Vector.Element entry : vector.nonZeroes()) {
            double value = entry.get();
            if (value == 0) {
                // TODO(robinanil): Fix the damn iterator for the zero element.
                continue;
            }
            int currentIndex = entry.index();
            if (sequential) {
                // Delta-code indices:
                Varint.writeUnsignedVarInt(currentIndex - previousIndex, out);
                previousIndex = currentIndex;
            } else {
                Varint.writeUnsignedVarInt(currentIndex, out);
            }
            if (laxPrecision) {
                out.writeFloat((float) value);
            } else {
                out.writeDouble(value);
            }
        }
    }

    if (named) {
        String name = ((NamedVector) vector).getName();
        // A missing name is written as the empty string.
        if (name == null) {
            out.writeUTF("");
        } else {
            out.writeUTF(name);
        }
    }
}

From source file:finderbots.recommenders.hadoop.WriteToSolrJob.java

License:Apache License

/**
 * Builds a space-separated list of IDs for the non-zero elements of {@code v}, ordered
 * from the largest element weight to the smallest.
 *
 * <p>Each element index is converted to a string and looked up in the inverse view of
 * {@code elementIndex}; the value found there is appended followed by a single space.
 * An index with no mapping contributes the text {@code "null"}.
 *
 * @param v            vector whose non-zero elements are listed
 * @param elementIndex bidirectional map whose inverse view is keyed by the stringified element index
 * @return the IDs in descending weight order, each followed by a space; empty when
 *         {@code v} has no non-zero elements
 */
private String getOrderedItems(Vector v, BiMap<String, String> elementIndex) {
    // Sorts vector elements by descending weight.
    class VectorElementComparator implements Comparator<Vector.Element> {

        @Override
        public int compare(Vector.Element o1, Vector.Element o2) {
            // Double.compare yields a consistent ordering: equal weights compare as 0 in
            // both directions, which Collections.sort requires. Arguments are swapped to
            // get descending order.
            return Double.compare(o2.get(), o1.get());
        }
    }

    // NOTE(review): this keeps references to the Element objects handed out by the
    // iterator; if the Vector implementation reuses one Element instance across
    // iterations, index/weight must be copied here instead - confirm against the
    // concrete Vector type used by callers.
    ArrayList<Vector.Element> vel = new ArrayList<Vector.Element>();
    for (Vector.Element ve : v.nonZeroes()) {
        vel.add(ve);
    }
    Collections.sort(vel, new VectorElementComparator());

    StringBuilder doc = new StringBuilder();
    for (Vector.Element ve : vel) {
        String s = String.valueOf(ve.index());
        String exID = elementIndex.inverse().get(s);
        doc.append(exID).append(' ');
    }
    return doc.toString();
}

From source file:hadoop.api.AggregateAndRecommendReducer.java

License:Apache License

/**
 * Accumulates the similarity columns of one user into per-item numerator and denominator
 * sums, then writes a normalized prediction for every item backed by more than one
 * similarity column.
 *
 * <p>Returns without writing anything when {@code values} is empty.
 *
 * @param userID  the user the predictions are computed for
 * @param values  pairs of similarity column and preference value for this user
 * @param context passed through to {@code writeRecommendedItems}
 * @throws IOException          propagated from {@code writeRecommendedItems}
 * @throws InterruptedException propagated from {@code writeRecommendedItems}
 */
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* per item: sum in the numerator of the prediction formula */
    Vector numerators = null;
    /* per item: sum in the denominator of the prediction formula */
    Vector denominators = null;
    /* per item: how many similarity columns had a non-zero entry for it */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable entry : values) {
        Vector simColumn = entry.getSimilarityColumn();
        float prefValue = entry.getPrefValue();
        boolean needsScaling = prefValue != BOOLEAN_PREF_VALUE;

        /* count the number of items used for each prediction */
        for (Element similarity : simColumn.nonZeroes()) {
            int idx = similarity.index();
            double usedSoFar = numberOfSimilarItemsUsed.getQuick(idx);
            numberOfSimilarItemsUsed.setQuick(idx, usedSoFar + 1);
        }

        // The denominator must be updated before simColumn is scaled in place below.
        if (denominators != null) {
            denominators.assign(simColumn, Functions.PLUS_ABS);
        } else {
            denominators = simColumn.clone();
        }

        if (numerators != null) {
            if (needsScaling) {
                simColumn.assign(Functions.MULT, prefValue);
            }
            numerators.assign(simColumn, Functions.PLUS);
        } else {
            numerators = simColumn.clone();
            if (needsScaling) {
                numerators.assign(Functions.MULT, prefValue);
            }
        }
    }

    // No input columns at all: nothing to recommend.
    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (Element numerator : numerators.nonZeroes()) {
        int idx = numerator.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (!(numberOfSimilarItemsUsed.getQuick(idx) > 1)) {
            continue;
        }
        /* compute normalized prediction */
        double prediction = numerator.get() / denominators.getQuick(idx);
        recommendationVector.setQuick(idx, prediction);
    }
    writeRecommendedItems(userID, recommendationVector, context);
}

From source file:hadoop.api.AggregateAndRecommendReducer.java

License:Apache License

/**
 * Finds the top entries in {@code recommendationVector}, maps them to the real itemIDs
 * and writes the result for {@code userID} to {@code context}.
 *
 * <p>An element index is translated through {@code indexItemIDMap} when that map is
 * non-null and non-empty; otherwise the index itself is used as the item ID. Items
 * outside {@code itemsToRecommendFor} (when that filter is set) and NaN values are
 * skipped. Nothing is written when no item qualifies.
 *
 * @param userID               key under which the recommendations are written
 * @param recommendationVector predicted values keyed by item index
 * @param context              output sink for the (user, recommendations) pair
 * @throws IOException          if writing to {@code context} fails
 * @throws InterruptedException if writing to {@code context} is interrupted
 */
private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context)
        throws IOException, InterruptedException {

    TopItemsQueue topKItems = new TopItemsQueue(recommendationsPerUser);

    for (Element candidate : recommendationVector.nonZeroes()) {
        int index = candidate.index();
        // Without any mappings the original index doubles as the item ID.
        long itemID = (indexItemIDMap != null && !indexItemIDMap.isEmpty())
                ? indexItemIDMap.get(index)
                : index;

        if (itemsToRecommendFor != null && !itemsToRecommendFor.contains(itemID)) {
            continue;
        }

        float value = (float) candidate.get();
        if (Float.isNaN(value)) {
            continue;
        }

        // Overwrite the queue's current top entry when this candidate scores higher.
        MutableRecommendedItem topItem = topKItems.top();
        if (value > topItem.getValue()) {
            topItem.set(itemID, value);
            topKItems.updateTop();
        }
    }

    List<RecommendedItem> topItems = topKItems.getTopItems();
    if (topItems.isEmpty()) {
        return;
    }
    recommendedItems.set(topItems);
    context.write(userID, recommendedItems);
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java

License:Apache License

/**
 * Gathers the feature vector stored in {@code uOrM} for every non-zero entry of the
 * ratings vector and hands them, together with the ratings, to
 * {@code AlternatingLeastSquaresSolver.solve}.
 *
 * @param ratingsWritable wrapper holding the ratings vector
 * @param uOrM            feature vectors keyed by the index used in the ratings vector
 * @param lambda          passed through to the solver
 * @param numFeatures     passed through to the solver
 * @return the solver's result
 */
public static Vector solveExplicit(VectorWritable ratingsWritable, OpenIntObjectHashMap<Vector> uOrM,
        double lambda, int numFeatures) {
    Vector ratings = ratingsWritable.get();

    // Presize the list using the vector's non-default element count.
    int expectedCount = ratings.getNumNondefaultElements();
    List<Vector> featureVectors = new ArrayList<>(expectedCount);

    for (Vector.Element rating : ratings.nonZeroes()) {
        Vector features = uOrM.get(rating.index());
        featureVectors.add(features);
    }

    return AlternatingLeastSquaresSolver.solve(featureVectors, ratings, lambda, numFeatures);
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java

License:Apache License

/**
 * Writes one feature row per non-zero entry of {@code averageRatings} to the sequence
 * file {@code part-m-00000} under {@code pathToM(-1)}.
 *
 * <p>Each row has {@code numFeatures} entries: the first is the entry's value, the
 * remaining ones are drawn from {@code random.nextDouble()}. Rows are keyed by the
 * entry's index.
 *
 * @param averageRatings vector whose non-zero entries seed the first feature of each row
 * @throws IOException if the file system or the sequence file writer fails
 */
private void initializeM(Vector averageRatings) throws IOException {
    Random random = RandomUtils.getRandom();

    FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(),
            new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class)) {
        // Key/value holders are reused for every appended record.
        IntWritable rowKey = new IntWritable();
        VectorWritable rowValue = new VectorWritable();

        for (Vector.Element rating : averageRatings.nonZeroes()) {
            Vector features = new DenseVector(numFeatures);
            features.setQuick(0, rating.get());
            int position = 1;
            while (position < numFeatures) {
                features.setQuick(position, random.nextDouble());
                position++;
            }
            rowKey.set(rating.index());
            rowValue.set(features);
            writer.append(rowKey, rowValue);
        }
    }
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.PredictionMapper.java

License:Apache License

/**
 * Produces recommendations for one user: scores every item in M that the user has not
 * rated as the dot product of the user's and the item's feature vectors, keeps the top
 * entries, caps them to {@code maxRating} and writes them out.
 *
 * <p>When {@code usesLongIDs} is set, the user index and the item indexes are translated
 * through {@code userIDIndex} / {@code itemIDIndex} before writing. Nothing is written
 * when there are no top items.
 *
 * @param userIndexWritable index of the user
 * @param ratingsWritable   the user's ratings; its non-zero indexes are treated as already rated
 * @param ctx               output sink
 * @throws IOException          if writing to {@code ctx} fails
 * @throws InterruptedException if writing to {@code ctx} is interrupted
 */
@Override
protected void map(IntWritable userIndexWritable, VectorWritable ratingsWritable, Context ctx)
        throws IOException, InterruptedException {

    Pair<OpenIntObjectHashMap<Vector>, OpenIntObjectHashMap<Vector>> uAndM = getSharedInstance();
    OpenIntObjectHashMap<Vector> userMatrix = uAndM.getFirst();
    OpenIntObjectHashMap<Vector> itemMatrix = uAndM.getSecond();

    Vector ratings = ratingsWritable.get();
    int userIndex = userIndexWritable.get();

    // Remember which items the user already rated so they are not recommended again.
    final OpenIntHashSet ratedItems = new OpenIntHashSet(ratings.getNumNondefaultElements());
    for (Vector.Element rating : ratings.nonZeroes()) {
        ratedItems.add(rating.index());
    }

    final TopItemsQueue queue = new TopItemsQueue(recommendationsPerUser);
    final Vector userFeatures = userMatrix.get(userIndex);

    itemMatrix.forEachPair(new IntObjectProcedure<Vector>() {
        @Override
        public boolean apply(int itemID, Vector itemFeatures) {
            if (ratedItems.contains(itemID)) {
                return true;
            }
            double predictedRating = userFeatures.dot(itemFeatures);
            MutableRecommendedItem top = queue.top();
            if (predictedRating > top.getValue()) {
                top.set(itemID, (float) predictedRating);
                queue.updateTop();
            }
            return true;
        }
    });

    List<RecommendedItem> recommendedItems = queue.getTopItems();
    if (recommendedItems.isEmpty()) {
        return;
    }

    // cap predictions to maxRating
    for (RecommendedItem item : recommendedItems) {
        ((MutableRecommendedItem) item).capToMaxValue(maxRating);
    }

    if (!usesLongIDs) {
        userIDWritable.set(userIndex);
    } else {
        long userID = userIDIndex.get(userIndex);
        userIDWritable.set(userID);

        // remap item IDs
        for (RecommendedItem item : recommendedItems) {
            long itemID = itemIDIndex.get((int) item.getItemID());
            ((MutableRecommendedItem) item).setItemID(itemID);
        }
    }

    recommendations.set(recommendedItems);
    ctx.write(userIDWritable, recommendations);
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer.java

License:Apache License

/**
 * Finds the top entries in {@code recommendationVector}, maps them to the real itemIDs
 * and writes the result for {@code userID} to {@code context}.
 *
 * <p>When {@code idReader} reports a user/item filter, the per-user item set is fetched
 * and passed to {@code shouldIncludeItemIntoRecommendations} together with
 * {@code itemsToRecommendFor}. NaN values are skipped. Nothing is written when no item
 * qualifies.
 *
 * @param userID               key under which the recommendations are written
 * @param recommendationVector predicted values keyed by item index
 * @param context              output sink for the (user, recommendations) pair
 * @throws IOException          if writing to {@code context} fails
 * @throws InterruptedException if writing to {@code context} is interrupted
 */
private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context)
        throws IOException, InterruptedException {
    TopItemsQueue topKItems = new TopItemsQueue(recommendationsPerUser);

    FastIDSet itemsForUser = null;
    if (idReader != null && idReader.isUserItemFilterSpecified()) {
        itemsForUser = idReader.getItemsToRecommendForUser(userID.get());
    }

    for (Element candidate : recommendationVector.nonZeroes()) {
        int index = candidate.index();
        // Without any mappings the original index doubles as the item ID.
        long itemID = (indexItemIDMap != null && !indexItemIDMap.isEmpty())
                ? indexItemIDMap.get(index)
                : index;

        if (!shouldIncludeItemIntoRecommendations(itemID, itemsToRecommendFor, itemsForUser)) {
            continue;
        }

        float value = (float) candidate.get();
        if (Float.isNaN(value)) {
            continue;
        }

        // Overwrite the queue's current top entry when this candidate scores higher.
        MutableRecommendedItem topItem = topKItems.top();
        if (value > topItem.getValue()) {
            topItem.set(itemID, value);
            topKItems.updateTop();
        }
    }

    List<RecommendedItem> topItems = topKItems.getTopItems();
    if (topItems.isEmpty()) {
        return;
    }
    recommendedItems.set(topItems);
    context.write(userID, recommendedItems);
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java

License:Apache License

/**
 * Splits one user's vector into per-item records: for every non-zero entry of the
 * (possibly pruned) user vector, writes the item index as key and the pair
 * (userID, preference value as float) as value.
 *
 * <p>Users outside {@code usersToRecommendFor} are skipped when that filter is set.
 *
 * @param key     the user ID
 * @param value   the user's vector
 * @param context output sink
 * @throws IOException          if writing to {@code context} fails
 * @throws InterruptedException if writing to {@code context} is interrupted
 */
@Override
protected void map(VarLongWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    long userID = key.get();

    log.info("UserID = {}", userID);

    // Process the user only when no filter is set or the filter includes this user.
    if (usersToRecommendFor == null || usersToRecommendFor.contains(userID)) {
        Vector prunedPrefs = maybePruneUserVector(value.get());

        for (Element pref : prunedPrefs.nonZeroes()) {
            itemIndexWritable.set(pref.index());
            vectorOrPref.set(userID, (float) pref.get());
            context.write(itemIndexWritable, vectorOrPref);
        }
    }
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java

License:Apache License

/**
 * Marks the smaller preferences of an oversized user vector as NaN, in place.
 *
 * <p>Vectors with at most {@code maxPrefsPerUserConsidered} non-default elements are
 * returned untouched. Otherwise every non-zero entry whose absolute value (as a float)
 * is below the threshold from {@code findSmallestLargeValue} is set to NaN.
 *
 * @param userVector the user's preference vector; modified in place when pruning applies
 * @return the same {@code userVector} instance
 */
private Vector maybePruneUserVector(Vector userVector) {
    if (userVector.getNumNondefaultElements() > maxPrefsPerUserConsidered) {
        float threshold = findSmallestLargeValue(userVector);

        // "Blank out" small-sized prefs to reduce the amount of partial products
        // generated later. They're not zeroed, but NaN-ed, so they come through
        // and can be used to exclude these items from prefs.
        for (Element pref : userVector.nonZeroes()) {
            float magnitude = Math.abs((float) pref.get());
            if (magnitude < threshold) {
                pref.set(Float.NaN);
            }
        }
    }

    return userVector;
}