Usage examples for org.apache.mahout.math.Vector.nonZeroes()
Iterable<Element> nonZeroes();
From source file:edu.rosehulman.mahout.math.VectorWritable.java
License:Apache License
public static void writeVector(DataOutput out, Vector vector, boolean laxPrecision) throws IOException { boolean dense = vector.isDense(); boolean sequential = vector.isSequentialAccess(); boolean named = vector instanceof NamedVector; out.writeByte((dense ? FLAG_DENSE : 0) | (sequential ? FLAG_SEQUENTIAL : 0) | (named ? FLAG_NAMED : 0) | (laxPrecision ? FLAG_LAX_PRECISION : 0)); Varint.writeUnsignedVarInt(vector.size(), out); if (dense) {//from w w w. j av a 2 s . c o m for (Vector.Element element : vector.all()) { if (laxPrecision) { out.writeFloat((float) element.get()); } else { out.writeDouble(element.get()); } } } else { Varint.writeUnsignedVarInt(vector.getNumNonZeroElements(), out); Iterator<Element> iter = vector.nonZeroes().iterator(); if (sequential) { int lastIndex = 0; while (iter.hasNext()) { Vector.Element element = iter.next(); if (element.get() == 0) { continue; } int thisIndex = element.index(); // Delta-code indices: Varint.writeUnsignedVarInt(thisIndex - lastIndex, out); lastIndex = thisIndex; if (laxPrecision) { out.writeFloat((float) element.get()); } else { out.writeDouble(element.get()); } } } else { while (iter.hasNext()) { Vector.Element element = iter.next(); if (element.get() == 0) { // TODO(robinanil): Fix the damn iterator for the zero element. continue; } Varint.writeUnsignedVarInt(element.index(), out); if (laxPrecision) { out.writeFloat((float) element.get()); } else { out.writeDouble(element.get()); } } } } if (named) { String name = ((NamedVector) vector).getName(); out.writeUTF(name == null ? "" : name); } }
From source file:finderbots.recommenders.hadoop.WriteToSolrJob.java
License:Apache License
private String getOrderedItems(Vector v, BiMap<String, String> elementIndex) { String doc = new String(""); //sort the vector by element weight class VectorElementComparator implements Comparator<Vector.Element> { @Override//from ww w .j av a2 s .c o m public int compare(Vector.Element o1, Vector.Element o2) { return (o1.get() > o2.get() ? -1 : (o1.equals(o2) ? 0 : 1)); } } ArrayList<Vector.Element> vel = new ArrayList<Vector.Element>(); for (Vector.Element ve : v.nonZeroes()) vel.add(ve); Collections.sort(vel, new VectorElementComparator()); for (Vector.Element ve : vel) { int i = ve.index(); String s = String.valueOf(i); String exID = elementIndex.inverse().get(s); String intID = elementIndex.get(s); doc += exID + " "; } return doc; }
From source file:hadoop.api.AggregateAndRecommendReducer.java
License:Apache License
/**
 * Aggregates the similarity columns received for one user into per-item
 * predictions and passes them to {@code writeRecommendedItems}.
 *
 * <p>For every incoming value the similarity column is added (via
 * {@code Functions.PLUS_ABS}) into a denominator vector and, multiplied by the
 * preference value unless that value equals {@code BOOLEAN_PREF_VALUE}, into a
 * numerator vector. A prediction {@code numerator / denominator} is emitted only for
 * items that appeared as a non-zero entry in more than one similarity column.
 * Nothing is written when {@code values} is empty.
 *
 * @param userID  user the predictions belong to
 * @param values  preference value and similarity column pairs for this user
 * @param context job context handed on to {@code writeRecommendedItems}
 * @throws IOException          propagated from {@code writeRecommendedItems}
 * @throws InterruptedException propagated from {@code writeRecommendedItems}
 */
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
    Context context) throws IOException, InterruptedException {
  // Running sums of the prediction formula's numerator and denominator, per item index.
  Vector numerators = null;
  Vector denominators = null;
  // How many similarity columns contributed a non-zero entry for each item index.
  Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

  for (PrefAndSimilarityColumnWritable entry : values) {
    Vector simColumn = entry.getSimilarityColumn();
    float prefValue = entry.getPrefValue();
    boolean weighted = prefValue != BOOLEAN_PREF_VALUE;

    for (Element similarity : simColumn.nonZeroes()) {
      int itemIDIndex = similarity.index();
      double seenSoFar = numberOfSimilarItemsUsed.getQuick(itemIDIndex);
      numberOfSimilarItemsUsed.setQuick(itemIDIndex, seenSoFar + 1);
    }

    // The denominator must be updated before simColumn is scaled in place below.
    if (denominators != null) {
      denominators.assign(simColumn, Functions.PLUS_ABS);
    } else {
      denominators = simColumn.clone();
    }

    if (numerators != null) {
      if (weighted) {
        simColumn.assign(Functions.MULT, prefValue);
      }
      numerators.assign(simColumn, Functions.PLUS);
    } else {
      numerators = simColumn.clone();
      if (weighted) {
        numerators.assign(Functions.MULT, prefValue);
      }
    }
  }

  if (numerators == null) {
    return;
  }

  Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  for (Element numerator : numerators.nonZeroes()) {
    int itemIDIndex = numerator.index();
    // Preference estimations must be based on at least 2 datapoints.
    if (!(numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1)) {
      continue;
    }
    // Normalized prediction.
    double prediction = numerator.get() / denominators.getQuick(itemIDIndex);
    recommendationVector.setQuick(itemIDIndex, prediction);
  }
  writeRecommendedItems(userID, recommendationVector, context);
}
From source file:hadoop.api.AggregateAndRecommendReducer.java
License:Apache License
/** * find the top entries in recommendationVector, map them to the real itemIDs and write back the result *//*from w w w . jav a 2s. c o m*/ private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context) throws IOException, InterruptedException { TopItemsQueue topKItems = new TopItemsQueue(recommendationsPerUser); for (Element element : recommendationVector.nonZeroes()) { int index = element.index(); long itemID; if (indexItemIDMap != null && !indexItemIDMap.isEmpty()) { itemID = indexItemIDMap.get(index); } else { //we don't have any mappings, so just use the original itemID = index; } if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) { float value = (float) element.get(); if (!Float.isNaN(value)) { MutableRecommendedItem topItem = topKItems.top(); if (value > topItem.getValue()) { topItem.set(itemID, value); topKItems.updateTop(); } } } } List<RecommendedItem> topItems = topKItems.getTopItems(); if (!topItems.isEmpty()) { recommendedItems.set(topItems); context.write(userID, recommendedItems); } }
From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java
License:Apache License
/**
 * Collects the feature vector of every index that has a non-zero rating and hands
 * them, together with the ratings, to {@code AlternatingLeastSquaresSolver.solve}.
 *
 * @param ratingsWritable wrapper holding the ratings vector
 * @param uOrM            feature vectors keyed by the same indices as the ratings
 * @param lambda          passed through to the solver
 * @param numFeatures     passed through to the solver
 * @return the vector returned by the solver
 */
public static Vector solveExplicit(VectorWritable ratingsWritable, OpenIntObjectHashMap<Vector> uOrM,
    double lambda, int numFeatures) {
  Vector ratings = ratingsWritable.get();

  int expectedCount = ratings.getNumNondefaultElements();
  List<Vector> featureVectors = new ArrayList<>(expectedCount);
  for (Vector.Element rating : ratings.nonZeroes()) {
    Vector features = uOrM.get(rating.index());
    featureVectors.add(features);
  }

  return AlternatingLeastSquaresSolver.solve(featureVectors, ratings, lambda, numFeatures);
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java
License:Apache License
private void initializeM(Vector averageRatings) throws IOException { Random random = RandomUtils.getRandom(); FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf()); try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class)) { IntWritable index = new IntWritable(); VectorWritable featureVector = new VectorWritable(); for (Vector.Element e : averageRatings.nonZeroes()) { Vector row = new DenseVector(numFeatures); row.setQuick(0, e.get());//from ww w . j av a 2s . c o m for (int m = 1; m < numFeatures; m++) { row.setQuick(m, random.nextDouble()); } index.set(e.index()); featureVector.set(row); writer.append(index, featureVector); } } }
From source file:org.gpfvic.mahout.cf.taste.hadoop.als.PredictionMapper.java
License:Apache License
@Override protected void map(IntWritable userIndexWritable, VectorWritable ratingsWritable, Context ctx) throws IOException, InterruptedException { Pair<OpenIntObjectHashMap<Vector>, OpenIntObjectHashMap<Vector>> uAndM = getSharedInstance(); OpenIntObjectHashMap<Vector> U = uAndM.getFirst(); OpenIntObjectHashMap<Vector> M = uAndM.getSecond(); Vector ratings = ratingsWritable.get(); int userIndex = userIndexWritable.get(); final OpenIntHashSet alreadyRatedItems = new OpenIntHashSet(ratings.getNumNondefaultElements()); for (Vector.Element e : ratings.nonZeroes()) { alreadyRatedItems.add(e.index()); }// w ww . j av a 2 s.c o m final TopItemsQueue topItemsQueue = new TopItemsQueue(recommendationsPerUser); final Vector userFeatures = U.get(userIndex); M.forEachPair(new IntObjectProcedure<Vector>() { @Override public boolean apply(int itemID, Vector itemFeatures) { if (!alreadyRatedItems.contains(itemID)) { double predictedRating = userFeatures.dot(itemFeatures); MutableRecommendedItem top = topItemsQueue.top(); if (predictedRating > top.getValue()) { top.set(itemID, (float) predictedRating); topItemsQueue.updateTop(); } } return true; } }); List<RecommendedItem> recommendedItems = topItemsQueue.getTopItems(); if (!recommendedItems.isEmpty()) { // cap predictions to maxRating for (RecommendedItem topItem : recommendedItems) { ((MutableRecommendedItem) topItem).capToMaxValue(maxRating); } if (usesLongIDs) { long userID = userIDIndex.get(userIndex); userIDWritable.set(userID); for (RecommendedItem topItem : recommendedItems) { // remap item IDs long itemID = itemIDIndex.get((int) topItem.getItemID()); ((MutableRecommendedItem) topItem).setItemID(itemID); } } else { userIDWritable.set(userIndex); } recommendations.set(recommendedItems); ctx.write(userIDWritable, recommendations); } }
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.AggregateAndRecommendReducer.java
License:Apache License
/** * find the top entries in recommendationVector, map them to the real itemIDs and write back the result *//*ww w. j av a 2s. c om*/ private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context) throws IOException, InterruptedException { TopItemsQueue topKItems = new TopItemsQueue(recommendationsPerUser); FastIDSet itemsForUser = null; if (idReader != null && idReader.isUserItemFilterSpecified()) { itemsForUser = idReader.getItemsToRecommendForUser(userID.get()); } for (Element element : recommendationVector.nonZeroes()) { int index = element.index(); long itemID; if (indexItemIDMap != null && !indexItemIDMap.isEmpty()) { itemID = indexItemIDMap.get(index); } else { // we don't have any mappings, so just use the original itemID = index; } if (shouldIncludeItemIntoRecommendations(itemID, itemsToRecommendFor, itemsForUser)) { float value = (float) element.get(); if (!Float.isNaN(value)) { MutableRecommendedItem topItem = topKItems.top(); if (value > topItem.getValue()) { topItem.set(itemID, value); topKItems.updateTop(); } } } } List<RecommendedItem> topItems = topKItems.getTopItems(); if (!topItems.isEmpty()) { recommendedItems.set(topItems); context.write(userID, recommendedItems); } }
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java
License:Apache License
/**
 * Emits one (item index, user ID + preference value) record per non-zero entry of
 * the user's vector, after the vector has passed through
 * {@code maybePruneUserVector}.
 *
 * <p>The user ID is logged at info level. Users outside {@code usersToRecommendFor}
 * (when that set is non-null) produce no output.
 *
 * @param key     the user ID
 * @param value   the user's preference vector
 * @param context output context
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void map(VarLongWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  long userID = key.get();
  log.info("UserID = {}", userID);

  boolean userSelected = usersToRecommendFor == null || usersToRecommendFor.contains(userID);
  if (userSelected) {
    Vector userVector = maybePruneUserVector(value.get());
    for (Element pref : userVector.nonZeroes()) {
      itemIndexWritable.set(pref.index());
      float prefValue = (float) pref.get();
      vectorOrPref.set(userID, prefValue);
      context.write(itemIndexWritable, vectorOrPref);
    }
  }
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.UserVectorSplitterMapper.java
License:Apache License
private Vector maybePruneUserVector(Vector userVector) { if (userVector.getNumNondefaultElements() <= maxPrefsPerUserConsidered) { return userVector; }/*from ww w .j a v a2 s . c o m*/ float smallestLargeValue = findSmallestLargeValue(userVector); // "Blank out" small-sized prefs to reduce the amount of partial products // generated later. They're not zeroed, but NaN-ed, so they come through // and can be used to exclude these items from prefs. for (Element e : userVector.nonZeroes()) { float absValue = Math.abs((float) e.get()); if (absValue < smallestLargeValue) { e.set(Float.NaN); } } return userVector; }