List of usage examples for org.apache.mahout.math.Vector.set
void set(int index, double value);
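Before the examples from real projects below, here is a minimal, self-contained sketch of the call (assuming a Mahout 0.9-era mahout-math on the classpath; the indices and values are made up for illustration):

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class VectorSetDemo {
  public static void main(String[] args) {
    // set(index, value) writes in place on any Vector implementation
    Vector dense = new DenseVector(3);
    dense.set(0, 1.5); // dense vector: plain array write
    Vector sparse = new RandomAccessSparseVector(1000);
    sparse.set(42, 2.0); // sparse vector: stores only non-default entries
    System.out.println(dense); // prints the vector contents
    System.out.println(sparse.getNumNondefaultElements()); // 1
  }
}

RandomAccessSparseVector is the usual target for set calls that arrive in arbitrary index order; SequentialAccessSparseVector pays a per-call penalty for out-of-order writes.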
From source file:DisplayClustering.java
License:Apache License
protected static void plotSampleParameters(Graphics2D g2) {
  Vector v = new DenseVector(2);
  Vector dv = new DenseVector(2);
  g2.setColor(Color.RED);
  for (Vector param : SAMPLE_PARAMS) {
    v.set(0, param.get(0));
    v.set(1, param.get(1));
    dv.set(0, param.get(2) * 3);
    dv.set(1, param.get(3) * 3);
    plotEllipse(g2, v, dv);
  }
}
From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java
License:Apache License
public static void storeSparseColumns(Matrix mat) {
  int numCols = mat.numCols();
  int numRows = mat.numRows();
  for (int i = 0; i < numCols; i++) {
    Vector sparseVect = new RandomAccessSparseVector(numRows);
    Vector col = mat.viewColumn(i);
    Iterator<Vector.Element> itr = col.iterateNonZero();
    while (itr.hasNext()) {
      Element elem = itr.next();
      if (elem.get() != 0) {
        System.out.println(elem.get());
        sparseVect.set(elem.index(), elem.get());
      }
    }
    System.out.println(sparseVect.getNumNondefaultElements());
    mat.assignColumn(i, sparseVect);
    System.out.println(mat.viewColumn(i).getNumNondefaultElements());
    System.exit(1); // note: exits after the first column; apparently leftover debugging code
  }
}
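The sparsify-a-column pattern above can be tried standalone; the hedged sketch below (a hypothetical SparsifyColumnDemo with made-up matrix contents) copies each column's non-zeros into a RandomAccessSparseVector via set and assigns it back:

import java.util.Iterator;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class SparsifyColumnDemo {
  public static void main(String[] args) {
    Matrix mat = new DenseMatrix(new double[][] { { 0, 1 }, { 2, 0 }, { 0, 3 } });
    for (int i = 0; i < mat.numCols(); i++) {
      Vector sparse = new RandomAccessSparseVector(mat.numRows());
      Iterator<Vector.Element> it = mat.viewColumn(i).iterateNonZero();
      while (it.hasNext()) {
        Vector.Element e = it.next();
        sparse.set(e.index(), e.get()); // copy only the non-zero entries
      }
      mat.assignColumn(i, sparse); // copies values; a DenseMatrix stays dense on assign
    }
  }
}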
From source file:cc.recommenders.mining.calls.clustering.VectorBuilder.java
License:Open Source License
public List<Vector> build(List<List<Feature>> usages, Dictionary<Feature> dictionary) {
  List<Vector> vectors = Lists.newArrayList();
  for (List<Feature> usage : usages) {
    final Vector vector = new RandomAccessSparseVector(dictionary.size());
    for (Feature f : usage) {
      int index = dictionary.getId(f);
      boolean isValidFeature = index >= 0;
      if (isValidFeature) {
        double value = weighter.getWeight(f);
        vector.set(index, value);
      }
    }
    vectors.add(vector);
  }
  return vectors;
}
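The core idiom here, look up a dictionary id, skip unknown features, then vector.set(id, weight), can be sketched without the project's Feature, Dictionary, and weighter types. In the hypothetical sketch below a String stands in for Feature, a HashMap for the Dictionary, and a flat 1.0 for weighter.getWeight(f):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class DictionaryVectorDemo {
  public static void main(String[] args) {
    List<String> features = Arrays.asList("call:foo", "call:bar", "call:unknown");
    Map<String, Integer> dict = new HashMap<String, Integer>();
    dict.put("call:foo", 0);
    dict.put("call:bar", 1);
    Vector v = new RandomAccessSparseVector(dict.size());
    for (String f : features) {
      int index = dict.containsKey(f) ? dict.get(f) : -1;
      if (index >= 0) { // skip features missing from the dictionary
        v.set(index, 1.0); // flat weight stands in for weighter.getWeight(f)
      }
    }
  }
}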
From source file:cc.recommenders.mining.calls.clustering.VectorBuilderTest.java
License:Open Source License
private Vector createVector(double... values) {
  Vector v = new RandomAccessSparseVector(4);
  for (int i = 0; i < values.length; i++) {
    v.set(i, values[i]);
  }
  return v;
}
From source file:cn.edu.bjtu.cit.recommender.Recommender.java
License:Apache License
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println();
    System.err.println("Usage: " + this.getClass().getName()
        + " [generic options] input output [profiling] [estimation] [clustersize]");
    System.err.println();
    printUsage();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  OptionParser parser = new OptionParser(args);
  Pipeline pipeline = new MRPipeline(Recommender.class, getConf());
  if (parser.hasOption(CLUSTER_SIZE)) {
    pipeline.getConfiguration().setInt(ClusterOracle.CLUSTER_SIZE,
        Integer.parseInt(parser.getOption(CLUSTER_SIZE).getValue()));
  }
  if (parser.hasOption(PROFILING)) {
    pipeline.getConfiguration().setBoolean(Profiler.IS_PROFILE, true);
    this.profileFilePath = parser.getOption(PROFILING).getValue();
  }
  if (parser.hasOption(ESTIMATION)) {
    estFile = parser.getOption(ESTIMATION).getValue();
    est = new Estimator(estFile, clusterSize);
  }
  if (parser.hasOption(OPT_REDUCE)) {
    pipeline.getConfiguration().setBoolean(OPT_REDUCE, true);
  }
  if (parser.hasOption(OPT_MSCR)) {
    pipeline.getConfiguration().setBoolean(OPT_MSCR, true);
  }
  if (parser.hasOption(ACTIVE_THRESHOLD)) {
    threshold = Integer.parseInt(parser.getOption("at").getValue());
  }
  if (parser.hasOption(TOP)) {
    top = Integer.parseInt(parser.getOption("top").getValue());
  }
  profiler = new Profiler(pipeline);

  /* input node */
  PCollection<String> lines = pipeline.readTextFile(args[0]);
  if (profiler.isProfiling() && lines.getSize() > 10 * 1024 * 1024) {
    lines = lines.sample(0.1);
  }

  /* S0 + GBK: parse "user item" pairs and group by user */
  PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {
    @Override
    public Pair<Long, Long> map(String input) {
      String[] split = input.split(Estimator.DELM);
      long userID = Long.parseLong(split[0]);
      long itemID = Long.parseLong(split[1]);
      return Pair.of(userID, itemID);
    }

    @Override
    public float scaleFactor() {
      return est.getScaleFactor("S0").sizeFactor;
    }

    @Override
    public float scaleFactorByRecord() {
      return est.getScaleFactor("S0").recsFactor;
    }
  }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey(est.getClusterSize());

  /* S1: build a sparse preference vector per user */
  PTable<Long, Vector> userVector = userWithPrefs
      .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
        @Override
        public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
          Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
          for (long itemPref : input.second()) {
            userVector.set((int) itemPref, 1.0f);
          }
          return Pair.of(input.first(), userVector);
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S1").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S1").recsFactor;
        }
      }, Writables.tableOf(Writables.longs(), Writables.vectors()));
  userVector = profiler.profile("S0-S1", pipeline, userVector, ProfileConverter.long_vector(),
      Writables.tableOf(Writables.longs(), Writables.vectors()));

  /* S2: keep only users with more than `threshold` preferences */
  PTable<Long, Vector> filteredUserVector = userVector
      .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, Vector>>() {
        @Override
        public void process(Pair<Long, Vector> input, Emitter<Pair<Long, Vector>> emitter) {
          if (input.second().getNumNondefaultElements() > threshold) {
            emitter.emit(input);
          }
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S2").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S2").recsFactor;
        }
      }, Writables.tableOf(Writables.longs(), Writables.vectors()));
  filteredUserVector = profiler.profile("S2", pipeline, filteredUserVector, ProfileConverter.long_vector(),
      Writables.tableOf(Writables.longs(), Writables.vectors()));

  /* S3 + GBK: emit all co-occurring item index pairs */
  PGroupedTable<Integer, Integer> coOccurencePairs = filteredUserVector
      .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
        @Override
        public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
          Iterator<Vector.Element> it = input.second().iterateNonZero();
          while (it.hasNext()) {
            int index1 = it.next().index();
            Iterator<Vector.Element> it2 = input.second().iterateNonZero();
            while (it2.hasNext()) {
              int index2 = it2.next().index();
              emitter.emit(Pair.of(index1, index2));
            }
          }
        }

        @Override
        public float scaleFactor() {
          float size = est.getScaleFactor("S3").sizeFactor;
          return size;
        }

        @Override
        public float scaleFactorByRecord() {
          float recs = est.getScaleFactor("S3").recsFactor;
          return recs;
        }
      }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey(est.getClusterSize());

  /* S4: count co-occurrences into a sparse row per item */
  PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
      .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
        @Override
        public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
          Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
          for (int itemIndex2 : input.second()) {
            cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
          }
          return Pair.of(input.first(), cooccurrenceRow);
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S4").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S4").recsFactor;
        }
      }, Writables.tableOf(Writables.ints(), Writables.vectors()));
  coOccurenceVector = profiler.profile("S3-S4", pipeline, coOccurenceVector, ProfileConverter.int_vector(),
      Writables.tableOf(Writables.ints(), Writables.vectors()));

  /* S5: wrap co-occurrence columns */
  PTable<Integer, VectorOrPref> wrappedCooccurrence = coOccurenceVector
      .parallelDo(new MapFn<Pair<Integer, Vector>, Pair<Integer, VectorOrPref>>() {
        @Override
        public Pair<Integer, VectorOrPref> map(Pair<Integer, Vector> input) {
          return Pair.of(input.first(), new VectorOrPref(input.second()));
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S5").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S5").recsFactor;
        }
      }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));
  wrappedCooccurrence = profiler.profile("S5", pipeline, wrappedCooccurrence, ProfileConverter.int_vopv(),
      Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

  /* S6: split user vectors into per-item preferences */
  PTable<Integer, VectorOrPref> userVectorSplit = filteredUserVector
      .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, VectorOrPref>>() {
        @Override
        public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, VectorOrPref>> emitter) {
          long userID = input.first();
          Vector userVector = input.second();
          Iterator<Vector.Element> it = userVector.iterateNonZero();
          while (it.hasNext()) {
            Vector.Element e = it.next();
            int itemIndex = e.index();
            float preferenceValue = (float) e.get();
            emitter.emit(Pair.of(itemIndex, new VectorOrPref(userID, preferenceValue)));
          }
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S6").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S6").recsFactor;
        }
      }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));
  userVectorSplit = profiler.profile("S6", pipeline, userVectorSplit, ProfileConverter.int_vopp(),
      Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

  /* S7: combine VectorOrPrefs */
  PTable<Integer, VectorAndPrefs> combinedVectorOrPref = wrappedCooccurrence.union(userVectorSplit)
      .groupByKey(est.getClusterSize())
      .parallelDo(new DoFn<Pair<Integer, Iterable<VectorOrPref>>, Pair<Integer, VectorAndPrefs>>() {
        @Override
        public void process(Pair<Integer, Iterable<VectorOrPref>> input,
            Emitter<Pair<Integer, VectorAndPrefs>> emitter) {
          Vector vector = null;
          List<Long> userIDs = Lists.newArrayList();
          List<Float> values = Lists.newArrayList();
          for (VectorOrPref vop : input.second()) {
            if (vector == null) {
              vector = vop.getVector();
            }
            long userID = vop.getUserID();
            if (userID != Long.MIN_VALUE) {
              userIDs.add(vop.getUserID());
            }
            float value = vop.getValue();
            if (!Float.isNaN(value)) {
              values.add(vop.getValue());
            }
          }
          emitter.emit(Pair.of(input.first(), new VectorAndPrefs(vector, userIDs, values)));
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S7").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S7").recsFactor;
        }
      }, Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));
  combinedVectorOrPref = profiler.profile("S5+S6-S7", pipeline, combinedVectorOrPref, ProfileConverter.int_vap(),
      Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));

  /* S8: compute partial recommendation vectors */
  PTable<Long, Vector> partialMultiply = combinedVectorOrPref
      .parallelDo(new DoFn<Pair<Integer, VectorAndPrefs>, Pair<Long, Vector>>() {
        @Override
        public void process(Pair<Integer, VectorAndPrefs> input, Emitter<Pair<Long, Vector>> emitter) {
          Vector cooccurrenceColumn = input.second().getVector();
          List<Long> userIDs = input.second().getUserIDs();
          List<Float> prefValues = input.second().getValues();
          for (int i = 0; i < userIDs.size(); i++) {
            long userID = userIDs.get(i);
            if (userID != Long.MIN_VALUE) {
              float prefValue = prefValues.get(i);
              Vector partialProduct = cooccurrenceColumn.times(prefValue);
              emitter.emit(Pair.of(userID, partialProduct));
            }
          }
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S8").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S8").recsFactor;
        }
      }, Writables.tableOf(Writables.longs(), Writables.vectors())).groupByKey(est.getClusterSize())
      .combineValues(new CombineFn<Long, Vector>() {
        @Override
        public void process(Pair<Long, Iterable<Vector>> input, Emitter<Pair<Long, Vector>> emitter) {
          Vector partial = null;
          for (Vector vector : input.second()) {
            partial = partial == null ? vector : partial.plus(vector);
          }
          emitter.emit(Pair.of(input.first(), partial));
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("combine").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("combine").recsFactor;
        }
      });
  partialMultiply = profiler.profile("S8-combine", pipeline, partialMultiply, ProfileConverter.long_vector(),
      Writables.tableOf(Writables.longs(), Writables.vectors()));

  /* S9: produce recommendations from vectors */
  PTable<Long, RecommendedItems> recommendedItems = partialMultiply
      .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, RecommendedItems>>() {
        @Override
        public void process(Pair<Long, Vector> input, Emitter<Pair<Long, RecommendedItems>> emitter) {
          Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(11,
              Collections.reverseOrder(BY_PREFERENCE_VALUE));
          Iterator<Vector.Element> recommendationVectorIterator = input.second().iterateNonZero();
          while (recommendationVectorIterator.hasNext()) {
            Vector.Element element = recommendationVectorIterator.next();
            int index = element.index();
            float value = (float) element.get();
            if (topItems.size() < top) {
              topItems.add(new GenericRecommendedItem(index, value));
            } else if (value > topItems.peek().getValue()) {
              topItems.add(new GenericRecommendedItem(index, value));
              topItems.poll();
            }
          }
          List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size());
          recommendations.addAll(topItems);
          Collections.sort(recommendations, BY_PREFERENCE_VALUE);
          emitter.emit(Pair.of(input.first(), new RecommendedItems(recommendations)));
        }

        @Override
        public float scaleFactor() {
          return est.getScaleFactor("S9").sizeFactor;
        }

        @Override
        public float scaleFactorByRecord() {
          return est.getScaleFactor("S9").recsFactor;
        }
      }, Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));
  recommendedItems = profiler.profile("S9", pipeline, recommendedItems, ProfileConverter.long_ri(),
      Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));

  /* profiling */
  if (profiler.isProfiling()) {
    profiler.writeResultToFile(profileFilePath);
    profiler.cleanup(pipeline.getConfiguration());
    return 0;
  }

  /* asText */
  pipeline.writeTextFile(recommendedItems, args[1]);
  PipelineResult result = pipeline.done();
  return result.succeeded() ? 0 : 1;
}
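Two set idioms recur in the pipeline above: S1 marks a preference by setting the item's slot to 1.0, and S4 counts co-occurrences with a read-modify-write, set(i, get(i) + 1.0). A standalone sketch of the counting idiom (the item ids are made up; Mahout's Vector also offers incrementQuick for the same job):

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class CooccurrenceRowDemo {
  public static void main(String[] args) {
    int[] itemIndexes = { 7, 3, 7, 7, 3 }; // illustrative co-occurring item ids
    Vector row = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (int item : itemIndexes) {
      row.set(item, row.get(item) + 1.0); // read-modify-write, as in S4
    }
    System.out.println(row.get(7)); // 3.0
    System.out.println(row.get(3)); // 2.0
  }
}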
From source file:com.cloudera.knittingboar.records.RCV1RecordFactory.java
License:Apache License
public static void ScanFile(String file, int debug_break_cnt) throws IOException {
  ConstantValueEncoder encoder_test = new ConstantValueEncoder("test");
  BufferedReader reader = null;
  // Collection<String> words
  int line_count = 0;
  Multiset<String> class_count = ConcurrentHashMultiset.create();
  Multiset<String> namespaces = ConcurrentHashMultiset.create();
  try {
    // System.out.println( newsgroup );
    reader = new BufferedReader(new FileReader(file));
    String line = reader.readLine();
    while (line != null && line.length() > 0) {
      // shard_writer.write(line + "\n");
      // out += line;
      String[] parts = line.split(" ");
      // System.out.println( "Class: " + parts[0] );
      class_count.add(parts[0]);
      namespaces.add(parts[1]);
      line = reader.readLine();
      line_count++;
      Vector v = new RandomAccessSparseVector(FEATURES);
      for (int x = 2; x < parts.length; x++) {
        // encoder_test.addToVector(parts[x], v);
        String[] feature = parts[x].split(":");
        int index = Integer.parseInt(feature[0]) % FEATURES;
        double val = Double.parseDouble(feature[1]);
        // System.out.println( feature[1] + " = " + val );
        if (index < FEATURES) {
          v.set(index, val);
        } else {
          // effectively unreachable for non-negative ids: index is already reduced modulo FEATURES
          System.out.println("Could not hash: " + index + " to " + (index % FEATURES));
        }
      }
      Utils.PrintVectorSectionNonZero(v, 10);
      System.out.println("###");
      if (line_count > debug_break_cnt) {
        break;
      }
    }
    System.out.println("Total Rec Count: " + line_count);
    System.out.println("-------------------- ");
    System.out.println("Classes");
    for (String word : class_count.elementSet()) {
      System.out.println("Class " + word + ": " + class_count.count(word) + " ");
    }
    System.out.println("-------------------- ");
    System.out.println("NameSpaces:");
    for (String word : namespaces.elementSet()) {
      System.out.println("Namespace " + word + ": " + namespaces.count(word) + " ");
    }
    /*
     * TokenStream ts = analyzer.tokenStream("text", reader);
     * ts.addAttribute(CharTermAttribute.class);
     *
     * // for each word in the stream, minus non-word stuff, add word to
     * // collection
     * while (ts.incrementToken()) {
     *   String s = ts.getAttribute(CharTermAttribute.class).toString();
     *   //System.out.print( " " + s ); //words.add(s);
     *   out += s + " ";
     * }
     */
  } finally {
    reader.close();
  }
  // return out + "\n";
}
From source file:com.cloudera.knittingboar.records.RCV1RecordFactory.java
License:Apache License
/**
 * Processes a single line of input into:
 * - target variable
 * - feature vector
 *
 * Right now our hash function is simply "modulo".
 *
 * @throws Exception
 */
public int processLine(String line, Vector v) throws Exception {
  // p.269 ---------------------------------------------------------
  // Map<String, Set<Integer>> traceDictionary = new TreeMap<String, Set<Integer>>();
  int actual = 0;
  String[] parts = line.split(" ");
  actual = Integer.parseInt(parts[0]);
  // don't know what to do with the "namespace" "f"
  for (int x = 2; x < parts.length; x++) {
    String[] feature = parts[x].split(":");
    int index = Integer.parseInt(feature[0]) % FEATURES;
    double val = Double.parseDouble(feature[1]);
    if (index < FEATURES) {
      v.set(index, val);
    } else {
      // effectively unreachable for non-negative ids: index is already reduced modulo FEATURES
      System.out.println("Could not hash: " + index + " to " + (index % FEATURES));
    }
  }
  // System.out.println("\nEOL\n");
  return actual;
}
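Because processLine reduces feature ids by plain modulo, two ids that collide map to the same slot, and the later set silently overwrites the earlier value rather than accumulating. A minimal sketch of that behavior (the FEATURES value and the input line are made up):

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class ModuloHashDemo {
  static final int FEATURES = 10;

  public static void main(String[] args) {
    String line = "1 f 3:0.5 13:0.9"; // label, namespace, index:value pairs
    Vector v = new RandomAccessSparseVector(FEATURES);
    String[] parts = line.split(" ");
    for (int x = 2; x < parts.length; x++) {
      String[] feature = parts[x].split(":");
      int index = Integer.parseInt(feature[0]) % FEATURES;
      v.set(index, Double.parseDouble(feature[1])); // 13 % 10 == 3: overwrites 0.5
    }
    System.out.println(v.get(3)); // 0.9, the later value wins
  }
}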
From source file:com.cloudera.knittingboar.sgd.TestParallelOnlineLogisticRegression.java
License:Apache License
public void testTrainMechanics() {
  int categories = 2;
  int numFeatures = 5;
  double lambda = 1.0e-4;
  double learning_rate = 10;
  ParallelOnlineLogisticRegression plr = new ParallelOnlineLogisticRegression(categories, numFeatures,
      new L1()).lambda(lambda).learningRate(learning_rate).alpha(1 - 1.0e-3);
  Vector input = new RandomAccessSparseVector(numFeatures);
  for (int x = 0; x < numFeatures; x++) {
    input.set(x, x);
  }
  plr.train(0, input);
  plr.train(0, input);
  plr.train(0, input);
}
From source file:com.cloudera.knittingboar.sgd.TestParallelOnlineLogisticRegression.java
License:Apache License
public void testPOLRInternalBuffers() {
  System.out.println("testPOLRInternalBuffers --------------");
  int categories = 2;
  int numFeatures = 5;
  double lambda = 1.0e-4;
  double learning_rate = 10;
  ArrayList<Vector> trainingSet_0 = new ArrayList<Vector>();
  for (int s = 0; s < 1; s++) {
    Vector input = new RandomAccessSparseVector(numFeatures);
    for (int x = 0; x < numFeatures; x++) {
      input.set(x, x);
    }
    trainingSet_0.add(input);
  } // for
  ParallelOnlineLogisticRegression plr_agent_0 = new ParallelOnlineLogisticRegression(categories, numFeatures,
      new L1()).lambda(lambda).learningRate(learning_rate).alpha(1 - 1.0e-3);
  System.out.println("Beta: ");
  //Utils.PrintVectorNonZero(plr_agent_0.getBeta().getRow(0));
  Utils.PrintVectorNonZero(plr_agent_0.getBeta().viewRow(0));
  System.out.println("\nGamma: ");
  //Utils.PrintVectorNonZero(plr_agent_0.gamma.getMatrix().getRow(0));
  Utils.PrintVectorNonZero(plr_agent_0.gamma.getMatrix().viewRow(0));
  plr_agent_0.train(0, trainingSet_0.get(0));
  System.out.println("Beta: ");
  //Utils.PrintVectorNonZero(plr_agent_0.noReallyGetBeta().getRow(0));
  Utils.PrintVectorNonZero(plr_agent_0.noReallyGetBeta().viewRow(0));
  System.out.println("\nGamma: ");
  //Utils.PrintVectorNonZero(plr_agent_0.gamma.getMatrix().getRow(0));
  Utils.PrintVectorNonZero(plr_agent_0.gamma.getMatrix().viewRow(0));
}
From source file:com.cloudera.knittingboar.sgd.TestParallelOnlineLogisticRegression.java
License:Apache License
public void testLocalGradientFlush() {
  System.out.println("\n\n\ntestLocalGradientFlush --------------");
  int categories = 2;
  int numFeatures = 5;
  double lambda = 1.0e-4;
  double learning_rate = 10;
  ArrayList<Vector> trainingSet_0 = new ArrayList<Vector>();
  for (int s = 0; s < 1; s++) {
    Vector input = new RandomAccessSparseVector(numFeatures);
    for (int x = 0; x < numFeatures; x++) {
      input.set(x, x);
    }
    trainingSet_0.add(input);
  } // for
  ParallelOnlineLogisticRegression plr_agent_0 = new ParallelOnlineLogisticRegression(categories, numFeatures,
      new L1()).lambda(lambda).learningRate(learning_rate).alpha(1 - 1.0e-3);
  plr_agent_0.train(0, trainingSet_0.get(0));
  System.out.println("\nGamma: ");
  Utils.PrintVectorNonZero(plr_agent_0.gamma.getMatrix().viewRow(0));
  plr_agent_0.FlushGamma();
  System.out.println("Flushing Gamma ...... ");
  System.out.println("\nGamma: ");
  Utils.PrintVector(plr_agent_0.gamma.getMatrix().viewRow(0));
  for (int x = 0; x < numFeatures; x++) {
    assertEquals(plr_agent_0.gamma.getMatrix().get(0, x), 0.0);
  }
}