List of usage examples for org.apache.mahout.math Vector set
void set(int index, double value);
From source file:opennlp.addons.mahout.AbstractOnlineLearnerTrainer.java
License:Apache License
protected void trainOnlineLearner(DataIndexer indexer, org.apache.mahout.classifier.OnlineLearner pa) { int cardinality = indexer.getPredLabels().length; int outcomes[] = indexer.getOutcomeList(); for (int i = 0; i < indexer.getContexts().length; i++) { Vector vector = new RandomAccessSparseVector(cardinality); int features[] = indexer.getContexts()[i]; for (int fi = 0; fi < features.length; fi++) { vector.set(features[fi], indexer.getNumTimesEventsSeen()[i]); }//www . j a v a 2s . c om pa.train(outcomes[i], vector); } }
From source file:opennlp.addons.mahout.VectorClassifierModel.java
License:Apache License
/**
 * Scores a bag of feature names with the wrapped classifier.
 *
 * <p>Each feature that has an entry in {@code predMap} increments the matching component of a
 * sparse vector; features without an entry are skipped. The vector is passed to
 * {@code classifier.classifyFull} and the first {@code classifier.numCategories()} components
 * of the result are copied into the returned array.
 *
 * @param features feature names of the event to classify
 * @return one score per category, indexed by category
 */
public double[] eval(String[] features) {
    Vector featureCounts = new RandomAccessSparseVector(predMap.size());
    for (String name : features) {
        Integer id = predMap.get(name);
        if (id == null) {
            continue; // no entry in predMap for this feature
        }
        featureCounts.set(id, featureCounts.get(id) + 1);
    }

    Vector scores = classifier.classifyFull(featureCounts);

    double[] result = new double[classifier.numCategories()];
    int category = 0;
    while (category < result.length) {
        result[category] = scores.get(category);
        category++;
    }
    return result;
}
From source file:org.apache.crunch.examples.Recommender.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println();// w w w . ja va2s. c o m System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output"); System.err.println(); GenericOptionsParser.printGenericCommandUsage(System.err); return 1; } Pipeline pipeline = new MRPipeline(Recommender.class, getConf()); /* * input node */ PCollection<String> lines = pipeline.readTextFile(args[0]); /* * S0 + GBK */ PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() { @Override public Pair<Long, Long> map(String input) { String[] split = input.split("[,\\s]"); long userID = Long.parseLong(split[0]); long itemID = Long.parseLong(split[1]); return Pair.of(userID, itemID); } }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey(); /* * S1 */ PTable<Long, Vector> userVector = userWithPrefs .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() { @Override public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) { Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100); for (long itemPref : input.second()) { userVector.set((int) itemPref, 1.0f); } return Pair.of(input.first(), userVector); } }, Writables.tableOf(Writables.longs(), Writables.vectors())); /* * S2 + GBK */ PGroupedTable<Integer, Integer> coOccurencePairs = userVector .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() { @Override public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) { Iterator<Vector.Element> it = input.second().iterateNonZero(); while (it.hasNext()) { int index1 = it.next().index(); Iterator<Vector.Element> it2 = input.second().iterateNonZero(); while (it2.hasNext()) { int index2 = it2.next().index(); emitter.emit(Pair.of(index1, index2)); } } } }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey(); /* * S3 */ PTable<Integer, Vector> coOccurenceVector = coOccurencePairs 
.parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() { @Override public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) { Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100); for (int itemIndex2 : input.second()) { cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0); } return Pair.of(input.first(), cooccurrenceRow); } }, Writables.tableOf(Writables.ints(), Writables.vectors())); /* * asText */ pipeline.writeTextFile(coOccurenceVector, args[1]); PipelineResult result = pipeline.done(); return result.succeeded() ? 0 : 1; }
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.ItemFilterAsVectorAndPrefsReducer.java
License:Apache License
/**
 * Emits, for one item, a vector holding NaN at the item's index together with the users it
 * was grouped with.
 *
 * <p>Every incoming user ID is recorded with a preference value of 1.0. The output key is the
 * item's index as computed by {@code TasteHadoopUtils.idToIndex}.
 *
 * @param itemID ID of the item
 * @param values user IDs grouped under this item
 * @param ctx    reducer context the result is written to
 */
@Override
protected void reduce(VarLongWritable itemID, Iterable<VarLongWritable> values, Context ctx)
        throws IOException, InterruptedException {
    final int itemIndex = TasteHadoopUtils.idToIndex(itemID.get());

    // Artificial NaN summand to exclude this item from the recommendations for all users
    // specified in userIDs.
    Vector nanMarker = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
    nanMarker.set(itemIndex, Double.NaN);

    List<Long> users = new ArrayList<>();
    List<Float> prefs = new ArrayList<>();
    for (VarLongWritable user : values) {
        long id = user.get();
        users.add(id);
        prefs.add(1.0f);
    }

    itemIDIndexWritable.set(itemIndex);
    vectorAndPrefs.set(nanMarker, users, prefs);
    ctx.write(itemIDIndexWritable, vectorAndPrefs);
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.SimilarityMatrixRowWrapperMapper.java
License:Apache License
@Override protected void map(IntWritable key, VectorWritable value, Context context) throws IOException, InterruptedException { Vector similarityMatrixRow = value.get(); /* remove self similarity */ similarityMatrixRow.set(key.get(), Double.NaN); index.set(key.get());//from ww w . j ava 2s. c om vectorOrPref.set(similarityMatrixRow); context.write(index, vectorOrPref); }
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.ToUserVectorsReducer.java
License:Apache License
/**
 * Collects one user's item preferences into a sparse vector and writes it when the user has
 * at least {@code minPreferences} non-default entries.
 *
 * <p>An {@code EntityPrefWritable} contributes its own preference value; any other writable
 * contributes 1.0.
 *
 * @param userID    ID of the user
 * @param itemPrefs item IDs, optionally carrying preference values, grouped under this user
 * @param context   reducer context; also used to increment the {@code USERS} counter
 */
@Override
protected void reduce(VarLongWritable userID, Iterable<VarLongWritable> itemPrefs, Context context)
        throws IOException, InterruptedException {
    Vector prefsByItem = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (VarLongWritable pref : itemPrefs) {
        int itemIndex = TasteHadoopUtils.idToIndex(pref.get());
        float prefValue;
        if (pref instanceof EntityPrefWritable) {
            prefValue = ((EntityPrefWritable) pref).getPrefValue();
        } else {
            prefValue = 1.0f;
        }
        prefsByItem.set(itemIndex, prefValue);
    }

    if (prefsByItem.getNumNondefaultElements() < minPreferences) {
        return; // too few preferences: emit nothing for this user
    }

    userVectorWritable.set(prefsByItem);
    userVectorWritable.setWritesLaxPrecision(true);
    context.getCounter(Counters.USERS).increment(1);
    context.write(userID, userVectorWritable);
}
From source file:org.qcri.pca.FileFormat.java
public static void convertFromDenseToSeq(String inputPath, int cardinality, String outputFolderPath) { try {/* w w w . j a v a 2 s . co m*/ final Configuration conf = new Configuration(); final FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer; final IntWritable key = new IntWritable(); final VectorWritable value = new VectorWritable(); int lineNumber = 0; String thisLine; File[] filePathList = null; File inputFile = new File(inputPath); if (inputFile.isFile()) // if it is a file { filePathList = new File[1]; filePathList[0] = inputFile; } else { filePathList = inputFile.listFiles(); } if (filePathList == null) { log.error("The path " + inputPath + " does not exist"); return; } for (File file : filePathList) { BufferedReader br = new BufferedReader(new FileReader(file)); Vector vector = null; String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq"; writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class, VectorWritable.class, CompressionType.BLOCK); while ((thisLine = br.readLine()) != null) { // while loop begins here if (thisLine.isEmpty()) continue; String[] splitted = thisLine.split("\\s+"); vector = new SequentialAccessSparseVector(splitted.length); for (int i = 0; i < splitted.length; i++) { vector.set(i, Double.parseDouble(splitted[i])); } key.set(lineNumber); value.set(vector); //System.out.println(vector); writer.append(key, value);//write last row lineNumber++; } writer.close(); } } catch (Exception e) { e.printStackTrace(); } }
From source file:org.qcri.pca.FileFormat.java
public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) { try {//ww w. j a v a 2 s . co m final Configuration conf = new Configuration(); final FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer = null; final IntWritable key = new IntWritable(); final VectorWritable value = new VectorWritable(); Vector vector = null; String thisLine; int prevRowID = -1; boolean first = true; File[] filePathList = null; File inputFile = new File(inputPath); if (inputFile.isFile()) // if it is a file { filePathList = new File[1]; filePathList[0] = inputFile; } else { filePathList = inputFile.listFiles(); } if (filePathList == null) { log.error("The path " + inputPath + " does not exist"); return; } for (File file : filePathList) { BufferedReader br = new BufferedReader(new FileReader(file)); String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq"; writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class, VectorWritable.class, CompressionType.BLOCK); while ((thisLine = br.readLine()) != null) { // while loop begins here String[] splitted = thisLine.split(","); int rowID = Integer.parseInt(splitted[0]); int colID = Integer.parseInt(splitted[1]); double element = Double.parseDouble(splitted[2]); if (first) { first = false; vector = new SequentialAccessSparseVector(cardinality); } else if (rowID != prevRowID) { key.set(prevRowID); value.set(vector); //System.out.println(vector); writer.append(key, value);//write last row vector = new SequentialAccessSparseVector(cardinality); } prevRowID = rowID; vector.set(colID - base, element); } /*//here we append the last vector in each file (assuming that we will start a new row in the next file key.set(prevRowID); value.set(vector); //System.out.println("last vector"); //System.out.println(vector); writer.append(key,value);//write last row writer.close(); */ } if (writer != null) //append last vector in last file { 
key.set(prevRowID); value.set(vector); //System.out.println("last vector"); //System.out.println(vector); writer.append(key, value);//write last row writer.close(); } } catch (Exception e) { e.printStackTrace(); } }
From source file:org.qcri.pca.MahoutCompatibilityTest.java
License:Apache License
@Test public void testMAHOUT_1238() throws IOException { Vector v = new SequentialAccessSparseVector(5); v.set(1, 3.0); v.set(3, 5.0);//w ww. ja va 2 s . co m Vector view = new VectorView(v, 0, v.size()); doTestVectorWritableEquals(view); }
From source file:org.qcri.sparkpca.FileFormat.java
public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) { try {//from w w w .j a va 2 s .c o m final Configuration conf = new Configuration(); final FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer = null; final IntWritable key = new IntWritable(); final VectorWritable value = new VectorWritable(); Vector vector = null; String thisLine; int lineNumber = 0; int prevRowID = -1; boolean first = true; File[] filePathList = null; File inputFile = new File(inputPath); if (inputFile.isFile()) // if it is a file { filePathList = new File[1]; filePathList[0] = inputFile; } else { filePathList = inputFile.listFiles(); } if (filePathList == null) { log.error("The path " + inputPath + " does not exist"); return; } for (File file : filePathList) { BufferedReader br = new BufferedReader(new FileReader(file)); String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq"; writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class, VectorWritable.class, CompressionType.BLOCK); while ((thisLine = br.readLine()) != null) { // while loop begins here String[] splitted = thisLine.split(","); int rowID = Integer.parseInt(splitted[0]); int colID = Integer.parseInt(splitted[1]); double element = Double.parseDouble(splitted[2]); if (first) { first = false; vector = new SequentialAccessSparseVector(cardinality); } else if (rowID != prevRowID) { key.set(prevRowID); value.set(vector); //System.out.println(vector); writer.append(key, value);//write last row vector = new SequentialAccessSparseVector(cardinality); } prevRowID = rowID; vector.set(colID - base, element); } } if (writer != null) //append last vector in last file { key.set(prevRowID); value.set(vector); writer.append(key, value);//write last row writer.close(); } } catch (Exception e) { e.printStackTrace(); } }