List of usage examples for org.apache.mahout.math Vector set
void set(int index, double value);
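Before the full examples, a minimal sketch of the call itself: set(index, value) writes value at the given position and behaves the same on dense and sparse implementations. The class name, sizes, and values below are illustrative only, not taken from the source files that follow.

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class VectorSetDemo {
    public static void main(String[] args) {
        Vector dense = new DenseVector(3);
        dense.set(0, 1.5);   // position 0 now holds 1.5
        dense.set(2, -0.25); // unset positions stay 0.0

        Vector sparse = new RandomAccessSparseVector(1000);
        sparse.set(42, 7.0); // only index 42 is materialized

        System.out.println(dense + " / " + sparse.get(42));
    }
}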
From source file:edu.snu.dolphin.bsp.examples.ml.sub.MapOfIntVSumCodec.java
License:Apache License
@Override
public Map<Integer, VectorSum> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final Map<Integer, VectorSum> resultMap = new HashMap<>();
    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int mapSize = dais.readInt();
        for (int i = 0; i < mapSize; i++) {
            final int mapInt = dais.readInt();
            final int vectorSize = dais.readInt();
            final Vector vector = new DenseVector(vectorSize);
            for (int j = 0; j < vectorSize; j++) {
                vector.set(j, dais.readDouble());
            }
            final int count = dais.readInt();
            resultMap.put(mapInt, new VectorSum(vector, count));
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultMap;
}
From source file:edu.snu.dolphin.bsp.examples.ml.sub.VectorListCodec.java
License:Apache License
public List<Vector> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final List<Vector> resultList = new LinkedList<>();
    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int listSize = dais.readInt();
        final int length = dais.readInt();
        for (int i = 0; i < listSize; i++) {
            final Vector vector = new DenseVector(length);
            for (int j = 0; j < length; j++) {
                vector.set(j, dais.readDouble());
            }
            resultList.add(vector);
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultList;
}
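The decode method above implies a simple wire format: a list count, one shared vector length, then the raw doubles. A matching encode counterpart could look like the following sketch; the method is inferred from the decode logic and is not part of the original source (it assumes java.io.ByteArrayOutputStream and java.io.DataOutputStream imports).

public byte[] encode(final List<Vector> list) {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(list.size());                      // list count
        final int length = list.isEmpty() ? 0 : list.get(0).size();
        daos.writeInt(length);                           // shared vector length
        for (final Vector vector : list) {
            for (int i = 0; i < length; i++) {
                daos.writeDouble(vector.get(i));         // raw double per element
            }
        }
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
    return baos.toByteArray();
}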
From source file:edu.utsa.sifter.som.SelfOrganizingMap.java
License:Apache License
void assignTopTerms(final int numTopTerms, final java.util.Vector<String> terms) {
    final PriorityQueue<TermPair> topWeights =
            new PriorityQueue<TermPair>(numTopTerms, new TermPair.TermPairComparator());
    for (int i = 0; i < numCells(); ++i) {
        final java.util.Vector<String> topTerms = new java.util.Vector<String>(numTopTerms);
        topTerms.setSize(numTopTerms);
        final Vector cell = getCell(i);
        final double f = getFactor(i);
        topWeights.clear();
        for (Vector.Element w : cell) {
            int val = (int) (1000 * f * w.get());
            if (topWeights.size() < numTopTerms) {
                topWeights.add(new TermPair(terms.get(w.index()), val));
            } else if (topWeights.peek().DocCount < val) {
                topWeights.remove();
                topWeights.add(new TermPair(terms.get(w.index()), val));
            }
        }
        // drain the min-heap backwards so topTerms ends up in descending weight order
        final int numTopWeights = topWeights.size();
        for (int j = numTopWeights - 1; j > -1; --j) {
            topTerms.set(j, topWeights.remove().Term);
        }
        getStats(i).setTopTerms(topTerms);
    }
}
From source file:hk.newsRecommender.Classify.java
License:Open Source License
public static void genNaiveBayesModel(Configuration conf, int labelIndex, String trainFile,
        String trainSeqFile, boolean hasHeader) {
    CSVReader reader = null;
    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(new Path(trainSeqFile)))
            fs.delete(new Path(trainSeqFile), true);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, new Path(trainSeqFile),
                Text.class, VectorWritable.class);
        FileSystem fsopen = FileSystem.get(conf);
        FSDataInputStream in = fsopen.open(new Path(trainFile));
        reader = new CSVReader(new InputStreamReader(in));
        String[] header = null;
        if (hasHeader)
            header = reader.readNext();
        String[] line = null;
        Long l = 0L;
        while ((line = reader.readNext()) != null) {
            if (labelIndex > line.length)
                break;
            l++;
            List<String> tmpList = Lists.newArrayList(line);
            String label = tmpList.get(labelIndex);
            if (!strLabelList.contains(label))
                strLabelList.add(label);
            // Text key = new Text("/" + label + "/" + l);
            Text key = new Text("/" + label + "/");
            tmpList.remove(labelIndex);
            VectorWritable vectorWritable = new VectorWritable();
            Vector vector = new RandomAccessSparseVector(tmpList.size(), tmpList.size());
            for (int i = 0; i < tmpList.size(); i++) {
                String tmpStr = tmpList.get(i);
                if (StringUtils.isNumeric(tmpStr))
                    vector.set(i, Double.parseDouble(tmpStr));
                else
                    vector.set(i, parseStrCell(tmpStr));
            }
            vectorWritable.set(vector);
            writer.append(key, vectorWritable);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:hk.newsRecommender.Classify.java
License:Open Source License
public static void test(Configuration conf, String testFile, int labelIndex) throws IOException {
    System.out.println("~~~ begin to test ~~~");
    AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(naiveBayesModel);
    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(testFile));
    CSVReader csv = new CSVReader(new InputStreamReader(in));
    csv.readNext(); // skip header
    String[] line = null;
    double totalSampleCount = 0.;
    double correctClsCount = 0.;
    while ((line = csv.readNext()) != null) {
        List<String> tmpList = Lists.newArrayList(line);
        String label = tmpList.get(labelIndex);
        tmpList.remove(labelIndex);
        totalSampleCount++;
        Vector vector = new RandomAccessSparseVector(tmpList.size(), tmpList.size());
        for (int i = 0; i < tmpList.size(); i++) {
            String tempStr = tmpList.get(i);
            if (StringUtils.isNumeric(tempStr)) {
                vector.set(i, Double.parseDouble(tempStr));
            } else {
                Long id = strOptionMap.get(tempStr);
                if (id != null) {
                    vector.set(i, id);
                } else {
                    // unknown categorical value: report it and leave the cell at 0
                    System.out.println(StringUtils.join(tempStr, ","));
                    continue;
                }
            }
        }
        Vector resultVector = classifier.classifyFull(vector);
        int classifyResult = resultVector.maxValueIndex();
        if (StringUtils.equals(label, strLabelList.get(classifyResult))) {
            correctClsCount++;
        }
    }
    System.out.println("Correct Ratio: " + (correctClsCount / totalSampleCount));
}
From source file:hk.newsRecommender.MatrixAndCluster.java
License:Open Source License
public static void matrix2Vector(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable val = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class,
                VectorWritable.class, CompressionType.BLOCK);
        final IntWritable key1 = new IntWritable();
        final VectorWritable value = new VectorWritable();
        int lineNum = 0;
        Vector vector = null;
        while (reader.next(key, val)) {
            int index = 0;
            StringTokenizer st = new StringTokenizer(val.toString());
            // wrap a SequentialAccessSparseVector in a NamedVector, named by line number
            vector = new NamedVector(new SequentialAccessSparseVector(Cardinality), lineNum + "");
            while (st.hasMoreTokens()) {
                if (Integer.parseInt(st.nextToken()) == 1) {
                    vector.set(index, 1);
                }
                index++;
            }
            key1.set(lineNum++);
            value.set(vector);
            writer.append(key1, value); // write the numeric key, not the key just read
        }
    } finally {
        if (writer != null)
            writer.close();
        reader.close();
    }
}
From source file:mlbench.bayes.train.WeightSummer.java
License:Apache License
@SuppressWarnings("deprecation") public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException { parseArgs(args);/*from w w w .jav a2 s . c o m*/ HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, (JobConf) config, inDir, rank); Vector weightsPerFeature = null; Vector weightsPerLabel = new DenseVector(labNum); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config, fsplit); IntWritable index = kvrr.createKey(); VectorWritable value = kvrr.createValue(); while (kvrr.next(index, value)) { Vector instance = value.get(); if (weightsPerFeature == null) { weightsPerFeature = new RandomAccessSparseVector(instance.size(), instance.getNumNondefaultElements()); } int label = index.get(); weightsPerFeature.assign(instance, Functions.PLUS); weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum()); } } if (weightsPerFeature != null) { MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature)); MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel)); } } else if (MPI_D.COMM_BIPARTITE_A != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); config.set(MAPRED_OUTPUT_DIR, outDirW); config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString()); ((JobConf) config).setOutputKeyClass(Text.class); ((JobConf) config).setOutputValueClass(VectorWritable.class); TaskAttemptContext taskContext = new TaskAttemptContextImpl(config, DataMPIUtil.getHadoopTaskAttemptID()); SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>(); FileSystem fs = FileSystem.get(config); Path output = new Path(config.get(MAPRED_OUTPUT_DIR)); FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext); RecordWriter<Text, VectorWritable> outrw = null; try { fcommitter.setupJob(taskContext); outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null); } catch (IOException e) { e.printStackTrace(); System.err.println("ERROR: Please set the HDFS configuration properly\n"); System.exit(-1); } Text key = null, newKey = null; VectorWritable point = null, newPoint = null; Vector vector = null; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (Text) vals[0]; newPoint = (VectorWritable) vals[1]; if (key == null && point == null) { } else if (!key.equals(newKey)) { outrw.write(key, new VectorWritable(vector)); vector = null; } if (vector == null) { vector = newPoint.get(); } else { vector.assign(newPoint.get(), Functions.PLUS); } key = newKey; point = newPoint; vals = MPI_D.Recv(); } if (newKey != null && newPoint != null) { outrw.write(key, new VectorWritable(vector)); } outrw.close(null); if (fcommitter.needsTaskCommit(taskContext)) { fcommitter.commitTask(taskContext); } MPI_D.COMM_BIPARTITE_A.Barrier(); if (rank == 0) { Path resOut = new Path(outDir); NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config); naiveBayesModel.serialize(resOut, config); } } MPI_D.Finalize(); }
From source file:net.aprendizajengrande.ontocluster.RedisToVectors.java
License:Open Source License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("Usage: <hdfs folder for input>");
        System.exit(1);
    }
    Configuration conf = new Configuration();
    System.out.println("Input: " + args[0]);
    // see http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    String inputName = args[0] + "/input";
    String relsInputName = args[0] + "/rels";
    String instancesInputName = args[0] + "/instances";
    Path input = new Path(inputName);
    Path relsInput = new Path(relsInputName);
    Path instancesInput = new Path(instancesInputName);
    // see http://stackoverflow.com/questions/14993644/configure-jedis-timeout
    Jedis jedis = new Jedis("localhost", 6379, 18000);
    // create the relations and instances first, so we know what to expect
    Set<String> rels = jedis.keys("rel-nom-*");
    Map<Integer, String> relIdToName = new HashMap<>();
    FSDataOutputStream fsdos = relsInput.getFileSystem(conf).create(relsInput);
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(fsdos));
    int relNum = 0;
    for (String rel : rels) {
        String relName = rel.replaceAll("^rel-nom-", "");
        int relId = Integer.parseInt(jedis.get(rel));
        relIdToName.put(relId, relName);
        if (relId > relNum)
            relNum = relId;
    }
    relNum++;
    for (int i = 0; i < relNum; i++)
        pw.println(i + "\t" + relIdToName.get(i));
    pw.close();
    rels.clear();
    Set<String> instances = jedis.keys("res-nom-*");
    fsdos = instancesInput.getFileSystem(conf).create(instancesInput);
    pw = new PrintWriter(new OutputStreamWriter(fsdos));
    for (String instance : instances) {
        int instanceId = Integer.parseInt(instance.replaceAll("^res-nom-", ""));
        String instanceName = jedis.get(instance);
        pw.println(instanceId + "\t" + instanceName);
    }
    pw.close();
    instances.clear();
    Set<String> keys = jedis.keys("r-*");
    SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(input),
            Writer.keyClass(Text.class), Writer.valueClass(VectorWritable.class));
    for (String key : keys) {
        Set<String> theseRels = jedis.smembers(key);
        Vector s = new SequentialAccessSparseVector(relNum);
        for (String relId : theseRels)
            s.set(Integer.parseInt(relId), 1.0);
        VectorWritable v = new VectorWritable(s);
        writer.append(new Text(key), v);
    }
    writer.close();
    jedis.close();
}
From source file:nl.gridline.zieook.inx.movielens.hbase.RecommendationsImportMap.java
License:Apache License
@Override
public void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector similarityMatrixRow = value.get();
    // Remove self similarity from the equation
    similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY);
    // determine max non-zero element: (==item index)
    List<RecommendationElement> recommendations = new ArrayList<RecommendationElement>();
    // collect non-zero items:
    Iterator<Element> it = similarityMatrixRow.iterateNonZero();
    while (it.hasNext()) {
        Element e = it.next();
        recommendations.add(new RecommendationElement(e.index(), e.get()));
        // LOG.info("created new recommendation for " + e.index());
    }
    // sorted list of recommendations: now we have an item id, and similarity value:
    Collections.sort(recommendations, new SimilarityComparator());
    LOG.info("sorted: " + recommendations.size());
    int rank = 1;
    Put put = new Put(RowKeys.getRecommendationKey(collection, recommender, key.get()));
    for (RecommendationElement el : recommendations) {
        // if (el.getSimilarityValue() > 0) {
        byte[] data = Recommend.getRecommendation(el.getItemIndex(), rank, el.getSimilarityValue());
        put.add(RECOMMENDATION_COLUMN, Bytes.toBytes(rank), data);
        rank++;
        // }
    }
    context.write(new LongWritable(key.get()), put);
}
From source file:nl.gridline.zieook.inx.movielens.items.ItemBasedSortSimilaritiesMapper.java
License:Apache License
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    int maxIndex = -1;
    Vector similarityMatrixRow = value.get();
    /* remove self similarity */
    similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY);
    //
    // determine maximum index
    //
    Iterator<Element> it = similarityMatrixRow.iterateNonZero();
    while (it.hasNext()) {
        Element e = it.next();
        // e.index() == item id
        if (e.index() > maxIndex) {
            maxIndex = e.index();
        }
    }
    // System.out.println(String.format("key: %d maxIndex: %d", key.get(), maxIndex));
    if (maxIndex > 0) {
        // size the array to include maxIndex itself; it is non-zero by construction
        RecommendationElement[] itemBasedRecommendations = new RecommendationElement[maxIndex + 1];
        for (int i = 0; i <= maxIndex; i++) {
            Element element = similarityMatrixRow.getElement(i);
            double similarityValue = Double.NEGATIVE_INFINITY;
            if (element != null) {
                similarityValue = element.get();
            }
            itemBasedRecommendations[i] = new RecommendationElement(i, similarityValue);
        }
        Arrays.sort(itemBasedRecommendations, new SimilarityComparator());
        RecommendationElementArray array = new RecommendationElementArray(itemBasedRecommendations);
        context.write(new VarIntWritable(key.get()), array);
    }
}