List of usage examples for org.apache.mahout.math.VectorWritable
public VectorWritable()
From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java
License:Apache License
/**
 * Loads a row-indexed feature matrix from the sequence files registered in the
 * distributed cache.
 *
 * @param numEntities expected row count; used to pre-size the map when positive
 * @param conf        job configuration holding the cached file paths
 * @return map from row index to feature vector
 * @throws IOException if a cached sequence file cannot be read
 */
public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
    Configuration conf) throws IOException {
  OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0
      ? new OpenIntObjectHashMap<Vector>(numEntities)
      : new OpenIntObjectHashMap<Vector>();
  IntWritable rowKey = new IntWritable();
  VectorWritable rowValue = new VectorWritable();
  LocalFileSystem localFs = FileSystem.getLocal(conf);
  for (Path cached : HadoopUtil.getCachedFiles(conf)) {
    try (SequenceFile.Reader reader = new SequenceFile.Reader(localFs, cached, conf)) {
      while (reader.next(rowKey, rowValue)) {
        featureMatrix.put(rowKey.get(), rowValue.get());
      }
    }
  }
  Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is empty");
  return featureMatrix;
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java
License:Apache License
/**
 * Seeds the item-feature matrix M: for every item with a nonzero average rating,
 * feature 0 is set to that average and the remaining features are random.
 *
 * @param averageRatings per-item average ratings; only nonzero entries are written
 * @throws IOException if the sequence file cannot be written
 */
private void initializeM(Vector averageRatings) throws IOException {
  Random random = RandomUtils.getRandom();
  Path outputFile = new Path(pathToM(-1), "part-m-00000");
  FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
  IntWritable itemIndex = new IntWritable();
  VectorWritable features = new VectorWritable();
  try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), outputFile,
      IntWritable.class, VectorWritable.class)) {
    for (Vector.Element e : averageRatings.nonZeroes()) {
      Vector row = new DenseVector(numFeatures);
      // first feature carries the item's average rating, the rest start random
      row.setQuick(0, e.get());
      for (int f = 1; f < numFeatures; f++) {
        row.setQuick(f, random.nextDouble());
      }
      itemIndex.set(e.index());
      features.set(row);
      writer.append(itemIndex, features);
    }
  }
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.PrefAndSimilarityColumnWritable.java
License:Apache License
/**
 * Deserializes this instance: a float preference value followed by a
 * {@link VectorWritable}-encoded similarity column.
 */
@Override
public void readFields(DataInput in) throws IOException {
  prefValue = in.readFloat();
  VectorWritable vectorWritable = new VectorWritable();
  vectorWritable.readFields(in);
  similarityColumn = vectorWritable.get();
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.VectorAndPrefsWritable.java
License:Apache License
/**
 * Deserializes this instance: a {@link VectorWritable}-encoded vector, then a
 * varint element count followed by that many (varlong userID, float value) pairs.
 */
@Override
public void readFields(DataInput in) throws IOException {
  VectorWritable vectorWritable = new VectorWritable();
  vectorWritable.readFields(in);
  vector = vectorWritable.get();
  int count = Varint.readUnsignedVarInt(in);
  userIDs = new ArrayList<>(count);
  values = new ArrayList<>(count);
  for (int n = 0; n < count; n++) {
    userIDs.add(Varint.readSignedVarLong(in));
    values.add(in.readFloat());
  }
}
From source file:org.gpfvic.mahout.cf.taste.hadoop.item.VectorOrPrefWritable.java
License:Apache License
@Override public void readFields(DataInput in) throws IOException { boolean hasVector = in.readBoolean(); if (hasVector) { VectorWritable writable = new VectorWritable(); writable.readFields(in);//from w w w . ja v a 2 s. c o m set(writable.get()); } else { long theUserID = Varint.readSignedVarLong(in); float theValue = in.readFloat(); set(theUserID, theValue); } }
From source file:org.plista.kornakapi.core.training.LDATopicFactorizer.java
License:Apache License
/**
 * Reads the per-document topic posterior from the LDA output sequence file into
 * {@code itemFeatures}, keyed by the item id resolved via {@link #getIndexItem}.
 *
 * @throws IOException if the sequence file cannot be read
 */
private void getAllTopicPosterior() throws IOException {
    itemFeatures = new HashMap<String, Vector>();
    Reader reader = new SequenceFile.Reader(fs, new Path(this.conf.getLDADocTopicsPath()), lconf);
    try {
        IntWritable key = new IntWritable();
        VectorWritable newVal = new VectorWritable();
        while (reader.next(key, newVal)) {
            itemFeatures.put(getIndexItem(key.get()), newVal.get());
        }
    } finally {
        // Fix: the reader was previously closed only on the success path and
        // leaked whenever next()/get() threw.
        Closeables.close(reader, false);
    }
}
From source file:org.plista.kornakapi.core.training.SemanticModel.java
License:Apache License
/**
 * Persists the model (item features, index-to-item and item-to-index maps) as
 * sequence files, but only if {@code safeKey} still matches the model's current
 * key. Training a new model changes the key, so a stale inference job is refused
 * rather than allowed to overwrite newer data.
 *
 * @param safeKey the model key observed when the inference job started
 * @throws IOException if any sequence file cannot be written
 */
public void safe(String safeKey) throws IOException {
    // A concurrent training run invalidates the key: refuse to store a stale model.
    if (!this.key.equals(safeKey)) {
        if (log.isInfoEnabled()) {
            log.info("Storing model Failed. Modelkey Changed");
        }
        return;
    }
    if (itemFeatures != null) {
        Path model = path.suffix("/itemFeature.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, VectorWritable.class);
        try {
            for (String itemid : itemFeatures.keySet()) {
                Text id = new Text();
                VectorWritable val = new VectorWritable();
                id.set(itemid);
                val.set(itemFeatures.get(itemid));
                w.append(id, val);
            }
        } finally {
            // Fix: writers were previously only closed on the success path and
            // leaked (losing buffered data) whenever append() threw.
            Closeables.close(w, false);
        }
    }
    if (indexItem != null) {
        Path model = path.suffix("/indexItem.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, IntWritable.class, Text.class);
        try {
            for (Integer itemid : indexItem.keySet()) {
                IntWritable key = new IntWritable();
                Text val = new Text();
                key.set(itemid);
                val.set(indexItem.get(itemid));
                w.append(key, val);
            }
        } finally {
            Closeables.close(w, false);
        }
    }
    if (itemIndex != null) {
        Path model = path.suffix("/itemIndex.model");
        Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, IntWritable.class);
        try {
            for (String itemid : itemIndex.keySet()) {
                IntWritable val = new IntWritable();
                Text key = new Text();
                key.set(itemid);
                val.set(itemIndex.get(itemid));
                w.append(key, val);
            }
        } finally {
            Closeables.close(w, false);
        }
    }
    if (log.isInfoEnabled()) {
        log.info("LDA Model Safed");
    }
}
From source file:org.plista.kornakapi.core.training.SemanticModel.java
License:Apache License
/**
 * Loads the model from its sequence files, if present: the index-to-item map,
 * the item-to-index map, and the item feature vectors. Missing files are
 * skipped silently (partial models are allowed).
 *
 * @throws IOException if an existing model file cannot be read
 */
public void read() throws IOException {
    Path indexPath = path.suffix("/indexItem.model");
    if (fs.exists(indexPath)) {
        indexItem = new HashMap<Integer, String>();
        Reader reader = new SequenceFile.Reader(fs, indexPath, lconf);
        try {
            IntWritable key = new IntWritable();
            Text val = new Text();
            while (reader.next(key, val)) {
                indexItem.put(key.get(), val.toString());
            }
        } finally {
            // Fix: readers were previously only closed on the success path and
            // leaked whenever next() threw.
            Closeables.close(reader, false);
        }
    }
    Path itemIndexPath = path.suffix("/itemIndex.model");
    if (fs.exists(itemIndexPath)) {
        itemIndex = new HashMap<String, Integer>();
        Reader reader = new SequenceFile.Reader(fs, itemIndexPath, lconf);
        try {
            Text key = new Text();
            IntWritable val = new IntWritable();
            while (reader.next(key, val)) {
                itemIndex.put(key.toString(), val.get());
            }
        } finally {
            Closeables.close(reader, false);
        }
    }
    Path featurePath = path.suffix("/itemFeature.model");
    if (fs.exists(featurePath)) {
        itemFeatures = new HashMap<String, Vector>();
        Reader reader = new SequenceFile.Reader(fs, featurePath, lconf);
        try {
            Text key = new Text();
            VectorWritable val = new VectorWritable();
            while (reader.next(key, val)) {
                itemFeatures.put(key.toString(), val.get());
            }
        } finally {
            Closeables.close(reader, false);
        }
    }
    if (log.isInfoEnabled()) {
        log.info("LDA Model Read");
    }
}
From source file:org.qcri.pca.FileFormat.java
public static void convertFromDenseToSeq(String inputPath, int cardinality, String outputFolderPath) { try {//ww w . j a v a 2 s . c o m final Configuration conf = new Configuration(); final FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer; final IntWritable key = new IntWritable(); final VectorWritable value = new VectorWritable(); int lineNumber = 0; String thisLine; File[] filePathList = null; File inputFile = new File(inputPath); if (inputFile.isFile()) // if it is a file { filePathList = new File[1]; filePathList[0] = inputFile; } else { filePathList = inputFile.listFiles(); } if (filePathList == null) { log.error("The path " + inputPath + " does not exist"); return; } for (File file : filePathList) { BufferedReader br = new BufferedReader(new FileReader(file)); Vector vector = null; String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq"; writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class, VectorWritable.class, CompressionType.BLOCK); while ((thisLine = br.readLine()) != null) { // while loop begins here if (thisLine.isEmpty()) continue; String[] splitted = thisLine.split("\\s+"); vector = new SequentialAccessSparseVector(splitted.length); for (int i = 0; i < splitted.length; i++) { vector.set(i, Double.parseDouble(splitted[i])); } key.set(lineNumber); value.set(vector); //System.out.println(vector); writer.append(key, value);//write last row lineNumber++; } writer.close(); } } catch (Exception e) { e.printStackTrace(); } }
From source file:org.qcri.pca.FileFormat.java
public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) { try {/*from ww w .j a v a 2 s.com*/ final Configuration conf = new Configuration(); final FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer = null; final IntWritable key = new IntWritable(); final VectorWritable value = new VectorWritable(); Vector vector = null; String thisLine; int prevRowID = -1; boolean first = true; File[] filePathList = null; File inputFile = new File(inputPath); if (inputFile.isFile()) // if it is a file { filePathList = new File[1]; filePathList[0] = inputFile; } else { filePathList = inputFile.listFiles(); } if (filePathList == null) { log.error("The path " + inputPath + " does not exist"); return; } for (File file : filePathList) { BufferedReader br = new BufferedReader(new FileReader(file)); String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq"; writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class, VectorWritable.class, CompressionType.BLOCK); while ((thisLine = br.readLine()) != null) { // while loop begins here String[] splitted = thisLine.split(","); int rowID = Integer.parseInt(splitted[0]); int colID = Integer.parseInt(splitted[1]); double element = Double.parseDouble(splitted[2]); if (first) { first = false; vector = new SequentialAccessSparseVector(cardinality); } else if (rowID != prevRowID) { key.set(prevRowID); value.set(vector); //System.out.println(vector); writer.append(key, value);//write last row vector = new SequentialAccessSparseVector(cardinality); } prevRowID = rowID; vector.set(colID - base, element); } /*//here we append the last vector in each file (assuming that we will start a new row in the next file key.set(prevRowID); value.set(vector); //System.out.println("last vector"); //System.out.println(vector); writer.append(key,value);//write last row writer.close(); */ } if (writer != null) //append last vector in last file { 
key.set(prevRowID); value.set(vector); //System.out.println("last vector"); //System.out.println(vector); writer.append(key, value);//write last row writer.close(); } } catch (Exception e) { e.printStackTrace(); } }