List of usage examples for the org.apache.mahout.math.VectorWritable constructor VectorWritable()
public VectorWritable()
From source file:com.scaleunlimited.classify.vectors.WritableComparableVector.java
License:Apache License
@Override public void readFields(DataInput in) throws IOException { VectorWritable v = new VectorWritable(); // VectorWritable relies on having a valid conf - not sure how // that normally gets set up w/Mahout, but this seems to work. v.setConf(CONF);// www .j av a2 s. c o m v.readFields(in); _vector = v.get(); }
From source file:com.skp.experiment.math.als.hadoop.DistributedImplicitFeedbackAlternatingLeastSquaresSolver.java
License:Apache License
private Vector retrieveRow(int index) throws IOException { IntWritable rowIDWritable = new IntWritable(index); VectorWritable colWritable = new VectorWritable(); /*/*from w ww . j a v a 2 s . c o m*/ if (reader.get(rowIDWritable, colWritable) == null) { throw new IOException("find " + index + " in MapFile failed!"); } */ if (mapFileReaders.get(getPartition(index)).get(rowIDWritable, colWritable) == null) { throw new IOException("find " + index + " in MapFile failed!"); } return colWritable.get(); }
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/**
 * Converts an in-memory matrix to a distributed MapDir format so that it can
 * be used in distributed jobs.
 *
 * <p>The rows are written to {@code matrix-k-0/data} inside a directory named
 * {@code <label><rows>x<cols>} under {@code outPath}. When that directory
 * already exists nothing is written and a warning is logged instead.
 *
 * @param origMatrix the in-memory matrix to convert
 * @param outPath parent path under which the matrix directory is created
 * @param tmpPath temporary path handed to the returned matrix
 * @param label prefix used in the name of the matrix directory
 * @return a {@link DistributedRowMatrix} over the matrix directory
 * @throws Exception if writing or fixing the matrix files fails
 */
public static DistributedRowMatrix toMapDir(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws Exception {
    final int rowCount = origMatrix.numRows();
    final int columnCount = origMatrix.numCols();

    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + rowCount + "x" + columnCount);
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);

    if (fs.exists(outputDir)) {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    } else {
        Path mapDir = new Path(outputDir, "matrix-k-0");
        Path dataFile = new Path(mapDir, "data");
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, dataFile, IntWritable.class,
                VectorWritable.class);
        // Both writables are reused for every row.
        VectorWritable rowValue = new VectorWritable();
        IntWritable rowKey = new IntWritable();
        try {
            for (int row = 0; row < rowCount; row++) {
                rowValue.set(origMatrix.viewRow(row));
                rowKey.set(row);
                writer.append(rowKey, rowValue);
            }
        } finally {
            writer.close();
        }
        MapFile.fix(fs, mapDir, IntWritable.class, VectorWritable.class, false, conf);
    }

    DistributedRowMatrix distributed = new DistributedRowMatrix(outputDir, tmpPath, rowCount, columnCount);
    distributed.setConf(conf);
    return distributed;
}
From source file:com.twitter.algebra.AlgebraCommon.java
License:Apache License
/** * Write a vector to filesystem so that it can be used by distributed jobs * @param vector/*from www .j a v a2 s .c o m*/ * @param outputDir * @param label the unique label that be used in naming the vector file * @param conf * @return * @throws IOException */ public static Path toDistributedVector(Vector vector, Path outputDir, String label, Configuration conf) throws IOException { Path outputFile = new Path(outputDir, "Vector-" + label); FileSystem fs = FileSystem.get(outputDir.toUri(), conf); if (fs.exists(outputFile)) { log.warn("----------- OVERWRITE " + outputFile + " already exists"); fs.delete(outputFile, false); } @SuppressWarnings("deprecation") SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class, VectorWritable.class); VectorWritable vectorw = new VectorWritable(); vectorw.set(vector); writer.append(new IntWritable(0), vectorw); writer.close(); return outputFile; }
From source file:com.twitter.algebra.matrix.format.MapDir.java
License:Apache License
public static void main(String[] args) throws Exception { Path inPath = new Path(args[0]); Configuration conf = new Configuration(); MapDir mapDir = new MapDir(conf, inPath); for (int i = 0; i < 10; i++) { IntWritable key = new IntWritable(i); VectorWritable vw = new VectorWritable(); vw = mapDir.get(key, vw);// w w w .j a v a2 s. c o m System.out.println(vw); } mapDir.close(); }
From source file:com.twitter.algebra.matrix.format.MapDir.java
License:Apache License
public static void testIterator(DistributedRowMatrix origMtx, Path inPath) throws IOException { Configuration conf = new Configuration(); MapDir mapDir = new MapDir(conf, inPath); Iterator<MatrixSlice> sliceIterator = origMtx.iterateAll(); while (sliceIterator.hasNext()) { MatrixSlice slice = sliceIterator.next(); int index = slice.index(); System.out.println("A[" + index + "] = " + slice.vector()); IntWritable key = new IntWritable(index); VectorWritable vw = new VectorWritable(); vw = mapDir.get(key, vw);//w ww . j a v a 2 s . c o m System.out.println("B[" + index + "] = " + vw); } mapDir.close(); }
From source file:com.twitter.algebra.matrix.text.TestSequenceFile.java
License:Apache License
/**
 * Prints, for every record of a sequence file, its key, its number of
 * non-zero elements and its element sum, followed by the sum over all records.
 *
 * <p>Finally prints the vector whose key equals {@code printRow}; when no such
 * key was read, the last key and value held by the reused writables are
 * printed instead.
 *
 * @param inputStr path of the sequence file to read
 * @param printRow key of the row to print in full at the end
 * @throws IOException if the file cannot be opened, read or closed
 */
private static void printSequenceFile(String inputStr, int printRow) throws IOException {
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr);
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    double sum = 0;
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    Vector printVector = null;
    try {
        while (reader.next(key, value)) {
            Vector row = value.get();
            if (key.get() == printRow) {
                printVector = row;
            }
            int cnt = 0;
            for (Iterator<Element> iter = row.nonZeroes().iterator(); iter.hasNext(); iter.next()) {
                cnt++;
            }
            // Compute the row sum once and reuse it for the total and the output.
            double rowSum = row.zSum();
            sum += rowSum;
            System.out.println("# " + key + " " + cnt + " " + rowSum);
        }
    } finally {
        // Close even when reading fails so the reader is not leaked.
        reader.close();
    }
    System.out.println("SUM " + sum);
    if (printVector != null) {
        System.out.println("##### " + printRow + " " + printVector);
    } else {
        System.out.println("##### " + key + " " + value.get());
    }
}
From source file:crawler.TextTFIDF.java
License:Apache License
/**
 * Reads the TF-IDF vectors from {@code tfidf-vectors/part-r-00000} under
 * {@code outputDir} into a map keyed by the record key's string form.
 *
 * <p>Reading is best effort: an {@link IOException} is reported through
 * {@code printStackTrace()} and the entries collected so far are returned.
 *
 * @return the vectors read, sorted by key; empty when nothing could be read
 */
public TreeMap<String, Vector> getPointsVectors() {
    TreeMap<String, Vector> points = new TreeMap<String, Vector>();
    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(outputDir + "/tfidf-vectors/part-r-00000");
        SequenceFile.Reader readerTFIDF = new SequenceFile.Reader(fs, path, conf);
        try {
            Text keyTFIDF = new Text();
            VectorWritable valueTFIDF = new VectorWritable();
            while (readerTFIDF.next(keyTFIDF, valueTFIDF)) {
                Vector point = valueTFIDF.get();
                points.put(keyTFIDF.toString(), point);
            }
        } finally {
            // Close even when reading fails so the reader is not leaked.
            readerTFIDF.close();
        }
    } catch (IOException e) {
        // Deliberately non-fatal: callers receive whatever was read before the failure.
        e.printStackTrace();
    }
    return points;
}
From source file:csvToSequence.ConvertToSeqLargeTxtVec.java
public static void main(String[] args) throws IOException { String filename = "/home/ivan/WorkDir/ccFraud.csv"; String outputfilename = "/home/ivan/WorkDir/part-0000"; SequenceFile.Writer writer;/*from w w w. j a va 2s . c om*/ Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path path = new Path(outputfilename); writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class); VectorWritable vec = new VectorWritable(); BufferedReader br = new BufferedReader(new FileReader(filename)); String s; br.readLine(); //skip line while ((s = br.readLine()) != null) { String[] value = s.split(","); double[] numValue = new double[8]; for (int i = 0; i < 8; i++) numValue[i] = Double.parseDouble(value[i]); if (Integer.parseInt(value[8]) == 1) value[8] = "Fraud/" + value[8]; else value[8] = "Normal/" + value[8]; NamedVector oneV = new NamedVector(new DenseVector(numValue), value[8]); vec.set(oneV.getDelegate()); writer.append(new Text(oneV.getName()), vec); } writer.close(); }
From source file:csvToSequence.ConvertToSeqTextVecWritable.java
public static void main(String[] args) throws FileNotFoundException, IOException { String filename = "/home/ivan/WorkDir/ccFraud.csv"; String outputfilename = "/home/ivan/WorkDir/part-0000"; SequenceFile.Writer writer;//from ww w. j av a 2 s.c o m Configuration conf = new Configuration(); List<NamedVector> namedVectors = new ArrayList<>(); /*Integer i = 1; CSVVectorIterator vectorCSVVectorIterator = new CSVVectorIterator(new FileReader(filename)); //System.out.println("Densvector"+vec.next()): while(vectorCSVVectorIterator.hasNext()){ NamedVector vecIt = new NamedVector(vectorCSVVectorIterator.next(),i.toString()); namedVectors.add(vecIt); i++; }*/ BufferedReader br = new BufferedReader(new FileReader(filename)); String s; br.readLine(); //skip line while ((s = br.readLine()) != null) { String[] value = s.split(","); double[] numValue = new double[8]; for (int i = 0; i < 8; i++) numValue[i] = Double.parseDouble(value[i]); if (Integer.parseInt(value[8]) == 1) value[8] = "Fraud/" + value[8]; else value[8] = "Normal/" + value[8]; NamedVector oneV = new NamedVector(new DenseVector(numValue), value[8]); namedVectors.add(oneV); } FileSystem fs = FileSystem.get(conf); Path path = new Path(outputfilename); writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class); VectorWritable vec = new VectorWritable(); for (NamedVector iter : namedVectors) { vec.set(iter.getDelegate()); writer.append(new Text(iter.getName()), vec); } writer.close(); /*try (SequenceFile.Reader reader = new SequenceFile.Reader(fs,path, conf)) { Text key = new Text(); VectorWritable value = new VectorWritable(); while (reader.next(key, value)) { System.out.println(key + " "+ value); } }*/ }