List of usage examples for the org.apache.mahout.math.VectorWritable constructor VectorWritable()
public VectorWritable()
From source file:root.input.util.CreateSeedVector.java
License:Apache License
/** * Load in the seed vectors which will be used for the similarity matrix. * /*from ww w. j av a 2 s . c o m*/ * @param conf configuration file * @return list of configured seed vectors */ public static List<NamedVector> loadSeedVectors(Configuration conf) throws IOException, URISyntaxException { String seedPathStr = conf.get(CreateSimilarityMatrixJob.SEEDS_PATH_KEY); if (seedPathStr == null || seedPathStr.isEmpty()) { return Collections.emptyList(); } List<NamedVector> seedVectors = Lists.newArrayList(); Path seedsDirPath = new Path(seedPathStr + "/part-r-00000"); URI uri = new URI(conf.get("fs.default.name")); FileSystem fs = FileSystem.get(uri, conf); SequenceFile.Reader reader = new SequenceFile.Reader(fs, seedsDirPath, conf); Text key = new Text(); VectorWritable value = new VectorWritable(); while (reader.next(key, value)) { VectorWritable vw = (VectorWritable) value; Vector vector = vw.get(); seedVectors.add(new NamedVector(vector, key.toString())); } reader.close(); if (seedVectors.isEmpty()) { throw new IllegalStateException("No seeds found. Check your path: " + seedPathStr); } return seedVectors; }
From source file:sigis.kmeansmultiplek.AnotherKmeans.java
/**
 * Reads the comma-separated file at {@code INPUT_PATH}, builds one three-feature
 * {@code DenseVector} per line, and writes all of them to the sequence file
 * {@code POINTS_PATH + "/pointsFile"}, keyed by their zero-based position in the input.
 *
 * <p>Per line, the features are columns 2 and 3 as parsed, followed by column 5 passed
 * through {@code round(…, 2)}. Column 4 is parsed as well, but its value is overwritten.
 *
 * @param conf Hadoop configuration used to obtain the file system and create the writer
 * @return the vectors in input order
 * @throws FileNotFoundException if the input file cannot be opened
 * @throws IOException if reading the input or writing the sequence file fails
 */
private List<DenseVector> toDenseVector(Configuration conf) throws FileNotFoundException, IOException {
    List<DenseVector> positions = new ArrayList<DenseVector>();

    BufferedReader br = new BufferedReader(new FileReader(this.INPUT_PATH));
    try {
        String sCurrentLine;
        while ((sCurrentLine = br.readLine()) != null) {
            double[] features = new double[3];
            String[] values = sCurrentLine.split(",");
            for (int indx = 0; indx < features.length; indx++) {
                features[indx] = Float.parseFloat(values[indx + 2]);
                if (indx == 2) {
                    // The last feature comes from the following column instead.
                    features[indx] = round(Float.parseFloat(values[indx + 3]), 2);
                }
            }
            positions.add(new DenseVector(features));
        }
    } finally {
        // The input reader was previously never closed.
        br.close();
    }

    final Path path = new Path(POINTS_PATH + "/pointsFile");
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        // A single writable is reused; append serializes its current content on each call.
        VectorWritable vec = new VectorWritable();
        int count = 0;
        for (DenseVector vector : positions) {
            vec.set(vector);
            writer.append(new Text(Integer.toString(count)), vec);
            count++;
        }
    } finally {
        writer.close();
    }
    return positions;
}
From source file:utils.FileToSeqFile.java
/**
 * Converts a tab-separated input file into a sequence file of named vectors and then
 * prints the written records back to standard output.
 *
 * <p>Each input line yields one {@code NamedVector} with {@code NUM_COLUMNS} features, named
 * by its zero-based line number. Columns other than index 2 are parsed as numbers. Column 2
 * is treated as a timestamp string and encoded as {@code hour + "." + fraction}, where the
 * hour is taken from characters 11-12 and the fraction from characters 14-15 divided by 60.
 *
 * @throws Exception if the input cannot be read or parsed, or the sequence file cannot be
 *     written or read back
 */
public void createFile() throws Exception {
    String INPUT_FILE = "/home/asabater/Downloads/lat_lon_time.tsv";
    String OUTPUT_FILE = "/home/asabater/data_to_cluster";

    List<NamedVector> positions = new ArrayList<NamedVector>();

    BufferedReader br = new BufferedReader(new FileReader(INPUT_FILE));
    try {
        String sCurrentLine;
        int count = 0;
        while ((sCurrentLine = br.readLine()) != null) {
            String item_number = "" + count;
            // Split once per line rather than once per column.
            String[] columns = sCurrentLine.split("\\t");
            double[] features = new double[NUM_COLUMNS];
            for (int indx = 0; indx < NUM_COLUMNS; indx++) {
                if (indx == 2) {
                    // NOTE(review): the offsets assume a "yyyy-MM-dd HH:mm:ss" timestamp - confirm.
                    String timeField = columns[indx];
                    double minute = round(Float.parseFloat(timeField.substring(14, 16)) / 60, 2);
                    String min = "" + minute;
                    // Drop the first two characters (the leading "0." of a value below 1).
                    min = min.substring(2);
                    String hour = timeField.substring(11, 13);
                    String finalTime = hour + "." + min;
                    features[indx] = round(Float.parseFloat(finalTime), 2);
                } else {
                    features[indx] = Float.parseFloat(columns[indx]);
                }
            }
            positions.add(new NamedVector(new DenseVector(features), item_number));
            count++;
        }
    } finally {
        // The input reader was previously never closed.
        br.close();
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(OUTPUT_FILE);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();
        for (NamedVector vector : positions) {
            vec.set(vector);
            writer.append(new Text(vector.getName()), vec);
        }
    } finally {
        writer.close();
    }

    // Read the file back and print every record as a sanity check.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(OUTPUT_FILE), conf);
    try {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + " , " + value.get().asFormatString());
        }
    } finally {
        reader.close();
    }
}