Example usage for the org.apache.mahout.math.VectorWritable constructor VectorWritable()

List of usage examples for the org.apache.mahout.math.VectorWritable constructor VectorWritable()

Introduction

On this page you can find example usages of the no-argument constructor VectorWritable() of org.apache.mahout.math.VectorWritable.

Prototype

public VectorWritable() 

Source Link

Usage

From source file:root.input.util.CreateSeedVector.java

License:Apache License

/**
 * Load in the seed vectors which will be used for the similarity matrix.
 * /*from   ww w.  j av a 2 s .  c o  m*/
 * @param conf configuration file
 * @return list of configured seed vectors
 */
public static List<NamedVector> loadSeedVectors(Configuration conf) throws IOException, URISyntaxException {

    String seedPathStr = conf.get(CreateSimilarityMatrixJob.SEEDS_PATH_KEY);
    if (seedPathStr == null || seedPathStr.isEmpty()) {
        return Collections.emptyList();
    }

    List<NamedVector> seedVectors = Lists.newArrayList();

    Path seedsDirPath = new Path(seedPathStr + "/part-r-00000");

    URI uri = new URI(conf.get("fs.default.name"));
    FileSystem fs = FileSystem.get(uri, conf);

    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seedsDirPath, conf);

    Text key = new Text();
    VectorWritable value = new VectorWritable();

    while (reader.next(key, value)) {
        VectorWritable vw = (VectorWritable) value;
        Vector vector = vw.get();
        seedVectors.add(new NamedVector(vector, key.toString()));
    }

    reader.close();

    if (seedVectors.isEmpty()) {
        throw new IllegalStateException("No seeds found. Check your path: " + seedPathStr);
    }

    return seedVectors;
}

From source file:sigis.kmeansmultiplek.AnotherKmeans.java

/**
 * Reads the comma-separated file at {@code INPUT_PATH}, turns every line into a
 * three-element {@code DenseVector}, writes all vectors to the sequence file
 * {@code POINTS_PATH + "/pointsFile"} and returns them.
 *
 * <p>Per line, element 0 comes from column 2, element 1 from column 3, and element 2
 * from column 5 passed through {@code round(..., 2)}. Column 4 is parsed as well, but its
 * value is overwritten by column 5. Values are parsed with {@code Float.parseFloat} and
 * then widened to {@code double}.
 *
 * <p>Sequence file keys are the zero-based position of the vector in the input, as text.
 *
 * @param conf Hadoop configuration used to obtain the file system and create the writer
 * @return the vectors in input order
 * @throws FileNotFoundException if the input file does not exist
 * @throws IOException if reading the input or writing the sequence file fails
 */
private List<DenseVector> toDenseVector(Configuration conf) throws FileNotFoundException, IOException {
    List<DenseVector> positions = new ArrayList<DenseVector>();

    BufferedReader br = new BufferedReader(new FileReader(this.INPUT_PATH));
    try {
        String sCurrentLine;
        while ((sCurrentLine = br.readLine()) != null) {
            double[] features = new double[3];
            String[] values = sCurrentLine.split(",");
            for (int indx = 0; indx < features.length; indx++) {
                features[indx] = Float.parseFloat(values[indx + 2]);
                if (indx == 2) {
                    // The last feature is taken from one column further right and rounded;
                    // this replaces the value assigned just above.
                    features[indx] = round(Float.parseFloat(values[indx + 3]), 2);
                }
            }
            positions.add(new DenseVector(features));
        }
    } finally {
        // The input reader was previously never closed.
        br.close();
    }

    final Path path = new Path(POINTS_PATH + "/pointsFile");
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        // A single VectorWritable is reused as the carrier for every appended record.
        VectorWritable vec = new VectorWritable();
        int count = 0;

        for (DenseVector vector : positions) {
            vec.set(vector);
            writer.append(new Text(Integer.toString(count)), vec);
            count++;
        }
    } finally {
        // Close the writer even if an append fails.
        writer.close();
    }
    return positions;
}

From source file:utils.FileToSeqFile.java

/**
 * Converts a tab-separated input file into a Hadoop sequence file of named vectors and
 * then prints the written records to standard output.
 *
 * <p>Every input line yields one {@code NamedVector} with {@code NUM_COLUMNS} features,
 * named after its zero-based line number. All columns are parsed as floats except
 * column 2, which is read as a timestamp string: characters 11-12 are taken as the hour
 * and characters 14-15 as the minutes (offsets that fit a {@code yyyy-MM-dd HH:mm:ss}
 * layout). The minutes are divided by 60, passed through {@code round(..., 2)}, and the
 * digits after the leading {@code "0."} are appended to the hour as its decimal part.
 *
 * <p>Input and output locations are hard-coded local paths.
 *
 * @throws Exception if the input cannot be read or parsed, or the sequence file cannot
 *         be written or read back
 */
public void createFile() throws Exception {

    String inputFile = "/home/asabater/Downloads/lat_lon_time.tsv";
    String outputFile = "/home/asabater/data_to_cluster";

    List<NamedVector> positions = new ArrayList<NamedVector>();

    BufferedReader br = new BufferedReader(new FileReader(inputFile));
    try {
        String sCurrentLine;
        int count = 0;
        while ((sCurrentLine = br.readLine()) != null) {

            String itemNumber = "" + count;
            // Split once per line instead of once (or more) per column.
            String[] columns = sCurrentLine.split("\\t");
            double[] features = new double[NUM_COLUMNS];
            for (int indx = 0; indx < NUM_COLUMNS; indx++) {
                if (indx == 2) {
                    String timestamp = columns[indx];
                    double minute = round(Float.parseFloat(timestamp.substring(14, 16)) / 60, 2);
                    // Drop the first two characters (the "0." prefix of the minute
                    // fraction) so only the decimals remain.
                    String min = ("" + minute).substring(2);
                    String hour = timestamp.substring(11, 13);
                    String finalTime = hour + "." + min;
                    features[indx] = round(Float.parseFloat(finalTime), 2);
                } else {
                    features[indx] = Float.parseFloat(columns[indx]);
                }
            }

            positions.add(new NamedVector(new DenseVector(features), itemNumber));
            count++;
        }
    } finally {
        // The input reader was previously never closed.
        br.close();
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputFile);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();
        for (NamedVector vector : positions) {
            vec.set(vector);
            writer.append(new Text(vector.getName()), vec);
        }
    } finally {
        // Close the writer even if an append fails.
        writer.close();
    }

    // Read the file back and print each record as a simple sanity check.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + " , " + value.get().asFormatString());
        }
    } finally {
        reader.close();
    }
}