Example usage for org.apache.mahout.math VectorWritable VectorWritable

List of usage examples for the org.apache.mahout.math.VectorWritable constructor VectorWritable()

Introduction

On this page you can find example usages of the no-argument constructor VectorWritable() of org.apache.mahout.math.VectorWritable.

Prototype

public VectorWritable() 

Source Link

Usage

From source file:com.scaleunlimited.classify.vectors.WritableComparableVector.java

License:Apache License

/**
 * Restores this object's vector from {@code in}.
 *
 * <p>The bytes are decoded by a temporary {@link VectorWritable}; the vector it
 * produces is then stored in {@code _vector}.
 *
 * @param in the input to read the serialized vector from
 * @throws IOException if the underlying read fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    VectorWritable decoder = new VectorWritable();

    // The decoder is handed CONF explicitly before reading. The original author
    // noted that VectorWritable relies on a valid conf and was unsure how Mahout
    // normally provides one; setting it here "seems to work".
    decoder.setConf(CONF);
    decoder.readFields(in);

    _vector = decoder.get();
}

From source file:com.skp.experiment.math.als.hadoop.DistributedImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

private Vector retrieveRow(int index) throws IOException {
    IntWritable rowIDWritable = new IntWritable(index);
    VectorWritable colWritable = new VectorWritable();
    /*/*from w ww .  j a  v a 2  s . c  o  m*/
    if (reader.get(rowIDWritable, colWritable) == null) {
      throw new IOException("find " + index + " in MapFile failed!");
    }
    */

    if (mapFileReaders.get(getPartition(index)).get(rowIDWritable, colWritable) == null) {
        throw new IOException("find " + index + " in MapFile failed!");
    }
    return colWritable.get();
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Converts an in-memory matrix into the distributed MapDir format so that it
 * can be consumed by distributed jobs.
 *
 * <p>The target directory is {@code outPath/<label><rows>x<cols>}. If it already
 * exists nothing is written and a warning is logged; otherwise every row is
 * appended to {@code matrix-k-0/data} and the directory is passed to
 * {@code MapFile.fix}.
 *
 * @param origMatrix the in-memory matrix to write
 * @param outPath parent directory of the generated matrix directory
 * @param tmpPath temporary path handed to the returned {@link DistributedRowMatrix}
 * @param label prefix of the generated directory name
 * @return a {@link DistributedRowMatrix} over the matrix directory
 * @throws Exception if writing or fixing the files fails
 */
public static DistributedRowMatrix toMapDir(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws Exception {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);

    if (fs.exists(outputDir)) {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    } else {
        Path mapDir = new Path(outputDir, "matrix-k-0");
        Path dataFile = new Path(mapDir, "data");
        @SuppressWarnings("deprecation")
        SequenceFile.Writer out = new SequenceFile.Writer(fs, conf, dataFile, IntWritable.class,
                VectorWritable.class);
        // Key and value holders are reused for every appended row.
        IntWritable rowKey = new IntWritable();
        VectorWritable rowValue = new VectorWritable();
        try {
            int row = 0;
            while (row < origMatrix.numRows()) {
                rowValue.set(origMatrix.viewRow(row));
                rowKey.set(row);
                out.append(rowKey, rowValue);
                row++;
            }
        } finally {
            out.close();
        }
        MapFile.fix(fs, mapDir, IntWritable.class, VectorWritable.class, false, conf);
    }

    DistributedRowMatrix result = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(),
            origMatrix.numCols());
    result.setConf(conf);
    return result;
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Write a vector to filesystem so that it can be used by distributed jobs
 * @param vector/*from  www  .j a v a2 s  .c  o m*/
 * @param outputDir
 * @param label the unique label that be used in naming the vector file
 * @param conf
 * @return
 * @throws IOException
 */
public static Path toDistributedVector(Vector vector, Path outputDir, String label, Configuration conf)
        throws IOException {
    Path outputFile = new Path(outputDir, "Vector-" + label);
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (fs.exists(outputFile)) {
        log.warn("----------- OVERWRITE " + outputFile + " already exists");
        fs.delete(outputFile, false);
    }
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
            VectorWritable.class);
    VectorWritable vectorw = new VectorWritable();
    vectorw.set(vector);
    writer.append(new IntWritable(0), vectorw);
    writer.close();
    return outputFile;
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License

/**
 * Prints the values stored under keys 0 through 9 of a MapDir.
 *
 * @param args {@code args[0]} is the path of the MapDir to read
 * @throws Exception if the MapDir cannot be opened or read
 */
public static void main(String[] args) throws Exception {
    Path inPath = new Path(args[0]);
    Configuration conf = new Configuration();
    MapDir mapDir = new MapDir(conf, inPath);
    try {
        for (int i = 0; i < 10; i++) {
            IntWritable key = new IntWritable(i);
            VectorWritable vw = new VectorWritable();
            // The lookup result replaces the holder and is printed as-is.
            vw = mapDir.get(key, vw);
            System.out.println(vw);
        }
    } finally {
        // Release the MapDir even when a lookup throws.
        mapDir.close();
    }
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License

/**
 * Prints every row of {@code origMtx} next to the value the MapDir at
 * {@code inPath} returns for the same row index, for manual comparison.
 *
 * @param origMtx the matrix whose rows are iterated
 * @param inPath path of the MapDir to look the rows up in
 * @throws IOException if the MapDir cannot be opened, read or closed
 */
public static void testIterator(DistributedRowMatrix origMtx, Path inPath) throws IOException {
    Configuration conf = new Configuration();
    MapDir mapDir = new MapDir(conf, inPath);
    try {
        Iterator<MatrixSlice> sliceIterator = origMtx.iterateAll();
        while (sliceIterator.hasNext()) {
            MatrixSlice slice = sliceIterator.next();
            int index = slice.index();
            System.out.println("A[" + index + "] = " + slice.vector());

            IntWritable key = new IntWritable(index);
            VectorWritable vw = new VectorWritable();
            vw = mapDir.get(key, vw);
            System.out.println("B[" + index + "] = " + vw);
        }
    } finally {
        // Release the MapDir even when iteration or a lookup throws.
        mapDir.close();
    }
}

From source file:com.twitter.algebra.matrix.text.TestSequenceFile.java

License:Apache License

/**
 * Dumps summary statistics of a sequence file of (IntWritable, VectorWritable)
 * records to standard output.
 *
 * <p>For each record it prints the key, the number of non-zero elements and the
 * element sum; afterwards it prints the total of all sums. Finally it prints the
 * vector whose key equals {@code printRow}, or, if no such key was seen, the
 * contents of the key/value holders as left by the reader.
 *
 * @param inputStr path of the sequence file
 * @param printRow key of the row to print in full
 * @throws IOException if the file cannot be opened or read
 */
private static void printSequenceFile(String inputStr, int printRow) throws IOException {
    Configuration conf = new Configuration();
    Path finalNumberFile = new Path(inputStr);
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), finalNumberFile, conf);
    double sum = 0;
    IntWritable key = new IntWritable();
    VectorWritable value = new VectorWritable();
    Vector printVector = null;
    try {
        while (reader.next(key, value)) {
            if (key.get() == printRow) {
                printVector = value.get();
            }
            // Count the non-zero elements by walking the iterator.
            int cnt = 0;
            Iterator<Element> iter = value.get().nonZeroes().iterator();
            for (; iter.hasNext(); iter.next()) {
                cnt++;
            }
            sum += value.get().zSum();
            System.out.println("# " + key + " " + cnt + " " + value.get().zSum());
        }
    } finally {
        // Close even when reading fails so the reader is not leaked.
        reader.close();
    }
    System.out.println("SUM " + sum);
    if (printVector != null) {
        System.out.println("##### " + printRow + " " + printVector);
    } else {
        System.out.println("##### " + key + " " + value.get());
    }
}

From source file:crawler.TextTFIDF.java

License:Apache License

/**
 * Loads the TF-IDF vectors written to
 * {@code outputDir + "/tfidf-vectors/part-r-00000"}.
 *
 * <p>This is best-effort: an {@link IOException} is reported via its stack trace
 * and whatever was read up to that point is returned.
 *
 * @return a map from record key to vector, sorted by key; possibly empty or partial
 */
public TreeMap<String, Vector> getPointsVectors() {
    TreeMap<String, Vector> points = new TreeMap<String, Vector>();

    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(outputDir + "/tfidf-vectors/part-r-00000");
        SequenceFile.Reader readerTFIDF = new SequenceFile.Reader(fs, path, conf);
        try {
            Text keyTFIDF = new Text();
            VectorWritable valueTFIDF = new VectorWritable();
            while (readerTFIDF.next(keyTFIDF, valueTFIDF)) {
                Vector point = valueTFIDF.get();
                points.put(keyTFIDF.toString(), point);
            }
        } finally {
            // Close even when a read fails so the reader is not leaked.
            readerTFIDF.close();
        }
    } catch (IOException e) {
        // Deliberately non-fatal: callers receive what was read before the failure.
        e.printStackTrace();
    }

    return points;
}

From source file:csvToSequence.ConvertToSeqLargeTxtVec.java

/**
 * Converts a CSV file into a sequence file of (Text, VectorWritable) records,
 * writing each record as soon as its line has been parsed.
 *
 * <p>The first line of the input is skipped. For every other line the first
 * eight columns are parsed as doubles to form the vector; the ninth column is
 * parsed as an integer and becomes the record key, prefixed with
 * {@code "Fraud/"} when it equals 1 and {@code "Normal/"} otherwise.
 *
 * @param args unused; input and output paths are hard-coded
 * @throws IOException if reading the CSV or writing the sequence file fails
 */
public static void main(String[] args) throws IOException {
    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();

        BufferedReader br = new BufferedReader(new FileReader(filename));
        try {
            String s;
            br.readLine(); // skip the first line

            while ((s = br.readLine()) != null) {
                String[] value = s.split(",");
                double[] numValue = new double[8];

                for (int i = 0; i < 8; i++) {
                    numValue[i] = Double.parseDouble(value[i]);
                }

                if (Integer.parseInt(value[8]) == 1) {
                    value[8] = "Fraud/" + value[8];
                } else {
                    value[8] = "Normal/" + value[8];
                }

                NamedVector oneV = new NamedVector(new DenseVector(numValue), value[8]);

                // Only the delegate is stored as the value; the name is written as the key.
                vec.set(oneV.getDelegate());
                writer.append(new Text(oneV.getName()), vec);
            }
        } finally {
            // The original never closed the CSV reader.
            br.close();
        }
    } finally {
        // Close even when parsing or appending fails so the writer is not leaked.
        writer.close();
    }
}

From source file:csvToSequence.ConvertToSeqTextVecWritable.java

/**
 * Converts a CSV file into a sequence file of (Text, VectorWritable) records,
 * collecting all rows in memory before writing them.
 *
 * <p>The first line of the input is skipped. For every other line the first
 * eight columns are parsed as doubles to form the vector; the ninth column is
 * parsed as an integer and becomes the record key, prefixed with
 * {@code "Fraud/"} when it equals 1 and {@code "Normal/"} otherwise.
 *
 * @param args unused; input and output paths are hard-coded
 * @throws FileNotFoundException if the CSV file cannot be opened
 * @throws IOException if reading the CSV or writing the sequence file fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {

    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();
    List<NamedVector> namedVectors = new ArrayList<>();

    // try-with-resources closes the CSV reader, which the original never did.
    try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
        String s;
        br.readLine(); // skip the first line
        while ((s = br.readLine()) != null) {
            String[] value = s.split(",");
            double[] numValue = new double[8];

            for (int i = 0; i < 8; i++) {
                numValue[i] = Double.parseDouble(value[i]);
            }

            if (Integer.parseInt(value[8]) == 1) {
                value[8] = "Fraud/" + value[8];
            } else {
                value[8] = "Normal/" + value[8];
            }

            namedVectors.add(new NamedVector(new DenseVector(numValue), value[8]));
        }
    }

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    // The writer is closed even when an append fails.
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class,
            VectorWritable.class)) {
        VectorWritable vec = new VectorWritable();

        for (NamedVector iter : namedVectors) {
            // Only the delegate is stored as the value; the name is written as the key.
            vec.set(iter.getDelegate());
            writer.append(new Text(iter.getName()), vec);
        }
    }
}