Example usage for org.apache.mahout.math VectorWritable VectorWritable

List of usage examples for org.apache.mahout.math VectorWritable VectorWritable

Introduction

In this page you can find the example usage for org.apache.mahout.math VectorWritable VectorWritable.

Prototype

public VectorWritable() 

Source Link

Usage

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ALS.java

License:Apache License

/**
 * Loads a row-indexed feature matrix from the sequence files placed in
 * Hadoop's distributed cache.
 *
 * @param numEntities expected number of rows; used to presize the map when positive
 * @param conf        job configuration used to locate the cached files
 * @return map from row index to feature vector
 * @throws IOException if a cached file cannot be read
 * @throws IllegalStateException if no rows were found in the cache
 */
public static OpenIntObjectHashMap<Vector> readMatrixByRowsFromDistributedCache(int numEntities,
        Configuration conf) throws IOException {

    OpenIntObjectHashMap<Vector> matrix;
    if (numEntities > 0) {
        matrix = new OpenIntObjectHashMap<Vector>(numEntities);
    } else {
        matrix = new OpenIntObjectHashMap<Vector>();
    }

    // reusable holders; SequenceFile.Reader fills them on each next()
    IntWritable indexHolder = new IntWritable();
    VectorWritable rowHolder = new VectorWritable();

    Path[] cacheFiles = HadoopUtil.getCachedFiles(conf);
    LocalFileSystem localFs = FileSystem.getLocal(conf);

    for (Path cacheFile : cacheFiles) {
        try (SequenceFile.Reader reader = new SequenceFile.Reader(localFs, cacheFile, conf)) {
            while (reader.next(indexHolder, rowHolder)) {
                matrix.put(indexHolder.get(), rowHolder.get());
            }
        }
    }

    Preconditions.checkState(!matrix.isEmpty(), "Feature matrix is empty");
    return matrix;
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java

License:Apache License

/**
 * Seeds the item-feature matrix M: for every item that has an average rating,
 * writes a feature vector whose first component is that average and whose
 * remaining components are uniform random values in [0, 1).
 *
 * @param averageRatings sparse vector of per-item average ratings
 * @throws IOException if the sequence file cannot be written
 */
private void initializeM(Vector averageRatings) throws IOException {
    Random rng = RandomUtils.getRandom();

    FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(),
            new Path(pathToM(-1), "part-m-00000"), IntWritable.class, VectorWritable.class)) {
        // reusable holders for the (item index, feature vector) records
        IntWritable itemIndex = new IntWritable();
        VectorWritable features = new VectorWritable();

        for (Vector.Element avg : averageRatings.nonZeroes()) {
            Vector featureRow = new DenseVector(numFeatures);
            // feature 0 carries the item's average rating; the rest start random
            featureRow.setQuick(0, avg.get());
            for (int f = 1; f < numFeatures; f++) {
                featureRow.setQuick(f, rng.nextDouble());
            }
            itemIndex.set(avg.index());
            features.set(featureRow);
            writer.append(itemIndex, features);
        }
    }
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.PrefAndSimilarityColumnWritable.java

License:Apache License

/**
 * Deserializes this instance: a float preference value followed by the
 * serialized similarity-column vector.
 */
@Override
public void readFields(DataInput in) throws IOException {
    prefValue = in.readFloat();
    VectorWritable columnWritable = new VectorWritable();
    columnWritable.readFields(in);
    similarityColumn = columnWritable.get();
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.VectorAndPrefsWritable.java

License:Apache License

/**
 * Deserializes this instance: a vector, then a varint element count, then
 * that many (varint userID, float value) pairs.
 */
@Override
public void readFields(DataInput in) throws IOException {
    VectorWritable vectorWritable = new VectorWritable();
    vectorWritable.readFields(in);
    vector = vectorWritable.get();

    int count = Varint.readUnsignedVarInt(in);
    userIDs = new ArrayList<>(count);
    values = new ArrayList<>(count);
    for (int n = 0; n < count; n++) {
        userIDs.add(Varint.readSignedVarLong(in));
        values.add(in.readFloat());
    }
}

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.VectorOrPrefWritable.java

License:Apache License

/**
 * Deserializes either a vector or a (userID, value) preference pair; a
 * leading boolean flag discriminates which payload follows.
 */
@Override
public void readFields(DataInput in) throws IOException {
    if (in.readBoolean()) {
        // payload is a serialized vector
        VectorWritable vectorWritable = new VectorWritable();
        vectorWritable.readFields(in);
        set(vectorWritable.get());
    } else {
        // payload is a (userID, preference value) pair; Java evaluates
        // arguments left to right, so the read order is preserved
        set(Varint.readSignedVarLong(in), in.readFloat());
    }
}

From source file:org.plista.kornakapi.core.training.LDATopicFactorizer.java

License:Apache License

/**
 * Reads the per-document topic posterior vectors from the LDA output and
 * caches them in {@code itemFeatures}, keyed by item id.
 *
 * @throws IOException if the doc-topics sequence file cannot be read
 */
private void getAllTopicPosterior() throws IOException {
    itemFeatures = new HashMap<String, Vector>();
    // try-with-resources closes the reader even when reading fails; the
    // previous Closeables.close() call was skipped on exception, leaking it
    try (Reader reader = new SequenceFile.Reader(fs, new Path(this.conf.getLDADocTopicsPath()), lconf)) {
        IntWritable key = new IntWritable();
        VectorWritable val = new VectorWritable();
        while (reader.next(key, val)) {
            itemFeatures.put(getIndexItem(key.get()), val.get());
        }
    }
}

From source file:org.plista.kornakapi.core.training.SemanticModel.java

License:Apache License

/**
 * Persists ("saves") the model — item features, index-to-item and
 * item-to-index maps — to sequence files under {@code path}.
 *
 * Retraining changes the model key, so the model is written only if the key
 * observed when the inference job started ({@code safeKey}) is still current;
 * otherwise a newer model exists and must not be overwritten with stale data.
 *
 * @param safeKey the model key observed at inference start
 * @throws IOException if writing any of the model files fails
 */
public void safe(String safeKey) throws IOException {
    if (!this.key.equals(safeKey)) {
        if (log.isInfoEnabled()) {
            log.info("Storing model Failed. Modelkey Changed");
        }
        return;
    }

    if (itemFeatures != null) {
        Path model = path.suffix("/itemFeature.model");
        // try-with-resources closes the writer even if append() throws;
        // previously Closeables.close() was skipped on exception, leaking it
        try (Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, VectorWritable.class)) {
            // reusable holders instead of per-iteration allocations
            Text id = new Text();
            VectorWritable val = new VectorWritable();
            for (String itemid : itemFeatures.keySet()) {
                id.set(itemid);
                val.set(itemFeatures.get(itemid));
                w.append(id, val);
            }
        }
    }
    if (indexItem != null) {
        Path model = path.suffix("/indexItem.model");
        try (Writer w = SequenceFile.createWriter(fs, lconf, model, IntWritable.class, Text.class)) {
            IntWritable key = new IntWritable();
            Text val = new Text();
            for (Integer itemid : indexItem.keySet()) {
                key.set(itemid);
                val.set(indexItem.get(itemid));
                w.append(key, val);
            }
        }
    }
    if (itemIndex != null) {
        Path model = path.suffix("/itemIndex.model");
        try (Writer w = SequenceFile.createWriter(fs, lconf, model, Text.class, IntWritable.class)) {
            Text key = new Text();
            IntWritable val = new IntWritable();
            for (String itemid : itemIndex.keySet()) {
                key.set(itemid);
                val.set(itemIndex.get(itemid));
                w.append(key, val);
            }
        }
    }
    if (log.isInfoEnabled()) {
        log.info("LDA Model Safed");
    }
}

From source file:org.plista.kornakapi.core.training.SemanticModel.java

License:Apache License

/**
 * Loads the model from sequence files under {@code path}: the index-to-item
 * map, the item-to-index map, and the item feature vectors. Each part is
 * loaded only if its file exists.
 *
 * @throws IOException if reading any of the model files fails
 */
public void read() throws IOException {
    Path indexPath = path.suffix("/indexItem.model");
    if (fs.exists(indexPath)) {
        indexItem = new HashMap<Integer, String>();
        // try-with-resources closes the reader even when reading fails; the
        // previous Closeables.close() call was skipped on exception
        try (Reader reader = new SequenceFile.Reader(fs, indexPath, lconf)) {
            IntWritable key = new IntWritable();
            Text val = new Text();
            while (reader.next(key, val)) {
                indexItem.put(key.get(), val.toString());
            }
        }
    }

    Path itemIndexPath = path.suffix("/itemIndex.model");
    if (fs.exists(itemIndexPath)) {
        itemIndex = new HashMap<String, Integer>();
        try (Reader reader = new SequenceFile.Reader(fs, itemIndexPath, lconf)) {
            IntWritable val = new IntWritable();
            Text key = new Text();
            while (reader.next(key, val)) {
                itemIndex.put(key.toString(), val.get());
            }
        }
    }

    Path featurePath = path.suffix("/itemFeature.model");
    if (fs.exists(featurePath)) {
        itemFeatures = new HashMap<String, Vector>();
        try (Reader reader = new SequenceFile.Reader(fs, featurePath, lconf)) {
            Text key = new Text();
            VectorWritable val = new VectorWritable();
            while (reader.next(key, val)) {
                itemFeatures.put(key.toString(), val.get());
            }
        }
    }
    if (log.isInfoEnabled()) {
        log.info("LDA Model Read");
    }
}

From source file:org.qcri.pca.FileFormat.java

/**
 * Converts dense text matrix files (one whitespace-separated row per line)
 * into Hadoop sequence files of (IntWritable line number, VectorWritable row).
 * The line counter is global across all input files.
 *
 * @param inputPath        path to a single file or a directory of files
 * @param cardinality      declared column count — NOTE(review): currently
 *                         unused; each vector is sized from the tokens
 *                         actually found on its line. Kept for interface
 *                         compatibility.
 * @param outputFolderPath folder receiving one "&lt;name&gt;.seq" file per input file
 */
public static void convertFromDenseToSeq(String inputPath, int cardinality, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);

        // reusable record holders
        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        File inputFile = new File(inputPath);
        File[] filePathList;
        if (inputFile.isFile()) {
            filePathList = new File[] { inputFile };
        } else {
            filePathList = inputFile.listFiles();
        }
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
            return;
        }

        int lineNumber = 0;
        for (File file : filePathList) {
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            // try-with-resources closes both streams even on parse errors
            // (the reader was never closed before, and the writer leaked on exception)
            try (BufferedReader br = new BufferedReader(new FileReader(file));
                    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName),
                            IntWritable.class, VectorWritable.class, CompressionType.BLOCK)) {
                String thisLine;
                while ((thisLine = br.readLine()) != null) {
                    if (thisLine.isEmpty()) {
                        continue;
                    }
                    String[] tokens = thisLine.split("\\s+");
                    Vector vector = new SequentialAccessSparseVector(tokens.length);
                    for (int i = 0; i < tokens.length; i++) {
                        vector.set(i, Double.parseDouble(tokens[i]));
                    }
                    key.set(lineNumber);
                    value.set(vector);
                    writer.append(key, value);
                    lineNumber++;
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:org.qcri.pca.FileFormat.java

/**
 * Converts matrix files in coordinate (COO) format — CSV lines of
 * "rowID,colID,value", assumed grouped by rowID — into Hadoop sequence files
 * of (IntWritable row id, VectorWritable sparse row of size {@code cardinality}).
 *
 * A row is flushed when a line with a different rowID is seen, so the pending
 * last row of a file is only appended when the next file's first differing
 * rowID arrives, or after all files at the end (the original assumption is
 * that each file starts a new row).
 *
 * @param inputPath        path to a single file or a directory of COO files
 * @param cardinality      number of columns in the matrix (row vector size)
 * @param base             index base of colID in the input (e.g. 1 for 1-based)
 * @param outputFolderPath folder receiving one "&lt;name&gt;.seq" file per input file
 */
public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer = null;

        // reusable record holders
        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        Vector vector = null;
        int prevRowID = -1;
        boolean first = true;

        File inputFile = new File(inputPath);
        File[] filePathList;
        if (inputFile.isFile()) {
            filePathList = new File[] { inputFile };
        } else {
            filePathList = inputFile.listFiles();
        }
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
            return;
        }

        for (File file : filePathList) {
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            // NOTE(review): a writer opened for a previous file is replaced here
            // without being closed; only the final writer is closed below.
            // Closing earlier writers would change which output file receives a
            // row spanning a file boundary, so original behavior is preserved.
            writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class,
                    VectorWritable.class, CompressionType.BLOCK);
            // try-with-resources closes the reader (it previously leaked)
            try (BufferedReader br = new BufferedReader(new FileReader(file))) {
                String thisLine;
                while ((thisLine = br.readLine()) != null) {
                    String[] splitted = thisLine.split(",");
                    int rowID = Integer.parseInt(splitted[0]);
                    int colID = Integer.parseInt(splitted[1]);
                    double element = Double.parseDouble(splitted[2]);
                    if (first) {
                        first = false;
                        vector = new SequentialAccessSparseVector(cardinality);
                    } else if (rowID != prevRowID) {
                        // row changed: flush the completed previous row
                        key.set(prevRowID);
                        value.set(vector);
                        writer.append(key, value);
                        vector = new SequentialAccessSparseVector(cardinality);
                    }
                    prevRowID = rowID;
                    vector.set(colID - base, element);
                }
            }
        }
        if (writer != null) {
            // flush the pending final row of the last file
            key.set(prevRowID);
            value.set(vector);
            writer.append(key, value);
            writer.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}