Example usage for org.apache.mahout.math Vector set

List of usage examples for org.apache.mahout.math Vector set

Introduction

On this page you can find example usages for org.apache.mahout.math Vector set.

Prototype

void set(int index, double value);

Document

Set the value at the given index
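
Before the full examples below, here is a minimal sketch of the call, assuming mahout-math is on the classpath; the class name, cardinality, and index/value pairs are arbitrary illustrations. set stores the value at the given index, overwriting any previous value, while unset indices keep the sparse default of 0.0.

import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class VectorSetSketch {
    public static void main(String[] args) {
        // sparse vector with room for indices 0..9
        Vector v = new RandomAccessSparseVector(10);

        v.set(3, 2.5);
        v.set(3, 4.0); // overwrites the previous value at index 3

        // unset indices report the default value of 0.0
        System.out.println(v.get(3) + " " + v.get(7)); // prints "4.0 0.0"
    }
}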

Usage

From source file: opennlp.addons.mahout.AbstractOnlineLearnerTrainer.java

License: Apache License

protected void trainOnlineLearner(DataIndexer indexer, org.apache.mahout.classifier.OnlineLearner pa) {
    int cardinality = indexer.getPredLabels().length;
    int outcomes[] = indexer.getOutcomeList();

    for (int i = 0; i < indexer.getContexts().length; i++) {

        // one sparse feature vector per training event
        Vector vector = new RandomAccessSparseVector(cardinality);

        int features[] = indexer.getContexts()[i];

        // mark each active feature with the number of times this event was seen
        for (int fi = 0; fi < features.length; fi++) {
            vector.set(features[fi], indexer.getNumTimesEventsSeen()[i]);
        }

        pa.train(outcomes[i], vector);
    }
}

From source file: opennlp.addons.mahout.VectorClassifierModel.java

License: Apache License

public double[] eval(String[] features) {
    Vector vector = new RandomAccessSparseVector(predMap.size());

    for (String feature : features) {
        Integer featureId = predMap.get(feature);

        if (featureId != null) {
            // count how often each known feature occurs
            vector.set(featureId, vector.get(featureId) + 1);
        }
    }

    Vector resultVector = classifier.classifyFull(vector);

    double outcomes[] = new double[classifier.numCategories()];

    for (int i = 0; i < outcomes.length; i++) {
        outcomes[i] = resultVector.get(i);
    }

    return outcomes;
}

From source file: org.apache.crunch.examples.Recommender.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println();
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        System.err.println();
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }
    Pipeline pipeline = new MRPipeline(Recommender.class, getConf());
    /*
     * input node
     */
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    /*
     * S0 + GBK
     */
    PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {

        @Override
        public Pair<Long, Long> map(String input) {
            String[] split = input.split("[,\\s]");
            long userID = Long.parseLong(split[0]);
            long itemID = Long.parseLong(split[1]);
            return Pair.of(userID, itemID);
        }
    }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey();

    /*
     * S1
     */
    PTable<Long, Vector> userVector = userWithPrefs
            .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
                @Override
                public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
                    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (long itemPref : input.second()) {
                        userVector.set((int) itemPref, 1.0f);
                    }
                    return Pair.of(input.first(), userVector);
                }
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

    /*
     * S2 + GBK
     */
    PGroupedTable<Integer, Integer> coOccurencePairs = userVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
                    Iterator<Vector.Element> it = input.second().iterateNonZero();
                    while (it.hasNext()) {
                        int index1 = it.next().index();
                        Iterator<Vector.Element> it2 = input.second().iterateNonZero();
                        while (it2.hasNext()) {
                            int index2 = it2.next().index();
                            emitter.emit(Pair.of(index1, index2));
                        }
                    }
                }
            }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey();

    /*
     * S3
     */
    PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
            .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
                @Override
                public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
                    Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (int itemIndex2 : input.second()) {
                        cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
                    }
                    return Pair.of(input.first(), cooccurrenceRow);
                }
            }, Writables.tableOf(Writables.ints(), Writables.vectors()));

    /*
     * asText
     */
    pipeline.writeTextFile(coOccurenceVector, args[1]);
    PipelineResult result = pipeline.done();

    return result.succeeded() ? 0 : 1;
}

From source file: org.gpfvic.mahout.cf.taste.hadoop.item.ItemFilterAsVectorAndPrefsReducer.java

License: Apache License

@Override
protected void reduce(VarLongWritable itemID, Iterable<VarLongWritable> values, Context ctx)
        throws IOException, InterruptedException {

    int itemIDIndex = TasteHadoopUtils.idToIndex(itemID.get());
    Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
    /* artificial NaN summand to exclude this item from the recommendations for all users specified in userIDs */
    vector.set(itemIDIndex, Double.NaN);

    List<Long> userIDs = new ArrayList<>();
    List<Float> prefValues = new ArrayList<>();
    for (VarLongWritable userID : values) {
        userIDs.add(userID.get());
        prefValues.add(1.0f);
    }

    itemIDIndexWritable.set(itemIDIndex);
    vectorAndPrefs.set(vector, userIDs, prefValues);
    ctx.write(itemIDIndexWritable, vectorAndPrefs);
}

From source file: org.gpfvic.mahout.cf.taste.hadoop.item.SimilarityMatrixRowWrapperMapper.java

License: Apache License

@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector similarityMatrixRow = value.get();
    /* remove self similarity */
    similarityMatrixRow.set(key.get(), Double.NaN);

    index.set(key.get());
    vectorOrPref.set(similarityMatrixRow);

    context.write(index, vectorOrPref);
}

From source file: org.gpfvic.mahout.cf.taste.hadoop.item.ToUserVectorsReducer.java

License: Apache License

@Override
protected void reduce(VarLongWritable userID, Iterable<VarLongWritable> itemPrefs, Context context)
        throws IOException, InterruptedException {
    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (VarLongWritable itemPref : itemPrefs) {
        int index = TasteHadoopUtils.idToIndex(itemPref.get());
        float value = itemPref instanceof EntityPrefWritable ? ((EntityPrefWritable) itemPref).getPrefValue()
                : 1.0f;
        userVector.set(index, value);
    }

    if (userVector.getNumNondefaultElements() >= minPreferences) {
        userVectorWritable.set(userVector);
        userVectorWritable.setWritesLaxPrecision(true);
        context.getCounter(Counters.USERS).increment(1);
        context.write(userID, userVectorWritable);
    }
}

From source file: org.qcri.pca.FileFormat.java

public static void convertFromDenseToSeq(String inputPath, int cardinality, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer;

        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        int lineNumber = 0;
        String thisLine;
        File[] filePathList = null;
        File inputFile = new File(inputPath);
        if (inputFile.isFile()) // if it is a file
        {
            filePathList = new File[1];
            filePathList[0] = inputFile;
        } else {
            filePathList = inputFile.listFiles();
        }
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
            return;
        }
        for (File file : filePathList) {
            BufferedReader br = new BufferedReader(new FileReader(file));
            Vector vector = null;
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class,
                    VectorWritable.class, CompressionType.BLOCK);
            while ((thisLine = br.readLine()) != null) { // while loop begins here
                if (thisLine.isEmpty())
                    continue;
                String[] splitted = thisLine.split("\\s+");
                vector = new SequentialAccessSparseVector(splitted.length);
                for (int i = 0; i < splitted.length; i++) {
                    vector.set(i, Double.parseDouble(splitted[i]));
                }
                key.set(lineNumber);
                value.set(vector);
                //System.out.println(vector);
                writer.append(key, value); // write this row
                lineNumber++;
            }
            writer.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

}

From source file: org.qcri.pca.FileFormat.java

public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer = null;

        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        Vector vector = null;

        String thisLine;
        int prevRowID = -1;
        boolean first = true;
        File[] filePathList = null;
        File inputFile = new File(inputPath);
        if (inputFile.isFile()) // if it is a file
        {
            filePathList = new File[1];
            filePathList[0] = inputFile;
        } else {
            filePathList = inputFile.listFiles();
        }
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
            return;
        }
        for (File file : filePathList) {
            BufferedReader br = new BufferedReader(new FileReader(file));
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class,
                    VectorWritable.class, CompressionType.BLOCK);
            while ((thisLine = br.readLine()) != null) { // while loop begins here            
                String[] splitted = thisLine.split(",");
                int rowID = Integer.parseInt(splitted[0]);
                int colID = Integer.parseInt(splitted[1]);
                double element = Double.parseDouble(splitted[2]);
                if (first) {
                    first = false;
                    vector = new SequentialAccessSparseVector(cardinality);
                } else if (rowID != prevRowID) {
                    key.set(prevRowID);
                    value.set(vector);
                    //System.out.println(vector);
                    writer.append(key, value); // write the completed previous row
                    vector = new SequentialAccessSparseVector(cardinality);
                }
                prevRowID = rowID;
                vector.set(colID - base, element);
            }
            /*//here we append the last vector in each file (assuming that we will start a new row in the next file
            key.set(prevRowID);
            value.set(vector);
            //System.out.println("last vector");
            //System.out.println(vector);
            writer.append(key,value);//write last row
            writer.close();
            */
        }
        if (writer != null) //append last vector in last file
        {
            key.set(prevRowID);
            value.set(vector);
            //System.out.println("last vector");
            //System.out.println(vector);
            writer.append(key, value);//write last row
            writer.close();
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file: org.qcri.pca.MahoutCompatibilityTest.java

License: Apache License

@Test
public void testMAHOUT_1238() throws IOException {
    Vector v = new SequentialAccessSparseVector(5);
    v.set(1, 3.0);
    v.set(3, 5.0);
    Vector view = new VectorView(v, 0, v.size());
    doTestVectorWritableEquals(view);
}

From source file: org.qcri.sparkpca.FileFormat.java

public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer = null;

        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        Vector vector = null;

        String thisLine;

        int lineNumber = 0;
        int prevRowID = -1;
        boolean first = true;
        File[] filePathList = null;
        File inputFile = new File(inputPath);
        if (inputFile.isFile()) // if it is a file
        {
            filePathList = new File[1];
            filePathList[0] = inputFile;
        } else {
            filePathList = inputFile.listFiles();
        }
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
            return;
        }
        for (File file : filePathList) {
            BufferedReader br = new BufferedReader(new FileReader(file));
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class,
                    VectorWritable.class, CompressionType.BLOCK);
            while ((thisLine = br.readLine()) != null) { // while loop begins here            
                String[] splitted = thisLine.split(",");
                int rowID = Integer.parseInt(splitted[0]);
                int colID = Integer.parseInt(splitted[1]);
                double element = Double.parseDouble(splitted[2]);
                if (first) {
                    first = false;
                    vector = new SequentialAccessSparseVector(cardinality);
                } else if (rowID != prevRowID) {
                    key.set(prevRowID);
                    value.set(vector);
                    //System.out.println(vector);
                    writer.append(key, value); // write the completed previous row
                    vector = new SequentialAccessSparseVector(cardinality);
                }
                prevRowID = rowID;
                vector.set(colID - base, element);
            }
        }
        if (writer != null) //append last vector in last file
        {
            key.set(prevRowID);
            value.set(vector);
            writer.append(key, value);//write last row
            writer.close();
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
}