void set(int index, double value);

Sets the value at the given index.


From source file:opennlp.addons.mahout.AbstractOnlineLearnerTrainer.java

License:Apache License

/**
 * Feeds every event held by the indexer to the given online learner.
 *
 * <p>For each context row a sparse vector, sized by the number of predicate labels, is
 * built and handed to {@code pa.train} together with the outcome stored at the same position.
 *
 * <p>NOTE(review): this excerpt is truncated; the closing braces of the outer loop and
 * of the method are not visible here.
 *
 * @param indexer supplies predicate labels, outcomes, contexts and per-event counts
 * @param pa learner that receives one {@code train} call per context row
 */
protected void trainOnlineLearner(DataIndexer indexer, org.apache.mahout.classifier.OnlineLearner pa) {
    // Vector width: one slot per predicate label.
    int cardinality = indexer.getPredLabels().length;
    int outcomes[] = indexer.getOutcomeList();

    for (int i = 0; i < indexer.getContexts().length; i++) {

        Vector vector = new RandomAccessSparseVector(cardinality);

        int features[] = indexer.getContexts()[i];

        // Every feature index of row i is set to the same value: the "times seen" count of row i.
        for (int fi = 0; fi < features.length; fi++) {
            vector.set(features[fi], indexer.getNumTimesEventsSeen()[i]);
        }

        pa.train(outcomes[i], vector);

From source file:opennlp.addons.mahout.VectorClassifierModel.java

License:Apache License

/**
 * Scores a set of string features with the wrapped classifier.
 *
 * <p>NOTE(review): the excerpt has lost several closing braces (first loop, second loop,
 * method); the statements themselves are shown as scraped.
 *
 * @param features feature names; a name for which {@code predMap.get} returns null is skipped
 * @return an array of length {@code classifier.numCategories()} copied element by element
 *         from the result of {@code classifier.classifyFull}
 */
public double[] eval(String[] features) {
    // One slot per entry of predMap.
    Vector vector = new RandomAccessSparseVector(predMap.size());

    for (String feature : features) {
        Integer featureId = predMap.get(feature);

        // Known features are counted: each occurrence adds 1 to the feature's slot.
        if (featureId != null) {
            vector.set(featureId, vector.get(featureId) + 1);
        }

    Vector resultVector = classifier.classifyFull(vector);

    double outcomes[] = new double[classifier.numCategories()];

    // Copy the classifier output into a plain array.
    for (int i = 0; i < outcomes.length; i++) {
        outcomes[i] = resultVector.get(i);

    return outcomes;

From source file:org.apache.crunch.examples.Recommender.java

License:Apache License

/**
 * Builds and runs a Crunch {@code MRPipeline} that reads text lines from {@code args[0]},
 * derives per-user item vectors, item co-occurrence pairs and per-item co-occurrence rows,
 * and writes those rows as text to {@code args[1]}.
 *
 * <p>NOTE(review): this excerpt was scraped with closing braces and comment delimiters
 * removed; lines such as "* input node" are remnants of the original block comments.
 *
 * @param args exactly two entries: input path and output path
 * @return 1 when the argument count is not 2; otherwise 0 if the pipeline result reports
 *         success and 1 if it does not
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println();
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        return 1;
    Pipeline pipeline = new MRPipeline(Recommender.class, getConf());
     * input node
    PCollection<String> lines = pipeline.readTextFile(args[0]);

     * S0 + GBK
    // Parse each line into a (userID, itemID) pair, then group the item IDs by user.
    PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {

        public Pair<Long, Long> map(String input) {
            // Fields are separated by a comma or a single whitespace character;
            // only the first two fields are read.
            String[] split = input.split("[,\\s]");
            long userID = Long.parseLong(split[0]);
            long itemID = Long.parseLong(split[1]);
            return Pair.of(userID, itemID);
    }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey();

     * S1
    // Turn each user's item IDs into a sparse vector holding 1.0 at every item index.
    PTable<Long, Vector> userVector = userWithPrefs
            .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
                public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
                    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (long itemPref : input.second()) {
                        // NOTE(review): narrowing cast; item IDs outside the int range are
                        // silently changed by (int) -- confirm IDs always fit in an int.
                        userVector.set((int) itemPref, 1.0f);
                    return Pair.of(input.first(), userVector);
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

     * S2 + GBK
    // For every user vector, emit each ordered pair of indexes returned by iterateNonZero
    // (an index is also paired with itself), then group the pairs by first index.
    PGroupedTable<Integer, Integer> coOccurencePairs = userVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
                    Iterator<Vector.Element> it = input.second().iterateNonZero();
                    while (it.hasNext()) {
                        int index1 = it.next().index();
                        // A fresh inner iterator walks the same vector for each outer element.
                        Iterator<Vector.Element> it2 = input.second().iterateNonZero();
                        while (it2.hasNext()) {
                            int index2 = it2.next().index();
                            emitter.emit(Pair.of(index1, index2));
            }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey();

     * S3
    // Collapse each group into a row vector that counts how often each second index occurred.
    PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
            .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
                public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
                    Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (int itemIndex2 : input.second()) {
                        cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
                    return Pair.of(input.first(), cooccurrenceRow);
            }, Writables.tableOf(Writables.ints(), Writables.vectors()));

     * asText
    pipeline.writeTextFile(coOccurenceVector, args[1]);
    PipelineResult result = pipeline.done();

    // Map pipeline success to a process-style exit code.
    return result.succeeded() ? 0 : 1;

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.ItemFilterAsVectorAndPrefsReducer.java

License:Apache License

/**
 * Writes, for one item, a single-entry vector holding NaN at the item's index together
 * with the user IDs received as values.
 *
 * <p>NOTE(review): in the visible lines {@code prefValues} is never filled and
 * {@code itemIDIndexWritable} is never set; the excerpt appears to omit statements
 * (the loop's closing brace is missing as well) -- confirm against the full source.
 *
 * @param itemID item whose index is marked in the vector
 * @param values user IDs associated with the item
 * @param ctx context that receives the (index, vector-and-prefs) record
 */
protected void reduce(VarLongWritable itemID, Iterable<VarLongWritable> values, Context ctx)
        throws IOException, InterruptedException {

    int itemIDIndex = TasteHadoopUtils.idToIndex(itemID.get());
    Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
    /* artificial NaN summand to exclude this item from the recommendations for all users specified in userIDs */
    vector.set(itemIDIndex, Double.NaN);

    List<Long> userIDs = new ArrayList<>();
    List<Float> prefValues = new ArrayList<>();
    // Collect every user ID delivered for this item.
    for (VarLongWritable userID : values) {
        userIDs.add(userID.get());

    vectorAndPrefs.set(vector, userIDs, prefValues);
    ctx.write(itemIDIndexWritable, vectorAndPrefs);

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.SimilarityMatrixRowWrapperMapper.java

License:Apache License

/**
 * Overwrites the entry at the row's own index with NaN and writes a record keyed by
 * that index.
 *
 * <p>NOTE(review): {@code vectorOrPref} is written without being assigned in the visible
 * lines; the excerpt presumably omits the statement that stores the row in it -- confirm.
 *
 * @param key index of the similarity-matrix row
 * @param value wrapper holding the row vector; the wrapped vector is modified in place
 * @param context context that receives the output record
 */
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector similarityMatrixRow = value.get();
    /* remove self similarity */
    similarityMatrixRow.set(key.get(), Double.NaN);

    // Copy the key into the writable field "index" before emitting.
    index.set(key.get());

    context.write(index, vectorOrPref);

From source file:org.gpfvic.mahout.cf.taste.hadoop.item.ToUserVectorsReducer.java

License:Apache License

/**
 * Builds one sparse preference vector per user and writes it when the user has at least
 * {@code minPreferences} non-default entries.
 *
 * <p>NOTE(review): {@code userVectorWritable} is written without being assigned in the
 * visible lines, and the loop/if closing braces are missing; the excerpt appears to omit
 * statements -- confirm against the full source.
 *
 * @param userID user whose preferences are being collected
 * @param itemPrefs item IDs, optionally carrying a preference value
 * @param context context that receives the (userID, vector) record
 */
protected void reduce(VarLongWritable userID, Iterable<VarLongWritable> itemPrefs, Context context)
        throws IOException, InterruptedException {
    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (VarLongWritable itemPref : itemPrefs) {
        int index = TasteHadoopUtils.idToIndex(itemPref.get());
        // Use the stored preference when the writable carries one; otherwise default to 1.0.
        float value = itemPref instanceof EntityPrefWritable ? ((EntityPrefWritable) itemPref).getPrefValue()
                : 1.0f;
        userVector.set(index, value);

    // Users with too few preferences are not emitted.
    if (userVector.getNumNondefaultElements() >= minPreferences) {
        context.write(userID, userVectorWritable);

From source file:org.qcri.pca.FileFormat.java

/**
 * Converts dense text input (whitespace-separated numbers, one row per line) into
 * sequence files of (IntWritable, VectorWritable) records, one ".seq" file per input file.
 *
 * <p>NOTE(review): this excerpt is heavily elided. In the visible lines {@code key} and
 * {@code value} are never populated, {@code cardinality} and {@code lineNumber} are never
 * used, the empty-line {@code if} has no body, and the reader/writer are never closed --
 * confirm all of these against the full source before relying on this description.
 *
 * @param inputPath a single file, or a directory whose files are all converted
 * @param cardinality not used by the visible lines
 * @param outputFolderPath folder in which "&lt;input file name&gt;.seq" files are created
 */
public static void convertFromDenseToSeq(String inputPath, int cardinality, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer;

        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        int lineNumber = 0;
        String thisLine;
        File[] filePathList = null;
        File inputFile = new File(inputPath);
        // A single file is wrapped in a one-element array; otherwise the directory is listed.
        if (inputFile.isFile()) // if it is a file
            filePathList = new File[1];
            filePathList[0] = inputFile;
        } else {
            filePathList = inputFile.listFiles();
        // NOTE(review): no early return is visible after this log call; if none exists,
        // the loop below would fail on a null array -- confirm.
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
        for (File file : filePathList) {
            BufferedReader br = new BufferedReader(new FileReader(file));
            Vector vector = null;
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class,
                    VectorWritable.class, CompressionType.BLOCK);
            while ((thisLine = br.readLine()) != null) { // while loop begins here
                if (thisLine.isEmpty())
                // Tokens are separated by runs of whitespace; the vector is sized by the
                // number of tokens on this line.
                String[] splitted = thisLine.split("\\s+");
                vector = new SequentialAccessSparseVector(splitted.length);
                for (int i = 0; i < splitted.length; i++) {
                    vector.set(i, Double.parseDouble(splitted[i]));
                writer.append(key, value);//write last row
    } catch (Exception e) {


From source file:org.qcri.pca.FileFormat.java

/**
 * Converts coordinate-format text input (lines of "row,column,value") into sequence
 * files of (IntWritable, VectorWritable) records, one ".seq" file per input file.
 *
 * <p>A new vector of size {@code cardinality} is started for the first line and whenever
 * the row ID differs from the previous line's row ID.
 *
 * <p>NOTE(review): this excerpt is heavily elided. In the visible lines {@code key} and
 * {@code value} are never populated before {@code writer.append}, several closing braces
 * are missing, and a block comment near the end is left unterminated -- confirm against
 * the full source.
 *
 * @param inputPath a single file, or a directory whose files are all converted
 * @param cardinality size of every row vector
 * @param base value subtracted from each column ID before it is used as a vector index
 * @param outputFolderPath folder in which "&lt;input file name&gt;.seq" files are created
 */
public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer = null;

        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        Vector vector = null;

        String thisLine;
        int prevRowID = -1;
        boolean first = true;
        File[] filePathList = null;
        File inputFile = new File(inputPath);
        // A single file is wrapped in a one-element array; otherwise the directory is listed.
        if (inputFile.isFile()) // if it is a file
            filePathList = new File[1];
            filePathList[0] = inputFile;
        } else {
            filePathList = inputFile.listFiles();
        // NOTE(review): no early return is visible after this log call -- confirm.
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
        for (File file : filePathList) {
            BufferedReader br = new BufferedReader(new FileReader(file));
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class,
                    VectorWritable.class, CompressionType.BLOCK);
            while ((thisLine = br.readLine()) != null) { // while loop begins here
                // Each line carries three comma-separated fields: row ID, column ID, value.
                String[] splitted = thisLine.split(",");
                int rowID = Integer.parseInt(splitted[0]);
                int colID = Integer.parseInt(splitted[1]);
                double element = Double.parseDouble(splitted[2]);
                if (first) {
                    first = false;
                    vector = new SequentialAccessSparseVector(cardinality);
                // A change of row ID flushes the previous row and starts a fresh vector.
                } else if (rowID != prevRowID) {
                    writer.append(key, value);//write last row
                    vector = new SequentialAccessSparseVector(cardinality);
                prevRowID = rowID;
                // Column IDs are shifted by "base" to obtain the vector index.
                vector.set(colID - base, element);
            /*//here we append the last vector in each file (assuming that we will start a new row in the next file
            //System.out.println("last vector");
            writer.append(key,value);//write last row
        if (writer != null) //append last vector in last file
            //System.out.println("last vector");
            writer.append(key, value);//write last row

    } catch (Exception e) {

From source file:org.qcri.pca.MahoutCompatibilityTest.java

License:Apache License

// Sets up a size-5 sequential-access sparse vector with two entries and wraps all of it
// in a VectorView.
// NOTE(review): the excerpt ends after the view is created; the statements that exercise
// the view are not visible here.
public void testMAHOUT_1238() throws IOException {
    Vector v = new SequentialAccessSparseVector(5);
    v.set(1, 3.0);
    v.set(3, 5.0);
    // View starting at offset 0 and spanning v.size() elements.
    Vector view = new VectorView(v, 0, v.size());

From source file:org.qcri.sparkpca.FileFormat.java

public static void convertFromCooToSeq(String inputPath, int cardinality, int base, String outputFolderPath) {
    try {
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer = null;

        final IntWritable key = new IntWritable();
        final VectorWritable value = new VectorWritable();

        Vector vector = null;

        String thisLine;

        int lineNumber = 0;
        int prevRowID = -1;
        boolean first = true;
        File[] filePathList = null;
        File inputFile = new File(inputPath);
        if (inputFile.isFile()) // if it is a file
            filePathList = new File[1];
            filePathList[0] = inputFile;
        } else {
            filePathList = inputFile.listFiles();
        if (filePathList == null) {
            log.error("The path " + inputPath + " does not exist");
        for (File file : filePathList) {
            BufferedReader br = new BufferedReader(new FileReader(file));
            String outputFileName = outputFolderPath + File.separator + file.getName() + ".seq";
            writer = SequenceFile.createWriter(fs, conf, new Path(outputFileName), IntWritable.class,
                    VectorWritable.class, CompressionType.BLOCK);
            while ((thisLine = br.readLine()) != null) { // while loop begins here            
                String[] splitted = thisLine.split(",");
                int rowID = Integer.parseInt(splitted[0]);
                int colID = Integer.parseInt(splitted[1]);
                double element = Double.parseDouble(splitted[2]);
                if (first) {
                    first = false;
                    vector = new SequentialAccessSparseVector(cardinality);
                } else if (rowID != prevRowID) {
                    writer.append(key, value);//write last row
                    vector = new SequentialAccessSparseVector(cardinality);
                prevRowID = rowID;
                vector.set(colID - base, element);
        if (writer != null) //append last vector in last file
            writer.append(key, value);//write last row

    } catch (Exception e) {