List of usage examples for org.apache.mahout.math Vector set
void set(int index, double value);
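Before the full examples, a minimal sketch of the call itself: set(index, value) writes value at the given position and behaves the same on dense and sparse implementations. The class name, sizes, and values below are illustrative only, not taken from the source files that follow.

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class VectorSetDemo {
    public static void main(String[] args) {
        Vector dense = new DenseVector(3);
        dense.set(0, 1.5);   // position 0 now holds 1.5
        dense.set(2, -0.25); // unset positions stay 0.0

        Vector sparse = new RandomAccessSparseVector(1000);
        sparse.set(42, 7.0); // only index 42 is materialized

        System.out.println(dense + " / " + sparse.get(42));
    }
}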
From source file:edu.snu.dolphin.bsp.examples.ml.sub.MapOfIntVSumCodec.java
License:Apache License
@Override
public Map<Integer, VectorSum> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final Map<Integer, VectorSum> resultMap = new HashMap<>();
    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int mapSize = dais.readInt();
        for (int i = 0; i < mapSize; i++) {
            final int mapInt = dais.readInt();
            final int vectorSize = dais.readInt();
            final Vector vector = new DenseVector(vectorSize);
            for (int j = 0; j < vectorSize; j++) {
                vector.set(j, dais.readDouble());
            }
            final int count = dais.readInt();
            resultMap.put(mapInt, new VectorSum(vector, count));
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultMap;
}
From source file:edu.snu.dolphin.bsp.examples.ml.sub.VectorListCodec.java
License:Apache License
public List<Vector> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final List<Vector> resultList = new LinkedList<>();
    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int listSize = dais.readInt();
        final int length = dais.readInt();
        for (int i = 0; i < listSize; i++) {
            final Vector vector = new DenseVector(length);
            for (int j = 0; j < length; j++) {
                vector.set(j, dais.readDouble());
            }
            resultList.add(vector);
        }
    } catch (final IOException e) {
        throw new RuntimeException(e.getCause());
    }
    return resultList;
}
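The decode method above implies a simple wire format: a list count, one shared vector length, then the raw doubles. A matching encode counterpart could look like the following sketch; the method is inferred from the decode logic and is not part of the original source (it assumes java.io.ByteArrayOutputStream and java.io.DataOutputStream imports).

public byte[] encode(final List<Vector> list) {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(list.size());                      // list count
        final int length = list.isEmpty() ? 0 : list.get(0).size();
        daos.writeInt(length);                           // shared vector length
        for (final Vector vector : list) {
            for (int i = 0; i < length; i++) {
                daos.writeDouble(vector.get(i));         // raw double per element
            }
        }
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
    return baos.toByteArray();
}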
From source file:edu.utsa.sifter.som.SelfOrganizingMap.java
License:Apache License
void assignTopTerms(final int numTopTerms, final java.util.Vector<String> terms) {
    final PriorityQueue<TermPair> topWeights =
            new PriorityQueue<TermPair>(numTopTerms, new TermPair.TermPairComparator());
    for (int i = 0; i < numCells(); ++i) {
        final java.util.Vector<String> topTerms = new java.util.Vector<String>(numTopTerms);
        topTerms.setSize(numTopTerms);
        final Vector cell = getCell(i);
        final double f = getFactor(i);
        topWeights.clear();
        for (Vector.Element w : cell) {
            int val = (int) (1000 * f * w.get());
            if (topWeights.size() < numTopTerms) {
                topWeights.add(new TermPair(terms.get(w.index()), val));
            } else if (topWeights.peek().DocCount < val) {
                topWeights.remove();
                topWeights.add(new TermPair(terms.get(w.index()), val));
            }
        }
        // drain the min-heap backwards so topTerms ends up in descending weight order
        final int numTopWeights = topWeights.size();
        for (int j = numTopWeights - 1; j > -1; --j) {
            topTerms.set(j, topWeights.remove().Term);
        }
        getStats(i).setTopTerms(topTerms);
    }
}
From source file:hk.newsRecommender.Classify.java
License:Open Source License
public static void genNaiveBayesModel(Configuration conf, int labelIndex, String trainFile,
        String trainSeqFile, boolean hasHeader) {
    CSVReader reader = null;
    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(new Path(trainSeqFile)))
            fs.delete(new Path(trainSeqFile), true);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, new Path(trainSeqFile),
                Text.class, VectorWritable.class);
        FileSystem fsopen = FileSystem.get(conf);
        FSDataInputStream in = fsopen.open(new Path(trainFile));
        reader = new CSVReader(new InputStreamReader(in));
        String[] header = null;
        if (hasHeader)
            header = reader.readNext();
        String[] line = null;
        Long l = 0L;
        while ((line = reader.readNext()) != null) {
            if (labelIndex > line.length)
                break;
            l++;
            List<String> tmpList = Lists.newArrayList(line);
            String label = tmpList.get(labelIndex);
            if (!strLabelList.contains(label))
                strLabelList.add(label);
            // Text key = new Text("/" + label + "/" + l);
            Text key = new Text("/" + label + "/");
            tmpList.remove(labelIndex);
            VectorWritable vectorWritable = new VectorWritable();
            Vector vector = new RandomAccessSparseVector(tmpList.size(), tmpList.size());
            for (int i = 0; i < tmpList.size(); i++) {
                String tmpStr = tmpList.get(i);
                if (StringUtils.isNumeric(tmpStr))
                    vector.set(i, Double.parseDouble(tmpStr));
                else
                    vector.set(i, parseStrCell(tmpStr));
            }
            vectorWritable.set(vector);
            writer.append(key, vectorWritable);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:hk.newsRecommender.Classify.java
License:Open Source License
public static void test(Configuration conf, String testFile, int labelIndex) throws IOException {
    System.out.println("~~~ begin to test ~~~");
    AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(naiveBayesModel);
    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(testFile));
    CSVReader csv = new CSVReader(new InputStreamReader(in));
    csv.readNext(); // skip header
    String[] line = null;
    double totalSampleCount = 0.;
    double correctClsCount = 0.;
    while ((line = csv.readNext()) != null) {
        List<String> tmpList = Lists.newArrayList(line);
        String label = tmpList.get(labelIndex);
        tmpList.remove(labelIndex);
        totalSampleCount++;
        Vector vector = new RandomAccessSparseVector(tmpList.size(), tmpList.size());
        for (int i = 0; i < tmpList.size(); i++) {
            String tempStr = tmpList.get(i);
            if (StringUtils.isNumeric(tempStr)) {
                vector.set(i, Double.parseDouble(tempStr));
            } else {
                Long id = strOptionMap.get(tempStr);
                if (id != null) {
                    vector.set(i, id);
                } else {
                    // unknown categorical value: report it and leave the cell at 0
                    System.out.println(StringUtils.join(tempStr, ","));
                    continue;
                }
            }
        }
        Vector resultVector = classifier.classifyFull(vector);
        int classifyResult = resultVector.maxValueIndex();
        if (StringUtils.equals(label, strLabelList.get(classifyResult))) {
            correctClsCount++;
        }
    }
    System.out.println("Correct Ratio: " + (correctClsCount / totalSampleCount));
}
From source file:hk.newsRecommender.MatrixAndCluster.java
License:Open Source License
public static void matrix2Vector(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable val = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class,
                VectorWritable.class, CompressionType.BLOCK);
        final IntWritable key1 = new IntWritable();
        final VectorWritable value = new VectorWritable();
        int lineNum = 0;
        Vector vector = null;
        while (reader.next(key, val)) {
            int index = 0;
            StringTokenizer st = new StringTokenizer(val.toString());
            // wrap a SequentialAccessSparseVector in a NamedVector, named by line number
            vector = new NamedVector(new SequentialAccessSparseVector(Cardinality), lineNum + "");
            while (st.hasMoreTokens()) {
                if (Integer.parseInt(st.nextToken()) == 1) {
                    vector.set(index, 1);
                }
                index++;
            }
            key1.set(lineNum++);
            value.set(vector);
            writer.append(key1, value); // write the numeric key, not the key just read
        }
    } finally {
        if (writer != null)
            writer.close();
        reader.close();
    }
}
From source file:mlbench.bayes.train.WeightSummer.java
License:Apache License
@SuppressWarnings("deprecation") public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException { parseArgs(args);/*from w w w .jav a2 s . c o m*/ HashMap<String, String> conf = new HashMap<String, String>(); initConf(conf); MPI_D.Init(args, MPI_D.Mode.Common, conf); if (MPI_D.COMM_BIPARTITE_O != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O); int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O); FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O, (JobConf) config, inDir, rank); Vector weightsPerFeature = null; Vector weightsPerLabel = new DenseVector(labNum); for (int i = 0; i < inputs.length; i++) { FileSplit fsplit = inputs[i]; SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config, fsplit); IntWritable index = kvrr.createKey(); VectorWritable value = kvrr.createValue(); while (kvrr.next(index, value)) { Vector instance = value.get(); if (weightsPerFeature == null) { weightsPerFeature = new RandomAccessSparseVector(instance.size(), instance.getNumNondefaultElements()); } int label = index.get(); weightsPerFeature.assign(instance, Functions.PLUS); weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum()); } } if (weightsPerFeature != null) { MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature)); MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel)); } } else if (MPI_D.COMM_BIPARTITE_A != null) { int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A); config.set(MAPRED_OUTPUT_DIR, outDirW); config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString()); ((JobConf) config).setOutputKeyClass(Text.class); ((JobConf) config).setOutputValueClass(VectorWritable.class); TaskAttemptContext taskContext = new TaskAttemptContextImpl(config, DataMPIUtil.getHadoopTaskAttemptID()); SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>(); FileSystem fs = FileSystem.get(config); Path output = new Path(config.get(MAPRED_OUTPUT_DIR)); FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext); RecordWriter<Text, VectorWritable> outrw = null; try { fcommitter.setupJob(taskContext); outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null); } catch (IOException e) { e.printStackTrace(); System.err.println("ERROR: Please set the HDFS configuration properly\n"); System.exit(-1); } Text key = null, newKey = null; VectorWritable point = null, newPoint = null; Vector vector = null; Object[] vals = MPI_D.Recv(); while (vals != null) { newKey = (Text) vals[0]; newPoint = (VectorWritable) vals[1]; if (key == null && point == null) { } else if (!key.equals(newKey)) { outrw.write(key, new VectorWritable(vector)); vector = null; } if (vector == null) { vector = newPoint.get(); } else { vector.assign(newPoint.get(), Functions.PLUS); } key = newKey; point = newPoint; vals = MPI_D.Recv(); } if (newKey != null && newPoint != null) { outrw.write(key, new VectorWritable(vector)); } outrw.close(null); if (fcommitter.needsTaskCommit(taskContext)) { fcommitter.commitTask(taskContext); } MPI_D.COMM_BIPARTITE_A.Barrier(); if (rank == 0) { Path resOut = new Path(outDir); NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config); naiveBayesModel.serialize(resOut, config); } } MPI_D.Finalize(); }
From source file:net.aprendizajengrande.ontocluster.RedisToVectors.java
License:Open Source License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("Usage: <hdfs folder for input>");
        System.exit(1);
    }
    Configuration conf = new Configuration();
    System.out.println("Input: " + args[0]);
    // see http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    String inputName = args[0] + "/input";
    String relsInputName = args[0] + "/rels";
    String instancesInputName = args[0] + "/instances";
    Path input = new Path(inputName);
    Path relsInput = new Path(relsInputName);
    Path instancesInput = new Path(instancesInputName);
    // see http://stackoverflow.com/questions/14993644/configure-jedis-timeout
    Jedis jedis = new Jedis("localhost", 6379, 18000);
    // create the relations and instances first, so we know what to expect
    Set<String> rels = jedis.keys("rel-nom-*");
    Map<Integer, String> relIdToName = new HashMap<>();
    FSDataOutputStream fsdos = relsInput.getFileSystem(conf).create(relsInput);
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(fsdos));
    int relNum = 0;
    for (String rel : rels) {
        String relName = rel.replaceAll("^rel-nom-", "");
        int relId = Integer.parseInt(jedis.get(rel));
        relIdToName.put(relId, relName);
        if (relId > relNum)
            relNum = relId;
    }
    relNum++;
    for (int i = 0; i < relNum; i++)
        pw.println(i + "\t" + relIdToName.get(i));
    pw.close();
    rels.clear();
    Set<String> instances = jedis.keys("res-nom-*");
    fsdos = instancesInput.getFileSystem(conf).create(instancesInput);
    pw = new PrintWriter(new OutputStreamWriter(fsdos));
    for (String instance : instances) {
        int instanceId = Integer.parseInt(instance.replaceAll("^res-nom-", ""));
        String instanceName = jedis.get(instance);
        pw.println(instanceId + "\t" + instanceName);
    }
    pw.close();
    instances.clear();
    Set<String> keys = jedis.keys("r-*");
    SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(input),
            Writer.keyClass(Text.class), Writer.valueClass(VectorWritable.class));
    for (String key : keys) {
        Set<String> theseRels = jedis.smembers(key);
        Vector s = new SequentialAccessSparseVector(relNum);
        for (String relId : theseRels)
            s.set(Integer.parseInt(relId), 1.0);
        VectorWritable v = new VectorWritable(s);
        writer.append(new Text(key), v);
    }
    writer.close();
    jedis.close();
}
From source file:nl.gridline.zieook.inx.movielens.hbase.RecommendationsImportMap.java
License:Apache License
@Override
public void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    Vector similarityMatrixRow = value.get();
    // Remove self similarity from the equation
    similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY);
    // determine max non-zero element: (==item index)
    List<RecommendationElement> recommendations = new ArrayList<RecommendationElement>();
    // collect non-zero items:
    Iterator<Element> it = similarityMatrixRow.iterateNonZero();
    while (it.hasNext()) {
        Element e = it.next();
        recommendations.add(new RecommendationElement(e.index(), e.get()));
        // LOG.info("created new recommendation for " + e.index());
    }
    // sorted list of recommendations: now we have an item id, and similarity value:
    Collections.sort(recommendations, new SimilarityComparator());
    LOG.info("sorted: " + recommendations.size());
    int rank = 1;
    Put put = new Put(RowKeys.getRecommendationKey(collection, recommender, key.get()));
    for (RecommendationElement el : recommendations) {
        // if (el.getSimilarityValue() > 0) {
        byte[] data = Recommend.getRecommendation(el.getItemIndex(), rank, el.getSimilarityValue());
        put.add(RECOMMENDATION_COLUMN, Bytes.toBytes(rank), data);
        rank++;
        // }
    }
    context.write(new LongWritable(key.get()), put);
}
From source file:nl.gridline.zieook.inx.movielens.items.ItemBasedSortSimilaritiesMapper.java
License:Apache License
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    int maxIndex = -1;
    Vector similarityMatrixRow = value.get();
    /* remove self similarity */
    similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY);
    //
    // determine maximum index
    //
    Iterator<Element> it = similarityMatrixRow.iterateNonZero();
    while (it.hasNext()) {
        Element e = it.next();
        // e.index() == item id
        if (e.index() > maxIndex) {
            maxIndex = e.index();
        }
    }
    // System.out.println(String.format("key: %d maxIndex: %d", key.get(), maxIndex));
    if (maxIndex > 0) {
        // size the array to include maxIndex itself; it is non-zero by construction
        RecommendationElement[] itemBasedRecommendations = new RecommendationElement[maxIndex + 1];
        for (int i = 0; i <= maxIndex; i++) {
            Element element = similarityMatrixRow.getElement(i);
            double similarityValue = Double.NEGATIVE_INFINITY;
            if (element != null) {
                similarityValue = element.get();
            }
            itemBasedRecommendations[i] = new RecommendationElement(i, similarityValue);
        }
        Arrays.sort(itemBasedRecommendations, new SimilarityComparator());
        RecommendationElementArray array = new RecommendationElementArray(itemBasedRecommendations);
        context.write(new VarIntWritable(key.get()), array);
    }
}