Example usage for org.apache.mahout.math Vector set

Introduction

This page collects example usages of org.apache.mahout.math Vector.set, drawn from open-source projects.

Prototype

void set(int index, double value);

Document

Set the value at the given index.
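
A minimal, self-contained sketch of the call itself (sizes and values here are illustrative only, not taken from the projects below):

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class VectorSetExample {
    public static void main(String[] args) {
        // dense vector: every index is backed by storage, so set() is a plain write
        Vector dense = new DenseVector(5);
        dense.set(0, 1.5);
        dense.set(4, -2.0);

        // sparse vector: set() materializes the entry; untouched indices stay 0.0
        Vector sparse = new RandomAccessSparseVector(1000);
        sparse.set(42, 3.14);

        System.out.println(dense.get(0) + " " + sparse.get(42)); // 1.5 3.14
    }
}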

Usage

From source file: edu.snu.dolphin.bsp.examples.ml.sub.MapOfIntVSumCodec.java

License: Apache License

@Override
public Map<Integer, VectorSum> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final Map<Integer, VectorSum> resultMap = new HashMap<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int mapSize = dais.readInt();
        for (int i = 0; i < mapSize; i++) {
            final int mapInt = dais.readInt();
            final int vectorSize = dais.readInt();
            final Vector vector = new DenseVector(vectorSize);
            for (int j = 0; j < vectorSize; j++) {
                vector.set(j, dais.readDouble());
            }
            final int count = dais.readInt();
            resultMap.put(mapInt, new VectorSum(vector, count));
        }
    } catch (final IOException e) {
        // wrap the exception itself; its cause may be null
        throw new RuntimeException(e);
    }

    return resultMap;
}
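
The decode above implies a simple byte layout: the map size, then for each entry the key, the vector length, the vector's values, and the count. For orientation, a matching encode side might look like the following sketch; it is not taken from the project, and the VectorSum accessors getSum() and getCount() are assumed names:

public byte[] encode(final Map<Integer, VectorSum> map) {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(map.size());
        for (final Map.Entry<Integer, VectorSum> entry : map.entrySet()) {
            daos.writeInt(entry.getKey());
            final Vector vector = entry.getValue().getSum(); // assumed accessor
            daos.writeInt(vector.size());
            for (int j = 0; j < vector.size(); j++) {
                daos.writeDouble(vector.get(j));
            }
            daos.writeInt(entry.getValue().getCount()); // assumed accessor
        }
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
    return baos.toByteArray();
}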

From source file: edu.snu.dolphin.bsp.examples.ml.sub.VectorListCodec.java

License: Apache License

public List<Vector> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final List<Vector> resultList = new LinkedList<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int listSize = dais.readInt();
        final int length = dais.readInt();
        for (int i = 0; i < listSize; i++) {
            final Vector vector = new DenseVector(length);
            for (int j = 0; j < length; j++) {
                vector.set(j, dais.readDouble());
            }
            resultList.add(vector);
        }
    } catch (final IOException e) {
        // wrap the exception itself; its cause may be null
        throw new RuntimeException(e);
    }

    return resultList;
}

From source file: edu.utsa.sifter.som.SelfOrganizingMap.java

License: Apache License

void assignTopTerms(final int numTopTerms, final java.util.Vector<String> terms) {
    final PriorityQueue<TermPair> topWeights = new PriorityQueue<TermPair>(numTopTerms,
            new TermPair.TermPairComparator());

    for (int i = 0; i < numCells(); ++i) {
        final java.util.Vector<String> topTerms = new java.util.Vector<String>(numTopTerms);
        topTerms.setSize(numTopTerms);

        final Vector cell = getCell(i);
        final double f = getFactor(i);

        topWeights.clear();
        // keep the numTopTerms largest weights in a min-heap, evicting the smallest when full
        for (Vector.Element w : cell) {
            int val = (int) (1000 * f * w.get());
            if (topWeights.size() < numTopTerms) {
                topWeights.add(new TermPair(terms.get(w.index()), val));
            } else if (topWeights.peek().DocCount < val) {
                topWeights.remove();
                topWeights.add(new TermPair(terms.get(w.index()), val));
            }
        }
        final int numTopWeights = topWeights.size();
        for (int j = numTopWeights - 1; j > -1; --j) {
            topTerms.set(j, topWeights.remove().Term);
        }
        getStats(i).setTopTerms(topTerms);
    }
}

From source file: hk.newsRecommender.Classify.java

License: Open Source License

public static void genNaiveBayesModel(Configuration conf, int labelIndex, String trainFile, String trainSeqFile,
        boolean hasHeader) {
    CSVReader reader = null;
    try {
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(new Path(trainSeqFile)))
            fs.delete(new Path(trainSeqFile), true);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, new Path(trainSeqFile), Text.class,
                VectorWritable.class);
        FileSystem fsopen = FileSystem.get(conf);
        FSDataInputStream in = fsopen.open(new Path(trainFile));
        reader = new CSVReader(new InputStreamReader(in));

        String[] header = null;
        if (hasHeader)
            header = reader.readNext();
        String[] line = null;
        Long l = 0L;
        while ((line = reader.readNext()) != null) {
            if (labelIndex >= line.length) // >=: labelIndex must be a valid column index
                break;
            l++;
            List<String> tmpList = Lists.newArrayList(line);
            String label = tmpList.get(labelIndex);
            if (!strLabelList.contains(label))
                strLabelList.add(label);
            //            Text key = new Text("/" + label + "/" + l);
            Text key = new Text("/" + label + "/");
            tmpList.remove(labelIndex);

            VectorWritable vectorWritable = new VectorWritable();
            Vector vector = new RandomAccessSparseVector(tmpList.size(), tmpList.size());

            for (int i = 0; i < tmpList.size(); i++) {
                String tmpStr = tmpList.get(i);
                if (StringUtils.isNumeric(tmpStr))
                    vector.set(i, Double.parseDouble(tmpStr));
                else
                    vector.set(i, parseStrCell(tmpStr));
            }
            vectorWritable.set(vector);
            writer.append(key, vectorWritable);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // best-effort close
            }
        }
    }
}

From source file: hk.newsRecommender.Classify.java

License: Open Source License

public static void test(Configuration conf, String testFile, int labelIndex) throws IOException {
    System.out.println("~~~ begin to test ~~~");
    AbstractNaiveBayesClassifier classifier = new StandardNaiveBayesClassifier(naiveBayesModel);

    FileSystem fsopen = FileSystem.get(conf);
    FSDataInputStream in = fsopen.open(new Path(testFile));
    CSVReader csv = new CSVReader(new InputStreamReader(in));
    csv.readNext(); // skip header

    String[] line = null;
    double totalSampleCount = 0.;
    double correctClsCount = 0.;
    // (a long commented-out hard-coded sample feature vector and a manual test loop are omitted here)
    while ((line = csv.readNext()) != null) {
        List<String> tmpList = Lists.newArrayList(line);
        String label = tmpList.get(labelIndex);
        tmpList.remove(labelIndex);
        totalSampleCount++;
        Vector vector = new RandomAccessSparseVector(tmpList.size(), tmpList.size());
        for (int i = 0; i < tmpList.size(); i++) {
            String tempStr = tmpList.get(i);
            if (StringUtils.isNumeric(tempStr)) {
                vector.set(i, Double.parseDouble(tempStr));
            } else {
                Long id = strOptionMap.get(tempStr);
                if (id != null)
                    vector.set(i, id);
                else {
                    // the token has no known id; report it and leave this position at 0.0
                    System.out.println("unmapped token: " + tempStr);
                    continue;
                }
            }
        }
        Vector resultVector = classifier.classifyFull(vector);
        int classifyResult = resultVector.maxValueIndex();
        if (StringUtils.equals(label, strLabelList.get(classifyResult))) {
            correctClsCount++;
        } else {
            System.out.println("CorrectORItem=" + label + "\tClassify=" + strLabelList.get(classifyResult));
        }
    }
    System.out.println("Correct Ratio: " + (correctClsCount / totalSampleCount));
}

From source file: hk.newsRecommender.MatrixAndCluster.java

License: Open Source License

public static void matrix2Vector(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Reader reader = null;
    // open the SequenceFile and instantiate its key/value classes reflectively
    reader = new SequenceFile.Reader(fs, path, conf);
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable val = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    Writer writer = null;
    try {
        // NOTE: this writes to the same path that is being read above; a separate
        // output path would be safer
        writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, VectorWritable.class,
                CompressionType.BLOCK);
        final IntWritable key1 = new IntWritable();
        final VectorWritable value = new VectorWritable();
        int lineNum = 0;
        Vector vector = null;
        while (reader.next(key, val)) {
            int index = 0;
            StringTokenizer st = new StringTokenizer(val.toString());
            // wrap a SequentialAccessSparseVector in a NamedVector keyed by line number
            vector = new NamedVector(new SequentialAccessSparseVector(Cardinality), lineNum + "");
            while (st.hasMoreTokens()) {
                if (Integer.parseInt(st.nextToken()) == 1) {
                    vector.set(index, 1);
                }
                index++;
            }
            key1.set(lineNum++);
            value.set(vector);
            writer.append(key1, value);
        }
    } finally {
        if (writer != null) {
            writer.close();
        }
        reader.close();
    }
}

From source file: mlbench.bayes.train.WeightSummer.java

License: Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {

        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);
        Vector weightsPerFeature = null;
        Vector weightsPerLabel = new DenseVector(labNum);

        for (int i = 0; i < inputs.length; i++) {
            FileSplit fsplit = inputs[i];
            SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            IntWritable index = kvrr.createKey();
            VectorWritable value = kvrr.createValue();
            while (kvrr.next(index, value)) {
                Vector instance = value.get();
                if (weightsPerFeature == null) {
                    weightsPerFeature = new RandomAccessSparseVector(instance.size(),
                            instance.getNumNondefaultElements());
                }

                int label = index.get();
                weightsPerFeature.assign(instance, Functions.PLUS);
                weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
            }
        }
        if (weightsPerFeature != null) {
            MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature));
            MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel));
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDirW);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        Text key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (Text) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key == null && point == null) {
                // first pair received: nothing to flush yet
            } else if (!key.equals(newKey)) {
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            Path resOut = new Path(outDir);
            NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config);
            naiveBayesModel.serialize(resOut, config);
        }
    }

    MPI_D.Finalize();
}

From source file: net.aprendizajengrande.ontocluster.RedisToVectors.java

License: Open Source License

public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: <hdfs folder for input>");
        System.exit(1);
    }

    Configuration conf = new Configuration();

    System.out.println("Input: " + args[0]);

    // see
    // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    String inputName = args[0] + "/input";
    String relsInputName = args[0] + "/rels";
    String instancesInputName = args[0] + "/instances";

    Path input = new Path(inputName);
    Path relsInput = new Path(relsInputName);
    Path instancesInput = new Path(instancesInputName);

    // see http://stackoverflow.com/questions/14993644/configure-jedis-timeout
    Jedis jedis = new Jedis("localhost", 6379, 18000);

    // create the relations and instances first, so we know what to expect
    Set<String> rels = jedis.keys("rel-nom-*");

    Map<Integer, String> relIdToName = new HashMap<>();

    FSDataOutputStream fsdos = relsInput.getFileSystem(conf).create(relsInput);
    PrintWriter pw = new PrintWriter(new OutputStreamWriter(fsdos));

    int relNum = 0;
    for (String rel : rels) {
        String relName = rel.replaceAll("^rel-nom-", "");
        int relId = Integer.parseInt(jedis.get(rel));
        relIdToName.put(relId, relName);
        if (relId > relNum)
            relNum = relId;
    }
    relNum++;
    for (int i = 0; i < relNum; i++)
        pw.println(i + "\t" + relIdToName.get(i));
    pw.close();
    rels.clear();

    Set<String> instances = jedis.keys("res-nom-*");

    fsdos = instancesInput.getFileSystem(conf).create(instancesInput);
    pw = new PrintWriter(new OutputStreamWriter(fsdos));

    for (String instance : instances) {
        int instanceId = Integer.parseInt(instance.replaceAll("^res-nom-", ""));
        String instanceName = jedis.get(instance);
        pw.println(instanceId + "\t" + instanceName);
    }
    pw.close();
    instances.clear();

    Set<String> keys = jedis.keys("r-*");

    SequenceFile.Writer writer = SequenceFile.createWriter(conf, Writer.file(input),
            Writer.keyClass(Text.class), Writer.valueClass(VectorWritable.class));

    for (String key : keys) {
        Set<String> theseRels = jedis.smembers(key);

        Vector s = new SequentialAccessSparseVector(relNum);
        for (String relId : theseRels)
            s.set(Integer.parseInt(relId), 1.0);
        VectorWritable v = new VectorWritable(s);
        writer.append(new Text(key), v);
    }
    writer.close();

    jedis.close();
}

From source file: nl.gridline.zieook.inx.movielens.hbase.RecommendationsImportMap.java

License: Apache License

@Override
public void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {

    Vector similarityMatrixRow = value.get();

    // Remove self similarity from the equation
    similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY);

    // determine max non-zero element: (==item index)
    List<RecommendationElement> recommendations = new ArrayList<RecommendationElement>();

    // collect non-zero items:
    Iterator<Element> it = similarityMatrixRow.iterateNonZero();
    while (it.hasNext()) {
        Element e = it.next();
        recommendations.add(new RecommendationElement(e.index(), e.get()));
        // LOG.info("created new recommendation for " + e.index());
    }

    // sorted list of recommendations: now we have an item id, and similarity value:
    Collections.sort(recommendations, new SimilarityComparator());
    LOG.info("sorted: " + recommendations.size());

    int rank = 1;
    Put put = new Put(RowKeys.getRecommendationKey(collection, recommender, key.get()));
    for (RecommendationElement el : recommendations) {
        // if (el.getSimilarityValue() > 0)
        // {
        byte[] data = Recommend.getRecommendation(el.getItemIndex(), rank, el.getSimilarityValue());
        put.add(RECOMMENDATION_COLUMN, Bytes.toBytes(rank), data);
        rank++;
        // }
    }
    context.write(new LongWritable(key.get()), put);

}

From source file: nl.gridline.zieook.inx.movielens.items.ItemBasedSortSimilaritiesMapper.java

License: Apache License

@Override
protected void map(IntWritable key, VectorWritable value, Context context)
        throws IOException, InterruptedException {
    int maxIndex = -1;

    Vector similarityMatrixRow = value.get();
    /* remove self similarity */

    similarityMatrixRow.set(key.get(), Double.NEGATIVE_INFINITY);

    //
    // determine maximum index
    //
    Iterator<Element> it = similarityMatrixRow.iterateNonZero();

    while (it.hasNext()) {
        Element e = it.next();

        // e.index() == item id

        if (e.index() > maxIndex) {
            maxIndex = e.index();
        }
    }

    // System.out.println(String.format("key: %d maxIndex: %d", key.get(), maxIndex));

    if (maxIndex > 0) {

        // maxIndex itself holds a non-zero similarity, so allocate maxIndex + 1 slots
        RecommendationElement[] itemBasedRecommendations = new RecommendationElement[maxIndex + 1];

        for (int i = 0; i <= maxIndex; i++) {
            Element element = similarityMatrixRow.getElement(i);

            double similarityValue = Double.NEGATIVE_INFINITY;

            if (element != null) {
                similarityValue = element.get();
            }

            itemBasedRecommendations[i] = new RecommendationElement(i, similarityValue);
        }

        Arrays.sort(itemBasedRecommendations, new SimilarityComparator());

        RecommendationElementArray array = new RecommendationElementArray(itemBasedRecommendations);

        context.write(new VarIntWritable(key.get()), array);

    }
}