Example usage for org.apache.mahout.math Vector.get

Introduction

On this page you can find example usages of org.apache.mahout.math Vector.get.

Prototype

double get(int index);

Document

Return the value at the given index.
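
Before the usage examples, here is a minimal, self-contained sketch of Vector.get, assuming only mahout-math on the classpath; the class name VectorGetExample is illustrative, not taken from any of the sources below:

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class VectorGetExample {
    public static void main(String[] args) {
        // A dense vector backed by the given values.
        final Vector v = new DenseVector(new double[] { 1.0, 2.5, -3.0 });

        // get(index) returns the double stored at that position.
        System.out.println(v.get(1)); // prints 2.5

        // Indices outside [0, size()) are invalid; Mahout throws an
        // unchecked IndexException for them.
        // v.get(3); // would fail
    }
}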

Usage

From source file:edu.snu.dolphin.bsp.examples.ml.sub.VectorListCodec.java

License:Apache License

@Override
public byte[] encode(final List<Vector> list) {

    /*
     * This codec assumes that all vectors in the list have the same
     * length, so the common length is taken from the first vector.
     */
    final int length = list.isEmpty() ? 0 : list.get(0).size();

    // Initial capacity in bytes: two int headers plus the packed doubles.
    final ByteArrayOutputStream baos = new ByteArrayOutputStream(
            Integer.BYTES + Integer.BYTES + Double.BYTES * length * list.size());

    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(list.size());
        daos.writeInt(length);
        for (final Vector vector : list) {
            for (int i = 0; i < length; i++) {
                daos.writeDouble(vector.get(i));
            }
        }
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return baos.toByteArray();
}
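
The codec above only encodes. For round-trip clarity, here is a minimal matching decode sketch; decode is a hypothetical counterpart, not part of the original source file, and assumes the format written by encode (list size, common vector length, then the packed doubles) plus the usual java.io, java.util, and org.apache.mahout.math imports:

public List<Vector> decode(final byte[] data) {
    final List<Vector> list = new ArrayList<>();
    try (final DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data))) {
        final int size = dis.readInt(); // number of vectors
        final int length = dis.readInt(); // common vector length
        for (int i = 0; i < size; i++) {
            final Vector vector = new DenseVector(length);
            for (int j = 0; j < length; j++) {
                vector.set(j, dis.readDouble());
            }
            list.add(vector);
        }
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
    return list;
}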

From source file:edu.utsa.sifter.som.MainSOM.java

License:Apache License

void somStats(final SifterConfig conf, final SelfOrganizingMap som, final ArrayList<ArrayList<Long>> clusters,
        final Writer somJS) throws IOException {
    somJS.write("{\"width\":" + som.width() + ", \"height\":" + som.height() + ", \n\"cells\":[\n");

    int numZero = 0;
    int numWith = 0;
    int totalWith = 0;
    long totalSSD = 0;
    int maxNum = 0;
    double maxSSD = 0;
    double maxStd = 0;

    for (int i = 0; i < som.numCells(); ++i) {
        final ArrayList<Long> cluster = clusters.get(i);
        if (cluster.size() == 0) {
            ++numZero;
        } else {
            ++numWith;
            totalWith += cluster.size();
        }
        totalSSD += som.getStats(i).sumSqrDistance();

        maxNum = Math.max(maxNum, cluster.size());
        maxSSD = Math.max(maxSSD, som.getStats(i).sumSqrDistance());
        maxStd = Math.max(maxStd, som.getStats(i).stdDev());

        somJS.write("{\"topTerms\":[");
        final java.util.Vector<String> topTerms = som.getStats(i).getTopTerms();
        for (int j = 0; j < Conf.NUM_TOP_CELL_TERMS; ++j) {
            if (j != 0) {
                somJS.write(", ");
            }
            somJS.write("\"");
            somJS.write(StringEscapeUtils.escapeEcmaScript(topTerms.get(j)));
            somJS.write("\"");
        }
        somJS.write("], ");
        somJS.write("\"num\":" + cluster.size() + ", \"stdDev\":" + som.getStats(i).stdDev() + ", \"ssd\":"
                + som.getStats(i).sumSqrDistance());
        somJS.write(", \"region\":" + som.getStats(i).getRegion());
        if (i + 1 == som.numCells()) {
            somJS.write("}\n");
        } else {
            somJS.write("},\n");
        }
    }
    somJS.write("], \"numZero\":" + numZero + ", \"numWith\":" + numWith);
    somJS.write(", \"totalWith\":" + totalWith + ", \"avgNum\":"
            + (numWith == 0 ? 0 : (double) totalWith / numWith));
    somJS.write(", \"numOutliers\":" + getNumOutliers());
    somJS.write(", \"ssd\":" + totalSSD + ", \"numRegions\":" + som.getNumRegions());
    somJS.write(", \"maxCellNum\":" + maxNum + ", \"maxCellSSD\":" + maxSSD + ", \"maxCellStd\":" + maxStd
            + ",\n\"regionColors\":[");
    for (int i = 0; i < som.getNumRegions(); ++i) {
        if (i > 0) {
            somJS.write(", ");
        }
        somJS.write(Integer.toString(som.getRegionColor(i)));
    }
    somJS.write("],\n\"regionMap\":[");
    final ArrayList<Set<Integer>> regionMap = som.getRegionMap();
    for (int i = 0; i < regionMap.size(); ++i) {
        if (i > 0) {
            somJS.write(", ");
        }
        somJS.write("[");
        final Set<Integer> adjMap = regionMap.get(i);
        int j = 0;
        for (Integer adj : adjMap) {
            if (j > 0) {
                somJS.write(", ");
            }
            somJS.write(Integer.toString(adj));
            ++j;
        }
        somJS.write("]");
    }
    somJS.write("],\n");

    somJS.write("\"cellTermDiffs\":[\n");
    for (int i = 0; i < som.numCells(); ++i) {
        final HashMap<Integer, Integer> diffs = som.getCellTermDiffs(i);
        if (i != 0) {
            somJS.write(",\n");
        }
        somJS.write("{");
        int j = 0;
        for (Map.Entry<Integer, Integer> pair : diffs.entrySet()) {
            if (j != 0) {
                somJS.write(", ");
            }
            ++j;
            somJS.write("\"");
            somJS.write(Integer.toString(pair.getKey()));
            somJS.write("\": \"");
            int val = pair.getValue();
            if (val < 0) {
                somJS.write("-");
                val = -1 * val;
            }
            somJS.write(Terms.get(val));
            somJS.write("\"");
        }
        somJS.write("}");
    }
    somJS.write("]\n");
    somJS.write("}\n");
}

From source file:edu.utsa.sifter.som.SelfOrganizingMap.java

License:Apache License

void assignTopTerms(final int numTopTerms, final java.util.Vector<String> terms) {
    final PriorityQueue<TermPair> topWeights = new PriorityQueue<TermPair>(numTopTerms,
            new TermPair.TermPairComparator());

    for (int i = 0; i < numCells(); ++i) {
        final java.util.Vector<String> topTerms = new java.util.Vector<String>(numTopTerms);
        topTerms.setSize(numTopTerms);

        final Vector cell = getCell(i);
        final double f = getFactor(i);

        topWeights.clear();
        for (Vector.Element w : cell) {
            int val = (int) (1000 * f * w.get());
            if (topWeights.size() < numTopTerms) {
                topWeights.add(new TermPair(terms.get(w.index()), val));
            } else if (topWeights.peek().DocCount < val) {
                topWeights.remove();
                topWeights.add(new TermPair(terms.get(w.index()), val));
            }
        }
        final int numTopWeights = topWeights.size();
        for (int j = numTopWeights - 1; j > -1; --j) {
            topTerms.set(j, topWeights.remove().Term);
        }
        getStats(i).setTopTerms(topTerms);
    }
}
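
Design note: assuming TermPair.TermPairComparator orders pairs by ascending count, topWeights is a min-heap whose peek() exposes the weakest term kept so far. Capping the queue at numTopTerms and evicting through remove() retains the numTopTerms heaviest terms, and the final loop drains them into topTerms from the last slot backwards, i.e. in descending order of weight.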

From source file:mlbench.bayes.train.WeightSummer.java

License:Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {

        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);
        Vector weightsPerFeature = null;
        Vector weightsPerLabel = new DenseVector(labNum);

        for (int i = 0; i < inputs.length; i++) {
            FileSplit fsplit = inputs[i];
            SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            IntWritable index = kvrr.createKey();
            VectorWritable value = kvrr.createValue();
            while (kvrr.next(index, value)) {
                Vector instance = value.get();
                if (weightsPerFeature == null) {
                    weightsPerFeature = new RandomAccessSparseVector(instance.size(),
                            instance.getNumNondefaultElements());
                }

                int label = index.get();
                weightsPerFeature.assign(instance, Functions.PLUS);
                weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
            }
        }
        if (weightsPerFeature != null) {
            MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature));
            MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel));
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDirW);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        Text key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (Text) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key != null && !key.equals(newKey)) {
                // key changed: flush the vector accumulated for the previous key
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            Path resOut = new Path(outDir);
            NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config);
            naiveBayesModel.serialize(resOut, config);
        }
    }

    MPI_D.Finalize();
}

From source file:mlbench.kmeans.KmeansUtils.java

License:Apache License

static void accumulate(double[] sum, Vector vector) throws MPI_D_Exception {
    if (sum.length != vector.size()) {
        throw new MPI_D_Exception("Array length does not match vector size!");
    }
    for (int i = 0; i < sum.length; i++) {
        sum[i] += vector.get(i);
    }
}

From source file:net.aprendizajengrande.ontocluster.Clusterer.java

License:Open Source License

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

    if (args.length != 3) {
        System.err.println(
                "Usage: <input hdfs folder with vectors> <hdfs folder for output> <local folder for output>");
        System.exit(1);
    }

    Configuration conf = new Configuration();
    DistanceMeasure measure = new CosineDistanceMeasure();
    long seed = 67241;
    int numClusters = 250;
    int numIterations = 500;

    // see
    // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // create the vectors in HDFS
    System.out.println("Input: " + args[0]);
    Path input = new Path(args[0] + "/input");

    // first centroids are an input parameter to clustering
    Path clusters = new Path(args[0] + "/clusters");
    clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, numClusters, measure, seed);

    Path output = new Path(args[1]);

    // cluster
    KMeansDriver.run(input, clusters, output, 0.005, numIterations, true, 0.0, false);

    // read the rel names, to pretty print

    Path inputRels = new Path(args[0] + "/rels");
    FileSystem fs = inputRels.getFileSystem(conf);
    FSDataInputStream fsdis = fs.open(inputRels);
    BufferedReader br = new BufferedReader(new InputStreamReader(fsdis));
    String line = br.readLine();
    Map<Integer, String> relIdToName = new HashMap<>();
    while (line != null) {
        String[] parts = line.split("\\t");
        relIdToName.put(Integer.parseInt(parts[0]), parts[1]);
        line = br.readLine();
    }

    // read output
    Path outputFinal = ClusterExtractor.findFinalClusters(args[1], conf);
    if (outputFinal == null) {
        System.err.println("Couldn't find final clusters at '" + args[1] + "-\\d+-final'");
        System.exit(1);
    }
    Path successFile = new Path(outputFinal, "_SUCCESS");
    if (fs.exists(successFile)) {
        fs.delete(successFile, false);
    }

    SequenceFileDirIterable<Text, Writable> it = new SequenceFileDirIterable<>(outputFinal, PathType.LIST,
            conf);

    PrintWriter pw = new PrintWriter(new FileWriter(new File(args[2])));

    int clusterNum = 0;
    for (Pair<Text, Writable> p : it) {
        Object obj = p.getSecond();
        if (!(obj instanceof ClusterWritable))
            continue;
        pw.println(clusterNum + ") " + p.getFirst());
        Cluster cluster = ((ClusterWritable) obj).getValue();
        Vector center = cluster.getCenter();
        for (int i = 0; i < center.size(); i++) {
            String name = relIdToName.get(i);
            if (name == null)
                name = "?";
            if (center.get(i) >= 0.01)
                pw.println("\t" + name + ": " + center.get(i));
        }
        pw.println();
        clusterNum++;
    }
    pw.close();
}

From source file:net.aprendizajengrande.ontocluster.ClusterExtractor.java

License:Open Source License

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

    if (args.length != 3) {
        System.err.println(
                "Usage: <input hdfs folder with rels> <hdfs folder for output> <local folder for output>");
        System.exit(1);
    }

    Configuration conf = new Configuration();

    // see
    // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // create the vectors in HDFS
    System.out.println("Input: " + args[0]);

    // read the rel names, to pretty print

    Path inputRels = new Path(args[0] + "/rels");
    FileSystem fs = inputRels.getFileSystem(conf);
    FSDataInputStream fsdis = fs.open(inputRels);
    BufferedReader br = new BufferedReader(new InputStreamReader(fsdis));
    String line = br.readLine();
    Map<Integer, String> relIdToName = new HashMap<>();
    while (line != null) {
        String[] parts = line.split("\\t");
        relIdToName.put(Integer.parseInt(parts[0]), parts[1]);
        line = br.readLine();
    }

    // read output
    Path outputFinal = findFinalClusters(args[1], conf);
    if (outputFinal == null) {
        System.err.println("Couldn't find final clusters at '" + args[1] + "-\\d+-final'");
        System.exit(1);
    }

    // delete the _SUCCESS file as it is problematic
    // see
    // http://stackoverflow.com/questions/10752708/eofexception-at-org-apache-hadoop-io-sequencefilereader-initsequencefile-java
    Path successFile = new Path(outputFinal, "_SUCCESS");
    if (fs.exists(successFile)) {
        fs.delete(successFile, false);
    }

    SequenceFileDirIterable<Text, Writable> it = new SequenceFileDirIterable<>(outputFinal, PathType.LIST,
            conf);

    PrintWriter pw = new PrintWriter(new FileWriter(new File(args[2])));

    int clusterNum = 0;
    for (Pair<Text, Writable> p : it) {
        Object obj = p.getSecond();
        if (!(obj instanceof ClusterWritable))
            continue;
        pw.println(clusterNum + ") " + p.getFirst());
        Cluster cluster = ((ClusterWritable) obj).getValue();
        Vector center = cluster.getCenter();
        for (int i = 0; i < center.size(); i++) {
            String name = relIdToName.get(i);
            if (name == null)
                name = "?";
            if (center.get(i) >= 0.01)
                pw.println("\t" + name + ": " + center.get(i));
        }
        pw.println();
        clusterNum++;
    }
    pw.close();
}

From source file:opennlp.addons.mahout.VectorClassifierModel.java

License:Apache License

public double[] eval(String[] features) {
    Vector vector = new RandomAccessSparseVector(predMap.size());

    for (String feature : features) {
        Integer featureId = predMap.get(feature);

        if (featureId != null) {
            vector.set(featureId, vector.get(featureId) + 1);
        }
    }

    Vector resultVector = classifier.classifyFull(vector);

    double[] outcomes = new double[classifier.numCategories()];

    for (int i = 0; i < outcomes.length; i++) {
        outcomes[i] = resultVector.get(i);
    }

    return outcomes;
}

From source file:org.apache.crunch.examples.Recommender.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println();
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        System.err.println();
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }
    Pipeline pipeline = new MRPipeline(Recommender.class, getConf());
    /*
     * input node
     */
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    /*
     * S0 + GBK
     */
    PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {

        @Override
        public Pair<Long, Long> map(String input) {
            String[] split = input.split("[,\\s]");
            long userID = Long.parseLong(split[0]);
            long itemID = Long.parseLong(split[1]);
            return Pair.of(userID, itemID);
        }
    }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey();

    /*
     * S1
     */
    PTable<Long, Vector> userVector = userWithPrefs
            .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
                @Override
                public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
                    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (long itemPref : input.second()) {
                        userVector.set((int) itemPref, 1.0f);
                    }
                    return Pair.of(input.first(), userVector);
                }
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

    /*
     * S2 + GBK
     */
    PGroupedTable<Integer, Integer> coOccurencePairs = userVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
                    Iterator<Vector.Element> it = input.second().iterateNonZero();
                    while (it.hasNext()) {
                        int index1 = it.next().index();
                        Iterator<Vector.Element> it2 = input.second().iterateNonZero();
                        while (it2.hasNext()) {
                            int index2 = it2.next().index();
                            emitter.emit(Pair.of(index1, index2));
                        }
                    }
                }
            }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey();

    /*
     * S3
     */
    PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
            .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
                @Override
                public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
                    Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (int itemIndex2 : input.second()) {
                        cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
                    }
                    return Pair.of(input.first(), cooccurrenceRow);
                }
            }, Writables.tableOf(Writables.ints(), Writables.vectors()));

    /*
     * asText
     */
    pipeline.writeTextFile(coOccurenceVector, args[1]);
    PipelineResult result = pipeline.done();

    return result.succeeded() ? 0 : 1;
}

From source file:org.qcri.algebra.MultiplicationTest.java

License:Apache License

void verifySquareSum(Path sumPath) throws IOException {
    Vector sumVec = AlgebraCommon.mapDirToSparseVector(sumPath, 1, colsA, conf);
    double[][] vectorsA = inputVectorsA;
    for (int r = 0; r < vectorsA.length; r++) {
        double sum = 0;
        for (int c = 0; c < vectorsA[0].length; c++)
            sum += vectorsA[r][c] * vectorsA[r][c];
        Assert.assertEquals("The sum of a[" + r + "][*] is incorrect: ", sum, sumVec.get(r), EPSILON);
    }
}