Example usage for org.apache.mahout.math Vector.get

Introduction

On this page you can find example usages of org.apache.mahout.math Vector.get.

Prototype

double get(int index);

Document

Return the value at the given index.
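
Before the usage examples, here is a minimal, self-contained sketch of Vector.get, assuming only mahout-math on the classpath; the class name VectorGetExample is illustrative, not taken from any of the sources below:

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class VectorGetExample {
    public static void main(String[] args) {
        // A dense vector backed by the given values.
        final Vector v = new DenseVector(new double[] { 1.0, 2.5, -3.0 });

        // get(index) returns the double stored at that position.
        System.out.println(v.get(1)); // prints 2.5

        // Indices outside [0, size()) are invalid; Mahout throws an
        // unchecked IndexException for them.
        // v.get(3); // would fail
    }
}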

Usage

From source file:edu.snu.dolphin.bsp.examples.ml.sub.VectorListCodec.java

License:Apache License

@Override
public byte[] encode(final List<Vector> list) {

    /*
     * This codec assumes that all vectors in the list have the same
     * length, so the common length is taken from the first vector.
     */
    final int length = list.isEmpty() ? 0 : list.get(0).size();

    // Initial capacity in bytes: two int headers plus the packed doubles.
    final ByteArrayOutputStream baos = new ByteArrayOutputStream(
            Integer.BYTES + Integer.BYTES + Double.BYTES * length * list.size());

    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(list.size());
        daos.writeInt(length);
        for (final Vector vector : list) {
            for (int i = 0; i < length; i++) {
                daos.writeDouble(vector.get(i));
            }
        }
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return baos.toByteArray();
}
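
The codec above only encodes. For round-trip clarity, here is a minimal matching decode sketch; decode is a hypothetical counterpart, not part of the original source file, and assumes the format written by encode (list size, common vector length, then the packed doubles) plus the usual java.io, java.util, and org.apache.mahout.math imports:

public List<Vector> decode(final byte[] data) {
    final List<Vector> list = new ArrayList<>();
    try (final DataInputStream dis = new DataInputStream(new ByteArrayInputStream(data))) {
        final int size = dis.readInt(); // number of vectors
        final int length = dis.readInt(); // common vector length
        for (int i = 0; i < size; i++) {
            final Vector vector = new DenseVector(length);
            for (int j = 0; j < length; j++) {
                vector.set(j, dis.readDouble());
            }
            list.add(vector);
        }
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
    return list;
}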

From source file:edu.utsa.sifter.som.MainSOM.java

License:Apache License

void somStats(final SifterConfig conf, final SelfOrganizingMap som, final ArrayList<ArrayList<Long>> clusters,
        final Writer somJS) throws IOException {
    somJS.write("{\"width\":" + som.width() + ", \"height\":" + som.height() + ", \n\"cells\":[\n");

    int numZero = 0;
    int numWith = 0;
    int totalWith = 0;
    long totalSSD = 0;
    int maxNum = 0;
    double maxSSD = 0;
    double maxStd = 0;

    for (int i = 0; i < som.numCells(); ++i) {
        final ArrayList<Long> cluster = clusters.get(i);
        if (cluster.size() == 0) {
            ++numZero;
        } else {
            ++numWith;
            totalWith += cluster.size();
        }
        totalSSD += som.getStats(i).sumSqrDistance();

        maxNum = Math.max(maxNum, cluster.size());
        maxSSD = Math.max(maxSSD, som.getStats(i).sumSqrDistance());
        maxStd = Math.max(maxStd, som.getStats(i).stdDev());

        somJS.write("{\"topTerms\":[");
        final java.util.Vector<String> topTerms = som.getStats(i).getTopTerms();
        for (int j = 0; j < Conf.NUM_TOP_CELL_TERMS; ++j) {
            if (j != 0) {
                somJS.write(", ");
            }
            somJS.write("\"");
            somJS.write(StringEscapeUtils.escapeEcmaScript(topTerms.get(j)));
            somJS.write("\"");
        }
        somJS.write("], ");
        somJS.write("\"num\":" + cluster.size() + ", \"stdDev\":" + som.getStats(i).stdDev() + ", \"ssd\":"
                + som.getStats(i).sumSqrDistance());
        somJS.write(", \"region\":" + som.getStats(i).getRegion());
        if (i + 1 == som.numCells()) {
            somJS.write("}\n");
        } else {
            somJS.write("},\n");
        }
    }
    somJS.write("], \"numZero\":" + numZero + ", \"numWith\":" + numWith);
    somJS.write(", \"totalWith\":" + totalWith + ", \"avgNum\":"
            + (numWith == 0 ? 0 : (double) totalWith / numWith));
    somJS.write(", \"numOutliers\":" + getNumOutliers());
    somJS.write(", \"ssd\":" + totalSSD + ", \"numRegions\":" + som.getNumRegions());
    somJS.write(", \"maxCellNum\":" + maxNum + ", \"maxCellSSD\":" + maxSSD + ", \"maxCellStd\":" + maxStd
            + ",\n\"regionColors\":[");
    for (int i = 0; i < som.getNumRegions(); ++i) {
        if (i > 0) {
            somJS.write(", ");
        }
        somJS.write(Integer.toString(som.getRegionColor(i)));
    }
    somJS.write("],\n\"regionMap\":[");
    final ArrayList<Set<Integer>> regionMap = som.getRegionMap();
    for (int i = 0; i < regionMap.size(); ++i) {
        if (i > 0) {
            somJS.write(", ");
        }
        somJS.write("[");
        final Set<Integer> adjMap = regionMap.get(i);
        int j = 0;
        for (Integer adj : adjMap) {
            if (j > 0) {
                somJS.write(", ");
            }
            somJS.write(Integer.toString(adj));
            ++j;
        }
        somJS.write("]");
    }
    somJS.write("],\n");

    somJS.write("\"cellTermDiffs\":[\n");
    for (int i = 0; i < som.numCells(); ++i) {
        final HashMap<Integer, Integer> diffs = som.getCellTermDiffs(i);
        if (i != 0) {
            somJS.write(",\n");
        }
        somJS.write("{");
        int j = 0;
        for (Map.Entry<Integer, Integer> pair : diffs.entrySet()) {
            if (j != 0) {
                somJS.write(", ");
            }
            ++j;
            somJS.write("\"");
            somJS.write(Integer.toString(pair.getKey()));
            somJS.write("\": \"");
            int val = pair.getValue();
            if (val < 0) {
                somJS.write("-");
                val = -1 * val;
            }
            somJS.write(Terms.get(val));
            somJS.write("\"");
        }
        somJS.write("}");
    }
    somJS.write("]\n");
    somJS.write("}\n");
}

From source file:edu.utsa.sifter.som.SelfOrganizingMap.java

License:Apache License

void assignTopTerms(final int numTopTerms, final java.util.Vector<String> terms) {
    final PriorityQueue<TermPair> topWeights = new PriorityQueue<TermPair>(numTopTerms,
            new TermPair.TermPairComparator());

    for (int i = 0; i < numCells(); ++i) {
        final java.util.Vector<String> topTerms = new java.util.Vector<String>(numTopTerms);
        topTerms.setSize(numTopTerms);

        final Vector cell = getCell(i);
        final double f = getFactor(i);

        topWeights.clear();
        for (Vector.Element w : cell) {
            int val = (int) (1000 * f * w.get());
            if (topWeights.size() < numTopTerms) {
                topWeights.add(new TermPair(terms.get(w.index()), val));
            } else if (topWeights.peek().DocCount < val) {
                topWeights.remove();
                topWeights.add(new TermPair(terms.get(w.index()), val));
            }
        }
        final int numTopWeights = topWeights.size();
        for (int j = numTopWeights - 1; j > -1; --j) {
            topTerms.set(j, topWeights.remove().Term);
        }
        getStats(i).setTopTerms(topTerms);
    }
}
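
Design note: assuming TermPair.TermPairComparator orders pairs by ascending count, topWeights is a min-heap whose peek() exposes the weakest term kept so far. Capping the queue at numTopTerms and evicting through remove() retains the numTopTerms heaviest terms, and the final loop drains them into topTerms from the last slot backwards, i.e. in descending order of weight.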

From source file:mlbench.bayes.train.WeightSummer.java

License:Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {

        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);
        Vector weightsPerFeature = null;
        Vector weightsPerLabel = new DenseVector(labNum);

        for (int i = 0; i < inputs.length; i++) {
            FileSplit fsplit = inputs[i];
            SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            IntWritable index = kvrr.createKey();
            VectorWritable value = kvrr.createValue();
            while (kvrr.next(index, value)) {
                Vector instance = value.get();
                if (weightsPerFeature == null) {
                    weightsPerFeature = new RandomAccessSparseVector(instance.size(),
                            instance.getNumNondefaultElements());
                }

                int label = index.get();
                weightsPerFeature.assign(instance, Functions.PLUS);
                weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
            }
        }
        if (weightsPerFeature != null) {
            MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature));
            MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel));
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDirW);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        Text key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (Text) vals[0];
            newPoint = (VectorWritable) vals[1];
            if (key != null && !key.equals(newKey)) {
                // key changed: flush the vector accumulated for the previous key
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            Path resOut = new Path(outDir);
            NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config);
            naiveBayesModel.serialize(resOut, config);
        }
    }

    MPI_D.Finalize();
}

From source file:mlbench.kmeans.KmeansUtils.java

License:Apache License

static void accumulate(double[] sum, Vector vector) throws MPI_D_Exception {
    if (sum.length != vector.size()) {
        throw new MPI_D_Exception("Array length does not match vector size!");
    }
    for (int i = 0; i < sum.length; i++) {
        sum[i] += vector.get(i);
    }
}

From source file:net.aprendizajengrande.ontocluster.Clusterer.java

License:Open Source License

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

    if (args.length != 3) {
        System.err.println(
                "Usage: <input hdfs folder with vectors> <hdfs folder for output> <local folder for output>");
        System.exit(1);
    }

    Configuration conf = new Configuration();
    DistanceMeasure measure = new CosineDistanceMeasure();
    long seed = 67241;
    int numClusters = 250;
    int numIterations = 500;

    // see
    // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // create the vectors in HDFS
    System.out.println("Input: " + args[0]);
    Path input = new Path(args[0] + "/input");

    // first centroids are an input parameter to clustering
    Path clusters = new Path(args[0] + "/clusters");
    clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, numClusters, measure, seed);

    Path output = new Path(args[1]);

    // cluster
    KMeansDriver.run(input, clusters, output, 0.005, numIterations, true, 0.0, false);

    // read the rel names, to pretty print

    Path inputRels = new Path(args[0] + "/rels");
    FileSystem fs = inputRels.getFileSystem(conf);
    FSDataInputStream fsdis = fs.open(inputRels);
    BufferedReader br = new BufferedReader(new InputStreamReader(fsdis));
    String line = br.readLine();
    Map<Integer, String> relIdToName = new HashMap<>();
    while (line != null) {
        String[] parts = line.split("\\t");
        relIdToName.put(Integer.parseInt(parts[0]), parts[1]);
        line = br.readLine();
    }

    // read output
    Path outputFinal = ClusterExtractor.findFinalClusters(args[1], conf);
    if (outputFinal == null) {
        System.err.println("Couldn't find final clusters at '" + args[1] + "-\\d+-final'");
        System.exit(1);
    }
    Path successFile = new Path(outputFinal, "_SUCCESS");
    if (fs.exists(successFile)) {
        fs.delete(successFile, false);
    }

    SequenceFileDirIterable<Text, Writable> it = new SequenceFileDirIterable<>(outputFinal, PathType.LIST,
            conf);

    PrintWriter pw = new PrintWriter(new FileWriter(new File(args[2])));

    int clusterNum = 0;
    for (Pair<Text, Writable> p : it) {
        Object obj = p.getSecond();
        if (!(obj instanceof ClusterWritable))
            continue;
        pw.println(clusterNum + ") " + p.getFirst());
        Cluster cluster = ((ClusterWritable) obj).getValue();
        Vector center = cluster.getCenter();
        for (int i = 0; i < center.size(); i++) {
            String name = relIdToName.get(i);
            if (name == null)
                name = "?";
            if (center.get(i) >= 0.01)
                pw.println("\t" + name + ": " + center.get(i));
        }
        pw.println();
        clusterNum++;
    }
    pw.close();
}

From source file:net.aprendizajengrande.ontocluster.ClusterExtractor.java

License:Open Source License

public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

    if (args.length != 3) {
        System.err.println(
                "Usage: <input hdfs folder with rels> <hdfs folder for output> <local folder for output>");
        System.exit(1);
    }

    Configuration conf = new Configuration();

    // see
    // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

    // create the vectors in HDFS
    System.out.println("Input: " + args[0]);

    // read the rel names, to pretty print

    Path inputRels = new Path(args[0] + "/rels");
    FileSystem fs = inputRels.getFileSystem(conf);
    FSDataInputStream fsdis = fs.open(inputRels);
    BufferedReader br = new BufferedReader(new InputStreamReader(fsdis));
    String line = br.readLine();
    Map<Integer, String> relIdToName = new HashMap<>();
    while (line != null) {
        String[] parts = line.split("\\t");
        relIdToName.put(Integer.parseInt(parts[0]), parts[1]);
        line = br.readLine();
    }

    // read output
    Path outputFinal = findFinalClusters(args[1], conf);
    if (outputFinal == null) {
        System.err.println("Couldn't find final clusters at '" + args[1] + "-\\d+-final'");
        System.exit(1);
    }

    // delete the _SUCCESS file as it is problematic
    // see
    // http://stackoverflow.com/questions/10752708/eofexception-at-org-apache-hadoop-io-sequencefilereader-initsequencefile-java
    Path successFile = new Path(outputFinal, "_SUCCESS");
    if (fs.exists(successFile)) {
        fs.delete(successFile, false);
    }

    SequenceFileDirIterable<Text, Writable> it = new SequenceFileDirIterable<>(outputFinal, PathType.LIST,
            conf);

    PrintWriter pw = new PrintWriter(new FileWriter(new File(args[2])));

    int clusterNum = 0;
    for (Pair<Text, Writable> p : it) {
        Object obj = p.getSecond();
        if (!(obj instanceof ClusterWritable))
            continue;
        pw.println(clusterNum + ") " + p.getFirst());
        Cluster cluster = ((ClusterWritable) obj).getValue();
        Vector center = cluster.getCenter();
        for (int i = 0; i < center.size(); i++) {
            String name = relIdToName.get(i);
            if (name == null)
                name = "?";
            if (center.get(i) >= 0.01)
                pw.println("\t" + name + ": " + center.get(i));
        }
        pw.println();
        clusterNum++;
    }
    pw.close();
}

From source file:opennlp.addons.mahout.VectorClassifierModel.java

License:Apache License

public double[] eval(String[] features) {
    Vector vector = new RandomAccessSparseVector(predMap.size());

    for (String feature : features) {
        Integer featureId = predMap.get(feature);

        if (featureId != null) {
            vector.set(featureId, vector.get(featureId) + 1);
        }
    }

    Vector resultVector = classifier.classifyFull(vector);

    double[] outcomes = new double[classifier.numCategories()];

    for (int i = 0; i < outcomes.length; i++) {
        outcomes[i] = resultVector.get(i);
    }

    return outcomes;
}

From source file:org.apache.crunch.examples.Recommender.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println();
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        System.err.println();
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }
    Pipeline pipeline = new MRPipeline(Recommender.class, getConf());
    /*
     * input node
     */
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    /*
     * S0 + GBK
     */
    PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {

        @Override
        public Pair<Long, Long> map(String input) {
            String[] split = input.split("[,\\s]");
            long userID = Long.parseLong(split[0]);
            long itemID = Long.parseLong(split[1]);
            return Pair.of(userID, itemID);
        }
    }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey();

    /*
     * S1
     */
    PTable<Long, Vector> userVector = userWithPrefs
            .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
                @Override
                public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
                    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (long itemPref : input.second()) {
                        userVector.set((int) itemPref, 1.0f);
                    }
                    return Pair.of(input.first(), userVector);
                }
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

    /*
     * S2 + GBK
     */
    PGroupedTable<Integer, Integer> coOccurencePairs = userVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
                @Override
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
                    Iterator<Vector.Element> it = input.second().iterateNonZero();
                    while (it.hasNext()) {
                        int index1 = it.next().index();
                        Iterator<Vector.Element> it2 = input.second().iterateNonZero();
                        while (it2.hasNext()) {
                            int index2 = it2.next().index();
                            emitter.emit(Pair.of(index1, index2));
                        }
                    }
                }
            }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey();

    /*
     * S3
     */
    PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
            .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
                @Override
                public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
                    Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (int itemIndex2 : input.second()) {
                        cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
                    }
                    return Pair.of(input.first(), cooccurrenceRow);
                }
            }, Writables.tableOf(Writables.ints(), Writables.vectors()));

    /*
     * asText
     */
    pipeline.writeTextFile(coOccurenceVector, args[1]);
    PipelineResult result = pipeline.done();

    return result.succeeded() ? 0 : 1;
}

From source file:org.qcri.algebra.MultiplicationTest.java

License:Apache License

void verifySquareSum(Path sumPath) throws IOException {
    Vector sumVec = AlgebraCommon.mapDirToSparseVector(sumPath, 1, colsA, conf);
    double[][] vectorsA = inputVectorsA;
    for (int r = 0; r < vectorsA.length; r++) {
        double sum = 0;
        for (int c = 0; c < vectorsA[0].length; c++)
            sum += vectorsA[r][c] * vectorsA[r][c];
        Assert.assertEquals("The sum of a[" + r + "][*] is incorrect: ", sum, sumVec.get(r), EPSILON);
    }
}