Example usage for org.apache.mahout.math Vector assign

Introduction

On this page you can find example usages of the assign method of org.apache.mahout.math.Vector.

Prototype

Vector assign(DoubleFunction function);

Source Link

Document

Apply the function to each element of the receiver

Usage

From source file:SimpleCsvExamples.java

License:Apache License

/**
 * Command-line entry point with three modes selected by {@code args[0]}:
 * <ul>
 *   <li>{@code --generate <count> <file>}: writes {@code count} generated lines to
 *       {@code file}, encoded as UTF-8;</li>
 *   <li>{@code --parse <file>}: reads the file line by line, builds a {@code Line} from
 *       each line and encodes every field into a scratch vector;</li>
 *   <li>{@code --fast <file>}: same work, but reading through {@code FastLineReader}.</li>
 * </ul>
 * The parse modes print the sum of all field values; every mode prints the elapsed
 * wall-clock time at the end. An unrecognized mode only prints the elapsed time.
 *
 * @param args mode flag followed by the operands listed above
 * @throws IOException if reading or writing the data file fails
 */
public static void main(String[] args) throws IOException {
    // Every mode needs at least one operand; --generate needs two (count and output file).
    // Without this check a missing operand surfaces as ArrayIndexOutOfBoundsException.
    if (args.length < 2 || ("--generate".equals(args[0]) && args.length < 3)) {
        System.err.println("Usage: --generate <count> <file> | --parse <file> | --fast <file>");
        return;
    }

    FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS];
    for (int i = 0; i < FIELDS; i++) {
        // Use the field index in the name so each field gets a distinct encoder name.
        encoder[i] = new ConstantValueEncoder("v" + i);
    }

    long t0 = System.currentTimeMillis();
    // Scratch vector that is cleared and refilled for every input line.
    Vector v = new DenseVector(1000);
    if ("--generate".equals(args[0])) {
        PrintWriter out = new PrintWriter(
                new OutputStreamWriter(new FileOutputStream(new File(args[2])), Charsets.UTF_8));
        try {
            int n = Integer.parseInt(args[1]);
            for (int i = 0; i < n; i++) {
                Line x = Line.generate();
                out.println(x);
            }
        } finally {
            Closeables.close(out, false);
        }
    } else if ("--parse".equals(args[0])) {
        BufferedReader in = Files.newReader(new File(args[1]), Charsets.UTF_8);
        double total = 0;
        try {
            String line = in.readLine();
            while (line != null) {
                v.assign(0);
                Line x = new Line(line);
                for (int i = 0; i < FIELDS; i++) {
                    double z = x.getDouble(i);
                    total += z;
                    encoder[i].addToVector(x.get(i), v);
                }
                line = in.readLine();
            }
        } finally {
            Closeables.close(in, true);
        }
        System.out.println("total: " + total);
    } else if ("--fast".equals(args[0])) {
        FastLineReader in = new FastLineReader(new FileInputStream(args[1]));
        double total = 0;
        try {
            FastLine line = in.read();
            while (line != null) {
                v.assign(0);
                for (int i = 0; i < FIELDS; i++) {
                    double z = line.getDouble(i);
                    total += z;
                    encoder[i].addToVector((byte[]) null, z, v);
                }
                line = in.read();
            }
        } finally {
            Closeables.close(in, true);
        }
        System.out.println("total: " + total);
    }
    System.out.printf("\nElapsed time = %.3f%n", (System.currentTimeMillis() - t0) / 1000.0);
}

From source file:DisplayClustering.java

License:Apache License

/**
 * Scales the graphics context by {@code res / DS}, draws the two axis rectangles in
 * black, then draws one small dark-gray rectangle for every entry of {@code SAMPLE_DATA}.
 *
 * @param g2 graphics context to draw on; its transform and color are overwritten
 */
protected static void plotSampleData(Graphics2D g2) {
    double scale = (double) res / DS;
    AffineTransform scaling = AffineTransform.getScaleInstance(scale, scale);
    g2.setTransform(scaling);

    // plot the axes
    Vector extent = new DenseVector(2).assign(SIZE / 2.0);
    Vector plusTwo = new DenseVector(2).assign(2);
    Vector minusTwo = new DenseVector(2).assign(-2);
    g2.setColor(Color.BLACK);
    plotRectangle(g2, plusTwo, extent);
    plotRectangle(g2, minusTwo, extent);

    // plot the sample data; the same vector object is reused with a much smaller value
    g2.setColor(Color.DARK_GRAY);
    extent.assign(0.03);
    for (VectorWritable sample : SAMPLE_DATA) {
        plotRectangle(g2, sample.get(), extent);
    }
}

From source file:DisplayClustering.java

License:Apache License

/**
 * Plots the sample points colored by cluster membership instead of drawing ellipses.
 *
 * The original documentation states that this is used only by K-means spectral
 * clustering: the cluster assignments are computed in the eigenspace of the data, so
 * the raw output is not directly plottable and the assignments have to be carried
 * back to the original points.
 *
 * The method reads the sequence file {@code clusteredPoints/part-m-00000} below
 * {@code data}. The i-th record read is paired with the i-th entry of
 * {@code SAMPLE_DATA}, and the record's key is used as the cluster id. Each new
 * cluster id takes the next entry of {@code COLORS}; once the palette is exhausted
 * every further cluster gets the last color.
 *
 * @param g2 graphics context to draw on; its transform and color are overwritten
 * @param data directory that contains the {@code clusteredPoints} output
 */
protected static void plotClusteredSampleData(Graphics2D g2, Path data) {
    double scale = (double) res / DS;
    AffineTransform scaling = AffineTransform.getScaleInstance(scale, scale);
    g2.setTransform(scaling);

    g2.setColor(Color.BLACK);
    Vector extent = new DenseVector(2).assign(SIZE / 2.0);
    Vector plusTwo = new DenseVector(2).assign(2);
    Vector minusTwo = new DenseVector(2).assign(-2);
    plotRectangle(g2, plusTwo, extent);
    plotRectangle(g2, minusTwo, extent);

    // plot the sample data, colored according to the cluster they belong to;
    // the same vector object is reused with a much smaller value
    extent.assign(0.03);

    Path partFile = new Path(new Path(data, "clusteredPoints"), "part-m-00000");
    Map<Integer, Color> colorByCluster = new HashMap<Integer, Color>();
    SequenceFileIterable<IntWritable, WeightedVectorWritable> records =
            new SequenceFileIterable<IntWritable, WeightedVectorWritable>(partFile, new Configuration());
    int sampleIndex = 0;
    for (Pair<IntWritable, WeightedVectorWritable> record : records) {
        Integer clusterKey = record.getFirst().get();
        VectorWritable sample = SAMPLE_DATA.get(sampleIndex);
        sampleIndex++;
        if (!colorByCluster.containsKey(clusterKey)) {
            // first time this cluster is seen: take the next palette entry, clamped to the last one
            int paletteIndex = Math.min(COLORS.length - 1, colorByCluster.size());
            colorByCluster.put(clusterKey, COLORS[paletteIndex]);
        }
        plotClusteredRectangle(g2, sample.get(), extent, colorByCluster.get(clusterKey));
    }
}

From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java

License:Apache License

/**
 * Orthonormalizes the vectors of {@code mx} with the Gram-Schmidt procedure, writing
 * the result back through the matrix's row views.
 *
 * Despite the method name, the vectors processed are the matrix <em>rows</em>: the
 * loop runs over {@code numRows()} and works on {@code viewRow(..)}.
 *
 * A vector whose norm is exactly zero after the projections have been removed is
 * reported on standard output and left as a zero vector. Dividing it by its norm
 * would fill it with NaN (0/0).
 *
 * @param mx matrix whose rows are orthonormalized
 */
public static void orthonormalizeColumns(Matrix mx) {

    int n = mx.numRows();

    for (int c = 0; c < n; c++) {
        System.out.println("col: " + c);
        Vector col = mx.viewRow(c);
        // Remove the component along every previously processed vector. col is updated
        // after each projection, so later projections see the already-reduced vector.
        for (int c1 = 0; c1 < c; c1++) {
            Vector viewC1 = mx.viewRow(c1);
            col.assign(col.minus(viewC1.times(viewC1.dot(col))));
        }
        final double norm2 = col.norm(2);
        if (norm2 == 0) {
            System.out.println("zero");
            // Nothing to normalize; skipping avoids turning the vector into NaN.
            continue;
        }
        col.assign(new DoubleFunction() {
            @Override
            public double apply(double x) {
                return x / norm2;
            }
        });
    }
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

public TopicModel(Matrix topicTermCounts, Vector topicSums, double eta, double alpha, String[] dictionary,
        int numThreads, double modelWeight) {
    this.dictionary = dictionary;
    this.topicTermCounts = topicTermCounts;
    this.topicSums = topicSums;
    this.numTopics = topicSums.size();
    this.numTerms = topicTermCounts.numCols();
    this.eta = eta;
    this.alpha = alpha;
    this.sampler = new Sampler(RandomUtils.getRandom());
    this.numThreads = numThreads;
    if (modelWeight != 1) {
        topicSums.assign(Functions.mult(modelWeight));
        for (int x = 0; x < numTopics; x++) {
            topicTermCounts.viewRow(x).assign(Functions.mult(modelWeight));
        }/*from   ww w .j a va2  s .  com*/
    }
    initializeThreadPool();
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Updates {@code docTopicModel} and {@code topics} for a single document.
 *
 * Steps: compute p(topic|term,document) into {@code docTopicModel}, normalize it by
 * topic, weight every non-zero term column by the document's value for that term,
 * then set {@code topics} to the L1 norm of each topic row and rescale it to sum to 1.
 *
 * NOTE(review): if every topic row has norm 0, the final rescaling divides by zero.
 * Confirm whether callers can pass such a document.
 *
 * @param original the document as a term vector; only its non-zero entries are visited
 * @param topics overwritten with the recomputed p(topic|doc)
 * @param docTopicModel topic-by-term matrix, modified in place
 */
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
    // first calculate p(topic|term,document) for all terms in original, and all topics,
    // using p(term|topic) and p(topic|doc)
    pTopicGivenTerm(original, topics, docTopicModel);
    normalizeByTopic(docTopicModel);

    // now multiply, term-by-term, by the document, to get the weighted distribution of
    // term-topic pairs from this document.
    for (Iterator<Vector.Element> terms = original.iterateNonZero(); terms.hasNext();) {
        Vector.Element term = terms.next();
        int termIndex = term.index();
        for (int topic = 0; topic < numTopics; topic++) {
            Vector topicRow = docTopicModel.viewRow(topic);
            double weighted = topicRow.getQuick(termIndex) * term.get();
            topicRow.setQuick(termIndex, weighted);
        }
    }

    // now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm
    topics.assign(0.0);
    for (int topic = 0; topic < numTopics; topic++) {
        double topicMass = docTopicModel.viewRow(topic).norm(1);
        topics.set(topic, topicMass);
    }

    // now renormalize so that sum_x(p(x|doc)) = 1
    double totalMass = topics.norm(1);
    topics.assign(Functions.mult(1 / totalMass));
}

From source file:com.modofo.molo.cluster.DisplayClustering.java

License:Apache License

/**
 * This method plots points and colors them according to their cluster
 * membership, rather than drawing ellipses.
 * /*from  w w w .  j ava 2  s.c  om*/
 * As of commit, this method is used only by K-means spectral clustering.
 * Since the cluster assignments are set within the eigenspace of the data, it
 * is not inherent that the original data cluster as they would in K-means:
 * that is, as symmetric gaussian mixtures.
 * 
 * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw
 * output is not directly usable. Rather, the cluster assignments from the raw
 * output need to be transferred back to the original data. As such, this
 * method will read the SequenceFile cluster results of K-means and transfer
 * the cluster assignments to the original data, coloring them appropriately.
 * 
 * @param g2
 * @param data
 */
protected static void plotClusteredSampleData(Graphics2D g2, Path data) {
    double sx = (double) res / DS;
    g2.setTransform(AffineTransform.getScaleInstance(sx, sx));

    g2.setColor(Color.BLACK);
    Vector dv = new DenseVector(2).assign(SIZE / 2.0);
    plotRectangle(g2, new DenseVector(2).assign(2), dv);
    plotRectangle(g2, new DenseVector(2).assign(-2), dv);

    // plot the sample data, colored according to the cluster they belong to
    dv.assign(0.03);

    Path clusteredPointsPath = new Path(data, "clusteredPoints");
    Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
    HashMap<Integer, Color> colors = new HashMap<Integer, Color>();
    int point = 0;
    for (Pair<IntWritable, WeightedVectorWritable> record : new SequenceFileIterable<IntWritable, WeightedVectorWritable>(
            inputPath, new Configuration())) {
        int clusterId = record.getFirst().get();
        VectorWritable v = SAMPLE_DATA.get(point++);
        Integer key = clusterId;
        if (!colors.containsKey(key)) {
            colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]);
        }
        plotClusteredRectangle(g2, v.get(), dv, colors.get(key));
    }
}

From source file:de.isabeldrostfromm.sof.util.Vectors.java

License:Open Source License

/**
 * Concatenates the given vectors, in argument order, into one new sparse vector.
 * The result's size is the sum of the input sizes, and the elements of each input
 * are copied to consecutive index ranges. Positions that are never written stay zero.
 *
 * @param vectors the vectors to concatenate; may be empty
 * @return a new {@code SequentialAccessSparseVector} holding all inputs back to back
 */
public static Vector append(Vector... vectors) {
    int combinedSize = 0;
    for (int k = 0; k < vectors.length; k++) {
        combinedSize += vectors[k].size();
    }

    Vector combined = new SequentialAccessSparseVector(combinedSize);
    // NOTE(review): a freshly constructed sparse vector is presumably already all zero,
    // so this call looks redundant. Confirm before removing.
    combined.assign(0);

    // offset = index in the result where the current input vector starts
    int offset = 0;
    for (int k = 0; k < vectors.length; k++) {
        Vector part = vectors[k];
        for (Element entry : part) {
            int target = offset + entry.index();
            combined.setQuick(target, entry.get());
        }
        offset += part.size();
    }
    return combined;
}

From source file:io.ssc.relationdiscovery.SVD.java

License:Open Source License

/**
 * Prepares a Lanczos decomposition of {@code A}.
 *
 * The start vector has one entry per column of {@code A}, each set to
 * {@code 1 / sqrt(numCols)}. The Lanczos state is created for
 * {@code rank + OVERSHOOT} desired vectors.
 *
 * @param A matrix to decompose
 * @param rank requested rank, before {@code OVERSHOOT} is added
 * @throws IOException declared by the constructor signature
 */
public SVD(Matrix A, int rank) throws IOException {
    this.A = A;
    this.rank = rank;

    int columns = A.numCols();
    double uniformEntry = 1.0 / Math.sqrt(columns);
    Vector startVector = new DenseVector(columns).assign(uniformEntry);

    int desiredRank = rank + OVERSHOOT;
    lanczosState = new LanczosState(A, desiredRank, startVector);
}

From source file:nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java

License:Apache License

/**
 * Computes preference predictions for one user from a stream of
 * (preference value, similarity column) pairs and writes them out.
 *
 * For every pair, the similarity column (multiplied by the preference value unless
 * that value equals {@code BOOLEAN_PREF_VALUE}) is added to the numerators. The
 * column is then transformed in place with {@code ABSOLUTE_VALUES} and added to the
 * denominators. An item is predicted only when more than one similarity column had
 * a non-zero entry for it; the prediction is numerator / denominator.
 *
 * If {@code values} is empty, nothing is written.
 *
 * @param userID user the recommendations are for
 * @param values preference/similarity-column pairs for this user
 * @param context passed through to {@code writeRecommendedItems}
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 */
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
        Context context) throws IOException, InterruptedException {
    /* each entry here is the sum in the numerator of the prediction formula */
    Vector numerators = null;
    /* each entry here is the sum in the denominator of the prediction formula */
    Vector denominators = null;
    /* each entry here is the number of similar items used in the prediction formula */
    Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    for (PrefAndSimilarityColumnWritable entry : values) {
        Vector simColumn = entry.getSimilarityColumn();
        float prefValue = entry.getPrefValue();

        /* count the number of items used for each prediction */
        for (Iterator<Vector.Element> used = simColumn.iterateNonZero(); used.hasNext();) {
            int usedIndex = used.next().index();
            double seenSoFar = numberOfSimilarItemsUsed.getQuick(usedIndex);
            numberOfSimilarItemsUsed.setQuick(usedIndex, seenSoFar + 1);
        }

        boolean isBooleanPref = prefValue == BOOLEAN_PREF_VALUE;
        if (numerators == null) {
            // the first column must be copied: simColumn itself is overwritten just below
            numerators = isBooleanPref ? simColumn.clone() : simColumn.times(prefValue);
        } else {
            numerators = numerators.plus(isBooleanPref ? simColumn : simColumn.times(prefValue));
        }

        // in-place transformation; must happen only after the numerator update above
        simColumn.assign(ABSOLUTE_VALUES);
        if (denominators == null) {
            denominators = simColumn;
        } else {
            denominators = denominators.plus(simColumn);
        }
    }

    if (numerators == null) {
        return;
    }

    Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    for (Iterator<Vector.Element> sums = numerators.iterateNonZero(); sums.hasNext();) {
        Vector.Element numerator = sums.next();
        int itemIDIndex = numerator.index();
        /* preference estimations must be based on at least 2 datapoints */
        if (numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1) {
            /* compute normalized prediction */
            double prediction = numerator.get() / denominators.getQuick(itemIDIndex);
            recommendationVector.setQuick(itemIDIndex, prediction);
        }
    }
    writeRecommendedItems(userID, recommendationVector, context);
}