List of usage examples for org.apache.mahout.math.Vector#assign
Vector assign(DoubleFunction function);
From source file:SimpleCsvExamples.java
License:Apache License
public static void main(String[] args) throws IOException { FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS]; for (int i = 0; i < FIELDS; i++) { encoder[i] = new ConstantValueEncoder("v" + 1); }/* ww w . j a va 2s. c om*/ OnlineSummarizer[] s = new OnlineSummarizer[FIELDS]; for (int i = 0; i < FIELDS; i++) { s[i] = new OnlineSummarizer(); } long t0 = System.currentTimeMillis(); Vector v = new DenseVector(1000); if ("--generate".equals(args[0])) { PrintWriter out = new PrintWriter( new OutputStreamWriter(new FileOutputStream(new File(args[2])), Charsets.UTF_8)); try { int n = Integer.parseInt(args[1]); for (int i = 0; i < n; i++) { Line x = Line.generate(); out.println(x); } } finally { Closeables.close(out, false); } } else if ("--parse".equals(args[0])) { BufferedReader in = Files.newReader(new File(args[1]), Charsets.UTF_8); double total = 0; try { String line = in.readLine(); while (line != null) { v.assign(0); Line x = new Line(line); for (int i = 0; i < FIELDS; i++) { double z = x.getDouble(i); total += z; //s[i].add(x.getDouble(i)); encoder[i].addToVector(x.get(i), v); } line = in.readLine(); } } finally { Closeables.close(in, true); } // String separator = ""; // for (int i = 0; i < FIELDS; i++) { // System.out.printf("%s%.3f", separator, s[i].getMean()); // separator = ","; // } System.out.println("total: " + total); } else if ("--fast".equals(args[0])) { FastLineReader in = new FastLineReader(new FileInputStream(args[1])); double total = 0; try { FastLine line = in.read(); while (line != null) { v.assign(0); for (int i = 0; i < FIELDS; i++) { double z = line.getDouble(i); total += z; //s[i].add(z); encoder[i].addToVector((byte[]) null, z, v); } line = in.read(); } } finally { Closeables.close(in, true); } // String separator = ""; // for (int i = 0; i < FIELDS; i++) { // System.out.printf("%s%.3f", separator, s[i].getMean()); // separator = ","; // } System.out.println("total: " + total); } System.out.printf("\nElapsed time = %.3f%n", 
(System.currentTimeMillis() - t0) / 1000.0); }
From source file:DisplayClustering.java
License:Apache License
protected static void plotSampleData(Graphics2D g2) { double sx = (double) res / DS; g2.setTransform(AffineTransform.getScaleInstance(sx, sx)); // plot the axes g2.setColor(Color.BLACK);//from w w w . j a v a2 s . co m Vector dv = new DenseVector(2).assign(SIZE / 2.0); plotRectangle(g2, new DenseVector(2).assign(2), dv); plotRectangle(g2, new DenseVector(2).assign(-2), dv); // plot the sample data g2.setColor(Color.DARK_GRAY); dv.assign(0.03); for (VectorWritable v : SAMPLE_DATA) { plotRectangle(g2, v.get(), dv); } }
From source file:DisplayClustering.java
License:Apache License
/** * This method plots points and colors them according to their cluster * membership, rather than drawing ellipses. * * As of commit, this method is used only by K-means spectral clustering. * Since the cluster assignments are set within the eigenspace of the data, it * is not inherent that the original data cluster as they would in K-means: * that is, as symmetric gaussian mixtures. * * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw * output is not directly usable. Rather, the cluster assignments from the raw * output need to be transferred back to the original data. As such, this * method will read the SequenceFile cluster results of K-means and transfer * the cluster assignments to the original data, coloring them appropriately. * * @param g2//from w w w . j ava 2 s . c o m * @param data */ protected static void plotClusteredSampleData(Graphics2D g2, Path data) { double sx = (double) res / DS; g2.setTransform(AffineTransform.getScaleInstance(sx, sx)); g2.setColor(Color.BLACK); Vector dv = new DenseVector(2).assign(SIZE / 2.0); plotRectangle(g2, new DenseVector(2).assign(2), dv); plotRectangle(g2, new DenseVector(2).assign(-2), dv); // plot the sample data, colored according to the cluster they belong to dv.assign(0.03); Path clusteredPointsPath = new Path(data, "clusteredPoints"); Path inputPath = new Path(clusteredPointsPath, "part-m-00000"); Map<Integer, Color> colors = new HashMap<Integer, Color>(); int point = 0; for (Pair<IntWritable, WeightedVectorWritable> record : new SequenceFileIterable<IntWritable, WeightedVectorWritable>( inputPath, new Configuration())) { int clusterId = record.getFirst().get(); VectorWritable v = SAMPLE_DATA.get(point++); Integer key = clusterId; if (!colors.containsKey(key)) { colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]); } plotClusteredRectangle(g2, v.get(), dv, colors.get(key)); } }
From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java
License:Apache License
public static void orthonormalizeColumns(Matrix mx) { //int n = mx.numCols(); int n = mx.numRows(); for (int c = 0; c < n; c++) { System.out.println("col: " + c); Vector col = mx.viewRow(c); for (int c1 = 0; c1 < c; c1++) { Vector viewC1 = mx.viewRow(c1); col.assign(col.minus(viewC1.times(viewC1.dot(col)))); }// w w w.j a va 2s.c o m final double norm2 = col.norm(2); if (norm2 == 0) { System.out.println("zero"); } col.assign(new DoubleFunction() { @Override public double apply(double x) { return x / norm2; } }); } }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public TopicModel(Matrix topicTermCounts, Vector topicSums, double eta, double alpha, String[] dictionary, int numThreads, double modelWeight) { this.dictionary = dictionary; this.topicTermCounts = topicTermCounts; this.topicSums = topicSums; this.numTopics = topicSums.size(); this.numTerms = topicTermCounts.numCols(); this.eta = eta; this.alpha = alpha; this.sampler = new Sampler(RandomUtils.getRandom()); this.numThreads = numThreads; if (modelWeight != 1) { topicSums.assign(Functions.mult(modelWeight)); for (int x = 0; x < numTopics; x++) { topicTermCounts.viewRow(x).assign(Functions.mult(modelWeight)); }/*from ww w .j a va2 s . com*/ } initializeThreadPool(); }
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) { // first calculate p(topic|term,document) for all terms in original, and all topics, // using p(term|topic) and p(topic|doc) pTopicGivenTerm(original, topics, docTopicModel); normalizeByTopic(docTopicModel);//from w ww . j a va 2s . c o m // now multiply, term-by-term, by the document, to get the weighted distribution of // term-topic pairs from this document. Iterator<Vector.Element> it = original.iterateNonZero(); while (it.hasNext()) { Vector.Element e = it.next(); for (int x = 0; x < numTopics; x++) { Vector docTopicModelRow = docTopicModel.viewRow(x); docTopicModelRow.setQuick(e.index(), docTopicModelRow.getQuick(e.index()) * e.get()); } } // now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm topics.assign(0.0); for (int x = 0; x < numTopics; x++) { topics.set(x, docTopicModel.viewRow(x).norm(1)); } // now renormalize so that sum_x(p(x|doc)) = 1 topics.assign(Functions.mult(1 / topics.norm(1))); }
From source file:com.modofo.molo.cluster.DisplayClustering.java
License:Apache License
/** * This method plots points and colors them according to their cluster * membership, rather than drawing ellipses. * /*from w w w . j ava 2 s.c om*/ * As of commit, this method is used only by K-means spectral clustering. * Since the cluster assignments are set within the eigenspace of the data, it * is not inherent that the original data cluster as they would in K-means: * that is, as symmetric gaussian mixtures. * * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw * output is not directly usable. Rather, the cluster assignments from the raw * output need to be transferred back to the original data. As such, this * method will read the SequenceFile cluster results of K-means and transfer * the cluster assignments to the original data, coloring them appropriately. * * @param g2 * @param data */ protected static void plotClusteredSampleData(Graphics2D g2, Path data) { double sx = (double) res / DS; g2.setTransform(AffineTransform.getScaleInstance(sx, sx)); g2.setColor(Color.BLACK); Vector dv = new DenseVector(2).assign(SIZE / 2.0); plotRectangle(g2, new DenseVector(2).assign(2), dv); plotRectangle(g2, new DenseVector(2).assign(-2), dv); // plot the sample data, colored according to the cluster they belong to dv.assign(0.03); Path clusteredPointsPath = new Path(data, "clusteredPoints"); Path inputPath = new Path(clusteredPointsPath, "part-m-00000"); HashMap<Integer, Color> colors = new HashMap<Integer, Color>(); int point = 0; for (Pair<IntWritable, WeightedVectorWritable> record : new SequenceFileIterable<IntWritable, WeightedVectorWritable>( inputPath, new Configuration())) { int clusterId = record.getFirst().get(); VectorWritable v = SAMPLE_DATA.get(point++); Integer key = clusterId; if (!colors.containsKey(key)) { colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]); } plotClusteredRectangle(g2, v.get(), dv, colors.get(key)); } }
From source file:de.isabeldrostfromm.sof.util.Vectors.java
License:Open Source License
/**
 * Concatenates any number of vectors, one directly after another, into a single new sparse
 * vector. Entries that are never set remain zero.
 *
 * <p>The result's size is the sum of the input sizes; the elements of the k-th input are
 * written at their own index shifted by the combined size of the inputs before it. With no
 * inputs the result is a vector of size 0.
 *
 * @param vectors the vectors to concatenate, in order
 * @return a new {@code SequentialAccessSparseVector} holding the concatenation
 */
public static Vector append(Vector... vectors) {
  int totalSize = 0;
  for (Vector vec : vectors) {
    totalSize += vec.size();
  }

  // No explicit assign(0) here: a newly constructed sparse vector has no stored entries, so
  // filling it with zeros only adds work proportional to the total size.
  Vector result = new SequentialAccessSparseVector(totalSize);

  int lastIndex = 0;
  for (Vector vector : vectors) {
    for (Element elem : vector) {
      result.setQuick(lastIndex + elem.index(), elem.get());
    }
    // Shift the write position past the vector just copied.
    lastIndex += vector.size();
  }
  return result;
}
From source file:io.ssc.relationdiscovery.SVD.java
License:Open Source License
public SVD(Matrix A, int rank) throws IOException { this.A = A;//from ww w . ja v a2 s . c om this.rank = rank; Vector initialVector = new DenseVector(A.numCols()); initialVector.assign(1.0 / Math.sqrt(A.numCols())); lanczosState = new LanczosState(A, rank + OVERSHOOT, initialVector); }
From source file:nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java
License:Apache License
/**
 * Combines all (preference, similarity column) pairs of one user into per-item predictions and
 * passes them to {@code writeRecommendedItems}.
 *
 * <p>For each pair, the similarity column (multiplied by the preference value unless that
 * value equals {@code BOOLEAN_PREF_VALUE}) is added to the numerators, and the column after
 * applying {@code ABSOLUTE_VALUES} is added to the denominators. An item receives a prediction
 * {@code numerator / denominator} only if more than one similarity column had a non-zero entry
 * for it. Nothing is written when {@code values} is empty.
 *
 * <p>Note that each similarity column is modified in place by {@code assign(ABSOLUTE_VALUES)}.
 *
 * @param userID user the recommendations are computed for
 * @param values preference value / similarity column pairs of that user
 * @param context forwarded to {@code writeRecommendedItems}
 * @throws IOException declared for the write step
 * @throws InterruptedException declared for the write step
 */
private void reduceNonBooleanData(VarLongWritable userID, Iterable<PrefAndSimilarityColumnWritable> values,
    Context context) throws IOException, InterruptedException {
  /* each entry here is the sum in the numerator of the prediction formula */
  Vector numerators = null;
  /* each entry here is the sum in the denominator of the prediction formula */
  Vector denominators = null;
  /* each entry here is the number of similar items used in the prediction formula */
  Vector numberOfSimilarItemsUsed = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

  for (PrefAndSimilarityColumnWritable pair : values) {
    Vector simColumn = pair.getSimilarityColumn();
    float prefValue = pair.getPrefValue();

    /* count the number of items used for each prediction */
    for (Iterator<Vector.Element> used = simColumn.iterateNonZero(); used.hasNext();) {
      int usedIndex = used.next().index();
      double countSoFar = numberOfSimilarItemsUsed.getQuick(usedIndex);
      numberOfSimilarItemsUsed.setQuick(usedIndex, countSoFar + 1);
    }

    /* numerator contribution: must be taken before simColumn is replaced by absolute values */
    boolean isBooleanPref = prefValue == BOOLEAN_PREF_VALUE;
    if (numerators == null) {
      if (isBooleanPref) {
        numerators = simColumn.clone();
      } else {
        numerators = simColumn.times(prefValue);
      }
    } else {
      Vector contribution;
      if (isBooleanPref) {
        contribution = simColumn;
      } else {
        contribution = simColumn.times(prefValue);
      }
      numerators = numerators.plus(contribution);
    }

    /* denominator contribution: the column with ABSOLUTE_VALUES applied in place */
    simColumn.assign(ABSOLUTE_VALUES);
    if (denominators == null) {
      denominators = simColumn;
    } else {
      denominators = denominators.plus(simColumn);
    }
  }

  if (numerators == null) {
    return;
  }

  Vector recommendationVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  for (Iterator<Vector.Element> it = numerators.iterateNonZero(); it.hasNext();) {
    Vector.Element numerator = it.next();
    int itemIDIndex = numerator.index();
    /* preference estimations must be based on at least 2 datapoints */
    if (!(numberOfSimilarItemsUsed.getQuick(itemIDIndex) > 1)) {
      continue;
    }
    /* compute normalized prediction */
    double prediction = numerator.get() / denominators.getQuick(itemIDIndex);
    recommendationVector.setQuick(itemIDIndex, prediction);
  }
  writeRecommendedItems(userID, recommendationVector, context);
}