List of usage examples for org.apache.mahout.math Vector get
double get(int index);
From source file:edu.indiana.d2i.htrc.io.SparseVectorUtil.java
License:Apache License
public static Vector transform2Vector(String text, String field, Analyzer analyzer, HTRCFilter filter, Dictionary dictionary) throws IOException { Vector result = new RandomAccessSparseVector(dictionary.size()); TokenStream stream = analyzer.reusableTokenStream(field, new StringReader(text.toString())); CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); stream.reset();/*w w w .j av a 2 s . c om*/ while (stream.incrementToken()) { // String term = new String(termAtt.buffer(), 0, // termAtt.length()); String term = new String(termAtt.buffer(), 0, termAtt.length()).toLowerCase(); if (filter.accept(term, 0)) { int index = dictionary.get(term); result.setQuick(index, result.get(index) + 1); } } return result; }
From source file:edu.snu.cms.reef.ml.kmeans.data.EuclideanDistance.java
License:Apache License
@Override public final double distance(Vector v1, Vector v2) { if (v1.size() != v2.size()) { throw new IllegalArgumentException("Vector dimensions are not consistent"); }//from w w w . ja v a 2s . co m double distance = 0; for (int i = 0; i < v1.size(); i++) { distance += (v1.get(i) - v2.get(i)) * (v1.get(i) - v2.get(i)); } return Math.sqrt(distance); }
From source file:edu.snu.cms.reef.ml.kmeans.data.VectorSum.java
License:Apache License
/**
 * Constructs a VectorSum holding the given running sum and count.
 *
 * <p>When {@code isDeepCopy} is true, the vector is copied element-by-element
 * into a fresh {@link DenseVector}; otherwise this object keeps a direct
 * reference to the caller's vector.
 */
public VectorSum(final Vector sum, final int count, final boolean isDeepCopy) {
    this.count = count;
    if (!isDeepCopy) {
        // Shallow: share the caller's vector instance.
        this.sum = sum;
        return;
    }
    // Deep: copy every component into a new dense vector of the same size.
    final Vector copy = new DenseVector(sum.size());
    for (int index = 0; index < copy.size(); index++) {
        copy.set(index, sum.get(index));
    }
    this.sum = copy;
}
From source file:edu.snu.cms.reef.ml.kmeans.groupcomm.subs.VectorListCodec.java
License:Apache License
@Override public final byte[] encode(final List<Vector> list) { /* This codec does not assume consistent centroid vector sizes(dimensions). * Therefore to specify the initial data size, * a quick iteration over the input list to compute * the sums of vector sizes is required. *///from www. j ava2s .c om int vectorSizeSum = 0; for (final Vector vector : list) { vectorSizeSum += vector.size(); } final ByteArrayOutputStream baos = new ByteArrayOutputStream( Integer.SIZE + Integer.SIZE * list.size() + Double.SIZE * vectorSizeSum); try (final DataOutputStream daos = new DataOutputStream(baos)) { daos.writeInt(list.size()); for (final Vector vector : list) { daos.writeInt(vector.size()); for (int i = 0; i < vector.size(); i++) { daos.writeDouble(vector.get(i)); } } } catch (final IOException e) { throw new RuntimeException(e.getCause()); } return baos.toByteArray(); }
From source file:edu.snu.cms.reef.ml.kmeans.VectorSumTest.java
License:Apache License
/** * Add two random VectorSums with VectorSum.add(), * and then check if the result really is the addition of the two VectorSums. *///from w w w . ja v a 2 s. co m @Test public final void testAddSum() { final int vectorSize = (int) (Math.random() * 1000); final Vector vectorA = new DenseVector(vectorSize); for (int i = 0; i < vectorSize; i++) { vectorA.set(i, Math.random()); } final VectorSum vectorSumA = new VectorSum(vectorA, (int) (Math.random() * 1000)); final Vector vectorB = new DenseVector(vectorSize); for (int i = 0; i < vectorSize; i++) { vectorB.set(i, Math.random()); } final VectorSum vectorSumB = new VectorSum(vectorB, (int) (Math.random() * 1000)); final Vector expectedVector = new DenseVector(vectorSize); for (int i = 0; i < vectorSize; i++) { expectedVector.set(i, vectorA.get(i) + vectorB.get(i)); } final VectorSum expectedSum = new VectorSum(expectedVector, vectorSumA.count + vectorSumB.count); vectorSumA.add(vectorSumB); assertEquals(expectedSum.sum.size(), vectorSumA.sum.size()); for (int i = 0; i < vectorSize; i++) { assertEquals(expectedSum.sum.get(i), vectorSumA.sum.get(i), 0.001); } assertEquals(expectedSum.count, vectorSumA.count); }
From source file:edu.snu.cms.reef.ml.kmeans.VectorSumTest.java
License:Apache License
/**
 * Adds a randomly generated Vector to a randomly generated VectorSum with
 * VectorSum.add() and verifies the element-wise sum, with the count
 * incremented by exactly one.
 */
@Test
public final void testAddVector() {
    final int dimension = (int) (Math.random() * 1000);

    // Random VectorSum operand.
    final Vector baseVector = new DenseVector(dimension);
    for (int index = 0; index < dimension; index++) {
        baseVector.set(index, Math.random());
    }
    final VectorSum base = new VectorSum(baseVector, (int) (Math.random() * 1000));

    // Random plain Vector operand.
    final Vector addend = new DenseVector(dimension);
    for (int index = 0; index < dimension; index++) {
        addend.set(index, Math.random());
    }

    // Expected result computed independently; adding a Vector bumps count by 1.
    final Vector expectedVector = new DenseVector(dimension);
    for (int index = 0; index < dimension; index++) {
        expectedVector.set(index, baseVector.get(index) + addend.get(index));
    }
    final VectorSum expected = new VectorSum(expectedVector, base.count + 1);

    base.add(addend);

    assertEquals(expected.sum.size(), base.sum.size());
    for (int index = 0; index < dimension; index++) {
        assertEquals(expected.sum.get(index), base.sum.get(index), 0.001);
    }
    assertEquals(expected.count, base.count);
}
From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java
License:Apache License
/*
 * E-step of an EM Gaussian-mixture iteration: for each data point, compute the
 * posterior responsibility of every cluster and accumulate weighted partial
 * statistics (outer product, weighted point, probability mass) into
 * clusterToStats for later aggregation.
 *
 * NOTE(review): the density term below omits the (2*pi)^(d/2) normalization
 * constant — presumably it cancels when posteriors are normalized by
 * 'denominator'; confirm against the M-step code.
 */
@Override public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int numClusters = clusterSummaries.size();
    // Compute the partial statistics of each cluster
    for (final Vector vector : points) {
        final int dimension = vector.size();
        Matrix outProd = null;
        if (isCovarianceDiagonal) {
            // Diagonal covariance: only x_j^2 terms are needed, so store them
            // on the diagonal of a sparse matrix instead of a full outer product.
            outProd = new SparseMatrix(dimension, dimension);
            for (int j = 0; j < dimension; j++) {
                outProd.set(j, j, vector.get(j) * vector.get(j));
            }
        } else {
            // Full covariance: outer product x * x^T.
            outProd = vector.cross(vector);
        }
        double denominator = 0;
        final double[] numerators = new double[numClusters];
        for (int i = 0; i < numClusters; i++) {
            final ClusterSummary clusterSummary = clusterSummaries.get(i);
            final Vector centroid = clusterSummary.getCentroid();
            final Matrix covariance = clusterSummary.getCovariance();
            final Double prior = clusterSummary.getPrior();
            final Vector differ = vector.minus(centroid);
            // Unnormalized Gaussian likelihood times the cluster prior:
            // prior / sqrt(|Sigma|) * exp(-(x-mu)^T Sigma^-1 (x-mu) / 2)
            numerators[i] = prior / Math.sqrt(covariance.determinant())
                    * Math.exp(differ.dot(inverse(covariance).times(differ)) / (-2));
            denominator += numerators[i];
        }
        for (int i = 0; i < numClusters; i++) {
            // Guard against total underflow: fall back to a uniform posterior
            // when every numerator vanished.
            final double posterior = denominator == 0 ? 1.0 / numerators.length
                    : numerators[i] / denominator;
            if (!clusterToStats.containsKey(i)) {
                clusterToStats.put(i,
                        new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false));
            } else {
                clusterToStats.get(i).add(
                        new ClusterStats(times(outProd, posterior), vector.times(posterior), posterior, false));
            }
        }
    }
}
From source file:edu.snu.dolphin.bsp.examples.ml.data.ClusterStats.java
License:Apache License
/**
 * Computes the covariance matrix from the accumulated statistics as
 * E[x x^T] - mean * mean^T, i.e. each entry is
 * outProdSum(row,col) / probSum - mean(row) * mean(col).
 *
 * NOTE(review): the loop only visits the non-zero entries of outProdSum, so
 * entries that are exactly zero in outProdSum keep their cloned value (0)
 * rather than receiving -mean(row)*mean(col). This looks intentional for the
 * diagonal-covariance case but may be a bug for full covariance — confirm.
 *
 * @return the covariance matrix derived from outProdSum, probSum and the mean
 */
public Matrix computeCovariance() {
    final Vector mean = computeMean();
    // Start from a clone so untouched (zero) entries of outProdSum carry over.
    final Matrix covariance = outProdSum.clone();
    final Iterator<MatrixSlice> sliceIterator = outProdSum.iterator();
    while (sliceIterator.hasNext()) {
        final MatrixSlice slice = sliceIterator.next();
        final int row = slice.index();
        for (final Vector.Element e : slice.nonZeroes()) {
            final int col = e.index();
            final double squaredSum = e.get();
            covariance.set(row, col, squaredSum / probSum - mean.get(row) * mean.get(col));
        }
    }
    return covariance;
}
From source file:edu.snu.dolphin.bsp.examples.ml.data.EuclideanDistance.java
License:Apache License
/**
 * Computes the Euclidean (L2) distance between two vectors of equal size.
 *
 * @param v1 first vector
 * @param v2 second vector; must have the same size as {@code v1}
 * @return sqrt of the sum of squared per-component differences
 * @throws IllegalArgumentException if the vector sizes differ
 */
@Override
public double distance(final Vector v1, final Vector v2) {
    if (v1.size() != v2.size()) {
        throw new IllegalArgumentException("Vector dimensions are not consistent");
    }
    double sumOfSquares = 0;
    for (int i = 0; i < v1.size(); i++) {
        // Hoist the difference: the original called v1.get(i)/v2.get(i) twice each
        // per iteration (get() is not free on sparse implementations).
        final double diff = v1.get(i) - v2.get(i);
        sumOfSquares += diff * diff;
    }
    return Math.sqrt(sumOfSquares);
}
From source file:edu.snu.dolphin.bsp.examples.ml.sub.CentroidListCodec.java
License:Apache License
@Override public byte[] encode(final List<Vector> list) { /* This codec does not assume consistent centroid vector sizes(dimensions). * Therefore to specify the initial data size, * a quick iteration over the input list to compute * the sums of vector sizes is required. *//*from ww w . java2 s . c o m*/ final int numClusters = list.size(); int dimension = 0; if (numClusters > 0) { dimension = list.get(0).size(); } final ByteArrayOutputStream baos = new ByteArrayOutputStream(Integer.SIZE * 2 // for dimension and the number of clusters + Double.SIZE * dimension * numClusters); try (final DataOutputStream daos = new DataOutputStream(baos)) { daos.writeInt(numClusters); daos.writeInt(dimension); for (final Vector centroid : list) { for (int i = 0; i < dimension; i++) { daos.writeDouble(centroid.get(i)); } } } catch (final IOException e) { throw new RuntimeException(e.getCause()); } return baos.toByteArray(); }