Example usage for org.apache.mahout.math Vector get

List of usage examples for org.apache.mahout.math Vector get

Introduction

On this page you can find example usages of the org.apache.mahout.math.Vector method get.

Prototype

double get(int index);

Source Link

Document

Return the value at the given index

Usage

From source file:edu.indiana.d2i.htrc.io.SparseVectorUtil.java

License:Apache License

/**
 * Tokenizes {@code text} with {@code analyzer} and counts the lower-cased tokens that
 * {@code filter} accepts, accumulating each count at the index the dictionary returns
 * for the token.
 *
 * @param text the raw text to tokenize
 * @param field the field name handed to the analyzer
 * @param analyzer produces the token stream for {@code text}
 * @param filter decides which lower-cased terms are counted
 * @param dictionary maps an accepted term to its index in the result vector
 * @return a {@code RandomAccessSparseVector} created with {@code dictionary.size()} that
 *         holds the accumulated counts
 * @throws IOException if reading from the token stream fails
 */
public static Vector transform2Vector(String text, String field, Analyzer analyzer, HTRCFilter filter,
        Dictionary dictionary) throws IOException {
    Vector result = new RandomAccessSparseVector(dictionary.size());

    TokenStream stream = analyzer.reusableTokenStream(field, new StringReader(text));
    try {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            String term = new String(termAtt.buffer(), 0, termAtt.length()).toLowerCase();
            if (filter.accept(term, 0)) {
                int index = dictionary.get(term);
                result.setQuick(index, result.get(index) + 1);
            }
        }
        // Complete the TokenStream consumer workflow: end() after the last token ...
        stream.end();
    } finally {
        // ... and always close(), so the reusable stream releases its reader even on failure.
        stream.close();
    }

    return result;
}

From source file:edu.snu.cms.reef.ml.kmeans.data.EuclideanDistance.java

License:Apache License

/**
 * Computes the Euclidean distance between two vectors of equal size.
 *
 * @param v1 the first vector
 * @param v2 the second vector
 * @return the square root of the sum of squared component differences
 * @throws IllegalArgumentException if the two vectors report different sizes
 */
@Override
public final double distance(Vector v1, Vector v2) {
    final int dimension = v1.size();
    if (dimension != v2.size()) {
        throw new IllegalArgumentException("Vector dimensions are not consistent");
    }

    double squaredSum = 0;
    for (int idx = 0; idx < dimension; idx++) {
        final double delta = v1.get(idx) - v2.get(idx);
        squaredSum += delta * delta;
    }
    return Math.sqrt(squaredSum);
}

From source file:edu.snu.cms.reef.ml.kmeans.data.VectorSum.java

License:Apache License

/**
 * Creates a vector sum from an existing vector and a count.
 *
 * @param sum the vector to store
 * @param count the count to store
 * @param isDeepCopy if {@code true}, the elements of {@code sum} are copied into a new
 *        {@code DenseVector}; otherwise the given {@code sum} reference is stored as is
 */
public VectorSum(final Vector sum, final int count, final boolean isDeepCopy) {
    this.count = count;

    final Vector stored;
    if (!isDeepCopy) {
        stored = sum;
    } else {
        final Vector copy = new DenseVector(sum.size());
        final int length = copy.size();
        for (int idx = 0; idx < length; idx++) {
            copy.set(idx, sum.get(idx));
        }
        stored = copy;
    }
    this.sum = stored;
}

From source file:edu.snu.cms.reef.ml.kmeans.groupcomm.subs.VectorListCodec.java

License:Apache License

/**
 * Serializes a list of vectors into a byte array.
 *
 * <p>Layout: the number of vectors as an int, then for each vector its size as an int
 * followed by that many doubles.
 *
 * @param list the vectors to encode; their sizes may differ from one another
 * @return the encoded bytes
 * @throws RuntimeException wrapping the {@link IOException} if writing fails
 */
@Override
public final byte[] encode(final List<Vector> list) {

    /* This codec does not assume consistent centroid vector sizes(dimensions).
     * Therefore to specify the initial data size,
     * a quick iteration over the input list to compute
     * the sums of vector sizes is required.
     */
    int vectorSizeSum = 0;
    for (final Vector vector : list) {
        vectorSizeSum += vector.size();
    }

    // Integer.SIZE and Double.SIZE are bit counts; the stream capacity is given in bytes.
    final int intBytes = Integer.SIZE / Byte.SIZE;
    final int doubleBytes = Double.SIZE / Byte.SIZE;

    final ByteArrayOutputStream baos = new ByteArrayOutputStream(
            intBytes + intBytes * list.size() + doubleBytes * vectorSizeSum);
    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(list.size());

        for (final Vector vector : list) {
            daos.writeInt(vector.size());

            for (int i = 0; i < vector.size(); i++) {
                daos.writeDouble(vector.get(i));
            }
        }
    } catch (final IOException e) {
        // Wrap the exception itself; e.getCause() is typically null and would lose the failure.
        throw new RuntimeException(e);
    }

    return baos.toByteArray();
}

From source file:edu.snu.cms.reef.ml.kmeans.VectorSumTest.java

License:Apache License

/**
 * Builds two random VectorSums of the same dimension, merges the second into the first
 * with VectorSum.add(), and verifies that the result matches the element-wise sum of the
 * two vectors and the sum of the two counts.
 */
@Test
public final void testAddSum() {
    final int dimension = (int) (Math.random() * 1000);
    final double tolerance = 0.001;

    final Vector first = new DenseVector(dimension);
    for (int idx = 0; idx < dimension; idx++) {
        first.set(idx, Math.random());
    }
    final VectorSum firstSum = new VectorSum(first, (int) (Math.random() * 1000));

    final Vector second = new DenseVector(dimension);
    for (int idx = 0; idx < dimension; idx++) {
        second.set(idx, Math.random());
    }
    final VectorSum secondSum = new VectorSum(second, (int) (Math.random() * 1000));

    // The expectation is computed before add() is invoked on firstSum.
    final Vector expectedVector = new DenseVector(dimension);
    for (int idx = 0; idx < dimension; idx++) {
        final double total = first.get(idx) + second.get(idx);
        expectedVector.set(idx, total);
    }
    final int expectedCount = firstSum.count + secondSum.count;
    final VectorSum expected = new VectorSum(expectedVector, expectedCount);

    firstSum.add(secondSum);

    assertEquals(expected.sum.size(), firstSum.sum.size());
    for (int idx = 0; idx < dimension; idx++) {
        assertEquals(expected.sum.get(idx), firstSum.sum.get(idx), tolerance);
    }
    assertEquals(expected.count, firstSum.count);
}

From source file:edu.snu.cms.reef.ml.kmeans.VectorSumTest.java

License:Apache License

/**
 * Builds a random VectorSum and a random Vector of the same dimension, adds the vector
 * with VectorSum.add(), and verifies that the result matches the element-wise sum and
 * that the count grew by one.
 */
@Test
public final void testAddVector() {
    final int dimension = (int) (Math.random() * 1000);
    final double tolerance = 0.001;

    final Vector base = new DenseVector(dimension);
    for (int idx = 0; idx < dimension; idx++) {
        base.set(idx, Math.random());
    }
    final VectorSum accumulator = new VectorSum(base, (int) (Math.random() * 1000));

    final Vector addend = new DenseVector(dimension);
    for (int idx = 0; idx < dimension; idx++) {
        addend.set(idx, Math.random());
    }

    // The expectation is computed before add() is invoked on the accumulator.
    final Vector expectedVector = new DenseVector(dimension);
    for (int idx = 0; idx < dimension; idx++) {
        final double total = base.get(idx) + addend.get(idx);
        expectedVector.set(idx, total);
    }
    final int expectedCount = accumulator.count + 1;
    final VectorSum expected = new VectorSum(expectedVector, expectedCount);

    accumulator.add(addend);

    assertEquals(expected.sum.size(), accumulator.sum.size());
    for (int idx = 0; idx < dimension; idx++) {
        assertEquals(expected.sum.get(idx), accumulator.sum.get(idx), tolerance);
    }
    assertEquals(expected.count, accumulator.count);
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java

License:Apache License

/**
 * Recomputes {@code clusterToStats} from scratch: for every point, a posterior weight per
 * cluster is derived from the current cluster summaries, and the weighted outer product,
 * weighted point and weight are accumulated into that cluster's statistics.
 *
 * @param iteration not read by this method
 */
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int numClusters = clusterSummaries.size();

    for (final Vector point : points) {
        final int dimension = point.size();

        // Outer product of the point with itself; only the diagonal when covariance is diagonal.
        final Matrix outProd;
        if (!isCovarianceDiagonal) {
            outProd = point.cross(point);
        } else {
            outProd = new SparseMatrix(dimension, dimension);
            for (int d = 0; d < dimension; d++) {
                outProd.set(d, d, point.get(d) * point.get(d));
            }
        }

        // Unnormalized weight of the point under each cluster, and their total.
        final double[] numerators = new double[numClusters];
        double denominator = 0;
        for (int k = 0; k < numClusters; k++) {
            final ClusterSummary summary = clusterSummaries.get(k);
            final Vector centroid = summary.getCentroid();
            final Matrix covariance = summary.getCovariance();
            final Double prior = summary.getPrior();

            final Vector differ = point.minus(centroid);
            numerators[k] = prior / Math.sqrt(covariance.determinant())
                    * Math.exp(differ.dot(inverse(covariance).times(differ)) / (-2));
            denominator += numerators[k];
        }

        for (int k = 0; k < numClusters; k++) {
            // When all weights are zero, fall back to a uniform split across clusters.
            final double posterior;
            if (denominator == 0) {
                posterior = 1.0 / numerators.length;
            } else {
                posterior = numerators[k] / denominator;
            }

            final ClusterStats contribution =
                    new ClusterStats(times(outProd, posterior), point.times(posterior), posterior, false);
            if (clusterToStats.containsKey(k)) {
                clusterToStats.get(k).add(contribution);
            } else {
                clusterToStats.put(k, contribution);
            }
        }
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.data.ClusterStats.java

License:Apache License

/**
 * Compute the covariance matrix from the statistics.
 *
 * <p>Starts from a clone of {@code outProdSum}. For every non-zero entry (row, col) of
 * {@code outProdSum}, the clone's entry is set to
 * {@code value / probSum - mean(row) * mean(col)}, where the mean comes from
 * {@code computeMean()}.
 *
 * @return the resulting matrix
 */
public Matrix computeCovariance() {
    final Vector mean = computeMean();
    final Matrix covariance = outProdSum.clone();

    for (final Iterator<MatrixSlice> rows = outProdSum.iterator(); rows.hasNext();) {
        final MatrixSlice slice = rows.next();
        final int row = slice.index();
        final double rowMean = mean.get(row);
        for (final Vector.Element entry : slice.nonZeroes()) {
            final int col = entry.index();
            final double squaredSum = entry.get();
            covariance.set(row, col, squaredSum / probSum - rowMean * mean.get(col));
        }
    }
    return covariance;
}

From source file:edu.snu.dolphin.bsp.examples.ml.data.EuclideanDistance.java

License:Apache License

/**
 * Returns the Euclidean distance between {@code v1} and {@code v2}.
 *
 * @param v1 the first vector
 * @param v2 the second vector
 * @return the square root of the summed squared differences of corresponding components
 * @throws IllegalArgumentException if the sizes of the two vectors differ
 */
@Override
public double distance(final Vector v1, final Vector v2) {
    final int length = v1.size();
    if (length != v2.size()) {
        throw new IllegalArgumentException("Vector dimensions are not consistent");
    }

    double sumOfSquares = 0;
    int pos = 0;
    while (pos < length) {
        final double gap = v1.get(pos) - v2.get(pos);
        sumOfSquares += gap * gap;
        pos++;
    }
    return Math.sqrt(sumOfSquares);
}

From source file:edu.snu.dolphin.bsp.examples.ml.sub.CentroidListCodec.java

License:Apache License

/**
 * Serializes a list of centroid vectors into a byte array.
 *
 * <p>Layout: the number of vectors as an int, the dimension as an int, then for each
 * vector {@code dimension} doubles. The dimension is taken from the first vector in the
 * list (0 for an empty list), and exactly that many components are read from every vector.
 *
 * @param list the centroid vectors to encode
 * @return the encoded bytes
 * @throws RuntimeException wrapping the {@link IOException} if writing fails
 */
@Override
public byte[] encode(final List<Vector> list) {

    /* Every vector is written with the dimension of the first one,
     * so the total data size is known up front.
     */
    final int numClusters = list.size();
    int dimension = 0;
    if (numClusters > 0) {
        dimension = list.get(0).size();
    }

    // Integer.SIZE and Double.SIZE are bit counts; the stream capacity is given in bytes.
    final int intBytes = Integer.SIZE / Byte.SIZE;
    final int doubleBytes = Double.SIZE / Byte.SIZE;

    final ByteArrayOutputStream baos = new ByteArrayOutputStream(intBytes * 2 // for dimension and the number of clusters
            + doubleBytes * dimension * numClusters);
    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(numClusters);
        daos.writeInt(dimension);
        for (final Vector centroid : list) {
            for (int i = 0; i < dimension; i++) {
                daos.writeDouble(centroid.get(i));
            }
        }
    } catch (final IOException e) {
        // Wrap the exception itself; e.getCause() is typically null and would lose the failure.
        throw new RuntimeException(e);
    }

    return baos.toByteArray();
}