Example usage for org.apache.mahout.math Vector size

List of usage examples for org.apache.mahout.math Vector size

Introduction

On this page you can find example usages of org.apache.mahout.math Vector size.

Prototype

int size();

Source Link

Document

Returns the cardinality of the recipient (the maximum number of values).

Usage

From source file:edu.snu.cms.reef.ml.kmeans.CentroidListCodecTest.java

License:Apache License

/**
 * Fills {@code list} with a random number (0 to 999) of centroids. Each centroid
 * wraps a dense vector of random dimension (0 to 999) whose elements are random
 * doubles, and carries a random integer id.
 */
@Before
public final void setUp() {
    // Draw the centroid count exactly once. Evaluating Math.random() inside the
    // loop condition re-rolls the bound on every iteration, so the loop stops at
    // the first roll that is <= j and the list ends up far smaller than intended.
    final int numCentroids = (int) (Math.random() * 1000);
    for (int j = 0; j < numCentroids; j++) {
        final Vector vector = new DenseVector((int) (Math.random() * 1000));
        for (int i = 0; i < vector.size(); i++) {
            vector.set(i, Math.random());
        }
        final Centroid centroid = new Centroid((int) (Math.random() * 1000000), vector);
        list.add(centroid);
    }
}

From source file:edu.snu.cms.reef.ml.kmeans.data.Centroid.java

License:Apache License

/**
 * Copy constructor producing a deep copy of the given centroid.
 *
 * The cluster id is copied and the vector elements are duplicated into a
 * freshly allocated dense vector, so the new Centroid does not share its
 * vector with the original.
 *
 * @param centroid the centroid to copy
 */
public Centroid(final Centroid centroid) {
    this.clusterId = centroid.clusterId;

    final Vector source = centroid.vector;
    final Vector copy = new DenseVector(source.size());
    int index = 0;
    while (index < copy.size()) {
        copy.set(index, source.get(index));
        index++;
    }

    this.vector = copy;
}

From source file:edu.snu.cms.reef.ml.kmeans.data.EuclideanDistance.java

License:Apache License

/**
 * Computes the Euclidean distance between two vectors of equal size.
 *
 * @param v1 the first vector
 * @param v2 the second vector
 * @return the square root of the sum of squared element-wise differences
 * @throws IllegalArgumentException if the two vectors differ in size
 */
@Override
public final double distance(Vector v1, Vector v2) {
    final int dimension = v1.size();
    if (dimension != v2.size()) {
        throw new IllegalArgumentException("Vector dimensions are not consistent");
    }

    double squaredSum = 0;
    int index = 0;
    while (index < dimension) {
        final double diff = v1.get(index) - v2.get(index);
        squaredSum += diff * diff;
        index++;
    }
    return Math.sqrt(squaredSum);
}

From source file:edu.snu.cms.reef.ml.kmeans.data.VectorSum.java

License:Apache License

/**
 * Creates a vector sum from an accumulated vector and a count.
 *
 * @param sum the accumulated vector
 * @param count the count stored alongside the sum
 * @param isDeepCopy if true, the elements of {@code sum} are copied into a new
 *                   dense vector; if false, the given instance is kept by reference
 */
public VectorSum(final Vector sum, final int count, final boolean isDeepCopy) {
    this.count = count;
    if (!isDeepCopy) {
        // Share the caller's vector instance.
        this.sum = sum;
    } else {
        final Vector copy = new DenseVector(sum.size());
        int index = 0;
        while (index < copy.size()) {
            copy.set(index, sum.get(index));
            index++;
        }
        this.sum = copy;
    }
}

From source file:edu.snu.cms.reef.ml.kmeans.data.VectorSum.java

License:Apache License

/**
 * Builds a new dense vector holding each element of {@code sum} divided by {@code count}.
 *
 * @return a newly allocated vector of the same size as {@code sum}
 */
public final Vector computeVectorMean() {
    final Vector result = new DenseVector(sum.size());
    int index = 0;
    while (index < result.size()) {
        result.set(index, sum.get(index) / count);
        index++;
    }
    return result;
}

From source file:edu.snu.cms.reef.ml.kmeans.groupcomm.subs.VectorListCodec.java

License:Apache License

/**
 * Serializes a list of vectors into a byte array.
 *
 * Layout: the list size as an int, then for every vector its size as an int
 * followed by each of its elements as a double.
 *
 * @param list the vectors to encode; their sizes may differ
 * @return the encoded bytes
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
@Override
public final byte[] encode(final List<Vector> list) {

    /* This codec does not assume consistent centroid vector sizes (dimensions).
     * Therefore, to specify the initial buffer size, a quick pass over the
     * input list is needed to sum up the vector sizes.
     */
    int vectorSizeSum = 0;
    for (final Vector vector : list) {
        vectorSizeSum += vector.size();
    }

    // Integer.SIZE and Double.SIZE are widths in BITS; the buffer capacity is in
    // bytes. Using the bit widths directly over-allocated by 8x and made the int
    // arithmetic overflow (negative capacity) for much smaller inputs.
    final int intBytes = Integer.SIZE / Byte.SIZE;
    final int doubleBytes = Double.SIZE / Byte.SIZE;

    final ByteArrayOutputStream baos = new ByteArrayOutputStream(
            intBytes + intBytes * list.size() + doubleBytes * vectorSizeSum);
    try (final DataOutputStream daos = new DataOutputStream(baos)) {
        daos.writeInt(list.size());

        for (final Vector vector : list) {
            daos.writeInt(vector.size());

            for (int i = 0; i < vector.size(); i++) {
                daos.writeDouble(vector.get(i));
            }
        }
    } catch (final IOException e) {
        // Wrap the exception itself: e.getCause() is frequently null, which would
        // discard both the message and the stack trace of the real failure.
        throw new RuntimeException(e);
    }

    return baos.toByteArray();
}

From source file:edu.snu.cms.reef.ml.kmeans.groupcomm.subs.VectorListCodec.java

License:Apache License

/**
 * Deserializes a byte array into a list of vectors.
 *
 * Expected layout: the list size as an int, then for every vector its size as
 * an int followed by that many doubles.
 *
 * @param data the encoded bytes
 * @return the decoded vectors, each a newly allocated dense vector
 * @throws RuntimeException wrapping any {@link IOException} raised while reading,
 *                          e.g. when {@code data} is truncated
 */
public final List<Vector> decode(final byte[] data) {
    final ByteArrayInputStream bais = new ByteArrayInputStream(data);
    final List<Vector> resultList = new ArrayList<>();

    try (final DataInputStream dais = new DataInputStream(bais)) {
        final int listSize = dais.readInt();

        for (int i = 0; i < listSize; i++) {
            final Vector vector = new DenseVector(dais.readInt());

            for (int j = 0; j < vector.size(); j++) {
                vector.set(j, dais.readDouble());
            }
            resultList.add(vector);
        }

    } catch (final IOException e) {
        // Wrap the exception itself: an EOFException from truncated input has no
        // cause, so wrapping e.getCause() would throw an empty RuntimeException.
        throw new RuntimeException(e);
    }

    return resultList;
}

From source file:edu.snu.cms.reef.ml.kmeans.MapOfIntVSumCodecTest.java

License:Apache License

/**
 * Fills {@code map} with 1000 entries keyed 0 to 999. Each value is a VectorSum
 * built from a randomly sized dense vector of random doubles and a random count.
 */
@Before
public final void setUp() throws Exception {
    int key = 0;
    while (key < 1000) {
        final Vector randomVector = new DenseVector((int) (Math.random() * 1000));
        int index = 0;
        while (index < randomVector.size()) {
            randomVector.set(index, Math.random());
            index++;
        }

        final VectorSum vectorSum = new VectorSum(randomVector, (int) (Math.random() * 1000));
        map.put(key, vectorSum);
        key++;
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCmpTask.java

License:Apache License

/**
 * Rebuilds {@code clusterToStats} by accumulating, for every point, the
 * posterior-weighted statistics of each cluster.
 *
 * @param iteration the iteration number (not referenced by this method)
 */
@Override
public void run(final int iteration) {
    clusterToStats = new HashMap<>();
    final int numClusters = clusterSummaries.size();

    // Compute the partial statistics of each cluster, one point at a time
    for (final Vector point : points) {
        final int dim = point.size();

        // Outer product of the point with itself; only the diagonal is filled
        // in when the covariance is restricted to be diagonal.
        final Matrix outerProduct;
        if (!isCovarianceDiagonal) {
            outerProduct = point.cross(point);
        } else {
            outerProduct = new SparseMatrix(dim, dim);
            for (int d = 0; d < dim; d++) {
                outerProduct.set(d, d, point.get(d) * point.get(d));
            }
        }

        // Per-cluster weights of this point and their total
        final double[] weights = new double[numClusters];
        double weightSum = 0;
        for (int c = 0; c < numClusters; c++) {
            final ClusterSummary summary = clusterSummaries.get(c);
            final Vector center = summary.getCentroid();
            final Matrix cov = summary.getCovariance();
            final Double prior = summary.getPrior();

            final Vector offset = point.minus(center);
            final double scale = prior / Math.sqrt(cov.determinant());
            final double exponent = offset.dot(inverse(cov).times(offset)) / (-2);
            weights[c] = scale * Math.exp(exponent);
            weightSum += weights[c];
        }

        for (int c = 0; c < numClusters; c++) {
            // Fall back to a uniform posterior when every weight is zero
            final double posterior;
            if (weightSum == 0) {
                posterior = 1.0 / weights.length;
            } else {
                posterior = weights[c] / weightSum;
            }

            final ClusterStats contribution =
                    new ClusterStats(times(outerProduct, posterior), point.times(posterior), posterior, false);
            if (clusterToStats.containsKey(c)) {
                clusterToStats.get(c).add(contribution);
            } else {
                clusterToStats.put(c, contribution);
            }
        }
    }
}

From source file:edu.snu.dolphin.bsp.examples.ml.algorithms.clustering.em.EMMainCtrlTask.java

License:Apache License

/**
 * Loads the initial centroids from the key-value store and adds one cluster
 * summary per centroid, each with value 1.0, the centroid itself, and an
 * identity matrix of the centroid's dimension.
 */
@Override
public void initialize() {

    // Fetch the centroids stored under the Centroids key
    centroids = keyValueStore.get(Centroids.class);

    // Seed one summary per centroid
    final int clusterCount = centroids.size();
    int clusterIndex = 0;
    while (clusterIndex < clusterCount) {
        final Vector centroid = centroids.get(clusterIndex);
        final int dim = centroid.size();
        final ClusterSummary summary = new ClusterSummary(1.0, centroid, DiagonalMatrix.identity(dim));
        clusterSummaries.add(summary);
        clusterIndex++;
    }
}