Example usage for org.apache.mahout.math Vector getLengthSquared

List of usage examples for org.apache.mahout.math Vector getLengthSquared

Introduction

On this page you can find example usages of org.apache.mahout.math.Vector.getLengthSquared().

Prototype

double getLengthSquared();

Source Link

Document

Return the sum of squares of all elements in the vector.

Usage

From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java

License:Open Source License

/**
 * Records a point under the given center.
 *
 * <p>Appends the array form of {@code vec} and its squared length to the per-center
 * lists, bumps the point count for {@code centerId}, and sets the {@code updated} flag.
 *
 * @param vec      the point to record
 * @param centerId index of the center the point is stored under
 */
public void add(Vector vec, int centerId) {
    List<double[]> centerPoints = points.get(centerId);
    centerPoints.add(Vectors.toArray(vec));

    List<Double> centerLengths = lengthSquared.get(centerId);
    centerLengths.add(vec.getLengthSquared());

    pointsPerCenter[centerId] += 1;
    updated = true;
}

From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java

License:Open Source License

/**
 * Finds, for every center, the stored point with the smallest distance value to {@code vec}.
 *
 * <p>The distance value for a stored point {@code p} is computed as
 * {@code vec.getLengthSquared() + lenSq(p) - 2 * dot(vec, p)}, where {@code lenSq(p)} is the
 * squared length saved for that point. (Presumably {@code dot} is the inner product, which
 * would make this the squared Euclidean distance - confirm against the helper.)
 *
 * <p>When {@code approx} is true, the indices are rebuilt first if the {@code updated} flag is
 * set, and for each center only the {@code projectionSamples} points whose index entries are
 * nearest to the query's index entry (by {@code hammingDistance}) are examined. Otherwise every
 * stored point of every center is examined.
 *
 * <p>A center with no candidate points keeps a distance of {@code Double.POSITIVE_INFINITY}
 * and a closest-point index of 0.
 *
 * @param vec    the query point
 * @param approx {@code true} to restrict the search to the index-selected candidates,
 *               {@code false} to scan all points
 * @return a {@code Distances} built from the per-center minimum distance values and the
 *         indices of the points that achieved them
 */
public Distances getDistances(Vector vec, boolean approx) {
    int[] closestPoints = new int[pointsPerCenter.length];
    double[] distances = new double[pointsPerCenter.length];

    // The query's squared length does not depend on the candidate point, so compute it once
    // instead of once per candidate.
    double vecLenSq = vec.getLengthSquared();

    if (approx) {
        if (updated) {
            buildIndices();
        }

        BitSet q = index(vec);
        for (int i = 0; i < pointsPerCenter.length; i++) {
            List<BitSet> index = indices.get(i);

            // Keep only the projectionSamples best entries; the sorted set's last element is
            // the current worst one.
            SortedSet<Idx> lookup = Sets.newTreeSet();
            for (int j = 0; j < index.size(); j++) {
                Idx idx = new Idx(hammingDistance(q, index.get(j)), j);
                if (lookup.size() < projectionSamples) {
                    lookup.add(idx);
                } else if (idx.compareTo(lookup.last()) < 0) {
                    lookup.add(idx);
                    lookup.remove(lookup.last());
                }
            }

            List<double[]> p = points.get(i);
            List<Double> lsq = lengthSquared.get(i);
            distances[i] = Double.POSITIVE_INFINITY;
            for (Idx idx : lookup) {
                double lenSq = lsq.get(idx.index);
                double d = vecLenSq + lenSq - 2 * dot(vec, p.get(idx.index));
                if (d < distances[i]) {
                    distances[i] = d;
                    closestPoints[i] = idx.index;
                }
            }
        }
    } else { // More expensive exact computation
        for (int i = 0; i < pointsPerCenter.length; i++) {
            distances[i] = Double.POSITIVE_INFINITY;
            List<double[]> px = points.get(i);
            List<Double> lsq = lengthSquared.get(i);
            for (int j = 0; j < px.size(); j++) {
                double[] p = px.get(j);
                double lenSq = lsq.get(j);
                double d = vecLenSq + lenSq - 2 * dot(vec, p);
                if (d < distances[i]) {
                    distances[i] = d;
                    closestPoints[i] = j;
                }
            }
        }
    }

    return new Distances(distances, closestPoints);
}

From source file:com.scaleunlimited.classify.vectors.UnitNormalizer.java

License:Apache License

/**
 * Scales the given vector in place so that it has unit Euclidean length.
 *
 * <p>Every non-zero coordinate, positive or negative, is divided by the vector's length.
 * A vector whose computed length is zero is left untouched, since there is nothing to scale
 * and dividing would produce non-finite values.
 *
 * @param vector the vector to normalize; modified in place
 */
@Override
public void normalize(Vector vector) {

    double length = Math.sqrt(vector.getLengthSquared());
    if (length == 0.0) {
        return;
    }

    // Divide each vector coordinate by length, so we wind up with a unit vector.
    int cardinality = vector.size();
    for (int j = 0; j < cardinality; j++) {
        double curValue = vector.getQuick(j);
        // Zero entries are skipped (dividing them changes nothing); negative entries must be
        // scaled as well, otherwise the result would not have length 1.
        if (curValue != 0.0) {
            vector.setQuick(j, curValue / length);
        }
    }
}

From source file:edu.indiana.d2i.htrc.kmeans.KMeansClusterer.java

License:Apache License

/**
 * Iterates over all clusters, identifies the one closest to the given point according to this
 * clusterer's {@code measure}, and writes the point to the context keyed by that cluster's
 * identifier.
 *
 * <p>Ties are resolved in favour of the cluster encountered first, because a later cluster
 * only replaces the current choice when its distance is strictly smaller. If
 * {@code clusters} yields no elements, no cluster is selected and the final write
 * dereferences {@code null}.
 *
 * @param point
 *          a point to find a cluster for.
 * @param clusters
 *          the clusters to test.
 * @param context
 *          the mapper context that receives the cluster identifier and the observation
 *          built from the point.
 */
public void emitPointToNearestCluster(Vector point, Iterable<Cluster> clusters,
        Mapper<?, ?, Text, ClusterObservations>.Context context) throws IOException, InterruptedException {
    Cluster best = null;
    double bestDistance = Double.MAX_VALUE;

    for (Cluster candidate : clusters) {
        Vector center = candidate.getCenter();
        double centerLengthSquared = center.getLengthSquared();
        double distance = this.measure.distance(centerLengthSquared, center, point);

        if (log.isDebugEnabled()) {
            log.debug("{} Cluster: {}", distance, candidate.getId());
        }

        // The first cluster is always accepted; later ones must be strictly closer.
        if (best == null || distance < bestDistance) {
            best = candidate;
            bestDistance = distance;
        }
    }

    Text key = new Text(best.getIdentifier());
    ClusterObservations observations = new ClusterObservations(1, point, point.times(point));
    context.write(key, observations);
}

From source file:edu.indiana.d2i.htrc.kmeans.KMeansClusterer.java

License:Apache License

/**
 * Finds the cluster whose center is closest to {@code vector} according to {@code measure}
 * and writes the vector to the context keyed by that cluster's id, together with a
 * {@code "distance"} property holding the distance to the chosen cluster.
 *
 * <p>Ties go to the cluster encountered first. If {@code clusters} yields no elements, no
 * cluster is selected and the final write dereferences {@code null}.
 *
 * @param vector
 *          the point to assign.
 * @param clusters
 *          the clusters to test.
 * @param context
 *          the mapper context that receives the cluster id and the weighted vector.
 */
public void outputPointWithClusterInfo(Vector vector, Iterable<Cluster> clusters,
        Mapper<?, ?, IntWritable, WeightedPropertyVectorWritable>.Context context)
        throws IOException, InterruptedException {
    AbstractCluster best = null;
    double bestDistance = Double.MAX_VALUE;

    for (AbstractCluster candidate : clusters) {
        Vector center = candidate.getCenter();
        double centerLengthSquared = center.getLengthSquared();
        double distance = measure.distance(centerLengthSquared, center, vector);

        // The first cluster is always accepted; later ones must be strictly closer.
        if (best == null || distance < bestDistance) {
            best = candidate;
            bestDistance = distance;
        }
    }

    Map<Text, Text> props = new HashMap<Text, Text>();
    Text distanceKey = new Text("distance");
    Text distanceValue = new Text(String.valueOf(bestDistance));
    props.put(distanceKey, distanceValue);

    IntWritable clusterId = new IntWritable(best.getId());
    context.write(clusterId, new WeightedPropertyVectorWritable(1, vector, props));
}

From source file:edu.indiana.d2i.htrc.kmeans.KMeansClusterer.java

License:Apache License

/**
 * Iterates over all clusters, identifies the one closest to the given point according to this
 * clusterer's {@code measure}, and appends the point to the writer keyed by that cluster's id.
 *
 * <p>Ties are resolved in favour of the cluster encountered first, because a later cluster
 * only replaces the current choice when its distance is strictly smaller. If
 * {@code clusters} yields no elements, no cluster is selected and the final append
 * dereferences {@code null}.
 *
 * @param point
 *          a point to find a cluster for.
 * @param clusters
 *          the clusters to test.
 * @param writer
 *          the writer that receives the cluster id and the weighted point.
 */
protected void emitPointToNearestCluster(Vector point, Iterable<Cluster> clusters, Writer writer)
        throws IOException {
    AbstractCluster best = null;
    double bestDistance = Double.MAX_VALUE;

    for (AbstractCluster candidate : clusters) {
        Vector center = candidate.getCenter();
        double centerLengthSquared = center.getLengthSquared();
        double distance = this.measure.distance(centerLengthSquared, center, point);

        if (log.isDebugEnabled()) {
            log.debug("{} Cluster: {}", distance, candidate.getId());
        }

        // The first cluster is always accepted; later ones must be strictly closer.
        if (best == null || distance < bestDistance) {
            best = candidate;
            bestDistance = distance;
        }
    }

    IntWritable clusterId = new IntWritable(best.getId());
    WeightedVectorWritable weightedPoint = new WeightedVectorWritable(1, point);
    writer.append(clusterId, weightedPoint);
}