Usage examples for the getLengthSquared() method of org.apache.mahout.math.Vector
/**
 * Returns the squared length of this vector as a {@code double}.
 *
 * <p>NOTE(review): presumably this is the sum of the squared elements, so that
 * {@code Math.sqrt(getLengthSquared())} is the Euclidean length -- confirm against
 * the Mahout Javadoc.
 *
 * @return the squared length of the vector
 */
double getLengthSquared();
From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java
License:Open Source License
/**
 * Stores a point under the given center and flags the index as updated.
 *
 * <p>Both the array form of the point and its squared length are appended to the
 * per-center lists, and the per-center point count is incremented.
 *
 * @param vec the point to store
 * @param centerId position of the owning center in the per-center collections
 */
public void add(Vector vec, int centerId) {
    // Keep the array form of the point next to its precomputed squared length.
    List<double[]> centerPoints = points.get(centerId);
    centerPoints.add(Vectors.toArray(vec));

    List<Double> centerLengths = lengthSquared.get(centerId);
    centerLengths.add(vec.getLengthSquared());

    pointsPerCenter[centerId] += 1;

    // Tells getDistances() that the indices need to be rebuilt before an approximate lookup.
    updated = true;
}
From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java
License:Open Source License
public Distances getDistances(Vector vec, boolean approx) { int[] closestPoints = new int[pointsPerCenter.length]; double[] distances = new double[pointsPerCenter.length]; if (approx) { if (updated) { buildIndices();/* w w w .j a v a 2 s . c om*/ } BitSet q = index(vec); for (int i = 0; i < pointsPerCenter.length; i++) { List<BitSet> index = indices.get(i); SortedSet<Idx> lookup = Sets.newTreeSet(); for (int j = 0; j < index.size(); j++) { Idx idx = new Idx(hammingDistance(q, index.get(j)), j); if (lookup.size() < projectionSamples) { lookup.add(idx); } else if (idx.compareTo(lookup.last()) < 0) { lookup.add(idx); lookup.remove(lookup.last()); } } List<double[]> p = points.get(i); distances[i] = Double.POSITIVE_INFINITY; for (Idx idx : lookup) { double lenSq = lengthSquared.get(i).get(idx.index); double d = vec.getLengthSquared() + lenSq - 2 * dot(vec, p.get(idx.index)); if (d < distances[i]) { distances[i] = d; closestPoints[i] = idx.index; } } } } else { // More expensive exact computation for (int i = 0; i < pointsPerCenter.length; i++) { distances[i] = Double.POSITIVE_INFINITY; List<double[]> px = points.get(i); List<Double> lsq = lengthSquared.get(i); for (int j = 0; j < px.size(); j++) { double[] p = px.get(j); double lenSq = lsq.get(j); double d = vec.getLengthSquared() + lenSq - 2 * dot(vec, p); if (d < distances[i]) { distances[i] = d; closestPoints[i] = j; } } } } return new Distances(distances, closestPoints); }
From source file:com.scaleunlimited.classify.vectors.UnitNormalizer.java
License:Apache License
@Override public void normalize(Vector vector) { double length = Math.sqrt(vector.getLengthSquared()); // Divide each vector coordinate by length, so we wind up with a unit vector. int cardinality = vector.size(); for (int j = 0; j < cardinality; j++) { double curValue = vector.getQuick(j); if (curValue > 0.0) { vector.setQuick(j, curValue / length); }//from w w w . j av a 2 s.c o m } }
From source file:edu.indiana.d2i.htrc.kmeans.KMeansClusterer.java
License:Apache License
/**
 * Scans all clusters for the one whose center is closest to the given point, using
 * this clusterer's distance measure, and writes a single observation of the point
 * to the context, keyed by that cluster's identifier.
 *
 * @param point
 *          a point to find a cluster for.
 * @param clusters
 *          the candidate clusters to test.
 * @param context
 *          the mapper context that receives the (identifier, observation) pair.
 * @throws IOException
 *           if writing to the context fails.
 * @throws InterruptedException
 *           if writing to the context is interrupted.
 */
public void emitPointToNearestCluster(Vector point, Iterable<Cluster> clusters,
        Mapper<?, ?, Text, ClusterObservations>.Context context) throws IOException, InterruptedException {
    double bestDistance = Double.MAX_VALUE;
    Cluster best = null;

    for (Cluster candidate : clusters) {
        Vector center = candidate.getCenter();
        double candidateDistance = this.measure.distance(center.getLengthSquared(), center, point);
        if (log.isDebugEnabled()) {
            log.debug("{} Cluster: {}", candidateDistance, candidate.getId());
        }
        // The null check makes the first cluster win even if its distance is not below MAX_VALUE.
        if (best == null || candidateDistance < bestDistance) {
            bestDistance = candidateDistance;
            best = candidate;
        }
    }

    Text key = new Text(best.getIdentifier());
    ClusterObservations observation = new ClusterObservations(1, point, point.times(point));
    context.write(key, observation);
}
From source file:edu.indiana.d2i.htrc.kmeans.KMeansClusterer.java
License:Apache License
public void outputPointWithClusterInfo(Vector vector, Iterable<Cluster> clusters, Mapper<?, ?, IntWritable, WeightedPropertyVectorWritable>.Context context) throws IOException, InterruptedException { AbstractCluster nearestCluster = null; double nearestDistance = Double.MAX_VALUE; for (AbstractCluster cluster : clusters) { Vector clusterCenter = cluster.getCenter(); double distance = measure.distance(clusterCenter.getLengthSquared(), clusterCenter, vector); if (distance < nearestDistance || nearestCluster == null) { nearestCluster = cluster;// w w w .j a v a 2 s .c o m nearestDistance = distance; } } Map<Text, Text> props = new HashMap<Text, Text>(); props.put(new Text("distance"), new Text(String.valueOf(nearestDistance))); context.write(new IntWritable(nearestCluster.getId()), new WeightedPropertyVectorWritable(1, vector, props)); }
From source file:edu.indiana.d2i.htrc.kmeans.KMeansClusterer.java
License:Apache License
/**
 * Scans all clusters for the one whose center is closest to the given point, using
 * this clusterer's distance measure, and appends the point to the writer, keyed by
 * that cluster's id.
 *
 * @param point
 *          a point to find a cluster for.
 * @param clusters
 *          the candidate clusters to test.
 * @param writer
 *          the writer that receives the (cluster id, weighted vector) pair.
 * @throws IOException
 *           if appending to the writer fails.
 */
protected void emitPointToNearestCluster(Vector point, Iterable<Cluster> clusters, Writer writer)
        throws IOException {
    double bestDistance = Double.MAX_VALUE;
    AbstractCluster best = null;

    for (AbstractCluster candidate : clusters) {
        Vector center = candidate.getCenter();
        double candidateDistance = this.measure.distance(center.getLengthSquared(), center, point);
        if (log.isDebugEnabled()) {
            log.debug("{} Cluster: {}", candidateDistance, candidate.getId());
        }
        // The null check makes the first cluster win even if its distance is not below MAX_VALUE.
        if (best == null || candidateDistance < bestDistance) {
            bestDistance = candidateDistance;
            best = candidate;
        }
    }

    IntWritable clusterId = new IntWritable(best.getId());
    writer.append(clusterId, new WeightedVectorWritable(1, point));
}