org.apache.mahout.clustering.meanshift.MeanShiftCanopy.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.mahout.clustering.meanshift.MeanShiftCanopy.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.clustering.meanshift;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.commons.lang.NotImplementedException;
import org.apache.mahout.clustering.kmeans.Cluster;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.list.IntArrayList;

/**
 * This class models a canopy as a center point, the number of points that are contained within it according
 * to the application of some distance metric, and a point total which is the sum of all the points and is
 * used to compute the centroid when needed.
 * <p>
 * On top of the state inherited from {@link Cluster}, a mean shift canopy keeps the ids of the points
 * bound to it in an {@link IntArrayList}. That list is serialized after the superclass fields as an int
 * count followed by exactly that many int ids.
 */
public class MeanShiftCanopy extends Cluster {

    // TODO: this is still problematic from a scalability perspective, but how else to encode membership?
    private IntArrayList boundPoints = new IntArrayList();

    /**
     * Used for Writable. Leaves the canopy with an empty bound point list; the remaining state is expected
     * to be filled in by {@link #readFields(DataInput)} or the setters.
     */
    public MeanShiftCanopy() {
    }

    /**
     * Create a new Canopy containing the given point. The canopy id is recorded as its first bound point.
     * 
     * @param point   a Vector
     * @param id      an int canopy id
     * @param measure a DistanceMeasure
     */
    public MeanShiftCanopy(Vector point, int id, DistanceMeasure measure) {
        super(point, id, measure);
        boundPoints.add(id);
    }

    /**
     * Create an initial Canopy, retaining the original type of the given point (e.g. NamedVector)
     * @param point a Vector
     * @param id an int
     * @param measure a DistanceMeasure
     * @return a MeanShiftCanopy whose center is the very {@code point} instance passed in
     */
    public static MeanShiftCanopy initialCanopy(Vector point, int id, DistanceMeasure measure) {
        MeanShiftCanopy result = new MeanShiftCanopy(point, id, measure);
        // overwrite center so original point type is retained
        result.setCenter(point);
        return result;
    }

    /**
     * Create a new Canopy containing the given point, id and bound points. The radius is initialized from
     * {@code point.like()} and the number of points is set to 1.
     * 
     * @param point
     *          a Vector
     * @param id
     *          an int identifying the canopy local to this process only
     * @param boundPoints
     *          a IntArrayList containing points ids bound to the canopy; stored by reference, not copied
     * @param converged
     *          true if the canopy has converged
     */
    MeanShiftCanopy(Vector point, int id, IntArrayList boundPoints, boolean converged) {
        this.setId(id);
        this.setCenter(point);
        this.setRadius(point.like());
        this.setNumPoints(1);
        this.boundPoints = boundPoints;
        setConverged(converged);
    }

    /**
     * @return the live list of point ids bound to this canopy (not a copy)
     */
    public IntArrayList getBoundPoints() {
        return boundPoints;
    }

    /**
     * The receiver overlaps the given canopy. Add the given canopy's bound points to the receiver; the
     * given canopy itself is not modified.
     * 
     * @param canopy
     *          an existing MeanShiftCanopy
     */
    void merge(MeanShiftCanopy canopy) {
        boundPoints.addAllOf(canopy.boundPoints);
    }

    /**
     * The receiver touches the given canopy. Add respective centers: each canopy observes the other's
     * center, passing along the other's number of bound points.
     * 
     * @param canopy
     *          an existing MeanShiftCanopy
     */
    void touch(MeanShiftCanopy canopy) {
        canopy.observe(getCenter(), boundPoints.size());
        observe(canopy.getCenter(), canopy.boundPoints.size());
    }

    /**
     * Reads the superclass fields, then an int count followed by that many int bound point ids. Any
     * previously held bound point list is replaced by a fresh one.
     *
     * @param in the DataInput to read from
     * @throws IOException if the underlying input fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        super.readFields(in);
        int numpoints = in.readInt();
        this.boundPoints = new IntArrayList();
        for (int i = 0; i < numpoints; i++) {
            this.boundPoints.add(in.readInt());
        }
    }

    /**
     * Writes the superclass fields, then the number of bound points followed by exactly that many ids,
     * mirroring what {@link #readFields(DataInput)} consumes.
     *
     * @param out the DataOutput to write to
     * @throws IOException if the underlying output fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        super.write(out);
        int size = boundPoints.size();
        out.writeInt(size);
        // Iterate by index up to size(): IntArrayList.elements() exposes the raw backing array, which may
        // be longer than size() and would emit unused capacity slots that readFields never consumes.
        for (int i = 0; i < size; i++) {
            out.writeInt(boundPoints.get(i));
        }
    }

    /**
     * Creates a new canopy sharing this canopy's measure, id, center, radius, number of points and bound
     * point list. All of these are passed by reference/value through the setters; nothing is cloned.
     * <p>
     * NOTE(review): the converged flag is not copied to the result - confirm this is intentional.
     *
     * @return a new MeanShiftCanopy sharing state with the receiver
     */
    public MeanShiftCanopy shallowCopy() {
        MeanShiftCanopy result = new MeanShiftCanopy();
        result.setMeasure(this.getMeasure());
        result.setId(this.getId());
        result.setCenter(this.getCenter());
        result.setRadius(this.getRadius());
        result.setNumPoints(this.getNumPoints());
        result.setBoundPoints(boundPoints);
        return result;
    }

    /**
     * @return the same text as {@link #toString()}
     */
    @Override
    public String asFormatString() {
        return toString();
    }

    /**
     * Replaces the bound point list. The given list is stored by reference, not copied.
     *
     * @param boundPoints the IntArrayList of point ids to bind to this canopy
     */
    public void setBoundPoints(IntArrayList boundPoints) {
        this.boundPoints = boundPoints;
    }

    /**
     * @return the canopy id prefixed with "MSV-" when the canopy has converged, "MSC-" otherwise
     */
    @Override
    public String getIdentifier() {
        return (isConverged() ? "MSV-" : "MSC-") + getId();
    }

    /**
     * Not supported for mean shift canopies.
     *
     * @param vw ignored
     * @return never returns normally
     * @throws NotImplementedException always
     */
    @Override
    public double pdf(VectorWritable vw) {
        // MSCanopy membership is explicit via membership in boundPoints. Can't compute pdf for Arbitrary point
        throw new NotImplementedException();
    }

}