me.datamining.cluster.STING.java Source code

Java tutorial

Introduction

Here is the source code for me.datamining.cluster.STING.java

Source

//CIRAS: Crime Information Retrieval and Analysis System
//Copyright 2009 by Russ Brasser, Mark Everline and Eric Franklin
//
//This program is free software: you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation, either version 3 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program.  If not, see <http://www.gnu.org/licenses/>.

package me.datamining.cluster;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.commons.math3.exception.NumberIsTooLargeException;

import com.vividsolutions.jts.geom.Point;

import me.datamining.ClusteringAlgorithm;
import me.math.Vertex;
import me.math.grid.AbstractSpatialGridOverlay;
import me.math.grid.AbstractSpatialGridPoint;
import me.math.grid.array.UniformSpatialGrid;
import me.math.grid.data.STINGDataSample;
import me.math.kdtree.IKDSearch;
import me.math.kdtree.INode;
import me.math.kdtree.KDTree;
import me.math.kdtree.search.RangeSearch;

public class STING implements ClusteringAlgorithm {

    private AbstractSpatialGridOverlay grid_ = null;
    private int rangeHi = Integer.MAX_VALUE;
    private int rangeLow = 0;
    private double confidence = 0.5;
    private double density = 1;
    private KDTree tree_ = null;
    private Log log = LogFactory.getLog(STING.class);

    /**
     * 
     */
    public STING() {
    }

    /**
     * 
     * @param aGrid
     */
    public STING(UniformSpatialGrid aGrid) {
        this.init(aGrid);
    }

    /**
     * 
     */
    public STING(int hi, int low, double confidence, double density) {
        this.rangeHi = hi;
        this.rangeLow = low;
        this.confidence = confidence;
        this.density = density;
    }

    /**
     * 
     * @param ul
     * @param lr
     * @param gridSize
     */
    public STING(Point ul, Point lr, double gridSize) {
        this.init(new UniformSpatialGrid(ul, lr, gridSize));
    }

    /**
     * 
     * @param aGrid
     */
    public void init(AbstractSpatialGridOverlay aGrid) {
        grid_ = aGrid;
        tree_ = aGrid.getTree();
    }

    /**
     * 
     * @return
     */
    public int getRangeHi() {
        return rangeHi;
    }

    /*
     * (non-Javadoc)
     * @see me.datamining.ClusteringAlgorithm#setRangeHi(int)
     */
    public void setRangeHi(int rangeHi_) {
        this.rangeHi = rangeHi_;
    }

    /**
     * 
     * @return
     */
    public int getRangeLow() {
        return rangeLow;
    }

    /*
     * (non-Javadoc)
     * @see me.datamining.ClusteringAlgorithm#setRangeLow(int)
     */
    public void setRangeLow(int rangeLow_) {
        this.rangeLow = rangeLow_;
    }

    /**
     * 
     * @return
     */
    public double getConfidence() {
        return confidence;
    }

    /**
     * 
     * @param confidence_
     */
    public void setConfidence(double confidence_) {
        this.confidence = confidence_;
    }

    /**
     * 
     * @return
     */
    public double getDensity() {
        return density;
    }

    /**
     * 
     * @param density_
     */
    public void setDensity(double density_) {
        this.density = density_;
    }

    /**
     * 
     * @param value
     * @param mean
     * @param std
     * @return
     */
    public static double standardPDF(double value, double mean, double std) {
        if (std == 0) {
            return 0;
        }
        NormalDistribution sdf = new NormalDistribution(mean, std);
        try {
            return sdf.cumulativeProbability(value);
        } catch (NumberIsTooLargeException e) {
            return 0;
        }
    }

    /**
     * 
     * @param value
     * @param lambda
     * @return
     */
    public static double pissonPDF(double value, double lambda) {
        if (lambda == 0) {
            return 0;
        }
        PoissonDistribution pdf = new PoissonDistribution(lambda);
        //TODO: fix needs to take in double
        return pdf.probability((int) value);
    }

    /**
     * 
     * @param value
     * @param number
     * @param probabilty
     * @return
     */
    public static double binomialPDF(int value, double number, double probabilty) {
        BinomialDistribution bdf = new BinomialDistribution((int) value, probabilty);

        return bdf.probability(value);
    }

    /**
     * 
     * @return
     */
    public double getGridSizeInMeters() {
        return grid_.getGridSpacingMeters();
    }

    /* (non-Javadoc)
     * @see me.datamining.ClusteringAlgorithm#findClusters()
     */
    public List<AbstractSpatialGridPoint> findClusters(AbstractSpatialGridOverlay aGrid) {
        this.init(aGrid);
        return this.findClusters();
    }

    /* (non-Javadoc)
     * @see me.datamining.ClusteringAlgorithm#findClusters()
     */
    public List<AbstractSpatialGridPoint> findClusters() {

        findReleventNodes nodeFinder = new findReleventNodes(this.getRangeLow(), this.getRangeHi(),
                this.getConfidence());

        tree_.searchStats(nodeFinder);

        double factor = Math.sqrt(1 / (Math.PI * this.getDensity()));
        double distance = Math.max((double) this.getGridSizeInMeters(), factor);

        HashSet<AbstractSpatialGridPoint> clusterPoints = new HashSet<AbstractSpatialGridPoint>();
        List<AbstractSpatialGridPoint> possiableNodes = nodeFinder.getResults();

        ClusterNodeEvaluation evaluator = new ClusterNodeEvaluation(this.getRangeLow(), this.getRangeHi(),
                this.getConfidence());

        log.debug("STING Relvent: " + nodeFinder.getResults().size());
        log.debug("STING distance: " + distance);

        while (!possiableNodes.isEmpty()) {
            AbstractSpatialGridPoint pt = possiableNodes.remove(0);
            if (!clusterPoints.contains(pt)) {
                clusterPoints.add(pt);
            }

            List<AbstractSpatialGridPoint> check = null;
            if (distance != this.getGridSizeInMeters()) {
                check = tree_.find(new RangeSearch(pt.getVertex(), distance));

                for (AbstractSpatialGridPoint n : check) {
                    if (n.getData() instanceof STINGDataSample) {
                        STINGDataSample sample = STINGDataSample.class.cast(n);
                        if (!sample.isChecked()) {
                            sample.setChecked(true);
                            if (evaluator.isRelevent(sample.average(), sample.standardDeviation(),
                                    sample.getSampleNumber())) {
                                possiableNodes.add(n);
                            }
                        }
                    }
                }

            }
        }

        List<AbstractSpatialGridPoint> rtn = new ArrayList<AbstractSpatialGridPoint>();
        for (AbstractSpatialGridPoint point : clusterPoints) {
            rtn.add(point);
        }

        return rtn;
    }

    // /////////////////////////////////////////////////////////////////////////////////////////////
    // /////////////////////////////////////////////////////////////////////////////////////////////
    // /////////////////////////////////////////////////////////////////////////////////////////////

    public static class ClusterNodeEvaluation {

        private double min_;
        private double max_;
        private double confThresshold_;

        public ClusterNodeEvaluation(double min, double max, double threshold) {
            this.setMax(max);
            this.setMin_(min);
            this.setConfThresshold(threshold);
        }

        public double getMin() {
            return min_;
        }

        private void setMin_(double min) {
            min_ = min;
        }

        public double getMax() {
            return max_;
        }

        private void setMax(double max) {
            max_ = max;
        }

        public double getConfThresshold() {
            return confThresshold_;
        }

        private void setConfThresshold(double confThresshold) {
            confThresshold_ = confThresshold;
        }

        public double probablit(double density, double mean, double std) {
            if (std == 0) {
                return 0;
            }
            return (density * ((max_ - mean) / std)) - (density * ((min_ - mean) / std));
        }

        public boolean isRelevent(double mean, double std, double number) {

            if (number == 0.0 && mean == 0.0 && std == 0.0) {
                return false;
            }

            double density = STING.standardPDF(number, mean, std);

            if (density < 2) {
                if (number < this.getMin() || number > this.getMax()) {
                    return false;
                } else {
                    return true;
                }
            }
            double prob = this.probablit(density, mean, std);
            double nci = (STING.binomialPDF((int) Math.ceil(number + 0.5f), number, density)) / number;

            if (number <= 30 && nci > this.getConfThresshold()) {
                return true;
            } else if (number > 30) {
                if (prob >= 5 && (number * (1 - prob)) > 5) {
                    double z = ((this.getMin() / 2) - mean) / std;
                    double equ = Math.sqrt((prob * (1 - prob)) / number);

                    if (z >= this.getMin()) {
                        prob = prob + (z * equ);
                    } else {
                        prob = prob - (z * equ);
                    }

                } else {
                    prob = STING.pissonPDF(number, number * prob);
                    if ((number * (1 - prob)) < 5) {
                        prob = 1 - prob;
                    }
                }
            }

            if (prob >= this.getConfThresshold()) {
                return true;
            }
            return false;
        }

    }

    // /////////////////////////////////////////////////////////////////////////////////////////////
    // /////////////////////////////////////////////////////////////////////////////////////////////
    // /////////////////////////////////////////////////////////////////////////////////////////////

    public class findReleventNodes implements IKDSearch {

        public int relevantCount_ = 0;
        private ClusterNodeEvaluation evaluator_ = null;
        private List<AbstractSpatialGridPoint> relevent_ = new ArrayList<AbstractSpatialGridPoint>();

        public findReleventNodes(double min, double max, double threshold) {
            evaluator_ = new ClusterNodeEvaluation(min, max, threshold);
        }

        public boolean endSearch(INode node) {
            return false;
        }

        public List<AbstractSpatialGridPoint> getResults() {
            return relevent_;
        }

        public Vertex getVertex() {
            return null;
        }

        public void compare(INode node) {

            AbstractSpatialGridPoint pt = node.getPoint();
            if (pt.getData() != null) {
                STINGDataSample data = STINGDataSample.class.cast(pt.getData());
                if (evaluator_.isRelevent(data.average(), data.standardDeviation(), data.getSampleNumber())) {
                    data.setChecked(true);
                    relevent_.add(node.getPoint());
                }
            }
        }

    }

}