analysis.SilhouetteIndex.java Source code

Java tutorial

Introduction

Here is the source code for analysis.SilhouetteIndex.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package analysis;

/**
 *
 * @author Sinan
 */
import org.apache.commons.math3.ml.distance.EuclideanDistance;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;

public class SilhouetteIndex {

    public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception {
        //Map<Integer, Instances> clustermap = sk.clusterInstance;
        sk.setNumClusters(c);
        sk.buildClusterer(inst);
        EuclideanDistance ed = new EuclideanDistance();
        double avgSilhouetteOverAllPoints = 0.d;

        if (sk.getNumClusters() == 1) {
            //Index is not defined for k=1. needs at least 2 clusters
            return Double.NaN;
        }

        for (int i = 0; i < inst.numInstances(); i++) {
            //for the current element get its cluster
            int currentcluster = sk.clusterInstance(inst.instance(i));
            //System.out.println(inst.instance(i).value(2));
            double[] current_attr = new double[inst.numAttributes()];
            double[] other_attr = new double[inst.numAttributes()];
            //get attributes of the current instance
            for (int attr = 0; attr < inst.numAttributes(); attr++) {
                current_attr[attr] = inst.instance(i).value(attr);
            }
            // int counter
            double[] distances = new double[sk.getNumClusters()];
            int[] counters = new int[sk.getNumClusters()];
            //System.out.println("distances: "+distances.length);
            double avgInClusterDist = 0, dist = 0;
            int countsamecluster = 0;
            distances[currentcluster] = Double.MAX_VALUE;
            for (int j = 0; j < inst.numInstances(); j++) {
                for (int attr = 0; attr < inst.numAttributes(); attr++) {
                    other_attr[attr] = inst.instance(j).value(attr);
                }
                //get cluster number of j th element
                int clusternumber = sk.clusterInstance(inst.instance(j));
                //check if j and i in the same cluster
                if (clusternumber == currentcluster) {
                    if (inst.instance(i) != inst.instance(j)) {
                        //calculate average dist to other elements in the cluster
                        //inst.

                        dist = ed.compute(current_attr, other_attr);
                        avgInClusterDist = avgInClusterDist + dist;
                        countsamecluster++;
                    }
                } else {
                    dist = ed.compute(current_attr, other_attr);
                    distances[clusternumber] = distances[clusternumber] + dist;
                    counters[clusternumber]++;
                }
            }
            //calculate value ai
            if (countsamecluster > 0) {
                avgInClusterDist = avgInClusterDist / countsamecluster; //this is value ai
            }
            //find average distances to other clusters
            for (int k = 0; k < distances.length; k++) {
                if (k != currentcluster) {
                    distances[k] = distances[k] / counters[k];
                }
            }
            //Find the min value of average distance to other clusters
            double min = distances[0];
            for (int k = 1; k < distances.length; k++) {
                if (min > distances[k]) {
                    min = distances[k];
                }
            }

            //si for current element:
            double si;
            // if we only have one element in our cluster it makes sense to set
            // si = 0
            if (countsamecluster == 1) {
                si = 0.0d;
            } else {
                si = (min - avgInClusterDist) / Math.max(min, avgInClusterDist);
            }
            avgSilhouetteOverAllPoints = avgSilhouetteOverAllPoints + si;
        }
        //System.out.println(inst.numInstances());
        return avgSilhouetteOverAllPoints / inst.numInstances();

    }
}