tubes2.myClusterers.myKMeans.java Source code

Java tutorial

Introduction

Here is the source code for tubes2.myClusterers.myKMeans.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package tubes2.myClusterers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import weka.core.Instance;
import weka.core.Instances;

/**
 * A k-means clusterer built on Weka's {@code AbstractClusterer}.
 *
 * <p>Training ({@link #buildClusterer(Instances)}) picks {@code k} initial
 * centroids, assigns every instance to its nearest centroid, and then
 * alternates between recomputing the centroids as cluster means and
 * re-assigning instances until two consecutive assignments are identical.
 *
 * <p>Distances are delegated to the configured {@code DistanceFunction};
 * the single-argument constructor uses {@code EuclideanDistance}.
 *
 * @author nim_13512501
 */
public class myKMeans extends weka.clusterers.AbstractClusterer {

    /** Number of clusters to build. */
    int k;
    /** Distance measure used for every instance/centroid comparison. */
    DistanceFunction distanceFunction;

    /** Empty dataset carrying the training header; used to print centroids. */
    Instances template;
    /** Current centroid of each cluster, indexed by cluster number. */
    Instance[] centroids;
    /** Instances currently assigned to each cluster, indexed by cluster number. */
    Instances[] clusters;
    /** Assignment produced by the previous call to {@link #assignClusters(Instances)}. */
    Instances[] oldClusters;
    /** Number of move/assign iterations performed by the last {@link #buildClusterer(Instances)}. */
    int iter;

    /**
     * Creates a clusterer with an explicit distance function.
     *
     * @param k number of clusters
     * @param distanceFunction distance measure to use
     */
    public myKMeans(int k, DistanceFunction distanceFunction) {
        this.k = k;
        this.distanceFunction = distanceFunction;
    }

    /**
     * Creates a clusterer that uses {@code EuclideanDistance}.
     *
     * @param k number of clusters
     */
    public myKMeans(int k) {
        this.k = k;
        this.distanceFunction = new EuclideanDistance();
    }

    /**
     * Returns the index of the cluster whose centroid is nearest to the
     * given instance.
     *
     * @param instance the instance to assign
     * @return cluster index in {@code [0, k)}
     * @throws IllegalStateException if the clusterer has not been built yet,
     *         or no centroid is at a comparable distance
     * @throws java.lang.Exception if the distance function fails
     */
    @Override
    public int clusterInstance(Instance instance) throws java.lang.Exception {
        if (centroids == null) {
            throw new IllegalStateException("clusterer has not been built yet");
        }
        return requireNearestCentroid(instance);
    }

    /**
     * Initializes the centroids with {@code k} distinct, randomly chosen
     * instances of the given dataset.
     *
     * @param i dataset to draw the initial centroids from
     * @throws IllegalArgumentException if {@code k < 1} or the dataset has
     *         fewer than {@code k} instances
     */
    public void initializeCentroids(Instances i) throws Exception {
        int n = i.numInstances();
        checkInstanceCount(n);
        centroids = new Instance[k];

        Set<Integer> centroidIndexSet = new HashSet<>();
        Random random = new Random();
        for (int j = 0; j < k; j++) {
            int centroidIndex;
            do {
                centroidIndex = random.nextInt(n);
                // add() returns false for an index that was already drawn,
                // which forces a redraw and keeps the centroids distinct
            } while (!centroidIndexSet.add(centroidIndex));

            centroids[j] = i.instance(centroidIndex);
        }
    }

    /**
     * Initializes the centroids deterministically from pairwise distances.
     *
     * <p>For every instance the mean of its {@code k-1} largest distances to
     * the instances of the dataset is computed. The instance with the
     * largest such mean becomes the first centroid; the remaining
     * {@code k-1} centroids are the instances farthest away from it.
     *
     * @param i dataset to draw the initial centroids from
     * @throws IllegalArgumentException if {@code k < 1} or the dataset has
     *         fewer than {@code k} instances
     * @throws Exception if the distance function fails
     */
    public void initializeCentroidsDistance(Instances i) throws Exception {
        int n = i.numInstances();
        checkInstanceCount(n);
        centroids = new Instance[k];

        // Find, for each instance, the mean of its k-1 largest distances.
        double[] meanN = new double[n];
        double[][] allDistances = new double[n][n];
        for (int a = 0; a < n; a++) {
            for (int b = 0; b < n; b++) {
                allDistances[a][b] = distanceFunction.distanceOf(i.instance(a), i.instance(b));
            }
            double[] sorted = allDistances[a].clone();
            Arrays.sort(sorted);
            double total = 0.0;
            for (int c = 0; c < k - 1; c++) {
                total += sorted[n - 1 - c];
            }
            // with k == 1 there is nothing to average; avoid 0/0 = NaN
            meanN[a] = (k > 1) ? total / (k - 1) : 0.0;
        }

        // Take the instance with the largest mean (first one wins ties).
        double max = meanN[0];
        int indexMax = 0;
        for (int a = 1; a < n; a++) {
            if (meanN[a] > max) {
                indexMax = a;
                max = meanN[a];
            }
        }
        System.out.println("index max " + indexMax);

        // Take the k-1 instances farthest from that instance. Already chosen
        // indices (including indexMax itself) are masked out so that every
        // centroid is a different instance.
        boolean[] taken = new boolean[n];
        taken[indexMax] = true;
        centroids[0] = i.instance(indexMax);
        double[] fromMax = allDistances[indexMax];
        for (int a = 1; a < k; a++) {
            int farthest = -1;
            for (int b = 0; b < n; b++) {
                if (taken[b]) {
                    continue;
                }
                // ">=" lets the highest index win among equal distances
                if (farthest < 0 || fromMax[b] >= fromMax[farthest]) {
                    farthest = b;
                }
            }
            // n >= k guarantees an untaken index exists, so farthest >= 0
            taken[farthest] = true;
            centroids[a] = i.instance(farthest);
        }
    }

    /**
     * Rebuilds all clusters by assigning every instance to its nearest
     * centroid. The previous assignment is kept in {@link #oldClusters} so
     * that {@link #clusterChanged()} can compare the two.
     *
     * @param instances dataset to partition
     * @throws Exception if the distance function fails
     */
    public void assignClusters(Instances instances) throws Exception {
        oldClusters = clusters;
        clusters = new Instances[k];
        for (int i = 0; i < k; i++)
            // second argument is passed to Weka's Instances(Instances, int)
            // constructor; each cluster starts out without instances
            clusters[i] = new Instances(instances, instances.numAttributes());
        for (int i = 0; i < instances.numInstances(); i++) {
            assignCluster(instances.instance(i));
        }
    }

    /**
     * Adds one instance to the cluster of its nearest centroid.
     *
     * @param instance the instance to assign
     * @throws IllegalStateException if no centroid is at a comparable distance
     * @throws Exception if the distance function fails
     */
    public void assignCluster(Instance instance) throws Exception {
        int clusterNum = requireNearestCentroid(instance);

        clusters[clusterNum].add(instance);
    }

    /**
     * Finds the centroid with the smallest distance to the given instance.
     *
     * @param instance the instance to compare against all centroids
     * @return index of the nearest centroid (the first one on ties), or
     *         {@code -1} if no distance is below {@code Double.MAX_VALUE}
     *         (for example when every distance is NaN)
     * @throws Exception if the distance function fails
     */
    public int nearestCentroid(Instance instance) throws Exception {
        int iChosen = -1;
        double minDistance = Double.MAX_VALUE;
        for (int i = 0; i < centroids.length; i++) {
            double distance = distanceFunction.distanceOf(instance, centroids[i]);
            if (distance < minDistance) {
                iChosen = i;
                minDistance = distance;
            }
        }
        return iChosen;
    }

    /**
     * Tells whether the latest assignment differs from the previous one.
     *
     * @return {@code true} if there is no previous assignment or any cluster
     *         differs from its predecessor
     */
    public boolean clusterChanged() {
        if (oldClusters == null)
            return true;
        for (int i = 0; i < k; i++) {
            if (clusterDifferent(oldClusters[i], clusters[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Compares two clusters position by position.
     *
     * @param a first cluster
     * @param b second cluster
     * @return {@code true} if the sizes differ or any pair of instances at
     *         the same position differs
     */
    public boolean clusterDifferent(Instances a, Instances b) {
        // a size mismatch is a change by itself and must be ruled out before
        // indexing b with positions taken from a
        if (a.numInstances() != b.numInstances())
            return true;
        for (int i = 0; i < a.numInstances(); i++) {
            if (instanceDifferent(a.instance(i), b.instance(i)))
                return true;
        }
        return false;
    }

    /**
     * Compares two instances attribute by attribute.
     *
     * @param a first instance
     * @param b second instance
     * @return {@code true} if any attribute value differs; two NaN values
     *         are treated as equal
     */
    public boolean instanceDifferent(Instance a, Instance b) {
        for (int i = 0; i < a.numAttributes(); i++) {
            double x = a.value(i);
            double y = b.value(i);
            // NaN != NaN in Java, so without the extra test an instance that
            // contains NaN would always be reported as changed
            if (x != y && !(Double.isNaN(x) && Double.isNaN(y)))
                return true;
        }
        return false;
    }

    /**
     * Moves every centroid to the mean of the instances currently assigned
     * to its cluster. A cluster without instances keeps its previous
     * centroid, because the mean of nothing would be NaN on every attribute.
     *
     * @throws Exception declared for callers; not thrown by this body
     */
    public void moveCentroids() throws Exception {
        for (int i = 0; i < k; i++) {
            if (clusters[i].numInstances() > 0) {
                centroids[i] = mean(clusters[i]);
            }
        }
    }

    /**
     * Builds an instance whose attribute values are the per-attribute means
     * of the given dataset.
     *
     * @param i dataset to average
     * @return a new instance holding one mean value per attribute
     */
    public Instance mean(Instances i) {
        Instance mean = new weka.core.Instance(i.numAttributes());
        for (int j = 0; j < i.numAttributes(); j++) {
            double meanValue = meanValue(i, j);
            mean.setValue(j, meanValue);
        }
        return mean;
    }

    /**
     * Computes the arithmetic mean of one attribute over a dataset.
     *
     * @param i dataset to average
     * @param attrIndex index of the attribute
     * @return the mean value; NaN if the dataset has no instances (0/0)
     */
    public double meanValue(Instances i, int attrIndex) {
        double sum = 0;
        for (int j = 0; j < i.numInstances(); j++) {
            sum += i.instance(j).value(attrIndex);
        }
        return sum / i.numInstances();
    }

    /**
     * Runs k-means on the given dataset until the assignment is stable.
     *
     * @param i training data
     * @throws Exception if initialization or the distance function fails
     */
    @Override
    public void buildClusterer(Instances i) throws Exception {
        template = new Instances(i, 0);
        // Random initialization; initializeCentroidsDistance(i) is the
        // non-random alternative.
        initializeCentroids(i);
        assignClusters(i);
        iter = 0;
        do {
            moveCentroids();
            assignClusters(i);
            iter++;
        } while (clusterChanged());
    }

    /**
     * @return the configured number of clusters {@code k}
     */
    @Override
    public int numberOfClusters() throws Exception {
        return k;
    }

    /**
     * Describes the model: iteration count, error value and centroids.
     * If building the description fails, the failure is logged and the
     * exception's string form is returned instead.
     */
    @Override
    public String toString() {
        try {
            return "myKMeans\n" + "iterations:" + iter + "\n" + "sum squared error: "
                    + innerClusterSumSquaredError() + "\n" + "centroids:\n" + centroidsToString() + "\n";
        } catch (Exception ex) {
            Logger.getLogger(myKMeans.class.getName()).log(Level.SEVERE, null, ex);
            return ex.toString();
        }
    }

    /**
     * Renders the centroids as a dataset that shares the training header.
     *
     * @return string form of a dataset containing the {@code k} centroids
     */
    public String centroidsToString() {
        Instances centroidInstances = new Instances(template, 0);
        for (int i = 0; i < k; i++) {
            centroidInstances.add(centroids[i]);
        }
        return centroidInstances.toString();
    }

    /**
     * Renders every cluster, each prefixed with {@code cluster-<index>}.
     *
     * @return concatenated string form of all clusters
     * @throws Exception declared for callers; not thrown by this body
     */
    public String clustersToString() throws Exception {
        StringBuilder retval = new StringBuilder();
        for (int i = 0; i < k; i++) {
            retval.append("cluster-").append(i);
            retval.append(clusters[i]);
        }
        return retval.toString();
    }

    /**
     * Sums the squared distance between every instance and the centroid of
     * its cluster.
     *
     * <p>Note: despite the name, the returned value is the <em>square
     * root</em> of that sum.
     *
     * @return square root of the summed squared instance-to-centroid distances
     * @throws Exception if the distance function fails
     */
    public double innerClusterSumSquaredError() throws Exception {
        double sumError = 0;
        for (int clusterIndex = 0; clusterIndex < k; clusterIndex++) {
            Instances clusterInstances = clusters[clusterIndex];
            Instance centroid = centroids[clusterIndex];
            for (int i = 0; i < clusterInstances.numInstances(); i++) {
                Instance clusterInstance = clusterInstances.instance(i);
                double dist = distanceFunction.distanceOf(clusterInstance, centroid);
                sumError += dist * dist;
            }
        }
        return Math.sqrt(sumError);
    }

    /** Rejects a non-positive k and datasets with fewer than k instances. */
    private void checkInstanceCount(int n) {
        if (k < 1)
            throw new IllegalArgumentException("k must be at least 1, got " + k);
        if (n < k)
            throw new IllegalArgumentException("n<k");
    }

    /** Like nearestCentroid, but fails loudly instead of returning -1. */
    private int requireNearestCentroid(Instance instance) throws Exception {
        int nearest = nearestCentroid(instance);
        if (nearest < 0) {
            throw new IllegalStateException(
                    "no centroid is at a comparable distance from the instance");
        }
        return nearest;
    }
}