Example usage for weka.clusterers SimpleKMeans getNumClusters

List of usage examples for weka.clusterers SimpleKMeans getNumClusters

Introduction

On this page you can find example usages of weka.clusterers SimpleKMeans getNumClusters.

Prototype

public int getNumClusters() 

Source Link

Document

Gets the number of clusters to generate.

Usage

From source file:analysis.SilhouetteIndex.java

/**
 * Computes the mean silhouette coefficient of a k-means clustering.
 *
 * <p>The clusterer is configured for {@code c} clusters and rebuilt on {@code inst};
 * every instance is then assigned to a cluster and its silhouette value
 * {@code s = (b - a) / max(a, b)} is computed, where {@code a} is the mean distance to
 * the other members of its own cluster and {@code b} is the smallest mean distance to
 * the members of any other non-empty cluster. All attributes of an instance (as
 * returned by {@code value(attr)}) take part in the distance.
 *
 * <p>Conventions used for degenerate cases:
 * <ul>
 *   <li>fewer than two clusters: the index is undefined and {@code NaN} is returned;</li>
 *   <li>a point that is alone in its cluster contributes {@code 0};</li>
 *   <li>a point for which no other cluster has members, or for which both {@code a}
 *       and {@code b} are zero, contributes {@code 0} instead of {@code NaN}.</li>
 * </ul>
 *
 * @param sk   clusterer to configure and build; it is modified by this call
 * @param inst data set to cluster and evaluate
 * @param c    number of clusters to request
 * @return the average silhouette value over all instances, or {@code NaN} when the
 *         index is undefined (fewer than two clusters, or no instances)
 * @throws Exception if the clusterer cannot be built or an instance cannot be assigned
 */
public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception {
    sk.setNumClusters(c);
    sk.buildClusterer(inst);

    final int numClusters = sk.getNumClusters();
    if (numClusters < 2) {
        // Index is not defined for k=1; it needs at least 2 clusters.
        return Double.NaN;
    }

    final int numInstances = inst.numInstances();
    final int numAttributes = inst.numAttributes();
    EuclideanDistance ed = new EuclideanDistance();

    // Resolve the cluster and the attribute vector of every instance once, instead of
    // repeating both for each of the numInstances^2 pairs.
    int[] assignment = new int[numInstances];
    double[][] points = new double[numInstances][numAttributes];
    for (int i = 0; i < numInstances; i++) {
        assignment[i] = sk.clusterInstance(inst.instance(i));
        for (int attr = 0; attr < numAttributes; attr++) {
            points[i][attr] = inst.instance(i).value(attr);
        }
    }

    double silhouetteSum = 0.0d;
    for (int i = 0; i < numInstances; i++) {
        final int ownCluster = assignment[i];
        double[] distanceSums = new double[numClusters];
        int[] memberCounts = new int[numClusters];

        // Accumulate distances from point i to every other point, grouped by cluster.
        for (int j = 0; j < numInstances; j++) {
            if (j == i) {
                continue; // a point is not its own neighbour
            }
            distanceSums[assignment[j]] += ed.compute(points[i], points[j]);
            memberCounts[assignment[j]]++;
        }

        double si = 0.0d;
        // memberCounts[ownCluster] excludes point i itself, so 0 means a singleton
        // cluster, for which the silhouette is set to 0.
        if (memberCounts[ownCluster] > 0) {
            double ownMean = distanceSums[ownCluster] / memberCounts[ownCluster]; // a(i)

            // b(i): smallest mean distance to another cluster; empty clusters are
            // skipped so they cannot inject 0/0 = NaN into the minimum.
            double nearestOtherMean = Double.POSITIVE_INFINITY;
            for (int k = 0; k < numClusters; k++) {
                if (k != ownCluster && memberCounts[k] > 0) {
                    nearestOtherMean = Math.min(nearestOtherMean, distanceSums[k] / memberCounts[k]);
                }
            }

            double denominator = Math.max(nearestOtherMean, ownMean);
            if (nearestOtherMean != Double.POSITIVE_INFINITY && denominator > 0.0d) {
                si = (nearestOtherMean - ownMean) / denominator;
            }
        }
        silhouetteSum += si;
    }

    return silhouetteSum / numInstances;
}

From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java

License:Open Source License

/**
 * /*from w w w.jav  a2 s  .  c om*/
 * @param instances
 * @param k
 * @param clusters_sizes
 * @param clusters_centroids
 * @return
 */
private double R2(SimpleKMeans kMeans) {
    //int k, int[] clusters_sizes, Instances clusters_centroids){
    final int k = kMeans.getNumClusters();
    final int[] clusters_sizes = kMeans.getClusterSizes();
    final Instances clusters_centroids = kMeans.getClusterCentroids();
    double inter, total;
    double[] weights = new double[k];
    double[] centroid = new double[instances.numAttributes()];
    final int N = instances.numInstances();
    final double instance_weight = 1.0;
    inter = total = 0;

    //Computing the centroid of the entire set
    for (int i = 0; i < N; i++) {
        final Instance instance = instances.get(i);
        double[] temp = instance.toDoubleArray();
        for (int j = 0; j < temp.length; j++)
            centroid[j] += temp[j];
    }
    for (int j = 0; j < centroid.length; j++) {
        centroid[j] = centroid[j] / N;
    }

    for (int i = 0; i < k; i++) {
        weights[i] = (0.0 + clusters_sizes[i]) / N;
    }

    final Instance centroid_G = new DenseInstance(instance_weight, centroid);
    for (int i = 0; i < N; i++) {
        total += Math.pow(distance.distance(instances.instance(i), centroid_G), 2);
    }
    total = total / N;

    for (int i = 0; i < k; i++) {
        inter += weights[i] * Math.pow(distance.distance(clusters_centroids.get(i), centroid_G), 2);
    }

    return (inter / total);
}

From source file:probcog.bayesnets.learning.DomainLearner.java

License:Open Source License

/**
 * Sorts the domain of the given node, for which the given clusterer has
 * been learnt, in ascending order of cluster centroid.
 *
 * <p>The centroid value of each cluster is read from attribute 0 of the clusterer's
 * centroids. Each original domain entry is placed exactly once in the new domain,
 * also when several centroids share the same value (ties keep their original
 * relative order).
 *
 * @param node      node whose discrete domain is replaced by the sorted one
 * @param clusterer clusterer providing the number of clusters and their centroids
 */
protected void sortClusteredDomain(BeliefNode node, SimpleKMeans clusterer) {
    // get domain sort order (sort by centroid, ascending),
    // i.e. get an unsorted and a sorted version of
    // the centroids array
    int numClusters = clusterer.getNumClusters();
    double[] values = clusterer.getClusterCentroids().attributeToDoubleArray(0);
    double[] sorted_values = values.clone();
    Arrays.sort(sorted_values);
    // create new sorted domain
    Discrete domain = (Discrete) node.getDomain();
    Discrete sorted_domain = new Discrete();
    // marks old indices already moved, so equal centroid values cannot add a name twice
    boolean[] placed = new boolean[numClusters];
    for (int new_idx = 0; new_idx < numClusters; new_idx++) {
        for (int old_idx = 0; old_idx < numClusters; old_idx++) {
            // Double.compare follows the ordering used by Arrays.sort, so NaN
            // centroids are matched too instead of being dropped by ==.
            if (!placed[old_idx] && Double.compare(values[old_idx], sorted_values[new_idx]) == 0) {
                sorted_domain.addName(domain.getName(old_idx));
                placed[old_idx] = true;
                break;
            }
        }
    }
    // apply new, sorted domain
    bn.bn.changeBeliefNodeDomain(node, sorted_domain);
}

From source file:qoala.arff.java

/**
 * Clusters a copy of {@code dataSet} with k-means, prints the cluster evaluation
 * summary and then lists, cluster by cluster, which instance index was assigned to
 * which cluster.
 *
 * @param numberOfCLuster number of clusters to request from the clusterer
 * @throws Exception if building or evaluating the clusterer fails
 */
public void SimpleKmeans(int numberOfCLuster) throws Exception {

    Instances train = new Instances(dataSet);

    SimpleKMeans skm = new SimpleKMeans();
    // required so that getAssignments() can be called after the build
    skm.setPreserveInstancesOrder(true);
    skm.setNumClusters(numberOfCLuster);
    // The seed has to be configured before the model is built; set afterwards it
    // cannot influence the clustering that is evaluated and printed below.
    skm.setSeed(10);
    skm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(train);

    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    int[] assignments = skm.getAssignments();

    System.out.println("# - cluster - distribution");

    // One pass over the assignments per cluster, so the output is grouped by cluster.
    for (int j = 0; j < skm.getNumClusters(); j++) {
        int i = 0;
        for (int clusterNum : assignments) {
            if (clusterNum == j) {
                System.out.println("Instance " + i + " -> Cluster number: " + clusterNum);
            }
            i++;
        }
    }
}