List of usage examples for the method weka.clusterers.SimpleKMeans.getNumClusters()
public int getNumClusters()
From source file:analysis.SilhouetteIndex.java
public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception { //Map<Integer, Instances> clustermap = sk.clusterInstance; sk.setNumClusters(c);//from www . j av a 2 s .co m sk.buildClusterer(inst); EuclideanDistance ed = new EuclideanDistance(); double avgSilhouetteOverAllPoints = 0.d; if (sk.getNumClusters() == 1) { //Index is not defined for k=1. needs at least 2 clusters return Double.NaN; } for (int i = 0; i < inst.numInstances(); i++) { //for the current element get its cluster int currentcluster = sk.clusterInstance(inst.instance(i)); //System.out.println(inst.instance(i).value(2)); double[] current_attr = new double[inst.numAttributes()]; double[] other_attr = new double[inst.numAttributes()]; //get attributes of the current instance for (int attr = 0; attr < inst.numAttributes(); attr++) { current_attr[attr] = inst.instance(i).value(attr); } // int counter double[] distances = new double[sk.getNumClusters()]; int[] counters = new int[sk.getNumClusters()]; //System.out.println("distances: "+distances.length); double avgInClusterDist = 0, dist = 0; int countsamecluster = 0; distances[currentcluster] = Double.MAX_VALUE; for (int j = 0; j < inst.numInstances(); j++) { for (int attr = 0; attr < inst.numAttributes(); attr++) { other_attr[attr] = inst.instance(j).value(attr); } //get cluster number of j th element int clusternumber = sk.clusterInstance(inst.instance(j)); //check if j and i in the same cluster if (clusternumber == currentcluster) { if (inst.instance(i) != inst.instance(j)) { //calculate average dist to other elements in the cluster //inst. 
dist = ed.compute(current_attr, other_attr); avgInClusterDist = avgInClusterDist + dist; countsamecluster++; } } else { dist = ed.compute(current_attr, other_attr); distances[clusternumber] = distances[clusternumber] + dist; counters[clusternumber]++; } } //calculate value ai if (countsamecluster > 0) { avgInClusterDist = avgInClusterDist / countsamecluster; //this is value ai } //find average distances to other clusters for (int k = 0; k < distances.length; k++) { if (k != currentcluster) { distances[k] = distances[k] / counters[k]; } } //Find the min value of average distance to other clusters double min = distances[0]; for (int k = 1; k < distances.length; k++) { if (min > distances[k]) { min = distances[k]; } } //si for current element: double si; // if we only have one element in our cluster it makes sense to set // si = 0 if (countsamecluster == 1) { si = 0.0d; } else { si = (min - avgInClusterDist) / Math.max(min, avgInClusterDist); } avgSilhouetteOverAllPoints = avgSilhouetteOverAllPoints + si; } //System.out.println(inst.numInstances()); return avgSilhouetteOverAllPoints / inst.numInstances(); }
From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java
License:Open Source License
/** * /*from w w w.jav a2 s . c om*/ * @param instances * @param k * @param clusters_sizes * @param clusters_centroids * @return */ private double R2(SimpleKMeans kMeans) { //int k, int[] clusters_sizes, Instances clusters_centroids){ final int k = kMeans.getNumClusters(); final int[] clusters_sizes = kMeans.getClusterSizes(); final Instances clusters_centroids = kMeans.getClusterCentroids(); double inter, total; double[] weights = new double[k]; double[] centroid = new double[instances.numAttributes()]; final int N = instances.numInstances(); final double instance_weight = 1.0; inter = total = 0; //Computing the centroid of the entire set for (int i = 0; i < N; i++) { final Instance instance = instances.get(i); double[] temp = instance.toDoubleArray(); for (int j = 0; j < temp.length; j++) centroid[j] += temp[j]; } for (int j = 0; j < centroid.length; j++) { centroid[j] = centroid[j] / N; } for (int i = 0; i < k; i++) { weights[i] = (0.0 + clusters_sizes[i]) / N; } final Instance centroid_G = new DenseInstance(instance_weight, centroid); for (int i = 0; i < N; i++) { total += Math.pow(distance.distance(instances.instance(i), centroid_G), 2); } total = total / N; for (int i = 0; i < k; i++) { inter += weights[i] * Math.pow(distance.distance(clusters_centroids.get(i), centroid_G), 2); } return (inter / total); }
From source file:probcog.bayesnets.learning.DomainLearner.java
License:Open Source License
/** * sorts the domain of the given node, for which the given clusterer has * been learnt, in ascending order of cluster centroid * //from w w w . ja v a 2 s. co m * @param node * @param clusterer */ protected void sortClusteredDomain(BeliefNode node, SimpleKMeans clusterer) { // get domain sort order (sort by centroid, ascending), // i.e. get an unsorted and a sorted version of // the centroids array int numClusters = clusterer.getNumClusters(); double[] values = clusterer.getClusterCentroids().attributeToDoubleArray(0); double[] sorted_values = (double[]) values.clone(); Arrays.sort(sorted_values); // create new sorted domain Discrete domain = (Discrete) node.getDomain(); Discrete sorted_domain = new Discrete(); for (int new_idx = 0; new_idx < numClusters; new_idx++) { for (int old_idx = 0; old_idx < numClusters; old_idx++) if (values[old_idx] == sorted_values[new_idx]) sorted_domain.addName(domain.getName(old_idx)); } // apply new, sorted domain bn.bn.changeBeliefNodeDomain(node, sorted_domain); }
From source file:qoala.arff.java
public void SimpleKmeans(int numberOfCLuster) throws Exception { Instances train = new Instances(dataSet); SimpleKMeans skm = new SimpleKMeans(); skm.setPreserveInstancesOrder(true); skm.setNumClusters(numberOfCLuster); skm.buildClusterer(train);// w w w. j a v a 2 s.c om skm.setSeed(10); int[] ClusterSize = skm.getClusterSizes(); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(skm); eval.evaluateClusterer(train); System.out.println("Cluster Evaluation:" + eval.clusterResultsToString()); int[] assignments = skm.getAssignments(); System.out.println("# - cluster - distribution"); for (int j = 0; j < skm.getNumClusters(); j++) { int i = 0; for (int clusterNum : assignments) { if (clusterNum == j) System.out.println("Instance " + i + " -> Cluster number: " + clusterNum); i++; } } }