List of usage examples for the method weka.clusterers.SimpleKMeans.getSquaredError()
public double getSquaredError()
From source file:analysis.Purity.java
/** * /* www .ja v a 2 s .c om*/ * @param k number of clusters * @param originalfile original data * @param imputedfile imputed data * @throws Exception */ public void findpurity(int k, String originalfile, String imputedfile) throws Exception { //get original data ConverterUtils.DataSource source = new ConverterUtils.DataSource(originalfile); // get imputed data ConverterUtils.DataSource mysource = new ConverterUtils.DataSource(imputedfile); //get instances for clustering this.instances = source.getDataSet(); this.myinstances = mysource.getDataSet(); //Simple Kmeans for clustering SimpleKMeans globalkmeans = new SimpleKMeans(); SimpleKMeans mykmeans = new SimpleKMeans(); //set number of clusters globalkmeans.setNumClusters(k); mykmeans.setNumClusters(k); // build clusters globalkmeans.buildClusterer(instances); mykmeans.buildClusterer(myinstances); // to compare clusters create matrix for original data and imputed data // this matrix indicates the instances in the came clusters original = new Matrix(instances.numInstances(), k); imputed = new Matrix(myinstances.numInstances(), k); // get cluster numbers for each instance and initialize associated cluster value to 1 for (int i = 0; i < myinstances.numInstances(); i++) { //System.out.println(instances.instance(i)); original.set(i, globalkmeans.clusterInstance(instances.instance(i)), 1); imputed.set(i, mykmeans.clusterInstance(myinstances.instance(i)), 1); } System.out.println("k is: \t" + original.getColumnDimension()); //System.out.println(imputed.getRowDimension()); original = original.times(original.transpose()); imputed = imputed.times(imputed.transpose()); int total1 = 0;// to count instances in the imputed data in the same cluster int total2 = 0; // to count instances in the original data in the same cluster //int value = 1; for (int i = 0; i < original.getRowDimension(); i++) { for (int j = i; j < original.getColumnDimension(); j++) { if ((original.get(i, j) == 1)) { if (imputed.get(i, j) == 1) { total1++; // 
if i and j th instance in the same cluster in the imputed data } total2++;// if the i and j th instance in the same cluster in the original data } } //System.out.println(); } // calculate purity double purity; purity = (double) total1 / (double) total2; System.out.println("WCSS --> Original Data: " + mykmeans.getSquaredError()); System.out.println("WCSS --> Imputed Data: " + globalkmeans.getSquaredError()); // System.out.println("Total Hit is \t" + total1); //System.out.println("Total for hit is \t" + total2); System.out.println("Purity is: " + purity); }
From source file:br.ufrn.ia.core.clustering.EMIaProject.java
License:Open Source License
private void EM_Init(Instances inst) throws Exception { int i, j, k;// www. j a va 2 s. co m // run k means 10 times and choose best solution SimpleKMeans bestK = null; double bestSqE = Double.MAX_VALUE; for (i = 0; i < 10; i++) { SimpleKMeans sk = new SimpleKMeans(); sk.setSeed(m_rr.nextInt()); sk.setNumClusters(m_num_clusters); sk.setDisplayStdDevs(true); sk.buildClusterer(inst); if (sk.getSquaredError() < bestSqE) { bestSqE = sk.getSquaredError(); bestK = sk; } } // initialize with best k-means solution m_num_clusters = bestK.numberOfClusters(); m_weights = new double[inst.numInstances()][m_num_clusters]; m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs]; m_modelNormal = new double[m_num_clusters][m_num_attribs][3]; m_priors = new double[m_num_clusters]; Instances centers = bestK.getClusterCentroids(); Instances stdD = bestK.getClusterStandardDevs(); double[][][] nominalCounts = bestK.getClusterNominalCounts(); double[] clusterSizes = bestK.getClusterSizes(); for (i = 0; i < m_num_clusters; i++) { Instance center = centers.instance(i); for (j = 0; j < m_num_attribs; j++) { if (inst.attribute(j).isNominal()) { m_model[i][j] = new DiscreteEstimator(m_theInstances.attribute(j).numValues(), true); for (k = 0; k < inst.attribute(j).numValues(); k++) { m_model[i][j].addValue(k, nominalCounts[i][j][k]); } } else { double minStdD = (m_minStdDevPerAtt != null) ? m_minStdDevPerAtt[j] : m_minStdDev; double mean = (center.isMissing(j)) ? inst.meanOrMode(j) : center.value(j); m_modelNormal[i][j][0] = mean; double stdv = (stdD.instance(i).isMissing(j)) ? 
((m_maxValues[j] - m_minValues[j]) / (2 * m_num_clusters)) : stdD.instance(i).value(j); if (stdv < minStdD) { stdv = inst.attributeStats(j).numericStats.stdDev; if (Double.isInfinite(stdv)) { stdv = minStdD; } if (stdv < minStdD) { stdv = minStdD; } } if (stdv <= 0) { stdv = m_minStdDev; } m_modelNormal[i][j][1] = stdv; m_modelNormal[i][j][2] = 1.0; } } } for (j = 0; j < m_num_clusters; j++) { // m_priors[j] += 1.0; m_priors[j] = clusterSizes[j]; } Utils.normalize(m_priors); }
From source file:kmeansapps.Kmeans.java
public void startCluster(String path, int numOfCluster, JTable tableResult, JFrame apps) { try {//ww w . j a va2 s. co m // TODO code application logic here SimpleKMeans kmeans = new SimpleKMeans(); String[] columnNames = new String[numOfCluster]; kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(numOfCluster); BufferedReader datafile = readDataFile(path); Instances data = new Instances(datafile); kmeans.buildClusterer(data); double SSE = kmeans.getSquaredError(); // This array returns the cluster number (starting with 0) for each instance // The array has as many elements as the number of instances int[] assignments = kmeans.getAssignments(); // //setting columNames // for (int i = 0; i < numOfCluster; i++) { // columnNames[i] = "Cluster "+i+""; // } // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa. ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>(); ArrayList<String> listMemberOfCluster; //tambahkan list cluster for (int i = 0; i < numOfCluster; i++) { listMemberOfCluster = new ArrayList<>(); listOfCluster.add(listMemberOfCluster); } //tambahkan anggota list ke cluster int j = 0; for (int clusterNum : assignments) { listOfCluster.get(clusterNum).add(j + ""); j++; } for (int i = 0; i < listOfCluster.size(); i++) { System.out.print("Cluster - " + i + " -> "); for (String listMemberOfCluster1 : listOfCluster.get(i)) { System.out.print(listMemberOfCluster1 + " "); } System.out.println(""); } // int i=0; // for(int clusterNum : assignments) { // System.out.printf("Instance %d -> Cluster %d \n", i, clusterNum); // i++; // System.out.println(SSE); // } // //output to table // tableResult.setModel(new DefaultTableModel( // new Object[][]{ // }, // columnNames)); // apps.setVisible(true); // // int j=0; // DefaultTableModel model = (DefaultTableModel) tableResult.getModel(); // for(int clusterNum : assignments) { // if (clusterNum==0){ // model.addRow(new Object[]{j, "", "", "", "", 
""}); // } // else if (clusterNum==1){ // model.addRow(new Object[]{"", j, "", "", "", ""}); // } // else if (clusterNum==2){ // model.addRow(new Object[]{"", "", j, "", "", ""}); // } // else if (clusterNum==3){ // model.addRow(new Object[]{"", "", "", j, "", ""}); // } // else if (clusterNum==4){ // model.addRow(new Object[]{"", "", "", "", j, ""}); // } // else if (clusterNum==5){ // model.addRow(new Object[]{"", "", "", "", "", j}); // } // // j++; // } } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:kmeansapps.Kmeans.java
public void startCluster(String path, int numOfCluster, JTextArea textarea) { try {/*from w w w . jav a 2s. c o m*/ // TODO code application logic here SimpleKMeans kmeans = new SimpleKMeans(); String[] columnNames = new String[numOfCluster]; kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(numOfCluster); BufferedReader datafile = readDataFile(path); Instances data = new Instances(datafile); kmeans.buildClusterer(data); double SSE = kmeans.getSquaredError(); // This array returns the cluster number (starting with 0) for each instance // The array has as many elements as the number of instances int[] assignments = kmeans.getAssignments(); // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa. ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>(); ArrayList<String> listMemberOfCluster; //tambahkan list cluster for (int i = 0; i < numOfCluster; i++) { listMemberOfCluster = new ArrayList<>(); listOfCluster.add(listMemberOfCluster); } //tambahkan anggota list ke cluster int j = 0; for (int clusterNum : assignments) { listOfCluster.get(clusterNum).add(j + ""); j++; } textarea.setText(""); String result = ""; for (int i = 0; i < listOfCluster.size(); i++) { result = result + ("Cluster - " + i + " ==> "); for (String listMemberOfCluster1 : listOfCluster.get(i)) { result = result + (listMemberOfCluster1 + " "); } result = result + ("\n"); } result = result + ("\nSSE : ") + kmeans.getSquaredError(); textarea.setText(result); } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } }