Example usage for weka.clusterers SimpleKMeans getSquaredError

Introduction

In this page you can find the example usage for weka.clusterers SimpleKMeans getSquaredError.

Prototype

public double getSquaredError()

Source Link

Document

Gets the squared error for all clusters.

Usage

From source file:analysis.Purity.java

/**
 * /* www  .ja v  a  2 s .c  om*/
 * @param k number of clusters
 * @param originalfile original data
 * @param imputedfile imputed data
 * @throws Exception 
 */
public void findpurity(int k, String originalfile, String imputedfile) throws Exception {
    //get original data
    ConverterUtils.DataSource source = new ConverterUtils.DataSource(originalfile);
    // get imputed data
    ConverterUtils.DataSource mysource = new ConverterUtils.DataSource(imputedfile);
    //get instances for clustering
    this.instances = source.getDataSet();
    this.myinstances = mysource.getDataSet();
    //Simple Kmeans for clustering
    SimpleKMeans globalkmeans = new SimpleKMeans();
    SimpleKMeans mykmeans = new SimpleKMeans();
    //set  number of clusters
    globalkmeans.setNumClusters(k);
    mykmeans.setNumClusters(k);
    // build clusters
    globalkmeans.buildClusterer(instances);
    mykmeans.buildClusterer(myinstances);

    // to compare clusters create matrix for original data and imputed data
    // this matrix indicates the  instances in the came clusters 
    original = new Matrix(instances.numInstances(), k);
    imputed = new Matrix(myinstances.numInstances(), k);
    // get cluster numbers for each instance and initialize associated cluster value to 1
    for (int i = 0; i < myinstances.numInstances(); i++) {
        //System.out.println(instances.instance(i));
        original.set(i, globalkmeans.clusterInstance(instances.instance(i)), 1);
        imputed.set(i, mykmeans.clusterInstance(myinstances.instance(i)), 1);
    }
    System.out.println("k is: \t" + original.getColumnDimension());
    //System.out.println(imputed.getRowDimension());
    original = original.times(original.transpose());
    imputed = imputed.times(imputed.transpose());

    int total1 = 0;// to count  instances in the imputed data in the same cluster
    int total2 = 0; // to count  instances in the original data in the same cluster
    //int value = 1;
    for (int i = 0; i < original.getRowDimension(); i++) {
        for (int j = i; j < original.getColumnDimension(); j++) {

            if ((original.get(i, j) == 1)) {
                if (imputed.get(i, j) == 1) {

                    total1++; // if i  and j th instance in the same cluster in the imputed data
                }
                total2++;// if the i and j th instance in the same cluster in the original data

            }

        }
        //System.out.println();
    }

    // calculate purity
    double purity;
    purity = (double) total1 / (double) total2;
    System.out.println("WCSS --> Original Data: " + mykmeans.getSquaredError());
    System.out.println("WCSS --> Imputed Data: " + globalkmeans.getSquaredError());
    // System.out.println("Total Hit is \t" + total1);
    //System.out.println("Total for  hit is \t" + total2);
    System.out.println("Purity is: " + purity);

}

From source file:br.ufrn.ia.core.clustering.EMIaProject.java

License:Open Source License

private void EM_Init(Instances inst) throws Exception {
    int i, j, k;// www.  j a  va  2 s. co  m

    // run k means 10 times and choose best solution
    SimpleKMeans bestK = null;
    double bestSqE = Double.MAX_VALUE;
    for (i = 0; i < 10; i++) {
        SimpleKMeans sk = new SimpleKMeans();
        sk.setSeed(m_rr.nextInt());
        sk.setNumClusters(m_num_clusters);
        sk.setDisplayStdDevs(true);
        sk.buildClusterer(inst);
        if (sk.getSquaredError() < bestSqE) {
            bestSqE = sk.getSquaredError();
            bestK = sk;
        }
    }

    // initialize with best k-means solution
    m_num_clusters = bestK.numberOfClusters();
    m_weights = new double[inst.numInstances()][m_num_clusters];
    m_model = new DiscreteEstimator[m_num_clusters][m_num_attribs];
    m_modelNormal = new double[m_num_clusters][m_num_attribs][3];
    m_priors = new double[m_num_clusters];
    Instances centers = bestK.getClusterCentroids();
    Instances stdD = bestK.getClusterStandardDevs();
    double[][][] nominalCounts = bestK.getClusterNominalCounts();
    double[] clusterSizes = bestK.getClusterSizes();

    for (i = 0; i < m_num_clusters; i++) {
        Instance center = centers.instance(i);
        for (j = 0; j < m_num_attribs; j++) {
            if (inst.attribute(j).isNominal()) {
                m_model[i][j] = new DiscreteEstimator(m_theInstances.attribute(j).numValues(), true);
                for (k = 0; k < inst.attribute(j).numValues(); k++) {
                    m_model[i][j].addValue(k, nominalCounts[i][j][k]);
                }
            } else {
                double minStdD = (m_minStdDevPerAtt != null) ? m_minStdDevPerAtt[j] : m_minStdDev;
                double mean = (center.isMissing(j)) ? inst.meanOrMode(j) : center.value(j);
                m_modelNormal[i][j][0] = mean;
                double stdv = (stdD.instance(i).isMissing(j))
                        ? ((m_maxValues[j] - m_minValues[j]) / (2 * m_num_clusters))
                        : stdD.instance(i).value(j);
                if (stdv < minStdD) {
                    stdv = inst.attributeStats(j).numericStats.stdDev;
                    if (Double.isInfinite(stdv)) {
                        stdv = minStdD;
                    }
                    if (stdv < minStdD) {
                        stdv = minStdD;
                    }
                }
                if (stdv <= 0) {
                    stdv = m_minStdDev;
                }

                m_modelNormal[i][j][1] = stdv;
                m_modelNormal[i][j][2] = 1.0;
            }
        }
    }

    for (j = 0; j < m_num_clusters; j++) {
        // m_priors[j] += 1.0;
        m_priors[j] = clusterSizes[j];
    }
    Utils.normalize(m_priors);
}

From source file:kmeansapps.Kmeans.java

public void startCluster(String path, int numOfCluster, JTable tableResult, JFrame apps) {
    try {//ww  w . j  a  va2  s. co m
        // TODO code application logic here
        SimpleKMeans kmeans = new SimpleKMeans();
        String[] columnNames = new String[numOfCluster];

        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);

        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();
        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        //            //setting columNames
        //            for (int i = 0; i < numOfCluster; i++) {
        //                columnNames[i] = "Cluster "+i+"";
        //            }

        // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa.
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        //tambahkan list cluster
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }
        //tambahkan anggota list ke cluster
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }

        for (int i = 0; i < listOfCluster.size(); i++) {
            System.out.print("Cluster - " + i + " -> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                System.out.print(listMemberOfCluster1 + " ");
            }
            System.out.println("");
        }

        //            int i=0;
        //            for(int clusterNum : assignments) {
        //                System.out.printf("Instance %d -> Cluster %d \n", i, clusterNum);
        //                i++;
        //                System.out.println(SSE);
        //            }

        //            //output to table
        //            tableResult.setModel(new DefaultTableModel(
        //            new Object[][]{
        //            },
        //            columnNames));
        //            apps.setVisible(true);
        //            
        //            int j=0;
        //            DefaultTableModel model = (DefaultTableModel) tableResult.getModel();
        //            for(int clusterNum : assignments) {
        //                if (clusterNum==0){
        //                    model.addRow(new Object[]{j, "", "", "", "", ""});
        //                }
        //                else if (clusterNum==1){
        //                    model.addRow(new Object[]{"", j, "", "", "", ""});
        //                }
        //                else if (clusterNum==2){
        //                    model.addRow(new Object[]{"", "", j, "", "", ""});
        //                }
        //                else if (clusterNum==3){
        //                    model.addRow(new Object[]{"", "", "", j, "", ""});
        //                }
        //                else if (clusterNum==4){
        //                    model.addRow(new Object[]{"", "", "", "", j, ""});
        //                }
        //                else if (clusterNum==5){
        //                    model.addRow(new Object[]{"", "", "", "", "", j});
        //                }
        //                
        //                j++;
        //            }
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:kmeansapps.Kmeans.java

public void startCluster(String path, int numOfCluster, JTextArea textarea) {
    try {/*from   w w w  .  jav  a 2s. c  o  m*/
        // TODO code application logic here
        SimpleKMeans kmeans = new SimpleKMeans();
        String[] columnNames = new String[numOfCluster];
        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);

        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();
        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa.
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        //tambahkan list cluster
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }
        //tambahkan anggota list ke cluster
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }
        textarea.setText("");
        String result = "";
        for (int i = 0; i < listOfCluster.size(); i++) {
            result = result + ("Cluster - " + i + " ==> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                result = result + (listMemberOfCluster1 + " ");
            }
            result = result + ("\n");
        }
        result = result + ("\nSSE : ") + kmeans.getSquaredError();
        textarea.setText(result);
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}