Example usage for weka.clusterers SimpleKMeans getAssignments

List of usage examples for weka.clusterers SimpleKMeans getAssignments

Introduction

In this page you can find the example usage for weka.clusterers SimpleKMeans getAssignments.

Prototype

public int[] getAssignments() throws Exception 

Source Link

Document

Gets the assignments for each instance.

Usage

From source file:ClusteringClass.java

public static void main(String[] args) throws Exception {
    String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";

    try {/*  w  w  w .j  av  a  2s.com*/
        FileWriter fw = new FileWriter(filename);
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();
        Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani",
                "dani");

        String query = "SELECT * FROM SONG_RATING2";
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(query);

        for (int i = 1; i < 23; i++) {
            if (i != 2) {
                ResultSetMetaData rsmd = rs.getMetaData();
                String name = rsmd.getColumnName(i);
                fw.append(name);
                if (i != 22) {
                    fw.append(',');
                } else {
                    fw.append('\n');
                }
            }
        }

        String query1 = "SELECT * FROM SONG_DATA";
        Statement stmt1 = conn.createStatement();
        ResultSet rs1 = stmt1.executeQuery(query1);

        String[] titles = new String[150];

        for (int ii = 0; ii < 150; ii++) {
            rs1.next();
            titles[ii] = rs1.getString("TITLE");
        }

        while (rs.next()) {
            for (int i = 1; i < 23; i++) {
                if (i == 22)
                    fw.append('\n');
                else if (i != 2) {
                    fw.append(',');
                }
            }
        }

        fw.flush();
        fw.close();
        conn.close();
        System.out.println("CSV File is created successfully.");

        /*
         Clustering part
         */
        DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv");
        Instances train = source.getDataSet();

        /*
         Applichiamo il filtro Remove fornito da Weka per non considerare un
         attributo nell'algoritmo di Clustering.
         */
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        /*
         Applichiamo il filtro Normalize fornito da Weka per normalizzare il 
         nostro dataset.
         */
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /*
         First Clustering Algorithm
         */
        EuclideanDistance df = new EuclideanDistance();
        SimpleKMeans clus1 = new SimpleKMeans();
        int k = 10;
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(df);
        clus1.setPreserveInstancesOrder(true);
        clus1.buildClusterer(train3);

        /*
         First Evaluation
         */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        int[] assignments = clus1.getAssignments();
        String[][] dati = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati[kk][0] = String.valueOf(kk);
            dati[kk][1] = train2.instance(kk).toString();
            dati[kk][2] = String.valueOf(assignments[kk]);
            dati[kk][3] = titles[kk];
        }

        for (int w = 0; w < 10; w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (dati[i][2].equals(String.valueOf(w))) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati[i][j] + "-> \t");
                        } else {
                            System.out.println(dati[i][j]);
                        }
                    }
                }
            }
        }

        /*first graph  
                
         PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1);
         //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date());
         String name = "";
         String cname = clus1.getClass().getName();
         if (cname.startsWith("weka.clusterers."))
         name += cname.substring("weka.clusterers.".length());
         else
         name += cname;
                
                
         VisualizePanel vp = new VisualizePanel();
         vp.setName(name + " (" + train.relationName() + ")");
         predData.setPlotName(name + " (" + train.relationName() + ")");
         vp.addPlot(predData);
                
         String plotName = vp.getName();
         final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName);
         jf.setSize(500,400);
         jf.getContentPane().setLayout(new BorderLayout());
         jf.getContentPane().add(vp, BorderLayout.CENTER);
         jf.dispose();
         jf.addWindowListener(new java.awt.event.WindowAdapter() {
         public void windowClosing(java.awt.event.WindowEvent e) {
         jf.dispose();
         }
         });
         jf.setVisible(true);
                
         end first graph
         */

        /*
         Second Clustering Algorithm
         */

        System.out.println();

        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /*
         Second Evaluation
         */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        String[][] dati3 = new String[150][4];

        for (int kk = 0; kk < 150; kk++) {
            dati3[kk][0] = String.valueOf(kk);
            dati3[kk][1] = train2.instance(kk).toString();
            dati3[kk][2] = String.valueOf(assignments3[kk]);
            dati3[kk][3] = titles[kk];
        }

        for (int w = 0; w < eval3.getNumClusters(); w++) {
            System.out.println();
            for (int i = 0; i < 150; i++) {
                if (Double.parseDouble(dati3[i][2]) == w) {
                    for (int j = 0; j < 4; j++) {
                        if (j != 3) {
                            System.out.print(dati3[i][j] + "-> \t");
                        } else {
                            System.out.println(dati3[i][j]);
                        }
                    }
                }
            }
        }
        System.out.println();
        for (int i = 0; i < 150; i++) {
            if (Double.parseDouble(dati3[i][2]) == -1.0) {
                for (int j = 0; j < 4; j++) {
                    if (j != 3) {
                        System.out.print(dati3[i][j] + "-> \t");
                    } else {
                        System.out.println(dati3[i][j]);
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:Clustering.WekaKMeansClustererWrapper.java

public ArrayList<String>[] classify(HashMap<String, List> data, boolean clearData) {
    ArrayList<String>[] clusterResult;
    try {// w  ww .  jav a 2  s . co m
        File arff = m_ArffExporter.getArff(data);
        int nSize = data.size();
        if (arff == null)
            return null;
        if (clearData)
            data.clear();

        FileInputStream is = new FileInputStream(arff.getAbsolutePath());
        Instances instances = ConverterUtils.DataSource.read(is);
        is.close();

        String[] keys = new String[instances.numInstances()];
        for (int i = 0; i < instances.numInstances(); ++i) {
            Instance instance = instances.instance(i);
            keys[i] = instance.stringValue(0); // assume that the 0th attribute is the key string
        }

        instances.deleteStringAttributes();

        SimpleKMeans cl = new SimpleKMeans();

        int numClusters = m_NumberOfClusters < nSize ? m_NumberOfClusters : nSize;

        String[] options = new String[5];
        options[0] = "-O";
        options[1] = "-N";
        options[2] = Integer.toString(numClusters);
        options[3] = "-A";
        options[4] = m_DistanceFunction;

        cl.setOptions(options);

        //System.out.println( "Clustering" );
        cl.buildClusterer(instances);

        //System.out.println( "Create ArrayList" );
        clusterResult = new ArrayList[m_NumberOfClusters];
        for (int i = 0; i < m_NumberOfClusters; ++i) {
            clusterResult[i] = new ArrayList<>();
        }

        //System.out.println( "Assigning" );
        int[] assignment = cl.getAssignments();
        for (int i = 0; i < assignment.length; ++i) {
            clusterResult[assignment[i]].add(keys[i]);
        }

        //System.out.println( "Done" );
        if (!arff.delete())
            arff.deleteOnExit();
    } catch (Exception ex) {
        //System.out.println( "[EXCEPTION] " + ex.getMessage() );
        m_LastErrorMessage = ex.getMessage();
        return null;
    }

    return clusterResult;
}

From source file:eu.cassandra.server.mongo.csn.MongoCluster.java

License:Apache License

/**
 * /*www. j a v  a  2  s .co  m*/
 * @param message
 * @param graph_id
 * @param clusterBasedOn
 * @param numberOfClusters
 * @param httpHeaders
 * @return
 */
private DBObject clusterKmeans(String message, String graph_id, String run_id, String clusterBasedOn,
        int numberOfClusters, String name, String clusterbasedon) {
    try {
        Instances instances = getInstances(clusterBasedOn, graph_id);
        if (instances.numInstances() < 2) {
            return new JSONtoReturn().createJSONError(message, new Exception("Number of CSN Nodes is < 2"));
        }

        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed((int) Calendar.getInstance().getTimeInMillis());
        // This is the important parameter to set
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numberOfClusters);
        kmeans.buildClusterer(instances);

        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        int i = 0;
        HashMap<Integer, Vector<String>> clusters = new HashMap<Integer, Vector<String>>();
        for (int clusterNum : assignments) {
            if (clusters.containsKey(clusterNum)) {
                Vector<String> cluster = clusters.get(clusterNum);
                cluster.add(nodeIDs.get(i));
                clusters.put(clusterNum, cluster);
            } else {
                Vector<String> cluster = new Vector<String>();
                cluster.add(nodeIDs.get(i));
                clusters.put(clusterNum, cluster);
            }
            i++;
        }
        nodeIDs.clear();
        return saveClusters(graph_id, run_id, "kmeans", clusters, null, name, clusterbasedon);
    } catch (Exception e) {
        e.printStackTrace();
        return new JSONtoReturn().createJSONError(message, e);
    }
}

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided in as many partitions as are the distinct label combinations.
 * @throws Exception //  w  w  w  .j  a  v  a  2  s . c  o m
 * 
 * @param file
 *          the .arff file
 * */
public ClassifierSet initializePopulation(final String file) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = InstancesUtility.openInstance(file);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, file);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file);

    /*
     * Instead of having multiple positions for the same label combination, use only one.
     * This is the one that will be used to "cover" the centroids.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
     * Delete the labels from the partitions.
     */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }

    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
     * It does not start from 7 because it assumes that the user inputs the number. See the api.
     */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
        //System.out.println(partitions[i]);
    }
    // partitions now contains only attributes

    /*
     * delete the attributes from partitionsWithCLasses
     */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }

    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
     * It does not start from 7 because it assumes that the user inputs the number. See the api.
     */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            /*            int k=0;
                        for (int j = 0; j < assignments.length; j++) {
                           System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                           k++;
                           System.out.println();
                    
                        }
                        System.out.println();*/

            Instances centroids = kmeans.getClusterCentroids();
            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide them the labels.
             * These are the ones we removed earlier.
             * But first, open up positions for attributes.
             * */

            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomClusteringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println(initialClassifiers);
    return initialClassifiers;
}

From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java

License:Open Source License

/**
 * Initialize the rule population by clustering the train set and producing rules based upon the clusters.
 * The train set is initially divided in as many partitions as are the distinct label combinations.
 * @throws Exception /*from  w w  w .  j a  v a 2  s  .co  m*/
 * 
 * @param trainSet
 *             the type of Instances train set
 * */

public ClassifierSet initializePopulation(final Instances trainset) throws Exception {

    final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2);

    int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1);

    final Instances set = trainset;

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setSeed(10);
    kmeans.setPreserveInstancesOrder(true);

    /*
     * Table partitions will hold instances only with attributes.
     * On the contrary, table partitionsWithCLasses will hold only the labels
     */
    Instances[] partitions = InstancesUtility.partitionInstances(this, trainset);
    Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset);

    /*
    * Instead of having multiple positions for the same label combination, use only one.
    * This is the one that will be used to "cover" the centroids.
    */

    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Instance temp = partitionsWithCLasses[i].instance(0);
        partitionsWithCLasses[i].delete();
        partitionsWithCLasses[i].add(temp);
    }

    /*
    * Delete the labels from the partitions.
    */
    String attributesIndicesForDeletion = "";

    for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) {
        if (k != set.numAttributes())
            attributesIndicesForDeletion += k + ",";
        else
            attributesIndicesForDeletion += k;
    }
    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
    * It does not start from 7 because it assumes that the user inputs the number. See the api.
    */
    for (int i = 0; i < partitions.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(attributesIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitions[i]);
        partitions[i] = Filter.useFilter(partitions[i], remove);
    }
    // partitions now contains only attributes

    /*
    * delete the attributes from partitionsWithCLasses
    */
    String labelsIndicesForDeletion = "";

    for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) {
        if (k != set.numAttributes() - numberOfLabels)
            labelsIndicesForDeletion += k + ",";
        else
            labelsIndicesForDeletion += k;
    }
    /*    attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. 
    * It does not start from 7 because it assumes that the user inputs the number. See the api.
    */
    for (int i = 0; i < partitionsWithCLasses.length; i++) {
        Remove remove = new Remove();
        remove.setAttributeIndices(labelsIndicesForDeletion);
        remove.setInvertSelection(false);
        remove.setInputFormat(partitionsWithCLasses[i]);
        partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove);
        //System.out.println(partitionsWithCLasses[i]);
    }
    // partitionsWithCLasses now contains only labels

    int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500);

    // the set used to store the rules from all the clusters
    ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this,
            populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true)));

    for (int i = 0; i < partitions.length; i++) {

        try {

            kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances()));
            kmeans.buildClusterer(partitions[i]);
            int[] assignments = kmeans.getAssignments();

            /*            int k=0;
                        for (int j = 0; j < assignments.length; j++) {
                           System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                           k++;
                           System.out.println();
                    
                        }
                        System.out.println();*/

            Instances centroids = kmeans.getClusterCentroids();

            int numOfCentroidAttributes = centroids.numAttributes();

            /*
             * The centroids in this stage hold only attributes. To continue, we need to provide them the labels.
             * These are the ones we removed earlier.
             * But first, open up positions for attributes.
             * */

            for (int j = 0; j < numberOfLabels; j++) {
                Attribute label = new Attribute("label" + j);
                centroids.insertAttributeAt(label, numOfCentroidAttributes + j);
            }

            for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) {
                for (int labels = 0; labels < numberOfLabels; labels++) {
                    centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels,
                            partitionsWithCLasses[i].instance(0).value(labels));
                }
            }

            //System.out.println(centroids);
            double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids);

            for (int j = 0; j < centroidsArray.length; j++) {
                //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]);
                final Classifier coveringClassifier = this.getClassifierTransformBridge()
                        .createRandomCoveringClassifier(centroidsArray[j]);

                coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT);
                initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    //System.out.println(initialClassifiers);
    return initialClassifiers;
}

From source file:graph.clustering.NodeClusterer.java

License:Apache License

private int[] performClustering(Instances clusterTrainingSet, int numOfClusters) {
    String[] options = new String[7];
    options[0] = "-N"; // num of clusters
    options[1] = String.valueOf(numOfClusters);
    options[2] = "-I"; // max num of iterations
    options[3] = "500";
    options[4] = "-S"; // the random seed number
    options[5] = "10";
    options[6] = "-O"; // preserve instance order

    String[] distanceOptions = new String[2];
    distanceOptions[0] = "-R"; // attribute indices
    distanceOptions[1] = "first-last";

    EuclideanDistance distanceFunc = new EuclideanDistance();
    SimpleKMeans clusterer = new SimpleKMeans();
    int[] assignments = null;
    try {/*from   w w  w  .  ja  v a 2s.c o  m*/
        distanceFunc.setOptions(distanceOptions);

        clusterer.setOptions(options);
        clusterer.setDistanceFunction(distanceFunc);
        clusterer.buildClusterer(clusterTrainingSet);

        assignments = clusterer.getAssignments();
    } catch (Exception e1) {
        System.out.println("Error in clustering:");
        e1.printStackTrace();
    }

    return assignments;
}

From source file:kmeansapps.Kmeans.java

public void startCluster(String path, int numOfCluster, JTable tableResult, JFrame apps) {
    try {/*from  w w w . j  av a2 s . com*/
        // TODO code application logic here
        SimpleKMeans kmeans = new SimpleKMeans();
        String[] columnNames = new String[numOfCluster];

        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);

        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();
        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        //            //setting columNames
        //            for (int i = 0; i < numOfCluster; i++) {
        //                columnNames[i] = "Cluster "+i+"";
        //            }

        // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa.
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        //tambahkan list cluster
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }
        //tambahkan anggota list ke cluster
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }

        for (int i = 0; i < listOfCluster.size(); i++) {
            System.out.print("Cluster - " + i + " -> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                System.out.print(listMemberOfCluster1 + " ");
            }
            System.out.println("");
        }

        //            int i=0;
        //            for(int clusterNum : assignments) {
        //                System.out.printf("Instance %d -> Cluster %d \n", i, clusterNum);
        //                i++;
        //                System.out.println(SSE);
        //            }

        //            //output to table
        //            tableResult.setModel(new DefaultTableModel(
        //            new Object[][]{
        //            },
        //            columnNames));
        //            apps.setVisible(true);
        //            
        //            int j=0;
        //            DefaultTableModel model = (DefaultTableModel) tableResult.getModel();
        //            for(int clusterNum : assignments) {
        //                if (clusterNum==0){
        //                    model.addRow(new Object[]{j, "", "", "", "", ""});
        //                }
        //                else if (clusterNum==1){
        //                    model.addRow(new Object[]{"", j, "", "", "", ""});
        //                }
        //                else if (clusterNum==2){
        //                    model.addRow(new Object[]{"", "", j, "", "", ""});
        //                }
        //                else if (clusterNum==3){
        //                    model.addRow(new Object[]{"", "", "", j, "", ""});
        //                }
        //                else if (clusterNum==4){
        //                    model.addRow(new Object[]{"", "", "", "", j, ""});
        //                }
        //                else if (clusterNum==5){
        //                    model.addRow(new Object[]{"", "", "", "", "", j});
        //                }
        //                
        //                j++;
        //            }
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:kmeansapps.Kmeans.java

public void startCluster(String path, int numOfCluster, JTextArea textarea) {
    try {/* w w w  .j  a v a  2s . co  m*/
        // TODO code application logic here
        SimpleKMeans kmeans = new SimpleKMeans();
        String[] columnNames = new String[numOfCluster];
        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);

        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();
        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa.
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        //tambahkan list cluster
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }
        //tambahkan anggota list ke cluster
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }
        textarea.setText("");
        String result = "";
        for (int i = 0; i < listOfCluster.size(); i++) {
            result = result + ("Cluster - " + i + " ==> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                result = result + (listMemberOfCluster1 + " ");
            }
            result = result + ("\n");
        }
        result = result + ("\nSSE : ") + kmeans.getSquaredError();
        textarea.setText(result);
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * K-Means Clustering//  w  ww  . j  a v a  2  s  .  c o m
 * @param data - matrix of observations (numObs x numFeatures)
 * @param k - number of clusters
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);

    // uses Euclidean distance by default
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setPreserveInstancesOrder(true);
        clusterer.setNumClusters(k);
        clusterer.buildClusterer(ds);

        // cluster centers
        Instances centers = clusterer.getClusterCentroids();
        Cluster[] clusters = new Cluster[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++) {
            Instance inst = centers.instance(i);
            double[] mean = new double[inst.numAttributes()];
            for (int j = 0; j < mean.length; j++) {
                mean[j] = inst.value(j);
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members
        int[] assignments = clusterer.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            clusters[assignments[i]].addMember(i);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }

}

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}// w w w .  ja va 2s.  co  m
 * 
 * <p>
 * This method is specialized for <b>kmeans</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // Returns a valid instances set, generated based on the absolut
        // behavior models
        Instances instances = getInstances(behaviorModelsAbsolute);

        // KMeans --> Weka
        SimpleKMeans kmeans = new SimpleKMeans();

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // kmeans.setDistanceFunction(manhattanDistance);

        // distance function with option don*t normalize
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // get number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // clustering
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // must be specified in a fix way
            kmeans.setNumClusters(clusterSize);

            // build cluster
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();

            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
            // clusteringMetrics.printClusterAssignmentsToSession(assignments,
            // clusterSize);

        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {

            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}