List of usage examples for weka.clusterers SimpleKMeans getAssignments
public int[] getAssignments() throws Exception
From source file:ClusteringClass.java
public static void main(String[] args) throws Exception { String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"; try {/* w w w .j av a 2s.com*/ FileWriter fw = new FileWriter(filename); Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance(); Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani", "dani"); String query = "SELECT * FROM SONG_RATING2"; Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(query); for (int i = 1; i < 23; i++) { if (i != 2) { ResultSetMetaData rsmd = rs.getMetaData(); String name = rsmd.getColumnName(i); fw.append(name); if (i != 22) { fw.append(','); } else { fw.append('\n'); } } } String query1 = "SELECT * FROM SONG_DATA"; Statement stmt1 = conn.createStatement(); ResultSet rs1 = stmt1.executeQuery(query1); String[] titles = new String[150]; for (int ii = 0; ii < 150; ii++) { rs1.next(); titles[ii] = rs1.getString("TITLE"); } while (rs.next()) { for (int i = 1; i < 23; i++) { if (i == 22) fw.append('\n'); else if (i != 2) { fw.append(','); } } } fw.flush(); fw.close(); conn.close(); System.out.println("CSV File is created successfully."); /* Clustering part */ DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"); Instances train = source.getDataSet(); /* Applichiamo il filtro Remove fornito da Weka per non considerare un attributo nell'algoritmo di Clustering. */ Remove filter = new Remove(); filter.setAttributeIndices("1"); filter.setInputFormat(train); Instances train2 = Filter.useFilter(train, filter); System.out.println("Nominal attributes removed from computation."); /* Applichiamo il filtro Normalize fornito da Weka per normalizzare il nostro dataset. */ Normalize norm = new Normalize(); norm.setInputFormat(train2); Instances train3 = Filter.useFilter(train2, norm); System.out.println("Dataset normalized."); /* First Clustering Algorithm */ EuclideanDistance df = new EuclideanDistance(); SimpleKMeans clus1 = new SimpleKMeans(); int k = 10; clus1.setNumClusters(k); clus1.setDistanceFunction(df); clus1.setPreserveInstancesOrder(true); clus1.buildClusterer(train3); /* First Evaluation */ ClusterEvaluation eval1 = new ClusterEvaluation(); eval1.setClusterer(clus1); eval1.evaluateClusterer(train3); System.out.println(eval1.clusterResultsToString()); int[] assignments = clus1.getAssignments(); String[][] dati = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati[kk][0] = String.valueOf(kk); dati[kk][1] = train2.instance(kk).toString(); dati[kk][2] = String.valueOf(assignments[kk]); dati[kk][3] = titles[kk]; } for (int w = 0; w < 10; w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (dati[i][2].equals(String.valueOf(w))) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati[i][j] + "-> \t"); } else { System.out.println(dati[i][j]); } } } } } /*first graph PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1); //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date()); String name = ""; String cname = clus1.getClass().getName(); if (cname.startsWith("weka.clusterers.")) name += cname.substring("weka.clusterers.".length()); else name += cname; VisualizePanel vp = new VisualizePanel(); vp.setName(name + " (" + train.relationName() + ")"); predData.setPlotName(name + " (" + train.relationName() + ")"); vp.addPlot(predData); String plotName = vp.getName(); final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName); jf.setSize(500,400); jf.getContentPane().setLayout(new BorderLayout()); jf.getContentPane().add(vp, BorderLayout.CENTER); jf.dispose(); jf.addWindowListener(new java.awt.event.WindowAdapter() { public void windowClosing(java.awt.event.WindowEvent e) { jf.dispose(); } }); jf.setVisible(true); end first graph */ /* Second Clustering Algorithm */ System.out.println(); DBSCAN clus3 = new DBSCAN(); clus3.setEpsilon(0.7); clus3.setMinPoints(2); clus3.buildClusterer(train3); /* Second Evaluation */ ClusterEvaluation eval3 = new ClusterEvaluation(); eval3.setClusterer(clus3); eval3.evaluateClusterer(train3); System.out.println(eval3.clusterResultsToString()); double[] assignments3 = eval3.getClusterAssignments(); String[][] dati3 = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati3[kk][0] = String.valueOf(kk); dati3[kk][1] = train2.instance(kk).toString(); dati3[kk][2] = String.valueOf(assignments3[kk]); dati3[kk][3] = titles[kk]; } for (int w = 0; w < eval3.getNumClusters(); w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == w) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == -1.0) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:Clustering.WekaKMeansClustererWrapper.java
public ArrayList<String>[] classify(HashMap<String, List> data, boolean clearData) { ArrayList<String>[] clusterResult; try {// w ww . jav a 2 s . co m File arff = m_ArffExporter.getArff(data); int nSize = data.size(); if (arff == null) return null; if (clearData) data.clear(); FileInputStream is = new FileInputStream(arff.getAbsolutePath()); Instances instances = ConverterUtils.DataSource.read(is); is.close(); String[] keys = new String[instances.numInstances()]; for (int i = 0; i < instances.numInstances(); ++i) { Instance instance = instances.instance(i); keys[i] = instance.stringValue(0); // assume that the 0th attribute is the key string } instances.deleteStringAttributes(); SimpleKMeans cl = new SimpleKMeans(); int numClusters = m_NumberOfClusters < nSize ? m_NumberOfClusters : nSize; String[] options = new String[5]; options[0] = "-O"; options[1] = "-N"; options[2] = Integer.toString(numClusters); options[3] = "-A"; options[4] = m_DistanceFunction; cl.setOptions(options); //System.out.println( "Clustering" ); cl.buildClusterer(instances); //System.out.println( "Create ArrayList" ); clusterResult = new ArrayList[m_NumberOfClusters]; for (int i = 0; i < m_NumberOfClusters; ++i) { clusterResult[i] = new ArrayList<>(); } //System.out.println( "Assigning" ); int[] assignment = cl.getAssignments(); for (int i = 0; i < assignment.length; ++i) { clusterResult[assignment[i]].add(keys[i]); } //System.out.println( "Done" ); if (!arff.delete()) arff.deleteOnExit(); } catch (Exception ex) { //System.out.println( "[EXCEPTION] " + ex.getMessage() ); m_LastErrorMessage = ex.getMessage(); return null; } return clusterResult; }
From source file:eu.cassandra.server.mongo.csn.MongoCluster.java
License:Apache License
/** * /*www. j a v a 2 s .co m*/ * @param message * @param graph_id * @param clusterBasedOn * @param numberOfClusters * @param httpHeaders * @return */ private DBObject clusterKmeans(String message, String graph_id, String run_id, String clusterBasedOn, int numberOfClusters, String name, String clusterbasedon) { try { Instances instances = getInstances(clusterBasedOn, graph_id); if (instances.numInstances() < 2) { return new JSONtoReturn().createJSONError(message, new Exception("Number of CSN Nodes is < 2")); } SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed((int) Calendar.getInstance().getTimeInMillis()); // This is the important parameter to set kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(numberOfClusters); kmeans.buildClusterer(instances); // This array returns the cluster number (starting with 0) for each instance // The array has as many elements as the number of instances int[] assignments = kmeans.getAssignments(); int i = 0; HashMap<Integer, Vector<String>> clusters = new HashMap<Integer, Vector<String>>(); for (int clusterNum : assignments) { if (clusters.containsKey(clusterNum)) { Vector<String> cluster = clusters.get(clusterNum); cluster.add(nodeIDs.get(i)); clusters.put(clusterNum, cluster); } else { Vector<String> cluster = new Vector<String>(); cluster.add(nodeIDs.get(i)); clusters.put(clusterNum, cluster); } i++; } nodeIDs.clear(); return saveClusters(graph_id, run_id, "kmeans", clusters, null, name, clusterbasedon); } catch (Exception e) { e.printStackTrace(); return new JSONtoReturn().createJSONError(message, e); } }
From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License:Open Source License
/** * Initialize the rule population by clustering the train set and producing rules based upon the clusters. * The train set is initially divided in as many partitions as are the distinct label combinations. * @throws Exception // w w w .j a v a 2 s . c o m * * @param file * the .arff file * */ public ClassifierSet initializePopulation(final String file) throws Exception { final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2); int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1); final Instances set = InstancesUtility.openInstance(file); SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); /* * Table partitions will hold instances only with attributes. * On the contrary, table partitionsWithCLasses will hold only the labels */ Instances[] partitions = InstancesUtility.partitionInstances(this, file); Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, file); /* * Instead of having multiple positions for the same label combination, use only one. * This is the one that will be used to "cover" the centroids. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Instance temp = partitionsWithCLasses[i].instance(0); partitionsWithCLasses[i].delete(); partitionsWithCLasses[i].add(temp); } /* * Delete the labels from the partitions. */ String attributesIndicesForDeletion = ""; for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) { if (k != set.numAttributes()) attributesIndicesForDeletion += k + ","; else attributesIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. * It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitions.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(attributesIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitions[i]); partitions[i] = Filter.useFilter(partitions[i], remove); //System.out.println(partitions[i]); } // partitions now contains only attributes /* * delete the attributes from partitionsWithCLasses */ String labelsIndicesForDeletion = ""; for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) { if (k != set.numAttributes() - numberOfLabels) labelsIndicesForDeletion += k + ","; else labelsIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. * It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(labelsIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitionsWithCLasses[i]); partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove); //System.out.println(partitionsWithCLasses[i]); } // partitionsWithCLasses now contains only labels int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500); // the set used to store the rules from all the clusters ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this, populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true))); for (int i = 0; i < partitions.length; i++) { try { kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances())); kmeans.buildClusterer(partitions[i]); int[] assignments = kmeans.getAssignments(); /* int k=0; for (int j = 0; j < assignments.length; j++) { System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); k++; System.out.println(); } System.out.println();*/ Instances centroids = kmeans.getClusterCentroids(); int numOfCentroidAttributes = centroids.numAttributes(); /* * The centroids in this stage hold only attributes. To continue, we need to provide them the labels. * These are the ones we removed earlier. * But first, open up positions for attributes. * */ for (int j = 0; j < numberOfLabels; j++) { Attribute label = new Attribute("label" + j); centroids.insertAttributeAt(label, numOfCentroidAttributes + j); } for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) { for (int labels = 0; labels < numberOfLabels; labels++) { centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels, partitionsWithCLasses[i].instance(0).value(labels)); } } double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids); for (int j = 0; j < centroidsArray.length; j++) { //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); final Classifier coveringClassifier = this.getClassifierTransformBridge() .createRandomClusteringClassifier(centroidsArray[j]); coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT); initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false); } } catch (Exception e) { e.printStackTrace(); } } System.out.println(initialClassifiers); return initialClassifiers; }
From source file:gr.auth.ee.lcs.AbstractLearningClassifierSystem.java
License:Open Source License
/** * Initialize the rule population by clustering the train set and producing rules based upon the clusters. * The train set is initially divided in as many partitions as are the distinct label combinations. * @throws Exception /*from w w w . j a v a 2 s .co m*/ * * @param trainSet * the type of Instances train set * */ public ClassifierSet initializePopulation(final Instances trainset) throws Exception { final double gamma = SettingsLoader.getNumericSetting("CLUSTER_GAMMA", .2); int numberOfLabels = (int) SettingsLoader.getNumericSetting("numberOfLabels", 1); final Instances set = trainset; SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); /* * Table partitions will hold instances only with attributes. * On the contrary, table partitionsWithCLasses will hold only the labels */ Instances[] partitions = InstancesUtility.partitionInstances(this, trainset); Instances[] partitionsWithCLasses = InstancesUtility.partitionInstances(this, trainset); /* * Instead of having multiple positions for the same label combination, use only one. * This is the one that will be used to "cover" the centroids. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Instance temp = partitionsWithCLasses[i].instance(0); partitionsWithCLasses[i].delete(); partitionsWithCLasses[i].add(temp); } /* * Delete the labels from the partitions. */ String attributesIndicesForDeletion = ""; for (int k = set.numAttributes() - numberOfLabels + 1; k <= set.numAttributes(); k++) { if (k != set.numAttributes()) attributesIndicesForDeletion += k + ","; else attributesIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. * It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitions.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(attributesIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitions[i]); partitions[i] = Filter.useFilter(partitions[i], remove); } // partitions now contains only attributes /* * delete the attributes from partitionsWithCLasses */ String labelsIndicesForDeletion = ""; for (int k = 1; k <= set.numAttributes() - numberOfLabels; k++) { if (k != set.numAttributes() - numberOfLabels) labelsIndicesForDeletion += k + ","; else labelsIndicesForDeletion += k; } /* attributesIncicesForDeletion = 8,9,10,11,12,13,14 e.g. for 7 attributes and 7 labels. * It does not start from 7 because it assumes that the user inputs the number. See the api. */ for (int i = 0; i < partitionsWithCLasses.length; i++) { Remove remove = new Remove(); remove.setAttributeIndices(labelsIndicesForDeletion); remove.setInvertSelection(false); remove.setInputFormat(partitionsWithCLasses[i]); partitionsWithCLasses[i] = Filter.useFilter(partitionsWithCLasses[i], remove); //System.out.println(partitionsWithCLasses[i]); } // partitionsWithCLasses now contains only labels int populationSize = (int) SettingsLoader.getNumericSetting("populationSize", 1500); // the set used to store the rules from all the clusters ClassifierSet initialClassifiers = new ClassifierSet(new FixedSizeSetWorstFitnessDeletion(this, populationSize, new RouletteWheelSelector(AbstractUpdateStrategy.COMPARISON_MODE_DELETION, true))); for (int i = 0; i < partitions.length; i++) { try { kmeans.setNumClusters((int) Math.ceil(gamma * partitions[i].numInstances())); kmeans.buildClusterer(partitions[i]); int[] assignments = kmeans.getAssignments(); /* int k=0; for (int j = 0; j < assignments.length; j++) { System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); k++; System.out.println(); } System.out.println();*/ Instances centroids = kmeans.getClusterCentroids(); int numOfCentroidAttributes = centroids.numAttributes(); /* * The centroids in this stage hold only attributes. To continue, we need to provide them the labels. * These are the ones we removed earlier. * But first, open up positions for attributes. * */ for (int j = 0; j < numberOfLabels; j++) { Attribute label = new Attribute("label" + j); centroids.insertAttributeAt(label, numOfCentroidAttributes + j); } for (int centroidInstances = 0; centroidInstances < centroids.numInstances(); centroidInstances++) { for (int labels = 0; labels < numberOfLabels; labels++) { centroids.instance(centroidInstances).setValue(numOfCentroidAttributes + labels, partitionsWithCLasses[i].instance(0).value(labels)); } } //System.out.println(centroids); double[][] centroidsArray = InstancesUtility.convertIntancesToDouble(centroids); for (int j = 0; j < centroidsArray.length; j++) { //System.out.printf("Instance %d => Cluster %d ", k, assignments[j]); final Classifier coveringClassifier = this.getClassifierTransformBridge() .createRandomCoveringClassifier(centroidsArray[j]); coveringClassifier.setClassifierOrigin(Classifier.CLASSIFIER_ORIGIN_INIT); initialClassifiers.addClassifier(new Macroclassifier(coveringClassifier, 1), false); } } catch (Exception e) { e.printStackTrace(); } } //System.out.println(initialClassifiers); return initialClassifiers; }
From source file:graph.clustering.NodeClusterer.java
License:Apache License
private int[] performClustering(Instances clusterTrainingSet, int numOfClusters) { String[] options = new String[7]; options[0] = "-N"; // num of clusters options[1] = String.valueOf(numOfClusters); options[2] = "-I"; // max num of iterations options[3] = "500"; options[4] = "-S"; // the random seed number options[5] = "10"; options[6] = "-O"; // preserve instance order String[] distanceOptions = new String[2]; distanceOptions[0] = "-R"; // attribute indices distanceOptions[1] = "first-last"; EuclideanDistance distanceFunc = new EuclideanDistance(); SimpleKMeans clusterer = new SimpleKMeans(); int[] assignments = null; try {/*from w w w . ja v a 2s.c o m*/ distanceFunc.setOptions(distanceOptions); clusterer.setOptions(options); clusterer.setDistanceFunction(distanceFunc); clusterer.buildClusterer(clusterTrainingSet); assignments = clusterer.getAssignments(); } catch (Exception e1) { System.out.println("Error in clustering:"); e1.printStackTrace(); } return assignments; }
From source file:kmeansapps.Kmeans.java
public void startCluster(String path, int numOfCluster, JTable tableResult, JFrame apps) { try {/*from w w w . j av a2 s . com*/ // TODO code application logic here SimpleKMeans kmeans = new SimpleKMeans(); String[] columnNames = new String[numOfCluster]; kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(numOfCluster); BufferedReader datafile = readDataFile(path); Instances data = new Instances(datafile); kmeans.buildClusterer(data); double SSE = kmeans.getSquaredError(); // This array returns the cluster number (starting with 0) for each instance // The array has as many elements as the number of instances int[] assignments = kmeans.getAssignments(); // //setting columNames // for (int i = 0; i < numOfCluster; i++) { // columnNames[i] = "Cluster "+i+""; // } // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa. ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>(); ArrayList<String> listMemberOfCluster; //tambahkan list cluster for (int i = 0; i < numOfCluster; i++) { listMemberOfCluster = new ArrayList<>(); listOfCluster.add(listMemberOfCluster); } //tambahkan anggota list ke cluster int j = 0; for (int clusterNum : assignments) { listOfCluster.get(clusterNum).add(j + ""); j++; } for (int i = 0; i < listOfCluster.size(); i++) { System.out.print("Cluster - " + i + " -> "); for (String listMemberOfCluster1 : listOfCluster.get(i)) { System.out.print(listMemberOfCluster1 + " "); } System.out.println(""); } // int i=0; // for(int clusterNum : assignments) { // System.out.printf("Instance %d -> Cluster %d \n", i, clusterNum); // i++; // System.out.println(SSE); // } // //output to table // tableResult.setModel(new DefaultTableModel( // new Object[][]{ // }, // columnNames)); // apps.setVisible(true); // // int j=0; // DefaultTableModel model = (DefaultTableModel) tableResult.getModel(); // for(int clusterNum : assignments) { // if (clusterNum==0){ // model.addRow(new Object[]{j, "", "", "", "", ""}); // } // else if (clusterNum==1){ // model.addRow(new Object[]{"", j, "", "", "", ""}); // } // else if (clusterNum==2){ // model.addRow(new Object[]{"", "", j, "", "", ""}); // } // else if (clusterNum==3){ // model.addRow(new Object[]{"", "", "", j, "", ""}); // } // else if (clusterNum==4){ // model.addRow(new Object[]{"", "", "", "", j, ""}); // } // else if (clusterNum==5){ // model.addRow(new Object[]{"", "", "", "", "", j}); // } // // j++; // } } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:kmeansapps.Kmeans.java
public void startCluster(String path, int numOfCluster, JTextArea textarea) { try {/* w w w .j a v a 2s . co m*/ // TODO code application logic here SimpleKMeans kmeans = new SimpleKMeans(); String[] columnNames = new String[numOfCluster]; kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(numOfCluster); BufferedReader datafile = readDataFile(path); Instances data = new Instances(datafile); kmeans.buildClusterer(data); double SSE = kmeans.getSquaredError(); // This array returns the cluster number (starting with 0) for each instance // The array has as many elements as the number of instances int[] assignments = kmeans.getAssignments(); // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa. ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>(); ArrayList<String> listMemberOfCluster; //tambahkan list cluster for (int i = 0; i < numOfCluster; i++) { listMemberOfCluster = new ArrayList<>(); listOfCluster.add(listMemberOfCluster); } //tambahkan anggota list ke cluster int j = 0; for (int clusterNum : assignments) { listOfCluster.get(clusterNum).add(j + ""); j++; } textarea.setText(""); String result = ""; for (int i = 0; i < listOfCluster.size(); i++) { result = result + ("Cluster - " + i + " ==> "); for (String listMemberOfCluster1 : listOfCluster.get(i)) { result = result + (listMemberOfCluster1 + " "); } result = result + ("\n"); } result = result + ("\nSSE : ") + kmeans.getSquaredError(); textarea.setText(result); } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * K-Means Clustering// w ww . j a v a 2 s . c o m * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); // uses Euclidean distance by default SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(k); clusterer.buildClusterer(ds); // cluster centers Instances centers = clusterer.getClusterCentroids(); Cluster[] clusters = new Cluster[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { Instance inst = centers.instance(i); double[] mean = new double[inst.numAttributes()]; for (int j = 0; j < mean.length; j++) { mean[j] = inst.value(j); } clusters[i] = new Cluster(mean, i); } // cluster members int[] assignments = clusterer.getAssignments(); for (int i = 0; i < assignments.length; i++) { clusters[assignments[i]].addMember(i); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java
License:Apache License
/** * {@inheritDoc}// w w w . ja va 2s. co m * * <p> * This method is specialized for <b>kmeans</b> clustering. */ @Override public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute, final UseCaseRepository useCaseRepository) { final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer(); // Behavior Mix to be returned; final BehaviorMix behaviorMix = this.createBehaviorMix(); try { // Returns a valid instances set, generated based on the absolut // behavior models Instances instances = getInstances(behaviorModelsAbsolute); // KMeans --> Weka SimpleKMeans kmeans = new SimpleKMeans(); // DistanceFunction manhattanDistance = new ManhattanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // manhattanDistance.setOptions(options); // manhattanDistance.setInstances(instances); // kmeans.setDistanceFunction(manhattanDistance); // distance function with option don*t normalize DistanceFunction euclideanDistance = new EuclideanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // euclideanDistance.setOptions(options); euclideanDistance.setInstances(instances); kmeans.setDistanceFunction(euclideanDistance); kmeans.setPreserveInstancesOrder(true); int[] clustersize = null; int[] assignments = null; // get number of clusters to be generated. int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin()); // clustering for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) { // must be specified in a fix way kmeans.setNumClusters(clusterSize); // build cluster kmeans.buildClusterer(instances); clustersize = kmeans.getClusterSizes(); assignments = kmeans.getAssignments(); ClusteringMetrics clusteringMetrics = new ClusteringMetrics(); clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids()); clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances, assignments); clusteringMetrics.calculateBetas(); clusteringMetrics.printErrorMetricsHeader(); clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances()); clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances); // clusteringMetrics.printClusterAssignmentsToSession(assignments, // clusterSize); } Instances resultingCentroids = kmeans.getClusterCentroids(); // for each centroid instance, create new behaviorModelRelative for (int i = 0; i < resultingCentroids.numInstances(); i++) { Instance centroid = resultingCentroids.instance(i); // create a Behavior Model, which includes all vertices only; // the vertices are associated with the use cases, and a // dedicated // vertex that represents the final state will be added; final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases()); // install the transitions in between vertices; this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid, assignments, i); // convert absolute to relative behaviorModel final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer .transform(behaviorModelAbsoluteCentroid); // relative Frequency of cluster i double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances(); // create the (unique) Behavior Mix entry to be returned; final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry( AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency; behaviorModelRelative); // add to resulting behaviorMix behaviorMix.getEntries().add(behaviorMixEntry); } return behaviorMix; } catch (ExtractionException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } // if any error occurs, an ExtractionExeption should be thrown, // indicating the error that occurred; // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy" // should give an idea for handling the Behavior Models and how to // use the helping methods of the (abstract) parent class. return behaviorMix; }