List of usage examples for weka.clusterers ClusterEvaluation getNumClusters
public int getNumClusters()
From source file:ClusteringClass.java
/**
 * Exports song-rating data from a Derby database to a CSV file, then runs two
 * clustering algorithms (SimpleKMeans with k=10, then DBSCAN) over the exported
 * data and prints each song's cluster membership together with its title.
 *
 * <p>NOTE(review): the original row-export loop appended only the ',' and '\n'
 * separators and never the actual cell values, producing a CSV with an intact
 * header but empty rows; this version writes {@code rs.getString(i)} for each
 * exported column. All JDBC/file resources are now closed via
 * try-with-resources.
 */
public static void main(String[] args) throws Exception {
    final String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv";
    final int numSongs = 150;   // rows expected in SONG_DATA (fixed in the original)
    final int numColumns = 22;  // SONG_RATING2 columns to export (column 2 is skipped)
    final int k = 10;           // cluster count for SimpleKMeans
    try {
        Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance();

        String[] titles = new String[numSongs];
        try (Connection conn = DriverManager.getConnection(
                "jdbc:derby://localhost:1527/HumoRadioDB", "dani", "dani");
             FileWriter fw = new FileWriter(filename)) {
            writeRatingsCsv(conn, fw, numColumns);
            readTitles(conn, titles);
        }
        System.out.println("CSV File is created successfully.");

        /* Clustering part */
        DataSource source = new DataSource(filename);
        Instances train = source.getDataSet();

        // Remove the first (nominal) attribute so it is not used by the
        // clustering algorithms.
        Remove filter = new Remove();
        filter.setAttributeIndices("1");
        filter.setInputFormat(train);
        Instances train2 = Filter.useFilter(train, filter);
        System.out.println("Nominal attributes removed from computation.");

        // Normalize the remaining attributes.
        Normalize norm = new Normalize();
        norm.setInputFormat(train2);
        Instances train3 = Filter.useFilter(train2, norm);
        System.out.println("Dataset normalized.");

        /* First Clustering Algorithm: SimpleKMeans */
        SimpleKMeans clus1 = new SimpleKMeans();
        clus1.setNumClusters(k);
        clus1.setDistanceFunction(new EuclideanDistance());
        clus1.setPreserveInstancesOrder(true); // required for getAssignments()
        clus1.buildClusterer(train3);

        /* First Evaluation */
        ClusterEvaluation eval1 = new ClusterEvaluation();
        eval1.setClusterer(clus1);
        eval1.evaluateClusterer(train3);
        System.out.println(eval1.clusterResultsToString());

        int[] assignments = clus1.getAssignments();
        String[][] dati = new String[numSongs][4];
        for (int i = 0; i < numSongs; i++) {
            dati[i][0] = String.valueOf(i);
            dati[i][1] = train2.instance(i).toString();
            dati[i][2] = String.valueOf(assignments[i]);
            dati[i][3] = titles[i];
        }
        printClusters(dati, k);

        /* Second Clustering Algorithm: DBSCAN */
        System.out.println();
        DBSCAN clus3 = new DBSCAN();
        clus3.setEpsilon(0.7);
        clus3.setMinPoints(2);
        clus3.buildClusterer(train3);

        /* Second Evaluation */
        ClusterEvaluation eval3 = new ClusterEvaluation();
        eval3.setClusterer(clus3);
        eval3.evaluateClusterer(train3);
        System.out.println(eval3.clusterResultsToString());

        double[] assignments3 = eval3.getClusterAssignments();
        String[][] dati3 = new String[numSongs][4];
        for (int i = 0; i < numSongs; i++) {
            dati3[i][0] = String.valueOf(i);
            dati3[i][1] = train2.instance(i).toString();
            dati3[i][2] = String.valueOf(assignments3[i]);
            dati3[i][3] = titles[i];
        }
        printClusters(dati3, eval3.getNumClusters());

        // DBSCAN marks noise instances with cluster id -1; print them last.
        System.out.println();
        printCluster(dati3, -1.0);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Writes the SONG_RATING2 table as CSV to {@code fw}: a header of column
 * names followed by one line per row, skipping column 2 in both.
 */
private static void writeRatingsCsv(Connection conn, FileWriter fw, int numColumns)
        throws Exception {
    try (Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery("SELECT * FROM SONG_RATING2")) {
        ResultSetMetaData rsmd = rs.getMetaData();
        for (int i = 1; i <= numColumns; i++) {
            if (i == 2) {
                continue; // column 2 is excluded from the export
            }
            fw.append(rsmd.getColumnName(i));
            fw.append(i != numColumns ? ',' : '\n');
        }
        while (rs.next()) {
            for (int i = 1; i <= numColumns; i++) {
                if (i == 2) {
                    continue;
                }
                // SQL NULL renders as the string "null" here — acceptable for
                // this dataset; adjust if NULLs are possible.
                fw.append(rs.getString(i));
                fw.append(i != numColumns ? ',' : '\n');
            }
        }
        fw.flush();
    }
}

/** Reads the first {@code titles.length} song titles from SONG_DATA. */
private static void readTitles(Connection conn, String[] titles) throws Exception {
    try (Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery("SELECT * FROM SONG_DATA")) {
        for (int i = 0; i < titles.length; i++) {
            rs.next();
            titles[i] = rs.getString("TITLE");
        }
    }
}

/**
 * For each cluster id 0..numClusters-1, prints a blank line followed by every
 * row of {@code dati} assigned to that cluster.
 */
private static void printClusters(String[][] dati, int numClusters) {
    for (int w = 0; w < numClusters; w++) {
        System.out.println();
        printCluster(dati, w);
    }
}

/** Prints every row of {@code dati} whose assignment column equals {@code clusterId}. */
private static void printCluster(String[][] dati, double clusterId) {
    for (String[] row : dati) {
        if (Double.parseDouble(row[2]) == clusterId) {
            for (int j = 0; j < 4; j++) {
                if (j != 3) {
                    System.out.print(row[j] + "-> \t");
                } else {
                    System.out.println(row[j]);
                }
            }
        }
    }
}
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/**
 * Builds the classifier by clustering the training data (with the class
 * attribute removed) and then relating clusters back to class labels.
 *
 * <p>Two modes: when {@code getLabelAllClusters()} is false, an optimal
 * clusters-to-classes mapping is computed via
 * {@link ClusterEvaluation#mapClasses}; otherwise a per-cluster class
 * probability table is accumulated from the clusterer's distributions.
 *
 * @param data the training instances
 * @throws Exception if something goes wrong
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // save original header (needed for clusters to classes output)
    m_OriginalHeader = data.stringFreeStructure();

    // remove class attribute for clusterer; the clusterer must not see labels
    Instances clusterData = new Instances(data);
    clusterData.setClassIndex(-1);
    clusterData.deleteAttributeAt(data.classIndex());
    m_ClusteringHeader = clusterData.stringFreeStructure();

    if (m_ClusteringHeader.numAttributes() == 0) {
        // nothing left to cluster on — fall back to majority-class model
        System.err.println("Data contains only class attribute, defaulting to ZeroR model.");
        m_ZeroR = new ZeroR();
        m_ZeroR.buildClassifier(data);
    } else {
        m_ZeroR = null;

        // build clusterer on a copy so m_Clusterer itself stays untouched
        m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer);
        m_ActualClusterer.buildClusterer(clusterData);

        if (!getLabelAllClusters()) {
            // determine classes-to-clusters mapping
            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(m_ActualClusterer);
            eval.evaluateClusterer(clusterData);
            double[] clusterAssignments = eval.getClusterAssignments();

            // counts[cluster][class] = how many labelled instances landed there
            int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()];
            int[] clusterTotals = new int[eval.getNumClusters()];
            // best/current carry one extra slot: mapClasses stores the
            // running error of the mapping in the last element
            double[] best = new double[eval.getNumClusters() + 1];
            double[] current = new double[eval.getNumClusters() + 1];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance instance = data.instance(i);
                if (!instance.classIsMissing()) {
                    counts[(int) clusterAssignments[i]][(int) instance.classValue()]++;
                    clusterTotals[(int) clusterAssignments[i]]++;
                }
            }
            // seed with worst possible error so any real mapping wins
            best[eval.getNumClusters()] = Double.MAX_VALUE;
            ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals,
                current, best, 0);
            m_ClustersToClasses = new double[best.length];
            System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length);
        } else {
            // label all clusters: accumulate soft cluster-membership mass per class
            m_ClusterClassProbs = new double[m_ActualClusterer.numberOfClusters()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance clusterInstance = clusterData.instance(i);
                Instance originalInstance = data.instance(i);
                if (!originalInstance.classIsMissing()) {
                    double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance);
                    for (int j = 0; j < probs.length; j++) {
                        m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j];
                    }
                }
            }
            // normalize each cluster's class counts into a probability distribution
            for (int i = 0; i < m_ClusterClassProbs.length; i++) {
                Utils.normalize(m_ClusterClassProbs[i]);
            }
        }
    }
}
From source file:detplagiasi.EMClustering.java
EMClustering() { addd = ct.getAddress();//from ww w .java 2 s .c o m try { ClusterEvaluation eval; Instances data; String[] options; DensityBasedClusterer cl; File he = getArffFile(); data = new Instances(new BufferedReader(new FileReader(he))); System.out.println("-----EM Clustering-----"); // normal try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) { out.write("\r\n--> normal\r\n"); options = new String[2]; options[0] = "-t"; options[1] = he.getAbsolutePath(); out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new EM(), options) + "\r\n"); out.write("\r\n"); // manual call out.write("\n--> manual\r\n"); cl = new EM(); out.write("\r\n"); cl.buildClusterer(data); getDataUji(); getDataTraining(); System.out.println("jumlah kluster = " + cl.numberOfClusters()); noClusterUji = cl.clusterInstance(dataUji.instance(0)); totalCluster = cl.numberOfClusters(); System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0))); for (int b = 0; b < dataTraining.numInstances(); b++) { System.out.print("file " + td.fileName[b] + " termasuk cluster ke "); array1[b] = td.fileName[b]; array2[b] = cl.clusterInstance(dataTraining.instance(b)); System.out.println(cl.clusterInstance(dataTraining.instance(b))); //simpan nilai instance ke dalam sebuah array int buat dikirim ke detplaggui } out.write("\r\n"); eval = new ClusterEvaluation(); eval.setClusterer(cl); eval.evaluateClusterer(new Instances(data)); out.write("\r\n\n# of clusters: " + eval.getNumClusters()); } catch (Exception e) { System.err.println(e.getMessage()); System.out.println("error2 em cluster"); } } catch (IOException ex) { Logger.getLogger(EMClustering.class.getName()).log(Level.SEVERE, null, ex); System.out.println("errorrrr null em"); } }
From source file:detplagiasi.KMeansClustering.java
KMeansClustering() { addd = Container.getAddress(); try {//from w w w. j ava 2s . c om ClusterEvaluation eval; Instances data; String[] options; SimpleKMeans cl; File he = getArffFile(); data = new Instances(new BufferedReader(new FileReader(he))); System.out.println("-----KMeans Clustering-----"); // normal try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) { out.write("\r\n--> normal\r\n"); options = new String[2]; options[0] = "-t"; options[1] = he.getAbsolutePath(); out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n"); out.write("\r\n"); // manual call out.write("\n--> manual\r\n"); cl = new SimpleKMeans(); cl.setNumClusters(4); out.write("\r\n"); cl.buildClusterer(data); getDataUji(); System.out.println("jumlah kluster = " + cl.numberOfClusters()); System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0))); noClusterUji = cl.clusterInstance(dataUji.instance(0)); totalCluster = cl.numberOfClusters(); for (int b = 0; b < dataTraining.numInstances(); b++) { System.out.print("file " + td.fileName[b] + " termasuk cluster ke "); System.out.println(cl.clusterInstance(dataTraining.instance(b))); array1[b] = td.fileName[b]; array2[b] = cl.clusterInstance(dataTraining.instance(b)); //simpan nilai instance ke dalam sebuah array int buat dikirim ke detplaggui } out.write("\r\n"); eval = new ClusterEvaluation(); eval.setClusterer(cl); eval.evaluateClusterer(new Instances(data)); out.write("\r\n\n# of clusters: " + eval.getNumClusters()); } catch (Exception e) { System.err.println(e.getMessage()); System.out.println("error2 kmeans cluster"); } } catch (IOException ex) { Logger.getLogger(Clustering.class.getName()).log(Level.SEVERE, null, ex); System.out.println("errorrrr null kmeans"); } }
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * Expectation Maximization clustering.
 *
 * <p>Builds an EM clusterer over the observation matrix, then computes, per
 * discovered cluster, the centroid (feature means), the member indices, and
 * the per-feature standard deviation.
 *
 * @param data matrix of observations (numObs x numFeatures)
 * @param numObs number of observations (rows of {@code data})
 * @param numFeatures number of features per observation (columns of {@code data})
 * @return one {@link Cluster} per cluster found by EM; never returns normally
 *         on failure (the JVM is terminated, see below)
 */
public Cluster[] em(double[][] data, int numObs, int numFeatures) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    EM clusterer = new EM();
    try {
        clusterer.buildClusterer(ds);

        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(new Instances(ds));
        int numClusters = eval.getNumClusters();

        Cluster[] clusters = new Cluster[numClusters];
        // accumulate per-cluster feature sums, then divide by member count
        double[][] clusterCentroids = new double[numClusters][numFeatures];
        int[] clusterCount = new int[numClusters];
        double[] assignments = eval.getClusterAssignments();
        for (int i = 0; i < ds.numInstances(); i++) {
            Instance inst = ds.instance(i);
            int clusterId = (int) assignments[i];
            for (int j = 0; j < numFeatures; j++) {
                clusterCentroids[clusterId][j] += inst.value(j);
            }
            clusterCount[clusterId]++;
        }
        for (int i = 0; i < numClusters; i++) {
            double[] mean = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                // NOTE(review): if a cluster has no assigned members this
                // divides by zero, yielding NaN means — assumed not to happen
                // for EM-discovered clusters; confirm.
                mean[j] = clusterCentroids[i][j] / clusterCount[i];
            }
            clusters[i] = new Cluster(mean, i);
        }
        // second pass: record cluster members & accumulate squared deviations
        double[][] clusterStdDev = new double[numClusters][numFeatures];
        for (int i = 0; i < ds.numInstances(); i++) {
            int clusterId = (int) assignments[i];
            clusters[clusterId].addMember(i);
            for (int j = 0; j < numFeatures; j++) {
                clusterStdDev[clusterId][j] += Math
                        .pow(ds.instance(i).value(j) - clusters[clusterId].getCentroid()[j], 2);
            }
        }
        // population std dev (divides by n, not n-1)
        for (int i = 0; i < numClusters; i++) {
            double[] dev = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                dev[j] = Math.sqrt(clusterStdDev[i][j] / clusterCount[i]);
            }
            clusters[i].setStdDev(dev);
        }
        return clusters;
    } catch (Exception e) {
        // NOTE(review): terminating the whole JVM from a library method is
        // drastic; callers cannot recover. Kept for behavior compatibility.
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java
License:Open Source License
private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k) throws Exception { final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet); final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet); final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction); skm.buildClusterer(trdataSet);/*from w w w . ja va 2s. com*/ final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(skm); eval.evaluateClusterer(trdataSet); final int numClusters = eval.getNumClusters(); final List<String> possibleValues = new ArrayList<String>(numClusters); for (int c = 0; c < numClusters; c++) possibleValues.add("cluster_" + c); final double[] clusterAssignments = eval.getClusterAssignments(); final int numAttributes = dataSet.numAttributes(); final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes); for (int j = 0; j < numAttributes; j++) { //System.out.println(clusterAssignments[j]+" "+(int)clusterAssignments[j]); valueForEachFeature.add((int) clusterAssignments[j]); } return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature); }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * /*from w w w .j av a 2 s . c om*/ * @param wekaClusterer * @param instances * @return * @throws Exception */ public static List<IndexedInstance> computeClusters(final Clusterer wekaClusterer, final Instances instances) throws Exception { final Instances ii = new Instances(instances); ii.setClassIndex(-1); wekaClusterer.buildClusterer(ii); final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(wekaClusterer); eval.evaluateClusterer(ii); final int clustersCount = eval.getNumClusters(); final List<IndexedInstance> clustersList = new ArrayList<IndexedInstance>(clustersCount); //Initialize instances for (int k = 0; k < clustersCount; k++) { clustersList.add(new IndexedInstance(new Instances(instances, 0), new HashMap<Integer, Integer>())); } final double[] ass = eval.getClusterAssignments(); if (ass.length != ii.numInstances()) throw new IllegalStateException(); for (int i = 0; i < ass.length; i++) { IndexedInstance idxi = clustersList.get((int) ass[i]); idxi.getInstances().add(instances.instance(i)); int pos = idxi.getInstances().size() - 1; idxi.getMapOrigIndex().put(pos, i); } return clustersList; }
From source file:nl.uva.sne.commons.ClusterUtils.java
public static Map<String, String> bulidClusters(Clusterer clusterer, Instances data, String inDir) throws Exception { FilteredClusterer fc = new FilteredClusterer(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "1"; // we want to ignore the attribute that is in the position '1' Remove remove = new Remove(); // new instance of filter remove.setOptions(options); // set options fc.setFilter(remove); //add filter to remove attributes fc.setClusterer(clusterer); //bind FilteredClusterer to original clusterer fc.buildClusterer(data);/* w ww.j a v a2 s .c o m*/ Map<String, String> clusters = new HashMap<>(); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); int theClass = fc.clusterInstance(inst); String s = data.attribute(0).value(i); clusters.put(inDir + File.separator + s, String.valueOf(theClass)); System.err.println(s + " is in cluster " + theClass); } ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(fc); // the cluster to evaluate eval.evaluateClusterer(data); // data to evaluate the clusterer on // double ll = eval.getLogLikelihood(); // Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "LogLikelihood :{0}", ll); // // if (clusterer instanceof SimpleKMeans) { // double sqrErr = ((SimpleKMeans) clusterer).getSquaredError(); // Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Squared Error:{0}", sqrErr); // } Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "# of clusters: {0}", eval.getNumClusters()); Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "clusterResults: {0}", eval.clusterResultsToString()); return clusters; }
From source file:qoala.arff.java
/**
 * Runs EM clustering with the requested cluster count over a copy of
 * {@code dataSet}, prints the evaluation summary, then lists every instance
 * grouped by cluster id.
 *
 * <p>Fixes over the original: a no-op {@code eval.getNumClusters();} statement
 * whose result was discarded has been removed, and each instance is now
 * clustered exactly once instead of once per cluster id (the original made
 * k*n {@code clusterInstance} calls to produce the same output).
 *
 * @param NumberOfCluster number of clusters EM should produce
 * @throws Exception if building or evaluating the clusterer fails
 */
public void EMClustering(int NumberOfCluster) throws Exception {
    Instances train = new Instances(dataSet);

    EM em = new EM();
    em.setOptions(new String[] { "-I", "100" }); // cap EM at 100 iterations
    em.setNumClusters(NumberOfCluster);
    em.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(em);
    eval.evaluateClusterer(train);

    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());
    System.out.println("# - cluster - distribution");

    // assign each instance once, then print grouped by cluster id
    int[] assignments = new int[train.numInstances()];
    for (int i = 0; i < train.numInstances(); i++) {
        assignments[i] = em.clusterInstance(train.instance(i));
    }
    for (int j = 0; j < eval.getNumClusters(); j++) {
        for (int i = 0; i < train.numInstances(); i++) {
            if (assignments[i] == j) {
                System.out.println("Instance " + i + " -> Cluster number: " + assignments[i]);
            }
        }
    }
}
From source file:qoala.arff.java
public void XMenas() throws Exception { Instances train = new Instances(dataSet); XMeans xm = new XMeans(); xm.setMaxNumClusters(100);// ww w . j av a 2 s. c om xm.setMinNumClusters(2); xm.buildClusterer(train); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(xm); eval.evaluateClusterer(train); eval.getNumClusters(); System.out.println("Cluster Evaluation:" + eval.clusterResultsToString()); System.out.println("# - cluster - distribution"); for (int j = 0; j < eval.getNumClusters(); j++) { for (int i = 0; i < train.numInstances(); i++) { int cluster = xm.clusterInstance(train.instance(i)); if (cluster == j) System.out.println("Instance " + i + " -> Cluster number: " + cluster); } } }