List of usage examples for the weka.clusterers ClusterEvaluation() constructor
public ClusterEvaluation()
From source file:ClusteringClass.java
public static void main(String[] args) throws Exception { String filename = "C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"; try {//w ww . ja v a2s . co m FileWriter fw = new FileWriter(filename); Class.forName("org.apache.derby.jdbc.ClientDriver").newInstance(); Connection conn = DriverManager.getConnection("jdbc:derby://localhost:1527/HumoRadioDB", "dani", "dani"); String query = "SELECT * FROM SONG_RATING2"; Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(query); for (int i = 1; i < 23; i++) { if (i != 2) { ResultSetMetaData rsmd = rs.getMetaData(); String name = rsmd.getColumnName(i); fw.append(name); if (i != 22) { fw.append(','); } else { fw.append('\n'); } } } String query1 = "SELECT * FROM SONG_DATA"; Statement stmt1 = conn.createStatement(); ResultSet rs1 = stmt1.executeQuery(query1); String[] titles = new String[150]; for (int ii = 0; ii < 150; ii++) { rs1.next(); titles[ii] = rs1.getString("TITLE"); } while (rs.next()) { for (int i = 1; i < 23; i++) { if (i == 22) fw.append('\n'); else if (i != 2) { fw.append(','); } } } fw.flush(); fw.close(); conn.close(); System.out.println("CSV File is created successfully."); /* Clustering part */ DataSource source = new DataSource("C:\\Users\\Daniele\\Desktop\\Humoradio2.csv"); Instances train = source.getDataSet(); /* Applichiamo il filtro Remove fornito da Weka per non considerare un attributo nell'algoritmo di Clustering. */ Remove filter = new Remove(); filter.setAttributeIndices("1"); filter.setInputFormat(train); Instances train2 = Filter.useFilter(train, filter); System.out.println("Nominal attributes removed from computation."); /* Applichiamo il filtro Normalize fornito da Weka per normalizzare il nostro dataset. 
*/ Normalize norm = new Normalize(); norm.setInputFormat(train2); Instances train3 = Filter.useFilter(train2, norm); System.out.println("Dataset normalized."); /* First Clustering Algorithm */ EuclideanDistance df = new EuclideanDistance(); SimpleKMeans clus1 = new SimpleKMeans(); int k = 10; clus1.setNumClusters(k); clus1.setDistanceFunction(df); clus1.setPreserveInstancesOrder(true); clus1.buildClusterer(train3); /* First Evaluation */ ClusterEvaluation eval1 = new ClusterEvaluation(); eval1.setClusterer(clus1); eval1.evaluateClusterer(train3); System.out.println(eval1.clusterResultsToString()); int[] assignments = clus1.getAssignments(); String[][] dati = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati[kk][0] = String.valueOf(kk); dati[kk][1] = train2.instance(kk).toString(); dati[kk][2] = String.valueOf(assignments[kk]); dati[kk][3] = titles[kk]; } for (int w = 0; w < 10; w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (dati[i][2].equals(String.valueOf(w))) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati[i][j] + "-> \t"); } else { System.out.println(dati[i][j]); } } } } } /*first graph PlotData2D predData = ClustererPanel.setUpVisualizableInstances(train, eval1); //String name = (new SimpleDateFormat("HH:mm:ss - ")).format(new Date()); String name = ""; String cname = clus1.getClass().getName(); if (cname.startsWith("weka.clusterers.")) name += cname.substring("weka.clusterers.".length()); else name += cname; VisualizePanel vp = new VisualizePanel(); vp.setName(name + " (" + train.relationName() + ")"); predData.setPlotName(name + " (" + train.relationName() + ")"); vp.addPlot(predData); String plotName = vp.getName(); final javax.swing.JFrame jf = new javax.swing.JFrame("Weka Clusterer Visualize: " + plotName); jf.setSize(500,400); jf.getContentPane().setLayout(new BorderLayout()); jf.getContentPane().add(vp, BorderLayout.CENTER); jf.dispose(); jf.addWindowListener(new java.awt.event.WindowAdapter() { public void 
windowClosing(java.awt.event.WindowEvent e) { jf.dispose(); } }); jf.setVisible(true); end first graph */ /* Second Clustering Algorithm */ System.out.println(); DBSCAN clus3 = new DBSCAN(); clus3.setEpsilon(0.7); clus3.setMinPoints(2); clus3.buildClusterer(train3); /* Second Evaluation */ ClusterEvaluation eval3 = new ClusterEvaluation(); eval3.setClusterer(clus3); eval3.evaluateClusterer(train3); System.out.println(eval3.clusterResultsToString()); double[] assignments3 = eval3.getClusterAssignments(); String[][] dati3 = new String[150][4]; for (int kk = 0; kk < 150; kk++) { dati3[kk][0] = String.valueOf(kk); dati3[kk][1] = train2.instance(kk).toString(); dati3[kk][2] = String.valueOf(assignments3[kk]); dati3[kk][3] = titles[kk]; } for (int w = 0; w < eval3.getNumClusters(); w++) { System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == w) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } System.out.println(); for (int i = 0; i < 150; i++) { if (Double.parseDouble(dati3[i][2]) == -1.0) { for (int j = 0; j < 4; j++) { if (j != 3) { System.out.print(dati3[i][j] + "-> \t"); } else { System.out.println(dati3[i][j]); } } } } } catch (Exception e) { e.printStackTrace(); } }
From source file:adams.flow.transformer.WekaTestSetClustererEvaluator.java
License:Open Source License
/** * Executes the flow item./*from w w w .j a v a 2 s .c o m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances test; ClusterEvaluation eval; weka.clusterers.Clusterer cls; CallableSource gs; Token output; result = null; try { // get test set test = null; gs = new CallableSource(); gs.setCallableName(m_Testset); gs.setParent(getParent()); gs.setUp(); gs.execute(); output = gs.output(); if (output != null) test = (Instances) output.getPayload(); else result = "No test set available!"; gs.wrapUp(); // evaluate clusterer if (result == null) { if (m_InputToken.getPayload() instanceof weka.clusterers.Clusterer) cls = (weka.clusterers.Clusterer) m_InputToken.getPayload(); else cls = (weka.clusterers.Clusterer) ((WekaModelContainer) m_InputToken.getPayload()) .getValue(WekaModelContainer.VALUE_MODEL); eval = new ClusterEvaluation(); eval.setClusterer(cls); eval.evaluateClusterer(test, null, m_OutputModel); m_OutputToken = new Token(new WekaClusterEvaluationContainer(eval, cls)); } } catch (Exception e) { m_OutputToken = null; result = handleException("Failed to evaluate: ", e); } if (m_OutputToken != null) updateProvenance(m_OutputToken); return result; }
From source file:adams.flow.transformer.WekaTrainTestSetClustererEvaluator.java
License:Open Source License
/** * Executes the flow item./*from w w w.ja v a 2 s. c o m*/ * * @return null if everything is fine, otherwise error message */ @Override protected String doExecute() { String result; Instances train; Instances test; weka.clusterers.Clusterer cls; ClusterEvaluation eval; WekaTrainTestSetContainer cont; result = null; try { // cross-validate clusterer cls = getClustererInstance(); if (cls == null) throw new IllegalStateException("Clusterer '" + getClusterer() + "' not found!"); cont = (WekaTrainTestSetContainer) m_InputToken.getPayload(); train = (Instances) cont.getValue(WekaTrainTestSetContainer.VALUE_TRAIN); test = (Instances) cont.getValue(WekaTrainTestSetContainer.VALUE_TEST); cls.buildClusterer(train); eval = new ClusterEvaluation(); eval.setClusterer(cls); eval.evaluateClusterer(test, null, m_OutputModel); // broadcast result m_OutputToken = new Token(new WekaClusterEvaluationContainer(eval, cls)); } catch (Exception e) { m_OutputToken = null; result = handleException("Failed to evaluate: ", e); } if (m_OutputToken != null) updateProvenance(m_OutputToken); return result; }
From source file:agnes.AgnesMain.java
public static void main(String[] args) throws Exception { // Instances data = loadData("C:\\Program Files\\Weka-3-8\\data\\weather.numeric.arff"); System.out.print("File: "); Scanner scanner = new Scanner(System.in); String filename = scanner.next(); System.out.print("Number of clusters: "); int numCluster = scanner.nextInt(); System.out.print("Single/complete: "); String link = scanner.next(); Instances data = loadData("src/Dataset/weather.arff"); MyAgnes agnes = new MyAgnes(link, numCluster); agnes.buildClusterer(data);/*from ww w. j a va2s. c o m*/ System.out.println("Cluster Hierarchies:\n"); agnes.printClustersID(); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(agnes); eval.evaluateClusterer(data); System.out.println("Cluster Evaluation:"); System.out.println(eval.clusterResultsToString()); // agnes.printClusters(); }
From source file:aw_cluster.AW_Cluster.java
/** * @param args the command line arguments *//* ww w .j a v a2 s . com*/ public static void main(String[] args) throws Exception { // TODO code application logic here Scanner sc = new Scanner(System.in); Instances trainingData; ClusterEvaluation eval; String path; int pilihan; int jumlahCluster; int maxIter; int typeLinkage; do { System.out.println("Masukan pilihan algoritma: "); System.out.println("1. MyKMeans"); System.out.println("2. MyAgnes"); System.out.println("3. Exit"); System.out.print("Pilihan: "); pilihan = sc.nextInt(); if (pilihan == 1) { path = masukanFile(sc); System.out.println("Masukan jumlah cluster: "); jumlahCluster = sc.nextInt(); System.out.println("Masukan jumlah maksimum iterasi: "); maxIter = sc.nextInt(); BufferedReader data = new BufferedReader(new FileReader(path)); trainingData = new Instances(data); myKMeans kmeans = new myKMeans(); kmeans.setNumCluster(jumlahCluster); kmeans.setMaxIteration(maxIter); kmeans.buildClusterer(trainingData); eval = new ClusterEvaluation(); eval.setClusterer(kmeans); eval.evaluateClusterer(trainingData); System.out.println("Cluster Evaluation: " + eval.clusterResultsToString()); System.out.println(""); } else if (pilihan == 2) { path = masukanFile(sc); System.out.println("Masukan jumlah cluster: "); jumlahCluster = sc.nextInt(); typeLinkage = typeLinkage(sc); BufferedReader data = new BufferedReader(new FileReader(path)); trainingData = new Instances(data); myAgnes agnes = new myAgnes(); agnes.setNumCluster(jumlahCluster); agnes.setLinkage(typeLinkage); agnes.buildClusterer(trainingData); eval = new ClusterEvaluation(); eval.setClusterer(agnes); eval.evaluateClusterer(trainingData); System.out.println("Cluster Evaluation: " + eval.clusterResultsToString()); System.out.println(""); } } while (pilihan != 3); }
From source file:clustering.Clustering.java
/**
 * Shuffles the dataset with a fixed seed, splits it into a training part of
 * {@code percent}% of the instances and a test part with the remainder,
 * trains the clusterer on the first part and prints the evaluation on the second.
 *
 * @param percent percentage of instances to use for training (0-100)
 */
public void percentageSplit(double percent) {
    try {
        // Fixed seed keeps the split reproducible across runs.
        data.randomize(new java.util.Random(0));
        int total = data.numInstances();
        int trainCount = (int) Math.round((double) total * percent / 100f);
        Instances trainPart = new Instances(data, 0, trainCount);
        Instances testPart = new Instances(data, trainCount, total - trainCount);

        buildClusterer(clusterer, trainPart);

        ClusterEvaluation evaluation = new ClusterEvaluation();
        // NOTE(review): evaluation uses the 'model' field rather than the
        // 'clusterer' field — presumably buildClusterer stores the built
        // clusterer into 'model'; verify against the rest of the class.
        evaluation.setClusterer(model);
        evaluation.evaluateClusterer(testPart);
        System.out.println(evaluation.clusterResultsToString());
    } catch (Exception ex) {
        System.out.println(ex);
    }
}
From source file:com.actelion.research.orbit.imageAnalysis.imaging.TMAPoints.java
License:Open Source License
private int guessNumClusters(EM clusterer, Instances instances, int start, int end) throws Exception { ClusterEvaluation eval = new ClusterEvaluation(); int bestNum = start; double best = Double.POSITIVE_INFINITY; double bic;//from ww w.j a va 2 s . co m for (int c = start; c <= end; c++) { clusterer.setNumClusters(c); clusterer.buildClusterer(instances); eval.setClusterer(clusterer); eval.evaluateClusterer(instances); bic = bic(eval.getLogLikelihood(), c, instances.numInstances()); logger.trace("numCluster " + c + " -> BIC: " + bic); if (bic < best) { best = bic; bestNum = c; logger.trace("bestNum: " + bestNum); } } return bestNum; }
From source file:com.spread.experiment.tempuntilofficialrelease.ClassificationViaClustering108.java
License:Open Source License
/**
 * Builds the classifier by clustering the class-stripped data and then mapping
 * clusters to class labels.
 *
 * Two modes: if labelAllClusters is off, an optimal clusters-to-classes
 * assignment is computed via {@code ClusterEvaluation.mapClasses}; otherwise a
 * per-cluster class-probability distribution is accumulated and normalized.
 *
 * @param data the training instances
 * @throws Exception if something goes wrong
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // save original header (needed for clusters to classes output)
    m_OriginalHeader = data.stringFreeStructure();

    // remove class attribute for clusterer
    Instances clusterData = new Instances(data);
    clusterData.setClassIndex(-1);
    clusterData.deleteAttributeAt(data.classIndex());
    m_ClusteringHeader = clusterData.stringFreeStructure();

    if (m_ClusteringHeader.numAttributes() == 0) {
        // Nothing left to cluster on: fall back to majority-class prediction.
        System.err.println("Data contains only class attribute, defaulting to ZeroR model.");
        m_ZeroR = new ZeroR();
        m_ZeroR.buildClassifier(data);
    } else {
        m_ZeroR = null;

        // build clusterer (on a copy so the configured template stays untouched)
        m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer);
        m_ActualClusterer.buildClusterer(clusterData);

        if (!getLabelAllClusters()) {
            // determine classes-to-clusters mapping
            ClusterEvaluation eval = new ClusterEvaluation();
            eval.setClusterer(m_ActualClusterer);
            eval.evaluateClusterer(clusterData);
            double[] clusterAssignments = eval.getClusterAssignments();

            // counts[c][k] = how many instances of class k landed in cluster c;
            // clusterTotals[c] = size of cluster c (missing-class instances skipped).
            int[][] counts = new int[eval.getNumClusters()][m_OriginalHeader.numClasses()];
            int[] clusterTotals = new int[eval.getNumClusters()];
            // best/current carry the mapping search state; the extra final slot
            // holds the error count of the mapping.
            double[] best = new double[eval.getNumClusters() + 1];
            double[] current = new double[eval.getNumClusters() + 1];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance instance = data.instance(i);
                if (!instance.classIsMissing()) {
                    counts[(int) clusterAssignments[i]][(int) instance.classValue()]++;
                    clusterTotals[(int) clusterAssignments[i]]++;
                }
            }
            // Seed the search with a worst-possible error so any mapping wins.
            best[eval.getNumClusters()] = Double.MAX_VALUE;
            ClusterEvaluation.mapClasses(eval.getNumClusters(), 0, counts, clusterTotals, current, best, 0);
            m_ClustersToClasses = new double[best.length];
            System.arraycopy(best, 0, m_ClustersToClasses, 0, best.length);
        } else {
            // Accumulate, per cluster, the membership-weighted class counts …
            m_ClusterClassProbs = new double[m_ActualClusterer.numberOfClusters()][data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance clusterInstance = clusterData.instance(i);
                Instance originalInstance = data.instance(i);
                if (!originalInstance.classIsMissing()) {
                    double[] probs = m_ActualClusterer.distributionForInstance(clusterInstance);
                    for (int j = 0; j < probs.length; j++) {
                        m_ClusterClassProbs[j][(int) originalInstance.classValue()] += probs[j];
                    }
                }
            }
            // … then normalize each cluster's counts into a class distribution.
            for (int i = 0; i < m_ClusterClassProbs.length; i++) {
                Utils.normalize(m_ClusterClassProbs[i]);
            }
        }
    }
}
From source file:detplagiasi.EMClustering.java
EMClustering() { addd = ct.getAddress();/*from w w w . j a v a 2 s. c om*/ try { ClusterEvaluation eval; Instances data; String[] options; DensityBasedClusterer cl; File he = getArffFile(); data = new Instances(new BufferedReader(new FileReader(he))); System.out.println("-----EM Clustering-----"); // normal try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) { out.write("\r\n--> normal\r\n"); options = new String[2]; options[0] = "-t"; options[1] = he.getAbsolutePath(); out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new EM(), options) + "\r\n"); out.write("\r\n"); // manual call out.write("\n--> manual\r\n"); cl = new EM(); out.write("\r\n"); cl.buildClusterer(data); getDataUji(); getDataTraining(); System.out.println("jumlah kluster = " + cl.numberOfClusters()); noClusterUji = cl.clusterInstance(dataUji.instance(0)); totalCluster = cl.numberOfClusters(); System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0))); for (int b = 0; b < dataTraining.numInstances(); b++) { System.out.print("file " + td.fileName[b] + " termasuk cluster ke "); array1[b] = td.fileName[b]; array2[b] = cl.clusterInstance(dataTraining.instance(b)); System.out.println(cl.clusterInstance(dataTraining.instance(b))); //simpan nilai instance ke dalam sebuah array int buat dikirim ke detplaggui } out.write("\r\n"); eval = new ClusterEvaluation(); eval.setClusterer(cl); eval.evaluateClusterer(new Instances(data)); out.write("\r\n\n# of clusters: " + eval.getNumClusters()); } catch (Exception e) { System.err.println(e.getMessage()); System.out.println("error2 em cluster"); } } catch (IOException ex) { Logger.getLogger(EMClustering.class.getName()).log(Level.SEVERE, null, ex); System.out.println("errorrrr null em"); } }
From source file:detplagiasi.KMeansClustering.java
KMeansClustering() { addd = Container.getAddress(); try {//from w w w . j a v a2 s .c o m ClusterEvaluation eval; Instances data; String[] options; SimpleKMeans cl; File he = getArffFile(); data = new Instances(new BufferedReader(new FileReader(he))); System.out.println("-----KMeans Clustering-----"); // normal try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) { out.write("\r\n--> normal\r\n"); options = new String[2]; options[0] = "-t"; options[1] = he.getAbsolutePath(); out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n"); out.write("\r\n"); // manual call out.write("\n--> manual\r\n"); cl = new SimpleKMeans(); cl.setNumClusters(4); out.write("\r\n"); cl.buildClusterer(data); getDataUji(); System.out.println("jumlah kluster = " + cl.numberOfClusters()); System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0))); noClusterUji = cl.clusterInstance(dataUji.instance(0)); totalCluster = cl.numberOfClusters(); for (int b = 0; b < dataTraining.numInstances(); b++) { System.out.print("file " + td.fileName[b] + " termasuk cluster ke "); System.out.println(cl.clusterInstance(dataTraining.instance(b))); array1[b] = td.fileName[b]; array2[b] = cl.clusterInstance(dataTraining.instance(b)); //simpan nilai instance ke dalam sebuah array int buat dikirim ke detplaggui } out.write("\r\n"); eval = new ClusterEvaluation(); eval.setClusterer(cl); eval.evaluateClusterer(new Instances(data)); out.write("\r\n\n# of clusters: " + eval.getNumClusters()); } catch (Exception e) { System.err.println(e.getMessage()); System.out.println("error2 kmeans cluster"); } } catch (IOException ex) { Logger.getLogger(Clustering.class.getName()).log(Level.SEVERE, null, ex); System.out.println("errorrrr null kmeans"); } }