List of usage examples for weka.clusterers ClusterEvaluation ClusterEvaluation
public ClusterEvaluation()
From source file:jmetal.test.survivalanalysis.GenerateSurvivalGraph.java
License:Open Source License
/** * Evaluates a solution /*from ww w . j av a2s. c o m*/ * @param solution The solution to evaluate */ public void evaluate(Solution solution) { Binary variable; int counterSelectedFeatures; DataSource source; double testStatistic = Double.MAX_VALUE; double pValue = Double.MAX_VALUE; double ArithmeticHarmonicCutScore = Double.MAX_VALUE; //double statScore; REXP x; variable = ((Binary) solution.getDecisionVariables()[0]); counterSelectedFeatures = 0; try { // read the data file source = new DataSource(this.dataFileName); Instances data = source.getDataSet(); //System.out.print("Data read successfully. "); //System.out.print("Number of attributes: " + data.numAttributes()); //System.out.println(". Number of instances: " + data.numInstances()); // save the attribute 'T' and 'Censor' attTime = data.attribute(data.numAttributes() - 2); attCensor = data.attribute(data.numAttributes() - 1); // First filter the attributes based on chromosome Instances tmpData = this.filterByChromosome(data, solution); // Now filter the attribute 'T' and 'Censor' Remove filter = new Remove(); // remove the two last attributes : 'T' and 'Censor' filter.setAttributeIndices("" + (tmpData.numAttributes() - 1) + "," + tmpData.numAttributes()); //System.out.println("After chromosome filtering no of attributes: " + tmpData.numAttributes()); filter.setInputFormat(tmpData); Instances dataClusterer = Filter.useFilter(tmpData, filter); // filtering complete // List the selected features/attributes Enumeration<Attribute> attributeList = dataClusterer.enumerateAttributes(); System.out.println("Selected attributes/features: "); while (attributeList.hasMoreElements()) { Attribute att = attributeList.nextElement(); System.out.print(att.name() + ","); } System.out.println(); /* // debug: write the filtered dataset ArffSaver saver = new ArffSaver(); saver.setInstances(dataClusterer); saver.setFile(new File("filteered-data.arff")); saver.writeBatch(); // end debug */ // train hierarchical clusterer HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", this.HC_LinkType }); //Link type (Single, Complete, Average, Mean, Centroid, Ward, Adjusted complete, Neighbor Joining) //[SINGLE|COMPLETE|AVERAGE|MEAN|CENTROID|WARD|ADJCOMPLETE|NEIGHBOR_JOINING] //clusterer.setDebug(true); clusterer.setNumClusters(2); clusterer.setDistanceFunction(new EuclideanDistance()); clusterer.setDistanceIsBranchLength(false); // ?? Should it be changed to false? (Noman) clusterer.buildClusterer(dataClusterer); double[][] distanceMatrix = clusterer.getDistanceMatrix(); // Cluster evaluation: ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(clusterer); if (this.testDataFileName != null) { DataSource testSource = new DataSource(this.testDataFileName); Instances tmpTestData = testSource.getDataSet(); tmpTestData.setClassIndex(tmpTestData.numAttributes() - 1); //testSource. // First filter the attributes based on chromosome Instances testData = this.filterByChromosome(tmpTestData, solution); //String[] options = new String[2]; //options[0] = "-t"; //options[1] = "/some/where/somefile.arff"; //eval. //System.out.println(eval.evaluateClusterer(testData, options)); eval.evaluateClusterer(testData); System.out.println("\nCluster evluation for this solution(" + this.testDataFileName + "): " + eval.clusterResultsToString()); } // First analyze using my library function // save the cluster assignments int[] clusterAssignment = new int[dataClusterer.numInstances()]; int classOneCnt = 0; int classTwoCnt = 0; for (int i = 0; i < dataClusterer.numInstances(); ++i) { clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i)); if (clusterAssignment[i] == 0) { ++classOneCnt; } else if (clusterAssignment[i] == 1) { ++classTwoCnt; } //System.out.println("Instance " + i + ": " + clusterAssignment[i]); } System.out.println("Class 1 cnt: " + classOneCnt + " Class 2 cnt: " + classTwoCnt); // create arrays with time (event occurrence time) and censor data for use with jstat LogRankTest double[] time1 = new double[classOneCnt]; double[] censor1 = new double[classOneCnt]; double[] time2 = new double[classTwoCnt]; double[] censor2 = new double[classTwoCnt]; //data = source.getDataSet(); for (int i = 0, cnt1 = 0, cnt2 = 0; i < dataClusterer.numInstances(); ++i) { //clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i)); if (clusterAssignment[i] == 0) { time1[cnt1] = data.get(i).value(attTime); censor1[cnt1++] = data.get(i).value(attCensor); //System.out.println("i: " + i + " T: " + time1[cnt1-1]); } else if (clusterAssignment[i] == 1) { time2[cnt2] = data.get(i).value(attTime); //System.out.println("i: " + i + " T: " + time2[cnt2-1]); censor2[cnt2++] = data.get(i).value(attCensor); ; } //System.out.println("Instance " + i + ": " + clusterAssignment[i]); } //Instances[] classInstances = separateClassInstances(clusterAssignment, this.dataFileName,solution); //System.out.println("Class instances seperated"); // calculate log rank test and p values LogRankTest testclass1 = new LogRankTest(time1, time2, censor1, censor2); double[] scores = testclass1.logRank(); testStatistic = scores[0]; pValue = scores[2]; ArithmeticHarmonicCutScore = this.getArithmeticHarmonicCutScore(distanceMatrix, clusterAssignment); //debug: System.out.println("Calculation by myLibrary:\n testStatistic: " + scores[0] + " pValue: " + scores[2] + " Arithmetic Harmonic Cut Score: " + ArithmeticHarmonicCutScore); //end debug //WilcoxonTest testclass1 = new WilcoxonTest(time1, censor1, time2, censor2); //testStatistic = testclass1.testStatistic; //pValue = testclass1.pValue;true // Now analyze calling R for Log Rank test, Parallelization not possible String strT = "time <- c("; String strC = "censor <- c("; String strG = "group <- c("; for (int i = 0; i < dataClusterer.numInstances() - 1; ++i) { strT = strT + (int) data.get(i).value(attTime) + ","; strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ","; strC = strC + (int) data.get(i).value(attCensor) + ","; } int tmpi = dataClusterer.numInstances() - 1; strT = strT + (int) data.get(tmpi).value(attTime) + ")"; strG = strG + clusterer.clusterInstance(dataClusterer.get(tmpi)) + ")"; strC = strC + (int) data.get(tmpi).value(attCensor) + ")"; this.re.eval(strT); this.re.eval(strC); this.re.eval(strG); //debug //System.out.println(strT); //System.out.println(strC); //System.out.println(strG); //end debug /** If you are calling surv_test from coin library */ /*v re.eval("library(coin)"); re.eval("grp <- factor (group)"); re.eval("result <- surv_test(Surv(time,censor)~grp,distribution=\"exact\")"); x=re.eval("statistic(result)"); testStatistic = x.asDouble(); //x=re.eval("pvalue(result)"); //pValue = x.asDouble(); //System.out.println("StatScore: " + statScore + "pValue: " + pValue); */ /** If you are calling survdiff from survival library (much faster) */ re.eval("library(survival)"); re.eval("res2 <- survdiff(Surv(time,censor)~group,rho=0)"); x = re.eval("res2$chisq"); testStatistic = x.asDouble(); //System.out.println(x); x = re.eval("pchisq(res2$chisq, df=1, lower.tail = FALSE)"); //x = re.eval("1.0 - pchisq(res2$chisq, df=1)"); pValue = x.asDouble(); //debug: //System.out.println("Calculation by R: StatScore: " + testStatistic + "pValue: " + pValue); //end debug System.out.println("Calculation by R:"); System.out.println("StatScore: " + testStatistic + " pValue: " + pValue); re.eval("timestrata1.surv <- survfit( Surv(time, censor)~ strata(group), conf.type=\"log-log\")"); re.eval("timestrata1.surv1 <- survfit( Surv(time, censor)~ 1, conf.type=\"none\")"); String evalStr = "jpeg('SurvivalPlot-" + this.SolutionID + ".jpg')"; re.eval(evalStr); re.eval("plot(timestrata1.surv, col=c(2,3), xlab=\"Time\", ylab=\"Survival Probability\")"); re.eval("par(new=T)"); re.eval("plot(timestrata1.surv1,col=1)"); re.eval("legend(0.2, c(\"Group1\",\"Group2\",\"Whole\"))"); re.eval("dev.off()"); System.out.println("\nCluster Assignments:"); for (int i = 0; i < dataClusterer.numInstances(); ++i) { System.out.println("Instance " + i + ": " + clusterAssignment[i]); } } catch (Exception e) { // TODO Auto-generated catch block System.err.println("Can't open the data file."); e.printStackTrace(); System.exit(1); } }
From source file:jmetal.test.survivalanalysis.GenerateSurvivalGraphOld.java
License:Open Source License
/** * Evaluates a solution - actually generate the survival graph * @param solution The solution to evaluate *///from w w w. j a v a2s. c o m public void evaluate(Solution solution) { Binary variable; int counterSelectedFeatures; DataSource source; double testStatistic = Double.MAX_VALUE; double pValue = Double.MAX_VALUE; //double statScore; REXP x; variable = ((Binary) solution.getDecisionVariables()[0]); counterSelectedFeatures = 0; System.out.println("\nSolution ID " + this.SolutionID); try { // read the data file source = new DataSource(this.dataFileName); Instances data = source.getDataSet(); //System.out.print("Data read successfully. "); //System.out.print("Number of attributes: " + data.numAttributes()); //System.out.println(". Number of instances: " + data.numInstances()); // save the attribute 'T' and 'Censor' attTime = data.attribute(data.numAttributes() - 2); attCensor = data.attribute(data.numAttributes() - 1); // First filter the attributes based on chromosome Instances tmpData = this.filterByChromosome(data, solution); // Now filter the attribute 'T' and 'Censor' Remove filter = new Remove(); // remove the two last attributes : 'T' and 'Censor' filter.setAttributeIndices("" + (tmpData.numAttributes() - 1) + "," + tmpData.numAttributes()); //System.out.println("After chromosome filtering no of attributes: " + tmpData.numAttributes()); filter.setInputFormat(tmpData); Instances dataClusterer = Filter.useFilter(tmpData, filter); Enumeration<Attribute> attributeList = dataClusterer.enumerateAttributes(); System.out.println("Selected attributes: "); while (attributeList.hasMoreElements()) { Attribute att = attributeList.nextElement(); System.out.print(att.name() + ","); } System.out.println(); // filtering complete // Debug: write the filtered dataset /* ArffSaver saver = new ArffSaver(); saver.setInstances(dataClusterer); saver.setFile(new File("filteered-data.arff")); saver.writeBatch(); */ // train hierarchical clusterer HierarchicalClusterer clusterer = new HierarchicalClusterer(); clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // complete linkage clustering //clusterer.setDebug(true); clusterer.setNumClusters(2); clusterer.setDistanceFunction(new EuclideanDistance()); //clusterer.setDistanceFunction(new ChebyshevDistance()); clusterer.setDistanceIsBranchLength(false); clusterer.buildClusterer(dataClusterer); // Cluster evaluation: ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(clusterer); if (this.testDataFileName != null) { DataSource testSource = new DataSource(this.testDataFileName); Instances tmpTestData = testSource.getDataSet(); tmpTestData.setClassIndex(tmpTestData.numAttributes() - 1); //testSource. // First filter the attributes based on chromosome Instances testData = this.filterByChromosome(tmpTestData, solution); //String[] options = new String[2]; //options[0] = "-t"; //options[1] = "/some/where/somefile.arff"; //eval. //System.out.println(eval.evaluateClusterer(testData, options)); eval.evaluateClusterer(testData); System.out.println("\nCluster evluation for this solution: " + eval.clusterResultsToString()); } // Print the cluster assignments: // save the cluster assignments //if (printClusterAssignment==true){ int[] clusterAssignment = new int[dataClusterer.numInstances()]; int classOneCnt = 0; int classTwoCnt = 0; for (int i = 0; i < dataClusterer.numInstances(); ++i) { clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i)); if (clusterAssignment[i] == 0) { ++classOneCnt; } else if (clusterAssignment[i] == 1) { ++classTwoCnt; } //System.out.println("Instance " + i + ": " + clusterAssignment[i]); } System.out.println("Class 1 cnt: " + classOneCnt + " Class 2 cnt: " + classTwoCnt); //} /* // create arrays with time (event occurrence time) and censor data for use with jstat LogRankTest double[] time1 = new double[classOneCnt]; double[] censor1 = new double[classOneCnt]; double[] time2 = new double[classTwoCnt]; double[] censor2 = new double[classTwoCnt]; //data = source.getDataSet(); for (int i=0, cnt1=0, cnt2=0; i<dataClusterer.numInstances(); ++i){ clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i)); if (clusterAssignment[i]==0){ time1[cnt1] = data.get(i).value(attTime); censor1[cnt1++] = 1; //System.out.println("i: " + i + " T: " + time1[cnt1-1]); } else if (clusterAssignment[i]==1){ time2[cnt2] = data.get(i).value(attTime); //System.out.println("i: " + i + " T: " + time2[cnt2-1]); censor2[cnt2++] = 1; } //System.out.println("Instance " + i + ": " + clusterAssignment[i]); } //Instances[] classInstances = separateClassInstances(clusterAssignment, this.dataFileName,solution); //System.out.println("Class instances seperated"); // calculate log rank test and p values //LogRankTest testclass1 = new LogRankTest(time1, censor1, time2, censor2); //testStatistic = testclass1.testStatistic; //pValue = testclass1.pValue; WilcoxonTest testclass1 = new WilcoxonTest(time1, censor1, time2, censor2); testStatistic = testclass1.testStatistic; pValue = testclass1.pValue;true */ String strT = "time1 <- c("; String strC = "censor1 <- c("; String strG = "group1 <- c("; for (int i = 0; i < dataClusterer.numInstances() - 1; ++i) { strT = strT + (int) data.get(i).value(attTime) + ","; strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ","; strC = strC + (int) data.get(i).value(attCensor) + ","; } int tmpi = dataClusterer.numInstances() - 1; strT = strT + (int) data.get(tmpi).value(attTime) + ")"; strG = strG + clusterer.clusterInstance(dataClusterer.get(tmpi)) + ")"; strC = strC + (int) data.get(tmpi).value(attCensor) + ")"; this.re.eval(strT); this.re.eval(strC); this.re.eval(strG); // for MyLogRankTest double[] time1 = new double[classOneCnt]; double[] time2 = new double[classTwoCnt]; double[] censor1 = new double[classOneCnt]; double[] censor2 = new double[classTwoCnt]; int i1 = 0, i2 = 0; for (int i = 0; i < dataClusterer.numInstances(); ++i) { strT = strT + (int) data.get(i).value(attTime) + ","; strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ","; strC = strC + (int) data.get(i).value(attCensor) + ","; if (clusterer.clusterInstance(dataClusterer.get(i)) == 0) { time1[i1] = data.get(i).value(attTime); censor1[i1] = data.get(i).value(attCensor); ++i1; } else { time2[i2] = data.get(i).value(attTime); censor2[i2] = data.get(i).value(attCensor); ++i2; } } /** If you are calling surv_test from coin library */ /*v re.eval("library(coin)"); re.eval("grp <- factor (group)"); re.eval("result <- surv_test(Surv(time,censor)~grp,distribution=\"exact\")"); x=re.eval("statistic(result)"); testStatistic = x.asDouble(); //x=re.eval("pvalue(result)"); //pValue = x.asDouble(); //System.out.println("StatScore: " + statScore + "pValue: " + pValue); */ /** If you are calling survdiff from survival library (much faster) */ re.eval("library(survival)"); re.eval("res21 <- survdiff(Surv(time1,censor1)~group1,rho=0)"); x = re.eval("res21$chisq"); testStatistic = x.asDouble(); //System.out.println(x); x = re.eval("pchisq(res21$chisq, df=1, lower.tail = FALSE)"); //x = re.eval("1.0 - pchisq(res2$chisq, df=1)"); pValue = x.asDouble(); System.out.println("Results from R:"); System.out.println("StatScore: " + testStatistic + " pValue: " + pValue); re.eval("timestrata1.surv <- survfit( Surv(time1, censor1)~ strata(group1), conf.type=\"log-log\")"); re.eval("timestrata1.surv1 <- survfit( Surv(time1, censor1)~ 1, conf.type=\"none\")"); String evalStr = "jpeg('SurvivalPlot-" + this.SolutionID + ".jpg')"; re.eval(evalStr); re.eval("plot(timestrata1.surv, col=c(2,3), xlab=\"Time\", ylab=\"Survival Probability\")"); re.eval("par(new=T)"); re.eval("plot(timestrata1.surv1,col=1)"); re.eval("legend(0.2, c(\"Group1\",\"Group2\",\"Whole\"))"); re.eval("dev.off()"); System.out.println("Results from my code: "); LogRankTest lrt = new LogRankTest(time1, time2, censor1, censor2); double[] results = lrt.logRank(); System.out.println("Statistics: " + results[0] + " variance: " + results[1] + " pValue: " + results[2]); } catch (Exception e) { // TODO Auto-generated catch block System.err.println("Can't open the data file."); e.printStackTrace(); System.exit(1); } /********** * Current Implementation considers two objectives * 1. pvalue to be minimized / statistical score to be maximized * 2. Number of Features to be maximized/minimized */ }
From source file:kmeans_extend.KMeansMain.java
/** * @param args the command line arguments */// w w w .j av a2 s. c o m public static void main(String[] args) throws Exception { System.out.print("Put number cluster : "); Scanner scanner = new Scanner(System.in); int numCluster = scanner.nextInt(); Instances data = loadData("src/Dataset/weather.arff"); MyKMeans kmeans = new MyKMeans(numCluster); kmeans.buildClusterer(data); kmeans.printFinalCentroid(); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(kmeans); eval.evaluateClusterer(data); System.out.println("\n==== Evaluation Result ===="); System.out.println(eval.clusterResultsToString()); }
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * Expectation Maximization clustering//from ww w . j a v a 2 s . c o m * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] em(double[][] data, int numObs, int numFeatures) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); EM clusterer = new EM(); try { clusterer.buildClusterer(ds); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(clusterer); eval.evaluateClusterer(new Instances(ds)); int numClusters = eval.getNumClusters(); Cluster[] clusters = new Cluster[numClusters]; double[][] clusterCentroids = new double[numClusters][numFeatures]; int[] clusterCount = new int[numClusters]; double[] assignments = eval.getClusterAssignments(); for (int i = 0; i < ds.numInstances(); i++) { Instance inst = ds.instance(i); int clusterId = (int) assignments[i]; for (int j = 0; j < numFeatures; j++) { clusterCentroids[clusterId][j] += inst.value(j); } clusterCount[clusterId]++; } for (int i = 0; i < numClusters; i++) { double[] mean = new double[numFeatures]; for (int j = 0; j < numFeatures; j++) { mean[j] = clusterCentroids[i][j] / clusterCount[i]; } clusters[i] = new Cluster(mean, i); } // cluster members & std dev double[][] clusterStdDev = new double[numClusters][numFeatures]; for (int i = 0; i < ds.numInstances(); i++) { int clusterId = (int) assignments[i]; clusters[clusterId].addMember(i); for (int j = 0; j < numFeatures; j++) { clusterStdDev[clusterId][j] += Math .pow(ds.instance(i).value(j) - clusters[clusterId].getCentroid()[j], 2); } } for (int i = 0; i < numClusters; i++) { double[] dev = new double[numFeatures]; for (int j = 0; j < numFeatures; j++) { dev[j] = Math.sqrt(clusterStdDev[i][j] / clusterCount[i]); } clusters[i].setStdDev(dev); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java
License:Open Source License
private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k) throws Exception { final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet); final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet); final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction); skm.buildClusterer(trdataSet);//from w w w.ja v a 2 s. c o m final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(skm); eval.evaluateClusterer(trdataSet); final int numClusters = eval.getNumClusters(); final List<String> possibleValues = new ArrayList<String>(numClusters); for (int c = 0; c < numClusters; c++) possibleValues.add("cluster_" + c); final double[] clusterAssignments = eval.getClusterAssignments(); final int numAttributes = dataSet.numAttributes(); final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes); for (int j = 0; j < numAttributes; j++) { //System.out.println(clusterAssignments[j]+" "+(int)clusterAssignments[j]); valueForEachFeature.add((int) clusterAssignments[j]); } return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature); }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * //from w w w. ja va2 s . c o m * @param newInstances * @param K * @return * @throws Exception */ public static double[] doKMeans(final Instances newInstances, final int K) throws Exception { final SimpleKMeans clusterer = new SimpleKMeans(); clusterer.setOptions( Utils.splitOptions("-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance")); clusterer.buildClusterer(newInstances); final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(clusterer); eval.evaluateClusterer(newInstances); double[] ass = eval.getClusterAssignments(); return ass; }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * //from w w w . j a v a2s . c o m * @param wekaClusterer * @param instances * @return * @throws Exception */ public static List<IndexedInstance> computeClusters(final Clusterer wekaClusterer, final Instances instances) throws Exception { final Instances ii = new Instances(instances); ii.setClassIndex(-1); wekaClusterer.buildClusterer(ii); final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(wekaClusterer); eval.evaluateClusterer(ii); final int clustersCount = eval.getNumClusters(); final List<IndexedInstance> clustersList = new ArrayList<IndexedInstance>(clustersCount); //Initialize instances for (int k = 0; k < clustersCount; k++) { clustersList.add(new IndexedInstance(new Instances(instances, 0), new HashMap<Integer, Integer>())); } final double[] ass = eval.getClusterAssignments(); if (ass.length != ii.numInstances()) throw new IllegalStateException(); for (int i = 0; i < ass.length; i++) { IndexedInstance idxi = clustersList.get((int) ass[i]); idxi.getInstances().add(instances.instance(i)); int pos = idxi.getInstances().size() - 1; idxi.getMapOrigIndex().put(pos, i); } return clustersList; }
From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java
License:Open Source License
/** * /*w w w . j a va 2s. co m*/ * @return * @throws Exception */ public double[] getClusteredInstances() throws Exception { //Removing potential class index instances.setClassIndex(-1); //Clustering using Kmeans int k; double max = 0, r2 = 0, pseudoF = 0; //Testing from 2 to 10 clusters, should be set as entry of this function SimpleKMeans bestKMeans = new SimpleKMeans(); for (k = 2; k <= maxClusters; k++) { final SimpleKMeans kMeans = new SimpleKMeans(); kMeans.setNumClusters(k); kMeans.buildClusterer(instances); //Choosing the "optimal" number of clusters r2 = R2(kMeans); pseudoF = pseudoF(r2, k); //System.out.println(pseudo_f); if (pseudoF > max) { max = pseudoF; bestKMeans = kMeans; } } //Real clustering using the chosen number final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(bestKMeans); eval.evaluateClusterer(instances); double[] clusterAssignments = eval.getClusterAssignments(); this.usedKmeans = bestKMeans; return clusterAssignments; }
From source file:model.clustering.Clustering.java
public String filledFile(Instances data, int numOfClusters, String remove) throws Exception { String mainData = data.toString(); int index = mainData.indexOf("@data"); String clusterData = mainData.substring(0, index + 6); Remove removeFilter = new Remove(); removeFilter.setAttributeIndices(remove); kMeansCLusterer = new SimpleKMeans(); kMeansCLusterer.setNumClusters(numOfClusters); FilteredClusterer filteredClusterer = new FilteredClusterer(); filteredClusterer.setClusterer(kMeansCLusterer); filteredClusterer.setFilter(removeFilter); filteredClusterer.buildClusterer(data); Enumeration<Instance> newData = data.enumerateInstances(); eval = new ClusterEvaluation(); eval.setClusterer(filteredClusterer); eval.evaluateClusterer(data);// w w w . j a v a2 s. c o m while (newData.hasMoreElements()) { Instance i = (Instance) newData.nextElement(); int kluster = filteredClusterer.clusterInstance(i); String instanceString = i.toString() + "," + kluster; clusterData = clusterData + instanceString + "\n"; } return clusterData; }
From source file:myclusterer.MyClusterer.java
/** * @param args the command line arguments *///from w w w.ja v a 2 s .c om public static void main(String[] args) { // TODO code application logic here String nameOfFile; Clusterer clusterer; Instances dataSet; ClusterEvaluation eval; //Baca input file Scanner scan = new Scanner(System.in); System.out.print("Masukkan nama file untuk di-cluster: "); nameOfFile = scan.nextLine(); try { //Baca File arff dataSet = WekaCode.readFileArff(nameOfFile); //Build Clusterer System.out.println( "Tuliskan model clusterer : 0.SimpleKMeans / 1.HierarchicalClusterer / 2.MyKMeans / 3.MyAgnes "); int clustererType = scan.nextInt(); clusterer = WekaCode.buildClusterer(dataSet, clustererType); eval = new ClusterEvaluation(); eval.setClusterer(clusterer); eval.evaluateClusterer(dataSet); System.out.println("Cluster Evaluation:"); System.out.println(eval.clusterResultsToString()); //Given test set } catch (Exception ex) { Logger.getLogger(MyClusterer.class.getName()).log(Level.SEVERE, null, ex); } }