List of usage examples for weka.clusterers ClusterEvaluation evaluateClusterer
public void evaluateClusterer(Instances test) throws Exception
From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java
License:Open Source License
private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k) throws Exception { final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet); final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet); final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction); skm.buildClusterer(trdataSet);// ww w . ja v a 2 s .co m final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(skm); eval.evaluateClusterer(trdataSet); final int numClusters = eval.getNumClusters(); final List<String> possibleValues = new ArrayList<String>(numClusters); for (int c = 0; c < numClusters; c++) possibleValues.add("cluster_" + c); final double[] clusterAssignments = eval.getClusterAssignments(); final int numAttributes = dataSet.numAttributes(); final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes); for (int j = 0; j < numAttributes; j++) { //System.out.println(clusterAssignments[j]+" "+(int)clusterAssignments[j]); valueForEachFeature.add((int) clusterAssignments[j]); } return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature); }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * /* w w w . java2 s .c om*/ * @param newInstances * @param K * @return * @throws Exception */ public static double[] doKMeans(final Instances newInstances, final int K) throws Exception { final SimpleKMeans clusterer = new SimpleKMeans(); clusterer.setOptions( Utils.splitOptions("-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance")); clusterer.buildClusterer(newInstances); final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(clusterer); eval.evaluateClusterer(newInstances); double[] ass = eval.getClusterAssignments(); return ass; }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/** * /* ww w . j a v a2 s. c o m*/ * @param wekaClusterer * @param instances * @return * @throws Exception */ public static List<IndexedInstance> computeClusters(final Clusterer wekaClusterer, final Instances instances) throws Exception { final Instances ii = new Instances(instances); ii.setClassIndex(-1); wekaClusterer.buildClusterer(ii); final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(wekaClusterer); eval.evaluateClusterer(ii); final int clustersCount = eval.getNumClusters(); final List<IndexedInstance> clustersList = new ArrayList<IndexedInstance>(clustersCount); //Initialize instances for (int k = 0; k < clustersCount; k++) { clustersList.add(new IndexedInstance(new Instances(instances, 0), new HashMap<Integer, Integer>())); } final double[] ass = eval.getClusterAssignments(); if (ass.length != ii.numInstances()) throw new IllegalStateException(); for (int i = 0; i < ass.length; i++) { IndexedInstance idxi = clustersList.get((int) ass[i]); idxi.getInstances().add(instances.instance(i)); int pos = idxi.getInstances().size() - 1; idxi.getMapOrigIndex().put(pos, i); } return clustersList; }
From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java
License:Open Source License
/** * // www .ja va 2 s . com * @return * @throws Exception */ public double[] getClusteredInstances() throws Exception { //Removing potential class index instances.setClassIndex(-1); //Clustering using Kmeans int k; double max = 0, r2 = 0, pseudoF = 0; //Testing from 2 to 10 clusters, should be set as entry of this function SimpleKMeans bestKMeans = new SimpleKMeans(); for (k = 2; k <= maxClusters; k++) { final SimpleKMeans kMeans = new SimpleKMeans(); kMeans.setNumClusters(k); kMeans.buildClusterer(instances); //Choosing the "optimal" number of clusters r2 = R2(kMeans); pseudoF = pseudoF(r2, k); //System.out.println(pseudo_f); if (pseudoF > max) { max = pseudoF; bestKMeans = kMeans; } } //Real clustering using the chosen number final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(bestKMeans); eval.evaluateClusterer(instances); double[] clusterAssignments = eval.getClusterAssignments(); this.usedKmeans = bestKMeans; return clusterAssignments; }
From source file:myclusterer.MyClusterer.java
/** * @param args the command line arguments *///from www. j ava 2 s. c o m public static void main(String[] args) { // TODO code application logic here String nameOfFile; Clusterer clusterer; Instances dataSet; ClusterEvaluation eval; //Baca input file Scanner scan = new Scanner(System.in); System.out.print("Masukkan nama file untuk di-cluster: "); nameOfFile = scan.nextLine(); try { //Baca File arff dataSet = WekaCode.readFileArff(nameOfFile); //Build Clusterer System.out.println( "Tuliskan model clusterer : 0.SimpleKMeans / 1.HierarchicalClusterer / 2.MyKMeans / 3.MyAgnes "); int clustererType = scan.nextInt(); clusterer = WekaCode.buildClusterer(dataSet, clustererType); eval = new ClusterEvaluation(); eval.setClusterer(clusterer); eval.evaluateClusterer(dataSet); System.out.println("Cluster Evaluation:"); System.out.println(eval.clusterResultsToString()); //Given test set } catch (Exception ex) { Logger.getLogger(MyClusterer.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.XMeansClusteringStrategy.java
License:Apache License
/** * {@inheritDoc}//from w w w .ja v a 2 s .com * * <p> * This method is specialized for <b>xmeans</b> clustering. */ @Override public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute, final UseCaseRepository useCaseRepository) { final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer(); // Behavior Mix to be returned; final BehaviorMix behaviorMix = this.createBehaviorMix(); try { // Returns a valid instances set, generated based on the absolut // behavior models Instances instances = getInstances(behaviorModelsAbsolute); // XMeans --> Weka XMeans xmeans = new XMeans(); if (CommandLineArgumentsHandler.getSeedValue() != null) { xmeans.setSeed(Integer.parseInt(CommandLineArgumentsHandler.getSeedValue())); } // distance function DistanceFunction euclideanDistance = new EuclideanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // euclideanDistance.setOptions(options); euclideanDistance.setInstances(instances); xmeans.setDistanceF(euclideanDistance); // DistanceFunction manhattanDistance = new ManhattanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // manhattanDistance.setOptions(options); // manhattanDistance.setInstances(instances); // xmeans.setDistanceF(manhattanDistance); int[] clustersize = null; // create new assignments int[] assignments = new int[instances.numInstances()]; // get number of clusters to be generated. 
int numberOfClustersMin = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin()); int numberOfClustersMax = 0; if (CommandLineArgumentsHandler.getNumberOfClustersMax() != "") { numberOfClustersMax = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMax()); } else { numberOfClustersMax = numberOfClustersMin; } // clustering xmeans.setMinNumClusters(numberOfClustersMin); xmeans.setMaxNumClusters(numberOfClustersMax); // build cluster xmeans.buildClusterer(instances); ClusterEvaluation clusterEvaluation = new ClusterEvaluation(); clusterEvaluation.setClusterer(xmeans); clusterEvaluation.evaluateClusterer(instances); // clusterSize clustersize = new int[xmeans.getClusterCenters().numInstances()]; // set assignments and clustersize for (int s = 0; s < instances.numInstances(); s++) { assignments[s] = xmeans.clusterInstance(instances.instance(s)); clustersize[xmeans.clusterInstance(instances.instance(s))]++; } ClusteringMetrics clusteringMetrics = new ClusteringMetrics(); clusteringMetrics.calculateInterClusteringSimilarity(xmeans.getClusterCenters()); clusteringMetrics.calculateIntraClusteringSimilarity(xmeans.getClusterCenters(), instances, assignments); clusteringMetrics.calculateBetas(); clusteringMetrics.printErrorMetricsHeader(); clusteringMetrics.printErrorMetrics(xmeans.getClusterCenters().numInstances()); clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances); // clusteringMetrics.printClusterAssignmentsToSession(assignments, // xmeans.getClusterCenters().numInstances()); Instances resultingCentroids = xmeans.getClusterCenters(); // for each centroid instance, create new behaviorModelRelative for (int i = 0; i < resultingCentroids.numInstances(); i++) { Instance centroid = resultingCentroids.instance(i); // create a Behavior Model, which includes all vertices only; // the vertices are associated with the use cases, and a // dedicated // vertex that represents the final state will be added; final 
BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases()); // install the transitions in between vertices; this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid, assignments, i); // convert absolute to relative behaviorModel final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer .transform(behaviorModelAbsoluteCentroid); // relative Frequency of cluster i double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances(); // create the (unique) Behavior Mix entry to be returned; final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry( AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency; behaviorModelRelative); // add to resulting behaviorMix behaviorMix.getEntries().add(behaviorMixEntry); } return behaviorMix; } catch (ExtractionException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } // if any error occurs, an ExtractionExeption should be thrown, // indicating the error that occurred; // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy" // should give an idea for handling the Behavior Models and how to // use the helping methods of the (abstract) parent class. return behaviorMix; }
From source file:nl.uva.sne.commons.ClusterUtils.java
public static Map<String, String> bulidClusters(Clusterer clusterer, Instances data, String inDir) throws Exception { FilteredClusterer fc = new FilteredClusterer(); String[] options = new String[2]; options[0] = "-R"; // "range" options[1] = "1"; // we want to ignore the attribute that is in the position '1' Remove remove = new Remove(); // new instance of filter remove.setOptions(options); // set options fc.setFilter(remove); //add filter to remove attributes fc.setClusterer(clusterer); //bind FilteredClusterer to original clusterer fc.buildClusterer(data);//from w w w .j a va 2s. c om Map<String, String> clusters = new HashMap<>(); for (int i = 0; i < data.numInstances(); i++) { Instance inst = data.instance(i); int theClass = fc.clusterInstance(inst); String s = data.attribute(0).value(i); clusters.put(inDir + File.separator + s, String.valueOf(theClass)); System.err.println(s + " is in cluster " + theClass); } ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(fc); // the cluster to evaluate eval.evaluateClusterer(data); // data to evaluate the clusterer on // double ll = eval.getLogLikelihood(); // Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "LogLikelihood :{0}", ll); // // if (clusterer instanceof SimpleKMeans) { // double sqrErr = ((SimpleKMeans) clusterer).getSquaredError(); // Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Squared Error:{0}", sqrErr); // } Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "# of clusters: {0}", eval.getNumClusters()); Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "clusterResults: {0}", eval.clusterResultsToString()); return clusters; }
From source file:qoala.arff.java
/**
 * Clusters a copy of {@code dataSet} with {@code SimpleKMeans} and prints the evaluation summary
 * followed by the instances of each cluster, cluster by cluster.
 *
 * <p>The seed is set before the clusterer is built; setting it afterwards would not influence
 * the model that is evaluated and printed.
 *
 * @param numberOfCLuster the number of clusters to build
 * @throws Exception if building or evaluating the clusterer fails
 */
public void SimpleKmeans(int numberOfCLuster) throws Exception {
    Instances train = new Instances(dataSet);

    SimpleKMeans skm = new SimpleKMeans();
    // Needed so that getAssignments() can be called after the build.
    skm.setPreserveInstancesOrder(true);
    skm.setNumClusters(numberOfCLuster);
    skm.setSeed(10);
    skm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(train);
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    int[] assignments = skm.getAssignments();
    System.out.println("# - cluster - distribution");
    // Print instances grouped by cluster number, in instance order within each cluster.
    final int clusterCount = skm.getNumClusters();
    for (int j = 0; j < clusterCount; j++) {
        for (int i = 0; i < assignments.length; i++) {
            if (assignments[i] == j) {
                System.out.println("Instance " + i + " -> Cluster number: " + assignments[i]);
            }
        }
    }
}
From source file:qoala.arff.java
/**
 * Clusters a copy of {@code dataSet} with {@code EM} (option {@code -I 100}) and prints the
 * evaluation summary followed by the instances of each cluster, cluster by cluster.
 *
 * <p>Each instance is assigned to a cluster once, and the stored assignments are reused when
 * printing, instead of re-running the clusterer for every (cluster, instance) pair.
 *
 * @param NumberOfCluster the number of clusters to build
 * @throws Exception if configuring, building or evaluating the clusterer fails
 */
public void EMClustering(int NumberOfCluster) throws Exception {
    Instances train = new Instances(dataSet);

    EM em = new EM();
    em.setOptions(new String[] {"-I", "100"});
    em.setNumClusters(NumberOfCluster);
    em.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(em);
    eval.evaluateClusterer(train);
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    System.out.println("# - cluster - distribution");
    final int instanceCount = train.numInstances();
    final int[] assignments = new int[instanceCount];
    for (int i = 0; i < instanceCount; i++) {
        assignments[i] = em.clusterInstance(train.instance(i));
    }

    // Print instances grouped by cluster number, in instance order within each cluster.
    final int clusterCount = eval.getNumClusters();
    for (int j = 0; j < clusterCount; j++) {
        for (int i = 0; i < instanceCount; i++) {
            if (assignments[i] == j) {
                System.out.println("Instance " + i + " -> Cluster number: " + assignments[i]);
            }
        }
    }
}
From source file:qoala.arff.java
/**
 * Clusters a copy of {@code dataSet} with {@code XMeans} (between 2 and 100 clusters) and prints
 * the evaluation summary followed by the instances of each cluster, cluster by cluster.
 *
 * <p>Each instance is assigned to a cluster once, and the stored assignments are reused when
 * printing, instead of re-running the clusterer for every (cluster, instance) pair.
 *
 * @throws Exception if building or evaluating the clusterer fails
 */
public void XMenas() throws Exception {
    Instances train = new Instances(dataSet);

    XMeans xm = new XMeans();
    xm.setMaxNumClusters(100);
    xm.setMinNumClusters(2);
    xm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xm);
    eval.evaluateClusterer(train);
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    System.out.println("# - cluster - distribution");
    final int instanceCount = train.numInstances();
    final int[] assignments = new int[instanceCount];
    for (int i = 0; i < instanceCount; i++) {
        assignments[i] = xm.clusterInstance(train.instance(i));
    }

    // Print instances grouped by cluster number, in instance order within each cluster.
    final int clusterCount = eval.getNumClusters();
    for (int j = 0; j < clusterCount; j++) {
        for (int i = 0; i < instanceCount; i++) {
            if (assignments[i] == j) {
                System.out.println("Instance " + i + " -> Cluster number: " + assignments[i]);
            }
        }
    }
}