List of usage examples for weka.clusterers.SimpleKMeans.buildClusterer
@Override public void buildClusterer(Instances data) throws Exception
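Before the per-project examples, here is a minimal, self-contained sketch of the typical call sequence; the file name "data.arff" and the cluster count of 3 are illustrative placeholders, not taken from any of the projects listed below:

import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SimpleKMeansQuickStart {
    public static void main(String[] args) throws Exception {
        // Load a dataset; "data.arff" is a placeholder path.
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(-1); // clustering ignores any class attribute

        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setNumClusters(3);               // illustrative k
        kmeans.setPreserveInstancesOrder(true); // required for getAssignments()
        kmeans.buildClusterer(data);            // the call the examples below revolve around

        // Per-instance cluster assignments and within-cluster squared error.
        int[] assignments = kmeans.getAssignments();
        System.out.println("SSE: " + kmeans.getSquaredError());
        for (int i = 0; i < assignments.length; i++) {
            System.out.println("instance " + i + " -> cluster " + assignments[i]);
        }
    }
}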
From source file:kmeansapps.Kmeans.java
public void startCluster(String path, int numOfCluster, JTextArea textarea) {
    try {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);
        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();

        // This array holds the cluster number (starting with 0) for each instance.
        // It has as many elements as there are instances.
        int[] assignments = kmeans.getAssignments();

        // Two-dimensional list that records which cluster each instance belongs to.
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        // Add one member list per cluster.
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }

        // Add each instance index to its cluster's member list.
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }

        textarea.setText("");
        String result = "";
        for (int i = 0; i < listOfCluster.size(); i++) {
            result = result + ("Cluster - " + i + " ==> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                result = result + (listMemberOfCluster1 + " ");
            }
            result = result + ("\n");
        }
        result = result + ("\nSSE : ") + SSE;
        textarea.setText(result);
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:lineage.AAFClusterer.java
License:Open Source License
/**
 * K-Means Clustering
 *
 * @param data - matrix of observations (numObs x numFeatures)
 * @param k - number of clusters
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    // uses Euclidean distance by default
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setPreserveInstancesOrder(true);
        clusterer.setNumClusters(k);
        clusterer.buildClusterer(ds);

        // cluster centers
        Instances centers = clusterer.getClusterCentroids();
        Cluster[] clusters = new Cluster[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++) {
            Instance inst = centers.instance(i);
            double[] mean = new double[inst.numAttributes()];
            for (int j = 0; j < mean.length; j++) {
                mean[j] = inst.value(j);
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members
        int[] assignments = clusterer.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            clusters[assignments[i]].addMember(i);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}
From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java
License:Open Source License
private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k)
        throws Exception {
    final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet);
    final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet);

    final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction);
    skm.buildClusterer(trdataSet);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(trdataSet);

    final int numClusters = eval.getNumClusters();
    final List<String> possibleValues = new ArrayList<String>(numClusters);
    for (int c = 0; c < numClusters; c++)
        possibleValues.add("cluster_" + c);

    final double[] clusterAssignments = eval.getClusterAssignments();
    final int numAttributes = dataSet.numAttributes();
    final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes);
    for (int j = 0; j < numAttributes; j++) {
        //System.out.println(clusterAssignments[j] + " " + (int) clusterAssignments[j]);
        valueForEachFeature.add((int) clusterAssignments[j]);
    }
    return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature);
}
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/**
 * Runs k-means with K clusters on the given instances and returns the
 * per-instance cluster assignments.
 *
 * @param newInstances the instances to cluster
 * @param K the number of clusters
 * @return the cluster assignment of each instance
 * @throws Exception
 */
public static double[] doKMeans(final Instances newInstances, final int K) throws Exception {
    final SimpleKMeans clusterer = new SimpleKMeans();
    clusterer.setOptions(
            Utils.splitOptions("-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance"));
    clusterer.buildClusterer(newInstances);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(clusterer);
    eval.evaluateClusterer(newInstances);

    double[] ass = eval.getClusterAssignments();
    return ass;
}
From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java
License:Open Source License
/**
 * Clusters the instances with k-means, selecting the number of clusters
 * (from 2 to maxClusters) that maximizes the pseudo-F statistic.
 *
 * @return the cluster assignment of each instance
 * @throws Exception
 */
public double[] getClusteredInstances() throws Exception {
    // Remove potential class index.
    instances.setClassIndex(-1);

    // Clustering using k-means.
    int k;
    double max = 0, r2 = 0, pseudoF = 0;

    // Testing from 2 to maxClusters clusters; this range should ideally be a parameter of this function.
    SimpleKMeans bestKMeans = new SimpleKMeans();
    for (k = 2; k <= maxClusters; k++) {
        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(k);
        kMeans.buildClusterer(instances);

        // Choosing the "optimal" number of clusters.
        r2 = R2(kMeans);
        pseudoF = pseudoF(r2, k);
        if (pseudoF > max) {
            max = pseudoF;
            bestKMeans = kMeans;
        }
    }

    // Real clustering using the chosen number of clusters.
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(bestKMeans);
    eval.evaluateClusterer(instances);

    double[] clusterAssignments = eval.getClusterAssignments();
    this.usedKmeans = bestKMeans;
    return clusterAssignments;
}
From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java
License:Open Source License
/**
 * Simplifies the instances by clustering them into at most maxInstances clusters
 * and returning the centroids together with the cluster contents.
 */
private static KmeansResult getSimplifiedInstances(final Instances instances, final DistanceFunction df,
        final int maxInstances) throws Exception {
    Instances centroids = null;
    List<Instances> clusters = null;

    final int savedClassIndex = instances.classIndex();
    instances.setClassIndex(-1);

    final SimpleKMeans clusterer = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(maxInstances, df);
    clusterer.buildClusterer(instances);
    clusters = WekaMachineLearningUtil.computeClusters(clusterer, instances).getClustersList();
    instances.setClassIndex(savedClassIndex);

    final int numClusters = clusters.size();

    // Set the class index for each cluster's instances.
    for (int i = 0; i < numClusters; i++) {
        clusters.get(i).setClassIndex(savedClassIndex);
    }

    // Save the centroids.
    centroids = clusterer.getClusterCentroids();
    return new KmeansResult(centroids, clusters);
}
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>
 * This method is specialized for <b>k-means</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned.
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {
        // Returns a valid instances set, generated based on the absolute behavior models.
        Instances instances = getInstances(behaviorModelsAbsolute);

        // KMeans --> Weka.
        SimpleKMeans kmeans = new SimpleKMeans();

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // kmeans.setDistanceFunction(manhattanDistance);

        // Distance function; the "-D" (don't normalize) option is left disabled.
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // Get the number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // Clustering.
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // Must be specified in a fixed way.
            kmeans.setNumClusters(clusterSize);

            // Build the clusterer.
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();
            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
            // clusteringMetrics.printClusterAssignmentsToSession(assignments, clusterSize);
        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // For each centroid instance, create a new behaviorModelRelative.
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {
            Instance centroid = resultingCentroids.instance(i);

            // Create a Behavior Model which includes all vertices only; the vertices are
            // associated with the use cases, and a dedicated vertex that represents the
            // final state will be added.
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // Install the transitions in between vertices.
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // Convert the absolute to a relative behavior model.
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // Relative frequency of cluster i.
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // Create the (unique) Behavior Mix entry to be returned.
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME,
                    relativeFrequency,
                    behaviorModelRelative);

            // Add to the resulting behaviorMix.
            behaviorMix.getEntries().add(behaviorMixEntry);
        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // If any error occurs, an ExtractionException should be thrown, indicating the error
    // that occurred; the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea of how to handle the Behavior Models and how to use the helping
    // methods of the (abstract) parent class.
    return behaviorMix;
}
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java
License:Open Source License
@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {
    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    SimpleKMeans clusterer = new SimpleKMeans();

    int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);

    try {
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);

        Enumeration<?> e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }

        ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
        return result;
    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }
        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination, starting with all clusters together
        // and then removing the one whose removal maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        // NormalizeIterableInterval<FloatType, Img<FloatType>> imgNorm =
        //         new NormalizeIterableInterval<FloatType, Img<FloatType>>();
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;
        // ShowInSameFrame showInFrame = new ShowInSameFrame();
        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;
            // Pair<FloatType, FloatType> minmax = Operations.compute(new MinMax<FloatType>(), tmp);
            // Operations.<FloatType, FloatType> map(
            //         new Normalize<FloatType>(minmax.getA().getRealDouble(),
            //                 minmax.getB().getRealDouble(),
            //                 -Float.MAX_VALUE, Float.MAX_VALUE)).compute(tmp, tmp);
            // showInFrame.show(tmp, 2.0);
            if (bestScore < globalBestScore) {
                break;
            }
            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;
        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();
            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] =
                            (int) Math.round(meanPos / num) - CENTER_COL;
                } else {
                    // setWeight((int) ((index - 1) / res.dimension(0)), 0);
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }
        }
    } catch (Exception e) {
        // TODO Auto-generated catch block
    }
}
From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java
License:Open Source License
public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setPreserveInstancesOrder(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.PERSERVE_INSTANCE)));
    kmeans.setDontReplaceMissingValues(Boolean
            .valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DONT_REPLACE_MISSING_VALUES)));
    kmeans.setDisplayStdDevs(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DISPLAY_STD_DEVS)));
    kmeans.setMaxIterations(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.MAX_ITERATIONS)));
    kmeans.setNumClusters(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.NUM_CLUSTERS)));
    kmeans.setSeed(10);
    //kmeans.setSeed(
    //        Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.SEED)));
    kmeans.setDistanceFunction(euclideanDistance);
    kmeans.buildClusterer(inputInstances);

    WekaUtils.saveModel(kmeans, outPath);

    /*
     * To obtain the cluster percentages:
     * ClusterEvaluation eval = new ClusterEvaluation();
     * eval.setClusterer(kmeans);
     * eval.evaluateClusterer(inputInstances);
     * System.out.println(eval.clusterResultsToString());
     */

    return kmeans;
}