List of usage examples for the weka.clusterers.SimpleKMeans constructor
public SimpleKMeans()
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * K-Means Clustering/* w ww. j av a 2s . c o m*/ * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); // uses Euclidean distance by default SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(k); clusterer.buildClusterer(ds); // cluster centers Instances centers = clusterer.getClusterCentroids(); Cluster[] clusters = new Cluster[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { Instance inst = centers.instance(i); double[] mean = new double[inst.numAttributes()]; for (int j = 0; j < mean.length; j++) { mean[j] = inst.value(j); } clusters[i] = new Cluster(mean, i); } // cluster members int[] assignments = clusterer.getAssignments(); for (int i = 0; i < assignments.length; i++) { clusters[assignments[i]].addMember(i); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java
License:Open Source License
/**
 * Runs WEKA SimpleKMeans on the given instances and returns the cluster
 * assignment for every instance.
 *
 * @param newInstances the data set to cluster
 * @param K            the number of clusters
 * @return per-instance cluster assignments, in instance order
 * @throws Exception if the options cannot be parsed or clustering fails
 */
public static double[] doKMeans(final Instances newInstances, final int K) throws Exception {
    // -N: clusters, -R: attribute range, -I: max iterations, -S: seed,
    // -A: distance function
    final String optionString =
            "-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance";

    final SimpleKMeans clusterer = new SimpleKMeans();
    clusterer.setOptions(Utils.splitOptions(optionString));
    clusterer.buildClusterer(newInstances);

    final ClusterEvaluation evaluation = new ClusterEvaluation();
    evaluation.setClusterer(clusterer);
    evaluation.evaluateClusterer(newInstances);
    return evaluation.getClusterAssignments();
}
From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java
License:Open Source License
/** * // ww w . j av a 2s . c om * @return * @throws Exception */ public double[] getClusteredInstances() throws Exception { //Removing potential class index instances.setClassIndex(-1); //Clustering using Kmeans int k; double max = 0, r2 = 0, pseudoF = 0; //Testing from 2 to 10 clusters, should be set as entry of this function SimpleKMeans bestKMeans = new SimpleKMeans(); for (k = 2; k <= maxClusters; k++) { final SimpleKMeans kMeans = new SimpleKMeans(); kMeans.setNumClusters(k); kMeans.buildClusterer(instances); //Choosing the "optimal" number of clusters r2 = R2(kMeans); pseudoF = pseudoF(r2, k); //System.out.println(pseudo_f); if (pseudoF > max) { max = pseudoF; bestKMeans = kMeans; } } //Real clustering using the chosen number final ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(bestKMeans); eval.evaluateClusterer(instances); double[] clusterAssignments = eval.getClusterAssignments(); this.usedKmeans = bestKMeans; return clusterAssignments; }
From source file:milk.classifiers.MIRBFNetwork.java
License:Open Source License
/**
 * Transforms a set of exemplars via the density-based cluster-membership
 * filter (m_clm) backed by SimpleKMeans: instances from all exemplars are
 * pooled and weight-normalised, the filter is configured on the pool, and
 * each exemplar is then re-expressed in cluster-membership space with its
 * original id value re-attached as attribute 0.
 *
 * @param ex the exemplars to transform
 * @return a new Exemplars set in the filtered representation
 * @throws Exception if filter configuration or filtering fails
 */
public Exemplars transform(Exemplars ex) throws Exception {
    // Throw all the instances together, weighting each exemplar's instances
    // so every exemplar contributes equal total weight.
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }
    // Rescale so the total weight equals the number of instances.
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    // Density-based clusterer wrapping SimpleKMeans; the exemplar-id
    // attribute is ignored during clustering.
    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Use filter once on the pooled data only to fix the output format;
    // the filtered instances themselves are discarded.
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Go through exemplars, filter each one, and restore its id value
    // in the newly inserted attribute 0.
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    return newExs;
}
From source file:model.clustering.Clustering.java
/**
 * Clusters the data with k-means (after removing the given attributes) and
 * returns the ARFF text with each instance line suffixed by its cluster id.
 *
 * @param data          the instances to cluster
 * @param numOfClusters number of clusters for SimpleKMeans
 * @param remove        attribute-index specification for the Remove filter
 * @return the ARFF header up to "@data" followed by one
 *         "&lt;instance&gt;,&lt;cluster&gt;" line per instance
 * @throws Exception if WEKA fails to build or evaluate the clusterer
 */
public String filledFile(Instances data, int numOfClusters, String remove) throws Exception {
    String mainData = data.toString();
    // Keep everything up to and including the "@data" marker (+1 for the
    // line break that follows it).
    int index = mainData.indexOf("@data");
    StringBuilder clusterData = new StringBuilder(mainData.substring(0, index + 6));

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(remove);

    kMeansCLusterer = new SimpleKMeans();
    kMeansCLusterer.setNumClusters(numOfClusters);

    FilteredClusterer filteredClusterer = new FilteredClusterer();
    filteredClusterer.setClusterer(kMeansCLusterer);
    filteredClusterer.setFilter(removeFilter);
    filteredClusterer.buildClusterer(data);

    eval = new ClusterEvaluation();
    eval.setClusterer(filteredClusterer);
    eval.evaluateClusterer(data);

    // Append "<instance>,<cluster>" per instance; StringBuilder avoids the
    // O(n^2) cost of repeated String concatenation in the original.
    Enumeration<Instance> newData = data.enumerateInstances();
    while (newData.hasMoreElements()) {
        Instance i = newData.nextElement();
        int kluster = filteredClusterer.clusterInstance(i);
        clusterData.append(i.toString()).append(',').append(kluster).append('\n');
    }
    return clusterData.toString();
}
From source file:myclusterer.WekaCode.java
/**
 * Builds a clusterer of the requested type on the data set, prompting the
 * user on stdin for the number of clusters.
 *
 * @param dataSet     the instances to cluster
 * @param clusterType one of the SimpleKMeans / HierarchicalClusterer /
 *                    MyKMeans / MyAgnes type constants
 * @return the built clusterer, or null for an unknown clusterType
 * @throws Exception if WEKA fails to build the clusterer
 */
public static Clusterer buildClusterer(Instances dataSet, int clusterType) throws Exception {
    Clusterer clusterer = null;
    if (clusterType == SimpleKMeans) {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setNumClusters(promptNumClusters());
        clusterer = kmeans;
        clusterer.buildClusterer(dataSet);
    } else if (clusterType == HierarchicalClusterer) {
        HierarchicalClusterer hierarchical = new HierarchicalClusterer();
        hierarchical.setNumClusters(promptNumClusters());
        clusterer = hierarchical;
        clusterer.buildClusterer(dataSet);
    } else if (clusterType == MyKMeans) {
        MyKMeans kmeans = new MyKMeans();
        kmeans.setNumClusters(promptNumClusters());
        clusterer = kmeans;
        clusterer.buildClusterer(dataSet);
    } else if (clusterType == MyAgnes) {
        MyAgnes agnes = new MyAgnes();
        agnes.setNumClusters(promptNumClusters());
        clusterer = agnes;
        clusterer.buildClusterer(dataSet);
    }
    return clusterer;
}

/**
 * Prompts on stdin for the number of clusters and reads one int.
 * Extracted from the four identical copies in buildClusterer.
 */
private static int promptNumClusters() {
    Scanner scan = new Scanner(System.in);
    System.out.print("Masukkan jumlah cluster: ");
    return scan.nextInt();
}
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>
 * K-means specialization: clusters the absolute Behavior Models with WEKA
 * SimpleKMeans (Euclidean distance) and builds one Behavior Mix entry per
 * resulting centroid, weighted by the relative size of its cluster.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {
        // Valid instances set generated from the absolute behavior models.
        Instances instances = getInstances(behaviorModelsAbsolute);

        // KMeans --> Weka
        SimpleKMeans kmeans = new SimpleKMeans();

        // Euclidean distance; the "-D" (don't normalize) option is not set.
        DistanceFunction euclideanDistance = new EuclideanDistance();
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        // Preserve order so getAssignments() matches instance indices.
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // Number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // NOTE(review): this loop runs exactly once (clusterSize starts and
        // ends at numberOfClusters); it looks like a leftover from sweeping
        // a range of cluster sizes — confirm before simplifying.
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // must be specified in a fix way
            kmeans.setNumClusters(clusterSize);

            // build cluster
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            // Compute and print inter/intra-cluster similarity metrics.
            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();

            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // For each centroid instance, create a new behaviorModelRelative.
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {
            Instance centroid = resultingCentroids.instance(i);

            // Create a Behavior Model which includes all vertices only;
            // the vertices are associated with the use cases, and a dedicated
            // vertex that represents the final state will be added.
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency,
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);
        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // If any error occurs, an ExtractionException should be thrown,
    // indicating the error that occurred; the classes "NoClusteringStrategy"
    // and "SimpleClusteringStrategy" should give an idea for handling the
    // Behavior Models and how to use the helping methods of the (abstract)
    // parent class.
    return behaviorMix;
}
From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java
License:Open Source License
@Override public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) { List<Integer> clusters = new ArrayList<Integer>(); String[] options = new String[2]; SimpleKMeans clusterer = new SimpleKMeans(); int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue(); options[0] = "-N"; options[1] = String.valueOf(numberOfGroups); try {// w w w . jav a2 s . c o m clusterer.setOptions(options); clusterer.buildClusterer(dataset); Enumeration<?> e = dataset.enumerateInstances(); while (e.hasMoreElements()) { clusters.add(clusterer.clusterInstance((Instance) e.nextElement())); } ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(), parameters.getParameter(EMClustererParameters.visualization).getValue()); return result; } catch (Exception ex) { logger.log(Level.SEVERE, null, ex); return null; } }
From source file:nl.uva.sne.classifiers.Kmeans.java
@Override public Map<String, String> cluster(String inDir) throws IOException, ParseException { try {/*from w w w .jav a2 s . co m*/ Instances data = ClusterUtils.terms2Instances(inDir, false); DistanceFunction df; // SimpleKMeans currently only supports the Euclidean and Manhattan distances. switch (distanceFunction) { case "Euclidean": df = new EuclideanDistance(data); break; case "Manhattan": df = new ManhattanDistance(data); break; default: df = new EuclideanDistance(data); break; } SimpleKMeans clusterer = new SimpleKMeans(); Random rand = new Random(System.currentTimeMillis()); int seed = rand.nextInt((Integer.MAX_VALUE - 1000000) + 1) + 1000000; clusterer.setSeed(seed); clusterer.setMaxIterations(1000000000); Logger.getLogger(Kmeans.class.getName()).log(Level.INFO, "Start clusteing"); clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(numOfClusters); clusterer.setDistanceFunction(df); return ClusterUtils.bulidClusters(clusterer, data, inDir); } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java
License:Open Source License
protected void unmodifiedContourData() throws Exception { m_contourData = new ContourDataExtractor() { protected void extractContourData(int[] translations, int[] permutation) { Arrays.fill(translations, 0); }// w ww . j av a 2 s .co m }; SimpleKMeans clusterer = new SimpleKMeans(); int clustersPerSample = 2; clusterer.setNumClusters(clustersPerSample * m_contourDataGrid.numSamples()); m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer); m_classifier.buildClassifier(m_contourDataGrid, m_bgData); m_contourModels = new double[1][m_contourDataGrid.numClusters() + 1]; m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()]; for (int i = 0; i < m_contourData.numVectors(); i++) { if (m_contourData.weight(i) > 0) m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0; } removeRedundantContourModels(); }