List of usage examples for weka.clusterers SimpleKMeans setPreserveInstancesOrder
public void setPreserveInstancesOrder(boolean r)
From source file:kmeansapps.Kmeans.java
public void startCluster(String path, int numOfCluster, JTextArea textarea) { try {/*from ww w .j av a2s.c om*/ // TODO code application logic here SimpleKMeans kmeans = new SimpleKMeans(); String[] columnNames = new String[numOfCluster]; kmeans.setSeed(10); kmeans.setPreserveInstancesOrder(true); kmeans.setNumClusters(numOfCluster); BufferedReader datafile = readDataFile(path); Instances data = new Instances(datafile); kmeans.buildClusterer(data); double SSE = kmeans.getSquaredError(); // This array returns the cluster number (starting with 0) for each instance // The array has as many elements as the number of instances int[] assignments = kmeans.getAssignments(); // bikin arraylist 2 dimensi untuk menampung instance masuk ke cluster berapa. ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>(); ArrayList<String> listMemberOfCluster; //tambahkan list cluster for (int i = 0; i < numOfCluster; i++) { listMemberOfCluster = new ArrayList<>(); listOfCluster.add(listMemberOfCluster); } //tambahkan anggota list ke cluster int j = 0; for (int clusterNum : assignments) { listOfCluster.get(clusterNum).add(j + ""); j++; } textarea.setText(""); String result = ""; for (int i = 0; i < listOfCluster.size(); i++) { result = result + ("Cluster - " + i + " ==> "); for (String listMemberOfCluster1 : listOfCluster.get(i)) { result = result + (listMemberOfCluster1 + " "); } result = result + ("\n"); } result = result + ("\nSSE : ") + kmeans.getSquaredError(); textarea.setText(result); } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:lineage.AAFClusterer.java
License:Open Source License
/** * K-Means Clustering//w ww . jav a2 s . c o m * @param data - matrix of observations (numObs x numFeatures) * @param k - number of clusters */ public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) { Instances ds = convertMatrixToWeka(data, numObs, numFeatures); // uses Euclidean distance by default SimpleKMeans clusterer = new SimpleKMeans(); try { clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(k); clusterer.buildClusterer(ds); // cluster centers Instances centers = clusterer.getClusterCentroids(); Cluster[] clusters = new Cluster[centers.numInstances()]; for (int i = 0; i < centers.numInstances(); i++) { Instance inst = centers.instance(i); double[] mean = new double[inst.numAttributes()]; for (int j = 0; j < mean.length; j++) { mean[j] = inst.value(j); } clusters[i] = new Cluster(mean, i); } // cluster members int[] assignments = clusterer.getAssignments(); for (int i = 0; i < assignments.length; i++) { clusters[assignments[i]].addMember(i); } return clusters; } catch (Exception e) { e.printStackTrace(); System.exit(-1); return null; } }
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java
License:Apache License
/** * {@inheritDoc}//from w w w.java 2 s.co m * * <p> * This method is specialized for <b>kmeans</b> clustering. */ @Override public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute, final UseCaseRepository useCaseRepository) { final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer(); // Behavior Mix to be returned; final BehaviorMix behaviorMix = this.createBehaviorMix(); try { // Returns a valid instances set, generated based on the absolut // behavior models Instances instances = getInstances(behaviorModelsAbsolute); // KMeans --> Weka SimpleKMeans kmeans = new SimpleKMeans(); // DistanceFunction manhattanDistance = new ManhattanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // manhattanDistance.setOptions(options); // manhattanDistance.setInstances(instances); // kmeans.setDistanceFunction(manhattanDistance); // distance function with option don*t normalize DistanceFunction euclideanDistance = new EuclideanDistance(); // String[] options = new String[1]; // options[0] = "-D"; // euclideanDistance.setOptions(options); euclideanDistance.setInstances(instances); kmeans.setDistanceFunction(euclideanDistance); kmeans.setPreserveInstancesOrder(true); int[] clustersize = null; int[] assignments = null; // get number of clusters to be generated. 
int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin()); // clustering for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) { // must be specified in a fix way kmeans.setNumClusters(clusterSize); // build cluster kmeans.buildClusterer(instances); clustersize = kmeans.getClusterSizes(); assignments = kmeans.getAssignments(); ClusteringMetrics clusteringMetrics = new ClusteringMetrics(); clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids()); clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances, assignments); clusteringMetrics.calculateBetas(); clusteringMetrics.printErrorMetricsHeader(); clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances()); clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances); // clusteringMetrics.printClusterAssignmentsToSession(assignments, // clusterSize); } Instances resultingCentroids = kmeans.getClusterCentroids(); // for each centroid instance, create new behaviorModelRelative for (int i = 0; i < resultingCentroids.numInstances(); i++) { Instance centroid = resultingCentroids.instance(i); // create a Behavior Model, which includes all vertices only; // the vertices are associated with the use cases, and a // dedicated // vertex that represents the final state will be added; final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases()); // install the transitions in between vertices; this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid, assignments, i); // convert absolute to relative behaviorModel final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer .transform(behaviorModelAbsoluteCentroid); // relative Frequency of cluster i double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances(); // 
create the (unique) Behavior Mix entry to be returned; final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry( AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency; behaviorModelRelative); // add to resulting behaviorMix behaviorMix.getEntries().add(behaviorMixEntry); } return behaviorMix; } catch (ExtractionException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } // if any error occurs, an ExtractionExeption should be thrown, // indicating the error that occurred; // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy" // should give an idea for handling the Behavior Models and how to // use the helping methods of the (abstract) parent class. return behaviorMix; }
From source file:nl.uva.sne.classifiers.Kmeans.java
@Override public Map<String, String> cluster(String inDir) throws IOException, ParseException { try {/*from ww w . j a va 2 s . c o m*/ Instances data = ClusterUtils.terms2Instances(inDir, false); DistanceFunction df; // SimpleKMeans currently only supports the Euclidean and Manhattan distances. switch (distanceFunction) { case "Euclidean": df = new EuclideanDistance(data); break; case "Manhattan": df = new ManhattanDistance(data); break; default: df = new EuclideanDistance(data); break; } SimpleKMeans clusterer = new SimpleKMeans(); Random rand = new Random(System.currentTimeMillis()); int seed = rand.nextInt((Integer.MAX_VALUE - 1000000) + 1) + 1000000; clusterer.setSeed(seed); clusterer.setMaxIterations(1000000000); Logger.getLogger(Kmeans.class.getName()).log(Level.INFO, "Start clusteing"); clusterer.setPreserveInstancesOrder(true); clusterer.setNumClusters(numOfClusters); clusterer.setDistanceFunction(df); return ClusterUtils.bulidClusters(clusterer, data, inDir); } catch (Exception ex) { Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java
License:Open Source License
public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster) throws Exception { Instances inputInstances = WekaUtils.loadARFF(inPath); EuclideanDistance euclideanDistance = new EuclideanDistance(); euclideanDistance.setAttributeIndices("first-last"); euclideanDistance.setDontNormalize(false); euclideanDistance.setInvertSelection(false); SimpleKMeans kmeans = new SimpleKMeans(); kmeans.setPreserveInstancesOrder( Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.PERSERVE_INSTANCE))); kmeans.setDontReplaceMissingValues(Boolean .valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DONT_REPLACE_MISSING_VALUES))); kmeans.setDisplayStdDevs(// w w w .j a v a 2 s . co m Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DISPLAY_STD_DEVS))); kmeans.setMaxIterations( Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.MAX_ITERATIONS))); kmeans.setNumClusters( Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.NUM_CLUSTERS))); kmeans.setSeed(10); //kmeans.setSeed( // Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.SEED))); kmeans.setDistanceFunction(euclideanDistance); kmeans.buildClusterer(inputInstances); WekaUtils.saveModel(kmeans, outPath); /* * * Pour obtenir les pourcentages de les clusters * ClusterEvaluation eval = new ClusterEvaluation(); * eval.setClusterer(kmeans); * eval.evaluateClusterer(inputInstances); * System.out.println(eval.clusterResultsToString()); * * */ return kmeans; }
From source file:qoala.arff.java
public void SimpleKmeans(int numberOfCLuster) throws Exception { Instances train = new Instances(dataSet); SimpleKMeans skm = new SimpleKMeans(); skm.setPreserveInstancesOrder(true); skm.setNumClusters(numberOfCLuster); skm.buildClusterer(train);//from w w w . ja v a 2 s .c om skm.setSeed(10); int[] ClusterSize = skm.getClusterSizes(); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(skm); eval.evaluateClusterer(train); System.out.println("Cluster Evaluation:" + eval.clusterResultsToString()); int[] assignments = skm.getAssignments(); System.out.println("# - cluster - distribution"); for (int j = 0; j < skm.getNumClusters(); j++) { int i = 0; for (int clusterNum : assignments) { if (clusterNum == j) System.out.println("Instance " + i + " -> Cluster number: " + clusterNum); i++; } } }
From source file:swm.project.mappings.UserToUserCluster.java
/**
 * Clusters the instances stored in {@code MappingConstants.USER_MOVIE_CLUSTERS}
 * into 20 k-means clusters and rebuilds the two history maps:
 * {@code userToUserClusterHistory} (user -> cluster number) and
 * {@code userClustersToUsersHistory} (cluster number -> users in that cluster).
 *
 * <p>The user id is taken from the first attribute (index 0) of each instance.
 * Both maps are reset before the file is opened.
 *
 * @throws FileNotFoundException if the input file cannot be opened
 * @throws IOException           if reading or closing the input file fails
 * @throws Exception             if Weka fails to parse the data or build the clusterer
 */
private void clusterUserHistoryWithKmeans() throws FileNotFoundException, IOException, Exception {
    userToUserClusterHistory = new HashMap<>();
    userClustersToUsersHistory = new HashMap<>();

    // Close the file as soon as the instances are loaded, even if parsing fails.
    Instances instanceValues;
    try (Reader reader = new FileReader(MappingConstants.USER_MOVIE_CLUSTERS)) {
        instanceValues = new Instances(reader);
    }

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setNumClusters(20);
    // Required so that getAssignments() lines up with the instance order.
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setDistanceFunction(new EuclideanDistance());
    kmeans.buildClusterer(instanceValues);

    int[] assignments = kmeans.getAssignments();
    for (int row = 0; row < assignments.length; row++) {
        int clusterNo = assignments[row];
        int user = (int) instanceValues.get(row).value(0);

        userToUserClusterHistory.put(user, clusterNo);

        // Create the member list lazily, the first time a cluster is seen.
        ArrayList<Integer> users = userClustersToUsersHistory.get(clusterNo);
        if (users == null) {
            users = new ArrayList<>();
            userClustersToUsersHistory.put(clusterNo, users);
        }
        users.add(user);
    }
}