List of usage examples for weka.clusterers.SimpleKMeans#getAssignments()
public int[] getAssignments() throws Exception
From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java
License:Open Source License
public Map<Integer, List<Integer>> computeInstanceByCluster(String arffFilter, String inModel, Map<Integer, Integer> arffToIdUser) throws Exception { SimpleKMeans kmeans = WekaUtils.loadModel(inModel); Instances data = new Instances(WekaUtils.loadARFF(arffFilter)); int[] clusters = kmeans.getAssignments(); Map<Integer, List<Integer>> idUserByCluster = new HashMap<Integer, List<Integer>>(); for (int i = 0; i < data.numInstances(); i++) { int ind = clusters[i]; List<Integer> users = idUserByCluster.get(ind); if (users == null) { users = new ArrayList<Integer>(); idUserByCluster.put(ind, users); }//from w ww .j a v a2s . com users.add(arffToIdUser.get(i)); } return idUserByCluster; }
From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java
License:Open Source License
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel, Map<Integer, Integer> arffToIdUser) throws Exception { SimpleKMeans kmeans = WekaUtils.loadModel(inModel); EuclideanDistance eclidean = (EuclideanDistance) kmeans.getDistanceFunction(); Instances data = new Instances(WekaUtils.loadARFF(arffData)); int[] clusters = kmeans.getAssignments(); Instances clusterCentroid = kmeans.getClusterCentroids(); Map<Integer, List<DistanceUser>> nearUser = new HashMap<Integer, List<DistanceUser>>(); for (int i = 0; i < clusterCentroid.numInstances(); i++) { nearUser.put(i, new ArrayList<DistanceUser>()); }//w w w . j a v a 2 s .co m for (int i = 0; i < data.numInstances(); i++) { int ind = clusters[i]; double dist = eclidean.distance(clusterCentroid.instance(ind), data.instance(i)); List<DistanceUser> nears = nearUser.get(ind); if (nears.size() < 10) { nears.add(new DistanceUser(i, dist)); } else { DistanceUser max = Collections.max(nears); if (max.getDistance() > dist) { int maxIndex = nears.indexOf(max); nears.set(maxIndex, new DistanceUser(i, dist)); } } } for (Map.Entry<Integer, List<DistanceUser>> item : nearUser.entrySet()) { for (DistanceUser user : item.getValue()) { user.setIdentifier(arffToIdUser.get(user.getIdentifier())); } } return nearUser; }
From source file:qoala.arff.java
/**
 * Clusters a copy of {@code dataSet} with SimpleKMeans, prints the cluster
 * evaluation summary and then lists every instance grouped by cluster number.
 *
 * @param numberOfCLuster number of clusters requested from SimpleKMeans
 * @throws Exception if building or evaluating the clusterer fails
 */
public void SimpleKmeans(int numberOfCLuster) throws Exception {
    Instances train = new Instances(dataSet);

    SimpleKMeans skm = new SimpleKMeans();
    skm.setPreserveInstancesOrder(true); // required so getAssignments() is available afterwards
    skm.setNumClusters(numberOfCLuster);
    // The seed must be configured before the clusterer is built; setting it
    // afterwards (as the code previously did) cannot influence the result.
    skm.setSeed(10);
    skm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(train);
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    int[] assignments = skm.getAssignments();
    System.out.println("# - cluster - distribution");
    // Outer loop over clusters so the output is grouped by cluster number.
    for (int cluster = 0; cluster < skm.getNumClusters(); cluster++) {
        for (int i = 0; i < assignments.length; i++) {
            if (assignments[i] == cluster) {
                System.out.println("Instance " + i + " -> Cluster number: " + cluster);
            }
        }
    }
}
From source file:swm.project.mappings.UserToUserCluster.java
/**
 * Clusters the dataset stored at {@code MappingConstants.USER_MOVIE_CLUSTERS}
 * into 20 k-means clusters and rebuilds both lookup maps: user to cluster
 * ({@code userToUserClusterHistory}) and cluster to users
 * ({@code userClustersToUsersHistory}).
 *
 * <p>The user identifier is taken from the first attribute of each instance.
 *
 * @throws FileNotFoundException if the dataset file does not exist
 * @throws IOException           if the dataset cannot be read
 * @throws Exception             if building the clusterer fails
 */
private void clusterUserHistoryWithKmeans() throws FileNotFoundException, IOException, Exception {
    userToUserClusterHistory = new HashMap<>();
    userClustersToUsersHistory = new HashMap<>();

    Instances instanceValues;
    // The Instances constructor reads the data while constructing, so the
    // reader can be released as soon as it returns.
    try (Reader reader = new FileReader(MappingConstants.USER_MOVIE_CLUSTERS)) {
        instanceValues = new Instances(reader);
    }

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setNumClusters(20);
    kmeans.setPreserveInstancesOrder(true); // required for getAssignments()
    kmeans.setDistanceFunction(new EuclideanDistance());
    kmeans.buildClusterer(instanceValues);

    int[] assignments = kmeans.getAssignments();
    for (int index = 0; index < assignments.length; index++) {
        int clusterNo = assignments[index];
        int user = (int) instanceValues.get(index).value(0);
        userToUserClusterHistory.put(user, clusterNo);

        // Only allocate a list the first time a cluster number is seen.
        ArrayList<Integer> users = userClustersToUsersHistory.get(clusterNo);
        if (users == null) {
            users = new ArrayList<>();
            userClustersToUsersHistory.put(clusterNo, users);
        }
        users.add(user);
    }
}
From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java
License:Open Source License
public Instances clusteredInstances(Instances data) { if (data == null) { throw new NullPointerException("Data is null at clusteredInstances method"); }// w w w. ja v a2 s .com Instances sampled_data = data; for (int i = 0; i < sampled_data.numInstances(); i++) { sampled_data.remove(i); } SimpleKMeans sKmeans = new SimpleKMeans(); data.setClassIndex(data.numAttributes() - 1); Remove filter = new Remove(); filter.setAttributeIndices("" + (data.classIndex() + 1)); List assignments = new ArrayList(); try { filter.setInputFormat(data); Instances dataClusterer = Filter.useFilter(data, filter); String[] options = new String[3]; options[0] = "-I"; // max. iterations options[1] = "500"; options[2] = "-O"; sKmeans.setNumClusters(data.numClasses()); sKmeans.setOptions(options); sKmeans.buildClusterer(dataClusterer); System.out.println("Kmeans\n:" + sKmeans); System.out.println(Arrays.toString(sKmeans.getAssignments())); assignments = Arrays.asList(sKmeans.getAssignments()); } catch (Exception e) { e.printStackTrace(); } System.out.println("Assignments\n: " + assignments); ClusterEvaluation eval = new ClusterEvaluation(); eval.setClusterer(sKmeans); try { eval.evaluateClusterer(data); } catch (Exception e) { e.printStackTrace(); } int classesToClustersMap[] = eval.getClassesToClusters(); for (int i = 0; i < classesToClustersMap.length; i++) { if (assignments.get(i).equals(((Integer) classesToClustersMap[(int) data.get(i).classValue()]))) { ((Instances) sampled_data).add(data.get(i)); } } return ((Instances) sampled_data); }