List of usage examples for weka.clusterers.SimpleKMeans#getClusterCentroids
public Instances getClusterCentroids()
From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java
License:Open Source License
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel, Map<Integer, Integer> arffToIdUser) throws Exception { SimpleKMeans kmeans = WekaUtils.loadModel(inModel); EuclideanDistance eclidean = (EuclideanDistance) kmeans.getDistanceFunction(); Instances data = new Instances(WekaUtils.loadARFF(arffData)); int[] clusters = kmeans.getAssignments(); Instances clusterCentroid = kmeans.getClusterCentroids(); Map<Integer, List<DistanceUser>> nearUser = new HashMap<Integer, List<DistanceUser>>(); for (int i = 0; i < clusterCentroid.numInstances(); i++) { nearUser.put(i, new ArrayList<DistanceUser>()); }// w w w .j av a 2s . c o m for (int i = 0; i < data.numInstances(); i++) { int ind = clusters[i]; double dist = eclidean.distance(clusterCentroid.instance(ind), data.instance(i)); List<DistanceUser> nears = nearUser.get(ind); if (nears.size() < 10) { nears.add(new DistanceUser(i, dist)); } else { DistanceUser max = Collections.max(nears); if (max.getDistance() > dist) { int maxIndex = nears.indexOf(max); nears.set(maxIndex, new DistanceUser(i, dist)); } } } for (Map.Entry<Integer, List<DistanceUser>> item : nearUser.entrySet()) { for (DistanceUser user : item.getValue()) { user.setIdentifier(arffToIdUser.get(user.getIdentifier())); } } return nearUser; }
From source file:org.montp2.m1decol.ter.servlets.AnalyserMessageServlet.java
License:Open Source License
/**
 * Handles a submitted message: turns it into a one-instance sparse ARFF file, finds the
 * closest centroid of the stored k-means model, reads the user ids listed for that cluster
 * in the neighbour file and stores the matching users in the session under
 * {@code "LIST_USERS"} before redirecting to the results page.
 *
 * <p>Any exception raised while processing is printed and otherwise ignored, so the
 * redirect always happens; the three temporary files are removed in every case.
 *
 * @param request  the request carrying the {@code "message"} parameter
 * @param response the response, which is always redirected to the results page
 * @throws ServletException declared for the servlet contract
 * @throws IOException      if the redirect fails
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    String message = request.getParameter("message");
    HttpSession session = request.getSession();

    try {
        // Write the message to disk and run the lemmatization step over ROOT_PATH.
        // NOTE(review): presumably this step produces message_lemma.txt, read below - confirm.
        OutputStreamUtils.writeSimple(message.toLowerCase(), ROOT_PATH + "message.txt");
        UniGramsPreProcessing uni = new UniGramsPreProcessing();
        uni.computeLemmatization(ROOT_PATH, STOP_WORD);

        // Create the ARFF file: copy the header of the base ARFF and build one sparse row.
        List<String> arffData = InputStreamUtils.readByLine(ARFF_BASE);
        List<String> mgsLem = new ArrayList<String>();
        for (String line : InputStreamUtils.readByLine(ROOT_PATH + "message_lemma.txt")) {
            mgsLem.addAll(Arrays.asList(line.split("\\s")));
        }

        StringBuilder arffMessage = new StringBuilder();
        arffMessage.append("@relation 'Message_Utilisateur'\n\n");
        StringBuilder vectorMessage = new StringBuilder();
        vectorMessage.append("{");

        // Header lines are read from index 2 onwards; (i - 2) is used as the attribute index.
        // NOTE(review): assumes the first attribute sits on line index 2 with no gaps - confirm.
        for (int i = 2; i < arffData.size(); i++) {
            String line = arffData.get(i);
            if (line.equalsIgnoreCase("@data")) {
                arffMessage.append(line).append("\n");
                break;
            }
            if (!line.equals("")) {
                String[] values = line.split("\\s");
                // Guard: a line with fewer than two tokens has no attribute name to compare.
                if (values.length > 1 && mgsLem.contains(values[1])) {
                    vectorMessage.append(i - 2).append(" 1,");
                }
            }
            arffMessage.append(line).append("\n");
        }

        // Drop the trailing comma only when at least one attribute matched, so that a
        // message with no known term yields the valid empty sparse row "{}" rather than "}".
        int lastIndex = vectorMessage.length() - 1;
        if (vectorMessage.charAt(lastIndex) == ',') {
            vectorMessage.deleteCharAt(lastIndex);
        }
        vectorMessage.append("}");
        arffMessage.append(vectorMessage).append("\n");
        OutputStreamUtils.writeSimple(arffMessage.toString(), ROOT_PATH + "message_arff.arff");

        // Find the cluster whose centroid is closest to the message instance.
        SimpleKMeans kmeans = WekaUtils.loadModel(IN_MODEL);
        EuclideanDistance eclidean = (EuclideanDistance) kmeans.getDistanceFunction();
        Instances data = new Instances(WekaUtils.loadARFF(ROOT_PATH + "message_arff.arff"));
        Instances clusterCentroid = kmeans.getClusterCentroids();

        double dist = Double.MAX_VALUE;
        int cluster_current = -99;
        for (int i = 0; i < clusterCentroid.numInstances(); i++) {
            System.out.println("cluster:" + i);
            double newDist = eclidean.distance(clusterCentroid.instance(i), data.instance(0));
            if (newDist < dist) {
                cluster_current = i;
                dist = newDist;
            }
        }

        // Look up the user profiles listed for that cluster in the neighbour file:
        // collect "id_user:<n>" lines after the matching "Cluster:<n>" line, and stop
        // at the next "Cluster" line.
        System.out.println("cluster_current:" + cluster_current);
        boolean findUsers = false;
        List<Integer> idUsers = new ArrayList<Integer>();
        for (String line : InputStreamUtils.readByLine(NEIGHBOR)) {
            if (!"".equals(line)) {
                String[] values = line.split(":");
                if (!findUsers && values[0].equalsIgnoreCase("Cluster")) {
                    if (cluster_current == Integer.parseInt(values[1])) {
                        findUsers = true;
                    }
                } else {
                    if (findUsers && values[0].equalsIgnoreCase("id_user")) {
                        idUsers.add(Integer.parseInt(values[1].trim()));
                    }
                    if (findUsers && values[0].equalsIgnoreCase("Cluster")) {
                        break;
                    }
                }
            }
        }

        AbstractBusiness business = new ForumBusinness();
        session.setAttribute("LIST_USERS", business.findUsersByIDs(idUsers));
    } catch (Exception e) {
        // Best effort: a failure must not prevent the redirect below.
        e.printStackTrace();
    } finally {
        FileUtils.removeFile(ROOT_PATH + "message.txt");
        FileUtils.removeFile(ROOT_PATH + "message_lemma.txt");
        FileUtils.removeFile(ROOT_PATH + "message_arff.arff");
    }
    response.sendRedirect("/CategorizeUserForum/results.jsp");
}
From source file:probcog.bayesnets.learning.DomainLearner.java
License:Open Source License
/** * sorts the domain of the given node, for which the given clusterer has * been learnt, in ascending order of cluster centroid * //from w w w . j a va 2s . co m * @param node * @param clusterer */ protected void sortClusteredDomain(BeliefNode node, SimpleKMeans clusterer) { // get domain sort order (sort by centroid, ascending), // i.e. get an unsorted and a sorted version of // the centroids array int numClusters = clusterer.getNumClusters(); double[] values = clusterer.getClusterCentroids().attributeToDoubleArray(0); double[] sorted_values = (double[]) values.clone(); Arrays.sort(sorted_values); // create new sorted domain Discrete domain = (Discrete) node.getDomain(); Discrete sorted_domain = new Discrete(); for (int new_idx = 0; new_idx < numClusters; new_idx++) { for (int old_idx = 0; old_idx < numClusters; old_idx++) if (values[old_idx] == sorted_values[new_idx]) sorted_domain.addName(domain.getName(old_idx)); } // apply new, sorted domain bn.bn.changeBeliefNodeDomain(node, sorted_domain); }
From source file:wekimini.InputGenerator.java
/**
 * Runs k-means over {@code dataset} with the requested number of clusters, stores the
 * resulting centroids in {@code clusters} and then calls {@code addKmClustersToTraining()}.
 *
 * <p>{@code buildDataset()} is invoked first (presumably it populates {@code dataset} -
 * confirm against its definition).
 *
 * @param numClusters the number of clusters requested from the clusterer
 * @throws Exception if building the clusterer fails
 */
public void selectKmClusters(int numClusters) throws Exception {
    buildDataset();

    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(numClusters);
    kMeans.buildClusterer(dataset);

    clusters = kMeans.getClusterCentroids();
    addKmClustersToTraining();
}