Usage examples for weka.clusterers.SimpleKMeans.getDistanceFunction()
public DistanceFunction getDistanceFunction()
From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java
License:Open Source License
/**
 * Collects, for every cluster of a stored k-means model, the instances that lie
 * closest to that cluster's centroid.
 *
 * <p>Instance {@code i} of the loaded data is attributed to cluster
 * {@code kmeans.getAssignments()[i]}; its distance to that cluster's centroid is
 * measured with the model's own distance function. At most ten instances are kept
 * per cluster: once a cluster's list is full, a new instance replaces the current
 * farthest one only when it is strictly closer. Finally every retained identifier
 * (an instance index) is translated through {@code arffToIdUser}.
 *
 * <p>NOTE(review): this assumes the rows of {@code arffData} are the same rows, in
 * the same order, as those the model was built from, so that the assignments array
 * lines up with the data - confirm against the training code.
 *
 * @param arffData     location of the ARFF data, as understood by {@code WekaUtils.loadARFF}
 * @param inModel      location of the serialized model, as understood by {@code WekaUtils.loadModel}
 * @param arffToIdUser mapping applied to each retained instance index to obtain the
 *                     identifier stored in the result (presumably ARFF row to user id)
 * @return a map from cluster index to the retained {@code DistanceUser} entries; every
 *         cluster index has an entry, possibly an empty list
 * @throws Exception if loading the model or data fails, or if the model cannot supply
 *                   its assignments
 */
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel,
        Map<Integer, Integer> arffToIdUser) throws Exception {
    // Upper bound on the number of neighbours retained per cluster.
    final int maxNeighbors = 10;

    SimpleKMeans kmeans = WekaUtils.loadModel(inModel);
    Instances data = new Instances(WekaUtils.loadARFF(arffData));
    int[] clusters = kmeans.getAssignments();
    Instances clusterCentroid = kmeans.getClusterCentroids();

    // Pre-create one bucket per centroid so that clusters without members still appear.
    Map<Integer, List<DistanceUser>> nearUser = new HashMap<Integer, List<DistanceUser>>();
    for (int i = 0; i < clusterCentroid.numInstances(); i++) {
        nearUser.put(i, new ArrayList<DistanceUser>());
    }

    for (int i = 0; i < data.numInstances(); i++) {
        int ind = clusters[i];
        // Use the model's distance function through its declared return type rather than
        // downcasting to EuclideanDistance, so models built with another metric also work.
        double dist = kmeans.getDistanceFunction().distance(clusterCentroid.instance(ind), data.instance(i));

        List<DistanceUser> nears = nearUser.get(ind);
        if (nears.size() < maxNeighbors) {
            nears.add(new DistanceUser(i, dist));
        } else {
            // Bucket is full: evict the farthest entry only if the newcomer is strictly closer.
            DistanceUser max = Collections.max(nears);
            if (max.getDistance() > dist) {
                int maxIndex = nears.indexOf(max);
                nears.set(maxIndex, new DistanceUser(i, dist));
            }
        }
    }

    // Replace instance indices by the identifiers supplied by the caller.
    for (Map.Entry<Integer, List<DistanceUser>> item : nearUser.entrySet()) {
        for (DistanceUser user : item.getValue()) {
            user.setIdentifier(arffToIdUser.get(user.getIdentifier()));
        }
    }
    return nearUser;
}
From source file:org.montp2.m1decol.ter.servlets.AnalyserMessageServlet.java
License:Open Source License
/**
 * Handles a submitted message: vectorises it against the reference ARFF header,
 * finds the closest k-means cluster and stores that cluster's users in the session.
 *
 * <p>Steps, all performed on files under {@code ROOT_PATH}:
 * <ol>
 *   <li>write the lower-cased {@code message} request parameter to {@code message.txt}
 *       and run {@code UniGramsPreProcessing.computeLemmatization}; the lemmas are then
 *       read back from {@code message_lemma.txt};</li>
 *   <li>copy the header of {@code ARFF_BASE} (from its third line up to {@code @data})
 *       and append one sparse data row flagging every attribute whose name occurs
 *       among the lemmas;</li>
 *   <li>load the model at {@code IN_MODEL} and pick the centroid nearest to that row;</li>
 *   <li>read {@code NEIGHBOR} and collect the {@code id_user} entries listed under the
 *       matching {@code Cluster} section;</li>
 *   <li>store the users returned by {@code findUsersByIDs} in the session attribute
 *       {@code LIST_USERS}.</li>
 * </ol>
 *
 * <p>Any exception raised during these steps (including a missing {@code message}
 * parameter) is caught and its stack trace printed; the temporary files are removed
 * and the client is redirected to the results page in every case.
 *
 * <p>NOTE(review): the temporary file names are fixed, so concurrent requests would
 * appear to share them - confirm whether this servlet can be hit in parallel.
 *
 * @param request  the incoming request carrying the {@code message} parameter
 * @param response the response, which always ends in a redirect to the results page
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if sending the redirect fails
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    String message = request.getParameter("message");
    HttpSession session = request.getSession();

    try {
        // Create the message file and lemmatise it.
        OutputStreamUtils.writeSimple(message.toLowerCase(), ROOT_PATH + "message.txt");
        UniGramsPreProcessing uni = new UniGramsPreProcessing();
        uni.computeLemmatization(ROOT_PATH, STOP_WORD);

        // Create the ARFF file.
        List<String> arffData = InputStreamUtils.readByLine(ARFF_BASE);
        List<String> mgsLem = new ArrayList<String>();
        for (String line : InputStreamUtils.readByLine(ROOT_PATH + "message_lemma.txt")) {
            mgsLem.addAll(Arrays.asList(line.split("\\s")));
        }

        StringBuilder arffMessage = new StringBuilder();
        arffMessage.append("@relation 'Message_Utilisateur'\n\n");
        StringBuilder vectorMessage = new StringBuilder();
        vectorMessage.append("{");

        // The first two lines of the reference file are skipped and replaced by the
        // relation header above; line i therefore maps to attribute index i - 2.
        for (int i = 2; i < arffData.size(); i++) {
            String line = arffData.get(i);
            if (line.equalsIgnoreCase("@data")) {
                arffMessage.append(line).append("\n");
                break;
            }
            if (!line.equals("")) {
                String[] values = line.split("\\s");
                // Guard against header lines that carry a single token.
                if (values.length > 1 && mgsLem.contains(values[1])) {
                    // Separate entries with a comma instead of trimming a trailing one,
                    // so that an empty match set still yields a well-formed "{}" row.
                    if (vectorMessage.length() > 1) {
                        vectorMessage.append(",");
                    }
                    vectorMessage.append(i - 2).append(" 1");
                }
            }
            arffMessage.append(line).append("\n");
        }
        vectorMessage.append("}\n");
        arffMessage.append(vectorMessage);
        OutputStreamUtils.writeSimple(arffMessage.toString(), ROOT_PATH + "message_arff.arff");

        // Find the cluster whose centroid is nearest to the message vector.
        SimpleKMeans kmeans = WekaUtils.loadModel(IN_MODEL);
        Instances data = new Instances(WekaUtils.loadARFF(ROOT_PATH + "message_arff.arff"));
        Instances clusterCentroid = kmeans.getClusterCentroids();
        double dist = Double.MAX_VALUE;
        int cluster_current = -99;
        for (int i = 0; i < clusterCentroid.numInstances(); i++) {
            System.out.println("cluster:" + i);
            // Call through the declared DistanceFunction type; no downcast is needed.
            double newDist = kmeans.getDistanceFunction().distance(clusterCentroid.instance(i), data.instance(0));
            if (newDist < dist) {
                cluster_current = i;
                dist = newDist;
            }
        }

        // Find the profiles listed for that cluster in the neighbour file.
        System.out.println("cluster_current:" + cluster_current);
        boolean findUsers = false;
        List<Integer> idUsers = new ArrayList<Integer>();
        for (String line : InputStreamUtils.readByLine(NEIGHBOR)) {
            if ("".equals(line)) {
                continue;
            }
            String[] values = line.split(":");
            if (!findUsers && values[0].equalsIgnoreCase("Cluster")) {
                // Trim like the id_user branch does, so "Cluster: 3" parses as well.
                if (cluster_current == Integer.parseInt(values[1].trim())) {
                    findUsers = true;
                }
            } else {
                if (findUsers && values[0].equalsIgnoreCase("id_user")) {
                    idUsers.add(Integer.parseInt(values[1].trim()));
                }
                // The next "Cluster" header ends the section of interest.
                if (findUsers && values[0].equalsIgnoreCase("Cluster")) {
                    break;
                }
            }
        }

        AbstractBusiness business = new ForumBusinness();
        session.setAttribute("LIST_USERS", business.findUsersByIDs(idUsers));
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Always clean up the per-request working files.
        FileUtils.removeFile(ROOT_PATH + "message.txt");
        FileUtils.removeFile(ROOT_PATH + "message_lemma.txt");
        FileUtils.removeFile(ROOT_PATH + "message_arff.arff");
    }
    response.sendRedirect("/CategorizeUserForum/results.jsp");
}