Example usage for weka.clusterers SimpleKMeans getDistanceFunction

List of usage examples for weka.clusterers SimpleKMeans getDistanceFunction

Introduction

On this page you can find example usages of weka.clusterers.SimpleKMeans.getDistanceFunction.

Prototype

public DistanceFunction getDistanceFunction() 

Source Link

Document

Returns the distance function currently in use.

Usage

From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java

License:Open Source License

/**
 * Groups instances by their assigned k-means cluster and keeps, per cluster, at most
 * ten entries selected by their distance to the cluster centroid.
 *
 * <p>Once a cluster already holds ten entries, a new instance replaces the entry that
 * {@code Collections.max} reports as largest, but only when that entry's distance is
 * strictly greater than the new one. (Presumably {@code DistanceUser} orders itself by
 * distance, making this a "ten closest" selection; verify against its {@code compareTo}.)
 *
 * <p>Before returning, every retained entry's identifier (initially the instance index)
 * is replaced by the value {@code arffToIdUser} maps it to.
 *
 * @param arffData     ARFF source handed to {@code WekaUtils.loadARFF}
 * @param inModel      model source handed to {@code WekaUtils.loadModel}
 * @param arffToIdUser lookup applied to each retained entry's identifier
 * @return map from cluster index to that cluster's retained entries
 * @throws Exception propagated from loading the model or data, or from Weka itself
 */
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel,
        Map<Integer, Integer> arffToIdUser) throws Exception {

    SimpleKMeans model = WekaUtils.loadModel(inModel);
    EuclideanDistance metric = (EuclideanDistance) model.getDistanceFunction();
    Instances instances = new Instances(WekaUtils.loadARFF(arffData));

    // assignments[row] is used as the cluster index of instance 'row'.
    int[] assignments = model.getAssignments();
    Instances centroids = model.getClusterCentroids();

    // One (initially empty) bucket per centroid.
    Map<Integer, List<DistanceUser>> nearestByCluster = new HashMap<Integer, List<DistanceUser>>();
    int clusterCount = centroids.numInstances();
    for (int c = 0; c < clusterCount; c++) {
        nearestByCluster.put(c, new ArrayList<DistanceUser>());
    }

    for (int row = 0; row < instances.numInstances(); row++) {
        int cluster = assignments[row];
        double distance = metric.distance(centroids.instance(cluster), instances.instance(row));
        List<DistanceUser> bucket = nearestByCluster.get(cluster);

        if (bucket.size() >= 10) {
            // Bucket is full: evict its current maximum only if the newcomer is closer.
            DistanceUser farthest = Collections.max(bucket);
            if (farthest.getDistance() > distance) {
                bucket.set(bucket.indexOf(farthest), new DistanceUser(row, distance));
            }
            continue;
        }
        bucket.add(new DistanceUser(row, distance));
    }

    // Translate instance indices into the identifiers supplied by the caller.
    for (List<DistanceUser> bucket : nearestByCluster.values()) {
        for (DistanceUser entry : bucket) {
            entry.setIdentifier(arffToIdUser.get(entry.getIdentifier()));
        }
    }

    return nearestByCluster;
}

From source file:org.montp2.m1decol.ter.servlets.AnalyserMessageServlet.java

License:Open Source License

/**
 * Processes a submitted message and stores the matching users in the session.
 *
 * <p>Steps, as performed below:
 * <ol>
 *   <li>write the lower-cased {@code message} parameter to {@code message.txt} and run
 *       {@code UniGramsPreProcessing.computeLemmatization} on {@code ROOT_PATH};</li>
 *   <li>copy the header of {@code ARFF_BASE} (from its third line up to {@code @data})
 *       and append one sparse data row flagging every header line whose second token
 *       occurs among the tokens read from {@code message_lemma.txt};</li>
 *   <li>load the k-means model from {@code IN_MODEL} and pick the centroid with the
 *       smallest distance to that row;</li>
 *   <li>read {@code NEIGHBOR} and collect the {@code id_user} values listed under the
 *       selected {@code Cluster} entry;</li>
 *   <li>store {@code business.findUsersByIDs(idUsers)} in the session attribute
 *       {@code LIST_USERS}.</li>
 * </ol>
 *
 * <p>Any exception raised by these steps is printed and otherwise ignored; the three
 * working files are removed and the client is redirected to the results page in all cases.
 *
 * <p>NOTE(review): the working files use fixed names under {@code ROOT_PATH}, so
 * concurrent requests would appear to share them - confirm whether that is acceptable.
 *
 * @param request  servlet request carrying the {@code message} parameter
 * @param response servlet response, used for the content type and the final redirect
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if the redirect fails
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    String message = request.getParameter("message");
    HttpSession session = request.getSession();
    try {
        if (message == null) {
            // Same control flow as before (caught below), but with a meaningful diagnostic
            // instead of an anonymous NullPointerException.
            throw new IllegalArgumentException("Missing request parameter: message");
        }

        // Create the message file and lemmatize it.
        OutputStreamUtils.writeSimple(message.toLowerCase(), ROOT_PATH + "message.txt");
        UniGramsPreProcessing uni = new UniGramsPreProcessing();
        uni.computeLemmatization(ROOT_PATH, STOP_WORD);

        // Build the ARFF file describing the message.
        List<String> arffData = InputStreamUtils.readByLine(ARFF_BASE);
        List<String> mgsLem = new ArrayList<String>();

        for (String line : InputStreamUtils.readByLine(ROOT_PATH + "message_lemma.txt")) {
            mgsLem.addAll(Arrays.asList(line.split("\\s")));
        }

        StringBuilder arffMessage = new StringBuilder();
        arffMessage.append("@relation 'Message_Utilisateur'\n\n");
        StringBuilder vectorMessage = new StringBuilder();
        vectorMessage.append("{");
        // The first two lines of the base file are skipped; the relation header is
        // replaced by the one written above.
        for (int i = 2; i < arffData.size(); i++) {
            String line = arffData.get(i);
            if (line.equalsIgnoreCase("@data")) {
                arffMessage.append(line).append("\n");
                break;
            }

            if (!line.equals("")) {
                String[] values = line.split("\\s");
                // Guard against header lines with a single token (no second field to compare).
                if (values.length > 1 && mgsLem.contains(values[1])) {
                    // The sparse index is the line's offset from the first copied header line.
                    vectorMessage.append(i - 2).append(" 1,");
                }
            }

            arffMessage.append(line).append("\n");
        }

        // Drop the trailing separator only when at least one entry was written; with no
        // match the row must stay "{}" rather than losing its opening brace.
        int lastIndex = vectorMessage.length() - 1;
        if (vectorMessage.charAt(lastIndex) == ',') {
            vectorMessage.setLength(lastIndex);
        }
        arffMessage.append(vectorMessage).append("}\n");

        OutputStreamUtils.writeSimple(arffMessage.toString(), ROOT_PATH + "message_arff.arff");

        // Find the closest cluster.
        SimpleKMeans kmeans = WekaUtils.loadModel(IN_MODEL);

        EuclideanDistance eclidean = (EuclideanDistance) kmeans.getDistanceFunction();

        Instances data = new Instances(WekaUtils.loadARFF(ROOT_PATH + "message_arff.arff"));

        Instances clusterCentroid = kmeans.getClusterCentroids();

        double dist = Double.MAX_VALUE;
        // Stays at -99 when no centroid is closer than Double.MAX_VALUE (e.g. no centroids).
        int cluster_current = -99;

        for (int i = 0; i < clusterCentroid.numInstances(); i++) {
            System.out.println("cluster:" + i);
            double newDist = eclidean.distance(clusterCentroid.instance(i), data.instance(0));
            if (newDist < dist) {
                cluster_current = i;
                dist = newDist;
            }
        }
        System.out.println("cluster_current:" + cluster_current);

        // Collect the user ids listed under the selected cluster in the neighbor file.
        boolean findUsers = false;
        List<Integer> idUsers = new ArrayList<Integer>();
        for (String line : InputStreamUtils.readByLine(NEIGHBOR)) {
            if (!"".equals(line)) {
                String[] values = line.split(":");

                if (!findUsers && values[0].equalsIgnoreCase("Cluster")) {
                    if (cluster_current == Integer.parseInt(values[1])) {
                        findUsers = true;
                    }
                } else {

                    if (findUsers && values[0].equalsIgnoreCase("id_user")) {
                        idUsers.add(Integer.parseInt(values[1].trim()));
                    }

                    // The next "Cluster" header ends the section of the selected cluster.
                    if (findUsers && values[0].equalsIgnoreCase("Cluster")) {
                        break;
                    }
                }

            }

        }

        AbstractBusiness business = new ForumBusinness();
        session.setAttribute("LIST_USERS", business.findUsersByIDs(idUsers));

    } catch (Exception e) {
        // Best effort: the failure is reported and the user is still redirected below.
        e.printStackTrace();
    } finally {
        FileUtils.removeFile(ROOT_PATH + "message.txt");
        FileUtils.removeFile(ROOT_PATH + "message_lemma.txt");
        FileUtils.removeFile(ROOT_PATH + "message_arff.arff");
    }
    response.sendRedirect("/CategorizeUserForum/results.jsp");
}