Example usage for weka.clusterers SimpleKMeans getClusterCentroids

List of usage examples for weka.clusterers SimpleKMeans getClusterCentroids

Introduction

On this page you can find example usages of weka.clusterers SimpleKMeans getClusterCentroids.

Prototype

public Instances getClusterCentroids() 

Source Link

Document

Gets the cluster centroids.

Usage

From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java

License:Open Source License

/**
 * Collects, for every cluster of a stored k-means model, the instances that
 * lie closest to that cluster's centroid.
 *
 * <p>Each instance of the ARFF data is measured against the centroid of the
 * cluster it was assigned to, using the model's own distance function. At
 * most 10 entries are kept per cluster; once a cluster's list is full, a new
 * instance replaces the current maximum (as defined by the natural ordering
 * of {@code DistanceUser}; presumably by distance, to be confirmed) when its
 * distance is smaller. Finally every kept entry's identifier, which is the
 * instance index, is replaced by the value mapped to it in
 * {@code arffToIdUser}.
 *
 * @param arffData     path of the ARFF file to load
 * @param inModel      path of the serialized k-means model
 * @param arffToIdUser maps an ARFF instance index to a user identifier
 * @return a map from cluster index to the entries kept for that cluster
 * @throws Exception if the model or the data cannot be loaded or queried
 */
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel,
        Map<Integer, Integer> arffToIdUser) throws Exception {

    SimpleKMeans model = WekaUtils.loadModel(inModel);
    EuclideanDistance metric = (EuclideanDistance) model.getDistanceFunction();
    Instances instances = new Instances(WekaUtils.loadARFF(arffData));
    int[] assignments = model.getAssignments();
    Instances centroids = model.getClusterCentroids();

    // One (initially empty) candidate list per centroid.
    Map<Integer, List<DistanceUser>> nearestByCluster = new HashMap<Integer, List<DistanceUser>>();
    int centroidCount = 0;
    while (centroidCount < centroids.numInstances()) {
        nearestByCluster.put(centroidCount, new ArrayList<DistanceUser>());
        centroidCount++;
    }

    for (int row = 0; row < instances.numInstances(); row++) {
        int cluster = assignments[row];
        double distance = metric.distance(centroids.instance(cluster), instances.instance(row));
        List<DistanceUser> kept = nearestByCluster.get(cluster);

        // Still room in this cluster's list: keep the instance unconditionally.
        if (kept.size() < 10) {
            kept.add(new DistanceUser(row, distance));
            continue;
        }

        // List is full: evict the current maximum if this instance is closer.
        DistanceUser farthest = Collections.max(kept);
        if (farthest.getDistance() > distance) {
            kept.set(kept.indexOf(farthest), new DistanceUser(row, distance));
        }
    }

    // Translate instance indices into the identifiers supplied by the caller.
    for (List<DistanceUser> kept : nearestByCluster.values()) {
        for (DistanceUser entry : kept) {
            entry.setIdentifier(arffToIdUser.get(entry.getIdentifier()));
        }
    }

    return nearestByCluster;
}

From source file:org.montp2.m1decol.ter.servlets.AnalyserMessageServlet.java

License:Open Source License

/**
 * Processes a submitted message and stores the matching users in the session.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>write the lower-cased {@code message} request parameter to
 *       {@code ROOT_PATH + "message.txt"} and run the lemmatization step;</li>
 *   <li>build a one-instance sparse ARFF file from the header found in
 *       {@code ARFF_BASE} and the lemmas read from
 *       {@code ROOT_PATH + "message_lemma.txt"};</li>
 *   <li>load the k-means model from {@code IN_MODEL} and pick the centroid
 *       closest to that instance;</li>
 *   <li>read the {@code NEIGHBOR} file and collect the {@code id_user}
 *       entries listed under the selected cluster;</li>
 *   <li>store the resolved users in the session attribute
 *       {@code "LIST_USERS"}.</li>
 * </ol>
 * The temporary files are removed in all cases and the response is then
 * redirected to the results page. Any exception raised while processing is
 * printed and otherwise ignored, so the redirect always happens.
 *
 * @param request  servlet request carrying the {@code message} parameter
 * @param response servlet response, redirected to the results page
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if the redirect fails
 */
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    response.setContentType("text/html;charset=UTF-8");
    String message = request.getParameter("message");
    HttpSession session = request.getSession();
    try {
        // Create the message file.
        // NOTE(review): a missing "message" parameter raises a
        // NullPointerException here, which the catch block below reports.
        OutputStreamUtils.writeSimple(message.toLowerCase(), ROOT_PATH + "message.txt");
        UniGramsPreProcessing uni = new UniGramsPreProcessing();
        uni.computeLemmatization(ROOT_PATH, STOP_WORD);

        // Create the ARFF file.
        List<String> arffData = InputStreamUtils.readByLine(ARFF_BASE);
        List<String> mgsLem = new ArrayList<String>();

        for (String line : InputStreamUtils.readByLine(ROOT_PATH + "message_lemma.txt")) {
            mgsLem.addAll(Arrays.asList(line.split("\\s")));
        }

        StringBuilder arffMessage = new StringBuilder();
        arffMessage.append("@relation 'Message_Utilisateur'\n\n");

        // Sparse instance: "{<attribute index> 1,<attribute index> 1,...}".
        StringBuilder vectorMessage = new StringBuilder();
        vectorMessage.append("{");

        // The header copy starts at line index 2; the attribute index of a
        // header line is taken to be (line index - 2).
        for (int i = 2; i < arffData.size(); i++) {
            String line = arffData.get(i);
            if (line.equalsIgnoreCase("@data")) {
                arffMessage.append(line).append("\n");
                break;
            }

            if (!line.equals("")) {
                String[] values = line.split("\\s");
                // Guard against header lines with a single token, which
                // previously raised ArrayIndexOutOfBoundsException.
                if (values.length > 1 && mgsLem.contains(values[1])) {
                    // Separate entries with a comma instead of appending a
                    // trailing one that has to be stripped afterwards.
                    if (vectorMessage.length() > 1) {
                        vectorMessage.append(",");
                    }
                    vectorMessage.append(i - 2).append(" 1");
                }
            }

            arffMessage.append(line).append("\n");
        }

        // With no matching attribute this yields "{}" (an all-zero sparse
        // instance) rather than a lone "}" with the opening brace cut off.
        arffMessage.append(vectorMessage).append("}\n");

        OutputStreamUtils.writeSimple(arffMessage.toString(), ROOT_PATH + "message_arff.arff");

        // Find the cluster.
        SimpleKMeans kmeans = WekaUtils.loadModel(IN_MODEL);

        EuclideanDistance eclidean = (EuclideanDistance) kmeans.getDistanceFunction();

        Instances data = new Instances(WekaUtils.loadARFF(ROOT_PATH + "message_arff.arff"));

        Instances clusterCentroid = kmeans.getClusterCentroids();

        double dist = Double.MAX_VALUE;
        // Stays at -99 when no centroid is closer than Double.MAX_VALUE.
        int cluster_current = -99;

        for (int i = 0; i < clusterCentroid.numInstances(); i++) {
            System.out.println("cluster:" + i);
            double newDist = eclidean.distance(clusterCentroid.instance(i), data.instance(0));
            if (newDist < dist) {
                cluster_current = i;
                dist = newDist;
            }
        }

        // Find the 10 profiles.
        System.out.println("cluster_current:" + cluster_current);

        // The neighbor file is scanned for a "Cluster:<n>" line matching the
        // selected cluster; the "id_user:<id>" lines that follow are
        // collected until the next "Cluster" line.
        boolean findUsers = false;
        List<Integer> idUsers = new ArrayList<Integer>();
        for (String line : InputStreamUtils.readByLine(NEIGHBOR)) {
            if (!"".equals(line)) {
                String[] values = line.split(":");

                if (!findUsers && values[0].equalsIgnoreCase("Cluster")) {
                    if (cluster_current == Integer.parseInt(values[1])) {
                        findUsers = true;
                    }
                } else {

                    if (findUsers && values[0].equalsIgnoreCase("id_user")) {
                        idUsers.add(Integer.parseInt(values[1].trim()));
                    }

                    if (findUsers && values[0].equalsIgnoreCase("Cluster")) {
                        break;
                    }
                }

            }

        }

        AbstractBusiness business = new ForumBusinness();
        session.setAttribute("LIST_USERS", business.findUsersByIDs(idUsers));

    } catch (Exception e) {
        // Best effort: report the failure and still redirect below.
        e.printStackTrace();
    } finally {
        // Always remove the temporary files created for this request.
        FileUtils.removeFile(ROOT_PATH + "message.txt");
        FileUtils.removeFile(ROOT_PATH + "message_lemma.txt");
        FileUtils.removeFile(ROOT_PATH + "message_arff.arff");
    }
    response.sendRedirect("/CategorizeUserForum/results.jsp");
}

From source file:probcog.bayesnets.learning.DomainLearner.java

License:Open Source License

/**
 * Sorts the domain of the given node, for which the given clusterer has
 * been learnt, in ascending order of cluster centroid.
 *
 * <p>The centroid value used for ordering is attribute 0 of each cluster
 * centroid. Every original domain entry appears exactly once in the sorted
 * domain, including when several centroids share the same value; entries
 * with equal centroids keep their original relative order.
 *
 * @param node      the node whose domain is replaced by its sorted version
 * @param clusterer the clusterer whose centroids define the sort order
 */
protected void sortClusteredDomain(BeliefNode node, SimpleKMeans clusterer) {
    // get domain sort order (sort by centroid, ascending),
    // i.e. get an unsorted and a sorted version of
    // the centroids array
    int numClusters = clusterer.getNumClusters();
    double[] values = clusterer.getClusterCentroids().attributeToDoubleArray(0);
    double[] sorted_values = values.clone();
    Arrays.sort(sorted_values);
    // create new sorted domain
    Discrete domain = (Discrete) node.getDomain();
    Discrete sorted_domain = new Discrete();
    // Marks the original indices already placed, so duplicate centroid
    // values do not cause the same name to be added more than once.
    boolean[] placed = new boolean[numClusters];
    for (int new_idx = 0; new_idx < numClusters; new_idx++) {
        for (int old_idx = 0; old_idx < numClusters; old_idx++) {
            // Double.compare uses the same total order as Arrays.sort, so
            // every sorted value (NaN included) finds its original index.
            if (!placed[old_idx]
                    && Double.compare(values[old_idx], sorted_values[new_idx]) == 0) {
                placed[old_idx] = true;
                sorted_domain.addName(domain.getName(old_idx));
                break;
            }
        }
    }
    // apply new, sorted domain
    bn.bn.changeBeliefNodeDomain(node, sorted_domain);
}

From source file:wekimini.InputGenerator.java

/**
 * Clusters the current dataset with k-means and adds the result to the
 * training data.
 *
 * <p>Calls {@code buildDataset()}, builds a {@code SimpleKMeans} clusterer
 * with the requested number of clusters on {@code dataset}, stores the
 * resulting centroids in {@code clusters} and then calls
 * {@code addKmClustersToTraining()}.
 *
 * @param numClusters number of clusters requested from the clusterer
 * @throws Exception if configuring or building the clusterer fails
 */
public void selectKmClusters(int numClusters) throws Exception {
    buildDataset();

    SimpleKMeans clusterer = new SimpleKMeans();
    clusterer.setNumClusters(numClusters);
    clusterer.buildClusterer(dataset);

    // Keep only the centroids; the clusterer itself is discarded.
    clusters = clusterer.getClusterCentroids();
    addKmClustersToTraining();
}