Example usage for weka.clusterers SimpleKMeans getAssignments

List of usage examples for weka.clusterers SimpleKMeans getAssignments

Introduction

On this page you can find example usages of the weka.clusterers.SimpleKMeans method getAssignments().

Prototype

public int[] getAssignments() throws Exception 

Source Link

Document

Gets the assignments for each instance.

Usage

From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java

License:Open Source License

/**
 * Groups user identifiers by the cluster their instance was assigned to.
 *
 * <p>The model is loaded from {@code inModel} and its stored assignments are read
 * positionally: the i-th assignment is paired with the i-th instance of the ARFF file.
 * NOTE(review): this assumes the ARFF file is the same data, in the same order, that the
 * model was built on - confirm against the caller.
 *
 * @param arffFilter   path of the ARFF file; only its instance count is used here
 * @param inModel      path of the serialized k-means model
 * @param arffToIdUser maps an instance index in the ARFF file to a user identifier
 * @return for each cluster index, the user identifiers of the instances assigned to it
 *         (an index missing from {@code arffToIdUser} contributes a {@code null} entry)
 * @throws Exception if loading the model or data fails, or the model cannot provide assignments
 */
public Map<Integer, List<Integer>> computeInstanceByCluster(String arffFilter, String inModel,
        Map<Integer, Integer> arffToIdUser) throws Exception {

    final SimpleKMeans model = WekaUtils.loadModel(inModel);
    final Instances instances = new Instances(WekaUtils.loadARFF(arffFilter));
    final int[] assignments = model.getAssignments();

    final Map<Integer, List<Integer>> usersPerCluster = new HashMap<Integer, List<Integer>>();

    final int total = instances.numInstances();
    int row = 0;
    while (row < total) {
        final int cluster = assignments[row];

        // Lazily create the bucket the first time a cluster is seen.
        if (!usersPerCluster.containsKey(cluster)) {
            usersPerCluster.put(cluster, new ArrayList<Integer>());
        }
        usersPerCluster.get(cluster).add(arffToIdUser.get(row));
        row++;
    }

    return usersPerCluster;
}

From source file:org.montp2.m1decol.ter.clustering.NearestNeighbor.java

License:Open Source License

/**
 * Collects, for every cluster of a stored k-means model, up to ten instances of that
 * cluster together with their distance to the cluster centroid.
 *
 * <p>Once a cluster already holds ten entries, a new instance replaces the entry returned by
 * {@code Collections.max} when that entry's distance is strictly greater than the new one.
 * NOTE(review): this keeps the ten closest instances only if the natural ordering of
 * {@code DistanceUser} is by distance - confirm in that class.
 *
 * <p>Entries are created with the instance index as identifier; before returning, each
 * identifier is replaced by the value {@code arffToIdUser} maps it to.
 *
 * @param arffData     path of the ARFF file holding the clustered instances
 * @param inModel      path of the serialized k-means model
 * @param arffToIdUser maps an instance index in the ARFF file to a user identifier
 * @return for each cluster index, the retained {@code DistanceUser} entries
 * @throws Exception if loading fails or the model cannot provide assignments
 */
public Map<Integer, List<DistanceUser>> computeNearestNeighbor(String arffData, String inModel,
        Map<Integer, Integer> arffToIdUser) throws Exception {

    final SimpleKMeans model = WekaUtils.loadModel(inModel);
    final EuclideanDistance metric = (EuclideanDistance) model.getDistanceFunction();
    final Instances instances = new Instances(WekaUtils.loadARFF(arffData));

    final int[] assignments = model.getAssignments();
    final Instances centroids = model.getClusterCentroids();

    // Every cluster gets a (possibly empty) list up front, so lookups below never miss.
    final Map<Integer, List<DistanceUser>> closest = new HashMap<Integer, List<DistanceUser>>();
    final int clusterCount = centroids.numInstances();
    for (int c = 0; c < clusterCount; c++) {
        closest.put(c, new ArrayList<DistanceUser>());
    }

    for (int row = 0; row < instances.numInstances(); row++) {
        final int cluster = assignments[row];
        final double distance = metric.distance(centroids.instance(cluster), instances.instance(row));
        final List<DistanceUser> candidates = closest.get(cluster);

        if (candidates.size() < 10) {
            candidates.add(new DistanceUser(row, distance));
            continue;
        }

        // List is full: overwrite the current maximum only when the newcomer beats it.
        final DistanceUser farthest = Collections.max(candidates);
        if (farthest.getDistance() > distance) {
            candidates.set(candidates.indexOf(farthest), new DistanceUser(row, distance));
        }
    }

    // Translate instance indices into user identifiers.
    for (List<DistanceUser> members : closest.values()) {
        for (DistanceUser member : members) {
            member.setIdentifier(arffToIdUser.get(member.getIdentifier()));
        }
    }

    return closest;
}

From source file:qoala.arff.java

/**
 * Clusters a copy of {@code dataSet} with k-means, prints the cluster evaluation summary and
 * then prints every instance index with its cluster, grouped by cluster number.
 *
 * @param numberOfCLuster number of clusters to build
 * @throws Exception if building or evaluating the clusterer fails
 */
public void SimpleKmeans(int numberOfCLuster) throws Exception {

    Instances train = new Instances(dataSet);

    SimpleKMeans skm = new SimpleKMeans();
    // Order must be preserved, otherwise getAssignments() cannot map back to instances.
    skm.setPreserveInstancesOrder(true);
    skm.setNumClusters(numberOfCLuster);
    // The seed has to be configured BEFORE building; setting it afterwards (as the code
    // previously did) cannot influence the clustering that was already computed.
    skm.setSeed(10);
    skm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(train);

    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    int[] assignments = skm.getAssignments();

    System.out.println("# - cluster - distribution");

    // One pass over the assignments per cluster so the output is grouped by cluster number.
    for (int j = 0; j < skm.getNumClusters(); j++) {
        for (int i = 0; i < assignments.length; i++) {
            int clusterNum = assignments[i];
            if (clusterNum == j) {
                System.out.println("Instance " + i + " -> Cluster number: " + clusterNum);
            }
        }
    }
}

From source file:swm.project.mappings.UserToUserCluster.java

/**
 * Clusters the data read from {@code MappingConstants.USER_MOVIE_CLUSTERS} into 20 k-means
 * clusters (Euclidean distance) and rebuilds both lookup maps:
 * {@code userToUserClusterHistory} (user to cluster) and
 * {@code userClustersToUsersHistory} (cluster to users).
 *
 * <p>The user id of an instance is taken from the value of its first attribute.
 *
 * @throws FileNotFoundException if the input file does not exist
 * @throws IOException           if reading or closing the input file fails
 * @throws Exception             if building the clusterer or reading assignments fails
 */
private void clusterUserHistoryWithKmeans() throws FileNotFoundException, IOException, Exception {
    userToUserClusterHistory = new HashMap<>();
    userClustersToUsersHistory = new HashMap<>();

    // The data set is parsed inside the constructor, so the reader can be closed right
    // after; try-with-resources also closes it when parsing throws.
    final Instances instanceValues;
    try (Reader reader = new FileReader(MappingConstants.USER_MOVIE_CLUSTERS)) {
        instanceValues = new Instances(reader);
    }

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setNumClusters(20);
    // Required so that assignments[i] refers to instance i of the input.
    kmeans.setPreserveInstancesOrder(true);
    kmeans.setDistanceFunction(new EuclideanDistance());
    kmeans.buildClusterer(instanceValues);

    int[] assignments = kmeans.getAssignments();
    for (int row = 0; row < assignments.length; row++) {
        int clusterNo = assignments[row];
        int user = (int) instanceValues.get(row).value(0);
        userToUserClusterHistory.put(user, clusterNo);

        // Create the per-cluster list only when the cluster is seen for the first time.
        ArrayList<Integer> users = userClustersToUsersHistory.get(clusterNo);
        if (users == null) {
            users = new ArrayList<>();
            userClustersToUsersHistory.put(clusterNo, users);
        }
        users.add(user);
    }
}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

/**
 * Returns the instances of {@code data} whose k-means cluster agrees with their class.
 *
 * <p>The last attribute of {@code data} is set as the class attribute (this modifies the
 * passed-in data set's class index). The class attribute is filtered out, k-means is built
 * with one cluster per class, and a classes-to-clusters evaluation is run. An instance is
 * kept when the class mapped to its assigned cluster equals its own class value.
 *
 * <p>If clustering or evaluation fails, the stack trace is printed and an empty data set
 * with the same header is returned.
 *
 * @param data the labelled instances to sample from; its instances are not removed or reordered
 * @return a new data set with the same header containing the retained instances
 * @throws NullPointerException if {@code data} is {@code null}
 */
public Instances clusteredInstances(Instances data) {
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }

    data.setClassIndex(data.numAttributes() - 1);

    // Independent, empty result set sharing data's header. The previous code aliased
    // `data` and removed elements while indexing forward, which dropped every other
    // instance from the caller's data set.
    Instances sampled_data = new Instances(data, 0);

    SimpleKMeans sKmeans = new SimpleKMeans();
    Remove filter = new Remove();
    // Remove takes 1-based attribute indices.
    filter.setAttributeIndices("" + (data.classIndex() + 1));

    int[] assignments;
    try {
        filter.setInputFormat(data);
        Instances dataClusterer = Filter.useFilter(data, filter);
        String[] options = new String[3];
        options[0] = "-I"; // max. iterations
        options[1] = "500";
        options[2] = "-O"; // preserve instance order so getAssignments() is available
        sKmeans.setNumClusters(data.numClasses());
        sKmeans.setOptions(options);
        sKmeans.buildClusterer(dataClusterer);
        System.out.println("Kmeans\n:" + sKmeans);
        // Keep the primitive array: Arrays.asList(int[]) would produce a one-element
        // List<int[]>, not a list of the individual assignments.
        assignments = sKmeans.getAssignments();
        System.out.println(Arrays.toString(assignments));
    } catch (Exception e) {
        e.printStackTrace();
        return sampled_data;
    }
    System.out.println("Assignments\n: " + Arrays.toString(assignments));

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(sKmeans);
    try {
        eval.evaluateClusterer(data);
    } catch (Exception e) {
        e.printStackTrace();
        return sampled_data;
    }

    // Per the Weka Javadoc this array is ordered by cluster number; each entry is the
    // class mapped to that cluster. NOTE(review): entries for clusters without a class
    // are expected to be negative and therefore never match - confirm.
    int[] classesToClustersMap = eval.getClassesToClusters();
    if (classesToClustersMap == null) {
        return sampled_data;
    }

    // Walk over the instances (not over the mapping array, as the old loop did).
    int count = Math.min(assignments.length, data.numInstances());
    for (int i = 0; i < count; i++) {
        if (data.get(i).classIsMissing()) {
            continue;
        }
        int cluster = assignments[i];
        if (cluster < 0 || cluster >= classesToClustersMap.length) {
            continue;
        }
        if (classesToClustersMap[cluster] == (int) data.get(i).classValue()) {
            sampled_data.add(data.get(i));
        }
    }
    return sampled_data;
}