Example usage for weka.clusterers SimpleKMeans buildClusterer

List of usage examples for weka.clusterers SimpleKMeans buildClusterer

Introduction

On this page you can find example usage of the weka.clusterers SimpleKMeans buildClusterer method.

Prototype

@Override
public void buildClusterer(Instances data) throws Exception 

Document

Generates a clusterer.
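
Before the project-specific examples below, here is a minimal, self-contained sketch of the typical call sequence: configure a SimpleKMeans instance, load a dataset into Instances, and call buildClusterer. The file name data.arff, the cluster count of 3, and the seed of 10 are placeholders chosen for illustration, not values required by the API.

import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SimpleKMeansExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset (placeholder path; any ARFF file without a class attribute set will do).
        Instances data = DataSource.read("data.arff");

        // Configure the clusterer before building it.
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setNumClusters(3);               // number of clusters to find
        kmeans.setSeed(10);                     // seed for the random initial centroids
        kmeans.setPreserveInstancesOrder(true); // required for getAssignments()

        // Generate the clusterer from the data.
        kmeans.buildClusterer(data);

        // Inspect the result.
        System.out.println("Within-cluster SSE: " + kmeans.getSquaredError());
        int[] assignments = kmeans.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            System.out.println("Instance " + i + " -> cluster " + assignments[i]);
        }
    }
}

Note that getAssignments() is only available when setPreserveInstancesOrder(true) was called before buildClusterer; several of the examples below rely on this.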

Usage

From source file:kmeansapps.Kmeans.java

public void startCluster(String path, int numOfCluster, JTextArea textarea) {
    try {
        // TODO code application logic here
        SimpleKMeans kmeans = new SimpleKMeans();
        String[] columnNames = new String[numOfCluster];
        kmeans.setSeed(10);
        kmeans.setPreserveInstancesOrder(true);
        kmeans.setNumClusters(numOfCluster);

        BufferedReader datafile = readDataFile(path);
        Instances data = new Instances(datafile);

        kmeans.buildClusterer(data);
        double SSE = kmeans.getSquaredError();
        // This array returns the cluster number (starting with 0) for each instance
        // The array has as many elements as the number of instances
        int[] assignments = kmeans.getAssignments();

        // build a two-dimensional ArrayList to record which cluster each instance belongs to
        ArrayList<ArrayList<String>> listOfCluster = new ArrayList<ArrayList<String>>();
        ArrayList<String> listMemberOfCluster;

        // create one member list per cluster
        for (int i = 0; i < numOfCluster; i++) {
            listMemberOfCluster = new ArrayList<>();
            listOfCluster.add(listMemberOfCluster);
        }
        // add each instance index to the list of its assigned cluster
        int j = 0;
        for (int clusterNum : assignments) {
            listOfCluster.get(clusterNum).add(j + "");
            j++;
        }
        textarea.setText("");
        String result = "";
        for (int i = 0; i < listOfCluster.size(); i++) {
            result = result + ("Cluster - " + i + " ==> ");
            for (String listMemberOfCluster1 : listOfCluster.get(i)) {
                result = result + (listMemberOfCluster1 + " ");
            }
            result = result + ("\n");
        }
        result = result + ("\nSSE : ") + kmeans.getSquaredError();
        textarea.setText(result);
    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * K-Means Clustering
 * @param data - matrix of observations (numObs x numFeatures)
 * @param k - number of clusters
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);

    // uses Euclidean distance by default
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setPreserveInstancesOrder(true);
        clusterer.setNumClusters(k);
        clusterer.buildClusterer(ds);

        // cluster centers
        Instances centers = clusterer.getClusterCentroids();
        Cluster[] clusters = new Cluster[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++) {
            Instance inst = centers.instance(i);
            double[] mean = new double[inst.numAttributes()];
            for (int j = 0; j < mean.length; j++) {
                mean[j] = inst.value(j);
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members
        int[] assignments = clusterer.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            clusters[assignments[i]].addMember(i);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }

}

From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java

License:Open Source License

private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k)
        throws Exception {
    final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet);

    final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet);

    final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction);
    skm.buildClusterer(trdataSet);
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(trdataSet);

    final int numClusters = eval.getNumClusters();
    final List<String> possibleValues = new ArrayList<String>(numClusters);
    for (int c = 0; c < numClusters; c++)
        possibleValues.add("cluster_" + c);

    final double[] clusterAssignments = eval.getClusterAssignments();

    final int numAttributes = dataSet.numAttributes();
    final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes);
    for (int j = 0; j < numAttributes; j++) {
        //System.out.println(clusterAssignments[j]+" "+(int)clusterAssignments[j]);
        valueForEachFeature.add((int) clusterAssignments[j]);
    }

    return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature);
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License:Open Source License

/**
 * Runs k-means clustering with K clusters on the given instances.
 * @param newInstances the instances to cluster
 * @param K the number of clusters
 * @return the cluster assignment for each instance
 * @throws Exception if the clusterer cannot be built or evaluated
 */
public static double[] doKMeans(final Instances newInstances, final int K) throws Exception {
    final SimpleKMeans clusterer = new SimpleKMeans();
    clusterer.setOptions(
            Utils.splitOptions("-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance"));

    clusterer.buildClusterer(newInstances);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(clusterer);
    eval.evaluateClusterer(newInstances);

    double[] ass = eval.getClusterAssignments();
    return ass;
}

From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java

License:Open Source License

/**
 * Clusters the instances with k-means, choosing the number of clusters via the pseudo-F statistic.
 * @return the cluster assignment for each instance
 * @throws Exception if the clusterer cannot be built or evaluated
 */
public double[] getClusteredInstances() throws Exception {

    //Removing potential class index 
    instances.setClassIndex(-1);

    //Clustering using Kmeans
    int k;
    double max = 0, r2 = 0, pseudoF = 0;

    //Testing from 2 to maxClusters clusters; the upper bound could be made a parameter of this method
    SimpleKMeans bestKMeans = new SimpleKMeans();
    for (k = 2; k <= maxClusters; k++) {
        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(k);
        kMeans.buildClusterer(instances);
        //Choosing the "optimal" number of clusters
        r2 = R2(kMeans);
        pseudoF = pseudoF(r2, k);
        //System.out.println(pseudo_f);
        if (pseudoF > max) {
            max = pseudoF;
            bestKMeans = kMeans;
        }
    }

    //Real clustering using the chosen number
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(bestKMeans);
    eval.evaluateClusterer(instances);
    double[] clusterAssignments = eval.getClusterAssignments();

    this.usedKmeans = bestKMeans;

    return clusterAssignments;

}

From source file:lu.lippmann.cdb.lab.mds.ClassicMDS.java

License:Open Source License

/**
 * Simplifies the instances by clustering them into at most maxInstances clusters
 * and returning the centroids together with the per-cluster instance lists.
 */
private static KmeansResult getSimplifiedInstances(final Instances instances, final DistanceFunction df,
        final int maxInstances) throws Exception {
    Instances centroids = null;
    List<Instances> clusters = null;

    final int savedClassIndex = instances.classIndex();
    instances.setClassIndex(-1);
    final SimpleKMeans clusterer = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(maxInstances, df);
    clusterer.buildClusterer(instances);
    clusters = WekaMachineLearningUtil.computeClusters(clusterer, instances).getClustersList();
    instances.setClassIndex(savedClassIndex);
    final int numClusters = clusters.size();
    //Set class index for each cluster instances
    //System.out.println("Setting class index to each cluster : " + savedClassIndex);
    for (int i = 0; i < numClusters; i++) {
        clusters.get(i).setClassIndex(savedClassIndex);
    }
    //Save centroids
    centroids = clusterer.getClusterCentroids();

    return new KmeansResult(centroids, clusters);
}

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}
 * 
 * <p>
 * This method is specialized for <b>kmeans</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // Returns a valid instances set, generated based on the absolute
        // behavior models
        Instances instances = getInstances(behaviorModelsAbsolute);

        // KMeans --> Weka
        SimpleKMeans kmeans = new SimpleKMeans();

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // kmeans.setDistanceFunction(manhattanDistance);

        // Euclidean distance function (the -D "don't normalize" option is left commented out below)
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // get number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // clustering
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // must be specified in a fixed way
            kmeans.setNumClusters(clusterSize);

            // build cluster
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();

            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
            // clusteringMetrics.printClusterAssignmentsToSession(assignments,
            // clusterSize);

        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {

            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java

License:Open Source License

@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    SimpleKMeans clusterer = new SimpleKMeans();

    int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);

    try {
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration<?> e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
        return result;

    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {

        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }

        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination starting with all
        // clusters together and then removing the one whose removal
        // maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        // NormalizeIterableInterval<FloatType, Img<FloatType>> imgNorm =
        // new NormalizeIterableInterval<FloatType, Img<FloatType>>();
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;

        // ShowInSameFrame showInFrame = new ShowInSameFrame();

        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;

            // Pair<FloatType, FloatType> minmax =
            // Operations.compute(new MinMax<FloatType>(), tmp);
            // Operations.<FloatType, FloatType> map(
            // new Normalize<FloatType>(minmax.getA().getRealDouble(),
            // minmax.getB().getRealDouble(),
            // -Float.MAX_VALUE, Float.MAX_VALUE)).compute(
            // tmp, tmp);

            // showInFrame.show(tmp, 2.0);

            if (bestScore < globalBestScore) {
                break;
            }

            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;

        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();

            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] = (int) Math.round(meanPos / num)
                            - CENTER_COL;
                } else {
                    // setWeight((int)((index - 1) / res.dimension(0)), 0);
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }

        }

    } catch (Exception e) {
        // log the failure rather than silently swallowing it
        e.printStackTrace();
    }

}

From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java

License:Open Source License

public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setPreserveInstancesOrder(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.PERSERVE_INSTANCE)));
    kmeans.setDontReplaceMissingValues(Boolean
            .valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DONT_REPLACE_MISSING_VALUES)));
    kmeans.setDisplayStdDevs(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DISPLAY_STD_DEVS)));
    kmeans.setMaxIterations(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.MAX_ITERATIONS)));
    kmeans.setNumClusters(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.NUM_CLUSTERS)));
    kmeans.setSeed(10);
    //kmeans.setSeed(
    //      Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.SEED)));
    kmeans.setDistanceFunction(euclideanDistance);
    kmeans.buildClusterer(inputInstances);

    WekaUtils.saveModel(kmeans, outPath);

    /*
    * To obtain the per-cluster percentages:
    * ClusterEvaluation eval = new ClusterEvaluation();
    * eval.setClusterer(kmeans);
    * eval.evaluateClusterer(inputInstances);
    * System.out.println(eval.clusterResultsToString());
    */

    return kmeans;
}