Example usage for weka.clusterers XMeans buildClusterer

Introduction

In this page you can find the example usage for weka.clusterers XMeans buildClusterer.

Prototype

@Override
public void buildClusterer(Instances data) throws Exception

Source Link

Document

Generates the X-Means clusterer.

Usage

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.XMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}// www  .  j a  v  a2 s . co  m
 * 
 * <p>
 * This method is specialized for <b>xmeans</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // Returns a valid instances set, generated based on the absolut
        // behavior models

        Instances instances = getInstances(behaviorModelsAbsolute);

        // XMeans --> Weka
        XMeans xmeans = new XMeans();

        if (CommandLineArgumentsHandler.getSeedValue() != null) {
            xmeans.setSeed(Integer.parseInt(CommandLineArgumentsHandler.getSeedValue()));
        }

        // distance function
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        xmeans.setDistanceF(euclideanDistance);

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // xmeans.setDistanceF(manhattanDistance);

        int[] clustersize = null;
        // create new assignments
        int[] assignments = new int[instances.numInstances()];

        // get number of clusters to be generated.
        int numberOfClustersMin = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());
        int numberOfClustersMax = 0;
        if (CommandLineArgumentsHandler.getNumberOfClustersMax() != "") {
            numberOfClustersMax = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMax());
        } else {
            numberOfClustersMax = numberOfClustersMin;
        }

        // clustering
        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);

        // build cluster
        xmeans.buildClusterer(instances);

        ClusterEvaluation clusterEvaluation = new ClusterEvaluation();
        clusterEvaluation.setClusterer(xmeans);
        clusterEvaluation.evaluateClusterer(instances);

        // clusterSize
        clustersize = new int[xmeans.getClusterCenters().numInstances()];

        // set assignments and clustersize
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }

        ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
        clusteringMetrics.calculateInterClusteringSimilarity(xmeans.getClusterCenters());
        clusteringMetrics.calculateIntraClusteringSimilarity(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateBetas();

        clusteringMetrics.printErrorMetricsHeader();
        clusteringMetrics.printErrorMetrics(xmeans.getClusterCenters().numInstances());
        clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
        // clusteringMetrics.printClusterAssignmentsToSession(assignments,
        // xmeans.getClusterCenters().numInstances());

        Instances resultingCentroids = xmeans.getClusterCenters();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {

            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}

From source file:org.iobserve.analysis.behavior.clustering.xmeans.XMeansClustering.java

License:Apache License

private Optional<ClusteringResults> getClusteringResults(final Instances instances) {
    final XMeans xMeansClusterer = new XMeans();

    xMeansClusterer.setSeed(new Random().nextInt(Integer.MAX_VALUE));
    xMeansClusterer.setDistanceF(this.distanceMetric);

    xMeansClusterer.setMinNumClusters(this.minClusters);
    xMeansClusterer.setMaxNumClusters(this.maxClusters);

    try {//from   ww  w.j a  va2  s.c  o  m
        xMeansClusterer.buildClusterer(instances);

        /**
         * Code used from org.iobserve.analysis.userbehavior.XMeansClustering to use
         * org.iobserve.analysis.userbehavior.ClusteringResults
         */
        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];
        clustersize = new int[xMeansClusterer.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xMeansClusterer.clusterInstance(instances.instance(s));
            clustersize[xMeansClusterer.clusterInstance(instances.instance(s))]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xMeansClusterer.getClusterCenters(),
                instances, assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        final ClusteringResults xMeansClusteringResults = new ClusteringResults("X-Means",
                xMeansClusterer.getClusterCenters().numInstances(), assignments, clusteringMetrics);

        return Optional.of(xMeansClusteringResults);

    } catch (final Exception e) { // NOPMD NOCS api dependency
        XMeansClustering.LOGGER.error("Clustering failed.", e);
    }

    return Optional.empty();
}

From source file:org.iobserve.analysis.behavior.karlsruhe.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances/*from ww  w .  j  a  v a2  s .c  om*/
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}

From source file:org.iobserve.analysis.userbehavior.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances/*from   ww  w.  jav a  2  s . c  o  m*/
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}

From source file:org.montp2.m1decol.ter.clustering.XMeansClustering.java

License:Open Source License

public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    XMeans xmeans = new XMeans();
    xmeans.setMaxIterations(500);//w w  w . java 2s  .co m
    xmeans.setSeed(10);
    xmeans.setMinNumClusters(5);
    xmeans.setMaxNumClusters(12);
    xmeans.setMaxKMeans(1000);
    xmeans.setMaxKMeansForChildren(1000);
    xmeans.setBinValue(1.0);
    xmeans.setCutOffFactor(0.5);
    xmeans.setDebugLevel(0);
    xmeans.setMaxIterations(1);
    xmeans.buildClusterer(inputInstances);

    Enumeration<Instance> e = inputInstances.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance ins = e.nextElement();
        int cluster_num = xmeans.clusterInstance(ins);
        System.out.println(ins.toString());
        System.out.println(cluster_num);
    }

    WekaUtils.saveModel(xmeans, outPath);

    return xmeans;
}

From source file:qoala.arff.java

public void XMenas() throws Exception {

    Instances train = new Instances(dataSet);
    XMeans xm = new XMeans();

    xm.setMaxNumClusters(100);/*from w ww  . j a  v a2 s. co  m*/
    xm.setMinNumClusters(2);
    xm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xm);
    eval.evaluateClusterer(train);
    eval.getNumClusters();
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());
    System.out.println("# - cluster - distribution");
    for (int j = 0; j < eval.getNumClusters(); j++) {

        for (int i = 0; i < train.numInstances(); i++) {
            int cluster = xm.clusterInstance(train.instance(i));
            if (cluster == j)

                System.out.println("Instance " + i + " -> Cluster number: " + cluster);
        }
    }

}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

public static Instances clusterInstances(Instances data) {
    XMeans xmeans = new XMeans();
    Remove filter = new Remove();
    Instances dataClusterer = null;//from   w  w  w  .java  2s . c o  m
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }
    //Get the attributes from the data for creating the sampled_data object

    ArrayList<Attribute> attrList = new ArrayList<Attribute>();
    Enumeration attributes = data.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        attrList.add((Attribute) attributes.nextElement());
    }

    Instances sampled_data = new Instances(data.relationName(), attrList, 0);
    data.setClassIndex(data.numAttributes() - 1);
    sampled_data.setClassIndex(data.numAttributes() - 1);
    filter.setAttributeIndices("" + (data.classIndex() + 1));
    data.remove(0);//In Wavelet Stream of MOA always the first element comes without class

    try {
        filter.setInputFormat(data);
        dataClusterer = Filter.useFilter(data, filter);
        String[] options = new String[4];
        options[0] = "-L"; // max. iterations
        options[1] = Integer.toString(noOfClassesInPool - 1);
        if (noOfClassesInPool > 2) {
            options[1] = Integer.toString(noOfClassesInPool - 1);
            xmeans.setMinNumClusters(noOfClassesInPool - 1);
        } else {
            options[1] = Integer.toString(noOfClassesInPool);
            xmeans.setMinNumClusters(noOfClassesInPool);
        }
        xmeans.setMaxNumClusters(data.numClasses() + 1);
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        xmeans.setUseKDTree(true);
        //xmeans.setOptions(options);
        xmeans.buildClusterer(dataClusterer);
        System.out.println("Xmeans\n:" + xmeans);
    } catch (Exception e) {
        e.printStackTrace();
    }
    //System.out.println("Assignments\n: " + assignments);
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xmeans);
    try {
        eval.evaluateClusterer(data);
        int classesToClustersMap[] = eval.getClassesToClusters();
        //check the classes to cluster map
        int clusterNo = 0;
        for (int i = 0; i < data.size(); i++) {
            clusterNo = xmeans.clusterInstance(dataClusterer.get(i));
            //Check if the class value of instance and class value of cluster matches
            if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) {
                sampled_data.add(data.get(i));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ((Instances) sampled_data);
}