Example usage for weka.clusterers XMeans setDistanceF

Introduction

On this page you can find example usages of the method weka.clusterers.XMeans.setDistanceF.

Prototype

public void setDistanceF(DistanceFunction distanceF)

Source Link

Document

Sets the distance function used by the clusterer. (The original description reads: gets the "binary" distance value.)

Usage

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.XMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>
 * This method is specialized for <b>xmeans</b> clustering: the absolute
 * behavior models are converted to a Weka instances set, clustered with
 * {@code XMeans} using a Euclidean distance function, and one relative
 * behavior model is added to the returned Behavior Mix per resulting
 * centroid, weighted with the relative size of its cluster.
 *
 * <p>
 * The seed and the minimum/maximum number of clusters are read from
 * {@code CommandLineArgumentsHandler}. If no maximum is given (null or
 * empty string), the minimum is used as maximum, too.
 *
 * <p>
 * Any exception raised while clustering is printed to the standard error
 * stream; in that case the Behavior Mix is returned with whatever entries
 * had been added up to that point (possibly none).
 *
 * @param behaviorModelsAbsolute
 *            absolute behavior models to be clustered.
 * @param useCaseRepository
 *            repository providing the use cases for the vertices of the
 *            centroid behavior models.
 * @return the Behavior Mix holding one entry per cluster centroid.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // instances set generated from the absolute behavior models
        final Instances instances = getInstances(behaviorModelsAbsolute);

        // XMeans --> Weka
        final XMeans xmeans = new XMeans();

        // a seed is only set when one was passed on the command line
        final String seedValue = CommandLineArgumentsHandler.getSeedValue();
        if (seedValue != null) {
            xmeans.setSeed(Integer.parseInt(seedValue));
        }

        // distance function, used with its default options
        final DistanceFunction euclideanDistance = new EuclideanDistance();
        euclideanDistance.setInstances(instances);
        xmeans.setDistanceF(euclideanDistance);

        // get number of clusters to be generated; the maximum is optional
        // and falls back to the minimum. The value is compared by content
        // (not by reference), so that any empty string is recognized.
        final int numberOfClustersMin = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());
        final String numberOfClustersMaxValue = CommandLineArgumentsHandler.getNumberOfClustersMax();
        final int numberOfClustersMax;
        if (numberOfClustersMaxValue != null && !numberOfClustersMaxValue.isEmpty()) {
            numberOfClustersMax = Integer.parseInt(numberOfClustersMaxValue);
        } else {
            numberOfClustersMax = numberOfClustersMin;
        }

        // clustering
        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);

        // build cluster
        xmeans.buildClusterer(instances);

        final ClusterEvaluation clusterEvaluation = new ClusterEvaluation();
        clusterEvaluation.setClusterer(xmeans);
        clusterEvaluation.evaluateClusterer(instances);

        final Instances resultingCentroids = xmeans.getClusterCenters();
        final int numberOfInstances = instances.numInstances();
        final int numberOfCentroids = resultingCentroids.numInstances();

        // assignments[s] is the cluster index of instance s;
        // clustersize[c] is the number of instances assigned to cluster c
        final int[] assignments = new int[numberOfInstances];
        final int[] clustersize = new int[numberOfCentroids];

        // each instance is classified once; the result feeds both arrays
        for (int s = 0; s < numberOfInstances; s++) {
            final int cluster = xmeans.clusterInstance(instances.instance(s));
            assignments[s] = cluster;
            clustersize[cluster]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
        clusteringMetrics.calculateInterClusteringSimilarity(resultingCentroids);
        clusteringMetrics.calculateIntraClusteringSimilarity(resultingCentroids, instances, assignments);
        clusteringMetrics.calculateBetas();

        clusteringMetrics.printErrorMetricsHeader();
        clusteringMetrics.printErrorMetrics(numberOfCentroids);
        clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < numberOfCentroids; i++) {

            final Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated vertex that represents the final state will be
            // added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative frequency of cluster i
            final double relativeFrequency = (double) clustersize[i] / (double) numberOfInstances;

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency,
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}

From source file:org.iobserve.analysis.behavior.clustering.xmeans.XMeansClustering.java

License:Apache License

/**
 * Clusters the given instances with Weka's X-Means, using a freshly drawn
 * random seed, the configured distance metric and the configured minimum
 * and maximum number of clusters.
 *
 * @param instances
 *            data to cluster in Weka format
 * @return the clustering results (number of clusters, per-instance
 *         assignments and similarity metrics), or an empty Optional if the
 *         clusterer raised an exception; the failure is logged.
 */
private Optional<ClusteringResults> getClusteringResults(final Instances instances) {
    final XMeans xMeansClusterer = new XMeans();

    xMeansClusterer.setSeed(new Random().nextInt(Integer.MAX_VALUE));
    xMeansClusterer.setDistanceF(this.distanceMetric);

    xMeansClusterer.setMinNumClusters(this.minClusters);
    xMeansClusterer.setMaxNumClusters(this.maxClusters);

    try {
        xMeansClusterer.buildClusterer(instances);

        // Code used from org.iobserve.analysis.userbehavior.XMeansClustering to use
        // org.iobserve.analysis.userbehavior.ClusteringResults

        // assignments[s] is the cluster index of instance s; each instance is
        // classified exactly once
        final int numberOfInstances = instances.numInstances();
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            assignments[s] = xMeansClusterer.clusterInstance(instances.instance(s));
        }

        final Instances clusterCenters = xMeansClusterer.getClusterCenters();

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        final ClusteringResults xMeansClusteringResults = new ClusteringResults("X-Means",
                clusterCenters.numInstances(), assignments, clusteringMetrics);

        return Optional.of(xMeansClusteringResults);

    } catch (final Exception e) { // NOPMD NOCS api dependency
        XMeansClustering.LOGGER.error("Clustering failed.", e);
    }

    return Optional.empty();
}

From source file:org.iobserve.analysis.behavior.karlsruhe.XMeansClustering.java

License:Apache License

/**
 * Clusters the given instances with Weka's X-Means, using a normalizing
 * Manhattan distance.
 *
 * <p>
 * The range of cluster counts handed to X-Means is
 * {@code numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups} to
 * {@code numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups}. If
 * the upper bound is below 2, both bounds are set to 1; otherwise the lower
 * bound is raised to at least 2.
 *
 * @param instances
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            seed passed to the X-Means clusterer
 * @return the clustering results that contain the number of clusters and the assignments, or
 *         {@code null} if clustering raised an exception (its stack trace is printed)
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattanDistance = new ManhattanDistance();
        manhattanDistance.setDontNormalize(false);
        manhattanDistance.setInstances(instances);
        xmeans.setDistanceF(manhattanDistance);

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else if (numberOfClustersMin < 2) {
            numberOfClustersMin = 2;
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        // assignments[s] is the cluster index of instance s; each instance is
        // classified exactly once
        final int numberOfInstances = instances.numInstances();
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
        }

        final Instances clusterCenters = xmeans.getClusterCenters();

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", clusterCenters.numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}

From source file:org.iobserve.analysis.userbehavior.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances/*from   www. j  ava2s.  c  o  m*/
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}