Example usage for weka.clusterers ClusterEvaluation evaluateClusterer

List of usage examples for weka.clusterers ClusterEvaluation evaluateClusterer

Introduction

This page collects example usages of weka.clusterers.ClusterEvaluation#evaluateClusterer taken from open-source projects.

Prototype

public void evaluateClusterer(Instances test) throws Exception 

Document

Evaluate the clusterer on a set of instances.
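
All of the examples below follow the same pattern: build a clusterer, hand it to a ClusterEvaluation via setClusterer, then call evaluateClusterer on the instances to be evaluated. The following is a minimal, self-contained sketch of that pattern; the file name "data.arff" and the choice of SimpleKMeans with three clusters are illustrative assumptions, not part of the examples below.

import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EvaluateClustererSketch {
    public static void main(String[] args) throws Exception {
        // Load the data; "data.arff" is an assumed file name for illustration
        Instances data = DataSource.read("data.arff");
        // Clustering expects no class attribute to be set
        data.setClassIndex(-1);

        // Build any Clusterer; SimpleKMeans with 3 clusters is used here as an example
        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3);
        kMeans.buildClusterer(data);

        // Evaluate the built clusterer on a set of instances
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(kMeans);
        eval.evaluateClusterer(data);

        System.out.println(eval.clusterResultsToString());
        System.out.println("Number of clusters: " + eval.getNumClusters());
    }
}

Note that evaluateClusterer accepts any set of instances: the examples below evaluate on the training data, but a held-out test set can be passed instead.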

Usage

From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java

License: Open Source License

private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k)
        throws Exception {
    final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet);

    final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet);

    final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction);
    skm.buildClusterer(trdataSet);
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(trdataSet);

    final int numClusters = eval.getNumClusters();
    final List<String> possibleValues = new ArrayList<String>(numClusters);
    for (int c = 0; c < numClusters; c++)
        possibleValues.add("cluster_" + c);

    final double[] clusterAssignments = eval.getClusterAssignments();

    final int numAttributes = dataSet.numAttributes();
    final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes);
    for (int j = 0; j < numAttributes; j++) {
        //System.out.println(clusterAssignments[j]+" "+(int)clusterAssignments[j]);
        valueForEachFeature.add((int) clusterAssignments[j]);
    }

    return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature);
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License: Open Source License

/**
 * Runs SimpleKMeans with K clusters on the given instances and returns
 * the cluster assignment of each instance.
 *
 * @param newInstances the instances to cluster
 * @param K the number of clusters
 * @return the cluster assignment for each instance
 * @throws Exception if clustering or evaluation fails
 */
public static double[] doKMeans(final Instances newInstances, final int K) throws Exception {
    final SimpleKMeans clusterer = new SimpleKMeans();
    clusterer.setOptions(
            Utils.splitOptions("-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance"));

    clusterer.buildClusterer(newInstances);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(clusterer);
    eval.evaluateClusterer(newInstances);

    double[] ass = eval.getClusterAssignments();
    return ass;
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License: Open Source License

/**
 * Builds the given Weka clusterer on the instances and groups the
 * instances by their assigned cluster.
 *
 * @param wekaClusterer the clusterer to build and evaluate
 * @param instances the instances to cluster
 * @return one IndexedInstance container per cluster
 * @throws Exception if clustering or evaluation fails
 */
public static List<IndexedInstance> computeClusters(final Clusterer wekaClusterer, final Instances instances)
        throws Exception {
    final Instances ii = new Instances(instances);
    ii.setClassIndex(-1);

    wekaClusterer.buildClusterer(ii);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(wekaClusterer);
    eval.evaluateClusterer(ii);

    final int clustersCount = eval.getNumClusters();
    final List<IndexedInstance> clustersList = new ArrayList<IndexedInstance>(clustersCount);

    //Initialize instances
    for (int k = 0; k < clustersCount; k++) {
        clustersList.add(new IndexedInstance(new Instances(instances, 0), new HashMap<Integer, Integer>()));
    }

    final double[] ass = eval.getClusterAssignments();
    if (ass.length != ii.numInstances())
        throw new IllegalStateException();
    for (int i = 0; i < ass.length; i++) {
        IndexedInstance idxi = clustersList.get((int) ass[i]);
        idxi.getInstances().add(instances.instance(i));
        int pos = idxi.getInstances().size() - 1;
        idxi.getMapOrigIndex().put(pos, i);
    }

    return clustersList;
}

From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java

License: Open Source License

/**
 * Clusters the instances with SimpleKMeans, choosing the number of clusters
 * (between 2 and maxClusters) that maximizes the pseudo-F statistic.
 *
 * @return the cluster assignment for each instance
 * @throws Exception if clustering or evaluation fails
 */
public double[] getClusteredInstances() throws Exception {

    //Removing potential class index 
    instances.setClassIndex(-1);

    //Clustering using Kmeans
    int k;
    double max = 0, r2 = 0, pseudoF = 0;

    //Testing from 2 to maxClusters clusters; ideally this range would be a parameter of this method
    SimpleKMeans bestKMeans = new SimpleKMeans();
    for (k = 2; k <= maxClusters; k++) {
        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(k);
        kMeans.buildClusterer(instances);
        //Choosing the "optimal" number of clusters
        r2 = R2(kMeans);
        pseudoF = pseudoF(r2, k);
        //System.out.println(pseudoF);
        if (pseudoF > max) {
            max = pseudoF;
            bestKMeans = kMeans;
        }
    }

    //Real clustering using the chosen number
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(bestKMeans);
    eval.evaluateClusterer(instances);
    double[] clusterAssignments = eval.getClusterAssignments();

    this.usedKmeans = bestKMeans;

    return clusterAssignments;

}

From source file:myclusterer.MyClusterer.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    // TODO code application logic here

    String nameOfFile;
    Clusterer clusterer;
    Instances dataSet;
    ClusterEvaluation eval;

    //Read the input file name
    Scanner scan = new Scanner(System.in);
    System.out.print("Enter the name of the file to cluster: ");
    nameOfFile = scan.nextLine();
    try {
        //Read the ARFF file
        dataSet = WekaCode.readFileArff(nameOfFile);

        //Build Clusterer
        System.out.println(
                "Enter the clusterer model: 0.SimpleKMeans / 1.HierarchicalClusterer / 2.MyKMeans / 3.MyAgnes ");
        int clustererType = scan.nextInt();
        clusterer = WekaCode.buildClusterer(dataSet, clustererType);
        eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(dataSet);
        System.out.println("Cluster Evaluation:");
        System.out.println(eval.clusterResultsToString());

        //Given test set 
    } catch (Exception ex) {
        Logger.getLogger(MyClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.XMeansClusteringStrategy.java

License: Apache License

/**
 * {@inheritDoc}
 * 
 * <p>
 * This method is specialized for <b>xmeans</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // Returns a valid instances set, generated based on the absolute
        // behavior models

        Instances instances = getInstances(behaviorModelsAbsolute);

        // XMeans --> Weka
        XMeans xmeans = new XMeans();

        if (CommandLineArgumentsHandler.getSeedValue() != null) {
            xmeans.setSeed(Integer.parseInt(CommandLineArgumentsHandler.getSeedValue()));
        }

        // distance function
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        xmeans.setDistanceF(euclideanDistance);

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // xmeans.setDistanceF(manhattanDistance);

        int[] clustersize = null;
        // create new assignments
        int[] assignments = new int[instances.numInstances()];

        // get number of clusters to be generated.
        int numberOfClustersMin = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());
        int numberOfClustersMax = 0;
        if (!"".equals(CommandLineArgumentsHandler.getNumberOfClustersMax())) {
            numberOfClustersMax = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMax());
        } else {
            numberOfClustersMax = numberOfClustersMin;
        }

        // clustering
        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);

        // build cluster
        xmeans.buildClusterer(instances);

        ClusterEvaluation clusterEvaluation = new ClusterEvaluation();
        clusterEvaluation.setClusterer(xmeans);
        clusterEvaluation.evaluateClusterer(instances);

        // clusterSize
        clustersize = new int[xmeans.getClusterCenters().numInstances()];

        // set assignments and clustersize
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }

        ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
        clusteringMetrics.calculateInterClusteringSimilarity(xmeans.getClusterCenters());
        clusteringMetrics.calculateIntraClusteringSimilarity(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateBetas();

        clusteringMetrics.printErrorMetricsHeader();
        clusteringMetrics.printErrorMetrics(xmeans.getClusterCenters().numInstances());
        clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
        // clusteringMetrics.printClusterAssignmentsToSession(assignments,
        // xmeans.getClusterCenters().numInstances());

        Instances resultingCentroids = xmeans.getClusterCenters();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {

            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}

From source file:nl.uva.sne.commons.ClusterUtils.java

public static Map<String, String> bulidClusters(Clusterer clusterer, Instances data, String inDir)
        throws Exception {

    FilteredClusterer fc = new FilteredClusterer();
    String[] options = new String[2];
    options[0] = "-R"; // "range"
    options[1] = "1"; // we want to ignore the attribute that is in the position '1'
    Remove remove = new Remove(); // new instance of filter
    remove.setOptions(options); // set options

    fc.setFilter(remove); //add filter to remove attributes
    fc.setClusterer(clusterer); //bind FilteredClusterer to original clusterer
    fc.buildClusterer(data);

    Map<String, String> clusters = new HashMap<>();
    for (int i = 0; i < data.numInstances(); i++) {
        Instance inst = data.instance(i);
        int theClass = fc.clusterInstance(inst);
        String s = data.attribute(0).value(i);
        clusters.put(inDir + File.separator + s, String.valueOf(theClass));
        System.err.println(s + " is in cluster " + theClass);
    }
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(fc); // the cluster to evaluate
    eval.evaluateClusterer(data); // data to evaluate the clusterer on
    //        double ll = eval.getLogLikelihood();
    //        Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "LogLikelihood :{0}", ll);
    //
    //        if (clusterer instanceof SimpleKMeans) {
    //            double sqrErr = ((SimpleKMeans) clusterer).getSquaredError();
    //            Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "Squared Error:{0}", sqrErr);
    //        }

    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "# of clusters: {0}", eval.getNumClusters());
    Logger.getLogger(ClusterUtils.class.getName()).log(Level.INFO, "clusterResults: {0}",
            eval.clusterResultsToString());

    return clusters;
}

From source file:qoala.arff.java

public void SimpleKmeans(int numberOfCLuster) throws Exception {

    Instances train = new Instances(dataSet);

    SimpleKMeans skm = new SimpleKMeans();
    skm.setPreserveInstancesOrder(true);
    skm.setNumClusters(numberOfCLuster);
    skm.setSeed(10); // the seed must be set before building the clusterer to have any effect
    skm.buildClusterer(train);
    int[] clusterSizes = skm.getClusterSizes();

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(train);

    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    int[] assignments = skm.getAssignments();

    System.out.println("# - cluster - distribution");

    for (int j = 0; j < skm.getNumClusters(); j++) {
        int i = 0;
        for (int clusterNum : assignments) {

            if (clusterNum == j)

                System.out.println("Instance " + i + " -> Cluster number: " + clusterNum);

            i++;
        }
    }
}

From source file:qoala.arff.java

public void EMClustering(int NumberOfCluster) throws Exception {

    Instances train = new Instances(dataSet);
    String[] options = new String[2];
    options[0] = "-I";
    options[1] = "100";

    EM em = new EM();
    em.setOptions(options);
    em.setNumClusters(NumberOfCluster);
    em.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(em);
    eval.evaluateClusterer(train);
    eval.getNumClusters();
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    System.out.println("# - cluster - distribution");
    for (int j = 0; j < eval.getNumClusters(); j++) {

        for (int i = 0; i < train.numInstances(); i++) {

            int cluster = em.clusterInstance(train.instance(i));
            if (cluster == j)
                System.out.println("Instance " + i + " -> Cluster number: " + cluster);

        }
    }
}

From source file:qoala.arff.java

public void XMenas() throws Exception {

    Instances train = new Instances(dataSet);
    XMeans xm = new XMeans();

    xm.setMaxNumClusters(100);
    xm.setMinNumClusters(2);
    xm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xm);
    eval.evaluateClusterer(train);
    eval.getNumClusters();
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());
    System.out.println("# - cluster - distribution");
    for (int j = 0; j < eval.getNumClusters(); j++) {

        for (int i = 0; i < train.numInstances(); i++) {
            int cluster = xm.clusterInstance(train.instance(i));
            if (cluster == j)

                System.out.println("Instance " + i + " -> Cluster number: " + cluster);
        }
    }

}