Example usage for the weka.clusterers.XMeans constructor XMeans()

Introduction

On this page you can find example usages of the weka.clusterers.XMeans constructor XMeans().

Prototype

public XMeans()

Source Link

Document

The default constructor.

Usage

From source file:experimentos.CDBlocagem.java

License:Open Source License

/**
 * Executes the naive duplicate detection on the <code>CD</code> data source. During the process all duplicates will be written onto the console.
 * /*  ww w  . ja va2  s.  co m*/
 * @param args
 *            No arguments will be processed.
 * @throws IOException
 *             If an error occurs while reading from the file.
 */
public static void main(String[] args) throws IOException {
    EuclideanDistance ec = new EuclideanDistance();

    //KMedoids km = new KMedoids(50, 100, ec);

    // enables dynamic data-loading for file-based sorting
    //GlobalConfig.getInstance().setInMemoryObjectThreshold(10000);

    // instantiates the CSV data source for reading records
    // "cddb" is the source identifier
    //CSVSource dataSource = new CSVSource("cddb", new File("cd.csv"));
    //dataSource.enableHeader();
    Dataset data = FileHandler.loadDataset(new File("cd1.csv"));
    System.err.println(data.size());
    System.err.println(data.get(5).keySet().first());

    System.err.println(" testeeeeeeee  1   ");
    XMeans cl = new XMeans();
    Clusterer cl2 = new WekaClusterer(cl);
    //Clusterer cl = new KMeans(10, 1,ec);
    System.err.println(" testeeeeeeee 2    ");
    Dataset[] dt = cl2.cluster(data);
    System.err.println(" testeeeeeeee     " + dt.length);
    System.err.println(data);
    FileHandler.exportDataset(data, new File("output.txt"));

    // uses the id attribute for the object id - this call is optional, if no id attribute is set, DuDe will generate its own object ids
    //dataSource.addIdAttributes("pk");

    // TextBasedSubkey artistSubkey = new TextBasedSubkey("artist");
    //artistSubkey.setIgnoredCharactersRegEx(TextBasedSubkey.NO_VOWELS_REGEX);
    //artistSubkey.setRange(2);

    // instantiates the CSV data source for reading the goldstandard
    // "goldstandard" is the goldstandard identifier
    //CSVSource goldstandardSource = new CSVSource("goldstandard", new File("cd_gold.csv"));
    //goldstandardSource.enableHeader();

    // instantiate the gold standard
    // "cddb" is the source identifier
    //GoldStandard goldStandard = new GoldStandard(goldstandardSource);
    //goldStandard.setFirstElementsObjectIdAttributes("disc1_id");
    //goldStandard.setSecondElementsObjectIdAttributes("disc2_id");
    //goldStandard.setSourceIdLiteral("cddb");

    // instantiates the naive duplicate detection algorithm
    //Algorithm algorithm = new NaiveDuplicateDetection();
    //algorithm.enableInMemoryProcessing();

    // adds the "data" to the algorithm
    //algorithm.addDataSource(dataSource);

    // instantiates the similarity function
    // checks the Levenshtein distance of the CD titles
    //LevenshteinDistanceFunction similarityFunction = new LevenshteinDistanceFunction("title");

    // writes the duplicate pairs onto the console by using the Json syntax
    //DuDeOutput output = new JsonOutput(System.out);

    // instantiate statistic component to calculate key figures
    // like runtime, number of comparisons, precision and recall
    //StatisticComponent statistic = new StatisticComponent(goldStandard, algorithm);

    // the actual computation starts
    // the algorithm returns each generated pair step-by-step
    //long tempoInicioProcesso = System.currentTimeMillis();
    // long tempoASerDecontado = 0;
    //for (DuDeObjectPair pair : algorithm) {
    /**final double similarity = similarityFunction.getSimilarity(pair);
    if (similarity > 0.9) {
    // if it is a duplicate - print it and add it to the
    // statistic component as duplicate
                    long tempoInicio = System.currentTimeMillis();
                           
                    DoubleMetaphone dbm = new DoubleMetaphone();
           String keyBlock1 = dbm.encode(pair.getFirstElement().getAttributeValues("artist").toString());
           String keyBlock2 = dbm.encode(pair.getSecondElement().getAttributeValues("artist").toString());
           String id1 = dbm.encode(pair.getFirstElement().getAttributeValues("pk").toString());
           String id2 = dbm.encode(pair.getSecondElement().getAttributeValues("pk").toString());
            
           Vertice v1 = new Vertice(id1, "Base 1", 0);
           Vertice v2 = new Vertice(id2, "Base 1", 0);
           BlockIndex BI = new BlockIndex();
                  BI .insertVertice(keyBlock1, v1);
                  BI .insertVertice(keyBlock2, v1);
                   tempoASerDecontado = tempoASerDecontado + (System.currentTimeMillis()-tempoInicio);
                            
    //output.write(pair);
                    System.err.println(  pair.getSecondElement().getAttributeValues("pk").toString());
                                
    System.out.println();
    statistic.addDuplicate(pair);
    } else {
    // if it is not a duplicate, add it to the statistic
    // component as non-duplicate
    statistic.addNonDuplicate(pair);
    }
    }
        System.err.println(tempoASerDecontado + " Subtrair");
    //statistic.setEndTime();
        System.err.println(" Tempo total processamento " + (System.currentTimeMillis()-tempoInicioProcesso)   );
                
                
            
    // Write statistics
    StatisticOutput statisticOutput = new SimpleStatisticOutput(System.out, statistic);
    statisticOutput.writeStatistics();
    System.out.println("Experiment finished.");
                
                
            
    // clean up
    dataSource.cleanUp();
    goldStandard.close();
    }**/
}

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.XMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>
 * This method is specialized for <b>xmeans</b> clustering: the absolute behavior models are
 * converted to Weka instances, clustered with X-Means, and one relative behavior model is
 * created for each resulting centroid.
 *
 * <p>
 * Exceptions raised during clustering are printed to standard error; in that case the
 * Behavior Mix built up to that point (possibly without entries) is returned.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // instances set generated from the absolute behavior models
        final Instances instances = getInstances(behaviorModelsAbsolute);
        final int numberOfInstances = instances.numInstances();

        // XMeans --> Weka
        final XMeans xmeans = new XMeans();

        // the seed is optional; without it the clusterer keeps its own default
        final String seedValue = CommandLineArgumentsHandler.getSeedValue();
        if (seedValue != null) {
            xmeans.setSeed(Integer.parseInt(seedValue));
        }

        // distance function
        final DistanceFunction euclideanDistance = new EuclideanDistance();
        euclideanDistance.setInstances(instances);
        xmeans.setDistanceF(euclideanDistance);

        // get number of clusters to be generated; a missing or empty maximum means
        // "same as minimum". The comparison is done on content, since comparing strings
        // with != only checks object identity.
        final int numberOfClustersMin = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());
        final String maxArgument = CommandLineArgumentsHandler.getNumberOfClustersMax();
        final int numberOfClustersMax;
        if (maxArgument != null && !maxArgument.isEmpty()) {
            numberOfClustersMax = Integer.parseInt(maxArgument);
        } else {
            numberOfClustersMax = numberOfClustersMin;
        }

        // clustering
        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);

        // build cluster
        xmeans.buildClusterer(instances);

        // NOTE(review): the evaluation result is not read anywhere in this method
        final ClusterEvaluation clusterEvaluation = new ClusterEvaluation();
        clusterEvaluation.setClusterer(xmeans);
        clusterEvaluation.evaluateClusterer(instances);

        final Instances resultingCentroids = xmeans.getClusterCenters();
        final int numberOfClusters = resultingCentroids.numInstances();

        // set assignments and clustersize; each instance is classified exactly once
        final int[] clustersize = new int[numberOfClusters];
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            final int cluster = xmeans.clusterInstance(instances.instance(s));
            assignments[s] = cluster;
            clustersize[cluster]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
        clusteringMetrics.calculateInterClusteringSimilarity(resultingCentroids);
        clusteringMetrics.calculateIntraClusteringSimilarity(resultingCentroids, instances, assignments);
        clusteringMetrics.calculateBetas();

        clusteringMetrics.printErrorMetricsHeader();
        clusteringMetrics.printErrorMetrics(numberOfClusters);
        clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < numberOfClusters; i++) {

            final Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative frequency of cluster i: share of all instances assigned to it
            final double relativeFrequency = (double) clustersize[i] / (double) numberOfInstances;

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency,
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionException should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}

From source file:org.iobserve.analysis.behavior.clustering.xmeans.XMeansClustering.java

License:Apache License

/**
 * Clusters the given instances with Weka's X-Means and wraps the outcome in a
 * {@link ClusteringResults}.
 *
 * <p>
 * The clusterer is seeded with a freshly drawn random value on every call and is configured
 * with this object's distance metric and minimum/maximum number of clusters.
 *
 * @param instances
 *            data to cluster in Weka format
 * @return the clustering results, or an empty {@link Optional} if clustering failed (the
 *         failure is logged)
 */
private Optional<ClusteringResults> getClusteringResults(final Instances instances) {
    final XMeans xMeansClusterer = new XMeans();

    xMeansClusterer.setSeed(new Random().nextInt(Integer.MAX_VALUE));
    xMeansClusterer.setDistanceF(this.distanceMetric);

    xMeansClusterer.setMinNumClusters(this.minClusters);
    xMeansClusterer.setMaxNumClusters(this.maxClusters);

    try {
        xMeansClusterer.buildClusterer(instances);

        /**
         * Code used from org.iobserve.analysis.userbehavior.XMeansClustering to use
         * org.iobserve.analysis.userbehavior.ClusteringResults
         */
        // assignments[s] is the index of the cluster that instance s belongs to
        final int numberOfInstances = instances.numInstances();
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            assignments[s] = xMeansClusterer.clusterInstance(instances.instance(s));
        }

        final Instances clusterCenters = xMeansClusterer.getClusterCenters();
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        final ClusteringResults xMeansClusteringResults = new ClusteringResults("X-Means",
                clusterCenters.numInstances(), assignments, clusteringMetrics);

        return Optional.of(xMeansClusteringResults);

    } catch (final Exception e) { // NOPMD NOCS api dependency
        XMeansClustering.LOGGER.error("Clustering failed.", e);
    }

    return Optional.empty();
}

From source file:org.iobserve.analysis.behavior.karlsruhe.XMeansClustering.java

License:Apache License

/**
 * Clusters the given instances with X-Means using a normalizing Manhattan distance.
 *
 * <p>
 * The permitted number of clusters is the range
 * {@code numberOfUserGroupsFromInputUsageModel +/- varianceOfUserGroups}. If the upper bound
 * of that range is below 2, exactly one cluster is requested; otherwise the lower bound is
 * raised to at least 2.
 *
 * @param instances
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            seed handed to the X-Means clusterer
 * @return the clustering results that contain the number of cluster and the assignments, or
 *         {@code null} if clustering failed (the exception is printed to standard error)
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattanDistance = new ManhattanDistance();
        manhattanDistance.setDontNormalize(false);
        manhattanDistance.setInstances(instances);
        xmeans.setDistanceF(manhattanDistance);

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else if (numberOfClustersMin < 2) {
            numberOfClustersMin = 2;
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        // assignments[s] is the index of the cluster that instance s belongs to
        final int numberOfInstances = instances.numInstances();
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
        }

        final Instances clusterCenters = xmeans.getClusterCenters();
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", clusterCenters.numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOPMD NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}

From source file:org.iobserve.analysis.userbehavior.XMeansClustering.java

License:Apache License

/**
 *
 * @param instances/*w ww.  j a v a 2s. c om*/
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of cluster and the assignments
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances,
        final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {

    ClusteringResults xMeansClusteringResults = null;

    try {

        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        final NormalizableDistance manhattenDistance = new ManhattanDistance();
        manhattenDistance.setDontNormalize(false);
        manhattenDistance.setInstances(instances);
        xmeans.setDistanceF(manhattenDistance);

        int[] clustersize = null;
        final int[] assignments = new int[instances.numInstances()];

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else {
            if (numberOfClustersMin < 2) {
                numberOfClustersMin = 2;
            }
        }

        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);
        xmeans.buildClusterer(instances);

        clustersize = new int[xmeans.getClusterCenters().numInstances()];
        for (int s = 0; s < instances.numInstances(); s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
            clustersize[xmeans.clusterInstance(instances.instance(s))]++;
        }

        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(xmeans.getClusterCenters(), instances,
                assignments);
        clusteringMetrics.calculateSimilarityMetrics();

        xMeansClusteringResults = new ClusteringResults("X-Means", xmeans.getClusterCenters().numInstances(),
                assignments, clusteringMetrics);

    } catch (final Exception e) { // NOCS due to broken xmeans implementation triggering
                                  // Exception
        e.printStackTrace();
    }

    return xMeansClusteringResults;
}

From source file:org.montp2.m1decol.ter.clustering.XMeansClustering.java

License:Open Source License

/**
 * Clusters the ARFF data set at {@code inPath} with X-Means, prints every instance followed
 * by its cluster index to standard output, and saves the trained clusterer to
 * {@code outPath}.
 *
 * @param inPath
 *            location of the ARFF file to cluster
 * @param outPath
 *            location the trained clusterer is saved to
 * @param propertiesCluster
 *            not read by this method; all settings are hard-coded below
 * @return the trained X-Means clusterer
 * @throws Exception
 *             propagated from loading the data, building the clusterer or saving the model
 */
public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    XMeans xmeans = new XMeans();
    // hand the configured distance function to the clusterer; without this call the
    // settings above would never take effect
    xmeans.setDistanceF(euclideanDistance);
    xmeans.setSeed(10);
    xmeans.setMinNumClusters(5);
    xmeans.setMaxNumClusters(12);
    xmeans.setMaxKMeans(1000);
    xmeans.setMaxKMeansForChildren(1000);
    xmeans.setBinValue(1.0);
    xmeans.setCutOffFactor(0.5);
    xmeans.setDebugLevel(0);
    // NOTE(review): this method used to set 500 first and then 1; the last value wins, so 1
    // is the value that has always been in effect - confirm which limit is actually wanted
    xmeans.setMaxIterations(1);
    xmeans.buildClusterer(inputInstances);

    // report the cluster index of every input instance
    Enumeration<Instance> e = inputInstances.enumerateInstances();
    while (e.hasMoreElements()) {
        Instance ins = e.nextElement();
        int clusterNum = xmeans.clusterInstance(ins);
        System.out.println(ins.toString());
        System.out.println(clusterNum);
    }

    WekaUtils.saveModel(xmeans, outPath);

    return xmeans;
}

From source file:qoala.arff.java

/**
 * Clusters a new {@code Instances} object built from the {@code dataSet} field with X-Means
 * (between 2 and 100 clusters), prints the cluster evaluation summary and then lists,
 * cluster by cluster, which instance was assigned to which cluster.
 *
 * @throws Exception
 *             propagated from building, evaluating or querying the clusterer
 */
public void XMenas() throws Exception {

    Instances train = new Instances(dataSet);
    XMeans xm = new XMeans();

    xm.setMaxNumClusters(100);
    xm.setMinNumClusters(2);
    xm.buildClusterer(train);

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xm);
    eval.evaluateClusterer(train);
    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());
    System.out.println("# - cluster - distribution");

    // classify every instance once instead of once per (cluster, instance) pair
    final int numInstances = train.numInstances();
    final int[] assignments = new int[numInstances];
    for (int i = 0; i < numInstances; i++) {
        assignments[i] = xm.clusterInstance(train.instance(i));
    }

    // print the members of cluster 0 first, then cluster 1, and so on
    final int numClusters = eval.getNumClusters();
    for (int j = 0; j < numClusters; j++) {
        for (int i = 0; i < numInstances; i++) {
            if (assignments[i] == j) {
                System.out.println("Instance " + i + " -> Cluster number: " + assignments[i]);
            }
        }
    }

}

From source file:tr.gov.ulakbim.jDenetX.experiments.wrappers.EvalActiveBoostingID.java

License:Open Source License

/**
 * Clusters the given data with X-Means (with the class attribute filtered out) and keeps
 * only those instances whose class value equals the class that the cluster evaluation maps
 * to their cluster.
 *
 * <p>
 * Side effects on {@code data}: its class index is set to the last attribute and its first
 * instance is removed.
 *
 * @param data
 *            instances to sample from; the last attribute is used as the class
 * @return a new data set with the same header as {@code data} containing the matching
 *         instances; it is empty if building the clusterer fails, and holds whatever was
 *         collected so far if the evaluation fails (exceptions are printed to standard
 *         error)
 * @throws NullPointerException
 *             if {@code data} is {@code null}
 */
public static Instances clusterInstances(Instances data) {
    if (data == null) {
        throw new NullPointerException("Data is null at clusteredInstances method");
    }

    data.setClassIndex(data.numAttributes() - 1);

    // Empty data set sharing the header of data. Copying the header directly keeps the class
    // attribute; enumerateAttributes() would skip it whenever the caller had already set a
    // class index, leaving the result one attribute short.
    Instances sampledData = new Instances(data, 0);
    sampledData.setClassIndex(data.classIndex());

    // the clusterer must not see the class attribute (Remove uses 1-based indices)
    Remove filter = new Remove();
    filter.setAttributeIndices("" + (data.classIndex() + 1));
    data.remove(0);//In Wavelet Stream of MOA always the first element comes without class

    XMeans xmeans = new XMeans();
    Instances dataClusterer;
    try {
        filter.setInputFormat(data);
        dataClusterer = Filter.useFilter(data, filter);
        // with more than two classes in the pool, one cluster fewer is the lower bound
        int minNumClusters = noOfClassesInPool > 2 ? noOfClassesInPool - 1 : noOfClassesInPool;
        xmeans.setMinNumClusters(minNumClusters);
        xmeans.setMaxNumClusters(data.numClasses() + 1);
        System.out.println("No of classes in the pool: " + noOfClassesInPool);
        xmeans.setUseKDTree(true);
        xmeans.buildClusterer(dataClusterer);
        System.out.println("Xmeans\n:" + xmeans);
    } catch (Exception e) {
        e.printStackTrace();
        // without a trained clusterer there is nothing to evaluate
        return sampledData;
    }

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(xmeans);
    try {
        eval.evaluateClusterer(data);
        int[] classesToClustersMap = eval.getClassesToClusters();
        for (int i = 0; i < data.size(); i++) {
            // dataClusterer holds the same rows as data, minus the class attribute
            int clusterNo = xmeans.clusterInstance(dataClusterer.get(i));
            //Check if the class value of instance and class value of cluster matches
            if ((int) data.get(i).classValue() == classesToClustersMap[clusterNo]) {
                sampledData.add(data.get(i));
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return sampledData;
}

From source file:tutorials.tools.TutorialWekaClusterer.java

License:Open Source License

/**
 * Clusters the iris data file with a Weka X-Means clusterer wrapped in a
 * {@code WekaClusterer} bridge and prints how many clusters were produced.
 *
 * @param args
 *            not used
 * @throws Exception
 *             propagated from loading the data or from clustering
 */
public static void main(String[] args) throws Exception {
    /* Read the data file; 4 and "," are presumably the class column and the separator */
    final File irisFile = new File("devtools/data/iris.data");
    final Dataset irisData = FileHandler.loadDataset(irisFile, 4, ",");
    /* Create the Weka clusterer and hand it to the bridge */
    final Clusterer bridge = new WekaClusterer(new XMeans());
    /* Run the clustering and report the number of clusters */
    final int clusterCount = bridge.cluster(irisData).length;
    System.out.println(clusterCount);
}