Example usage for weka.clusterers SimpleKMeans setNumClusters

List of usage examples for weka.clusterers SimpleKMeans setNumClusters

Introduction

This page collects usage examples for weka.clusterers.SimpleKMeans#setNumClusters from open-source projects.

Prototype

@Override
public void setNumClusters(int n) throws Exception 

Document

Set the number of clusters to generate.
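
A minimal, self-contained sketch of the call before the project examples (the dataset path "data.arff" and the cluster count 3 are placeholders; setNumClusters is declared to throw Exception, so it must be handled or propagated):

import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SetNumClustersExample {
    public static void main(String[] args) throws Exception {
        // load a dataset ("data.arff" is a placeholder path)
        Instances data = DataSource.read("data.arff");

        SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(3); // may throw Exception, e.g. for a non-positive value
        kMeans.buildClusterer(data);

        // prints the centroids and basic statistics
        System.out.println(kMeans);
    }
}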

Usage

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * K-Means clustering.
 * @param data - matrix of observations (numObs x numFeatures)
 * @param numObs - number of observations (rows of data)
 * @param numFeatures - number of features (columns of data)
 * @param k - number of clusters
 */
public Cluster[] kmeans(double[][] data, int numObs, int numFeatures, int k) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);

    // uses Euclidean distance by default
    SimpleKMeans clusterer = new SimpleKMeans();
    try {
        clusterer.setPreserveInstancesOrder(true);
        clusterer.setNumClusters(k);
        clusterer.buildClusterer(ds);

        // cluster centers
        Instances centers = clusterer.getClusterCentroids();
        Cluster[] clusters = new Cluster[centers.numInstances()];
        for (int i = 0; i < centers.numInstances(); i++) {
            Instance inst = centers.instance(i);
            double[] mean = new double[inst.numAttributes()];
            for (int j = 0; j < mean.length; j++) {
                mean[j] = inst.value(j);
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members
        int[] assignments = clusterer.getAssignments();
        for (int i = 0; i < assignments.length; i++) {
            clusters[assignments[i]].addMember(i);
        }
        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }

}
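
Note that getAssignments() is only available because setPreserveInstancesOrder(true) is called before buildClusterer(); without that flag, SimpleKMeans throws an exception when the assignments are requested.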

From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java

License:Open Source License

/**
 * Clusters the instances with k-means, choosing the number of clusters
 * (between 2 and maxClusters) that maximises the pseudo-F statistic.
 * @return the cluster assignment for each instance
 * @throws Exception if clustering or evaluation fails
 */
public double[] getClusteredInstances() throws Exception {

    //Removing potential class index 
    instances.setClassIndex(-1);

    //Clustering using Kmeans
    int k;
    double max = 0, r2 = 0, pseudoF = 0;

    //Testing from 2 to maxClusters clusters; the range could be made a parameter of this method
    SimpleKMeans bestKMeans = new SimpleKMeans();
    for (k = 2; k <= maxClusters; k++) {
        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(k);
        kMeans.buildClusterer(instances);
        //Choosing the "optimal" number of clusters
        r2 = R2(kMeans);
        pseudoF = pseudoF(r2, k);
        if (pseudoF > max) {
            max = pseudoF;
            bestKMeans = kMeans;
        }
    }

    //Evaluate the clustering with the chosen number of clusters
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(bestKMeans);
    eval.evaluateClusterer(instances);
    double[] clusterAssignments = eval.getClusterAssignments();

    this.usedKmeans = bestKMeans;

    return clusterAssignments;

}
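
The R2 and pseudoF helpers are defined elsewhere in this class and are not shown in the excerpt. A plausible sketch of pseudoF, assuming the standard Calinski-Harabasz definition in which r2 is the fraction of variance explained by the clustering and n is the number of instances (the project's actual implementation may differ):

    // hypothetical sketch; the real pseudoF in this project may differ
    private double pseudoF(double r2, int k) {
        int n = instances.numInstances(); // total number of observations
        // explained variance per between-cluster degree of freedom over
        // unexplained variance per within-cluster degree of freedom
        return (r2 / (k - 1)) / ((1 - r2) / (n - k));
    }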

From source file:milk.classifiers.MIRBFNetwork.java

License:Open Source License

public Exemplars transform(Exemplars ex) throws Exception {

    // Throw all the instances together
    Instances data = new Instances(ex.exemplar(0).getInstances());
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        double weight = 1.0 / (double) curr.getInstances().numInstances();
        for (int j = 0; j < curr.getInstances().numInstances(); j++) {
            Instance inst = (Instance) curr.getInstances().instance(j).copy();
            inst.setWeight(weight);
            data.add(inst);
        }
    }
    double factor = (double) data.numInstances() / (double) data.sumOfWeights();
    for (int i = 0; i < data.numInstances(); i++) {
        data.instance(i).setWeight(data.instance(i).weight() * factor);
    }

    SimpleKMeans kMeans = new SimpleKMeans();
    kMeans.setNumClusters(m_num_clusters);
    MakeDensityBasedClusterer clust = new MakeDensityBasedClusterer();
    clust.setClusterer(kMeans);
    m_clm.setDensityBasedClusterer(clust);
    m_clm.setIgnoredAttributeIndices("" + (ex.exemplar(0).idIndex() + 1));
    m_clm.setInputFormat(data);

    // Use filter and discard result
    Instances tempData = Filter.useFilter(data, m_clm);
    tempData = new Instances(tempData, 0);
    tempData.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);

    // Go through exemplars and add them to new dataset
    Exemplars newExs = new Exemplars(tempData);
    for (int i = 0; i < ex.numExemplars(); i++) {
        Exemplar curr = ex.exemplar(i);
        Instances temp = Filter.useFilter(curr.getInstances(), m_clm);
        temp.insertAttributeAt(ex.exemplar(0).getInstances().attribute(0), 0);
        for (int j = 0; j < temp.numInstances(); j++) {
            temp.instance(j).setValue(0, curr.idValue());
        }
        newExs.add(new Exemplar(temp));
    }
    //System.err.println("Finished transforming");
    //System.err.println(newExs);
    return newExs;
}

From source file:myclusterer.WekaCode.java

public static Clusterer buildClusterer(Instances dataSet, int clusterType) throws Exception {
    Clusterer clusterer = null;
    // read the desired number of clusters once; every branch needs it
    Scanner scan = new Scanner(System.in);
    System.out.print("Enter the number of clusters: ");
    int K = scan.nextInt();
    if (clusterType == SimpleKMeans) {
        SimpleKMeans kmeans = new SimpleKMeans();
        kmeans.setNumClusters(K);
        clusterer = kmeans;
    } else if (clusterType == HierarchicalClusterer) {
        HierarchicalClusterer hierarchical = new HierarchicalClusterer();
        hierarchical.setNumClusters(K);
        clusterer = hierarchical;
    } else if (clusterType == MyKMeans) {
        MyKMeans kmeans = new MyKMeans();
        kmeans.setNumClusters(K);
        clusterer = kmeans;
    } else if (clusterType == MyAgnes) {
        MyAgnes agnes = new MyAgnes();
        agnes.setNumClusters(K);
        clusterer = agnes;
    }
    if (clusterer != null) {
        clusterer.buildClusterer(dataSet);
    }
    return clusterer;
}
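
Note that the weka.clusterers.Clusterer interface itself does not declare setNumClusters, so each branch has to call it on the concrete type before the reference is widened to Clusterer; the NumberOfClustersRequestable interface expresses this capability for clusterers that implement it, such as SimpleKMeans.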

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.KMeansClusteringStrategy.java

License:Apache License

/**
 * {@inheritDoc}
 * 
 * <p>
 * This method is specialized for <b>kmeans</b> clustering.
 */
@Override
public BehaviorMix apply(final BehaviorModelAbsolute[] behaviorModelsAbsolute,
        final UseCaseRepository useCaseRepository) {

    final ABMToRBMTransformer abmToRbmTransformer = new ABMToRBMTransformer();

    // Behavior Mix to be returned;
    final BehaviorMix behaviorMix = this.createBehaviorMix();

    try {

        // Build a valid Instances set based on the absolute
        // behavior models
        Instances instances = getInstances(behaviorModelsAbsolute);

        // k-means clustering via Weka's SimpleKMeans
        SimpleKMeans kmeans = new SimpleKMeans();

        // DistanceFunction manhattanDistance = new ManhattanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // manhattanDistance.setOptions(options);
        // manhattanDistance.setInstances(instances);
        // kmeans.setDistanceFunction(manhattanDistance);

        // distance function; the commented-out -D option would disable attribute normalization
        DistanceFunction euclideanDistance = new EuclideanDistance();
        // String[] options = new String[1];
        // options[0] = "-D";
        // euclideanDistance.setOptions(options);
        euclideanDistance.setInstances(instances);
        kmeans.setDistanceFunction(euclideanDistance);
        kmeans.setPreserveInstancesOrder(true);

        int[] clustersize = null;
        int[] assignments = null;

        // get number of clusters to be generated.
        int numberOfClusters = Integer.parseInt(CommandLineArgumentsHandler.getNumberOfClustersMin());

        // clustering
        for (int clusterSize = numberOfClusters; clusterSize <= numberOfClusters; clusterSize++) {
            // the cluster count is currently fixed, so this loop runs exactly once
            kmeans.setNumClusters(clusterSize);

            // build cluster
            kmeans.buildClusterer(instances);

            clustersize = kmeans.getClusterSizes();
            assignments = kmeans.getAssignments();

            ClusteringMetrics clusteringMetrics = new ClusteringMetrics();
            clusteringMetrics.calculateInterClusteringSimilarity(kmeans.getClusterCentroids());
            clusteringMetrics.calculateIntraClusteringSimilarity(kmeans.getClusterCentroids(), instances,
                    assignments);
            clusteringMetrics.calculateBetas();

            clusteringMetrics.printErrorMetricsHeader();
            clusteringMetrics.printErrorMetrics(kmeans.getClusterCentroids().numInstances());
            clusteringMetrics.printClusteringMetrics(clustersize, assignments, instances);
            // clusteringMetrics.printClusterAssignmentsToSession(assignments,
            // clusterSize);

        }

        Instances resultingCentroids = kmeans.getClusterCentroids();

        // for each centroid instance, create new behaviorModelRelative
        for (int i = 0; i < resultingCentroids.numInstances(); i++) {

            Instance centroid = resultingCentroids.instance(i);

            // create a Behavior Model, which includes all vertices only;
            // the vertices are associated with the use cases, and a
            // dedicated
            // vertex that represents the final state will be added;
            final BehaviorModelAbsolute behaviorModelAbsoluteCentroid = this
                    .createBehaviorModelAbsoluteWithoutTransitions(useCaseRepository.getUseCases());

            // install the transitions in between vertices;
            this.installTransitions(behaviorModelsAbsolute, behaviorModelAbsoluteCentroid, centroid,
                    assignments, i);

            // convert absolute to relative behaviorModel
            final BehaviorModelRelative behaviorModelRelative = abmToRbmTransformer
                    .transform(behaviorModelAbsoluteCentroid);

            // relative Frequency of cluster i
            double relativeFrequency = (double) clustersize[i] / (double) instances.numInstances();

            // create the (unique) Behavior Mix entry to be returned;
            final BehaviorMixEntry behaviorMixEntry = this.createBehaviorMixEntry(
                    AbstractClusteringStrategy.GENERIC_BEHAVIOR_MODEL_NAME, relativeFrequency, // relative frequency;
                    behaviorModelRelative);

            // add to resulting behaviorMix
            behaviorMix.getEntries().add(behaviorMixEntry);

        }

        return behaviorMix;

    } catch (ExtractionException e) {
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // if any error occurs, an ExtractionExeption should be thrown,
    // indicating the error that occurred;

    // the classes "NoClusteringStrategy" and "SimpleClusteringStrategy"
    // should give an idea for handling the Behavior Models and how to
    // use the helping methods of the (abstract) parent class.

    return behaviorMix;
}

From source file:nl.uva.sne.classifiers.Kmeans.java

@Override
public Map<String, String> cluster(String inDir) throws IOException, ParseException {
    try {

        Instances data = ClusterUtils.terms2Instances(inDir, false);

        DistanceFunction df;
        //            SimpleKMeans currently only supports the Euclidean and Manhattan distances.
        switch (distanceFunction) {
        case "Euclidean":
            df = new EuclideanDistance(data);
            break;
        case "Manhattan":
            df = new ManhattanDistance(data);
            break;
        default:
            df = new EuclideanDistance(data);
            break;
        }

        SimpleKMeans clusterer = new SimpleKMeans();

        Random rand = new Random(System.currentTimeMillis());
        int seed = rand.nextInt((Integer.MAX_VALUE - 1000000) + 1) + 1000000;
        clusterer.setSeed(seed);
        clusterer.setMaxIterations(1000000000);
        Logger.getLogger(Kmeans.class.getName()).log(Level.INFO, "Start clustering");
        clusterer.setPreserveInstancesOrder(true);

        clusterer.setNumClusters(numOfClusters);
        clusterer.setDistanceFunction(df);

        return ClusterUtils.bulidClusters(clusterer, data, inDir);

    } catch (Exception ex) {
        Logger.getLogger(Kmeans.class.getName()).log(Level.SEVERE, null, ex);
    }
    return null;
}
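
Because the seed is derived from the current time, repeated runs over the same data can produce different clusterings. For reproducible results a fixed seed can be used instead, e.g.:

    clusterer.setSeed(42); // any fixed constant makes runs repeatable

Also note that the default branch of the switch falls back to Euclidean distance, which matches SimpleKMeans' own default.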

From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java

License:Open Source License

protected void unmodifiedContourData() throws Exception {
    m_contourData = new ContourDataExtractor() {
        protected void extractContourData(int[] translations, int[] permutation) {
            Arrays.fill(translations, 0);

        }
    };
    SimpleKMeans clusterer = new SimpleKMeans();
    int clustersPerSample = 2;
    clusterer.setNumClusters(clustersPerSample * m_contourDataGrid.numSamples());
    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);
    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();
}

From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java

License:Open Source License

protected void iterativeInferenceApproach() throws Exception {
    double stdev = 200;
    /* Conditional random field approach */
    String val;
    if ((val = m_parameters.get(OPTIONAL_PARAMETER_STDEV)) != null) {
        stdev = Double.valueOf(val);
    }
    m_contourData = new ContourDataMisc(stdev);
    // m_contourData = new ContourDataFromCRF();
    // m_contourData = new ContourDataFromCRFNaive();
    SimpleKMeans clusterer = new SimpleKMeans();
    double clustersPerSample = 1;
    clusterer.setNumClusters(Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples())));

    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);
    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();

}

From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java

License:Open Source License

protected void intervalRuleInductionApproach() throws Exception {
    /* Interval rule induction */
    // train the iri (interval rule induction) classifier
    double bias = 100;
    String val;
    if ((val = m_parameters.get(OPTIONAL_PARAMETER_BIAS)) != null) {
        bias = Double.valueOf(val);
    }
    final IRI miClass = new IRI();
    miClass.setBias(bias);

    // override the bias set above with a value derived from the mean sample length
    double meanSampleLength = 0;
    for (int i = 0; i < m_contourDataGrid.numSamples(); i++) {
        meanSampleLength += m_contourDataGrid.getSampleLength(i);
    }
    meanSampleLength /= m_contourDataGrid.numSamples();
    miClass.setBias((int) Math.round(meanSampleLength / 2));

    // extract the actual contour data to create the contour models
    m_contourData = new ContourDataFromIRI(miClass);
    // m_contourData.extractContourData(m_contourDataGrid);
    //
    new WekaMIContourDataClassifier(miClass).buildClassifier(m_contourDataGrid, m_bgData);

    /*
     * use the extracted contour data to feed a weka classifier
     */
    SimpleKMeans clusterer = new SimpleKMeans();
    double clustersPerSample = 1;
    clusterer.setNumClusters(Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples())));

    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);
    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();

    /*
     * use this, if the retrieved interval rules should be used directly for
     * classification
     */
    // // retrieve a rule distribution for each sample, summarize
    // // distributions to cell models
    // ContourDataExtractor cd = m_contourData;
    // int numSamples = cd.numSamples();
    // final int numClusters = cd.numClusters();
    // int numVectors = cd.numVectors();
    // m_contourModels = new double[numSamples][numClusters];
    // for (int i = 0; i < numVectors; i++) {
    // m_contourModels[cd.getSampleIndex(i)][cd.getClusterIdx(i)]++;
    // }
    //
    // for (int i = 0; i < numSamples; i++) {
    // System.out.println(Arrays.toString(m_contourModels[i]));
    // Utils.normalize(m_contourModels[i],
    // m_contourModels[i][Utils.maxIndex(m_contourModels[i])]);
    // }
    //
    // // create a new classifier for each contour model
    // m_classifier = new ContourDataClassifier() {
    // public double contourProbability(double[] inst) throws Exception {
    // return 0;
    // }
    //
    // @Override
    // public void buildClassifier(ContourDataGrid cData,
    // VectorDataList bgData) throws Exception {
    // //
    // }
    //
    // public double[] contourProbDistribution(double[] inst)
    // throws Exception {
    // Instance i = new DenseInstance(1.0, inst);
    // double[] distr = new double[numClusters - 1];
    // for (int j = 0; j < distr.length; j++) {
    // distr[j] = miClass.getRule(j).distributionForInstance(i)[1];
    // }
    // return distr;
    // }
    // };

}

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {

        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }

        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination starting with all
        // clusters together and then removing the one whose removal
        // maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        // NormalizeIterableInterval<FloatType, Img<FloatType>> imgNorm =
        // new NormalizeIterableInterval<FloatType, Img<FloatType>>();
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;

        // ShowInSameFrame showInFrame = new ShowInSameFrame();

        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;

            // Pair<FloatType, FloatType> minmax =
            // Operations.compute(new MinMax<FloatType>(), tmp);
            // Operations.<FloatType, FloatType> map(
            // new Normalize<FloatType>(minmax.getA().getRealDouble(),
            // minmax.getB().getRealDouble(),
            // -Float.MAX_VALUE, Float.MAX_VALUE)).compute(
            // tmp, tmp);

            // showInFrame.show(tmp, 2.0);

            if (bestScore < globalBestScore) {
                break;
            }

            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;

        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();

            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] = (int) Math.round(meanPos / num)
                            - CENTER_COL;
                } else {
                    // setWeight((int)((index - 1) / res.dimension(0)), 0);
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }

        }

    } catch (Exception e) {
        // log instead of silently swallowing the exception
        e.printStackTrace();
    }

}