Example usage for weka.clusterers SimpleKMeans SimpleKMeans

List of usage examples for weka.clusterers SimpleKMeans SimpleKMeans

Introduction

On this page you can find example usage for weka.clusterers SimpleKMeans SimpleKMeans.

Prototype

public SimpleKMeans() 

Document

The default constructor.

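Since the constructor takes no arguments, an instance is configured entirely through setters before buildClusterer is called, as the examples below illustrate. The following minimal sketch shows the typical lifecycle; the ARFF path, cluster count, and seed are placeholder assumptions rather than values taken from the examples on this page.

import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class SimpleKMeansExample {
    public static void main(String[] args) throws Exception {
        // load a dataset; clustering expects no class attribute to be set
        Instances data = DataSource.read("/path/to/data.arff");

        SimpleKMeans kmeans = new SimpleKMeans();
        // all configuration must happen before buildClusterer(...)
        kmeans.setNumClusters(3);
        kmeans.setSeed(42);
        kmeans.buildClusterer(data);

        // prints the centroids and per-cluster statistics
        System.out.println(kmeans);
    }
}
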
Usage

From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java

License:Open Source License

protected void iterativeInferenceApproach() throws Exception {
    double stdev = 200;
    /* Conditional random field approach */
    String val;
    if ((val = m_parameters.get(OPTIONAL_PARAMETER_STDEV)) != null) {
        stdev = Double.valueOf(val);
    }
    m_contourData = new ContourDataMisc(stdev);
    // m_contourData = new ContourDataFromCRF();
    // m_contourData = new ContourDataFromCRFNaive();
    SimpleKMeans clusterer = new SimpleKMeans();
    double clustersPerSample = 1;
    clusterer.setNumClusters(Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples())));

    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);
    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();

}

From source file:org.knime.knip.suise.node.boundarymodel.BoundaryModel.java

License:Open Source License

protected void intervalRuleInductionApproach() throws Exception {
    /* Interval rule induction */
    // train the iri (interval rule induction) classifier
    double bias = 100;
    String val;
    if ((val = m_parameters.get(OPTIONAL_PARAMETER_BIAS)) != null) {
        bias = Double.valueOf(val);
    }
    final IRI miClass = new IRI();
    miClass.setBias(bias);

    // set the bias according to the mean sample length
    double meanSampleLength = 0;
    for (int i = 0; i < m_contourDataGrid.numSamples(); i++) {
        meanSampleLength += m_contourDataGrid.getSampleLength(i);
    }
    meanSampleLength /= m_contourDataGrid.numSamples();
    miClass.setBias((int) Math.round(meanSampleLength / 2));

    // extract the actual contour data to create the contour models
    m_contourData = new ContourDataFromIRI(miClass);
    // m_contourData.extractContourData(m_contourDataGrid);
    //
    new WekaMIContourDataClassifier(miClass).buildClassifier(m_contourDataGrid, m_bgData);

    /*
     * use the extracted contour data to feed a weka classifier
     */
    SimpleKMeans clusterer = new SimpleKMeans();
    double clustersPerSample = 1;
    clusterer.setNumClusters(Math.max(1, (int) Math.round(clustersPerSample * m_contourDataGrid.numSamples())));

    m_classifier = new WekaContourDataClassifier(m_wekaClassifier, m_contourData, clusterer);
    m_classifier.buildClassifier(m_contourDataGrid, m_bgData);
    m_contourModels = new double[m_contourData.numSamples()][m_contourData.numClusters()];
    for (int i = 0; i < m_contourData.numVectors(); i++) {
        if (m_contourData.weight(i) > 0)
            m_contourModels[m_contourData.getSampleIndex(i)][m_contourData.getClusterIdx(i)] = 1.0;
    }
    removeRedundantContourModels();

    /*
     * use this, if the retrieved interval rules should be used directly for
     * classification
     */
    // // retrieve a rule distribution for each sample, summarize
    // // distributions to cell models
    // ContourDataExtractor cd = m_contourData;
    // int numSamples = cd.numSamples();
    // final int numClusters = cd.numClusters();
    // int numVectors = cd.numVectors();
    // m_contourModels = new double[numSamples][numClusters];
    // for (int i = 0; i < numVectors; i++) {
    // m_contourModels[cd.getSampleIndex(i)][cd.getClusterIdx(i)]++;
    // }
    //
    // for (int i = 0; i < numSamples; i++) {
    // System.out.println(Arrays.toString(m_contourModels[i]));
    // Utils.normalize(m_contourModels[i],
    // m_contourModels[i][Utils.maxIndex(m_contourModels[i])]);
    // }
    //
    // // create a new classifier for each contour model
    // m_classifier = new ContourDataClassifier() {
    // public double contourProbability(double[] inst) throws Exception {
    // return 0;
    // }
    //
    // @Override
    // public void buildClassifier(ContourDataGrid cData,
    // VectorDataList bgData) throws Exception {
    // //
    // }
    //
    // public double[] contourProbDistribution(double[] inst)
    // throws Exception {
    // Instance i = new DenseInstance(1.0, inst);
    // double[] distr = new double[numClusters - 1];
    // for (int j = 0; j < distr.length; j++) {
    // distr[j] = miClass.getRule(j).distributionForInstance(i)[1];
    // }
    // return distr;
    // }
    // };

}

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {

        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }

        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination starting with all
        // clusters together and then removing the one whose removal
        // maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        // NormalizeIterableInterval<FloatType, Img<FloatType>> imgNorm =
        // new NormalizeIterableInterval<FloatType, Img<FloatType>>();
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;

        // ShowInSameFrame showInFrame = new ShowInSameFrame();

        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;

            // Pair<FloatType, FloatType> minmax =
            // Operations.compute(new MinMax<FloatType>(), tmp);
            // Operations.<FloatType, FloatType> map(
            // new Normalize<FloatType>(minmax.getA().getRealDouble(),
            // minmax.getB().getRealDouble(),
            // -Float.MAX_VALUE, Float.MAX_VALUE)).compute(
            // tmp, tmp);

            // showInFrame.show(tmp, 2.0);

            if (bestScore < globalBestScore) {
                break;
            }

            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;

        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();

            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] = (int) Math.round(meanPos / num)
                            - CENTER_COL;
                } else {
                    // setWeight((int)((index - 1) / res.dimension(0)), 0);
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }

        }

    } catch (Exception e) {
        // don't swallow the exception silently
        e.printStackTrace();
    }

}

From source file:org.montp2.m1decol.ter.clustering.KMeansClustering.java

License:Open Source License

public Clusterer computeClustering(String inPath, String outPath, Properties propertiesCluster)
        throws Exception {
    Instances inputInstances = WekaUtils.loadARFF(inPath);

    EuclideanDistance euclideanDistance = new EuclideanDistance();
    euclideanDistance.setAttributeIndices("first-last");
    euclideanDistance.setDontNormalize(false);
    euclideanDistance.setInvertSelection(false);

    SimpleKMeans kmeans = new SimpleKMeans();
    kmeans.setPreserveInstancesOrder(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.PERSERVE_INSTANCE)));
    kmeans.setDontReplaceMissingValues(Boolean
            .valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DONT_REPLACE_MISSING_VALUES)));
    kmeans.setDisplayStdDevs(
            Boolean.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.DISPLAY_STD_DEVS)));
    kmeans.setMaxIterations(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.MAX_ITERATIONS)));
    kmeans.setNumClusters(
            Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.NUM_CLUSTERS)));
    kmeans.setSeed(10);
    //kmeans.setSeed(
    //      Integer.valueOf(propertiesCluster.getProperty(ClusterProperties.Kmeans.SEED)));
    kmeans.setDistanceFunction(euclideanDistance);
    kmeans.buildClusterer(inputInstances);

    WekaUtils.saveModel(kmeans, outPath);

    /*
     * To obtain the cluster percentages:
     * ClusterEvaluation eval = new ClusterEvaluation();
     * eval.setClusterer(kmeans);
     * eval.evaluateClusterer(inputInstances);
     * System.out.println(eval.clusterResultsToString());
     */

    return kmeans;
}

From source file:probcog.bayesnets.learning.DomainLearner.java

License:Open Source License

/**
 * performs the clustering (if some domains are to be learnt by clustering)
 * and applies all the new domains. (This method is called by finish(),
 * which should be called when all the examples have been passed.)
 */
protected void end_learning() throws Exception {
    if (directDomains != null)
        for (int i = 0; i < directDomains.length; i++) {
            if (verbose)
                System.out.println(directDomains[i]);
            HashSet<String> hs = directDomainData.get(i);
            Discrete domain = new Discrete();
            for (Iterator<String> iter = hs.iterator(); iter.hasNext();)
                domain.addName(iter.next());
            BeliefNode node = directDomains[i];
            if (node == null) {
                System.out.println(
                        "No node with name '" + directDomains[i] + "' found to learn direct domain for.");
            }
            //System.out.println("DomainLearner: applying domain " + hs + " to " + node.getName());
            bn.bn.changeBeliefNodeDomain(node, domain);
        }
    if (clusteredDomains != null)
        for (int i = 0; i < clusteredDomains.length; i++) {
            if (verbose)
                System.out.println(clusteredDomains[i].nodeName);
            try {
                // perform clustering
                clusterers[i] = new SimpleKMeans();
                if (clusteredDomains[i].numClusters != 0)
                    clusterers[i].setNumClusters(clusteredDomains[i].numClusters);
                clusterers[i].buildClusterer(clusterData[i]);
                // update domain
                bn.bn.changeBeliefNodeDomain(bn.getNode(clusteredDomains[i].nodeName),
                        new Discretized(clusterers[i], clusterNamer));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    if (duplicateDomains != null) {
        for (int i = 0; i < duplicateDomains.length; i++) {
            Domain srcDomain = bn.getDomain(duplicateDomains[i][0]);
            for (int j = 1; j < duplicateDomains[i].length; j++) {
                if (verbose)
                    System.out.println(duplicateDomains[i][j]);
                bn.bn.changeBeliefNodeDomain(bn.getNode(duplicateDomains[i][j]), srcDomain);
            }
        }
    }
}

From source file:probcog.clustering.SimpleClusterer.java

License:Open Source License

public SimpleClusterer() {
    this(new SimpleKMeans());
}

From source file:probcog.hmm.latent.SubHMM.java

License:Open Source License

public static SegmentSequence<? extends ObservationVector> learnViaClustering(
        IDwellTimeHMM<ObservationVector> hmm, Iterable<? extends Segment<? extends ObservationVector>> s,
        boolean usePseudoCounts) throws Exception {
    final int dim = s.iterator().next().firstElement().dimension();
    Integer numStates = hmm.getNumStates();

    // clustering
    MultiDimClusterer<?> clusterer;
    if (numStates != null)
        clusterer = new KMeansClusterer(new SimpleKMeans(), dim, numStates);
    else
        clusterer = new EMClusterer(new EM(), dim);
    for (Segment<? extends ObservationVector> seg : s)
        for (ObservationVector p : seg)
            //clusterer.addInstance(p.getArray());
            clusterer.addInstance(p.values()); // TODO slow, performs clone
    clusterer.buildClusterer();
    if (numStates == null) {
        numStates = clusterer.getWekaClusterer().numberOfClusters();
        hmm.setNumStates(numStates);
    }

    // count transitions and partition
    // partition observations according to clustering
    TransitionLearner tl = new TransitionLearner(numStates, usePseudoCounts);
    DistributionLearner dl = new DistributionLearner(numStates, usePseudoCounts);
    SegmentSequence<ObservationVector> segseq = new SegmentSequence<ObservationVector>("foo");
    for (Segment<? extends ObservationVector> seg : s) {
        int prev = -1;
        for (ObservationVector p : seg) {
            //int c = clusterer.classify(p.getArray());
            int c = clusterer.classify(p.values()); // TODO inefficient, clones values
            segseq.build(c, p);
            if (prev == -1)
                dl.learn(c);
            else
                tl.learn(prev, c);
            prev = c;
        }
        segseq.buildEndSegment();
    }
    hmm.setA(tl.finish());
    hmm.setPi(dl.finish());

    // learn observation models
    for (int i = 0; i < numStates; i++) {
        hmm.learnObservationModel(i, segseq.getSegments(i));
        //System.out.printf("    sub-hmm %d: %d data points\n", i, partitions.get(i).size());
    }

    return segseq;
}

From source file:processes.ClusterProcess.java

private void initiateClusters(int randomSeedMax, int clusterCount) {
    if (!clusterInitiated) {
        dataGraph = new SimpleKMeans();
        dataGraph.setPreserveInstancesOrder(true);
        dataGraph.setSeed(randomSeedMax);
        try {
            dataGraph.setNumClusters(clusterCount);
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Error@ClusterProcess_initiateCluster", e);
            // setNumClusters rejects non-positive values; retry with a positive count
            clusterCount = ((-1) * clusterCount) + 1;
            initiateClusters(randomSeedMax, clusterCount);
        }
        clusterInitiated = true;
    }
}

From source file:qoala.arff.java

public void SimpleKmeans(int numberOfClusters) throws Exception {

    Instances train = new Instances(dataSet);

    SimpleKMeans skm = new SimpleKMeans();
    skm.setPreserveInstancesOrder(true);
    skm.setNumClusters(numberOfClusters);
    skm.setSeed(10); // the seed must be set before building the clusterer to have any effect
    skm.buildClusterer(train);
    int[] clusterSizes = skm.getClusterSizes();

    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(train);

    System.out.println("Cluster Evaluation:" + eval.clusterResultsToString());

    int[] assignments = skm.getAssignments();

    System.out.println("# - cluster - distribution");

    for (int j = 0; j < skm.getNumClusters(); j++) {
        int i = 0;
        for (int clusterNum : assignments) {

            if (clusterNum == j)

                System.out.println("Instance " + i + " -> Cluster number: " + clusterNum);

            i++;
        }
    }
}

From source file:rdfsystem.data.DataMining.java

public static String cluster(RdfManager manager) throws Exception {
    Instances ins = transformData(manager, false);
    SimpleKMeans cls = new SimpleKMeans();
    String[] options = "-N 5".split(" ");
    cls.setOptions(options);
    cls.buildClusterer(ins);
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(cls);
    eval.evaluateClusterer(ins);
    return eval.clusterResultsToString();
}