Example usage for weka.clusterers SimpleKMeans clusterInstance

List of usage examples for weka.clusterers SimpleKMeans clusterInstance

Introduction

In this page you can find the example usage for weka.clusterers SimpleKMeans clusterInstance.

Prototype

@Override
public int clusterInstance(Instance instance) throws Exception 

Source Link

Document

Classifies a given instance.

Usage

From source file:analysis.Purity.java

/**
 * /*from w  ww .j  av  a 2s.co  m*/
 * @param k number of clusters
 * @param originalfile original data
 * @param imputedfile imputed data
 * @throws Exception 
 */
/**
 * Computes the pairwise purity of the clustering of the imputed data with
 * respect to the clustering of the original data: both data sets are
 * clustered independently with k-means, and purity is the fraction of
 * instance pairs co-clustered in the original data that are also
 * co-clustered in the imputed data.
 *
 * @param k number of clusters
 * @param originalfile path to the original data file
 * @param imputedfile path to the imputed data file
 * @throws Exception if a file cannot be loaded or clustering fails
 */
public void findpurity(int k, String originalfile, String imputedfile) throws Exception {
    // load the original and the imputed data sets
    ConverterUtils.DataSource source = new ConverterUtils.DataSource(originalfile);
    ConverterUtils.DataSource mysource = new ConverterUtils.DataSource(imputedfile);
    this.instances = source.getDataSet();
    this.myinstances = mysource.getDataSet();

    // cluster each data set independently with simple k-means
    SimpleKMeans globalkmeans = new SimpleKMeans();
    SimpleKMeans mykmeans = new SimpleKMeans();
    globalkmeans.setNumClusters(k);
    mykmeans.setNumClusters(k);
    globalkmeans.buildClusterer(instances);
    mykmeans.buildClusterer(myinstances);

    // membership indicator matrices: row i gets a 1 in the column of the
    // cluster that instance i was assigned to
    // NOTE(review): assumes both files hold the same number of instances in
    // the same order -- confirm against the callers
    original = new Matrix(instances.numInstances(), k);
    imputed = new Matrix(myinstances.numInstances(), k);
    for (int i = 0; i < myinstances.numInstances(); i++) {
        original.set(i, globalkmeans.clusterInstance(instances.instance(i)), 1);
        imputed.set(i, mykmeans.clusterInstance(myinstances.instance(i)), 1);
    }
    System.out.println("k is: \t" + original.getColumnDimension());

    // M * M^T puts a 1 at (i, j) exactly when instances i and j share a cluster
    original = original.times(original.transpose());
    imputed = imputed.times(imputed.transpose());

    int total1 = 0; // pairs co-clustered in BOTH the original and the imputed data
    int total2 = 0; // pairs co-clustered in the original data
    // scan the upper triangle only: pair (i, j) equals pair (j, i)
    for (int i = 0; i < original.getRowDimension(); i++) {
        for (int j = i; j < original.getColumnDimension(); j++) {
            if (original.get(i, j) == 1) {
                if (imputed.get(i, j) == 1) {
                    total1++;
                }
                total2++;
            }
        }
    }

    // purity = preserved co-clustered pairs / original co-clustered pairs
    // (total2 >= numInstances here, since every instance pairs with itself)
    double purity = (double) total1 / (double) total2;
    // BUG FIX: the two WCSS labels were swapped -- globalkmeans was built on
    // the original data and mykmeans on the imputed data
    System.out.println("WCSS --> Original Data: " + globalkmeans.getSquaredError());
    System.out.println("WCSS --> Imputed Data: " + mykmeans.getSquaredError());
    System.out.println("Purity is: " + purity);

}

From source file:analysis.SilhouetteIndex.java

/**
 * Computes the mean silhouette coefficient of a k-means clustering of the
 * given instances, using Euclidean distance between raw attribute vectors.
 *
 * @param sk   the SimpleKMeans clusterer to configure and build
 * @param inst the instances to cluster and score
 * @param c    the number of clusters to use
 * @return the average silhouette value over all instances, or NaN for c = 1
 * @throws Exception if building the clusterer or clustering an instance fails
 */
public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception {
    sk.setNumClusters(c);
    sk.buildClusterer(inst);
    EuclideanDistance ed = new EuclideanDistance();
    double avgSilhouetteOverAllPoints = 0.d;

    if (sk.getNumClusters() == 1) {
        // the silhouette index is undefined for k = 1; at least 2 clusters are needed
        return Double.NaN;
    }

    for (int i = 0; i < inst.numInstances(); i++) {
        // cluster assignment of the current instance
        int currentcluster = sk.clusterInstance(inst.instance(i));
        double[] current_attr = new double[inst.numAttributes()];
        double[] other_attr = new double[inst.numAttributes()];
        // copy the attribute values of the current instance
        for (int attr = 0; attr < inst.numAttributes(); attr++) {
            current_attr[attr] = inst.instance(i).value(attr);
        }
        // distances[k] accumulates the total distance from instance i to the
        // members of cluster k; counters[k] counts those members
        double[] distances = new double[sk.getNumClusters()];
        int[] counters = new int[sk.getNumClusters()];
        double avgInClusterDist = 0, dist = 0;
        int countsamecluster = 0;
        // sentinel: the own cluster must never win the "nearest other
        // cluster" minimum computed below
        distances[currentcluster] = Double.MAX_VALUE;
        for (int j = 0; j < inst.numInstances(); j++) {
            for (int attr = 0; attr < inst.numAttributes(); attr++) {
                other_attr[attr] = inst.instance(j).value(attr);
            }
            // cluster assignment of the j-th instance
            int clusternumber = sk.clusterInstance(inst.instance(j));
            if (clusternumber == currentcluster) {
                // NOTE(review): reference comparison -- relies on instance(i)
                // and instance(j) being distinct objects for i != j
                if (inst.instance(i) != inst.instance(j)) {
                    // accumulate distance to the other members of the own cluster
                    dist = ed.compute(current_attr, other_attr);
                    avgInClusterDist = avgInClusterDist + dist;
                    countsamecluster++;
                }
            } else {
                // accumulate distance to a member of a different cluster
                dist = ed.compute(current_attr, other_attr);
                distances[clusternumber] = distances[clusternumber] + dist;
                counters[clusternumber]++;
            }
        }
        // a(i): mean distance to the other members of the own cluster
        if (countsamecluster > 0) {
            avgInClusterDist = avgInClusterDist / countsamecluster;
        }
        // mean distance from instance i to each other cluster
        // NOTE(review): an empty cluster k gives 0/0 = NaN here -- confirm
        // SimpleKMeans cannot produce empty clusters in this setup
        for (int k = 0; k < distances.length; k++) {
            if (k != currentcluster) {
                distances[k] = distances[k] / counters[k];
            }
        }
        // b(i): distance to the nearest other cluster (the own cluster holds
        // Double.MAX_VALUE and therefore never wins)
        double min = distances[0];
        for (int k = 1; k < distances.length; k++) {
            if (min > distances[k]) {
                min = distances[k];
            }
        }

        // s(i) = (b(i) - a(i)) / max(a(i), b(i)) for the current instance
        double si;
        // if the own cluster has only one other element, define si = 0
        // NOTE(review): a true singleton cluster gives countsamecluster == 0,
        // not 1, and is not handled by this branch -- verify the intent
        if (countsamecluster == 1) {
            si = 0.0d;
        } else {
            si = (min - avgInClusterDist) / Math.max(min, avgInClusterDist);
        }
        avgSilhouetteOverAllPoints = avgSilhouetteOverAllPoints + si;
    }
    // average the silhouette values over all instances
    return avgSilhouetteOverAllPoints / inst.numInstances();

}

From source file:detplagiasi.KMeansClustering.java

/**
 * Builds a k-means clustering over the training ARFF data: first runs Weka's
 * standard cluster evaluation with default options, then a manual 4-cluster
 * run that classifies the test instance and every training instance,
 * appending all results to output.txt in the configured directory.
 */
KMeansClustering() {
    addd = Container.getAddress();
    try {
        ClusterEvaluation eval;
        Instances data;
        String[] options;
        SimpleKMeans cl;

        File he = getArffFile();
        data = new Instances(new BufferedReader(new FileReader(he)));
        System.out.println("-----KMeans Clustering-----");
        // standard evaluation with default options, appended to output.txt
        try (BufferedWriter out = new BufferedWriter(new FileWriter(addd + "\\output.txt", true))) {
            out.write("\r\n--> normal\r\n");
            options = new String[2];
            options[0] = "-t"; // -t <file>: the training file for the evaluator
            options[1] = he.getAbsolutePath();
            out.write("\r\n" + ClusterEvaluation.evaluateClusterer(new SimpleKMeans(), options) + "\r\n");
            out.write("\r\n");

            // manual run: build a fixed 4-cluster model on the training data
            out.write("\n--> manual\r\n");
            cl = new SimpleKMeans();
            cl.setNumClusters(4);
            out.write("\r\n");
            cl.buildClusterer(data);
            // load the test data, then record its cluster and the cluster count
            getDataUji();
            System.out.println("jumlah kluster = " + cl.numberOfClusters());
            System.out.println("kluster = " + cl.clusterInstance(dataUji.instance(0)));
            noClusterUji = cl.clusterInstance(dataUji.instance(0));
            totalCluster = cl.numberOfClusters();
            // classify every training instance and remember (file name, cluster)
            for (int b = 0; b < dataTraining.numInstances(); b++) {
                System.out.print("file " + td.fileName[b] + " termasuk cluster ke ");
                System.out.println(cl.clusterInstance(dataTraining.instance(b)));
                array1[b] = td.fileName[b];
                // store the assignment in an int array to be sent to detplaggui
                array2[b] = cl.clusterInstance(dataTraining.instance(b));
            }

            out.write("\r\n");

            // evaluate the manually built clusterer on a copy of the data
            eval = new ClusterEvaluation();
            eval.setClusterer(cl);
            eval.evaluateClusterer(new Instances(data));
            out.write("\r\n\n# of clusters: " + eval.getNumClusters());

        } catch (Exception e) {
            // NOTE(review): only the message is printed; the stack trace is lost
            System.err.println(e.getMessage());
            System.out.println("error2 kmeans cluster");
        }

    } catch (IOException ex) {
        Logger.getLogger(Clustering.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("errorrrr null kmeans");
    }
}

From source file:entities.ArffFile.java

/**
 * Dada una lista de parametros, se ejecuta el filtro de microagregacion.
 * Todos estos parametros son entrada del usuario.
 * @param df Puede ser Euclidian o Manhattan distance, se especifica en la entrada.
 * @param numCluster// w ww  .  jav a  2s . c  o  m
 * @param seed
 * @param maxIterations
 * @param replaceMissingValues
 * @param preserveInstancesOrder
 * @param attributes lista de los atributos que se desean generalizar con cluster
 */
/**
 * Runs the microaggregation filter with the given user-supplied parameters:
 * clusters the selected attributes with k-means and replaces each instance's
 * values by the corresponding cluster-centroid values.
 *
 * @param df distance function (e.g. Euclidean or Manhattan distance)
 * @param numCluster number of clusters
 * @param seed random seed for k-means
 * @param maxIterations maximum number of k-means iterations
 * @param replaceMissingValues passed to setDontReplaceMissingValues
 *        -- NOTE(review): the polarity looks inverted; confirm the intent
 * @param preserveInstancesOrder whether k-means preserves the instance order
 * @param attributes indices of the attributes to generalize via clustering
 * @throws Exception if any selected attribute is of type DATE or STRING
 */
public void microAgregacion(DistanceFunction df, int numCluster, int seed, int maxIterations,
        boolean replaceMissingValues, boolean preserveInstancesOrder, List<Integer> attributes)
        throws Exception {
    SimpleKMeans kMeans = new SimpleKMeans();
    Instances uniqueAttributes = new Instances(instancesFilter);
    // collect the names of the selected attributes, rejecting unsupported types
    List<String> names = new ArrayList<>();
    for (Integer attribute : attributes) {
        if (instancesFilter.attribute(attribute).isDate() || instancesFilter.attribute(attribute).isString())
            throw new Exception("No se puede hacer cluster con atributos de tipo DATE o STRING");
        names.add(instancesFilter.attribute(attribute).name());
    }
    // drop every attribute that was not selected, until only the selected remain
    int i = 0;
    while (uniqueAttributes.numAttributes() != attributes.size()) {
        if (!names.contains(uniqueAttributes.attribute(i).name()))
            uniqueAttributes.deleteAttributeAt(i);
        else
            i++;
    }
    try {
        // configure and build the clusterer on the reduced attribute set
        kMeans.setNumClusters(numCluster);
        kMeans.setMaxIterations(maxIterations);
        kMeans.setSeed(seed);
        kMeans.setDisplayStdDevs(false);
        kMeans.setDistanceFunction(df);
        kMeans.setDontReplaceMissingValues(replaceMissingValues);
        kMeans.setPreserveInstancesOrder(preserveInstancesOrder);
        kMeans.buildClusterer(uniqueAttributes);
        // overwrite each instance's values with its cluster centroid's values
        for (int j = 0; j < uniqueAttributes.numInstances(); j++) {
            int cluster = kMeans.clusterInstance(uniqueAttributes.instance(j));
            for (int k = 0; k < uniqueAttributes.numAttributes(); k++) {
                if (uniqueAttributes.attribute(k).isNumeric())
                    uniqueAttributes.instance(j).setValue(k,
                            Double.parseDouble(kMeans.getClusterCentroids().instance(cluster).toString(k)));
                else
                    uniqueAttributes.instance(j).setValue(k,
                            kMeans.getClusterCentroids().instance(cluster).toString(k));
            }
        }
        // write the generalized values back into the full instance set
        replaceValues(uniqueAttributes, attributes);
    } catch (Exception ex) {
        // clustering failures are logged; the data is left unmodified
        Logger.getLogger(ArffFile.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:net.sf.mzmine.modules.peaklistmethods.dataanalysis.clustering.simplekmeans.SimpleKMeansClusterer.java

License:Open Source License

@Override
public ClusteringResult performClustering(Instances dataset, ParameterSet parameters) {

    List<Integer> clusters = new ArrayList<Integer>();
    String[] options = new String[2];
    SimpleKMeans clusterer = new SimpleKMeans();

    int numberOfGroups = parameters.getParameter(SimpleKMeansClustererParameters.numberOfGroups).getValue();
    options[0] = "-N";
    options[1] = String.valueOf(numberOfGroups);

    try {/*from  w  w w  .  ja va2  s.  com*/
        clusterer.setOptions(options);
        clusterer.buildClusterer(dataset);
        Enumeration<?> e = dataset.enumerateInstances();
        while (e.hasMoreElements()) {
            clusters.add(clusterer.clusterInstance((Instance) e.nextElement()));
        }
        ClusteringResult result = new ClusteringResult(clusters, null, clusterer.numberOfClusters(),
                parameters.getParameter(EMClustererParameters.visualization).getValue());
        return result;

    } catch (Exception ex) {
        logger.log(Level.SEVERE, null, ex);
        return null;
    }
}

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.ContourDataFromClusterSelection.java

License:Open Source License

/**
 * {@inheritDoc}/*w  w w . j a  va 2  s . c  o  m*/
 */
@Override
protected void extractContourData(int[] translations, int[] permutation) {
    SimpleKMeans clusterer = new SimpleKMeans();
    try {

        clusterer.setNumClusters(m_numClusters);

        // cluster the data
        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int a = 0; a < contourDataGrid().numFeatures(); a++) {
            attInfo.add(new Attribute("att" + a));
        }
        Instances data = new Instances("dataset", attInfo, contourDataGrid().numVectors());
        for (double[] vec : contourDataGrid()) {
            data.add(new DenseInstance(1.0, vec));
        }
        clusterer.buildClusterer(data);

        // create clustered images p(C|x)
        Img[] imgs = new Img[m_numClusters];
        int[] dims = new int[] { contourDataGrid().width(), contourDataGrid().totalLength() };
        Cursor<FloatType>[] cursors = new Cursor[m_numClusters];
        for (int i = 0; i < imgs.length; i++) {
            imgs[i] = new ArrayImgFactory<FloatType>().create(dims, new FloatType());
            cursors[i] = imgs[i].localizingCursor();
        }

        int cluster;
        for (Instance instance : data) {
            for (int i = 0; i < cursors.length; i++) {
                cursors[i].fwd();
            }
            cluster = clusterer.clusterInstance(instance);
            cursors[cluster].get().set(1.0f);
        }

        // greedily select the best cluster combination starting with all
        // clusters together and then removing the one whose removal
        // maximises the score of the remaining clusters
        Img<FloatType> res = imgs[0].factory().create(imgs[0], new FloatType());
        Cursor<FloatType> resC = res.cursor();
        while (resC.hasNext()) {
            resC.fwd();
            resC.get().set(1.0f);
        }
        Img<FloatType> tmp = res.factory().create(res, new FloatType());

        // TODO: normalize img
        // NormalizeIterableInterval<FloatType, Img<FloatType>> imgNorm =
        // new NormalizeIterableInterval<FloatType, Img<FloatType>>();
        double score = 0;
        double bestScore = -Double.MAX_VALUE;
        double globalBestScore = -Double.MAX_VALUE;
        int bestCluster = 0;

        // ShowInSameFrame showInFrame = new ShowInSameFrame();

        for (int i = 0; i < m_numClusters; i++) {
            for (int j = 0; j < m_numClusters; j++) {
                if (imgs[j] != null) {
                    substract(res, imgs[j], tmp);
                    score = calcScore(tmp, m_bias);
                    if (score > bestScore) {
                        bestScore = score;
                        bestCluster = j;
                    }
                }
            }
            substract(res, imgs[bestCluster], res);
            imgs[bestCluster] = null;

            // Pair<FloatType, FloatType> minmax =
            // Operations.compute(new MinMax<FloatType>(), tmp);
            // Operations.<FloatType, FloatType> map(
            // new Normalize<FloatType>(minmax.getA().getRealDouble(),
            // minmax.getB().getRealDouble(),
            // -Float.MAX_VALUE, Float.MAX_VALUE)).compute(
            // tmp, tmp);

            // showInFrame.show(tmp, 2.0);

            if (bestScore < globalBestScore) {
                break;
            }

            globalBestScore = bestScore;
            bestScore = -Double.MAX_VALUE;

        }

        // calculate the translations (mean positions)
        resC = res.localizingCursor();
        double meanPos = 0;
        double num = 0;
        int index = 0;
        while (resC.hasNext()) {
            resC.fwd();

            meanPos += resC.get().get() * resC.getDoublePosition(0);
            num += resC.get().get();
            index++;
            if ((index % res.dimension(0)) == 0) {
                if (num > 0) {
                    translations[(int) ((index - 1) / res.dimension(0))] = (int) Math.round(meanPos / num)
                            - CENTER_COL;
                } else {
                    // setWeight((int)((index - 1) / res.dimension(0)), 0);
                    translations[(int) ((index - 1) / res.dimension(0))] = 0;
                }
                meanPos = 0;
                num = 0;
            }

        }

    } catch (Exception e) {
        // TODO Auto-generated catch block
    }

}