Example usage for weka.clusterers ClusterEvaluation ClusterEvaluation


Introduction

On this page you can find example usages of the weka.clusterers.ClusterEvaluation constructor, ClusterEvaluation().

Prototype

public ClusterEvaluation() 

Document

Constructor.
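
The pattern shared by all the examples below is: construct an empty ClusterEvaluation, attach a trained clusterer, evaluate it on a dataset, and print the results. A minimal sketch, assuming an ARFF file at a placeholder path and using SimpleKMeans as just one possible clusterer:

import weka.clusterers.ClusterEvaluation;
import weka.clusterers.SimpleKMeans;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClusterEvaluationExample {
    public static void main(String[] args) throws Exception {
        // load a dataset (the path is a placeholder)
        Instances data = new DataSource("data.arff").getDataSet();

        // train any weka.clusterers.Clusterer; SimpleKMeans is one choice
        SimpleKMeans clusterer = new SimpleKMeans();
        clusterer.setNumClusters(2);
        clusterer.buildClusterer(data);

        // construct the evaluation, attach the clusterer, evaluate
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(data);
        System.out.println(eval.clusterResultsToString());
    }
}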

Usage

From source file:jmetal.test.survivalanalysis.GenerateSurvivalGraph.java

License:Open Source License

/** 
 * Evaluates a solution
 * @param solution The solution to evaluate
 */
public void evaluate(Solution solution) {
    Binary variable;
    int counterSelectedFeatures;

    DataSource source;

    double testStatistic = Double.MAX_VALUE;
    double pValue = Double.MAX_VALUE;
    double ArithmeticHarmonicCutScore = Double.MAX_VALUE;
    //double statScore;
    REXP x;

    variable = ((Binary) solution.getDecisionVariables()[0]);

    counterSelectedFeatures = 0;

    try {
        // read the data file 
        source = new DataSource(this.dataFileName);
        Instances data = source.getDataSet();
        //System.out.print("Data read successfully. ");
        //System.out.print("Number of attributes: " + data.numAttributes());
        //System.out.println(". Number of instances: " + data.numInstances());

        // save the attributes 'T' and 'Censor'
        attTime = data.attribute(data.numAttributes() - 2);
        attCensor = data.attribute(data.numAttributes() - 1);

        // First filter the attributes based on chromosome
        Instances tmpData = this.filterByChromosome(data, solution);

        // Now remove the attributes 'T' and 'Censor'
        Remove filter = new Remove();
        // remove the last two attributes: 'T' and 'Censor'
        filter.setAttributeIndices("" + (tmpData.numAttributes() - 1) + "," + tmpData.numAttributes());
        //System.out.println("After chromosome filtering no of attributes: " + tmpData.numAttributes());
        filter.setInputFormat(tmpData);
        Instances dataClusterer = Filter.useFilter(tmpData, filter);

        // filtering complete

        // List the selected features/attributes
        Enumeration<Attribute> attributeList = dataClusterer.enumerateAttributes();
        System.out.println("Selected attributes/features: ");
        while (attributeList.hasMoreElements()) {
            Attribute att = attributeList.nextElement();
            System.out.print(att.name() + ",");
        }

        System.out.println();

        /*
        // debug: write the filtered dataset
                
         ArffSaver saver = new ArffSaver();
         saver.setInstances(dataClusterer);
         saver.setFile(new File("filteered-data.arff"));
         saver.writeBatch();
        // end debug
                
        */

        // train hierarchical clusterer

        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        clusterer.setOptions(new String[] { "-L", this.HC_LinkType });
        //Link type (Single, Complete, Average, Mean, Centroid, Ward, Adjusted complete, Neighbor Joining)
        //[SINGLE|COMPLETE|AVERAGE|MEAN|CENTROID|WARD|ADJCOMPLETE|NEIGHBOR_JOINING]

        //clusterer.setDebug(true);
        clusterer.setNumClusters(2);
        clusterer.setDistanceFunction(new EuclideanDistance());
        clusterer.setDistanceIsBranchLength(false); // interpret distances as node heights rather than branch lengths

        clusterer.buildClusterer(dataClusterer);

        double[][] distanceMatrix = clusterer.getDistanceMatrix();

        // Cluster evaluation:
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);

        if (this.testDataFileName != null) {

            DataSource testSource = new DataSource(this.testDataFileName);

            Instances tmpTestData = testSource.getDataSet();
            tmpTestData.setClassIndex(tmpTestData.numAttributes() - 1);
            //testSource.

            // First filter the attributes based on chromosome
            Instances testData = this.filterByChromosome(tmpTestData, solution);
            //String[] options = new String[2];
            //options[0] = "-t";
            //options[1] = "/some/where/somefile.arff";
            //eval.
            //System.out.println(eval.evaluateClusterer(testData, options));
            eval.evaluateClusterer(testData);
            System.out.println("\nCluster evluation for this solution(" + this.testDataFileName + "): "
                    + eval.clusterResultsToString());
        }

        // First analyze using my library function

        // save the cluster assignments

        int[] clusterAssignment = new int[dataClusterer.numInstances()];
        int classOneCnt = 0;
        int classTwoCnt = 0;
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i));
            if (clusterAssignment[i] == 0) {
                ++classOneCnt;
            } else if (clusterAssignment[i] == 1) {
                ++classTwoCnt;
            }
            //System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }

        System.out.println("Class 1 cnt: " + classOneCnt + " Class 2 cnt: " + classTwoCnt);

        // create arrays with time (event occurrence time) and censor data for use with jstat LogRankTest
        double[] time1 = new double[classOneCnt];
        double[] censor1 = new double[classOneCnt];
        double[] time2 = new double[classTwoCnt];
        double[] censor2 = new double[classTwoCnt];

        //data = source.getDataSet();
        for (int i = 0, cnt1 = 0, cnt2 = 0; i < dataClusterer.numInstances(); ++i) {
            //clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i));
            if (clusterAssignment[i] == 0) {
                time1[cnt1] = data.get(i).value(attTime);
                censor1[cnt1++] = data.get(i).value(attCensor);
                //System.out.println("i: " + i + " T: " + time1[cnt1-1]);
            } else if (clusterAssignment[i] == 1) {
                time2[cnt2] = data.get(i).value(attTime);
                //System.out.println("i: " + i + " T: " + time2[cnt2-1]);
                censor2[cnt2++] = data.get(i).value(attCensor);
            }
            //System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }

        //Instances[] classInstances = separateClassInstances(clusterAssignment, this.dataFileName,solution);
        //System.out.println("Class instances seperated");

        // calculate log rank test and p values

        LogRankTest testclass1 = new LogRankTest(time1, time2, censor1, censor2);
        double[] scores = testclass1.logRank();
        testStatistic = scores[0];
        pValue = scores[2];

        ArithmeticHarmonicCutScore = this.getArithmeticHarmonicCutScore(distanceMatrix, clusterAssignment);
        //debug:
        System.out.println("Calculation by myLibrary:\n testStatistic: " + scores[0] + " pValue: " + scores[2]
                + " Arithmetic Harmonic Cut Score: " + ArithmeticHarmonicCutScore);
        //end debug
        //WilcoxonTest testclass1 = new WilcoxonTest(time1, censor1, time2, censor2);
        //testStatistic = testclass1.testStatistic;
        //pValue = testclass1.pValue;

        // Now analyze calling R for Log Rank test, Parallelization not possible

        String strT = "time <- c(";
        String strC = "censor <- c(";
        String strG = "group <- c(";

        for (int i = 0; i < dataClusterer.numInstances() - 1; ++i) {
            strT = strT + (int) data.get(i).value(attTime) + ",";
            strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ",";
            strC = strC + (int) data.get(i).value(attCensor) + ",";
        }

        int tmpi = dataClusterer.numInstances() - 1;
        strT = strT + (int) data.get(tmpi).value(attTime) + ")";
        strG = strG + clusterer.clusterInstance(dataClusterer.get(tmpi)) + ")";
        strC = strC + (int) data.get(tmpi).value(attCensor) + ")";

        this.re.eval(strT);
        this.re.eval(strC);
        this.re.eval(strG);

        //debug
        //System.out.println(strT);
        //System.out.println(strC);
        //System.out.println(strG);
        //end debug

        /** If you are calling surv_test from coin library */
        /*
        re.eval("library(coin)");
        re.eval("grp <- factor (group)");
        re.eval("result <- surv_test(Surv(time,censor)~grp,distribution=\"exact\")");
                
        x=re.eval("statistic(result)");
        testStatistic = x.asDouble();
        //x=re.eval("pvalue(result)");
        //pValue = x.asDouble();
        //System.out.println("StatScore: " + statScore + "pValue: " + pValue);
         */

        /** If you are calling survdiff from survival library (much faster) */
        re.eval("library(survival)");
        re.eval("res2 <- survdiff(Surv(time,censor)~group,rho=0)");
        x = re.eval("res2$chisq");
        testStatistic = x.asDouble();
        //System.out.println(x);
        x = re.eval("pchisq(res2$chisq, df=1, lower.tail = FALSE)");
        //x = re.eval("1.0 - pchisq(res2$chisq, df=1)");
        pValue = x.asDouble();
        //debug:
        //System.out.println("Calculation by R: StatScore: " + testStatistic + "pValue: " + pValue);
        //end debug

        System.out.println("Calculation by R:");
        System.out.println("StatScore: " + testStatistic + "  pValue: " + pValue);

        re.eval("timestrata1.surv <- survfit( Surv(time, censor)~ strata(group), conf.type=\"log-log\")");
        re.eval("timestrata1.surv1 <- survfit( Surv(time, censor)~ 1, conf.type=\"none\")");
        String evalStr = "jpeg('SurvivalPlot-" + this.SolutionID + ".jpg')";
        re.eval(evalStr);
        re.eval("plot(timestrata1.surv, col=c(2,3), xlab=\"Time\", ylab=\"Survival Probability\")");
        re.eval("par(new=T)");
        re.eval("plot(timestrata1.surv1,col=1)");
        re.eval("legend(0.2, c(\"Group1\",\"Group2\",\"Whole\"))");
        re.eval("dev.off()");

        System.out.println("\nCluster Assignments:");
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }

    } catch (Exception e) {
        System.err.println("Failed to read or evaluate the data file.");
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:jmetal.test.survivalanalysis.GenerateSurvivalGraphOld.java

License:Open Source License

/** 
 * Evaluates a solution - actually generate the survival graph 
 * @param solution The solution to evaluate
 */
public void evaluate(Solution solution) {
    Binary variable;
    int counterSelectedFeatures;

    DataSource source;

    double testStatistic = Double.MAX_VALUE;
    double pValue = Double.MAX_VALUE;
    //double statScore;
    REXP x;

    variable = ((Binary) solution.getDecisionVariables()[0]);

    counterSelectedFeatures = 0;

    System.out.println("\nSolution ID " + this.SolutionID);

    try {
        // read the data file 
        source = new DataSource(this.dataFileName);
        Instances data = source.getDataSet();
        //System.out.print("Data read successfully. ");
        //System.out.print("Number of attributes: " + data.numAttributes());
        //System.out.println(". Number of instances: " + data.numInstances());

        // save the attributes 'T' and 'Censor'
        attTime = data.attribute(data.numAttributes() - 2);
        attCensor = data.attribute(data.numAttributes() - 1);

        // First filter the attributes based on chromosome
        Instances tmpData = this.filterByChromosome(data, solution);

        // Now remove the attributes 'T' and 'Censor'
        Remove filter = new Remove();
        // remove the last two attributes: 'T' and 'Censor'
        filter.setAttributeIndices("" + (tmpData.numAttributes() - 1) + "," + tmpData.numAttributes());
        //System.out.println("After chromosome filtering no of attributes: " + tmpData.numAttributes());
        filter.setInputFormat(tmpData);
        Instances dataClusterer = Filter.useFilter(tmpData, filter);

        Enumeration<Attribute> attributeList = dataClusterer.enumerateAttributes();
        System.out.println("Selected attributes: ");
        while (attributeList.hasMoreElements()) {
            Attribute att = attributeList.nextElement();
            System.out.print(att.name() + ",");
        }

        System.out.println();
        // filtering complete

        // Debug: write the filtered dataset
        /*
        ArffSaver saver = new ArffSaver();
        saver.setInstances(dataClusterer);
        saver.setFile(new File("filteered-data.arff"));
        saver.writeBatch();
         */

        // train hierarchical clusterer

        HierarchicalClusterer clusterer = new HierarchicalClusterer();
        clusterer.setOptions(new String[] { "-L", "COMPLETE" }); // complete linkage clustering
        //clusterer.setDebug(true);
        clusterer.setNumClusters(2);
        clusterer.setDistanceFunction(new EuclideanDistance());
        //clusterer.setDistanceFunction(new ChebyshevDistance());
        clusterer.setDistanceIsBranchLength(false);

        clusterer.buildClusterer(dataClusterer);

        // Cluster evaluation:
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);

        if (this.testDataFileName != null) {

            DataSource testSource = new DataSource(this.testDataFileName);

            Instances tmpTestData = testSource.getDataSet();
            tmpTestData.setClassIndex(tmpTestData.numAttributes() - 1);
            //testSource.

            // First filter the attributes based on chromosome
            Instances testData = this.filterByChromosome(tmpTestData, solution);
            //String[] options = new String[2];
            //options[0] = "-t";
            //options[1] = "/some/where/somefile.arff";
            //eval.
            //System.out.println(eval.evaluateClusterer(testData, options));
            eval.evaluateClusterer(testData);
            System.out.println("\nCluster evluation for this solution: " + eval.clusterResultsToString());
        }

        // Print the cluster assignments:

        // save the cluster assignments
        //if (printClusterAssignment==true){
        int[] clusterAssignment = new int[dataClusterer.numInstances()];
        int classOneCnt = 0;
        int classTwoCnt = 0;
        for (int i = 0; i < dataClusterer.numInstances(); ++i) {
            clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i));
            if (clusterAssignment[i] == 0) {
                ++classOneCnt;
            } else if (clusterAssignment[i] == 1) {
                ++classTwoCnt;
            }
            //System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }

        System.out.println("Class 1 cnt: " + classOneCnt + " Class 2 cnt: " + classTwoCnt);
        //}

        /*
        // create arrays with time (event occurrence time) and censor data for use with jstat LogRankTest
        double[] time1 = new double[classOneCnt];
        double[] censor1 = new double[classOneCnt];
        double[] time2 = new double[classTwoCnt];
        double[] censor2 = new double[classTwoCnt];

        //data = source.getDataSet();
        for (int i = 0, cnt1 = 0, cnt2 = 0; i < dataClusterer.numInstances(); ++i) {
            clusterAssignment[i] = clusterer.clusterInstance(dataClusterer.get(i));
            if (clusterAssignment[i] == 0) {
                time1[cnt1] = data.get(i).value(attTime);
                censor1[cnt1++] = 1;
                //System.out.println("i: " + i + " T: " + time1[cnt1-1]);
            } else if (clusterAssignment[i] == 1) {
                time2[cnt2] = data.get(i).value(attTime);
                //System.out.println("i: " + i + " T: " + time2[cnt2-1]);
                censor2[cnt2++] = 1;
            }
            //System.out.println("Instance " + i + ": " + clusterAssignment[i]);
        }

        //Instances[] classInstances = separateClassInstances(clusterAssignment, this.dataFileName, solution);
        //System.out.println("Class instances separated");

        // calculate log rank test and p values
        //LogRankTest testclass1 = new LogRankTest(time1, censor1, time2, censor2);
        //testStatistic = testclass1.testStatistic;
        //pValue = testclass1.pValue;

        WilcoxonTest testclass1 = new WilcoxonTest(time1, censor1, time2, censor2);
        testStatistic = testclass1.testStatistic;
        pValue = testclass1.pValue;
        */

        String strT = "time1 <- c(";
        String strC = "censor1 <- c(";
        String strG = "group1 <- c(";

        for (int i = 0; i < dataClusterer.numInstances() - 1; ++i) {
            strT = strT + (int) data.get(i).value(attTime) + ",";
            strG = strG + clusterer.clusterInstance(dataClusterer.get(i)) + ",";
            strC = strC + (int) data.get(i).value(attCensor) + ",";

        }

        int tmpi = dataClusterer.numInstances() - 1;
        strT = strT + (int) data.get(tmpi).value(attTime) + ")";
        strG = strG + clusterer.clusterInstance(dataClusterer.get(tmpi)) + ")";
        strC = strC + (int) data.get(tmpi).value(attCensor) + ")";

        this.re.eval(strT);
        this.re.eval(strC);
        this.re.eval(strG);

        // for MyLogRankTest

        double[] time1 = new double[classOneCnt];
        double[] time2 = new double[classTwoCnt];
        double[] censor1 = new double[classOneCnt];
        double[] censor2 = new double[classTwoCnt];

        int i1 = 0, i2 = 0;

        for (int i = 0; i < dataClusterer.numInstances(); ++i) {

            if (clusterer.clusterInstance(dataClusterer.get(i)) == 0) {
                time1[i1] = data.get(i).value(attTime);
                censor1[i1] = data.get(i).value(attCensor);
                ++i1;
            } else {
                time2[i2] = data.get(i).value(attTime);
                censor2[i2] = data.get(i).value(attCensor);
                ++i2;
            }

        }

        /** If you are calling surv_test from coin library */
        /*
        re.eval("library(coin)");
        re.eval("grp <- factor (group)");
        re.eval("result <- surv_test(Surv(time,censor)~grp,distribution=\"exact\")");
                
        x=re.eval("statistic(result)");
        testStatistic = x.asDouble();
        //x=re.eval("pvalue(result)");
        //pValue = x.asDouble();
        //System.out.println("StatScore: " + statScore + "pValue: " + pValue);
        */

        /** If you are calling survdiff from survival library (much faster) */
        re.eval("library(survival)");
        re.eval("res21 <- survdiff(Surv(time1,censor1)~group1,rho=0)");
        x = re.eval("res21$chisq");
        testStatistic = x.asDouble();
        //System.out.println(x);
        x = re.eval("pchisq(res21$chisq, df=1, lower.tail = FALSE)");
        //x = re.eval("1.0 - pchisq(res2$chisq, df=1)");
        pValue = x.asDouble();
        System.out.println("Results from R:");
        System.out.println("StatScore: " + testStatistic + "  pValue: " + pValue);

        re.eval("timestrata1.surv <- survfit( Surv(time1, censor1)~ strata(group1), conf.type=\"log-log\")");
        re.eval("timestrata1.surv1 <- survfit( Surv(time1, censor1)~ 1, conf.type=\"none\")");
        String evalStr = "jpeg('SurvivalPlot-" + this.SolutionID + ".jpg')";
        re.eval(evalStr);
        re.eval("plot(timestrata1.surv, col=c(2,3), xlab=\"Time\", ylab=\"Survival Probability\")");
        re.eval("par(new=T)");
        re.eval("plot(timestrata1.surv1,col=1)");
        re.eval("legend(0.2, c(\"Group1\",\"Group2\",\"Whole\"))");
        re.eval("dev.off()");

        System.out.println("Results from my code: ");
        LogRankTest lrt = new LogRankTest(time1, time2, censor1, censor2);
        double[] results = lrt.logRank();
        System.out.println("Statistics: " + results[0] + " variance: " + results[1] + " pValue: " + results[2]);

    } catch (Exception e) {
        System.err.println("Failed to read or evaluate the data file.");
        e.printStackTrace();
        System.exit(1);
    }

    /**********
     * The current implementation considers two objectives:
     * 1. p-value to be minimized / statistical score to be maximized
     * 2. number of features to be maximized/minimized
     */

}

From source file:kmeans_extend.KMeansMain.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws Exception {
    System.out.print("Put number cluster : ");
    Scanner scanner = new Scanner(System.in);
    int numCluster = scanner.nextInt();
    Instances data = loadData("src/Dataset/weather.arff");
    MyKMeans kmeans = new MyKMeans(numCluster);
    kmeans.buildClusterer(data);
    kmeans.printFinalCentroid();
    ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(kmeans);
    eval.evaluateClusterer(data);
    System.out.println("\n==== Evaluation Result ====");
    System.out.println(eval.clusterResultsToString());

}

From source file:lineage.AAFClusterer.java

License:Open Source License

/**
 * Expectation Maximization clustering.
 * @param data matrix of observations (numObs x numFeatures)
 * @param numObs number of observations
 * @param numFeatures number of features per observation
 */
public Cluster[] em(double[][] data, int numObs, int numFeatures) {
    Instances ds = convertMatrixToWeka(data, numObs, numFeatures);
    EM clusterer = new EM();
    try {
        clusterer.buildClusterer(ds);
        ClusterEvaluation eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(new Instances(ds));
        int numClusters = eval.getNumClusters();

        Cluster[] clusters = new Cluster[numClusters];
        double[][] clusterCentroids = new double[numClusters][numFeatures];
        int[] clusterCount = new int[numClusters];

        double[] assignments = eval.getClusterAssignments();
        for (int i = 0; i < ds.numInstances(); i++) {
            Instance inst = ds.instance(i);
            int clusterId = (int) assignments[i];
            for (int j = 0; j < numFeatures; j++) {
                clusterCentroids[clusterId][j] += inst.value(j);
            }
            clusterCount[clusterId]++;
        }

        for (int i = 0; i < numClusters; i++) {
            double[] mean = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                mean[j] = clusterCentroids[i][j] / clusterCount[i];
            }
            clusters[i] = new Cluster(mean, i);
        }

        // cluster members & std dev
        double[][] clusterStdDev = new double[numClusters][numFeatures];
        for (int i = 0; i < ds.numInstances(); i++) {
            int clusterId = (int) assignments[i];
            clusters[clusterId].addMember(i);
            for (int j = 0; j < numFeatures; j++) {
                clusterStdDev[clusterId][j] += Math
                        .pow(ds.instance(i).value(j) - clusters[clusterId].getCentroid()[j], 2);
            }
        }

        for (int i = 0; i < numClusters; i++) {
            double[] dev = new double[numFeatures];
            for (int j = 0; j < numFeatures; j++) {
                dev[j] = Math.sqrt(clusterStdDev[i][j] / clusterCount[i]);
            }
            clusters[i].setStdDev(dev);
        }

        return clusters;
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}

From source file:lu.lippmann.cdb.datasetview.tabs.UnsupervisedFeatureEvaluationTabView.java

License:Open Source License

private static Instances buildDerivatedDatasetForFeaturesClusters(final Instances dataSet, final int k)
        throws Exception {
    final Instances trdataSet = WekaDataProcessingUtil.buildTransposedDataSet(dataSet);

    final EuclideanDistance distanceFunction = new EuclideanDistance(trdataSet);

    final SimpleKMeans skm = WekaMachineLearningUtil.buildSimpleKMeansClustererWithK(k, distanceFunction);
    skm.buildClusterer(trdataSet);
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(skm);
    eval.evaluateClusterer(trdataSet);

    final int numClusters = eval.getNumClusters();
    final List<String> possibleValues = new ArrayList<String>(numClusters);
    for (int c = 0; c < numClusters; c++)
        possibleValues.add("cluster_" + c);

    final double[] clusterAssignments = eval.getClusterAssignments();

    final int numAttributes = dataSet.numAttributes();
    final List<Integer> valueForEachFeature = new ArrayList<Integer>(numAttributes);
    for (int j = 0; j < numAttributes; j++) {
        //System.out.println(clusterAssignments[j]+" "+(int)clusterAssignments[j]);
        valueForEachFeature.add((int) clusterAssignments[j]);
    }

    return buildDerivatedDataset(dataSet, possibleValues, valueForEachFeature);
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License:Open Source License

/**
 * Runs SimpleKMeans with K clusters and returns the cluster assignment of each instance.
 * @param newInstances the dataset to cluster
 * @param K the number of clusters
 * @return the cluster assignment for each instance
 * @throws Exception if the options are invalid or clustering fails
 */
public static double[] doKMeans(final Instances newInstances, final int K) throws Exception {
    final SimpleKMeans clusterer = new SimpleKMeans();
    clusterer.setOptions(
            Utils.splitOptions("-N " + K + " -R first-last -I 500 -S 10 -A weka.core.EuclideanDistance"));

    clusterer.buildClusterer(newInstances);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(clusterer);
    eval.evaluateClusterer(newInstances);

    double[] ass = eval.getClusterAssignments();
    return ass;
}

From source file:lu.lippmann.cdb.lab.beta.util.WekaUtil2.java

License:Open Source License

/**
 * Builds the given clusterer on the instances and groups the instances by assigned cluster.
 * @param wekaClusterer the clusterer to build and evaluate
 * @param instances the dataset to cluster
 * @return one IndexedInstance group per cluster, keeping a map back to the original instance indices
 * @throws Exception if clustering fails
 */
public static List<IndexedInstance> computeClusters(final Clusterer wekaClusterer, final Instances instances)
        throws Exception {
    final Instances ii = new Instances(instances);
    ii.setClassIndex(-1);

    wekaClusterer.buildClusterer(ii);

    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(wekaClusterer);
    eval.evaluateClusterer(ii);

    final int clustersCount = eval.getNumClusters();
    final List<IndexedInstance> clustersList = new ArrayList<IndexedInstance>(clustersCount);

    //Initialize instances
    for (int k = 0; k < clustersCount; k++) {
        clustersList.add(new IndexedInstance(new Instances(instances, 0), new HashMap<Integer, Integer>()));
    }

    final double[] ass = eval.getClusterAssignments();
    if (ass.length != ii.numInstances())
        throw new IllegalStateException();
    for (int i = 0; i < ass.length; i++) {
        IndexedInstance idxi = clustersList.get((int) ass[i]);
        idxi.getInstances().add(instances.instance(i));
        int pos = idxi.getInstances().size() - 1;
        idxi.getMapOrigIndex().put(pos, i);
    }

    return clustersList;
}

From source file:lu.lippmann.cdb.lab.kmeans.KmeansImproved.java

License:Open Source License

/**
 * Clusters the instances with k-means, choosing the number of clusters by the pseudo-F statistic.
 * @return the cluster assignment for each instance
 * @throws Exception if clustering fails
 */
public double[] getClusteredInstances() throws Exception {

    //Removing potential class index 
    instances.setClassIndex(-1);

    //Clustering using Kmeans
    int k;
    double max = 0, r2 = 0, pseudoF = 0;

    //Try k from 2 to maxClusters; this range should ideally be a parameter of this method
    SimpleKMeans bestKMeans = new SimpleKMeans();
    for (k = 2; k <= maxClusters; k++) {
        final SimpleKMeans kMeans = new SimpleKMeans();
        kMeans.setNumClusters(k);
        kMeans.buildClusterer(instances);
        //Choosing the "optimal" number of clusters
        r2 = R2(kMeans);
        pseudoF = pseudoF(r2, k);
        //System.out.println(pseudo_f);
        if (pseudoF > max) {
            max = pseudoF;
            bestKMeans = kMeans;
        }
    }

    //Real clustering using the chosen number
    final ClusterEvaluation eval = new ClusterEvaluation();
    eval.setClusterer(bestKMeans);
    eval.evaluateClusterer(instances);
    double[] clusterAssignments = eval.getClusterAssignments();

    this.usedKmeans = bestKMeans;

    return clusterAssignments;

}

From source file:model.clustering.Clustering.java

public String filledFile(Instances data, int numOfClusters, String remove) throws Exception {

    String mainData = data.toString();
    int index = mainData.indexOf("@data");
    String clusterData = mainData.substring(0, index + 6);

    Remove removeFilter = new Remove();
    removeFilter.setAttributeIndices(remove);

    kMeansCLusterer = new SimpleKMeans();
    kMeansCLusterer.setNumClusters(numOfClusters);

    FilteredClusterer filteredClusterer = new FilteredClusterer();
    filteredClusterer.setClusterer(kMeansCLusterer);
    filteredClusterer.setFilter(removeFilter);
    filteredClusterer.buildClusterer(data);

    Enumeration<Instance> newData = data.enumerateInstances();

    eval = new ClusterEvaluation();
    eval.setClusterer(filteredClusterer);
    eval.evaluateClusterer(data);

    while (newData.hasMoreElements()) {

        Instance i = (Instance) newData.nextElement();
        int kluster = filteredClusterer.clusterInstance(i);
        String instanceString = i.toString() + "," + kluster;
        clusterData = clusterData + instanceString + "\n";

    }
    return clusterData;
}

From source file:myclusterer.MyClusterer.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {

    String nameOfFile;
    Clusterer clusterer;
    Instances dataSet;
    ClusterEvaluation eval;

    //Read the input file
    Scanner scan = new Scanner(System.in);
    System.out.print("Masukkan nama file untuk di-cluster: ");
    nameOfFile = scan.nextLine();
    try {
        //Read the ARFF file
        dataSet = WekaCode.readFileArff(nameOfFile);

        //Build Clusterer
        System.out.println(
                "Choose the clusterer model: 0.SimpleKMeans / 1.HierarchicalClusterer / 2.MyKMeans / 3.MyAgnes");
        int clustererType = scan.nextInt();
        clusterer = WekaCode.buildClusterer(dataSet, clustererType);
        eval = new ClusterEvaluation();
        eval.setClusterer(clusterer);
        eval.evaluateClusterer(dataSet);
        System.out.println("Cluster Evaluation:");
        System.out.println(eval.clusterResultsToString());

        //Given test set 
    } catch (Exception ex) {
        Logger.getLogger(MyClusterer.class.getName()).log(Level.SEVERE, null, ex);
    }
}