au.edu.usyd.it.yangpy.snp.Ensemble.java Source code


Introduction

Here is the source code for au.edu.usyd.it.yangpy.snp.Ensemble.java, a class that builds five Weka base classifiers (J48, KStar, and three cover-tree k-NN models), evaluates each by AUC, and provides ensemble integration (blocking and voting) as well as kappa and double fault diversity measures.

Source

package au.edu.usyd.it.yangpy.snp;

/* 
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
*/

import java.util.LinkedList;

import weka.core.Instances;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;

import weka.core.neighboursearch.*;
import weka.classifiers.trees.*;
import weka.classifiers.evaluation.auc.AUCCalculator;
import weka.classifiers.evaluation.auc.ClassSort;
import weka.classifiers.lazy.*;

/**
 * Creation date:      01/05/2007
 * Modification dates: 03/09/2010, 09/11/2010
 */

public class Ensemble {

    private Instances train;
    private Instances test;
    private double[] givenValue; // the given (true) class values of the test instances
    private int numInstances;
    private int numClasses;
    private int numClassifiers = 5;
    private double[] aucClassifiers = new double[numClassifiers];
    // each classifier produces a predicted class distribution for each test instance
    private double[][][] predictDistribution;
    // each classifier produces a predicted class value for each test instance
    private double[][] predictValue;
    // the vote values are obtained by averaging the predicted distributions
    private double[][] voteValue;

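    /**
     * Creates an ensemble that is trained on the given training set and
     * evaluated on the given test set.
     */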
    public Ensemble(Instances train, Instances test) {
        this.train = train;
        this.test = test;
    }

    // Builds each base classifier, records its predictions on the test set, and
    // stores its AUC. In verbose mode ("v") the individual AUC values are printed.
    public void ensemble(String mode) throws Exception {

        numInstances = test.numInstances();
        numClasses = test.numClasses();
        givenValue = new double[numInstances];
        predictDistribution = new double[numClassifiers][numInstances][numClasses];
        predictValue = new double[numClassifiers][numInstances];
        voteValue = new double[numInstances][numClasses];

        // Setting the given class values of the test instances.
        for (int i = 0; i < numInstances; i++) {
            givenValue[i] = test.instance(i).classValue();
        }

        // Calculating the predicted class values with each base classifier:
        // J48, KStar, coverTree1NN, coverTree3NN and coverTree5NN.

        J48 tree = new J48();
        tree.setUnpruned(true);
        aucClassifiers[0] = classify(tree, 0);

        KStar kstar = new KStar();
        aucClassifiers[1] = classify(kstar, 1);

        // cover-tree k-NN classifiers; each IBk gets its own CoverTree search instance
        IBk ctnn1 = new IBk(1);
        ctnn1.setNearestNeighbourSearchAlgorithm(new CoverTree());
        aucClassifiers[2] = classify(ctnn1, 2);

        IBk ctnn3 = new IBk(3);
        ctnn3.setNearestNeighbourSearchAlgorithm(new CoverTree());
        aucClassifiers[3] = classify(ctnn3, 3);

        IBk ctnn5 = new IBk(5);
        ctnn5.setNearestNeighbourSearchAlgorithm(new CoverTree());
        aucClassifiers[4] = classify(ctnn5, 4);

        // Print the individual classification results when running in verbose mode ("v").
        if (mode.equals("v")) {
            System.out.println("J48   AUC: " + aucClassifiers[0]);
            System.out.println("KStar AUC: " + aucClassifiers[1]);
            System.out.println("CTNN1 AUC: " + aucClassifiers[2]);
            System.out.println("CTNN3 AUC: " + aucClassifiers[3]);
            System.out.println("CTNN5 AUC: " + aucClassifiers[4]);
            System.out.println("   -         -   ");
        }
    }

    // blocking integration: returns the mean AUC of the base classifiers
    public double blocking() {
        double blockAccuracy = 0.0;

        // Calculating the blocking score (average AUC of the base classifiers).
        for (int i = 0; i < numClassifiers; i++) {
            blockAccuracy += aucClassifiers[i];
        }

        blockAccuracy /= numClassifiers;

        return blockAccuracy;
    }

    // voting integration: averages the predicted class distributions of the base
    // classifiers and returns the AUC of the resulting ensemble prediction (as a percentage)
    public double voting() {

        for (int i = 0; i < test.numInstances(); i++) {
            // sum the predicted class distributions from all classifiers
            for (int cId = 0; cId < numClassifiers; cId++) {
                for (int t = 0; t < test.numClasses(); t++) {
                    voteValue[i][t] += predictDistribution[cId][i][t];
                }
            }

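            // divide by the number of classifiers to obtain the average distribution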
            for (int t = 0; t < test.numClasses(); t++) {
                voteValue[i][t] /= numClassifiers;
            }
        }

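        // pair the averaged probability of class 1 with the true class of each instance,
        // then compute the ensemble AUC from these pairs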
        int classIndex = 1;
        double[][] probList = new double[givenValue.length][2];
        for (int i = 0; i < givenValue.length; i++) {
            probList[i][0] = voteValue[i][classIndex];
            probList[i][1] = givenValue[i];
        }

        double aucEnsemble = areaUnderROC(probList, classIndex);
        return aucEnsemble * 100;
    }

    // kappa diversity: for each pair of classifiers, computes the kappa statistic from the
    // observed agreement (phi_1) and the agreement expected by chance (phi_2),
    // kappa = (phi_1 - phi_2) / (1 - phi_2), rescales it to (1 - kappa) / 2 so that larger
    // values indicate more diversity, and returns the average over all pairs as a percentage
    public double kappaDiversity() {
        double diversityScore = 0.0;
        double phi_1 = 0.0;
        double phi_2 = 0.0;
        int count = 0;

        for (int i = 0; i < numClassifiers; i++) {
            for (int j = i + 1; j < numClassifiers; j++) {
                count++;
                double a0 = 0;
                double a1 = 0;
                double b0 = 0;
                double b1 = 0;
                int numOfAgree = 0;
                phi_1 = 0.0;
                phi_2 = 0.0;

                for (int k = 0; k < numInstances; k++) {
                    // count the instances on which classifiers i and j agree
                    if (predictValue[i][k] == predictValue[j][k])
                        numOfAgree++;

                    // count how often each classifier predicts class 0 and class 1
                    if (predictValue[i][k] == 0)
                        a0++;
                    if (predictValue[i][k] == 1)
                        a1++;
                    if (predictValue[j][k] == 0)
                        b0++;
                    if (predictValue[j][k] == 1)
                        b1++;
                }

                phi_1 = (double) numOfAgree / (double) numInstances;
                phi_2 = (a0 / numInstances) * (b0 / numInstances) + (a1 / numInstances) * (b1 / numInstances);

                double kappa = (phi_1 - phi_2) / (1 - phi_2);
                kappa = (1 - kappa) / 2;
                //System.out.println(kappa);
                diversityScore += kappa;
            }
        }

        diversityScore = (diversityScore / count) * 100;

        return diversityScore;
    }

    // double fault diversity: for each pair of classifiers, measures the fraction of test
    // instances on which both make the same wrong prediction, averages this over all pairs
    // and returns (1 - average) as a percentage, so that larger values indicate more diversity
    public double doubleFaultDiversity() {
        double diversityScore = 0.0;
        double faultValue = 0.0;
        double overallValue = 0.0;

        for (int i = 0; i < numClassifiers; i++) {
            for (int j = i + 1; j < numClassifiers; j++) {
                int numOfFault = 0;
                faultValue = 0.0;

                for (int k = 0; k < numInstances; k++) {
                    // a double fault: both classifiers agree and the shared prediction is wrong
                    if (predictValue[i][k] == predictValue[j][k]) {
                        if (givenValue[k] != predictValue[i][k])
                            numOfFault++;
                    }
                }

                faultValue = (double) numOfFault / (double) numInstances;
                overallValue += faultValue;
            }
        }

        diversityScore = 1 - ((overallValue * 2) / (numClassifiers * (numClassifiers - 1)));
        diversityScore *= 100;
        //System.out.println("double fault: " + diversityScore);

        return diversityScore;
    }

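    /**
     * Trains the given classifier on the training set, stores its predicted class
     * distribution and predicted class value for every test instance (in row cId of the
     * prediction arrays), and returns its AUC for class index 1 as a percentage.
     */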
    public double classify(Classifier c, int cId) throws Exception {

        // train the classifier with training data
        c.buildClassifier(train);

        // record the predicted class value and class distribution for each test instance
        for (int i = 0; i < test.numInstances(); i++) {
            predictDistribution[cId][i] = c.distributionForInstance(test.instance(i));
            predictValue[cId][i] = c.classifyInstance(test.instance(i));
        }

        // evaluate the classifier on the test set and return its AUC (class index 1) as a percentage
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(c, test);
        return eval.areaUnderROC(1) * 100;
    }

    // calculates the AUC of the ensemble prediction from (probability, true class) pairs
    public double areaUnderROC(double[][] probList, int classIndex) {

        LinkedList<ClassSort> classSortLinkedList = new LinkedList<ClassSort>();
        for (int i = 0; i < probList.length; i++) {
            int trueClass;
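            // when class 0 is treated as the positive class, flip the true labels so that
            // the stored probability always refers to the positive class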
            if (classIndex == 0) {
                if ((int) probList[i][1] == 0)
                    trueClass = 1;
                else
                    trueClass = 0;
            } else
                trueClass = (int) probList[i][1];
            double prob = probList[i][0];
            //System.out.print(prob + " " + trueClass + "\n");
            classSortLinkedList.add(new ClassSort(prob, trueClass));
        }
        double roc = AUCCalculator.getROC(classSortLinkedList);
        return roc;
    }
}
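
Example usage

The snippet below is a minimal, hypothetical sketch of how the Ensemble class above could be driven. The file names train.arff and test.arff and the class name EnsembleDemo are placeholders, not part of the original code; data loading uses Weka's ConverterUtils.DataSource, and the class attribute is assumed to be the last attribute of each data set.

package au.edu.usyd.it.yangpy.snp;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class EnsembleDemo {

    public static void main(String[] args) throws Exception {
        // load training and test data (file names are placeholders)
        Instances train = DataSource.read("train.arff");
        Instances test = DataSource.read("test.arff");

        // the class attribute is assumed to be the last attribute
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        // build and evaluate the five base classifiers in verbose mode
        Ensemble ensemble = new Ensemble(train, test);
        ensemble.ensemble("v");

        // integration and diversity measures defined by Ensemble
        System.out.println("Blocking (mean AUC):    " + ensemble.blocking());
        System.out.println("Voting (ensemble AUC):  " + ensemble.voting());
        System.out.println("Kappa diversity:        " + ensemble.kappaDiversity());
        System.out.println("Double fault diversity: " + ensemble.doubleFaultDiversity());
    }
}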