Example usage for weka.core Instances numClasses

List of usage examples for weka.core Instances numClasses

Introduction

In this page you can find the example usage for weka.core Instances numClasses.

Prototype


public int numClasses()

Source Link

Document

Returns the number of class labels.

Usage

From source file:myclassifier.Util.java

/**
 * Computes the base-2 entropy of the class labels found in a dataset.
 *
 * @param instances dataset whose class values are tallied
 * @return the sum of {@code -p * log2(p)} over every class that has at least
 *         one instance, where {@code p} is that class's share of all instances
 */
public static double calculateE(Instances instances) {
    final int total = instances.numInstances();

    // Tally how many instances carry each class label.
    double[] countPerLabel = new double[instances.numClasses()];
    for (int row = 0; row < total; row++) {
        int label = (int) instances.instance(row).classValue();
        countPerLabel[label] += 1;
    }

    double result = 0.0;
    for (double count : countPerLabel) {
        if (count <= 0) {
            continue; // a class without instances contributes nothing
        }
        double share = count / total;
        result -= share * log2(share);
    }
    return result;
}

From source file:myID3.MyId3.java

/**
 * Builds this tree node from the given instances.
 *
 * <p>The information gain of every enumerated attribute is computed, and the
 * attribute at the index returned by {@code maxIndex} becomes the node's
 * attribute. If its gain equals zero the node turns into a leaf that stores
 * the normalized class distribution and the index of its largest entry;
 * otherwise one child node is built per value of the attribute. When
 * {@code data} holds no instances the node gets a missing class value and an
 * all-zero distribution.
 *
 * @param data instances that reach this node
 */
public void buildTree(Instances data) {
    // No instances: mark the class value as missing and stop.
    if (data.numInstances() <= 0) {
        classAttribute = null;
        classValue = Utils.missingValue();
        classDistribution = new double[data.numClasses()];
        return;
    }

    // Information gain per attribute, stored at the attribute's own index.
    double[] gains = new double[data.numAttributes()];
    for (Enumeration attrs = data.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute candidate = (Attribute) attrs.nextElement();
        gains[candidate.index()] = informationGain(data, candidate);
    }

    currentAttribute = data.attribute(maxIndex(gains));
    boolean gainIsZero = Utils.eq(gains[currentAttribute.index()], 0);

    if (!gainIsZero) {
        // Internal node: build one child per value of the chosen attribute.
        Instances[] partitions = splitDataByAttribute(data, currentAttribute);
        int branchCount = currentAttribute.numValues();
        nodes = new MyId3[branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            nodes[branch] = new MyId3();
            nodes[branch].buildTree(partitions[branch]);
        }
        return;
    }

    // Zero gain: this node is a leaf labelled by the class distribution.
    currentAttribute = null;
    classDistribution = new double[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        classDistribution[(int) row.classValue()]++;
    }
    Utils.normalize(classDistribution);
    classValue = Utils.maxIndex(classDistribution);
    classAttribute = data.classAttribute();
}

From source file:myID3.MyId3.java

/**
 * Find the entropy from a given dataset
 * @param data/*from   w  w w  . ja v  a  2 s  .  c  om*/
 * @return 
 */
private double entropy(Instances data) {

    /*  Entropy = -(p1 log2 p1) -(p2 log2 p2).... */

    double numInstance = data.numInstances();
    double numClass = data.numClasses();
    double[] distribution = new double[data.numClasses()];

    Enumeration instance = data.enumerateInstances();
    while (instance.hasMoreElements()) {
        Instance temp = (Instance) instance.nextElement();
        /* Count the p1, p2 */
        distribution[(int) temp.classValue()]++;
    }

    /* Sum all the distribution */
    double sum = 0;
    for (int i = 0; i < numClass; i++) {
        distribution[i] = distribution[i] / numInstance;
        if (distribution[i] > 0.0)
            distribution[i] *= Utils.log2(distribution[i]);
        // System.out.println(Arrays.toString(distribution));
        sum += distribution[i];
    }

    return -1 * sum;
}

From source file:myid3andc45classifier.Model.MyC45.java

/**
 * Builds this tree node from the given instances using the information gain
 * ratio of each attribute.
 *
 * <p>Numeric attributes are given a ratio of negative infinity. The attribute
 * at the index returned by {@code maxIndex} is selected; if its ratio is at
 * most {@code epsilon} or is NaN the node becomes a leaf labelled with the
 * result of {@code maxIndex} over the per-class instance counts. Otherwise
 * the per-class instance counts are stored in {@code distribution} and one
 * successor is built per value of the selected attribute.
 *
 * @param data instances that reach this node
 * @throws Exception declared by the signature; the visible body does not throw directly
 */
public void makeMyC45Tree(Instances data) throws Exception {
    // An empty partition yields a node without attribute and with a missing label.
    if (data.numInstances() == 0) {
        attribute = null;
        label = Instance.missingValue();
        return;
    }

    // Gain ratio per attribute, stored at the attribute's own index.
    double[] gainRatios = new double[data.numAttributes()];
    for (Enumeration attrs = data.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute candidate = (Attribute) attrs.nextElement();
        if (candidate.isNumeric()) {
            gainRatios[candidate.index()] = Double.NEGATIVE_INFINITY;
        } else {
            gainRatios[candidate.index()] = computeInfoGainRatio(data, candidate);
        }
    }

    int bestIndex = maxIndex(gainRatios);
    attribute = data.attribute(bestIndex);
    double bestRatio = gainRatios[bestIndex];

    boolean makeLeaf = bestRatio <= epsilon || Double.isNaN(bestRatio);
    if (makeLeaf) {
        // Leaf: label it from the per-class instance counts.
        attribute = null;
        double[] classCounts = new double[data.numClasses()];
        for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
            Instance row = (Instance) rows.nextElement();
            classCounts[(int) row.classValue()]++;
        }
        label = maxIndex(classCounts);
        classAttribute = data.classAttribute();
    } else {
        classAttribute = data.classAttribute();
        Instances[] byAttribute = splitInstancesByAttribute(data, attribute);

        // Splitting on the class attribute gives the number of instances per class.
        Instances[] byClass = splitInstancesByAttribute(data, data.classAttribute());
        distribution = new double[byClass.length];
        for (int cls = 0; cls < distribution.length; cls++) {
            distribution[cls] = byClass[cls].numInstances();
        }

        int branchCount = attribute.numValues();
        successors = new MyC45[branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            successors[branch] = new MyC45();
            successors[branch].buildClassifier(byAttribute[branch]);
        }
    }
    // TODO: prune (pruneTree(data) is not invoked here)
}

From source file:myid3andc45classifier.Model.MyC45.java

/**
 * Counts how many instances of the given data fall into each class.
 *
 * @param data instances whose class values are tallied
 * @return an array with one entry per class, indexed by class value, holding
 *         the number of instances with that class value
 * @throws Exception declared by the signature; the visible body does not throw directly
 */
public double[] listClassCountsValues(Instances data) throws Exception {
    // One counter per class, sized by the number of classes.
    double[] countPerClass = new double[data.numClasses()];

    // Add each instance to the counter of its class.
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        int cls = (int) row.classValue();
        countPerClass[cls] += 1;
    }
    return countPerClass;
}

From source file:myid3andc45classifier.Model.MyC45.java

/**
 * Computes the base-2 entropy of the class distribution of the given data.
 *
 * @param data instances whose per-class counts are obtained from
 *        {@code listClassCountsValues}
 * @return the sum of {@code -p * log2(p)} over every class with a positive
 *         count, where {@code p} is the count divided by the number of instances
 * @throws Exception if {@code listClassCountsValues} throws
 */
public double computeEntropy(Instances data) throws Exception {
    double[] countPerClass = listClassCountsValues(data);

    double result = 0;
    for (int cls = 0; cls < data.numClasses(); cls++) {
        double count = countPerClass[cls];
        if (count <= 0) {
            continue; // a class without instances contributes nothing
        }
        double share = count / (double) data.numInstances();
        result -= share * (Utils.log2(share));
    }
    return result;
}

From source file:myid3andc45classifier.Model.MyID3.java

/**
 * Builds this tree node from the given instances using information gain.
 *
 * <p>The attribute at the index returned by {@code maxIndex} over the
 * per-attribute gains is selected. If its gain is equal to zero (as judged by
 * {@code isDoubleEqual}) the node becomes a leaf labelled with the result of
 * {@code maxIndex} over the per-class instance counts; otherwise one
 * successor is built per value of the selected attribute.
 *
 * @param data instances that reach this node
 * @throws Exception declared by the signature; the visible body does not throw directly
 */
public void makeMyID3Tree(Instances data) throws Exception {
    // Check whether no instances reached this node.
    if (data.numInstances() == 0) {
        attribute = null;
        // NOTE(review): this branch writes classValue while the leaf branch
        // below writes label - confirm that both fields are intended.
        classValue = Instance.missingValue();
        return;
    }

    // Information gain per attribute, stored at the attribute's own index.
    double[] gains = new double[data.numAttributes()];
    for (Enumeration attrs = data.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute candidate = (Attribute) attrs.nextElement();
        gains[candidate.index()] = computeInfoGain(data, candidate);
    }

    attribute = data.attribute(maxIndex(gains));
    boolean gainIsZero = isDoubleEqual(gains[attribute.index()], 0);

    if (!gainIsZero) {
        // Internal node: build one successor per value of the chosen attribute.
        Instances[] partitions = splitInstancesByAttribute(data, attribute);
        int branchCount = attribute.numValues();
        successors = new MyID3[branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            successors[branch] = new MyID3();
            successors[branch].buildClassifier(partitions[branch]);
        }
        return;
    }

    // Zero gain: make a leaf labelled from the per-class instance counts.
    attribute = null;
    double[] classCounts = new double[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        classCounts[(int) row.classValue()]++;
    }
    label = maxIndex(classCounts);
    classAttribute = data.classAttribute();
}

From source file:myid3andc45classifier.Model.MyID3.java

/**
 * Computes the base-2 entropy of the class distribution of the given data.
 *
 * @param data instances whose per-class counts are obtained from
 *        {@code listClassCountsValues}
 * @return the sum of {@code -p * log2(p)} over every class with a positive
 *         count, where {@code p} is the count divided by the number of instances
 * @throws Exception if {@code listClassCountsValues} throws
 */
public double computeEntropy(Instances data) throws Exception {
    double[] countPerClass = listClassCountsValues(data);

    double result = 0;
    for (int cls = 0; cls < data.numClasses(); cls++) {
        double count = countPerClass[cls];
        if (count <= 0) {
            continue; // a class without instances contributes nothing
        }
        double share = count / (double) data.numInstances();
        result -= share * (log2(share));
    }
    return result;
}

From source file:myJ48.MyJ48.java

/**
 * Builds this tree node from the given instances.
 *
 * <p>The information gain of every enumerated attribute is computed, and the
 * attribute at the index returned by {@code maxIndex} becomes the node's
 * attribute. If its gain equals zero the node turns into a leaf that stores
 * the normalized class distribution and the index of its largest entry;
 * otherwise one child node is built per value of the attribute, each
 * constructed with this node as its constructor argument. When {@code data}
 * holds no instances the node gets a missing class value and an all-zero
 * distribution.
 *
 * @param data instances that reach this node
 * @throws Exception declared by the signature; the visible body does not throw directly
 */
public void buildTree(Instances data) throws Exception {
    // No instances: mark the class value as missing and stop.
    if (data.numInstances() <= 0) {
        classAttribute = null;
        classValue = Utils.missingValue();
        classDistribution = new double[data.numClasses()];
        return;
    }

    // Information gain per attribute, stored at the attribute's own index.
    double[] gains = new double[data.numAttributes()];
    for (Enumeration attrs = data.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute candidate = (Attribute) attrs.nextElement();
        gains[candidate.index()] = informationGain(data, candidate);
    }

    currentAttribute = data.attribute(maxIndex(gains));
    boolean gainIsZero = Utils.eq(gains[currentAttribute.index()], 0);

    if (!gainIsZero) {
        // Internal node: build one child per value of the chosen attribute.
        Instances[] partitions = splitDataByAttribute(data, currentAttribute);
        int branchCount = currentAttribute.numValues();
        nodes = new MyJ48[branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            nodes[branch] = new MyJ48(this);
            nodes[branch].buildTree(partitions[branch]);
        }
        return;
    }

    // Zero gain: this node is a leaf labelled by the class distribution.
    currentAttribute = null;
    classDistribution = new double[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        classDistribution[(int) row.classValue()]++;
    }
    Utils.normalize(classDistribution);
    classValue = Utils.maxIndex(classDistribution);
    classAttribute = data.classAttribute();
}

From source file:myJ48.MyJ48.java

/**
 * Tentatively collapses the predecessor of a leaf into a leaf and keeps or
 * reverts that change based on an accuracy check.
 *
 * <p>When this node has no attribute, the predecessor's attribute is cleared
 * and its class value and class attribute are set from the class distribution
 * of {@code data}. A {@code Weka} helper is then run on
 * "weather.nominal.arff" with the classifier "myJ48.MyJ48", and the number
 * reported by {@code getM_Evaluation().correct()} is compared against 90% of
 * {@code initAccuracy}. If it falls below that threshold the predecessor's
 * attribute is restored and {@code visited} is set to true; otherwise
 * {@code visited} is set to false. When this node has an attribute and
 * {@code visited} is false, {@code pruneTree(data)} is called on every child.
 *
 * @param data instances used to compute the class distribution and passed on
 *        to the children
 * @throws Exception declared by the signature; presumably propagated from the
 *         Weka helper or pruneTree - verify against those methods
 */
public void pruneTree2(Instances data) throws Exception {
    if (currentAttribute == null) {
        // NOTE(review): predecessor is dereferenced without a null check -
        // confirm this is never called on a node without a predecessor.
        // Remember the predecessor's attribute so the change can be undone.
        Attribute tempAttr = predecessor.currentAttribute;
        predecessor.currentAttribute = null;
        // Set the class value as the highest frequency of the class
        classDistribution = new double[data.numClasses()];
        Enumeration enumInstance = data.enumerateInstances();
        while (enumInstance.hasMoreElements()) {
            Instance temp = (Instance) enumInstance.nextElement();
            classDistribution[(int) temp.classValue()]++;
        }
        Utils.normalize(classDistribution);
        predecessor.classValue = Utils.maxIndex(classDistribution);
        predecessor.classAttribute = data.classAttribute();
        // Evaluate via the project's Weka helper; the training file and
        // classifier name are hard-coded here.
        // NOTE(review): presumably runCV performs cross-validation - confirm
        // against the Weka helper class.
        Weka weka = new Weka();
        weka.setTraining("weather.nominal.arff");
        String[] options_cl = { "" };
        weka.setClassifier("myJ48.MyJ48", options_cl);

        weka.runCV(true);
        double currentAccuracy = weka.getM_Evaluation().correct();
        // Threshold: 90% of the initial accuracy.
        double maxFalseAccuracy = initAccuracy * 0.9;

        if (maxFalseAccuracy > currentAccuracy) {
            // Below the threshold: restore the predecessor's attribute.
            // NOTE(review): predecessor.classValue and predecessor.classAttribute
            // are not restored here - confirm that is intended.
            predecessor.currentAttribute = tempAttr;
            visited = true;
        } else {
            visited = false;
        }
    } else if (visited) {
        // No action when visited is already set.
    } else {
        for (int j = 0; j < currentAttribute.numValues(); j++) {
            // Both branches of this check are empty (debug output is commented
            // out), so a null child is still dereferenced by the call below.
            if (nodes[j] == null) {
                //System.out.println("null nodes");
            } else {
                //System.out.println("ga null");
            }
            // NOTE(review): this calls pruneTree, not pruneTree2 - confirm
            // which method is intended.
            nodes[j].pruneTree(data);
        }
    }
}