List of usage examples for weka.core Instances numClasses
public int numClasses()
From source file:myclassifier.Util.java
/**
 * Computes the entropy (in bits) of the class labels of the given instances.
 *
 * @param instances the instances whose class values are counted
 * @return the sum over all classes of {@code -p * log2(p)}, where {@code p} is the
 *         fraction of instances carrying that class value; classes with a zero
 *         count are skipped
 */
public static double calculateE(Instances instances) {
    final int total = instances.numInstances();

    // Tally how many instances fall into each class slot.
    final double[] countsPerClass = new double[instances.numClasses()];
    for (int row = 0; row < total; row++) {
        int classSlot = (int) instances.instance(row).classValue();
        countsPerClass[classSlot] += 1;
    }

    // Accumulate -p * log2(p) over the classes that actually occur.
    double result = 0.0;
    for (double count : countsPerClass) {
        if (count > 0) {
            double fraction = count / total;
            result -= fraction * log2(fraction);
        }
    }
    return result;
}
From source file:myID3.MyId3.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance//from w w w.j a va 2s .com */ public void buildTree(Instances data) { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyId3[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyId3(); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:myID3.MyId3.java
/** * Find the entropy from a given dataset * @param data/*from w w w . ja v a 2 s . c om*/ * @return */ private double entropy(Instances data) { /* Entropy = -(p1 log2 p1) -(p2 log2 p2).... */ double numInstance = data.numInstances(); double numClass = data.numClasses(); double[] distribution = new double[data.numClasses()]; Enumeration instance = data.enumerateInstances(); while (instance.hasMoreElements()) { Instance temp = (Instance) instance.nextElement(); /* Count the p1, p2 */ distribution[(int) temp.classValue()]++; } /* Sum all the distribution */ double sum = 0; for (int i = 0; i < numClass; i++) { distribution[i] = distribution[i] / numInstance; if (distribution[i] > 0.0) distribution[i] *= Utils.log2(distribution[i]); // System.out.println(Arrays.toString(distribution)); sum += distribution[i]; } return -1 * sum; }
From source file:myid3andc45classifier.Model.MyC45.java
public void makeMyC45Tree(Instances data) throws Exception { if (data.numInstances() == 0) { attribute = null;//from www. j av a2 s .co m label = Instance.missingValue(); return; } //System.out.println("NEW"); double[] infoGainRatios = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); if (!att.isNumeric()) infoGainRatios[att.index()] = computeInfoGainRatio(data, att); else infoGainRatios[att.index()] = Double.NEGATIVE_INFINITY; //System.out.println(att.name() + " " + infoGainRatios[att.index()]); } // TODO: build the tree attribute = data.attribute(maxIndex(infoGainRatios)); //System.out.println(infoGainRatios[maxIndex(infoGainRatios)]); // Make leaf if information gain is zero. // Otherwise create successors. if (infoGainRatios[maxIndex(infoGainRatios)] <= epsilon || Double.isNaN(infoGainRatios[maxIndex(infoGainRatios)])) { attribute = null; double[] numClasses = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); numClasses[(int) inst.classValue()]++; } label = maxIndex(numClasses); classAttribute = data.classAttribute(); } else { classAttribute = data.classAttribute(); Instances[] splitData = splitInstancesByAttribute(data, attribute); Instances[] distrData = splitInstancesByAttribute(data, data.classAttribute()); distribution = new double[distrData.length]; for (int j = 0; j < distribution.length; j++) { distribution[j] = distrData[j].numInstances(); } successors = new MyC45[attribute.numValues()]; for (int j = 0; j < attribute.numValues(); j++) { successors[j] = new MyC45(); successors[j].buildClassifier(splitData[j]); } } // TODO: prune //pruneTree(data); }
From source file:myid3andc45classifier.Model.MyC45.java
public double[] listClassCountsValues(Instances data) throws Exception { double[] classCounts = new double[data.numClasses()]; //array untuk menyimpan value kelas sesuai jumlah kelas Enumeration instanceEnum = data.enumerateInstances(); //Masukkan data ke array while (instanceEnum.hasMoreElements()) { Instance inst = (Instance) instanceEnum.nextElement(); classCounts[(int) inst.classValue()]++; }/* ww w .j a v a2 s. c o m*/ return classCounts; }
From source file:myid3andc45classifier.Model.MyC45.java
/**
 * Computes the entropy (in bits) of the class distribution of the given data.
 *
 * @param data the instances whose class counts are obtained from
 *             {@code listClassCountsValues}
 * @return the sum of {@code -p * log2(p)} over every class with a positive count,
 *         where {@code p} is that count divided by the number of instances
 * @throws Exception if {@code listClassCountsValues} fails
 */
public double computeEntropy(Instances data) throws Exception {
    final double[] classCounts = listClassCountsValues(data);
    final int classTotal = data.numClasses();
    final double instanceTotal = data.numInstances();

    double result = 0;
    for (int c = 0; c < classTotal; c++) {
        double count = classCounts[c];
        if (count > 0) {
            double share = count / instanceTotal;
            result -= share * Utils.log2(share);
        }
    }
    return result;
}
From source file:myid3andc45classifier.Model.MyID3.java
public void makeMyID3Tree(Instances data) throws Exception { // Mengecek apakah tidak terdapat instance yang dalam node ini if (data.numInstances() == 0) { attribute = null;/*from w w w .j a va 2 s . c o m*/ classValue = Instance.missingValue(); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } attribute = data.attribute(maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (isDoubleEqual(infoGains[attribute.index()], 0)) { attribute = null; double[] numClasses = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); numClasses[(int) inst.classValue()]++; } label = maxIndex(numClasses); classAttribute = data.classAttribute(); } else { Instances[] splitData = splitInstancesByAttribute(data, attribute); successors = new MyID3[attribute.numValues()]; for (int j = 0; j < attribute.numValues(); j++) { successors[j] = new MyID3(); successors[j].buildClassifier(splitData[j]); } } }
From source file:myid3andc45classifier.Model.MyID3.java
/**
 * Computes the entropy (in bits) of the class distribution of the given data.
 *
 * @param data the instances whose class counts are obtained from
 *             {@code listClassCountsValues}
 * @return the accumulated {@code -p * log2(p)} over every class with a positive
 *         count, where {@code p} is that count divided by the number of instances
 * @throws Exception if {@code listClassCountsValues} fails
 */
public double computeEntropy(Instances data) throws Exception {
    double[] classCounts = listClassCountsValues(data);
    double accumulated = 0;

    int index = 0;
    while (index < data.numClasses()) {
        double occurrences = classCounts[index];
        index++;
        if (occurrences > 0) {
            // Fraction of all instances that belong to this class.
            double p = occurrences / (double) data.numInstances();
            accumulated -= p * (log2(p));
        }
    }
    return accumulated;
}
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance//from w ww.ja v a 2 s . c o m */ public void buildTree(Instances data) throws Exception { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyJ48[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyJ48(this); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance//from www . ja va 2 s . c o m */ public void pruneTree2(Instances data) throws Exception { if (currentAttribute == null) { Attribute tempAttr = predecessor.currentAttribute; predecessor.currentAttribute = null; // Set the class value as the highest frequency of the class classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); predecessor.classValue = Utils.maxIndex(classDistribution); predecessor.classAttribute = data.classAttribute(); Weka weka = new Weka(); weka.setTraining("weather.nominal.arff"); String[] options_cl = { "" }; weka.setClassifier("myJ48.MyJ48", options_cl); weka.runCV(true); double currentAccuracy = weka.getM_Evaluation().correct(); double maxFalseAccuracy = initAccuracy * 0.9; if (maxFalseAccuracy > currentAccuracy) { predecessor.currentAttribute = tempAttr; visited = true; } else { visited = false; } } else if (visited) { } else { for (int j = 0; j < currentAttribute.numValues(); j++) { if (nodes[j] == null) { //System.out.println("null nodes"); } else { //System.out.println("ga null"); } nodes[j].pruneTree(data); } } }