List of usage examples for weka.core Instances numInstances
public int numInstances()
From source file:decisiontree.MyC45.java
/**
 * Splits a dataset into two subsets around a threshold on a numeric attribute.
 *
 * <p>An instance whose value for {@code att} is greater than or equal to
 * {@code threshold} is placed in subset 1 with that value overwritten by
 * {@code threshold}; every other instance is placed in subset 0 with the
 * value overwritten by 0. The overwrite is applied to a copy of each
 * instance rather than to the instance held by {@code data}, so the same
 * dataset can be split again with a different threshold.
 *
 * @param data the data which is to be split
 * @param att the numeric attribute to be used for splitting
 * @param threshold the cut point separating the two subsets
 * @return a two-element array: index 0 holds the instances below the
 *         threshold, index 1 the instances at or above it
 */
private Instances[] splitData(Instances data, Attribute att, double threshold) {
    Instances[] splitData = new Instances[2];
    for (int i = 0; i < splitData.length; i++) {
        // Empty set with the same header, pre-sized for the worst case.
        splitData[i] = new Instances(data, data.numInstances());
    }

    Enumeration instEnum = data.enumerateInstances();
    while (instEnum.hasMoreElements()) {
        Instance source = (Instance) instEnum.nextElement();
        // Modify a copy: calling setValue on the enumerated instance would
        // rewrite the caller's dataset and corrupt any later split of it.
        Instance inst = (Instance) source.copy();
        if (inst.value(att) >= threshold) {
            inst.setValue(att, threshold);
            splitData[1].add(inst);
        } else {
            inst.setValue(att, 0);
            splitData[0].add(inst);
        }
    }

    // Release the capacity that was reserved but not used.
    for (int i = 0; i < splitData.length; i++) {
        splitData[i].compactify();
    }
    return splitData;
}
From source file:decisiontree.MyC45.java
/**
 * Computes the information gain ratio obtained by splitting on a nominal
 * attribute.
 *
 * <p>The gain is the entropy of {@code instances} minus the size-weighted
 * entropy of each non-empty subset. It is divided by the split information
 * accumulated over the same subsets; when the split information is zero the
 * plain gain is returned instead.
 *
 * @param instances the data for which the gain ratio is to be computed
 * @param attr the nominal attribute to split on
 * @return the gain ratio for the given attribute and data
 * @throws Exception if computation fails
 */
private double computeGainRatio(Instances instances, Attribute attr) throws Exception {
    double gain = computeEntropy(instances);
    double splitInfo = 0;

    Instances[] subsets = splitData(instances, attr);
    for (int v = 0; v < attr.numValues(); v++) {
        Instances subset = subsets[v];
        if (subset.numInstances() <= 0) {
            continue;
        }
        gain -= ((double) subset.numInstances() / (double) instances.numInstances())
                * computeEntropy(subset);
        double weight = (double) subset.numInstances() / instances.numInstances();
        if (weight != 0) {
            // Accumulates a non-positive sum; the sign is flipped below.
            splitInfo += weight * Utils.log2(weight);
        }
    }

    if (splitInfo == 0) {
        return gain;
    }
    return -1 * gain / splitInfo;
}
From source file:decisiontree.MyC45.java
/**
 * Computes the information gain ratio obtained by a binary split of a
 * numeric attribute at the given threshold.
 *
 * <p>The gain is the entropy of {@code instances} minus the size-weighted
 * entropy of each non-empty half of the split. It is divided by the split
 * information accumulated over the same halves; when the split information
 * is zero the plain gain is returned instead.
 *
 * @param instances the data for which the gain ratio is to be computed
 * @param attr the numeric attribute to split on
 * @param threshold the cut point handed to the three-argument splitData
 * @return the gain ratio for the given attribute, data and threshold
 * @throws Exception if computation fails
 */
private double computeGainRatio(Instances instances, Attribute attr, double threshold) throws Exception {
    double gain = computeEntropy(instances);
    double splitInfo = 0;

    Instances[] halves = splitData(instances, attr, threshold);
    for (int side = 0; side < 2; side++) {
        Instances half = halves[side];
        if (half.numInstances() <= 0) {
            continue;
        }
        gain -= ((double) half.numInstances() / (double) instances.numInstances())
                * computeEntropy(half);
        double weight = (double) half.numInstances() / instances.numInstances();
        if (weight != 0) {
            // Accumulates a non-positive sum; the sign is flipped below.
            splitInfo += weight * Utils.log2(weight);
        }
    }

    if (splitInfo == 0) {
        return gain;
    }
    return -1 * gain / splitInfo;
}
From source file:decisiontree.MyC45.java
private Instances handleMissingValues(Instances data) throws Exception { Instances newData = data; Enumeration attrEnum = newData.enumerateAttributes(); while (attrEnum.hasMoreElements()) { Attribute attr = (Attribute) attrEnum.nextElement(); AttributeStats attrStats = newData.attributeStats(attr.index()); if (attr.isNominal()) { int maxIdx = 0; for (int i = 0; i < attr.numValues(); i++) { if (attrStats.nominalCounts[i] > attrStats.nominalCounts[maxIdx]) { maxIdx = i;//from w w w.j a v a 2s .com } } for (int i = 0; i < newData.numInstances(); i++) { if (newData.instance(i).isMissing(attr.index())) { newData.instance(i).setValue(attr.index(), maxIdx); } } } else if (attr.isNumeric()) { double mean = attrStats.numericStats.mean; for (int i = 0; i < newData.numInstances(); i++) { if (newData.instance(i).isMissing(attr.index())) { newData.instance(i).setValue(attr.index(), mean); } } } } return newData; }
From source file:decisiontree.MyC45.java
private double computeThreshold(Instances instances, Attribute attr) throws Exception { double[] threshold = new double[instances.numInstances()]; double[] gainRatio = new double[instances.numInstances()]; for (int i = 0; i < instances.numInstances() - 1; i++) { if (instances.instance(i).classValue() != instances.instance(i + 1).classValue()) { threshold[i] = (instances.instance(i).value(attr) + instances.instance(i + 1).value(attr)) / 2; gainRatio[i] = computeGainRatio(instances, attr, threshold[i]); }//from w w w. ja va 2 s .c om } return (double) threshold[Utils.maxIndex(gainRatio)]; }
From source file:decisiontree.MyID3.java
private void makeTree(Instances data) { // Check if no instances have reached this node. if (data.numInstances() == 0) { splitAttr = null;//from w w w . j a v a 2 s . c o m leafValue = Double.NaN; leafDist = new double[data.numClasses()]; return; } if (data.numDistinctValues(data.classIndex()) == 1) { leafValue = data.firstInstance().classValue(); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } splitAttr = data.attribute(maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (Utils.eq(infoGains[splitAttr.index()], 0)) { splitAttr = null; leafDist = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); leafDist[(int) inst.classValue()]++; } normalize(leafDist); leafValue = Utils.maxIndex(leafDist); classAttr = data.classAttribute(); } else { Instances[] splitData = splitData(data, splitAttr); child = new MyID3[splitAttr.numValues()]; for (int j = 0; j < splitAttr.numValues(); j++) { child[j] = new MyID3(); child[j].makeTree(splitData[j]); } } }
From source file:decisiontree.MyID3.java
private double computeInfoGain(Instances data, Attribute att) { double infoGain = computeEntropy(data); Instances[] splitData = splitData(data, att); for (Instances split : splitData) { if (split.numInstances() > 0) { infoGain -= ((double) split.numInstances() / (double) data.numInstances()) * computeEntropy(split); }//from ww w . j av a2 s . c o m } return infoGain; }
From source file:decisiontree.MyID3.java
private double computeEntropy(Instances data) { int numClasses = data.numClasses(); int[] classCount = new int[numClasses]; ArrayList<Double> classValues = new ArrayList<>(); Enumeration<Instance> instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance instance = instEnum.nextElement(); double classValue = instance.classValue(); if (!classValues.contains(classValue)) { classValues.add(classValue); }// ww w. j av a 2 s.c o m int index = classValues.indexOf(classValue); classCount[index]++; } double entropy = 0.0; for (Double value : classValues) { int index = classValues.indexOf(value); if (classCount[index] > 0) { double temp = (double) classCount[index] / data.numInstances(); entropy -= temp * Utils.log2(temp); } } return entropy; }
From source file:decisiontree.MyID3.java
private Instances[] splitData(Instances data, Attribute att) { Instances[] splitData = new Instances[att.numValues()]; for (int j = 0; j < att.numValues(); j++) { splitData[j] = new Instances(data, data.numInstances()); }// ww w . ja va 2 s . com Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); splitData[(int) inst.value(att)].add(inst); } for (Instances split : splitData) { split.compactify(); } return splitData; }
From source file:decisiontreeclassifier.ITree2.java
/******************************************************************** * Changes the missing data to 0.0. For the voting data set, this * should be sufficient seeing as 0.00 is more or less random. ********************************************************************/ public Instances fixMissingData(Instances iToFix) { for (int i = 0; i < iToFix.numInstances(); i++) { for (int j = 0; j < iToFix.numAttributes(); j++) { if (iToFix.instance(i).isMissing(j)) { iToFix.instance(i).setValue(j, 0.0); }// www .j a va2 s . com } } return iToFix; }