Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package dewaweebtreeclassifier.veranda; import java.util.Enumeration; import weka.classifiers.AbstractClassifier; import weka.classifiers.Sourcable; import weka.core.Attribute; import weka.core.Instance; import weka.core.Instances; import weka.core.Utils; /** * * @author Ahmad */ public class VerandaTree extends AbstractClassifier implements Sourcable { protected VerandaTree[] mChild; protected Attribute mSplitAttribute; protected double mClassValue; protected int[] mClassDistribution; /** * The main constructor */ public VerandaTree() { mSplitAttribute = null; } /** * * @param data */ @Override public void buildClassifier(Instances data) { // remove all instance with missing class value data.deleteWithMissingClass(); buildTree(data); } /** * * @param data */ public void buildTree(Instances data) { // exit if there is no data left in the dataset if (data.numInstances() == 0) { mChild = null; return; } double[] informationGains = new double[data.numAttributes()]; Enumeration enumAttrs = data.enumerateAttributes(); while (enumAttrs.hasMoreElements()) { Attribute attr = (Attribute) enumAttrs.nextElement(); informationGains[attr.index()] = computeGain(data, attr); } int maxIdx = Utils.maxIndex(informationGains); if (Utils.eq(informationGains[maxIdx], 0)) { mClassDistribution = new int[data.numClasses()]; Enumeration enumInst = data.enumerateInstances(); while (enumInst.hasMoreElements()) { Instance instance = (Instance) enumInst.nextElement(); mClassDistribution[(int) instance.classValue()]++; } mClassValue = Utils.maxIndex(mClassDistribution); } else { mSplitAttribute = data.attribute(maxIdx); Instances[] splitInstances = splitInstancesOnAttribute(data, mSplitAttribute); mChild = new VerandaTree[mSplitAttribute.numValues()]; for (int i = 0; i < mChild.length; i++) { mChild[i] = new VerandaTree(); mChild[i].buildTree(splitInstances[i]); } } } /** * * @param instance * @return * @throws java.lang.Exception */ @Override public double classifyInstance(Instance instance) throws Exception { if (mSplitAttribute == null) { return mClassValue; } else { return mChild[(int) instance.value(mSplitAttribute)].classifyInstance(instance); } } /** * * @param data * @param attr * @return */ public double computeGain(Instances data, Attribute attr) { double informationGain = computeEntropy(data); Instances[] splitInstances = splitInstancesOnAttribute(data, attr); for (Instances instances : splitInstances) { informationGain -= ((double) instances.numInstances() / (double) data.numInstances()) * computeEntropy(instances); } return informationGain; } /** * * @param data * @return */ public double computeEntropy(Instances data) { double[] nClass = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance instance = (Instance) enumInstance.nextElement(); nClass[(int) instance.classValue()]++; } double entropy = 0.0; for (int i = 0; i < data.numClasses(); i++) { if (nClass[i] > 0) { double ratio = nClass[i] / data.numInstances(); entropy -= (ratio * Utils.log2(ratio)); } } return entropy; } /** * * @param data * @param attr * @return */ public Instances[] splitInstancesOnAttribute(Instances data, Attribute attr) { Instances[] splitInstances = new Instances[attr.numValues()]; for (int i = 0; i < attr.numValues(); i++) { splitInstances[i] = new Instances(data, data.numInstances()); } Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance instance = (Instance) enumInstance.nextElement(); splitInstances[(int) instance.value(attr)].add(instance); } for (int i = 0; i < attr.numValues(); i++) { splitInstances[i].compactify(); } return splitInstances; } @Override public String toSource(String string) throws Exception { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. } }