List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
From source file:myid3andc45classifier.Model.MyC45.java
/**
 * Computes the error rate of this tree on the given evaluation set.
 *
 * @param instances instances to evaluate; each is checked with {@link #checkInstance}
 * @return fraction of misclassified instances in [0, 1]; 0.0 for an empty set
 *         (the original returned NaN from the 0/0 division)
 */
public double countError(Instances instances) {
    int wrong = 0; // instances failing checkInstance
    int total = 0; // instances examined
    Enumeration enumeration = instances.enumerateInstances();
    while (enumeration.hasMoreElements()) {
        Instance instance = (Instance) enumeration.nextElement();
        if (!checkInstance(instance)) {
            wrong++;
        }
        total++;
    }
    // Guard the empty set: avoid 0/0 = NaN.
    if (total == 0) {
        return 0.0;
    }
    return (double) wrong / (double) total;
}
From source file:myid3andc45classifier.Model.MyID3.java
/**
 * Builds the MyID3 decision tree from the supplied training data.
 * Requires a nominal class, all-nominal attributes, and no missing
 * attribute values; instances with a missing class are dropped.
 *
 * @param data the training instances
 * @throws Exception if the data violates any of the constraints above
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new Exception("MyID3: nominal class, please.");
    }
    // Validate every attribute: must be nominal, and no instance may
    // have a missing value for it.
    for (Enumeration attrs = data.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute candidate = (Attribute) attrs.nextElement();
        if (!candidate.isNominal()) {
            throw new Exception("MyID3: only nominal attributes, please.");
        }
        for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
            Instance row = (Instance) rows.nextElement();
            if (row.isMissing(candidate)) {
                throw new Exception("MyID3: no missing values, please.");
            }
        }
    }
    // Work on a copy so the caller's data set is untouched.
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeMyID3Tree(data);
}
From source file:myid3andc45classifier.Model.MyID3.java
public void makeMyID3Tree(Instances data) throws Exception { // Mengecek apakah tidak terdapat instance yang dalam node ini if (data.numInstances() == 0) { attribute = null;//w w w . j a v a2s. com classValue = Instance.missingValue(); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } attribute = data.attribute(maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (isDoubleEqual(infoGains[attribute.index()], 0)) { attribute = null; double[] numClasses = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); numClasses[(int) inst.classValue()]++; } label = maxIndex(numClasses); classAttribute = data.classAttribute(); } else { Instances[] splitData = splitInstancesByAttribute(data, attribute); successors = new MyID3[attribute.numValues()]; for (int j = 0; j < attribute.numValues(); j++) { successors[j] = new MyID3(); successors[j].buildClassifier(splitData[j]); } } }
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance/*from w w w . j a va 2s. co m*/ */ public void buildTree(Instances data) throws Exception { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyJ48[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyJ48(this); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance//ww w . j ava 2 s . c o m */ public void pruneTree2(Instances data) throws Exception { if (currentAttribute == null) { Attribute tempAttr = predecessor.currentAttribute; predecessor.currentAttribute = null; // Set the class value as the highest frequency of the class classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); predecessor.classValue = Utils.maxIndex(classDistribution); predecessor.classAttribute = data.classAttribute(); Weka weka = new Weka(); weka.setTraining("weather.nominal.arff"); String[] options_cl = { "" }; weka.setClassifier("myJ48.MyJ48", options_cl); weka.runCV(true); double currentAccuracy = weka.getM_Evaluation().correct(); double maxFalseAccuracy = initAccuracy * 0.9; if (maxFalseAccuracy > currentAccuracy) { predecessor.currentAttribute = tempAttr; visited = true; } else { visited = false; } } else if (visited) { } else { for (int j = 0; j < currentAttribute.numValues(); j++) { if (nodes[j] == null) { //System.out.println("null nodes"); } else { //System.out.println("ga null"); } nodes[j].pruneTree(data); } } }
From source file:myJ48.MyJ48.java
/**
 * Bottom-up pruning pass: for each child whose grandchildren are all leaves,
 * tentatively collapses that child into a leaf labeled with the majority
 * class, then keeps or reverts the collapse based on an accuracy estimate
 * that is currently a RANDOM placeholder (the real CV evaluation is
 * commented out below).
 *
 * @param data instances used to derive the majority-class label
 * @return this node (possibly with collapsed children)
 * @throws Exception propagated from recursion
 */
public MyJ48 pruneTree(Instances data) throws Exception { if (currentAttribute == null) { // A leaf cannot be pruned further.
return this; } else {
} if (currentAttribute != null) { for (int i = 0; i < currentAttribute.numValues(); i++) { // succLeaf stays true only if every grandchild under nodes[i] is a leaf.
boolean succLeaf = true; if (nodes[i].currentAttribute != null) { for (int j = 0; j < nodes[i].currentAttribute.numValues(); j++) { succLeaf = (succLeaf && (nodes[i].nodes[j].currentAttribute == null)); } if (succLeaf) { // Tentatively collapse child i into a leaf.
Attribute tempAttr = nodes[i].currentAttribute; nodes[i].currentAttribute = null; // Set the class value as the highest frequency of the class.
// NOTE(review): the distribution is computed over the FULL data set and
// stored on THIS node's classDistribution field, while the resulting
// label is written to nodes[i] — confirm this asymmetry is intended.
classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); nodes[i].classValue = Utils.maxIndex(classDistribution); nodes[i].classAttribute = data.classAttribute(); /*Weka weka = new Weka();
weka.setTraining("weather.nominal.arff");
String[] options_cl = {""};
weka.setClassifier("myJ48.MyJ48", options_cl);
weka.runCV(true);
double currentAccuracy = weka.getM_Evaluation().correct();*/ // Placeholder: the CV accuracy above is disabled; a random number is
// used instead, so pruning decisions are currently arbitrary.
Random rand = new Random(); double currentAccuracy = rand.nextDouble(); System.out.println("acc kepake : " + currentAccuracy); double maxFalseAccuracy = 0.7; // ad-hoc threshold chosen by trial and error
if (maxFalseAccuracy > currentAccuracy) { // "Accuracy" below threshold: undo the collapse.
nodes[i].currentAttribute = tempAttr; //visited = true;
} else { //visited = false;
} } } else { // Child i is already a leaf; pruneTree on a leaf returns itself (no-op).
// NOTE(review): internal children whose grandchildren are NOT all leaves
// are never descended into — confirm that is intended.
nodes[i] = nodes[i].pruneTree(data); } } } return this; }
From source file:NaiveBayes.NaiveBayes13514004.java
@Override public void buildClassifier(Instances i) { //Algoritma//from w w w .java 2 s. co m origin = new Instances(i); //Menghitung jumlah attribute dan kelas numAtt = i.numAttributes() - 1; numClass = i.numClasses(); //Inisialisasi matrix 3 dimensi data = new int[numAtt][numClass][0]; prob = new double[numAtt][numClass][0]; kelasdata = new int[numClass]; kelasprob = new double[numClass]; Enumeration<Instance> enu1 = i.enumerateInstances(); while (enu1.hasMoreElements()) { Instance ins = enu1.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); numDis = att.numValues(); data[x][(int) ins.classValue()] = new int[numDis]; prob[x][(int) ins.classValue()] = new double[numDis]; x++; } } //Mengisi matriks Frekuensi Enumeration<Instance> enu2 = i.enumerateInstances(); while (enu2.hasMoreElements()) { Instance ins = enu2.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); data[x][(int) ins.classValue()][(int) ins.value(att)]++; x++; } kelasdata[(int) ins.classValue()]++; } //Menghitung probabilitas kelas double numInstances = (double) i.numInstances(); for (int y = 0; y < numClass; y++) { kelasprob[y] = (double) kelasdata[y] / numInstances; } //Mengisi matriks probabilitas Enumeration<Instance> enu3 = i.enumerateInstances(); while (enu3.hasMoreElements()) { Instance ins = enu3.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); int sumDis = Utils.sum(data[x][(int) ins.classValue()]); numDis = att.numValues(); for (int z = 0; z < numDis; z++) { int y = (int) ins.classValue(); prob[x][y][z] = ((double) data[x][y][z] / (double) sumDis); } x++; } } }
From source file:net.sf.bddbddb.FindBestDomainOrder.java
License:LGPL
void dumpClassifierInfo(String name, Classifier c, Instances data) { BufferedWriter w = null;//from ww w . j a va 2 s . c om try { w = new BufferedWriter(new FileWriter(name)); w.write("Classifier \"name\":\n"); w.write("Attributes: \n"); for (Enumeration e = data.enumerateAttributes(); e.hasMoreElements();) { w.write(e.nextElement() + "\n"); } w.write("\n"); w.write("Based on data from " + data.numInstances() + " instances:\n"); for (Enumeration e = data.enumerateInstances(); e.hasMoreElements();) { Instance i = (Instance) e.nextElement(); if (i instanceof TrialInstance) { TrialInstance ti = (TrialInstance) i; InferenceRule ir = ti.ti.getCollection().getRule(solver); w.write(" " + ti.ti.getCollection().name + " " + ti.getOrder()); if (!ti.getOrder().equals(ti.ti.order)) w.write(" (" + ti.ti.order + ")"); if (ti.isMaxTime()) { w.write(" MAX TIME\n"); } else { w.write(" " + format(ti.getCost()) + " (" + ti.ti.cost + " ms)\n"); } } else { w.write(" " + i + "\n"); } } w.write(c.toString()); w.write("\n"); } catch (IOException x) { solver.err.println("IO Exception occurred writing \"" + name + "\": " + x); } finally { if (w != null) try { w.close(); } catch (IOException _) { } } }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Method for building an Id3 tree./*from w ww . java 2 s . c o m*/ * * @param data * the training data * @exception Exception * if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; double sum = 0; laplaceSmooth(m_Distribution, sum, data.numClasses()); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); boolean makeLeaf; makeLeaf = Utils.eq(infoGains[m_Attribute.index()], 0); Instances[] splitData = null; if (!makeLeaf) { splitData = splitData(data, m_Attribute); for (int i = 0; i < splitData.length; ++i) { if (splitData[i].numInstances() == data.numInstances()) { //System.out.println("When splitting on attrib // "+m_Attribute+", child "+i+" is same size as current, // making into leaf."); makeLeaf = true; break; } } } // Make leaf if information gain is zero. // Otherwise create successors. 
if (makeLeaf) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); double sum = 0; while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; sum += inst.weight(); } //laplace smooth the distribution instead laplaceSmooth(m_Distribution, sum, data.numClasses()); //Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { m_Successors = new MyId3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyId3(); m_Successors[j].buildClassifier(splitData[j]); } } }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Computes the entropy of the data's class distribution, ignoring
 * instances whose value for {@code att} is missing.
 * Uses the identity -sum(p_j*log2(p_j)) = log2(n) - (1/n)*sum(c_j*log2(c_j)),
 * where c_j are the class counts and n their total.
 *
 * @param data the data for which entropy is to be computed
 * @param att  attribute whose missing values exclude an instance
 * @return the entropy, or 0 when every instance is missing {@code att}
 */
private double computeEntropy(Instances data, Attribute att) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    int numInstances = 0;
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att))
            continue;
        classCounts[(int) inst.classValue()]++;
        ++numInstances;
    }
    // Guard: with zero usable instances the original divided by zero and
    // added log2(0), yielding NaN. Define entropy of an empty set as 0.
    if (numInstances == 0) {
        return 0;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) numInstances;
    return entropy + Utils.log2(numInstances);
}