List of usage examples for weka.core.Instances classAttribute
public Attribute classAttribute()
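Before the source-file examples, a minimal self-contained sketch of the call itself. The class name ClassAttributeExample and the file name "data.arff" are illustrative assumptions, not taken from any of the sources below. classAttribute() returns the Attribute currently set as the class, so the class index must be set first:

import weka.core.Attribute;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassAttributeExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset and declare the last attribute as the class
        // ("data.arff" is a placeholder path)
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // classAttribute() returns the Attribute object set as the class
        Attribute cls = data.classAttribute();
        System.out.println("Class attribute: " + cls.name());
        if (cls.isNominal()) {
            for (int i = 0; i < cls.numValues(); i++) {
                System.out.println("  label " + i + ": " + cls.value(i));
            }
        }
    }
}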
From source file:myid3andc45classifier.Model.MyID3.java
@Override
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new Exception("MyID3: nominal class, please.");
    }
    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        Attribute attr = (Attribute) enumAtt.nextElement();
        if (!attr.isNominal()) {
            throw new Exception("MyID3: only nominal attributes, please.");
        }
        Enumeration enumInstance = data.enumerateInstances();
        while (enumInstance.hasMoreElements()) {
            if (((Instance) enumInstance.nextElement()).isMissing(attr)) {
                throw new Exception("MyID3: no missing values, please.");
            }
        }
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeMyID3Tree(data);
}
From source file:myid3andc45classifier.Model.MyID3.java
public void makeMyID3Tree(Instances data) throws Exception {
    // Check whether no instances have reached this node
    if (data.numInstances() == 0) {
        attribute = null;
        classValue = Instance.missingValue();
        return;
    }
    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    attribute = data.attribute(maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (isDoubleEqual(infoGains[attribute.index()], 0)) {
        attribute = null;
        double[] numClasses = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            numClasses[(int) inst.classValue()]++;
        }
        label = maxIndex(numClasses);
        classAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitInstancesByAttribute(data, attribute);
        successors = new MyID3[attribute.numValues()];
        for (int j = 0; j < attribute.numValues(); j++) {
            successors[j] = new MyID3();
            successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:myJ48.MyJ48.java
/**
 * Constructs the tree from the given instances by finding the attribute
 * with the highest information gain, i.e. the attribute that best splits the data.
 * @param data the training instances
 */
public void buildTree(Instances data) throws Exception {
    if (data.numInstances() > 0) {
        // Find the highest information gain:
        // first compute each attribute's information gain
        double IG[] = new double[data.numAttributes()];
        Enumeration enumAttribute = data.enumerateAttributes();
        while (enumAttribute.hasMoreElements()) {
            Attribute attribute = (Attribute) enumAttribute.nextElement();
            IG[attribute.index()] = informationGain(data, attribute);
            // System.out.println(attribute.toString() + ": " + IG[attribute.index()]);
        }
        // Assign the best attribute as this node's split attribute
        currentAttribute = data.attribute(maxIndex(IG));
        //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]);

        // If the information gain is zero, the current node becomes a leaf
        if (Utils.eq(IG[currentAttribute.index()], 0)) {
            // Set the class value to the most frequent class
            currentAttribute = null;
            classDistribution = new double[data.numClasses()];
            Enumeration enumInstance = data.enumerateInstances();
            while (enumInstance.hasMoreElements()) {
                Instance temp = (Instance) enumInstance.nextElement();
                classDistribution[(int) temp.classValue()]++;
            }
            Utils.normalize(classDistribution);
            classValue = Utils.maxIndex(classDistribution);
            classAttribute = data.classAttribute();
        } else {
            // Otherwise create one child node per value of the split attribute
            Instances[] splitData = splitDataByAttribute(data, currentAttribute);
            nodes = new MyJ48[currentAttribute.numValues()];
            for (int i = 0; i < currentAttribute.numValues(); i++) {
                nodes[i] = new MyJ48(this);
                nodes[i].buildTree(splitData[i]);
            }
        }
    } else {
        classAttribute = null;
        classValue = Utils.missingValue();
        classDistribution = new double[data.numClasses()];
    }
}
From source file:myJ48.MyJ48.java
/**
 * Prunes the tree: tentatively collapses this leaf's predecessor into a leaf
 * and keeps the change only if cross-validated accuracy stays above 90% of
 * the initial accuracy.
 * @param data the training instances
 */
public void pruneTree2(Instances data) throws Exception {
    if (currentAttribute == null) {
        Attribute tempAttr = predecessor.currentAttribute;
        predecessor.currentAttribute = null;
        // Set the class value to the most frequent class
        classDistribution = new double[data.numClasses()];
        Enumeration enumInstance = data.enumerateInstances();
        while (enumInstance.hasMoreElements()) {
            Instance temp = (Instance) enumInstance.nextElement();
            classDistribution[(int) temp.classValue()]++;
        }
        Utils.normalize(classDistribution);
        predecessor.classValue = Utils.maxIndex(classDistribution);
        predecessor.classAttribute = data.classAttribute();

        // Re-evaluate the pruned tree with cross-validation
        Weka weka = new Weka();
        weka.setTraining("weather.nominal.arff");
        String[] options_cl = { "" };
        weka.setClassifier("myJ48.MyJ48", options_cl);
        weka.runCV(true);
        double currentAccuracy = weka.getM_Evaluation().correct();
        double maxFalseAccuracy = initAccuracy * 0.9;

        // Revert the pruning if accuracy drops too far
        if (maxFalseAccuracy > currentAccuracy) {
            predecessor.currentAttribute = tempAttr;
            visited = true;
        } else {
            visited = false;
        }
    } else if (visited) {
        // Already pruned and reverted; nothing to do
    } else {
        for (int j = 0; j < currentAttribute.numValues(); j++) {
            if (nodes[j] == null) {
                //System.out.println("null node");
            } else {
                //System.out.println("not null");
            }
            nodes[j].pruneTree(data);
        }
    }
}
From source file:myJ48.MyJ48.java
public MyJ48 pruneTree(Instances data) throws Exception {
    // A leaf cannot be pruned any further
    if (currentAttribute == null) {
        return this;
    }
    for (int i = 0; i < currentAttribute.numValues(); i++) {
        boolean succLeaf = true;
        if (nodes[i].currentAttribute != null) {
            // Check whether all children of nodes[i] are leaves
            for (int j = 0; j < nodes[i].currentAttribute.numValues(); j++) {
                succLeaf = (succLeaf && (nodes[i].nodes[j].currentAttribute == null));
            }
            if (succLeaf) {
                // Tentatively collapse nodes[i] into a leaf
                Attribute tempAttr = nodes[i].currentAttribute;
                nodes[i].currentAttribute = null;
                // Set the class value to the most frequent class
                classDistribution = new double[data.numClasses()];
                Enumeration enumInstance = data.enumerateInstances();
                while (enumInstance.hasMoreElements()) {
                    Instance temp = (Instance) enumInstance.nextElement();
                    classDistribution[(int) temp.classValue()]++;
                }
                Utils.normalize(classDistribution);
                nodes[i].classValue = Utils.maxIndex(classDistribution);
                nodes[i].classAttribute = data.classAttribute();
                /*Weka weka = new Weka();
                weka.setTraining("weather.nominal.arff");
                String[] options_cl = {""};
                weka.setClassifier("myJ48.MyJ48", options_cl);
                weka.runCV(true);
                double currentAccuracy = weka.getM_Evaluation().correct();*/
                Random rand = new Random();
                double currentAccuracy = rand.nextDouble();
                System.out.println("accuracy used: " + currentAccuracy);
                double maxFalseAccuracy = 0.7; // trial threshold
                // Revert the pruning if accuracy drops below the threshold
                if (maxFalseAccuracy > currentAccuracy) {
                    nodes[i].currentAttribute = tempAttr;
                    //visited = true;
                } else {
                    //visited = false;
                }
            }
        } else {
            nodes[i] = nodes[i].pruneTree(data);
        }
    }
    return this;
}
From source file:naive_bayes.Naive_bayes.java
@Override
public void buildClassifier(Instances newData) throws Exception {
    int countAttr = newData.numAttributes();
    int distinctClassValue = newData.attribute(classidx).numValues();

    /* Initialize the model */
    M = new ArrayList[countAttr][distinctClassValue];
    for (int i = 0; i < countAttr; i++) {
        for (int j = 0; j < distinctClassValue; j++) {
            M[i][j] = new ArrayList<ListElement>();
        }
    }
    boolean add;
    ListElement le = new ListElement();
    Attribute ab;
    for (int i = 0; i < countAttr; i++) {
        if (i != classidx) {
            for (int j = 0; j < distinctClassValue; j++) {
                for (int k = 0; k < newData.attribute(i).numValues(); k++) {
                    ab = newData.attribute(i);
                    String c = ab.value((int) newData.instance(149).value(i));
                    add = M[i][j].add(new ListElement());
                }
            }
        }
    }

    /* Build an array that counts the number of instances in each class */
    Attribute a;
    String c;
    arrayOfClass = new ListElement[newData.numClasses()];
    for (int idx = 0; idx < newData.numClasses(); idx++) {
        arrayOfClass[idx] = new ListElement();
        a = newData.classAttribute();
        c = a.value(idx);
        arrayOfClass[idx].setDisAttrName(c);
    }
    for (int i = 0; i < newData.numInstances(); i++) {
        double z = newData.instance(i).classValue();
        int zz = (int) z;
        arrayOfClass[zz].setCount(arrayOfClass[zz].getCount() + 1);
    }

    // Record the frequency of each attribute value
    for (int i = 0; i < newData.numInstances(); i++) {
        for (int j = 0; j < newData.numAttributes(); j++) {
            if (j != classidx) { // skip the class attribute
                a = newData.attribute(classidx);
                c = a.value((int) newData.instance(i).value(classidx));
                // Get the class index
                double z = newData.instance(i).classValue();
                int zz = (int) z;
                le.setDisAttrName(c);
                // Get the distinct-value index
                double x = newData.instance(i).value(j);
                int xx = (int) x;
                // Increment the frequency of this value for this class and attribute
                le.setCount(M[j][zz].get(xx).getCount() + 1);
                M[j][zz].set(xx, new ListElement(M[j][zz].get(xx).getDisAttrName(),
                        M[j][zz].get(xx).getCount() + 1));
            }
        }
    }

    /* Compute the probability of each distinct attribute value per class */
    for (int j = 0; j < newData.numAttributes(); j++) {
        if (j != classidx) {
            for (int zz = 0; zz < newData.numClasses(); zz++) {
                for (int xx = 0; xx < newData.attribute(j).numValues(); xx++) {
                    M[j][zz].set(xx, new ListElement(M[j][zz].get(xx).getDisAttrName(),
                            M[j][zz].get(xx).getCount() / arrayOfClass[zz].getCount()));
                }
            }
        }
    }
}
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Builds Id3 decision tree classifier.
 *
 * @param data the training data
 * @exception Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new UnsupportedClassTypeException("Id3: nominal class, please.");
    }
    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        if (!((Attribute) enumAtt.nextElement()).isNominal()) {
            throw new UnsupportedAttributeTypeException("Id3: only nominal attributes, please.");
        }
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeTree(data);
}
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Method for building an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {
    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        double sum = 0;
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        return;
    }
    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    boolean makeLeaf;
    makeLeaf = Utils.eq(infoGains[m_Attribute.index()], 0);
    Instances[] splitData = null;
    if (!makeLeaf) {
        splitData = splitData(data, m_Attribute);
        for (int i = 0; i < splitData.length; ++i) {
            if (splitData[i].numInstances() == data.numInstances()) {
                //System.out.println("When splitting on attrib " + m_Attribute
                //        + ", child " + i + " is same size as current, making into leaf.");
                makeLeaf = true;
                break;
            }
        }
    }
    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (makeLeaf) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        double sum = 0;
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
            sum += inst.weight();
        }
        // Laplace smooth the distribution instead of normalizing it
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        //Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        m_Successors = new MyId3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyId3();
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:net.sf.bddbddb.order.WekaInterface.java
License:LGPL
public static double cvError(int numFolds, Instances data0, String cClassName) {
    if (data0.numInstances() < numFolds)
        return Double.NaN; // more folds than elements
    if (numFolds == 0)
        return Double.NaN; // no folds
    if (data0.numInstances() == 0)
        return 0; // no instances
    Instances data = new Instances(data0);
    //data.randomize(new Random(System.currentTimeMillis()));
    data.stratify(numFolds);
    Assert._assert(data.classAttribute() != null);
    double[] estimates = new double[numFolds];
    for (int i = 0; i < numFolds; ++i) {
        Instances trainData = data.trainCV(numFolds, i);
        Assert._assert(trainData.classAttribute() != null);
        Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances.");
        Instances testData = data.testCV(numFolds, i);
        Assert._assert(testData.classAttribute() != null);
        Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances.");
        int temp = FindBestDomainOrder.TRACE;
        FindBestDomainOrder.TRACE = 0;
        Classifier classifier = buildClassifier(cClassName, trainData);
        FindBestDomainOrder.TRACE = temp;
        int count = testData.numInstances();
        double loss = 0;
        double sum = 0;
        for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) {
            Instance instance = (Instance) e.nextElement();
            Assert._assert(instance != null);
            Assert._assert(instance.classAttribute() != null
                    && instance.classAttribute() == trainData.classAttribute());
            try {
                double testClass = classifier.classifyInstance(instance);
                double weight = instance.weight();
                if (testClass != instance.classValue())
                    loss += weight;
                sum += weight;
            } catch (Exception ex) {
                FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex);
            }
        }
        estimates[i] = 1 - loss / sum;
    }
    double average = 0;
    for (int i = 0; i < numFolds; ++i)
        average += estimates[i];
    return average / numFolds;
}
From source file:newclassifier.NewClassifier.java
public void classify(String path) throws Exception {
    // load unlabeled data and set class attribute
    Instances unlabeled = DataSource.read(path);
    unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

    // copy the dataset
    Instances labeled = new Instances(unlabeled);

    // label instances
    for (int i = 0; i < unlabeled.numInstances(); i++) {
        double clsLabel = cls.classifyInstance(unlabeled.instance(i));
        labeled.instance(i).setClassValue(clsLabel);
    }

    // save labeled data
    DataSink.write("labeled.arff", labeled);

    // output predictions
    System.out.println("# - actual - predicted - distribution");
    for (int i = 0; i < labeled.numInstances(); i++) {
        double pred = cls.classifyInstance(labeled.instance(i));
        double[] dist = cls.distributionForInstance(labeled.instance(i));
        System.out.print((i + 1) + " - ");
        System.out.print(labeled.instance(i).toString(labeled.classIndex()) + " - ");
        System.out.print(labeled.classAttribute().value((int) pred) + " - ");
        System.out.println(Utils.arrayToString(dist));
    }
}
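As a follow-up to the example above, a small sketch of using classAttribute() to pair each entry of distributionForInstance() with its label name rather than printing the raw array. The helper printDistributions is hypothetical and not part of NewClassifier; it assumes a trained classifier and an Instances set whose class index has already been set.

// Hypothetical helper: prints each class label next to its predicted probability.
static void printDistributions(weka.classifiers.Classifier cls, Instances labeled) throws Exception {
    Attribute classAttr = labeled.classAttribute();
    for (int i = 0; i < labeled.numInstances(); i++) {
        double[] dist = cls.distributionForInstance(labeled.instance(i));
        StringBuilder sb = new StringBuilder();
        for (int v = 0; v < classAttr.numValues(); v++) {
            // classAttribute().value(v) maps the index v back to its nominal label
            sb.append(classAttr.value(v)).append('=').append(dist[v]).append(' ');
        }
        System.out.println((i + 1) + " - " + sb.toString().trim());
    }
}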