List of usage examples for weka.core Instances enumerateAttributes
public Enumeration<Attribute> enumerateAttributes()
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance//from w ww .j av a 2 s . c o m */ public void buildTree(Instances data) throws Exception { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyJ48[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyJ48(this); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:NaiveBayes.NaiveBayes13514004.java
@Override public void buildClassifier(Instances i) { //Algoritma//from ww w . j a v a 2 s .c o m origin = new Instances(i); //Menghitung jumlah attribute dan kelas numAtt = i.numAttributes() - 1; numClass = i.numClasses(); //Inisialisasi matrix 3 dimensi data = new int[numAtt][numClass][0]; prob = new double[numAtt][numClass][0]; kelasdata = new int[numClass]; kelasprob = new double[numClass]; Enumeration<Instance> enu1 = i.enumerateInstances(); while (enu1.hasMoreElements()) { Instance ins = enu1.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); numDis = att.numValues(); data[x][(int) ins.classValue()] = new int[numDis]; prob[x][(int) ins.classValue()] = new double[numDis]; x++; } } //Mengisi matriks Frekuensi Enumeration<Instance> enu2 = i.enumerateInstances(); while (enu2.hasMoreElements()) { Instance ins = enu2.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); data[x][(int) ins.classValue()][(int) ins.value(att)]++; x++; } kelasdata[(int) ins.classValue()]++; } //Menghitung probabilitas kelas double numInstances = (double) i.numInstances(); for (int y = 0; y < numClass; y++) { kelasprob[y] = (double) kelasdata[y] / numInstances; } //Mengisi matriks probabilitas Enumeration<Instance> enu3 = i.enumerateInstances(); while (enu3.hasMoreElements()) { Instance ins = enu3.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); int sumDis = Utils.sum(data[x][(int) ins.classValue()]); numDis = att.numValues(); for (int z = 0; z < numDis; z++) { int y = (int) ins.classValue(); prob[x][y][z] = ((double) data[x][y][z] / (double) sumDis); } x++; } } }
From source file:net.sf.bddbddb.FindBestDomainOrder.java
License:LGPL
void dumpClassifierInfo(String name, Classifier c, Instances data) { BufferedWriter w = null;//from ww w . j a v a 2 s .c o m try { w = new BufferedWriter(new FileWriter(name)); w.write("Classifier \"name\":\n"); w.write("Attributes: \n"); for (Enumeration e = data.enumerateAttributes(); e.hasMoreElements();) { w.write(e.nextElement() + "\n"); } w.write("\n"); w.write("Based on data from " + data.numInstances() + " instances:\n"); for (Enumeration e = data.enumerateInstances(); e.hasMoreElements();) { Instance i = (Instance) e.nextElement(); if (i instanceof TrialInstance) { TrialInstance ti = (TrialInstance) i; InferenceRule ir = ti.ti.getCollection().getRule(solver); w.write(" " + ti.ti.getCollection().name + " " + ti.getOrder()); if (!ti.getOrder().equals(ti.ti.order)) w.write(" (" + ti.ti.order + ")"); if (ti.isMaxTime()) { w.write(" MAX TIME\n"); } else { w.write(" " + format(ti.getCost()) + " (" + ti.ti.cost + " ms)\n"); } } else { w.write(" " + i + "\n"); } } w.write(c.toString()); w.write("\n"); } catch (IOException x) { solver.err.println("IO Exception occurred writing \"" + name + "\": " + x); } finally { if (w != null) try { w.close(); } catch (IOException _) { } } }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Builds Id3 decision tree classifier./* w w w .j a v a 2s. co m*/ * * @param data * the training data * @exception Exception * if classifier can't be built successfully */ public void buildClassifier(Instances data) throws Exception { if (!data.classAttribute().isNominal()) { throw new UnsupportedClassTypeException("Id3: nominal class, please."); } Enumeration enumAtt = data.enumerateAttributes(); while (enumAtt.hasMoreElements()) { if (!((Attribute) enumAtt.nextElement()).isNominal()) { throw new UnsupportedAttributeTypeException("Id3: only nominal " + "attributes, please."); } } data = new Instances(data); data.deleteWithMissingClass(); makeTree(data); }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Method for building an Id3 tree./*from w ww. j a v a 2s .c o m*/ * * @param data * the training data * @exception Exception * if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; double sum = 0; laplaceSmooth(m_Distribution, sum, data.numClasses()); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); boolean makeLeaf; makeLeaf = Utils.eq(infoGains[m_Attribute.index()], 0); Instances[] splitData = null; if (!makeLeaf) { splitData = splitData(data, m_Attribute); for (int i = 0; i < splitData.length; ++i) { if (splitData[i].numInstances() == data.numInstances()) { //System.out.println("When splitting on attrib // "+m_Attribute+", child "+i+" is same size as current, // making into leaf."); makeLeaf = true; break; } } } // Make leaf if information gain is zero. // Otherwise create successors. 
if (makeLeaf) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); double sum = 0; while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; sum += inst.weight(); } //laplace smooth the distribution instead laplaceSmooth(m_Distribution, sum, data.numClasses()); //Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { m_Successors = new MyId3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyId3(); m_Successors[j].buildClassifier(splitData[j]); } } }
From source file:newdtl.NewID3.java
/** * Creates an Id3 tree./*from ww w. j a va 2s . co m*/ * * @param data the training data * @exception Exception if tree failed to build */ private void makeTree(Instances data) throws Exception { // Mengecek apakah tidak terdapat instance dalam node ini if (data.numInstances() == 0) { splitAttribute = null; label = DOUBLE_MISSING_VALUE; classDistributions = new double[data.numClasses()]; //??? } else { // Mencari IG maksimum double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } // cek max IG int maxIG = maxIndex(infoGains); if (maxIG != -1) { splitAttribute = data.attribute(maxIndex(infoGains)); } else { Exception exception = new Exception("array null"); throw exception; } // Membuat daun jika IG-nya 0 if (Double.compare(infoGains[splitAttribute.index()], 0) == 0) { splitAttribute = null; classDistributions = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = (Instance) data.instance(i); classDistributions[(int) inst.classValue()]++; } normalizeClassDistribution(); label = maxIndex(classDistributions); classAttribute = data.classAttribute(); } else { // Membuat tree baru di bawah node ini Instances[] splitData = splitData(data, splitAttribute); children = new NewID3[splitAttribute.numValues()]; for (int j = 0; j < splitAttribute.numValues(); j++) { children[j] = new NewID3(); children[j].makeTree(splitData[j]); } } } }
From source file:newdtl.NewJ48.java
/** * Creates a J48 tree./*from www .ja v a 2s .com*/ * * @param data the training data * @exception Exception if tree failed to build */ private void makeTree(Instances data) throws Exception { // Mengecek apakah tidak terdapat instance dalam node ini if (data.numInstances() == 0) { splitAttribute = null; label = DOUBLE_MISSING_VALUE; classDistributions = new double[data.numClasses()]; isLeaf = true; } else { // Mencari Gain Ratio maksimum double[] gainRatios = new double[data.numAttributes()]; double[] thresholds = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); double[] result = computeGainRatio(data, att); gainRatios[att.index()] = result[0]; thresholds[att.index()] = result[1]; } splitAttribute = data.attribute(maxIndex(gainRatios)); if (splitAttribute.isNumeric()) { splitThreshold = thresholds[maxIndex(gainRatios)]; } else { splitThreshold = Double.NaN; } classDistributions = new double[data.numClasses()]; for (int i = 0; i < data.numInstances(); i++) { Instance inst = (Instance) data.instance(i); classDistributions[(int) inst.classValue()]++; } // Membuat daun jika Gain Ratio-nya 0 if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) { splitAttribute = null; label = maxIndex(classDistributions); classAttribute = data.classAttribute(); isLeaf = true; } else { // Mengecek jika ada missing value if (isMissing(data, splitAttribute)) { // cari modus int index = modusIndex(data, splitAttribute); // ubah data yang punya missing value Enumeration dataEnum = data.enumerateInstances(); while (dataEnum.hasMoreElements()) { Instance inst = (Instance) dataEnum.nextElement(); if (inst.isMissing(splitAttribute)) { inst.setValue(splitAttribute, splitAttribute.value(index)); } } } // Membuat tree baru di bawah node ini Instances[] splitData; if (splitAttribute.isNumeric()) { splitData = splitData(data, splitAttribute, splitThreshold); children 
= new NewJ48[2]; for (int j = 0; j < 2; j++) { children[j] = new NewJ48(); children[j].makeTree(splitData[j]); } } else { splitData = splitData(data, splitAttribute); children = new NewJ48[splitAttribute.numValues()]; for (int j = 0; j < splitAttribute.numValues(); j++) { children[j] = new NewJ48(); children[j].makeTree(splitData[j]); } } isLeaf = false; } } }
From source file:org.barcelonamedia.uima.CAS2WekaInstance.java
License:Open Source License
private static DenseInstance toWekaInternalInstance(List<AttributeValue> attributeValues, Instances wekaInstances) throws CASException { double[] zeroValues = new double[wekaInstances.numAttributes()]; Arrays.fill(zeroValues, 0.0d); DenseInstance wekaInstance = new DenseInstance(1.0d, zeroValues); wekaInstance.setDataset(wekaInstances); Iterator<AttributeValue> attributeValuesIterator = attributeValues.iterator(); while (attributeValuesIterator.hasNext()) { String value = null;//from w w w . j a v a 2s . co m String attributeName = null; AttributeValue attributeValue = attributeValuesIterator.next(); attributeName = attributeValue.getAttributeName(); Attribute attribute = wekaInstances.attribute(attributeName); if (attribute == null) continue; if (attributeValue instanceof NumericAttributeValue) { value = ((NumericAttributeValue) attributeValue).getValue(); wekaInstance.setValue(attribute, Double.parseDouble(value)); } else if (attributeValue instanceof DateAttributeValue) { //this isn't actually very smart.... I need to understand this better //any volunteers for the four lines of code I need here? value = ((DateAttributeValue) attributeValue).getValue(); wekaInstance.setValue(attribute, value); } else if (attributeValue instanceof NominalAttributeValue) { value = ((NominalAttributeValue) attributeValue).getValue(); int valueIndex = attribute.indexOfValue(value); wekaInstance.setValue(attribute, (double) valueIndex); } else if (attributeValue instanceof StringAttributeValue) { value = ((StringAttributeValue) attributeValue).getValue(); wekaInstance.setValue(attribute, value); } } Enumeration attributes = wekaInstances.enumerateAttributes(); while (attributes.hasMoreElements()) { Attribute attribute = (Attribute) attributes.nextElement(); if (attribute.isNumeric() && wekaInstance.isMissing(attribute)) { wekaInstance.setValue(attribute, 0); } } return wekaInstance; }
From source file:org.dynamicfactory.property.InstancesFactory.java
License:Open Source License
@Override public String exportToString(Instances type, Graph g) { StringBuffer ret = new StringBuffer(); ret.append(type.relationName());// w w w.ja v a 2 s .c o m Enumeration attributes = type.enumerateAttributes(); while (attributes.hasMoreElements()) { ret.append(","); outputAttribute(ret, (Attribute) attributes.nextElement()); } return ret.toString(); }
From source file:org.opentox.www.rest.resources.AlgorithmResource.java
License:Open Source License
/**
 * Runs the dataset named by the {@code dataset_uri} form parameter through
 * the processing pipeline and returns the dataset URI followed by a query
 * string listing the remaining attributes as {@code feature_uris[]} entries.
 *
 * <p>A missing or syntactically invalid dataset URI, or a
 * {@code YaqpException} from the pipeline, results in a bad-request response
 * carrying an explanatory message.
 *
 * @param entity  the request entity holding the form parameters
 * @param variant not used by this method
 * @return the URI list representation, or the error message representation
 */
@SuppressWarnings({ "unchecked" })
private Representation filterData(final Representation entity, final Variant variant) {
    InputProcessor p1 = new InputProcessor();
    DatasetBuilder p2 = new DatasetBuilder();
    InstancesProcessor p3 = new InstancesProcessor();
    AttributeCleanup p4 = new AttributeCleanup(ATTRIBUTE_TYPE.string);
    Pipeline pipe = new Pipeline(p1, p2, p3, p4);

    YaqpForm form = new YaqpForm(entity);
    String datasetUri = form.getFirstValue(ConstantParameters.dataset_uri);

    // new URI(null) throws NullPointerException, so an absent parameter is
    // handled like an invalid one.
    // NOTE(review): assumes getFirstValue returns null when the parameter is absent.
    URI uri = null;
    if (datasetUri != null) {
        try {
            uri = new URI(datasetUri);
        } catch (URISyntaxException ex) {
            uri = null; // reported just below
        }
    }
    if (uri == null) {
        toggleBadRequest();
        return sendMessage("Inacceptable URI (" + datasetUri + ")" + NEWLINE);
    }

    Instances filteredData;
    try {
        filteredData = (Instances) pipe.process(uri);
    } catch (YaqpException ex) {
        toggleBadRequest();
        return sendMessage(ex.toString());
    }

    // Build "feature_uris[]=a&feature_uris[]=b..." without repeated String
    // concatenation.
    // NOTE(review): attribute names are appended unencoded, as before.
    StringBuilder list = new StringBuilder();
    Enumeration<?> attributes = filteredData.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        Attribute att = (Attribute) attributes.nextElement();
        list.append("feature_uris[]=").append(att.name());
        if (attributes.hasMoreElements()) {
            list.append("&");
        }
    }
    return new StringRepresentation(uri + "?" + list, MediaType.TEXT_URI_LIST);
}