Usage examples for weka.core.Instance.stringValue
public String stringValue(Attribute att);
From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java
License:Open Source License
/** * //from w ww .ja va 2 s . c o m * @return */ public Instances getModifiedInstances() { //Copy attribute list (and change categorical by numerical) final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>(); for (int i = 0; i < instances.numAttributes(); i++) { Attribute attr = instances.attribute(i); if (attr.isNumeric() || attr.index() == instances.classIndex()) { lAttrs.add(attr); } else { Attribute newAttr = new Attribute(attr.name()); lAttrs.add(newAttr); } } //Build new instance final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances()); newInstances.setClassIndex(instances.classIndex()); for (int i = 0; i < instances.numInstances(); i++) { final Instance instance = instances.instance(i); final Instance cpyInstance = (Instance) instance.copy(); for (int j = 0; j < instance.numAttributes(); j++) { Attribute attribute = instance.attribute(j); int k = 0; if (attribute.index() == instances.classIndex()) { //The class index is nominal cpyInstance.setValue(attribute, instance.stringValue(j)); } else if (!attribute.isNumeric()) { String elt = attribute.value((int) instance.value(j)); cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j))); } else { if (maxNum[k] > 1) { cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]); } k++; } } newInstances.add(cpyInstance); } if (ignoreClass && instances.classIndex() != -1) { newInstances.deleteAttributeAt(instances.classIndex()); } return newInstances; }
From source file:machinelearningproject.RFTree.java
@Override public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; //choose random fraction int numAttr = instances.numAttributes(); int k = (int) round(sqrt(numAttr)); ArrayList<Integer> randomIdx = randomFraction(numAttr); for (int idx = 0; idx < k; idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }// w ww .j ava2 s .com } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { 
copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); tree.nodes.add(node); } } } return tree; }
From source file:machinelearningproject.Tree.java
public String getModeClass(Instances instances, int classIdx) { HashMap<String, Integer> classMap = new HashMap<>(); int numInstances = instances.size(); for (int i = 0; i < numInstances; i++) { Instance instance = instances.get(i); String key = instance.stringValue(classIdx); if (classMap.isEmpty() || !classMap.containsKey(key)) { classMap.put(key, 1);//ww w .ja v a 2 s .co m } else { if (classMap.containsKey(key)) { classMap.put(key, classMap.get(key) + 1); } } } Iterator<String> keySetIterator = classMap.keySet().iterator(); String modeClass = ""; int count = 0; while (keySetIterator.hasNext()) { String key = keySetIterator.next(); System.out.println("key: " + key + " value: " + classMap.get(key)); if (count < classMap.get(key)) { modeClass = key; count = classMap.get(key); } } return modeClass; }
From source file:machinelearningproject.Tree.java
public double calculateEntropy(Instances instances, int attrIdx) { HashMap<String, Integer> classMap = new HashMap<>(); double entropy = (double) 0; int numInstances = instances.size(); for (int i = 0; i < numInstances; i++) { Instance instance = instances.get(i); String key = instance.stringValue(attrIdx); if (classMap.isEmpty() || !classMap.containsKey(key)) { classMap.put(key, 1);//from w ww.j a v a 2s. com } else { if (classMap.containsKey(key)) { classMap.put(key, classMap.get(key) + 1); } } } Iterator<String> keySetIterator = classMap.keySet().iterator(); while (keySetIterator.hasNext()) { String key = keySetIterator.next(); // reference source code http://onoffswitch.net/building-decision-tree/ double prob = (double) classMap.get(key) / (double) numInstances; entropy -= prob * (Math.log(prob) / Math.log(2)); } return entropy; }
From source file:machinelearningproject.Tree.java
/**
 * Computes the information gain obtained by splitting on one attribute.
 *
 * <p>The gain is the entropy of the class attribute minus the weighted average, over the
 * distinct values of the split attribute, of the class entropy among the instances carrying
 * that value.
 *
 * <p>Counts are kept in a nested map (attribute value, then class value). An earlier version
 * joined both values into one {@code "attr-class"} key and split it on {@code "-"}, which
 * mis-parsed every attribute value that itself contains a hyphen.
 *
 * @param instances the data to scan
 * @param attrIdx index of the candidate split attribute
 * @param classIdx index of the class attribute
 * @return the information gain in bits
 * @throws Exception declared for compatibility with existing callers
 */
public double calculateInformationGain(Instances instances, int attrIdx, int classIdx) throws Exception {
    // attribute value -> number of instances carrying it
    final HashMap<String, Integer> attrCount = new HashMap<>();
    // attribute value -> (class value -> number of instances with that combination)
    final HashMap<String, HashMap<String, Integer>> classCountByAttr = new HashMap<>();

    final int numInstances = instances.size();
    for (int i = 0; i < numInstances; i++) {
        final Instance instance = instances.get(i);
        final String attrKey = instance.stringValue(attrIdx);
        final String classKey = instance.stringValue(classIdx);

        final Integer seenAttr = attrCount.get(attrKey);
        attrCount.put(attrKey, seenAttr == null ? 1 : seenAttr + 1);

        HashMap<String, Integer> perClass = classCountByAttr.get(attrKey);
        if (perClass == null) {
            perClass = new HashMap<>();
            classCountByAttr.put(attrKey, perClass);
        }
        final Integer seenPair = perClass.get(classKey);
        perClass.put(classKey, seenPair == null ? 1 : seenPair + 1);
    }

    final double log2 = Math.log(2);
    double attrEntropy = 0.0;
    for (String attrKey : attrCount.keySet()) {
        final int withValue = attrCount.get(attrKey);
        // class entropy among the instances that carry this attribute value
        double bufferEntropy = 0.0;
        for (int pairCount : classCountByAttr.get(attrKey).values()) {
            final double prob = (double) pairCount / (double) withValue;
            bufferEntropy -= prob * (Math.log(prob) / log2);
        }
        attrEntropy += (withValue / (double) numInstances) * bufferEntropy;
    }

    final double classEntropy = calculateEntropy(instances, classIdx);
    return classEntropy - attrEntropy;
}
From source file:machinelearningproject.Tree.java
public Tree buildTree(Instances instances) throws Exception { Tree tree = new Tree(); ArrayList<String> availableAttributes = new ArrayList(); int largestInfoGainAttrIdx = -1; double largestInfoGainAttrValue = 0.0; for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { availableAttributes.add(instances.attribute(idx).name()); }//from w w w . j av a 2s . co m } if (instances.numInstances() == 0) { return null; } else if (calculateClassEntropy(instances) == 0.0) { // all examples have the sama classification tree.attributeName = instances.get(0).stringValue(instances.classIndex()); } else if (availableAttributes.isEmpty()) { // mode classification tree.attributeName = getModeClass(instances, instances.classIndex()); } else { for (int idx = 0; idx < instances.numAttributes(); idx++) { if (idx != instances.classIndex()) { double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex()); if (largestInfoGainAttrValue < attrInfoGain) { largestInfoGainAttrIdx = idx; largestInfoGainAttrValue = attrInfoGain; } } } if (largestInfoGainAttrIdx != -1) { tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name(); ArrayList<String> attrValues = new ArrayList(); for (int i = 0; i < instances.numInstances(); i++) { Instance instance = instances.get(i); String attrValue = instance.stringValue(largestInfoGainAttrIdx); if (attrValues.isEmpty() || !attrValues.contains(attrValue)) { attrValues.add(attrValue); } } for (String attrValue : attrValues) { Node node = new Node(attrValue); Instances copyInstances = new Instances(instances); copyInstances.setClassIndex(instances.classIndex()); int i = 0; while (i < copyInstances.numInstances()) { Instance instance = copyInstances.get(i); // reducing examples if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) { copyInstances.delete(i); i--; } i++; } copyInstances.deleteAttributeAt(largestInfoGainAttrIdx); node.subTree = buildTree(copyInstances); 
tree.nodes.add(node); } } } return tree; }
From source file:machinelearningproject.Tree.java
public String traverseTree(Instance instance) { String attrValue = ""; Tree buffTree = this; while (!buffTree.isLeaf()) { //get attribute value of an instance for (int i = 0; i < instance.numAttributes(); i++) { if (instance.attribute(i).name().equals(buffTree.attributeName)) { attrValue = instance.stringValue(i); break; }/* w ww . j a va 2 s . c o m*/ } //compare attribute with node value for (int i = 0; i < buffTree.nodes.size(); i++) { if (attrValue.equals(buffTree.nodes.get(i).value)) { buffTree = buffTree.nodes.get(i).subTree; break; } } } //isLeaf attrValue = buffTree.attributeName; return attrValue; }
From source file:meka.experiment.statisticsexporters.WekaFilter.java
License:Open Source License
/**
 * Converts the Instances back into statistics.
 *
 * <p>Each instance yields one statistics object built from its classifier command line and
 * relation name, filled with every numeric, non-missing attribute value keyed by attribute
 * name. Instances that fail to convert are reported through {@code handleException} and
 * skipped. An empty list is returned (after logging) when either key attribute is absent.
 *
 * @param data the data to convert
 * @return the generated statistics
 */
protected List<EvaluationStatistics> fromInstances(Instances data) {
    final List<EvaluationStatistics> stats = new ArrayList<>();

    if (data.attribute(EvaluationStatistics.KEY_CLASSIFIER) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_CLASSIFIER);
        return stats;
    }
    if (data.attribute(EvaluationStatistics.KEY_RELATION) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_RELATION);
        return stats;
    }

    for (int row = 0; row < data.numInstances(); row++) {
        // declared outside the try so the failure message can include it
        final Instance inst = data.instance(row);
        try {
            final MultiLabelClassifier cls = OptionUtils.fromCommandLine(
                    MultiLabelClassifier.class,
                    inst.stringValue(data.attribute(EvaluationStatistics.KEY_CLASSIFIER)));
            final String rel = inst.stringValue(data.attribute(EvaluationStatistics.KEY_RELATION));
            final EvaluationStatistics stat = new EvaluationStatistics(cls, rel, null);
            for (int col = 0; col < inst.numAttributes(); col++) {
                if (!inst.attribute(col).isNumeric() || inst.isMissing(col)) {
                    continue;
                }
                stat.put(inst.attribute(col).name(), inst.value(col));
            }
            stats.add(stat);
        } catch (Exception e) {
            handleException("Failed to process instance: " + inst, e);
        }
    }
    return stats;
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/**
 * Joins the string values of the given attributes with {@code '+'}, e.g. values
 * (3,1,2) -> "3+1+2".
 *
 * @param x the instance to read from
 * @param indices attribute indices whose values are joined, in this order
 * @return the joined values; an empty string when {@code indices} is empty (this case used to
 *     fail with a {@code StringIndexOutOfBoundsException})
 */
public static String encodeValue(Instance x, int indices[]) {
    final StringBuilder v = new StringBuilder();
    for (int j = 0; j < indices.length; j++) {
        if (j > 0) {
            v.append('+');
        }
        v.append(x.stringValue(indices[j]));
    }
    return v.toString();
}
From source file:meka.filters.multilabel.SuperNodeFilter.java
License:Open Source License
/** * Merge Labels.//from w w w.jav a 2 s . co m * * @param j index 1 (assume that <code>j < k</code>) * @param k index 2 (assume that <code>j < k</code>) * @param D iInstances, with attributes in labeled by original index * @return Instaces with attributes at j and k moved to position L as (j,k), with classIndex = L-1 */ public static Instances mergeLabels(Instances D, int j, int k, int p) { int L = D.classIndex(); HashMap<String, Integer> count = new HashMap<String, Integer>(); Set<String> values = new HashSet<String>(); for (int i = 0; i < D.numInstances(); i++) { String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k)); String w = "" + (int) D.instance(i).value(j) + (int) D.instance(i).value(k); //System.out.println("w = "+w); count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1); values.add(encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k))); } //System.out.println("("+j+","+k+")"+values); System.out.print("pruned from " + count.size() + " to "); MLUtils.pruneCountHashMap(count, p); String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future System.out.println("" + count.size() + " with p = " + p); System.out.println("" + count); values = count.keySet(); // Create and insert the new attribute D.insertAttributeAt( new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()), new ArrayList(values)), L); // Set values for the new attribute for (int i = 0; i < D.numInstances(); i++) { Instance x = D.instance(i); String y_jk = encodeValue(x.stringValue(j), x.stringValue(k)); try { x.setValue(L, y_jk); // y_jk = } catch (Exception e) { //x.setMissing(L); //D.delete(i); //i--; String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG //System.out.println("OK, that value ("+y_jk+") didn't exist ... 
set the closests ones ...: "+Arrays.toString(y_close)); int max_c = 0; for (String y_ : y_close) { int c = count.get(y_); if (c > max_c) { max_c = c; y_max = y_; } } //System.out.println("we actually found "+Arrays.toString(y_close)+" but will only set one for now (the one with the highest count) : "+y_max+" ..."); x.setValue(L, y_max); // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one) } } // Delete separate attributes D.deleteAttributeAt(k > j ? k : j); D.deleteAttributeAt(k > j ? j : k); // Set class index D.setClassIndex(L - 1); return D; }