Example usage for weka.core Instance stringValue

List of usage examples for weka.core Instance stringValue

Introduction

This page collects usage examples for the stringValue method of weka.core.Instance.

Prototype

public String stringValue(Attribute att);

Source Link

Document

Returns the value of a nominal, string, date, or relational attribute for the instance as a string.

Usage

From source file:lu.lippmann.cdb.lab.beta.shih.Shih2010.java

License:Open Source License

/**
 * Builds a transformed copy of the wrapped {@code instances} dataset.
 *
 * <p>Every attribute that is neither numeric nor the class attribute is replaced by a
 * new attribute of the same name created with {@code new Attribute(name)}; its values
 * are looked up in {@code F} by (category label, attribute index). Numeric attributes
 * are divided by their entry in {@code maxNum} when that entry is greater than 1.
 * The class attribute is copied unchanged. If {@code ignoreClass} is set and a class
 * index exists, the class attribute is removed from the result.
 *
 * @return the transformed dataset; the original dataset is not modified
 */
public Instances getModifiedInstances() {

    final int classIdx = instances.classIndex();

    //Copy attribute list (and change categorical by numerical)
    final ArrayList<Attribute> lAttrs = new ArrayList<Attribute>();
    for (int i = 0; i < instances.numAttributes(); i++) {
        final Attribute attr = instances.attribute(i);
        if (attr.isNumeric() || attr.index() == classIdx) {
            lAttrs.add(attr);
        } else {
            lAttrs.add(new Attribute(attr.name()));
        }
    }

    //Build new instance
    final Instances newInstances = new Instances("Shih instance", lAttrs, instances.numInstances());
    newInstances.setClassIndex(classIdx);
    for (int i = 0; i < instances.numInstances(); i++) {
        final Instance instance = instances.instance(i);
        final Instance cpyInstance = (Instance) instance.copy();
        // Position of the current numeric attribute inside maxNum. It must live outside
        // the attribute loop: declared inside it, it was reset to 0 for every attribute
        // and all numeric columns were scaled by maxNum[0].
        // NOTE(review): assumes maxNum has one entry per non-class numeric attribute,
        // in attribute order - confirm against the code that fills maxNum.
        int k = 0;
        for (int j = 0; j < instance.numAttributes(); j++) {
            final Attribute attribute = instance.attribute(j);
            if (attribute.index() == classIdx) {
                //The class index is nominal
                cpyInstance.setValue(attribute, instance.stringValue(j));
            } else if (!attribute.isNumeric()) {
                final String elt = attribute.value((int) instance.value(j));
                cpyInstance.setValue(attribute, F.get(new TupleSI(elt, j)));
            } else {
                if (maxNum[k] > 1) {
                    cpyInstance.setValue(attribute, instance.value(j) / maxNum[k]);
                }
                k++;
            }
        }
        newInstances.add(cpyInstance);
    }

    if (ignoreClass && classIdx != -1) {
        // Weka refuses to delete the attribute that is currently the class attribute,
        // so the class index has to be cleared first.
        newInstances.setClassIndex(-1);
        newInstances.deleteAttributeAt(classIdx);
    }
    return newInstances;
}

From source file:machinelearningproject.RFTree.java

/**
 * Recursively builds a decision tree for the given instances.
 *
 * <p>Leaves carry a class label in {@code attributeName}; inner trees carry the name
 * of the attribute with the largest information gain and one child node per value of
 * that attribute observed in {@code instances}.
 *
 * @param instances the training data, with its class index set
 * @return the tree, or {@code null} when {@code instances} is empty
 * @throws Exception if one of the helper computations fails
 */
@Override
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList<String>();
    final int classIdx = instances.classIndex();
    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    //choose random fraction
    int numAttr = instances.numAttributes();
    int k = (int) round(sqrt(numAttr));
    // NOTE(review): randomIdx is never read below - both the availability check and
    // the gain search use plain attribute positions, so no random attribute subset is
    // actually applied. The call is kept in case randomFraction has side effects;
    // confirm the intended use.
    ArrayList<Integer> randomIdx = randomFraction(numAttr);

    for (int idx = 0; idx < k; idx++) {
        if (idx != classIdx) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(classIdx);
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, classIdx);
    } else {
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != classIdx) {
                double attrInfoGain = calculateInformationGain(instances, idx, classIdx);
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx == -1) {
            // No attribute has a positive gain, so splitting cannot help. Label the
            // tree with the majority class instead of returning it with a null label.
            tree.attributeName = getModeClass(instances, classIdx);
        } else {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();

            // Distinct values of the chosen attribute, in order of first appearance.
            ArrayList<String> attrValues = new ArrayList<String>();
            for (int i = 0; i < instances.numInstances(); i++) {
                String attrValue = instances.get(i).stringValue(largestInfoGainAttrIdx);
                if (!attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(classIdx);
                // reducing examples; walking backwards keeps the remaining indices
                // valid while rows are deleted
                for (int i = copyInstances.numInstances() - 1; i >= 0; i--) {
                    if (!copyInstances.get(i).stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                    }
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }

    return tree;
}

From source file:machinelearningproject.Tree.java

/**
 * Finds the most frequent string value of one attribute.
 *
 * <p>Each distinct value and its frequency is printed to standard output while the
 * winner is selected. On a tie the value met first in the map's iteration order wins.
 *
 * @param instances the data to scan
 * @param classIdx  index of the attribute whose values are counted
 * @return the most frequent value, or the empty string when there are no instances
 */
public String getModeClass(Instances instances, int classIdx) {
    final HashMap<String, Integer> frequencies = new HashMap<>();

    // Tally how often each value occurs.
    final int total = instances.size();
    for (int row = 0; row < total; row++) {
        final String label = instances.get(row).stringValue(classIdx);
        final Integer seen = frequencies.get(label);
        frequencies.put(label, seen == null ? 1 : seen + 1);
    }

    // Pick the value with the strictly largest tally.
    String best = "";
    int bestCount = 0;
    for (String label : frequencies.keySet()) {
        final int occurrences = frequencies.get(label);
        System.out.println("key: " + label + " value: " + occurrences);
        if (occurrences > bestCount) {
            best = label;
            bestCount = occurrences;
        }
    }
    return best;
}

From source file:machinelearningproject.Tree.java

/**
 * Computes the base-2 entropy of the value distribution of one attribute.
 *
 * @param instances the data to scan
 * @param attrIdx   index of the attribute whose string values are counted
 * @return the entropy in bits; {@code 0} when there are no instances
 */
public double calculateEntropy(Instances instances, int attrIdx) {
    final int total = instances.size();

    // Frequency of every distinct value of the attribute.
    final HashMap<String, Integer> tally = new HashMap<>();
    for (int row = 0; row < total; row++) {
        final String value = instances.get(row).stringValue(attrIdx);
        final Integer seen = tally.get(value);
        tally.put(value, seen == null ? 1 : seen + 1);
    }

    // reference source code http://onoffswitch.net/building-decision-tree/
    double result = 0.0;
    for (String value : tally.keySet()) {
        final double prob = (double) tally.get(value) / (double) total;
        result -= prob * (Math.log(prob) / Math.log(2));
    }
    return result;
}

From source file:machinelearningproject.Tree.java

/**
 * Computes the information gain obtained by splitting on one attribute: the entropy
 * of the class attribute minus the weighted entropy of the class within each value of
 * the split attribute.
 *
 * <p>Counts are kept in a nested map (attribute value, then class value). The earlier
 * implementation joined both values into a single "attr-class" key and split it on
 * "-" again, which dropped every attribute value that itself contained a "-" and
 * could merge distinct pairs into one key.
 *
 * @param instances the data to evaluate
 * @param attrIdx   index of the candidate split attribute
 * @param classIdx  index of the class attribute
 * @return the information gain in bits
 * @throws Exception declared for compatibility with existing callers
 */
public double calculateInformationGain(Instances instances, int attrIdx, int classIdx) throws Exception {
    // attribute value -> (class value -> number of instances)
    HashMap<String, HashMap<String, Integer>> classCountsByValue = new HashMap<>();
    int numInstances = instances.size();

    for (int i = 0; i < numInstances; i++) {
        Instance instance = instances.get(i);
        String attrKey = instance.stringValue(attrIdx);
        String classKey = instance.stringValue(classIdx);

        HashMap<String, Integer> classCounts = classCountsByValue.get(attrKey);
        if (classCounts == null) {
            classCounts = new HashMap<>();
            classCountsByValue.put(attrKey, classCounts);
        }
        Integer seen = classCounts.get(classKey);
        classCounts.put(classKey, seen == null ? 1 : seen + 1);
    }

    double attrEntropy = 0.0;
    for (HashMap<String, Integer> classCounts : classCountsByValue.values()) {
        // Number of instances that carry this attribute value.
        int valueTotal = 0;
        for (Integer classCount : classCounts.values()) {
            valueTotal += classCount;
        }

        // Entropy of the class distribution inside this attribute value.
        double bufferEntropy = 0.0;
        for (Integer classCount : classCounts.values()) {
            double prob = (double) classCount / (double) valueTotal;
            bufferEntropy -= prob * (Math.log(prob) / Math.log(2));
        }

        attrEntropy += (valueTotal / (double) numInstances) * bufferEntropy;
    }
    double classEntropy = calculateEntropy(instances, classIdx);

    return (classEntropy - attrEntropy);
}

From source file:machinelearningproject.Tree.java

/**
 * Recursively builds a decision tree for the given instances.
 *
 * <p>Leaves carry a class label in {@code attributeName}; inner trees carry the name
 * of the attribute with the largest information gain and one child node per value of
 * that attribute observed in {@code instances}.
 *
 * @param instances the training data, with its class index set
 * @return the tree, or {@code null} when {@code instances} is empty
 * @throws Exception if one of the helper computations fails
 */
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList<>();
    final int classIdx = instances.classIndex();

    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    for (int idx = 0; idx < instances.numAttributes(); idx++) {
        if (idx != classIdx) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the same classification
        tree.attributeName = instances.get(0).stringValue(classIdx);
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, classIdx);
    } else {
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != classIdx) {
                double attrInfoGain = calculateInformationGain(instances, idx, classIdx);
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx == -1) {
            // No attribute has a positive gain, so splitting cannot help. Label the
            // tree with the majority class instead of returning it with a null label.
            tree.attributeName = getModeClass(instances, classIdx);
        } else {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();

            // Distinct values of the chosen attribute, in order of first appearance.
            ArrayList<String> attrValues = new ArrayList<>();
            for (int i = 0; i < instances.numInstances(); i++) {
                String attrValue = instances.get(i).stringValue(largestInfoGainAttrIdx);
                if (!attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(classIdx);
                // reducing examples; walking backwards keeps the remaining indices
                // valid while rows are deleted
                for (int i = copyInstances.numInstances() - 1; i >= 0; i--) {
                    if (!copyInstances.get(i).stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                    }
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }

    return tree;
}

From source file:machinelearningproject.Tree.java

/**
 * Classifies an instance by walking from this tree down to a leaf.
 *
 * <p>At each inner tree the instance's value for the attribute named by
 * {@code attributeName} selects the child node whose {@code value} equals it.
 *
 * @param instance the instance to classify
 * @return the {@code attributeName} of the leaf that is reached, or the empty string
 *         when the walk cannot continue: the instance lacks the tested attribute, its
 *         value matches no child, or the matching child has no sub-tree. Previously
 *         these cases looped forever, reused a value from an earlier level, or threw
 *         a {@code NullPointerException}.
 */
public String traverseTree(Instance instance) {
    Tree buffTree = this;
    while (!buffTree.isLeaf()) {
        //get attribute value of an instance; reset on every level so a value from a
        //previous level is never reused
        String attrValue = null;
        for (int i = 0; i < instance.numAttributes(); i++) {
            if (instance.attribute(i).name().equals(buffTree.attributeName)) {
                attrValue = instance.stringValue(i);
                break;
            }
        }

        //compare attribute with node value
        Tree next = null;
        if (attrValue != null) {
            for (int i = 0; i < buffTree.nodes.size(); i++) {
                if (attrValue.equals(buffTree.nodes.get(i).value)) {
                    next = buffTree.nodes.get(i).subTree;
                    break;
                }
            }
        }

        if (next == null) {
            // No branch to follow: stop instead of spinning on the same tree.
            return "";
        }
        buffTree = next;
    }

    //isLeaf
    return buffTree.attributeName;
}

From source file:meka.experiment.statisticsexporters.WekaFilter.java

License:Open Source License

/**
 * Turns each row of a dataset back into an {@link EvaluationStatistics} object.
 *
 * <p>The classifier is rebuilt from the command line stored in the
 * {@code KEY_CLASSIFIER} attribute, the relation name is read from the
 * {@code KEY_RELATION} attribute, and every non-missing numeric attribute is stored
 * under its attribute name. Rows that fail to convert are passed to
 * {@code handleException} and skipped.
 *
 * @param data          the data to convert
 * @return              the generated statistics; empty if one of the two key
 *                      attributes is absent
 */
protected List<EvaluationStatistics> fromInstances(Instances data) {
    final List<EvaluationStatistics> stats = new ArrayList<>();

    // Both key attributes are required; give up early when either is absent.
    if (data.attribute(EvaluationStatistics.KEY_CLASSIFIER) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_CLASSIFIER);
        return stats;
    }
    if (data.attribute(EvaluationStatistics.KEY_RELATION) == null) {
        log("Failed to locate attribute: " + EvaluationStatistics.KEY_RELATION);
        return stats;
    }

    for (int row = 0; row < data.numInstances(); row++) {
        final Instance inst = data.instance(row);
        try {
            final String commandLine = inst.stringValue(data.attribute(EvaluationStatistics.KEY_CLASSIFIER));
            final MultiLabelClassifier cls = OptionUtils.fromCommandLine(MultiLabelClassifier.class,
                    commandLine);
            final String rel = inst.stringValue(data.attribute(EvaluationStatistics.KEY_RELATION));
            final EvaluationStatistics stat = new EvaluationStatistics(cls, rel, null);

            for (int col = 0; col < inst.numAttributes(); col++) {
                // Only numeric cells that hold a value are copied over.
                if (!inst.attribute(col).isNumeric() || inst.isMissing(col)) {
                    continue;
                }
                stat.put(inst.attribute(col).name(), inst.value(col));
            }
            stats.add(stat);
        } catch (Exception e) {
            handleException("Failed to process instance: " + inst, e);
        }
    }

    return stats;
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Joins the string values of the selected attributes with "+",
 * e.g. (3,1,2) -&gt; "3+1+2".
 *
 * @param x       the instance to read the values from
 * @param indices the attribute indices, in output order
 * @return the joined values; the empty string when {@code indices} is empty
 *         (this case used to throw a {@code StringIndexOutOfBoundsException})
 */
public static String encodeValue(Instance x, int indices[]) {
    StringBuilder v = new StringBuilder();
    for (int j = 0; j < indices.length; j++) {
        if (j > 0) {
            v.append('+');
        }
        v.append(x.stringValue(indices[j]));
    }
    return v.toString();
}

From source file:meka.filters.multilabel.SuperNodeFilter.java

License:Open Source License

/**
 * Merge Labels.
 *
 * <p>Replaces the attributes at {@code j} and {@code k} with a single new nominal
 * attribute whose values are the encoded (j,k) value combinations that survive
 * pruning. {@code D} is modified in place and also returned.
 *
 * @param   D   Instances, with attributes labeled by original index
 * @param   j   index 1 (assume that <code>j &lt; k</code>)
 * @param   k   index 2 (assume that <code>j &lt; k</code>)
 * @param   p   pruning argument handed to {@code MLUtils.pruneCountHashMap}
 *              (presumably the minimum count a combination needs in order to be
 *              kept - confirm against MLUtils)
 * @return      Instances with the attributes at j and k removed and the merged
 *              attribute inserted at position L (the class index on entry), with
 *              classIndex = L-1
 */
public static Instances mergeLabels(Instances D, int j, int k, int p) {
    int L = D.classIndex();

    // How often each encoded (j,k) value combination occurs.
    HashMap<String, Integer> count = new HashMap<String, Integer>();
    for (int i = 0; i < D.numInstances(); i++) {
        String v = encodeValue(D.instance(i).stringValue(j), D.instance(i).stringValue(k));
        count.put(v, count.containsKey(v) ? count.get(v) + 1 : 1);
    }

    System.out.print("pruned from " + count.size() + " to ");
    MLUtils.pruneCountHashMap(count, p);
    String y_max = (String) MLUtils.argmax(count); // @todo won't need this in the future
    System.out.println("" + count.size() + " with p = " + p);
    System.out.println("" + count);

    // Only the combinations still present in count become values of the new attribute.
    Set<String> values = count.keySet();

    // Create and insert the new attribute
    D.insertAttributeAt(
            new Attribute(encodeClass(D.attribute(j).name(), D.attribute(k).name()),
                    new ArrayList<String>(values)),
            L);

    // Set values for the new attribute
    for (int i = 0; i < D.numInstances(); i++) {
        Instance x = D.instance(i);
        String y_jk = encodeValue(x.stringValue(j), x.stringValue(k));
        try {
            x.setValue(L, y_jk);
        } catch (Exception e) {
            // setValue rejected y_jk, so fall back to the neighbour with the highest
            // count. y_max is deliberately not reset here: when no neighbour has a
            // positive count it keeps the value from MLUtils.argmax or from an
            // earlier instance.
            String y_close[] = getNeighbours(y_jk, count, 1); // A+B+NEG, A+C+NEG
            int max_c = 0;
            for (String y_ : y_close) {
                int c = count.get(y_);
                if (c > max_c) {
                    max_c = c;
                    y_max = y_;
                }
            }
            x.setValue(L, y_max);
            // ok, that value didn't exist, set the maximum one (@TODO: set the nearest one)
        }
    }

    // Delete separate attributes, higher index first so the lower one stays valid
    D.deleteAttributeAt(k > j ? k : j);
    D.deleteAttributeAt(k > j ? j : k);

    // Set class index
    D.setClassIndex(L - 1);
    return D;
}