Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

On this page you can find example usages of the weka.core Instances attribute method.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Returns the left-hand side of this split's textual description: the name
 * of the attribute found at {@code attribIndex} in the given dataset.
 *
 * @param data dataset whose attribute at {@code attribIndex} is looked up
 * @return the name of that attribute
 */
@Override
public String leftSide(Instances data) {
    return data.attribute(attribIndex).name();
}

From source file:myclassifier.myC45Pack.SplitModel.java

/**
 * Returns the right-hand side of this split's textual description for one
 * branch.
 *
 * <p>When the attribute at {@code attribIndex} is nominal the result is
 * {@code " = "} followed by the attribute value at position {@code index}.
 * Otherwise branch 0 yields {@code " <= "} and every other branch yields
 * {@code " > "}, each followed by {@code splitPointValue} formatted with
 * six decimal places.
 *
 * @param index branch index
 * @param data  dataset providing the split attribute
 * @return the formatted right-hand side
 */
@Override
public String rightSide(int index, Instances data) {
    StringBuffer description = new StringBuffer();

    if (!data.attribute(attribIndex).isNominal()) {
        // Non-nominal split: the branch index only selects the comparison operator.
        String operator = (index == 0) ? " <= " : " > ";
        description.append(operator).append(Utils.doubleToString(splitPointValue, 6));
        return description.toString();
    }

    description.append(" = ").append(data.attribute(attribIndex).value(index));
    return description.toString();
}

From source file:myclassifier.Util.java

/**
 * Produces a dataset in which {@code att} is replaced by a two-valued
 * nominal attribute of the same name, with labels {@code "<=threshold"} and
 * {@code ">threshold"}.
 *
 * <p>A helper attribute named {@code "thresholded <name>"} is inserted via
 * the {@code Add} filter, filled in per instance by comparing the
 * int-truncated original value with {@code threshold}, then the original
 * attribute is removed and the helper is renamed to the original name.
 *
 * @param data      source dataset (passed to the filter; a copy is used as the filter's input format)
 * @param att       attribute of {@code data} to threshold
 * @param threshold cut point; values {@code <= threshold} get the first label
 * @return the filtered dataset with the thresholded attribute
 * @throws Exception if the filter or the attribute removal fails
 */
public static Instances setAttributeThreshold(Instances data, Attribute att, int threshold) throws Exception {
    final String sourceName = att.name();
    final String helperName = "thresholded " + sourceName;
    final String lowLabel = "<=" + threshold;
    final String highLabel = ">" + threshold;

    Instances formatCopy = new Instances(data);
    Add addFilter = new Add();
    addFilter.setAttributeName(helperName);
    addFilter.setAttributeIndex(String.valueOf(att.index() + 2));
    addFilter.setNominalLabels(lowLabel + "," + highLabel);
    addFilter.setInputFormat(formatCopy);

    Instances result = Filter.useFilter(data, addFilter);

    Attribute source = result.attribute(sourceName);
    Attribute helper = result.attribute(helperName);
    final int rowCount = result.numInstances();
    for (int row = 0; row < rowCount; row++) {
        boolean atOrBelow = (int) result.instance(row).value(source) <= threshold;
        String label = atOrBelow ? lowLabel : highLabel;
        result.instance(row).setValue(helper, label);
    }

    result = wekaCode.removeAttributes(result, String.valueOf(att.index() + 1));
    result.renameAttribute(result.attribute(helperName), sourceName);
    return result;
}

From source file:myclassifier.Util.java

/**
 * Replaces every numeric attribute of {@code data} with a two-valued nominal
 * attribute, choosing for each one the threshold with the highest value
 * returned by {@code calculateIG}.
 *
 * <p>For each numeric attribute the distinct int-truncated values are
 * collected and sorted ascending. Every value except the largest is tried as
 * a threshold through {@link #setAttributeThreshold}, and the dataset
 * produced by the best-scoring threshold becomes the working dataset for the
 * attributes that follow.
 *
 * <p>A numeric attribute with fewer than two distinct values offers no
 * candidate threshold and is left unchanged. Previously this case failed
 * with an array-size or array-index exception.
 *
 * @param data dataset to convert
 * @return the dataset after all eligible numeric attributes were thresholded
 * @throws Exception if thresholding an attribute fails
 */
public static Instances toNominal(Instances data) throws Exception {
    for (int n = 0; n < data.numAttributes(); n++) {
        Attribute att = data.attribute(n);
        if (!att.isNumeric()) {
            continue;
        }

        HashSet<Integer> uniqueValues = new HashSet<>();
        for (int i = 0; i < data.numInstances(); ++i) {
            uniqueValues.add((int) (data.instance(i).value(att)));
        }

        List<Integer> dataValues = new ArrayList<>(uniqueValues);
        if (dataValues.size() < 2) {
            // No candidate thresholds: nothing to compare, keep the attribute as is.
            continue;
        }
        dataValues.sort(Integer::compare);

        // The largest value is never a candidate: every instance would fall on the "<=" side.
        final int candidateCount = dataValues.size() - 1;
        double[] infoGains = new double[candidateCount];
        Instances[] tempInstances = new Instances[candidateCount];
        for (int i = 0; i < candidateCount; ++i) {
            tempInstances[i] = setAttributeThreshold(data, att, dataValues.get(i));
            infoGains[i] = calculateIG(tempInstances[i], tempInstances[i].attribute(att.name()));
        }
        data = new Instances(tempInstances[Util.indexOfMax(infoGains)]);
    }
    return data;
}

From source file:myclassifier.wekaCode.java

/**
 * Builds a single instance from raw string values, classifies it and prints
 * the predicted class label to standard output.
 *
 * <p>Every attribute except the last one in {@code data} is filled from the
 * entry of {@code attributes} at the same position: entries for attributes
 * of type {@code Attribute.NUMERIC} are parsed as doubles, all others are
 * set as strings.
 *
 * @param attributes  raw values, one per attribute position
 * @param classifiers classifier used for the prediction
 * @param data        dataset supplying the attribute definitions and the class attribute
 * @throws Exception if classification fails
 */
public static void classifyUnseenData(String[] attributes, Classifier classifiers, Instances data)
        throws Exception {
    Instance unseen = new Instance(data.numAttributes());
    unseen.setDataset(data);

    final int lastIndex = data.numAttributes() - 1;
    for (int idx = 0; idx < lastIndex; idx++) {
        boolean numeric = data.attribute(idx).type() == Attribute.NUMERIC;
        if (!numeric) {
            unseen.setValue(idx, attributes[idx]);
            continue;
        }
        double parsed = Double.parseDouble(attributes[idx]);
        unseen.setValue(idx, parsed);
    }

    double predicted = classifiers.classifyInstance(unseen);
    unseen.setClassValue(predicted);

    String labelText = data.classAttribute().value((int) predicted);
    System.out.println("Hasil Classify Unseen Data Adalah: " + labelText);
}

From source file:myclusterer.MyKMeans.java

/**
 * Computes a centroid instance for the given cluster members.
 *
 * <p>Per attribute: numeric attributes receive the weighted mean of their
 * non-missing values (or a missing value when no non-missing weight was
 * seen); all other attributes receive the index of the value with the
 * largest accumulated weight, or a missing value when that weight is smaller
 * than the accumulated weight of missing entries.
 *
 * @param members instances belonging to one cluster
 * @return a new instance of weight 1.0 holding the centroid values
 */
private Instance createCentroid(Instances members) {
    final int attrCount = members.numAttributes();
    double[] centroid = new double[attrCount];
    double[][] valueWeights = new double[attrCount][];
    double[] missingWeight = new double[attrCount];
    double[] presentWeight = new double[attrCount];

    // Only nominal attributes get a per-value weight table.
    for (int col = 0; col < attrCount; col++) {
        if (members.attribute(col).isNominal()) {
            valueWeights[col] = new double[members.attribute(col).numValues()];
        }
    }

    // Accumulate weighted sums (numeric) and weighted counts (otherwise).
    for (int row = 0; row < members.numInstances(); ++row) {
        Instance member = members.instance(row);
        double w = member.weight();
        for (int col = 0; col < attrCount; col++) {
            if (member.isMissing(col)) {
                missingWeight[col] += w;
                continue;
            }
            presentWeight[col] += w;
            if (members.attribute(col).isNumeric()) {
                centroid[col] += w * member.value(col);
            } else {
                valueWeights[col][(int) member.value(col)] += w;
            }
        }
    }

    for (int col = 0; col < attrCount; col++) {
        if (members.attribute(col).isNumeric()) {
            if (presentWeight[col] > 0) {
                centroid[col] /= presentWeight[col];
            } else {
                centroid[col] = Instance.missingValue();
            }
            continue;
        }

        double bestWeight = -Double.MAX_VALUE;
        double bestValue = -1;
        double[] weights = valueWeights[col];
        for (int v = 0; v < weights.length; v++) {
            if (weights[v] > bestWeight) {
                bestWeight = weights[v];
                bestValue = v;
            }
            // Assigned on every pass; the value from the final pass is the one kept.
            if (bestWeight < missingWeight[col]) {
                centroid[col] = Instance.missingValue();
            } else {
                centroid[col] = bestValue;
            }
        }
    }
    return new Instance(1.0, centroid);
}

From source file:myclusterer.WekaCode.java

/**
 * Builds a single instance from raw string values, assigns it to a cluster
 * and prints the class-attribute label found at the cluster's index.
 *
 * <p>Every attribute except the last one in {@code data} is filled from the
 * entry of {@code attributes} at the same position: entries for attributes
 * of type {@code Attribute.NUMERIC} are parsed as doubles, all others are
 * set as strings.
 *
 * @param attributes raw values, one per attribute position
 * @param clusterer  clusterer used to assign the instance
 * @param data       dataset supplying the attribute definitions and the class attribute
 * @throws Exception if clustering fails
 */
public static void classifyUnseenData(String[] attributes, Clusterer clusterer, Instances data)
        throws Exception {
    Instance candidate = new Instance(data.numAttributes());
    candidate.setDataset(data);

    int position = 0;
    final int stop = data.numAttributes() - 1;
    while (position < stop) {
        String raw = attributes[position];
        if (data.attribute(position).type() == Attribute.NUMERIC) {
            candidate.setValue(position, Double.parseDouble(raw));
        } else {
            candidate.setValue(position, raw);
        }
        position++;
    }

    double assigned = clusterer.clusterInstance(candidate);
    candidate.setClassValue(assigned);

    String labelText = data.classAttribute().value((int) assigned);
    System.out.println("Hasil Classify Unseen Data Adalah: " + labelText);
}

From source file:myID3.MyId3.java

/**
 * Construct the tree using the given instance
 * Find the highest attribute value which best at dividing the data
 * @param data Instance/*from   w  w  w.j  a v  a 2 s  .  co  m*/
 */
public void buildTree(Instances data) {
    if (data.numInstances() > 0) {
        // Lets find the highest Information Gain!
        // First compute each information gain attribute
        double IG[] = new double[data.numAttributes()];
        Enumeration enumAttribute = data.enumerateAttributes();
        while (enumAttribute.hasMoreElements()) {
            Attribute attribute = (Attribute) enumAttribute.nextElement();
            IG[attribute.index()] = informationGain(data, attribute);
            // System.out.println(attribute.toString() + ": " + IG[attribute.index()]);
        }
        // Assign it as the tree attribute!
        currentAttribute = data.attribute(maxIndex(IG));
        //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]);

        // IG = 0 then current node = leaf!
        if (Utils.eq(IG[currentAttribute.index()], 0)) {
            // Set the class value as the highest frequency of the class
            currentAttribute = null;
            classDistribution = new double[data.numClasses()];
            Enumeration enumInstance = data.enumerateInstances();
            while (enumInstance.hasMoreElements()) {
                Instance temp = (Instance) enumInstance.nextElement();
                classDistribution[(int) temp.classValue()]++;
            }
            Utils.normalize(classDistribution);
            classValue = Utils.maxIndex(classDistribution);
            classAttribute = data.classAttribute();
        } else {
            // Create another node from the current tree
            Instances[] splitData = splitDataByAttribute(data, currentAttribute);
            nodes = new MyId3[currentAttribute.numValues()];

            for (int i = 0; i < currentAttribute.numValues(); i++) {
                nodes[i] = new MyId3();
                nodes[i].buildTree(splitData[i]);
            }
        }
    } else {
        classAttribute = null;
        classValue = Utils.missingValue();
        classDistribution = new double[data.numClasses()];
    }
}

From source file:myid3andc45classifier.Model.MyC45.java

/**
 * Prepares the training data and builds the tree.
 *
 * <p>After the capability check, the data is copied and instances with a
 * missing class are removed. Then, for each non-class attribute:
 * <ul>
 *   <li>numeric: the data is sorted by the attribute; wherever two
 *       neighbouring instances differ in class, the midpoint of their values
 *       is passed to {@code convertInstances} unless an attribute named
 *       {@code "<name> <midpoint>"} already exists. Missing values are then
 *       replaced with the attribute mean (0 when the mean is NaN).</li>
 *   <li>otherwise: missing values are replaced with the most frequent
 *       value.</li>
 * </ul>
 * Finally {@link #makeMyC45Tree} is called on the prepared data.
 *
 * @param data training instances
 * @throws Exception if the capability check or tree construction fails
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    // NOTE(review): this enumeration is taken from the copy made above, while `data`
    // may be reassigned inside the loop - confirm that convertInstances keeps the
    // indices of existing attributes stable.
    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        Attribute attr = (Attribute) enumAtt.nextElement();
        if (attr.isNumeric()) {
            // TODO: split nominal
            data.sort(attr);
            for (int i = 0; i < data.numInstances() - 1; i++) {
                if (data.instance(i).classValue() != data.instance(i + 1).classValue()) {
                    // Candidate split point: midpoint between two neighbours of different class.
                    double midpoint = (data.instance(i + 1).value(attr) + data.instance(i).value(attr)) / 2;
                    if (data.attribute(attr.name() + " " + midpoint) == null) {
                        data = convertInstances(data, attr, midpoint);
                    }
                }
            }

            // Missing-value handling: replace missing values with the mean.
            AttributeStats attributeStats = data.attributeStats(attr.index());
            double mean = attributeStats.numericStats.mean;
            if (Double.isNaN(mean)) {
                mean = 0;
            }
            Enumeration instEnumerate = data.enumerateInstances();
            while (instEnumerate.hasMoreElements()) {
                Instance instance = (Instance) instEnumerate.nextElement();
                if (instance.isMissing(attr.index())) {
                    instance.setValue(attr.index(), mean);
                }
            }
        } else {
            // Missing-value handling: replace missing values with the most frequent value.
            AttributeStats attributeStats = data.attributeStats(attr.index());
            int maxIndex = 0;
            for (int i = 1; i < attr.numValues(); i++) {
                if (attributeStats.nominalCounts[maxIndex] < attributeStats.nominalCounts[i]) {
                    maxIndex = i;
                }
            }
            Enumeration instEnumerate = data.enumerateInstances();
            while (instEnumerate.hasMoreElements()) {
                Instance instance = (Instance) instEnumerate.nextElement();
                if (instance.isMissing(attr.index())) {
                    instance.setValue(attr.index(), maxIndex);
                }
            }
        }
    }
    makeMyC45Tree(data);
}

From source file:myid3andc45classifier.Model.MyC45.java

/**
 * Builds this tree node from the given instances.
 *
 * <p>With no instances the node becomes a leaf with a missing label.
 * Otherwise the gain ratio of every non-numeric, non-class attribute is
 * computed (numeric attributes are excluded by scoring them negative
 * infinity) and the best-scoring attribute is selected. If the best ratio is
 * NaN or not greater than {@code epsilon} the node becomes a leaf labelled
 * with the most frequent class; otherwise the class distribution is
 * recorded and one successor per attribute value is built through
 * {@code buildClassifier}.
 *
 * @param data instances reaching this node
 * @throws Exception if building a successor fails
 */
public void makeMyC45Tree(Instances data) throws Exception {
    if (data.numInstances() == 0) {
        attribute = null;
        label = Instance.missingValue();
        return;
    }

    double[] infoGainRatios = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        if (!att.isNumeric()) {
            infoGainRatios[att.index()] = computeInfoGainRatio(data, att);
        } else {
            // Numeric attributes are never chosen as split attributes here.
            infoGainRatios[att.index()] = Double.NEGATIVE_INFINITY;
        }
    }

    // Resolve the best attribute once so the split and the leaf test use the same index.
    int bestIndex = maxIndex(infoGainRatios);
    double bestRatio = infoGainRatios[bestIndex];
    attribute = data.attribute(bestIndex);

    // Make a leaf if the best gain ratio is negligible or undefined; otherwise create successors.
    if (bestRatio <= epsilon || Double.isNaN(bestRatio)) {
        attribute = null;
        double[] numClasses = new double[data.numClasses()];

        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            numClasses[(int) inst.classValue()]++;
        }

        label = maxIndex(numClasses);
        classAttribute = data.classAttribute();
    } else {
        classAttribute = data.classAttribute();
        Instances[] splitData = splitInstancesByAttribute(data, attribute);
        Instances[] distrData = splitInstancesByAttribute(data, data.classAttribute());
        distribution = new double[distrData.length];
        for (int j = 0; j < distribution.length; j++) {
            distribution[j] = distrData[j].numInstances();
        }
        successors = new MyC45[attribute.numValues()];
        for (int j = 0; j < attribute.numValues(); j++) {
            successors[j] = new MyC45();
            successors[j].buildClassifier(splitData[j]);
        }
    }
    // TODO: prune
}