List of usage examples for weka.core.Instances classAttribute
public Attribute classAttribute()
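Before the source-file examples, a minimal self-contained sketch of the call itself. The class name ClassAttributeExample and the file name "data.arff" are illustrative assumptions, not taken from any of the sources below. classAttribute() returns the Attribute currently set as the class, so the class index must be set first:

import weka.core.Attribute;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassAttributeExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset and declare the last attribute as the class
        // ("data.arff" is a placeholder path)
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // classAttribute() returns the Attribute object set as the class
        Attribute cls = data.classAttribute();
        System.out.println("Class attribute: " + cls.name());
        if (cls.isNominal()) {
            for (int i = 0; i < cls.numValues(); i++) {
                System.out.println("  label " + i + ": " + cls.value(i));
            }
        }
    }
}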
From source file:myid3andc45classifier.Model.MyID3.java
@Override
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new Exception("MyID3: nominal class, please.");
    }
    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        Attribute attr = (Attribute) enumAtt.nextElement();
        if (!attr.isNominal()) {
            throw new Exception("MyID3: only nominal attributes, please.");
        }
        Enumeration enumInstance = data.enumerateInstances();
        while (enumInstance.hasMoreElements()) {
            if (((Instance) enumInstance.nextElement()).isMissing(attr)) {
                throw new Exception("MyID3: no missing values, please.");
            }
        }
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeMyID3Tree(data);
}
From source file:myid3andc45classifier.Model.MyID3.java
public void makeMyID3Tree(Instances data) throws Exception {
    // Check whether no instances have reached this node
    if (data.numInstances() == 0) {
        attribute = null;
        classValue = Instance.missingValue();
        return;
    }
    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    attribute = data.attribute(maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (isDoubleEqual(infoGains[attribute.index()], 0)) {
        attribute = null;
        double[] numClasses = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            numClasses[(int) inst.classValue()]++;
        }
        label = maxIndex(numClasses);
        classAttribute = data.classAttribute();
    } else {
        Instances[] splitData = splitInstancesByAttribute(data, attribute);
        successors = new MyID3[attribute.numValues()];
        for (int j = 0; j < attribute.numValues(); j++) {
            successors[j] = new MyID3();
            successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:myJ48.MyJ48.java
/**
 * Constructs the tree from the given instances by finding the attribute
 * with the highest information gain, i.e. the attribute that best splits the data.
 * @param data the training instances
 */
public void buildTree(Instances data) throws Exception {
    if (data.numInstances() > 0) {
        // Find the highest information gain:
        // first compute each attribute's information gain
        double IG[] = new double[data.numAttributes()];
        Enumeration enumAttribute = data.enumerateAttributes();
        while (enumAttribute.hasMoreElements()) {
            Attribute attribute = (Attribute) enumAttribute.nextElement();
            IG[attribute.index()] = informationGain(data, attribute);
            // System.out.println(attribute.toString() + ": " + IG[attribute.index()]);
        }
        // Assign the best attribute as this node's split attribute
        currentAttribute = data.attribute(maxIndex(IG));
        //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]);

        // If the information gain is zero, the current node becomes a leaf
        if (Utils.eq(IG[currentAttribute.index()], 0)) {
            // Set the class value to the most frequent class
            currentAttribute = null;
            classDistribution = new double[data.numClasses()];
            Enumeration enumInstance = data.enumerateInstances();
            while (enumInstance.hasMoreElements()) {
                Instance temp = (Instance) enumInstance.nextElement();
                classDistribution[(int) temp.classValue()]++;
            }
            Utils.normalize(classDistribution);
            classValue = Utils.maxIndex(classDistribution);
            classAttribute = data.classAttribute();
        } else {
            // Otherwise create one child node per value of the split attribute
            Instances[] splitData = splitDataByAttribute(data, currentAttribute);
            nodes = new MyJ48[currentAttribute.numValues()];
            for (int i = 0; i < currentAttribute.numValues(); i++) {
                nodes[i] = new MyJ48(this);
                nodes[i].buildTree(splitData[i]);
            }
        }
    } else {
        classAttribute = null;
        classValue = Utils.missingValue();
        classDistribution = new double[data.numClasses()];
    }
}
From source file:myJ48.MyJ48.java
/**
 * Prunes the tree: tentatively collapses this leaf's predecessor into a leaf
 * and keeps the change only if cross-validated accuracy stays above 90% of
 * the initial accuracy.
 * @param data the training instances
 */
public void pruneTree2(Instances data) throws Exception {
    if (currentAttribute == null) {
        Attribute tempAttr = predecessor.currentAttribute;
        predecessor.currentAttribute = null;
        // Set the class value to the most frequent class
        classDistribution = new double[data.numClasses()];
        Enumeration enumInstance = data.enumerateInstances();
        while (enumInstance.hasMoreElements()) {
            Instance temp = (Instance) enumInstance.nextElement();
            classDistribution[(int) temp.classValue()]++;
        }
        Utils.normalize(classDistribution);
        predecessor.classValue = Utils.maxIndex(classDistribution);
        predecessor.classAttribute = data.classAttribute();

        // Re-evaluate the pruned tree with cross-validation
        Weka weka = new Weka();
        weka.setTraining("weather.nominal.arff");
        String[] options_cl = { "" };
        weka.setClassifier("myJ48.MyJ48", options_cl);
        weka.runCV(true);
        double currentAccuracy = weka.getM_Evaluation().correct();
        double maxFalseAccuracy = initAccuracy * 0.9;

        // Revert the pruning if accuracy drops too far
        if (maxFalseAccuracy > currentAccuracy) {
            predecessor.currentAttribute = tempAttr;
            visited = true;
        } else {
            visited = false;
        }
    } else if (visited) {
        // Already pruned and reverted; nothing to do
    } else {
        for (int j = 0; j < currentAttribute.numValues(); j++) {
            if (nodes[j] == null) {
                //System.out.println("null node");
            } else {
                //System.out.println("not null");
            }
            nodes[j].pruneTree(data);
        }
    }
}
From source file:myJ48.MyJ48.java
public MyJ48 pruneTree(Instances data) throws Exception {
    // A leaf cannot be pruned any further
    if (currentAttribute == null) {
        return this;
    }
    for (int i = 0; i < currentAttribute.numValues(); i++) {
        boolean succLeaf = true;
        if (nodes[i].currentAttribute != null) {
            // Check whether all children of nodes[i] are leaves
            for (int j = 0; j < nodes[i].currentAttribute.numValues(); j++) {
                succLeaf = (succLeaf && (nodes[i].nodes[j].currentAttribute == null));
            }
            if (succLeaf) {
                // Tentatively collapse nodes[i] into a leaf
                Attribute tempAttr = nodes[i].currentAttribute;
                nodes[i].currentAttribute = null;
                // Set the class value to the most frequent class
                classDistribution = new double[data.numClasses()];
                Enumeration enumInstance = data.enumerateInstances();
                while (enumInstance.hasMoreElements()) {
                    Instance temp = (Instance) enumInstance.nextElement();
                    classDistribution[(int) temp.classValue()]++;
                }
                Utils.normalize(classDistribution);
                nodes[i].classValue = Utils.maxIndex(classDistribution);
                nodes[i].classAttribute = data.classAttribute();
                /*Weka weka = new Weka();
                weka.setTraining("weather.nominal.arff");
                String[] options_cl = {""};
                weka.setClassifier("myJ48.MyJ48", options_cl);
                weka.runCV(true);
                double currentAccuracy = weka.getM_Evaluation().correct();*/
                Random rand = new Random();
                double currentAccuracy = rand.nextDouble();
                System.out.println("accuracy used: " + currentAccuracy);
                double maxFalseAccuracy = 0.7; // trial threshold
                // Revert the pruning if accuracy drops below the threshold
                if (maxFalseAccuracy > currentAccuracy) {
                    nodes[i].currentAttribute = tempAttr;
                    //visited = true;
                } else {
                    //visited = false;
                }
            }
        } else {
            nodes[i] = nodes[i].pruneTree(data);
        }
    }
    return this;
}
From source file:naive_bayes.Naive_bayes.java
@Override
public void buildClassifier(Instances newData) throws Exception {
    int countAttr = newData.numAttributes();
    int distinctClassValue = newData.attribute(classidx).numValues();

    /* Initialize the model */
    M = new ArrayList[countAttr][distinctClassValue];
    for (int i = 0; i < countAttr; i++) {
        for (int j = 0; j < distinctClassValue; j++) {
            M[i][j] = new ArrayList<ListElement>();
        }
    }
    boolean add;
    ListElement le = new ListElement();
    Attribute ab;
    for (int i = 0; i < countAttr; i++) {
        if (i != classidx) {
            for (int j = 0; j < distinctClassValue; j++) {
                for (int k = 0; k < newData.attribute(i).numValues(); k++) {
                    ab = newData.attribute(i);
                    String c = ab.value((int) newData.instance(149).value(i));
                    add = M[i][j].add(new ListElement());
                }
            }
        }
    }

    /* Build an array that counts the number of instances in each class */
    Attribute a;
    String c;
    arrayOfClass = new ListElement[newData.numClasses()];
    for (int idx = 0; idx < newData.numClasses(); idx++) {
        arrayOfClass[idx] = new ListElement();
        a = newData.classAttribute();
        c = a.value(idx);
        arrayOfClass[idx].setDisAttrName(c);
    }
    for (int i = 0; i < newData.numInstances(); i++) {
        double z = newData.instance(i).classValue();
        int zz = (int) z;
        arrayOfClass[zz].setCount(arrayOfClass[zz].getCount() + 1);
    }

    // Record the frequency of each attribute value
    for (int i = 0; i < newData.numInstances(); i++) {
        for (int j = 0; j < newData.numAttributes(); j++) {
            if (j != classidx) { // skip the class attribute
                a = newData.attribute(classidx);
                c = a.value((int) newData.instance(i).value(classidx));
                // Get the class index
                double z = newData.instance(i).classValue();
                int zz = (int) z;
                le.setDisAttrName(c);
                // Get the distinct-value index
                double x = newData.instance(i).value(j);
                int xx = (int) x;
                // Increment the frequency of this value for this class and attribute
                le.setCount(M[j][zz].get(xx).getCount() + 1);
                M[j][zz].set(xx, new ListElement(M[j][zz].get(xx).getDisAttrName(),
                        M[j][zz].get(xx).getCount() + 1));
            }
        }
    }

    /* Compute the probability of each distinct attribute value per class */
    for (int j = 0; j < newData.numAttributes(); j++) {
        if (j != classidx) {
            for (int zz = 0; zz < newData.numClasses(); zz++) {
                for (int xx = 0; xx < newData.attribute(j).numValues(); xx++) {
                    M[j][zz].set(xx, new ListElement(M[j][zz].get(xx).getDisAttrName(),
                            M[j][zz].get(xx).getCount() / arrayOfClass[zz].getCount()));
                }
            }
        }
    }
}
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Builds Id3 decision tree classifier.
 *
 * @param data the training data
 * @exception Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new UnsupportedClassTypeException("Id3: nominal class, please.");
    }
    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        if (!((Attribute) enumAtt.nextElement()).isNominal()) {
            throw new UnsupportedAttributeTypeException("Id3: only nominal attributes, please.");
        }
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeTree(data);
}
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Method for building an Id3 tree.
 *
 * @param data the training data
 * @exception Exception if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {
    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        double sum = 0;
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        return;
    }
    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    boolean makeLeaf;
    makeLeaf = Utils.eq(infoGains[m_Attribute.index()], 0);
    Instances[] splitData = null;
    if (!makeLeaf) {
        splitData = splitData(data, m_Attribute);
        for (int i = 0; i < splitData.length; ++i) {
            if (splitData[i].numInstances() == data.numInstances()) {
                //System.out.println("When splitting on attrib " + m_Attribute
                //        + ", child " + i + " is same size as current, making into leaf.");
                makeLeaf = true;
                break;
            }
        }
    }
    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (makeLeaf) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        double sum = 0;
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
            sum += inst.weight();
        }
        // Laplace smooth the distribution instead of normalizing it
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        //Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        m_Successors = new MyId3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyId3();
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}
From source file:net.sf.bddbddb.order.WekaInterface.java
License:LGPL
public static double cvError(int numFolds, Instances data0, String cClassName) {
    if (data0.numInstances() < numFolds)
        return Double.NaN; // more folds than elements
    if (numFolds == 0)
        return Double.NaN; // no folds
    if (data0.numInstances() == 0)
        return 0; // no instances
    Instances data = new Instances(data0);
    //data.randomize(new Random(System.currentTimeMillis()));
    data.stratify(numFolds);
    Assert._assert(data.classAttribute() != null);
    double[] estimates = new double[numFolds];
    for (int i = 0; i < numFolds; ++i) {
        Instances trainData = data.trainCV(numFolds, i);
        Assert._assert(trainData.classAttribute() != null);
        Assert._assert(trainData.numInstances() != 0, "Cannot train classifier on 0 instances.");
        Instances testData = data.testCV(numFolds, i);
        Assert._assert(testData.classAttribute() != null);
        Assert._assert(testData.numInstances() != 0, "Cannot test classifier on 0 instances.");
        int temp = FindBestDomainOrder.TRACE;
        FindBestDomainOrder.TRACE = 0;
        Classifier classifier = buildClassifier(cClassName, trainData);
        FindBestDomainOrder.TRACE = temp;
        int count = testData.numInstances();
        double loss = 0;
        double sum = 0;
        for (Enumeration e = testData.enumerateInstances(); e.hasMoreElements();) {
            Instance instance = (Instance) e.nextElement();
            Assert._assert(instance != null);
            Assert._assert(instance.classAttribute() != null
                    && instance.classAttribute() == trainData.classAttribute());
            try {
                double testClass = classifier.classifyInstance(instance);
                double weight = instance.weight();
                if (testClass != instance.classValue())
                    loss += weight;
                sum += weight;
            } catch (Exception ex) {
                FindBestDomainOrder.out.println("Exception while classifying: " + instance + "\n" + ex);
            }
        }
        estimates[i] = 1 - loss / sum;
    }
    double average = 0;
    for (int i = 0; i < numFolds; ++i)
        average += estimates[i];
    return average / numFolds;
}
From source file:newclassifier.NewClassifier.java
public void classify(String path) throws Exception {
    // load unlabeled data and set class attribute
    Instances unlabeled = DataSource.read(path);
    unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

    // copy the dataset
    Instances labeled = new Instances(unlabeled);

    // label instances
    for (int i = 0; i < unlabeled.numInstances(); i++) {
        double clsLabel = cls.classifyInstance(unlabeled.instance(i));
        labeled.instance(i).setClassValue(clsLabel);
    }

    // save labeled data
    DataSink.write("labeled.arff", labeled);

    // output predictions
    System.out.println("# - actual - predicted - distribution");
    for (int i = 0; i < labeled.numInstances(); i++) {
        double pred = cls.classifyInstance(labeled.instance(i));
        double[] dist = cls.distributionForInstance(labeled.instance(i));
        System.out.print((i + 1) + " - ");
        System.out.print(labeled.instance(i).toString(labeled.classIndex()) + " - ");
        System.out.print(labeled.classAttribute().value((int) pred) + " - ");
        System.out.println(Utils.arrayToString(dist));
    }
}
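As a follow-up to the example above, a small sketch of using classAttribute() to pair each entry of distributionForInstance() with its label name rather than printing the raw array. The helper printDistributions is hypothetical and not part of NewClassifier; it assumes a trained classifier and an Instances set whose class index has already been set.

// Hypothetical helper: prints each class label next to its predicted probability.
static void printDistributions(weka.classifiers.Classifier cls, Instances labeled) throws Exception {
    Attribute classAttr = labeled.classAttribute();
    for (int i = 0; i < labeled.numInstances(); i++) {
        double[] dist = cls.distributionForInstance(labeled.instance(i));
        StringBuilder sb = new StringBuilder();
        for (int v = 0; v < classAttr.numValues(); v++) {
            // classAttribute().value(v) maps the index v back to its nominal label
            sb.append(classAttr.value(v)).append('=').append(dist[v]).append(' ');
        }
        System.out.println((i + 1) + " - " + sb.toString().trim());
    }
}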