List of usage examples for weka.core Instances enumerateInstances
public Enumeration<Instance> enumerateInstances()
From source file:myid3andc45classifier.Model.MyC45.java
/**
 * Computes the error rate of this tree on the given evaluation set.
 *
 * @param instances instances to evaluate; each is checked with {@link #checkInstance}
 * @return fraction of misclassified instances in [0, 1]; 0.0 for an empty set
 *         (the original returned NaN from the 0/0 division)
 */
public double countError(Instances instances) {
    int wrong = 0; // instances failing checkInstance
    int total = 0; // instances examined
    Enumeration enumeration = instances.enumerateInstances();
    while (enumeration.hasMoreElements()) {
        Instance instance = (Instance) enumeration.nextElement();
        if (!checkInstance(instance)) {
            wrong++;
        }
        total++;
    }
    // Guard the empty set: avoid 0/0 = NaN.
    if (total == 0) {
        return 0.0;
    }
    return (double) wrong / (double) total;
}
From source file:myid3andc45classifier.Model.MyID3.java
/**
 * Builds the MyID3 decision tree from the supplied training data.
 * Requires a nominal class, all-nominal attributes, and no missing
 * attribute values; instances with a missing class are dropped.
 *
 * @param data the training instances
 * @throws Exception if the data violates any of the constraints above
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new Exception("MyID3: nominal class, please.");
    }
    // Validate every attribute: must be nominal, and no instance may
    // have a missing value for it.
    for (Enumeration attrs = data.enumerateAttributes(); attrs.hasMoreElements();) {
        Attribute candidate = (Attribute) attrs.nextElement();
        if (!candidate.isNominal()) {
            throw new Exception("MyID3: only nominal attributes, please.");
        }
        for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
            Instance row = (Instance) rows.nextElement();
            if (row.isMissing(candidate)) {
                throw new Exception("MyID3: no missing values, please.");
            }
        }
    }
    // Work on a copy so the caller's data set is untouched.
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeMyID3Tree(data);
}
From source file:myid3andc45classifier.Model.MyID3.java
public void makeMyID3Tree(Instances data) throws Exception { // Mengecek apakah tidak terdapat instance yang dalam node ini if (data.numInstances() == 0) { attribute = null;//w w w . j a v a2s. com classValue = Instance.missingValue(); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } attribute = data.attribute(maxIndex(infoGains)); // Make leaf if information gain is zero. // Otherwise create successors. if (isDoubleEqual(infoGains[attribute.index()], 0)) { attribute = null; double[] numClasses = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); numClasses[(int) inst.classValue()]++; } label = maxIndex(numClasses); classAttribute = data.classAttribute(); } else { Instances[] splitData = splitInstancesByAttribute(data, attribute); successors = new MyID3[attribute.numValues()]; for (int j = 0; j < attribute.numValues(); j++) { successors[j] = new MyID3(); successors[j].buildClassifier(splitData[j]); } } }
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance/*from w w w . j a va 2s. co m*/ */ public void buildTree(Instances data) throws Exception { if (data.numInstances() > 0) { // Lets find the highest Information Gain! // First compute each information gain attribute double IG[] = new double[data.numAttributes()]; Enumeration enumAttribute = data.enumerateAttributes(); while (enumAttribute.hasMoreElements()) { Attribute attribute = (Attribute) enumAttribute.nextElement(); IG[attribute.index()] = informationGain(data, attribute); // System.out.println(attribute.toString() + ": " + IG[attribute.index()]); } // Assign it as the tree attribute! currentAttribute = data.attribute(maxIndex(IG)); //System.out.println(Arrays.toString(IG) + IG[currentAttribute.index()]); // IG = 0 then current node = leaf! if (Utils.eq(IG[currentAttribute.index()], 0)) { // Set the class value as the highest frequency of the class currentAttribute = null; classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); classValue = Utils.maxIndex(classDistribution); classAttribute = data.classAttribute(); } else { // Create another node from the current tree Instances[] splitData = splitDataByAttribute(data, currentAttribute); nodes = new MyJ48[currentAttribute.numValues()]; for (int i = 0; i < currentAttribute.numValues(); i++) { nodes[i] = new MyJ48(this); nodes[i].buildTree(splitData[i]); } } } else { classAttribute = null; classValue = Utils.missingValue(); classDistribution = new double[data.numClasses()]; } }
From source file:myJ48.MyJ48.java
/** * Construct the tree using the given instance * Find the highest attribute value which best at dividing the data * @param data Instance//ww w . j ava 2 s . c o m */ public void pruneTree2(Instances data) throws Exception { if (currentAttribute == null) { Attribute tempAttr = predecessor.currentAttribute; predecessor.currentAttribute = null; // Set the class value as the highest frequency of the class classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); predecessor.classValue = Utils.maxIndex(classDistribution); predecessor.classAttribute = data.classAttribute(); Weka weka = new Weka(); weka.setTraining("weather.nominal.arff"); String[] options_cl = { "" }; weka.setClassifier("myJ48.MyJ48", options_cl); weka.runCV(true); double currentAccuracy = weka.getM_Evaluation().correct(); double maxFalseAccuracy = initAccuracy * 0.9; if (maxFalseAccuracy > currentAccuracy) { predecessor.currentAttribute = tempAttr; visited = true; } else { visited = false; } } else if (visited) { } else { for (int j = 0; j < currentAttribute.numValues(); j++) { if (nodes[j] == null) { //System.out.println("null nodes"); } else { //System.out.println("ga null"); } nodes[j].pruneTree(data); } } }
From source file:myJ48.MyJ48.java
/**
 * Bottom-up pruning pass: for each child whose grandchildren are all leaves,
 * tentatively collapses that child into a leaf labeled with the majority
 * class, then keeps or reverts the collapse based on an accuracy estimate
 * that is currently a RANDOM placeholder (the real CV evaluation is
 * commented out below).
 *
 * @param data instances used to derive the majority-class label
 * @return this node (possibly with collapsed children)
 * @throws Exception propagated from recursion
 */
public MyJ48 pruneTree(Instances data) throws Exception { if (currentAttribute == null) { // A leaf cannot be pruned further.
return this; } else {
} if (currentAttribute != null) { for (int i = 0; i < currentAttribute.numValues(); i++) { // succLeaf stays true only if every grandchild under nodes[i] is a leaf.
boolean succLeaf = true; if (nodes[i].currentAttribute != null) { for (int j = 0; j < nodes[i].currentAttribute.numValues(); j++) { succLeaf = (succLeaf && (nodes[i].nodes[j].currentAttribute == null)); } if (succLeaf) { // Tentatively collapse child i into a leaf.
Attribute tempAttr = nodes[i].currentAttribute; nodes[i].currentAttribute = null; // Set the class value as the highest frequency of the class.
// NOTE(review): the distribution is computed over the FULL data set and
// stored on THIS node's classDistribution field, while the resulting
// label is written to nodes[i] — confirm this asymmetry is intended.
classDistribution = new double[data.numClasses()]; Enumeration enumInstance = data.enumerateInstances(); while (enumInstance.hasMoreElements()) { Instance temp = (Instance) enumInstance.nextElement(); classDistribution[(int) temp.classValue()]++; } Utils.normalize(classDistribution); nodes[i].classValue = Utils.maxIndex(classDistribution); nodes[i].classAttribute = data.classAttribute(); /*Weka weka = new Weka();
weka.setTraining("weather.nominal.arff");
String[] options_cl = {""};
weka.setClassifier("myJ48.MyJ48", options_cl);
weka.runCV(true);
double currentAccuracy = weka.getM_Evaluation().correct();*/ // Placeholder: the CV accuracy above is disabled; a random number is
// used instead, so pruning decisions are currently arbitrary.
Random rand = new Random(); double currentAccuracy = rand.nextDouble(); System.out.println("acc kepake : " + currentAccuracy); double maxFalseAccuracy = 0.7; // ad-hoc threshold chosen by trial and error
if (maxFalseAccuracy > currentAccuracy) { // "Accuracy" below threshold: undo the collapse.
nodes[i].currentAttribute = tempAttr; //visited = true;
} else { //visited = false;
} } } else { // Child i is already a leaf; pruneTree on a leaf returns itself (no-op).
// NOTE(review): internal children whose grandchildren are NOT all leaves
// are never descended into — confirm that is intended.
nodes[i] = nodes[i].pruneTree(data); } } } return this; }
From source file:NaiveBayes.NaiveBayes13514004.java
@Override public void buildClassifier(Instances i) { //Algoritma//from w w w .java 2 s. co m origin = new Instances(i); //Menghitung jumlah attribute dan kelas numAtt = i.numAttributes() - 1; numClass = i.numClasses(); //Inisialisasi matrix 3 dimensi data = new int[numAtt][numClass][0]; prob = new double[numAtt][numClass][0]; kelasdata = new int[numClass]; kelasprob = new double[numClass]; Enumeration<Instance> enu1 = i.enumerateInstances(); while (enu1.hasMoreElements()) { Instance ins = enu1.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); numDis = att.numValues(); data[x][(int) ins.classValue()] = new int[numDis]; prob[x][(int) ins.classValue()] = new double[numDis]; x++; } } //Mengisi matriks Frekuensi Enumeration<Instance> enu2 = i.enumerateInstances(); while (enu2.hasMoreElements()) { Instance ins = enu2.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); data[x][(int) ins.classValue()][(int) ins.value(att)]++; x++; } kelasdata[(int) ins.classValue()]++; } //Menghitung probabilitas kelas double numInstances = (double) i.numInstances(); for (int y = 0; y < numClass; y++) { kelasprob[y] = (double) kelasdata[y] / numInstances; } //Mengisi matriks probabilitas Enumeration<Instance> enu3 = i.enumerateInstances(); while (enu3.hasMoreElements()) { Instance ins = enu3.nextElement(); Enumeration<Attribute> enu_t = i.enumerateAttributes(); int x = 0; while (enu_t.hasMoreElements()) { Attribute att = enu_t.nextElement(); int sumDis = Utils.sum(data[x][(int) ins.classValue()]); numDis = att.numValues(); for (int z = 0; z < numDis; z++) { int y = (int) ins.classValue(); prob[x][y][z] = ((double) data[x][y][z] / (double) sumDis); } x++; } } }
From source file:net.sf.bddbddb.FindBestDomainOrder.java
License:LGPL
void dumpClassifierInfo(String name, Classifier c, Instances data) { BufferedWriter w = null;//from ww w . j a va 2 s . c om try { w = new BufferedWriter(new FileWriter(name)); w.write("Classifier \"name\":\n"); w.write("Attributes: \n"); for (Enumeration e = data.enumerateAttributes(); e.hasMoreElements();) { w.write(e.nextElement() + "\n"); } w.write("\n"); w.write("Based on data from " + data.numInstances() + " instances:\n"); for (Enumeration e = data.enumerateInstances(); e.hasMoreElements();) { Instance i = (Instance) e.nextElement(); if (i instanceof TrialInstance) { TrialInstance ti = (TrialInstance) i; InferenceRule ir = ti.ti.getCollection().getRule(solver); w.write(" " + ti.ti.getCollection().name + " " + ti.getOrder()); if (!ti.getOrder().equals(ti.ti.order)) w.write(" (" + ti.ti.order + ")"); if (ti.isMaxTime()) { w.write(" MAX TIME\n"); } else { w.write(" " + format(ti.getCost()) + " (" + ti.ti.cost + " ms)\n"); } } else { w.write(" " + i + "\n"); } } w.write(c.toString()); w.write("\n"); } catch (IOException x) { solver.err.println("IO Exception occurred writing \"" + name + "\": " + x); } finally { if (w != null) try { w.close(); } catch (IOException _) { } } }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/** * Method for building an Id3 tree./*from w ww . java 2 s . c o m*/ * * @param data * the training data * @exception Exception * if decision tree can't be built successfully */ private void makeTree(Instances data) throws Exception { // Check if no instances have reached this node. if (data.numInstances() == 0) { m_Attribute = null; m_ClassValue = Instance.missingValue(); m_Distribution = new double[data.numClasses()]; double sum = 0; laplaceSmooth(m_Distribution, sum, data.numClasses()); return; } // Compute attribute with maximum information gain. double[] infoGains = new double[data.numAttributes()]; Enumeration attEnum = data.enumerateAttributes(); while (attEnum.hasMoreElements()) { Attribute att = (Attribute) attEnum.nextElement(); infoGains[att.index()] = computeInfoGain(data, att); } m_Attribute = data.attribute(Utils.maxIndex(infoGains)); boolean makeLeaf; makeLeaf = Utils.eq(infoGains[m_Attribute.index()], 0); Instances[] splitData = null; if (!makeLeaf) { splitData = splitData(data, m_Attribute); for (int i = 0; i < splitData.length; ++i) { if (splitData[i].numInstances() == data.numInstances()) { //System.out.println("When splitting on attrib // "+m_Attribute+", child "+i+" is same size as current, // making into leaf."); makeLeaf = true; break; } } } // Make leaf if information gain is zero. // Otherwise create successors. 
if (makeLeaf) { m_Attribute = null; m_Distribution = new double[data.numClasses()]; Enumeration instEnum = data.enumerateInstances(); double sum = 0; while (instEnum.hasMoreElements()) { Instance inst = (Instance) instEnum.nextElement(); m_Distribution[(int) inst.classValue()]++; sum += inst.weight(); } //laplace smooth the distribution instead laplaceSmooth(m_Distribution, sum, data.numClasses()); //Utils.normalize(m_Distribution); m_ClassValue = Utils.maxIndex(m_Distribution); m_ClassAttribute = data.classAttribute(); } else { m_Successors = new MyId3[m_Attribute.numValues()]; for (int j = 0; j < m_Attribute.numValues(); j++) { m_Successors[j] = new MyId3(); m_Successors[j].buildClassifier(splitData[j]); } } }
From source file:net.sf.bddbddb.order.MyId3.java
License:LGPL
/**
 * Computes the entropy of the data's class distribution, ignoring
 * instances whose value for {@code att} is missing.
 * Uses the identity -sum(p_j*log2(p_j)) = log2(n) - (1/n)*sum(c_j*log2(c_j)),
 * where c_j are the class counts and n their total.
 *
 * @param data the data for which entropy is to be computed
 * @param att  attribute whose missing values exclude an instance
 * @return the entropy, or 0 when every instance is missing {@code att}
 */
private double computeEntropy(Instances data, Attribute att) throws Exception {
    double[] classCounts = new double[data.numClasses()];
    Enumeration instEnum = data.enumerateInstances();
    int numInstances = 0;
    while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        if (inst.isMissing(att))
            continue;
        classCounts[(int) inst.classValue()]++;
        ++numInstances;
    }
    // Guard: with zero usable instances the original divided by zero and
    // added log2(0), yielding NaN. Define entropy of an empty set as 0.
    if (numInstances == 0) {
        return 0;
    }
    double entropy = 0;
    for (int j = 0; j < data.numClasses(); j++) {
        if (classCounts[j] > 0) {
            entropy -= classCounts[j] * Utils.log2(classCounts[j]);
        }
    }
    entropy /= (double) numInstances;
    return entropy + Utils.log2(numInstances);
}