Example usage for weka.core Instances enumerateAttributes

List of usage examples for weka.core Instances enumerateAttributes

Introduction

On this page you can find example usages of weka.core.Instances.enumerateAttributes().

Prototype

public Enumeration<Attribute> enumerateAttributes()

Source Link

Document

Returns an enumeration of all the attributes.

Usage

From source file:myJ48.MyJ48.java

/**
 * Grows the decision tree rooted at this node from the given training data.
 * <p>
 * The information gain of every attribute returned by
 * {@code data.enumerateAttributes()} is computed, and the attribute with the
 * largest gain becomes this node's split attribute. If that gain equals zero
 * the node becomes a leaf holding the normalized class distribution;
 * otherwise one child node is built per value of the split attribute.
 *
 * @param data the training instances that reached this node
 * @throws Exception if the tree cannot be built
 */
public void buildTree(Instances data) throws Exception {
    // Nothing reached this node: record an empty distribution and stop.
    if (!(data.numInstances() > 0)) {
        classAttribute = null;
        classValue = Utils.missingValue();
        classDistribution = new double[data.numClasses()];
        return;
    }

    // Score every candidate attribute; slots are addressed by attribute index.
    double[] gains = new double[data.numAttributes()];
    for (Enumeration candidates = data.enumerateAttributes(); candidates.hasMoreElements();) {
        Attribute candidate = (Attribute) candidates.nextElement();
        gains[candidate.index()] = informationGain(data, candidate);
    }

    // The best-scoring attribute is tentatively this node's split attribute.
    currentAttribute = data.attribute(maxIndex(gains));

    if (!Utils.eq(gains[currentAttribute.index()], 0)) {
        // Useful split: create one subtree per value of the chosen attribute.
        Instances[] partitions = splitDataByAttribute(data, currentAttribute);
        nodes = new MyJ48[currentAttribute.numValues()];
        for (int branch = 0; branch < currentAttribute.numValues(); branch++) {
            nodes[branch] = new MyJ48(this);
            nodes[branch].buildTree(partitions[branch]);
        }
        return;
    }

    // Zero gain: this node is a leaf labelled with the most frequent class.
    currentAttribute = null;
    classDistribution = new double[data.numClasses()];
    for (Enumeration rows = data.enumerateInstances(); rows.hasMoreElements();) {
        Instance row = (Instance) rows.nextElement();
        classDistribution[(int) row.classValue()]++;
    }
    Utils.normalize(classDistribution);
    classValue = Utils.maxIndex(classDistribution);
    classAttribute = data.classAttribute();
}

From source file:NaiveBayes.NaiveBayes13514004.java

/**
 * Trains the classifier by counting, for every attribute and class, how often
 * each attribute value occurs, and turning those counts into probabilities.
 * <p>
 * After this call {@code data[a][c][v]} holds the number of training instances
 * of class {@code c} whose {@code a}-th enumerated attribute has value index
 * {@code v}, {@code prob[a][c][v]} holds that count divided by the total count
 * for {@code (a, c)}, {@code kelasdata[c]} holds the number of instances of
 * class {@code c} and {@code kelasprob[c]} the corresponding class frequency.
 * <p>
 * The per-class arrays are allocated once for every attribute/class pair, so
 * classes that do not occur in the training data get all-zero rows instead of
 * zero-length ones.
 *
 * @param i the training instances
 */
@Override
public void buildClassifier(Instances i) {
    // Keep a private copy of the training data.
    origin = new Instances(i);

    // Number of attributes (excluding one slot for the class) and of classes.
    numAtt = i.numAttributes() - 1;
    numClass = i.numClasses();

    // Three-dimensional tables indexed [attribute][class][value].
    data = new int[numAtt][numClass][0];
    prob = new double[numAtt][numClass][0];
    kelasdata = new int[numClass];
    kelasprob = new double[numClass];

    // Size the value dimension once per attribute for every class. This does
    // not depend on the instances, so it must not be repeated per instance.
    // NOTE(review): numValues() is used as the array length, so this presumably
    // expects nominal attributes only - confirm against callers.
    Enumeration<Attribute> enuAlloc = i.enumerateAttributes();
    int x = 0;
    while (enuAlloc.hasMoreElements()) {
        numDis = enuAlloc.nextElement().numValues();
        for (int y = 0; y < numClass; y++) {
            data[x][y] = new int[numDis];
            prob[x][y] = new double[numDis];
        }
        x++;
    }

    // Fill the frequency table and the per-class instance counts.
    Enumeration<Instance> enu2 = i.enumerateInstances();
    while (enu2.hasMoreElements()) {
        Instance ins = enu2.nextElement();
        Enumeration<Attribute> enu_t = i.enumerateAttributes();
        x = 0;
        while (enu_t.hasMoreElements()) {
            Attribute att = enu_t.nextElement();
            data[x][(int) ins.classValue()][(int) ins.value(att)]++;
            x++;
        }
        kelasdata[(int) ins.classValue()]++;
    }

    // Class probabilities: relative frequency of each class.
    double numInstances = (double) i.numInstances();
    for (int y = 0; y < numClass; y++) {
        kelasprob[y] = (double) kelasdata[y] / numInstances;
    }

    // Conditional probabilities, computed once per attribute/class pair.
    for (int a = 0; a < numAtt; a++) {
        for (int y = 0; y < numClass; y++) {
            int sumDis = Utils.sum(data[a][y]);
            if (sumDis == 0) {
                // No observations for this pair: leave the probabilities at zero.
                continue;
            }
            for (int z = 0; z < data[a][y].length; z++) {
                prob[a][y][z] = ((double) data[a][y][z] / (double) sumDis);
            }
        }
    }
}

From source file:net.sf.bddbddb.FindBestDomainOrder.java

License:LGPL

void dumpClassifierInfo(String name, Classifier c, Instances data) {
    BufferedWriter w = null;//from   ww w .  j  a  v  a  2 s  .c  o  m
    try {
        w = new BufferedWriter(new FileWriter(name));
        w.write("Classifier \"name\":\n");
        w.write("Attributes: \n");
        for (Enumeration e = data.enumerateAttributes(); e.hasMoreElements();) {
            w.write(e.nextElement() + "\n");
        }
        w.write("\n");
        w.write("Based on data from " + data.numInstances() + " instances:\n");
        for (Enumeration e = data.enumerateInstances(); e.hasMoreElements();) {
            Instance i = (Instance) e.nextElement();

            if (i instanceof TrialInstance) {
                TrialInstance ti = (TrialInstance) i;
                InferenceRule ir = ti.ti.getCollection().getRule(solver);
                w.write("    " + ti.ti.getCollection().name + " " + ti.getOrder());
                if (!ti.getOrder().equals(ti.ti.order))
                    w.write(" (" + ti.ti.order + ")");
                if (ti.isMaxTime()) {
                    w.write(" MAX TIME\n");
                } else {
                    w.write(" " + format(ti.getCost()) + " (" + ti.ti.cost + " ms)\n");
                }
            } else {
                w.write("    " + i + "\n");
            }
        }
        w.write(c.toString());
        w.write("\n");
    } catch (IOException x) {
        solver.err.println("IO Exception occurred writing \"" + name + "\": " + x);
    } finally {
        if (w != null)
            try {
                w.close();
            } catch (IOException _) {
            }
    }
}

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Builds Id3 decision tree classifier./* w  w w .j  a v  a  2s. co m*/
 * 
 * @param data
 *            the training data
 * @exception Exception
 *                if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new UnsupportedClassTypeException("Id3: nominal class, please.");
    }
    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        if (!((Attribute) enumAtt.nextElement()).isNominal()) {
            throw new UnsupportedAttributeTypeException("Id3: only nominal " + "attributes, please.");
        }
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeTree(data);
}

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Method for building an Id3 tree./*from   w  ww.  j a v a  2s  .c  o  m*/
 * 
 * @param data
 *            the training data
 * @exception Exception
 *                if decision tree can't be built successfully
 */
private void makeTree(Instances data) throws Exception {
    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        m_Attribute = null;
        m_ClassValue = Instance.missingValue();
        m_Distribution = new double[data.numClasses()];
        double sum = 0;
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        return;
    }
    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    boolean makeLeaf;
    makeLeaf = Utils.eq(infoGains[m_Attribute.index()], 0);
    Instances[] splitData = null;
    if (!makeLeaf) {
        splitData = splitData(data, m_Attribute);
        for (int i = 0; i < splitData.length; ++i) {
            if (splitData[i].numInstances() == data.numInstances()) {
                //System.out.println("When splitting on attrib
                // "+m_Attribute+", child "+i+" is same size as current,
                // making into leaf.");
                makeLeaf = true;
                break;
            }
        }
    }
    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (makeLeaf) {
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        double sum = 0;
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            m_Distribution[(int) inst.classValue()]++;
            sum += inst.weight();
        }
        //laplace smooth the distribution instead
        laplaceSmooth(m_Distribution, sum, data.numClasses());
        //Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        m_Successors = new MyId3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyId3();
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}

From source file:newdtl.NewID3.java

/**
 * Creates an Id3 tree./*from  ww w.  j a  va  2s .  co  m*/
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {

    // Mengecek apakah tidak terdapat instance dalam node ini
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()]; //???
    } else {
        // Mencari IG maksimum
        double[] infoGains = new double[data.numAttributes()];

        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            infoGains[att.index()] = computeInfoGain(data, att);
        }

        // cek max IG
        int maxIG = maxIndex(infoGains);
        if (maxIG != -1) {
            splitAttribute = data.attribute(maxIndex(infoGains));
        } else {
            Exception exception = new Exception("array null");
            throw exception;
        }

        // Membuat daun jika IG-nya 0
        if (Double.compare(infoGains[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;

            classDistributions = new double[data.numClasses()];
            for (int i = 0; i < data.numInstances(); i++) {
                Instance inst = (Instance) data.instance(i);
                classDistributions[(int) inst.classValue()]++;
            }

            normalizeClassDistribution();
            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
        } else {
            // Membuat tree baru di bawah node ini
            Instances[] splitData = splitData(data, splitAttribute);
            children = new NewID3[splitAttribute.numValues()];
            for (int j = 0; j < splitAttribute.numValues(); j++) {
                children[j] = new NewID3();
                children[j].makeTree(splitData[j]);
            }
        }
    }
}

From source file:newdtl.NewJ48.java

/**
 * Creates a J48 tree./*from   www  .ja v a  2s  .com*/
 *
 * @param data the training data
 * @exception Exception if tree failed to build
 */
private void makeTree(Instances data) throws Exception {

    // Mengecek apakah tidak terdapat instance dalam node ini
    if (data.numInstances() == 0) {
        splitAttribute = null;
        label = DOUBLE_MISSING_VALUE;
        classDistributions = new double[data.numClasses()];
        isLeaf = true;
    } else {
        // Mencari Gain Ratio maksimum
        double[] gainRatios = new double[data.numAttributes()];
        double[] thresholds = new double[data.numAttributes()];

        Enumeration attEnum = data.enumerateAttributes();
        while (attEnum.hasMoreElements()) {
            Attribute att = (Attribute) attEnum.nextElement();
            double[] result = computeGainRatio(data, att);
            gainRatios[att.index()] = result[0];
            thresholds[att.index()] = result[1];
        }

        splitAttribute = data.attribute(maxIndex(gainRatios));

        if (splitAttribute.isNumeric()) {
            splitThreshold = thresholds[maxIndex(gainRatios)];
        } else {
            splitThreshold = Double.NaN;
        }

        classDistributions = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            Instance inst = (Instance) data.instance(i);
            classDistributions[(int) inst.classValue()]++;
        }

        // Membuat daun jika Gain Ratio-nya 0
        if (Double.compare(gainRatios[splitAttribute.index()], 0) == 0) {
            splitAttribute = null;

            label = maxIndex(classDistributions);
            classAttribute = data.classAttribute();
            isLeaf = true;
        } else {
            // Mengecek jika ada missing value
            if (isMissing(data, splitAttribute)) {
                // cari modus
                int index = modusIndex(data, splitAttribute);

                // ubah data yang punya missing value
                Enumeration dataEnum = data.enumerateInstances();
                while (dataEnum.hasMoreElements()) {
                    Instance inst = (Instance) dataEnum.nextElement();
                    if (inst.isMissing(splitAttribute)) {
                        inst.setValue(splitAttribute, splitAttribute.value(index));
                    }
                }
            }

            // Membuat tree baru di bawah node ini
            Instances[] splitData;
            if (splitAttribute.isNumeric()) {
                splitData = splitData(data, splitAttribute, splitThreshold);
                children = new NewJ48[2];
                for (int j = 0; j < 2; j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            } else {
                splitData = splitData(data, splitAttribute);
                children = new NewJ48[splitAttribute.numValues()];
                for (int j = 0; j < splitAttribute.numValues(); j++) {
                    children[j] = new NewJ48();
                    children[j].makeTree(splitData[j]);
                }
            }
            isLeaf = false;
        }
    }
}

From source file:org.barcelonamedia.uima.CAS2WekaInstance.java

License:Open Source License

/**
 * Converts a list of attribute values into a Weka {@code DenseInstance} that
 * belongs to the given dataset.
 * <p>
 * The instance starts with weight 1 and all values set to zero. Every entry
 * of {@code attributeValues} whose attribute name exists in
 * {@code wekaInstances} is then written into the instance; entries naming an
 * unknown attribute are skipped. Date values are parsed with the attribute's
 * own date format when the target attribute is a date attribute, and are
 * stored as text otherwise. Finally any numeric attribute that is still
 * missing is set to zero.
 *
 * @param attributeValues the values to copy into the new instance
 * @param wekaInstances   the dataset that defines the attributes
 * @return the populated instance, attached to {@code wekaInstances}
 * @throws CASException declared by the signature; not thrown directly by this body
 * @throws IllegalArgumentException if a date value cannot be parsed with the
 *         format of its date attribute
 */
private static DenseInstance toWekaInternalInstance(List<AttributeValue> attributeValues,
        Instances wekaInstances) throws CASException {
    // A freshly allocated double[] is already zero-filled.
    double[] zeroValues = new double[wekaInstances.numAttributes()];
    DenseInstance wekaInstance = new DenseInstance(1.0d, zeroValues);
    wekaInstance.setDataset(wekaInstances);

    for (AttributeValue attributeValue : attributeValues) {
        String attributeName = attributeValue.getAttributeName();
        Attribute attribute = wekaInstances.attribute(attributeName);
        if (attribute == null) {
            // The dataset has no attribute of that name: ignore the value.
            continue;
        }

        if (attributeValue instanceof NumericAttributeValue) {
            String value = ((NumericAttributeValue) attributeValue).getValue();
            wekaInstance.setValue(attribute, Double.parseDouble(value));
        } else if (attributeValue instanceof DateAttributeValue) {
            String value = ((DateAttributeValue) attributeValue).getValue();
            if (attribute.isDate()) {
                // Date attributes store a numeric timestamp; setting the raw
                // string is rejected by Weka for this attribute type.
                try {
                    wekaInstance.setValue(attribute, attribute.parseDate(value));
                } catch (java.text.ParseException e) {
                    throw new IllegalArgumentException(
                            "Cannot parse date \"" + value + "\" for attribute " + attributeName, e);
                }
            } else {
                wekaInstance.setValue(attribute, value);
            }
        } else if (attributeValue instanceof NominalAttributeValue) {
            String value = ((NominalAttributeValue) attributeValue).getValue();
            // NOTE(review): indexOfValue presumably yields -1 for an unknown
            // label, which is stored as-is - confirm this is acceptable.
            int valueIndex = attribute.indexOfValue(value);
            wekaInstance.setValue(attribute, (double) valueIndex);
        } else if (attributeValue instanceof StringAttributeValue) {
            String value = ((StringAttributeValue) attributeValue).getValue();
            wekaInstance.setValue(attribute, value);
        }
    }

    // Numeric attributes must not stay missing: default them to zero.
    Enumeration attributes = wekaInstances.enumerateAttributes();
    while (attributes.hasMoreElements()) {
        Attribute attribute = (Attribute) attributes.nextElement();
        if (attribute.isNumeric() && wekaInstance.isMissing(attribute)) {
            wekaInstance.setValue(attribute, 0);
        }
    }

    return wekaInstance;
}

From source file:org.dynamicfactory.property.InstancesFactory.java

License:Open Source License

/**
 * Serializes a dataset header as one comma-separated string: the relation
 * name first, followed by one entry per enumerated attribute as produced by
 * {@code outputAttribute}.
 *
 * @param type the dataset whose relation name and attributes are exported
 * @param g    not used by this method
 * @return the comma-separated representation
 */
@Override
public String exportToString(Instances type, Graph g) {
    StringBuffer buffer = new StringBuffer();
    buffer.append(type.relationName());
    for (Enumeration remaining = type.enumerateAttributes(); remaining.hasMoreElements();) {
        // Each attribute is preceded by a separator, so the name comes first.
        buffer.append(',');
        outputAttribute(buffer, (Attribute) remaining.nextElement());
    }
    return buffer.toString();
}

From source file:org.opentox.www.rest.resources.AlgorithmResource.java

License:Open Source License

/**
 * Runs the dataset referenced by the posted form through the filtering
 * pipeline and answers with a URI list entry naming the remaining attributes.
 * <p>
 * The dataset URI is read from the form parameter
 * {@code ConstantParameters.dataset_uri}. A malformed URI or a pipeline
 * failure toggles a bad-request status and returns an error message instead.
 *
 * @param entity  the posted form
 * @param variant not used by this method
 * @return the dataset URI followed by one {@code feature_uris[]} query
 *         parameter per remaining attribute, or an error message
 */
@SuppressWarnings({ "unchecked" })
private Representation filterData(final Representation entity, final Variant variant) {
    // Pipeline stages, in processing order; the last one is configured with
    // ATTRIBUTE_TYPE.string.
    Pipeline pipe = new Pipeline(new InputProcessor(), new DatasetBuilder(), new InstancesProcessor(),
            new AttributeCleanup(ATTRIBUTE_TYPE.string));

    YaqpForm form = new YaqpForm(entity);

    final URI uri;
    try {
        uri = new URI(form.getFirstValue(ConstantParameters.dataset_uri));
    } catch (URISyntaxException ex) {
        toggleBadRequest();
        return sendMessage(
                "Inacceptable URI (" + form.getFirstValue(ConstantParameters.dataset_uri) + ")" + NEWLINE);
    }

    final Instances filteredData;
    try {
        filteredData = (Instances) pipe.process(uri);
    } catch (YaqpException ex) {
        toggleBadRequest();
        return sendMessage(ex.toString());
    }

    // Join the attribute names as query parameters separated by '&'.
    StringBuilder query = new StringBuilder();
    for (Enumeration remaining = filteredData.enumerateAttributes(); remaining.hasMoreElements();) {
        Attribute feature = (Attribute) remaining.nextElement();
        query.append("feature_uris[]=").append(feature.name());
        if (remaining.hasMoreElements()) {
            query.append("&");
        }
    }
    return new StringRepresentation(uri + "?" + query, MediaType.TEXT_URI_LIST);
}