Example usage for weka.core Instances attribute

List of usage examples for weka.core Instances attribute

Introduction

In this page you can find the example usage for weka.core Instances attribute.

Prototype

public Attribute attribute(String name)

Source Link

Document

Returns an attribute given its name.

Usage

From source file:LVCoref.WekaWrapper.java

License:Open Source License

/**
 * Demonstrates building a {@code weka.core.Instances} dataset entirely in
 * memory, covering every attribute type supported by Weka: numeric, nominal,
 * string, date and relational. Uses the pre-3.7 Weka API
 * ({@code FastVector}, the {@code Instance} constructor).
 *
 * @param args ignored
 * @throws Exception if date parsing or instance construction fails
 */
public static void main1(String[] args) throws Exception {
    FastVector atts;
    FastVector attsRel;
    FastVector attVals;
    FastVector attValsRel;
    Instances data;
    Instances dataRel;
    double[] vals;
    double[] valsRel;
    int i;

    // 1. set up attributes
    atts = new FastVector();
    // - numeric
    atts.addElement(new Attribute("att1"));
    // - nominal: values val1..val5
    attVals = new FastVector();
    for (i = 0; i < 5; i++)
        attVals.addElement("val" + (i + 1));
    atts.addElement(new Attribute("att2", attVals));
    // - string: a null FastVector marks a string attribute in the old API
    atts.addElement(new Attribute("att3", (FastVector) null));
    // - date, with an explicit format pattern
    atts.addElement(new Attribute("att4", "yyyy-MM-dd"));
    // - relational: a nested dataset with one numeric and one nominal attribute
    attsRel = new FastVector();
    // -- numeric
    attsRel.addElement(new Attribute("att5.1"));
    // -- nominal: values val5.1..val5.5
    attValsRel = new FastVector();
    for (i = 0; i < 5; i++)
        attValsRel.addElement("val5." + (i + 1));
    attsRel.addElement(new Attribute("att5.2", attValsRel));
    dataRel = new Instances("att5", attsRel, 0);
    atts.addElement(new Attribute("att5", dataRel, 0));

    // 2. create Instances object (capacity 0, grows on add)
    data = new Instances("MyRelation", atts, 0);

    // 3. fill with data
    // first instance
    vals = new double[data.numAttributes()];
    // - numeric
    vals[0] = Math.PI;
    // - nominal values are stored as the index into the value list
    vals[1] = attVals.indexOf("val3");
    // - string values must be registered with the attribute first
    vals[2] = data.attribute(2).addStringValue("This is a string!");
    // - dates are stored as the parsed epoch-based double
    vals[3] = data.attribute(3).parseDate("2001-11-09");
    // - relational: build a fresh nested dataset with the declared format
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 1;
    valsRel[1] = attValsRel.indexOf("val5.3");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.PI + 2;
    valsRel[1] = attValsRel.indexOf("val5.2");
    dataRel.add(new Instance(1.0, valsRel));
    // relational values are stored as the index of the registered relation
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add (weight 1.0)
    data.add(new Instance(1.0, vals));

    // second instance
    vals = new double[data.numAttributes()]; // important: needs NEW array!
    // - numeric
    vals[0] = Math.E;
    // - nominal
    vals[1] = attVals.indexOf("val1");
    // - string
    vals[2] = data.attribute(2).addStringValue("And another one!");
    // - date
    vals[3] = data.attribute(3).parseDate("2000-12-01");
    // - relational
    dataRel = new Instances(data.attribute(4).relation(), 0);
    // -- first instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 1;
    valsRel[1] = attValsRel.indexOf("val5.4");
    dataRel.add(new Instance(1.0, valsRel));
    // -- second instance
    valsRel = new double[2];
    valsRel[0] = Math.E + 2;
    valsRel[1] = attValsRel.indexOf("val5.1");
    dataRel.add(new Instance(1.0, valsRel));
    vals[4] = data.attribute(4).addRelation(dataRel);
    // add
    data.add(new Instance(1.0, vals));

    // 4. output data in ARFF form
    System.out.println(data);
}

From source file:machinelearningproject.RFTree.java

@Override
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    ArrayList<String> availableAttributes = new ArrayList();
    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    //choose random fraction
    int numAttr = instances.numAttributes();
    int k = (int) round(sqrt(numAttr));
    ArrayList<Integer> randomIdx = randomFraction(numAttr);

    for (int idx = 0; idx < k; idx++) {
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }//from   ww  w.  j  av  a  2 s.  c  o  m
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // all examples have the sama classification
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // mode classification
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();
            ArrayList<String> attrValues = new ArrayList();
            for (int i = 0; i < instances.numInstances(); i++) {
                Instance instance = instances.get(i);
                String attrValue = instance.stringValue(largestInfoGainAttrIdx);
                if (attrValues.isEmpty() || !attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());
                int i = 0;
                while (i < copyInstances.numInstances()) {
                    Instance instance = copyInstances.get(i);
                    // reducing examples
                    if (!instance.stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                        i--;
                    }
                    i++;
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }

    return tree;
}

From source file:machinelearningproject.Tree.java

/**
 * Recursively builds an ID3-style decision tree: stops on an empty dataset,
 * a pure class distribution, or exhausted attributes; otherwise splits on
 * the attribute with the largest information gain and recurses on each
 * branch with that attribute removed.
 *
 * @param instances training data for this (sub)tree
 * @return the built subtree, or {@code null} when no instances remain
 * @throws Exception propagated from the entropy / information-gain helpers
 */
public Tree buildTree(Instances instances) throws Exception {
    Tree tree = new Tree();
    // Idiom fix: parameterized ArrayList instead of a raw type.
    ArrayList<String> availableAttributes = new ArrayList<>();

    int largestInfoGainAttrIdx = -1;
    double largestInfoGainAttrValue = 0.0;

    // Collect the names of all non-class attributes.
    for (int idx = 0; idx < instances.numAttributes(); idx++) {
        if (idx != instances.classIndex()) {
            availableAttributes.add(instances.attribute(idx).name());
        }
    }

    if (instances.numInstances() == 0) {
        return null;
    } else if (calculateClassEntropy(instances) == 0.0) {
        // All examples share the same classification -> pure leaf.
        tree.attributeName = instances.get(0).stringValue(instances.classIndex());
    } else if (availableAttributes.isEmpty()) {
        // No attributes left -> predict the mode class.
        tree.attributeName = getModeClass(instances, instances.classIndex());
    } else {
        // Pick the non-class attribute with the largest information gain.
        for (int idx = 0; idx < instances.numAttributes(); idx++) {
            if (idx != instances.classIndex()) {
                double attrInfoGain = calculateInformationGain(instances, idx, instances.classIndex());
                if (largestInfoGainAttrValue < attrInfoGain) {
                    largestInfoGainAttrIdx = idx;
                    largestInfoGainAttrValue = attrInfoGain;
                }
            }
        }

        if (largestInfoGainAttrIdx != -1) {
            tree.attributeName = instances.attribute(largestInfoGainAttrIdx).name();
            // Distinct values of the chosen attribute, in first-seen order.
            // (The original's "isEmpty() ||" guard was redundant: contains()
            // on an empty list is already false.)
            ArrayList<String> attrValues = new ArrayList<>();
            for (int i = 0; i < instances.numInstances(); i++) {
                String attrValue = instances.get(i).stringValue(largestInfoGainAttrIdx);
                if (!attrValues.contains(attrValue)) {
                    attrValues.add(attrValue);
                }
            }

            for (String attrValue : attrValues) {
                Node node = new Node(attrValue);
                Instances copyInstances = new Instances(instances);
                copyInstances.setClassIndex(instances.classIndex());
                // Keep only the rows matching this branch value; iterate
                // backwards so deletions do not shift pending indices
                // (replaces the fragile delete(i); i--; i++ forward loop).
                for (int i = copyInstances.numInstances() - 1; i >= 0; i--) {
                    if (!copyInstances.get(i).stringValue(largestInfoGainAttrIdx).equals(attrValue)) {
                        copyInstances.delete(i);
                    }
                }
                copyInstances.deleteAttributeAt(largestInfoGainAttrIdx);
                node.subTree = buildTree(copyInstances);
                tree.nodes.add(node);
            }
        }
    }

    return tree;
}

From source file:machinelearningq2.BasicNaiveBayesV1.java

/**
 * Performs Laplace correction to ensure there are no zero counts in the
 * data. Creating a DataFound object ensures the count starts from 1.
 *
 * @param inst the training instances; the class index is set to the last
 *             attribute as a side effect
 * @throws ParseException if a nominal attribute value cannot be parsed as a
 *                        number by the default-locale NumberFormat
 */
public void laplaceCorrection(Instances inst) throws ParseException {
    inst.setClassIndex(inst.numAttributes() - 1);
    // Loop-invariant: create the NumberFormat once instead of once per
    // (class, attribute, value) combination as the original did.
    NumberFormat nf = NumberFormat.getInstance();
    for (int c = 0; c < inst.numClasses(); c++) {
        for (int j = 0; j < inst.numAttributes() - 1; j++) {
            for (int i = 0; i < inst.numDistinctValues(j); i++) {
                String attributeValue = inst.attribute(j).value(i);
                double atval = nf.parse(attributeValue).doubleValue();
                // Seed one pseudo-count per (value, class, value-index) so
                // no probability estimate is ever zero.
                DataFound d = new DataFound(atval, c, i);
                data.add(d);
            }
        }
    }
}

From source file:machine_learing_clasifier.MyC45.java

@Override
public void buildClassifier(Instances i) throws Exception {
    if (!i.classAttribute().isNominal()) {
        throw new Exception("Class not nominal");
    }/*from w  ww. j  a v a2  s  . com*/

    //penanganan missing value
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute attr = i.attribute(j);
        for (int k = 0; k < i.numInstances(); k++) {
            Instance inst = i.instance(k);
            if (inst.isMissing(attr)) {
                inst.setValue(attr, fillMissingValue(i, attr));
                //bisa dituning lagi performancenya
            }
        }
    }

    i = new Instances(i);
    i.deleteWithMissingClass();
    makeTree(i);
}

From source file:machine_learing_clasifier.MyC45.java

/**
 * Recursively grows the C4.5 tree on the given data. For each non-class
 * attribute the information gain is computed (nominal attributes directly;
 * numeric ones via the best continuous threshold). If the best gain is
 * zero the node becomes a leaf holding the majority-class distribution;
 * otherwise the data is split on the best attribute and children are built
 * recursively.
 */
public void makeTree(Instances data) throws Exception {
    // Base case: nothing to learn from.
    if (data.numInstances() == 0) {
        return;
    }

    // Gain per attribute index; the class attribute's slot stays 0.0.
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            if (att.isNominal()) {
                infoGains[att.index()] = computeInformationGain(data, att);
            } else {
                // Numeric attribute: gain at its best binary split threshold.
                infoGains[att.index()] = computeInformationGainContinous(data, att,
                        BestContinousAttribute(data, att));
            }
        }
    }

    m_Attribute = data.attribute(Utils.maxIndex(infoGains));
    if (m_Attribute.isNumeric()) {
        // Remember the threshold used when classifying later.
        numericAttThreshold = BestContinousAttribute(data, m_Attribute);
        System.out.println(" ini kalo continous dengan attribut : " + numericAttThreshold);
    }
    System.out.println("huhu = " + m_Attribute.toString());

    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No attribute helps: make this a leaf with the normalized class
        // distribution and the majority class as its value.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // Split: one subset per nominal value, or two subsets around the
        // numeric threshold.
        Instances[] splitData;
        if (m_Attribute.isNominal()) {
            splitData = splitData(data, m_Attribute);
        } else {
            splitData = splitDataContinous(data, m_Attribute, numericAttThreshold);
        }

        if (m_Attribute.isNominal()) {
            System.out.println("nominal");
            m_Successors = new MyC45[m_Attribute.numValues()];
            System.out.println(m_Successors.length);
            for (int j = 0; j < m_Attribute.numValues(); j++) {
                m_Successors[j] = new MyC45(head, this);
                m_Successors[j].buildClassifier(splitData[j]);
            }
        } else {
            System.out.println("numeric");
            // Binary split: <= threshold and > threshold children.
            m_Successors = new MyC45[2];
            System.out.println(m_Successors.length);
            for (int j = 0; j < 2; j++) {
                m_Successors[j] = new MyC45(head, this);
                m_Successors[j].buildClassifier(splitData[j]);
            }
        }
    }
}

From source file:machine_learing_clasifier.MyID3.java

@Override
public void buildClassifier(Instances i) throws Exception {
    if (!i.classAttribute().isNominal()) {
        throw new Exception("Class not nominal");
    }/*w w  w.j  av a2 s . com*/

    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute attr = i.attribute(j);
        if (!attr.isNominal()) {
            throw new Exception("Attribute not nominal");
        }

        for (int k = 0; k < i.numInstances(); k++) {
            Instance inst = i.instance(k);
            if (inst.isMissing(attr)) {
                throw new Exception("Missing value");
            }
        }
    }

    i = new Instances(i);
    i.deleteWithMissingClass();
    makeTree(i);
}

From source file:machine_learing_clasifier.MyID3.java

/**
 * Recursively grows the ID3 tree on the given data. Computes the
 * information gain of every non-class attribute; if the best gain is zero
 * the node becomes a leaf holding the normalized class distribution and
 * the majority class, otherwise the data is split on the best attribute
 * and children are built recursively.
 */
public void makeTree(Instances data) throws Exception {
    // Base case: nothing to learn from.
    if (data.numInstances() == 0) {
        return;
    }

    // Gain per attribute index; the class attribute's slot stays 0.0.
    double[] infoGains = new double[data.numAttributes()];
    for (int i = 0; i < data.numAttributes(); i++) {
        Attribute att = data.attribute(i);
        if (data.classIndex() != att.index()) {
            infoGains[att.index()] = computeInformationGain(data, att);
        }
    }

    // Attribute with the largest gain becomes the split attribute.
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
        // No attribute helps: make this a leaf with the normalized class
        // distribution and the majority class as its value.
        m_Attribute = null;
        m_Distribution = new double[data.numClasses()];
        for (int i = 0; i < data.numInstances(); i++) {
            int inst = (int) data.instance(i).value(data.classAttribute());
            m_Distribution[inst]++;
        }
        Utils.normalize(m_Distribution);
        m_ClassValue = Utils.maxIndex(m_Distribution);
        m_ClassAttribute = data.classAttribute();
    } else {
        // One child per value of the chosen nominal attribute.
        Instances[] splitData = splitData(data, m_Attribute);
        m_Successors = new MyID3[m_Attribute.numValues()];
        for (int j = 0; j < m_Attribute.numValues(); j++) {
            m_Successors[j] = new MyID3();
            m_Successors[j].buildClassifier(splitData[j]);
        }
    }
}

From source file:mao.datamining.DataSetPair.java

/**
 * One-time shared preprocessing: removes columns with more than 50% missing
 * values, then drops rows whose '?' count exceeds 50% of the column count by
 * rewriting the intermediate ARFF file line by line, and finally creates the
 * sample files. Guarded by the {@code didIt} flag so it runs only once.
 */
private void doItOnce4All() {
    if (didIt)
        return;
    didIt = true;
    try {
        //step 0, remove all those empty columns, which has more than 50% missing values
        Instances orangeDataSet = ConverterUtils.DataSource.read(trainSourceFileName);
        orangeDataSet.setClassIndex(orangeDataSet.numAttributes() - 1);
        Attribute classAttr = orangeDataSet.attribute(orangeDataSet.numAttributes() - 1);
        MainLogger.log(Level.INFO, "Class Attribute: {0}", classAttr.toString());

        //step 0-1, to remove all columns which has more than half missing values
        Instances newData = orangeDataSet;
        RemoveUselessColumnsByMissingValues removeMissingValuesColumns = new RemoveUselessColumnsByMissingValues();
        removeMissingValuesColumns.setM_maxMissingPercentage(50);
        removeMissingValuesColumns.setManualDeleteColumns(columns2Delete);
        removeMissingValuesColumns.setInputFormat(newData);
        newData = Filter.useFilter(newData, removeMissingValuesColumns);
        Main.logging("== New Data After Removing all Columns having >50% missing values: ===\n"
                + newData.toSummaryString());
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")))) {
            writer.write(newData.toString());
        }

        //step 0-2 to transform those numeric columns to Nominal
        //to delete those instances with more than half missing values
        int columnNum = newData.numAttributes();
        int totalInstanceNum = newData.numInstances(), deleteM1Num = 0, delete1Num = 0;
        // FIX: reader and writer are now try-with-resources so both streams
        // are closed even if an I/O error occurs mid-loop (the original
        // leaked them on any exception before the explicit close() calls).
        try (BufferedReader reader70 = new BufferedReader(new InputStreamReader(
                new FileInputStream(Main.OrangeProcessedDSHome + "/afterRemoveMissingColumns1.arff")));
                BufferedWriter writerAfterDeleteRows = new BufferedWriter(new OutputStreamWriter(
                        new FileOutputStream(Main.OrangeProcessedDSHome + "/afterRemoveRows2.arff")))) {
            String line = null;
            int missingColumnNum = 0;
            while ((line = reader70.readLine()) != null) {
                // Count '?' marks per line. NOTE(review): this also scans the
                // ARFF header lines; assumed harmless because headers contain
                // no '?' -- confirm against the generated files.
                missingColumnNum = 0;
                for (int i = 0; i < line.length(); i++) {
                    if (line.charAt(i) == '?')
                        missingColumnNum++;
                }
                if (missingColumnNum * 100 / columnNum < 50) {
                    writerAfterDeleteRows.write(line);
                    writerAfterDeleteRows.newLine();
                } else {
                    System.out.println("Delete Row: [" + line + "]");
                    // Track how many deletions hit each class label.
                    if (line.endsWith("-1")) {
                        deleteM1Num++;
                    } else {
                        delete1Num++;
                    }
                }
            }
        }
        System.out.println("Total: " + totalInstanceNum + ", delete class -1: " + deleteM1Num
                + ", delete class 1:  " + delete1Num);

        //create sample files:
        createSampleDataSets();

    } catch (Exception e) {
        Main.logging(null, e);
    }
}

From source file:mao.datamining.DataSetPair.java

/**
 * Pre-processes the training data set with:
 * RemoveUselessColumnsByMissingValues filter (via doItOnce4All),
 * SpreadSubsample filter to shrink the majority class instances,
 * AttributeSelection filter with CfsSubsetEval and LinearForwardSelection
 * (or InfoGain + Ranker, depending on the configured feature-selection mode).
 * Writes the final dataset to {@code trainFileName} and records the kept
 * attribute names in {@code finalTrainAttrList}.
 */
private void processTrainRawData() {
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    System.out.println("====================" + this.trainFileName + "====================");
    finalTrainAttrList.clear();
    try {
        // Shared column/row cleanup, executed at most once per JVM run.
        doItOnce4All();
        String sampleFilePath = null;
        //step 2, either over sample, or under sample
        //weka.filters.supervised.instance.SpreadSubsample
        // Pick the pre-built sample file that matches the configured
        // resampling strategy; the files are produced by doItOnce4All.
        if (this.resampleMethod.equalsIgnoreCase(resampleUnder)) {
            System.out.println("Under Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterUnderSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleOver)) {
            System.out.println("Over Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterOverSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleNone)) {
            //do nothing,
            System.out.println("None Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else if (resampleMethod.equalsIgnoreCase(resampleMatrix)) {
            //do nothing (cost-matrix handling happens later, not here)
            System.out.println("Matrix Samplessssssssssssssssssssssssssssssssssssss");
            sampleFilePath = Main.OrangeProcessedDSHome + "/afterNoneSampling.arff";
        } else {
            doNotSupport();
        }
        Instances newData = ConverterUtils.DataSource.read(sampleFilePath);
        newData.setClassIndex(newData.numAttributes() - 1);
        //            Main.logging("== New Data After Resampling class instances: ===\n" + newData.toSummaryString());

        //Step 3, select features
        AttributeSelection attrSelectionFilter = new AttributeSelection();
        ASEvaluation eval = null;
        ASSearch search = null;

        //ranker: InfoGain evaluator + Ranker search
        if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionA)) {
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            System.out.println("Ranker ssssssssssssssssssssssssssssssssssssss");
            eval = new weka.attributeSelection.InfoGainAttributeEval();
            //weka.attributeSelection.Ranker -T 0.02 -N -1
            search = new Ranker();
            // -T = gain threshold, -N -1 = keep all attributes above it.
            String rankerOptios[] = { "-T", "0.01", "-N", "-1" };
            if (resampleMethod.equalsIgnoreCase(resampleOver)) {
                // Over-sampled data inflates gains, so use a higher cutoff.
                rankerOptios[1] = "0.1";
            }
            ((Ranker) search).setOptions(rankerOptios);
            Main.logging("== Start to Select Features with InfoGainAttributeEval and Ranker");
        }
        //weka.attributeSelection.LinearForwardSelection -D 0 -N 5 -I -K 50 -T 0
        else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionB)) {
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            System.out.println("CfsSubset ssssssssssssssssssssssssssssssssssssss");
            eval = new CfsSubsetEval();
            search = new LinearForwardSelection();
            String linearOptios[] = { "-D", "0", "-N", "5", "-I", "-K", "50", "-T", "0" };
            ((LinearForwardSelection) search).setOptions(linearOptios);
            Main.logging("== Start to Select Features with CfsSubsetEval and LinearForwardSelection");
        } else if (this.featureSelectionMode.equalsIgnoreCase(featureSelectionNo)) {
            System.out.println("None Selection ssssssssssssssssssssssssssssssssssssss");
            Main.logging("No Feature Selection Method");
        } else {
            doNotSupport();
        }

        // eval stays null in the "no selection" mode, skipping the filter.
        if (eval != null) {
            attrSelectionFilter.setEvaluator(eval);
            attrSelectionFilter.setSearch(search);
            attrSelectionFilter.setInputFormat(newData);
            newData = Filter.useFilter(newData, attrSelectionFilter);
        }

        Main.logging("== New Data After Selecting Features: ===\n" + newData.toSummaryString());

        //finally, write the final dataset to file system

        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(this.trainFileName)))) {
            writer.write(newData.toString());
        }

        // Record the surviving attribute names for later test-set alignment.
        int numAttributes = newData.numAttributes();
        for (int i = 0; i < numAttributes; i++) {
            String attrName = newData.attribute(i).name();
            finalTrainAttrList.add(attrName);
        }
        Main.logging(finalTrainAttrList.toString());
        //            //set the final train dataset
        finalTrainDataSet = newData;
        finalTrainDataSet.setClassIndex(finalTrainDataSet.numAttributes() - 1);

        Main.logging("train dataset class attr: " + finalTrainDataSet.classAttribute().toString());
    } catch (Exception ex) {
        Main.logging(null, ex);
    }

}