Example usage for weka.core Instances deleteWithMissingClass

Introduction

On this page you can find example usage of weka.core Instances deleteWithMissingClass.

Prototype

public void deleteWithMissingClass() 

Document

Removes all instances with a missing class value from the dataset.
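
The method is typically called on a copy of the training data, with the class attribute already set, before a classifier is built. Below is a minimal sketch of that pattern, not taken from any of the quoted sources; the ARFF path and the choice of the last attribute as the class are assumptions for illustration.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DeleteWithMissingClassExample {
    public static void main(String[] args) throws Exception {
        // Load a dataset; the file path is a placeholder.
        Instances raw = DataSource.read("data/weather.nominal.arff");
        // deleteWithMissingClass() requires the class attribute to be set.
        raw.setClassIndex(raw.numAttributes() - 1);

        // Work on a copy so the caller's dataset stays untouched,
        // as the buildClassifier() examples below do.
        Instances data = new Instances(raw);
        int before = data.numInstances();
        data.deleteWithMissingClass();
        System.out.println("Removed " + (before - data.numInstances())
                + " instance(s) with a missing class value.");
    }
}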

Usage

From source file:myJ48.MyJ48.java

/**
 * Build an Id3 classifier
 * @param instances dataset used for building the model
 * @throws Exception 
 */
@Override
public void buildClassifier(Instances instances) throws Exception {

    if (instances == null) {
        //System.out.println("instances is null");
    } else {
        //System.out.println("instances is not null");
    }

    // Check whether the classifier can handle the data
    getCapabilities().testWithFail(instances);

    // Remove missing class
    Instances data = new Instances(instances);
    data.deleteWithMissingClass();

    //Numeric to Nominal
    data = NumericToNominal(data);
    //System.out.println(data);
    //System.out.println(data.numAttributes());

    // Build the id3
    buildTree(data);

    Weka weka = new Weka();
    String[] options_cl = { "" };
    weka.setTraining("weather.nominal.arff");
    weka.setClassifier("weka.classifiers.trees.J48", options_cl);

    weka.runCV(false);

    initAccuracy = weka.getM_Evaluation().correct();

    if (data == null) {
        //System.out.println("data is null");
    } else {
        //System.out.println("data is not null");
    }

    //pruneTree(data);
}

From source file:naivebayes.NBRun.java

public static void main(String[] args) throws Exception {
    System.out.println("Naive Bayes Classifier");
    Instances data = TucilWeka.readDataSet("C:\\Program Files\\Weka-3-8\\data\\mush_test.arff");
    Scanner scan = new Scanner(System.in);
    Classifier cls;
    Instances train = TucilWeka.readDataSet("C:\\Program Files\\Weka-3-8\\data\\mush.arff");

    System.out.println("Read or create model? r/c");
    if (scan.next().equals("c")) {
        cls = new NBTubesAI();
        cls.buildClassifier(train);
        TucilWeka.saveModel(train);
    } else {
        cls = (NBTubesAI) TucilWeka.readModel();
    }
    Evaluation eval = new Evaluation(data);
    System.out.println("10 fold cross validate or Full train? c/f");
    if (scan.next().equals("c")) {
        int fold = 10;
        for (int i = 0; i < data.numAttributes(); i++) {
            System.out.println(i + ". " + data.attribute(i));
        }
        eval.crossValidateModel(cls, data, fold, new Random(1));
    } else {
        for (int i = 0; i < data.numAttributes(); i++) {
            System.out.println(i + ". " + data.attribute(i));
        }
        data.deleteWithMissingClass();
        try {

            eval.evaluateModel(cls, data);
        } catch (java.lang.Exception ex) {
            eval.crossValidateModel(cls, data, 11, new Random(1));
        }
    }
    //        Classifier cls=new NBTubesAI();
    //        cls.buildClassifier(data);

    System.out.println("Hasil evaluasi: ");
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
    System.out.println(eval.toClassDetailsString());
}

From source file:naivebayes.NBTubesAI.java

@Override
public void buildClassifier(Instances data) throws Exception {
    distribution = new HashMap<>();
    classCount = new HashMap<>();

    data = new Instances(data);
    // Delete instances with a missing class value
    data.deleteWithMissingClass();
    // apply a Discretize filter to turn attributes into nominal ones
    // count the number of instances
    m_Instances = new Instances(data);
    numInstance = data.numInstances();
    // Enumerate all attributes of the instances
    Enumeration<Attribute> enumAttr = m_Instances.enumerateAttributes();
    // Index of the current attribute
    int attrIndex = 0;
    // HashMap counting the number of occurrences of each class value

    for (int i = 0; i < m_Instances.classAttribute().numValues(); i++) {
        classCount.put(i + 0.0, 0);
    }
    Enumeration<Instance> forCount = m_Instances.enumerateInstances();
    while (forCount.hasMoreElements()) {
        Instance instCount = forCount.nextElement();
        classCount.put(instCount.classValue(), classCount.get(instCount.classValue()) + 1);
    }

    System.out.println("JMLAH KELAS:" + m_Instances.numClasses());
    System.out.println(classCount.toString());
    // Loop over all attributes
    while (enumAttr.hasMoreElements()) {
        Attribute temp = enumAttr.nextElement();

        // attribute name
        String attrName = temp.name();
        // Insert the key attrName
        if (distribution.get(attrName) == null) {
            distribution.put(attrName, new HashMap<String, HashMap<Double, Double>>());
        }

        // Enumerate all instances of the input Instances
        Enumeration<Instance> enumInst = m_Instances.enumerateInstances();
        // Loop over all instances
        while (enumInst.hasMoreElements()) {
            // Get the next Instance
            Instance tempInst = enumInst.nextElement();
            // Domain value for the current attribute
            String nilaiDomain = tempInst.stringValue(temp);
            // Class value of this instance
            double classAttr = tempInst.classValue();

            if (distribution.get(attrName).get(nilaiDomain) == null) {
                // Create a new HashMap when this domain value appears for the first time
                distribution.get(attrName).put(nilaiDomain, new HashMap<Double, Double>());
            }
            if (distribution.get(attrName).get(nilaiDomain).get(classAttr) == null) {
                // Initialize the counts when this domain value / class pair
                // appears for the first time
                for (int i = 0; i < m_Instances.numClasses(); i++) {
                    distribution.get(attrName).get(nilaiDomain).put(i + 0.0, 0.0);

                }

            }
            // Increment the occurrence frequency (normalized by the class count)
            distribution.get(attrName).get(nilaiDomain).put(classAttr,
                    distribution.get(attrName).get(nilaiDomain).get(classAttr)
                            + (1.0 / classCount.get(classAttr)));
        }

        attrIndex++;

    }
    System.out.println(distribution.toString());
    System.out.println(classCount.toString());
}

From source file:net.paudan.evosvm.LibLINEAR.java

License:Open Source License

/**
* builds the classifier
*
* @param insts the training instances
* @throws Exception if liblinear classes not in classpath or liblinear
* encountered a problem
*/
public void buildClassifier(Instances insts) throws Exception {
    m_NominalToBinary = null;
    m_Filter = null;

    // remove instances with missing class
    insts = new Instances(insts);
    insts.deleteWithMissingClass();

    if (!getDoNotReplaceMissingValues()) {
        m_ReplaceMissingValues = new ReplaceMissingValues();
        m_ReplaceMissingValues.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_ReplaceMissingValues);
    }

    // can classifier handle the data?
    // we check this here so that if the user turns off
    // replace missing values filtering, it will fail
    // if the data actually does have missing values
    getCapabilities().testWithFail(insts);

    if (getConvertNominalToBinary()) {
        insts = nominalToBinary(insts);
    }

    if (getNormalize()) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    }

    int[] vy = new int[insts.numInstances()];
    FeatureNode[][] vx = new FeatureNode[insts.numInstances()][];
    int max_index = 0;

    for (int d = 0; d < insts.numInstances(); d++) {
        Instance inst = insts.instance(d);
        FeatureNode[] x = instanceToArray(inst);
        if (x.length > 0) {
            max_index = Math.max(max_index, x[x.length - 1].index);
        }
        vx[d] = x;
        double classValue = inst.classValue();
        int classValueInt = (int) classValue;
        if (classValueInt != classValue)
            throw new RuntimeException("unsupported class value: " + classValue);
        vy[d] = classValueInt;
    }

    if (!m_Debug) {
        Linear.disableDebugOutput();
    } else {
        Linear.enableDebugOutput();
    }

    // reset the PRNG for regression-stable results
    Linear.resetRandom();

    // train model
    m_Model = Linear.train(getProblem(vx, vy, max_index), getParameters());
}

From source file:net.sf.bddbddb.order.MyId3.java

License:LGPL

/**
 * Builds Id3 decision tree classifier.
 * 
 * @param data
 *            the training data
 * @exception Exception
 *                if classifier can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
    if (!data.classAttribute().isNominal()) {
        throw new UnsupportedClassTypeException("Id3: nominal class, please.");
    }
    Enumeration enumAtt = data.enumerateAttributes();
    while (enumAtt.hasMoreElements()) {
        if (!((Attribute) enumAtt.nextElement()).isNominal()) {
            throw new UnsupportedAttributeTypeException("Id3: only nominal " + "attributes, please.");
        }
    }
    data = new Instances(data);
    data.deleteWithMissingClass();
    makeTree(data);
}

From source file:net.sf.jclal.classifier.MOAWrapper.java

License:Open Source License

/**
 * {@inheritDoc}
 */
public void buildClassifier(Instances data) throws Exception {

    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    classifier.resetLearning();

    for (int i = 0; i < data.numInstances(); i++)
        updateClassifier(data.instance(i));

}

From source file:newdtl.NewID3.java

/**
 * Builds Id3 tree classifier.
 *
 * @param data the training data
 * @exception Exception if classifier failed to build
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // Check whether the classifier can handle the data
    getCapabilities().testWithFail(data);

    // Remove instances with a missing class value
    data = new Instances(data);
    data.deleteWithMissingClass();

    makeTree(data);
}

From source file:newdtl.NewJ48.java

/**
 * Builds J48 tree classifier.
 *
 * @param data the training data
 * @exception Exception if classifier failed to build
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // Check whether the classifier can handle the data
    getCapabilities().testWithFail(data);

    // Remove instances with a missing class value
    data = new Instances(data);
    data.deleteWithMissingClass();

    makeTree(data);

    pruneTree(data);
}

From source file:org.esa.nest.gpf.SGD.java

/**
 * Method for building the classifier.
 *
 * @param data the set of training instances.
 * @throws Exception if the classifier can't be built successfully.
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    reset();

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    if (data.numInstances() > 0 && !m_dontReplaceMissing) {
        m_replaceMissing = new ReplaceMissingValues();
        m_replaceMissing.setInputFormat(data);
        data = Filter.useFilter(data, m_replaceMissing);
    }

    // check for only numeric attributes
    boolean onlyNumeric = true;
    for (int i = 0; i < data.numAttributes(); i++) {
        if (i != data.classIndex()) {
            if (!data.attribute(i).isNumeric()) {
                onlyNumeric = false;
                break;
            }
        }
    }

    if (!onlyNumeric) {
        if (data.numInstances() > 0) {
            m_nominalToBinary = new weka.filters.supervised.attribute.NominalToBinary();
        } else {
            m_nominalToBinary = new weka.filters.unsupervised.attribute.NominalToBinary();
        }
        m_nominalToBinary.setInputFormat(data);
        data = Filter.useFilter(data, m_nominalToBinary);
    }

    if (!m_dontNormalize && data.numInstances() > 0) {

        m_normalize = new Normalize();
        m_normalize.setInputFormat(data);
        data = Filter.useFilter(data, m_normalize);
    }

    m_numInstances = data.numInstances();

    m_weights = new double[data.numAttributes() + 1];
    m_data = new Instances(data, 0);

    if (data.numInstances() > 0) {
        data.randomize(new Random(getSeed())); // randomize the data
        train(data);
    }
}

From source file:org.scripps.branch.classifier.ManualTree.java

License:Open Source License

/**
 * Builds classifier.
 * 
 * @param data
 *            the data to train with
 * @throws Exception
 *             if something goes wrong or the data doesn't fit
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // Make sure K value is in range
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println(
                "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    // Figure out appropriate datasets
    Instances train = null;
    Instances backfit = null;
    Random rand = data.getRandomNumberGenerator(m_randomSeed);
    if (m_NumFolds <= 0) {
        train = data;
    } else {
        data.randomize(rand);
        data.stratify(m_NumFolds);
        train = data.trainCV(m_NumFolds, 1, rand);
        backfit = data.testCV(m_NumFolds, 1);
    }

    //Set Default Instances for selection.
    setRequiredInst(data);

    // Create the attribute indices window
    int[] attIndicesWindow = new int[data.numAttributes() - 1];
    int j = 0;
    for (int i = 0; i < attIndicesWindow.length; i++) {
        if (j == data.classIndex())
            j++; // do not include the class
        attIndicesWindow[i] = j++;
    }

    // Compute initial class counts
    double[] classProbs = new double[train.numClasses()];
    for (int i = 0; i < train.numInstances(); i++) {
        Instance inst = train.instance(i);
        classProbs[(int) inst.classValue()] += inst.weight();
    }

    Instances requiredInstances = getRequiredInst();
    // Build tree
    if (jsontree != null) {
        buildTree(train, classProbs, new Instances(data, 0), m_Debug, 0, jsontree, 0, m_distributionData,
                requiredInstances, listOfFc, cSetList, ccSer, d);
    } else {
        System.out.println("No json tree specified, failing to process tree");
    }
    setRequiredInst(requiredInstances);
    // Backfit if required
    if (backfit != null) {
        backfitData(backfit);
    }
}