Example usage for weka.core Instances deleteWithMissingClass

List of usage examples for weka.core.Instances.deleteWithMissingClass()

Introduction

On this page you can find example usages of weka.core.Instances.deleteWithMissingClass().

Prototype

public void deleteWithMissingClass() 

Source Link

Document

Removes all instances with a missing class value from the dataset.

Usage

From source file:j48.C45PruneableClassifierTreeG.java

License:Open Source License

/**
 * Builds a pruneable classifier tree from the given data.
 * <p>
 * After the capability check, the tree is built on a new {@code Instances}
 * object from which instances with a missing class value have been removed.
 * The method then calls {@code collapse()}, {@code prune()} (only when
 * {@code m_pruneTheTree} is set), {@code doGrafting(...)} and, when
 * {@code m_cleanup} is set, {@code cleanup(...)}.
 *
 * @param data the data for building the tree
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // Fail early if this tree's capabilities do not cover the data.
    getCapabilities().testWithFail(data);

    // Build on a separate Instances object without missing-class instances.
    final Instances training = new Instances(data);
    training.deleteWithMissingClass();

    buildTree(training, m_subtreeRaising);
    collapse();

    if (m_pruneTheTree) {
        prune();
    }

    doGrafting(training);

    if (m_cleanup) {
        // cleanup receives an Instances object created from the training
        // data with an initial capacity of 0.
        final Instances header = new Instances(training, 0);
        cleanup(header);
    }
}

From source file:j48.ClassifierTree.java

License:Open Source License

/**
 * Builds a classifier tree from the given data.
 * <p>
 * The data is first checked against this tree's capabilities; the tree is
 * then built on a new {@code Instances} object from which all instances
 * with a missing class value have been removed.
 *
 * @param data the data to build the tree from
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // Fail early if this tree's capabilities do not cover the data.
    getCapabilities().testWithFail(data);

    // Build on a separate Instances object without missing-class instances.
    final Instances training = new Instances(data);
    training.deleteWithMissingClass();

    // The second argument is fixed to false in this implementation.
    buildTree(training, false);
}

From source file:j48.PruneableClassifierTree.java

License:Open Source License

/**
 * Builds a pruneable classifier tree from the given data.
 * <p>
 * Instances with a missing class value are removed, the remaining data is
 * stratified into {@code numSets} folds, and the tree is built from the
 * training and test splits of fold {@code numSets - 1}. {@code prune()} is
 * called when {@code pruneTheTree} is set, and {@code cleanup(...)} when
 * {@code m_cleanup} is set.
 *
 * @param data the data to build the tree from
 * @throws Exception if tree can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // Fail early if this tree's capabilities do not cover the data.
    getCapabilities().testWithFail(data);

    // Build on a separate Instances object without missing-class instances.
    final Instances cleaned = new Instances(data);
    cleaned.deleteWithMissingClass();

    final Random rng = new Random(m_seed);
    cleaned.stratify(numSets);

    // Both splits are taken from the same fold (the last one).
    final Instances trainSplit = cleaned.trainCV(numSets, numSets - 1, rng);
    final Instances testSplit = cleaned.testCV(numSets, numSets - 1);
    buildTree(trainSplit, testSplit, !m_cleanup);

    if (pruneTheTree) {
        prune();
    }

    if (m_cleanup) {
        // cleanup receives an Instances object created from the cleaned
        // data with an initial capacity of 0.
        final Instances header = new Instances(cleaned, 0);
        cleanup(header);
    }
}

From source file:learning.DMRandomForest.java

License:Open Source License

/**
 * Builds the classifier for a set of instances by bagging random trees.
 * <p>
 * A {@code RandomTree} is configured with {@code m_KValue} and the maximum
 * depth, and is then handed to a {@code DMBagging} instance that is built
 * on the data with out-of-bag calculation switched on.
 *
 * @param data the instances to train the classifier with
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // Fail early if this classifier's capabilities do not cover the data.
    getCapabilities().testWithFail(data);

    // Train on a separate Instances object without missing-class instances.
    final Instances training = new Instances(data);
    training.deleteWithMissingClass();

    m_bagger = new DMBagging();
    final RandomTree baseTree = new RandomTree();

    // Configure the random tree. When fewer than one feature is requested,
    // fall back to (int) log2(number of attributes) + 1.
    m_KValue = m_numFeatures;
    if (m_KValue < 1) {
        m_KValue = (int) Utils.log2(training.numAttributes()) + 1;
    }
    baseTree.setKValue(m_KValue);
    baseTree.setMaxDepth(getMaxDepth());

    // Configure the bagger and build the forest.
    m_bagger.setClassifier(baseTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.buildClassifier(training);
}

From source file:library.MikeC45PruneableClassifierTree.java

License:Open Source License

/**
 * Builds a pruneable classifier tree from the given data.
 * <p>
 * After the capability check, the tree is built on a new {@code Instances}
 * object from which instances with a missing class value have been removed.
 * The method then calls {@code collapse()}, {@code prune()} (only when
 * {@code m_pruneTheTree} is set) and, when {@code m_cleanup} is set,
 * {@code cleanup(...)}.
 *
 * @param data the data for building the tree
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // Fail early if this tree's capabilities do not cover the data.
    getCapabilities().testWithFail(data);

    // Build on a separate Instances object without missing-class instances.
    final Instances training = new Instances(data);
    training.deleteWithMissingClass();

    buildTree(training, m_subtreeRaising);
    collapse();

    if (m_pruneTheTree) {
        prune();
    }

    if (m_cleanup) {
        // cleanup receives an Instances object created from the training
        // data with an initial capacity of 0.
        final Instances header = new Instances(training, 0);
        cleanup(header);
    }
}

From source file:LogReg.FilteredLogRegClassifier.java

License:Open Source License

/**
 * Build the classifier on the filtered data.
 *
 * @param data the training data/*from w  ww .j  a v  a2  s.  c  o  m*/
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    if (m_Classifier == null) {
        throw new Exception("No base classifiers have been set!");
    }

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    /*
    String fname = m_Filter.getClass().getName();
    fname = fname.substring(fname.lastIndexOf('.') + 1);
    util.Timer t = util.Timer.getTimer("FilteredClassifier::" + fname);
    t.start();
    */
    m_Filter.setInputFormat(data); // filter capabilities are checked here
    data = Filter.useFilter(data, m_Filter);
    //t.stop();

    // can classifier handle the data?
    getClassifier().getCapabilities().testWithFail(data);

    m_FilteredInstances = data.stringFreeStructure();
    m_Classifier.buildClassifier(data);
}

From source file:LogReg.Logistic.java

License:Open Source License

/**
 * Builds the classifier.
 * <p>
 * The training data is copied and preprocessed (missing-class instances
 * removed, then the ReplaceMissingValues, RemoveUseless and NominalToBinary
 * filters applied in that order). The attribute values are extracted into
 * {@code m_Data}, standardised using weighted means and standard deviations,
 * and the parameter vector is fitted with {@code OptEng.findArgmin}. The
 * fitted parameters are finally converted back to the original attribute
 * units and stored in {@code m_Par}; the negated minimum function value is
 * stored in {@code m_LL}.
 *
 * @param train the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully,
 * or if the instance weights sum to at most 1 while there is more than
 * one instance
 */
public void buildClassifier(Instances train) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(train);

    // remove instances with missing class
    train = new Instances(train);
    train.deleteWithMissingClass();

    // Replace missing values
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(train);
    train = Filter.useFilter(train, m_ReplaceMissingValues);

    // Remove useless attributes
    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(train);
    train = Filter.useFilter(train, m_AttFilter);

    // Transform attributes
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(train);
    train = Filter.useFilter(train, m_NominalToBinary);

    // Save the structure for printing the model
    m_structure = new Instances(train, 0);

    // Extract data
    m_ClassIndex = train.classIndex();
    m_NumClasses = train.numClasses();

    int nK = m_NumClasses - 1; // Only K-1 class labels needed
    int nR = m_NumPredictors = train.numAttributes() - 1; // every attribute except the class
    int nC = train.numInstances();

    m_Data = new double[nC][nR + 1]; // Data values
    int[] Y = new int[nC]; // Class labels
    double[] xMean = new double[nR + 1]; // Attribute means
    // xSD is not declared in this method, so it is a variable defined
    // outside of it (not visible in this snippet).
    xSD = new double[nR + 1]; // Attribute stddev's
    double[] sY = new double[nK + 1]; // Number of classes
    double[] weights = new double[nC]; // Weights of instances
    double totWeights = 0; // Total weights of the instances
    m_Par = new double[nR + 1][nK]; // Optimized parameter values

    if (m_Debug) {
        System.out.println("Extracting data...");
    }

    for (int i = 0; i < nC; i++) {
        // initialize X[][]
        Instance current = train.instance(i);
        Y[i] = (int) current.classValue(); // Class value starts from 0
        weights[i] = current.weight(); // Dealing with weights
        totWeights += weights[i];

        // Column 0 holds a constant 1; predictor values go to columns 1..nR.
        m_Data[i][0] = 1;
        int j = 1;
        // k runs over all nR + 1 attributes; the class attribute is skipped,
        // so j ends at nR + 1 after exactly nR predictor values were stored.
        for (int k = 0; k <= nR; k++) {
            if (k != m_ClassIndex) {
                double x = current.value(k);
                m_Data[i][j] = x;
                // Accumulate weighted sum and weighted sum of squares; they
                // are turned into mean and standard deviation further below.
                xMean[j] += weights[i] * x;
                xSD[j] += weights[i] * x * x;
                j++;
            }
        }

        // Class count
        sY[Y[i]]++;
    }

    if ((totWeights <= 1) && (nC > 1))
        throw new Exception("Sum of weights of instances less than 1, please reweight!");

    // Column 0 gets mean 0 and stddev 1 so that the normalisation loop
    // below leaves its constant value unchanged.
    xMean[0] = 0;
    xSD[0] = 1;
    for (int j = 1; j <= nR; j++) {
        xMean[j] = xMean[j] / totWeights;
        // Weighted standard deviation with a (totWeights - 1) denominator;
        // Math.abs guards against a slightly negative difference.
        if (totWeights > 1)
            xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1));
        else
            xSD[j] = 0;
    }

    if (m_Debug) {
        // Output stats about input data
        System.out.println("Descriptives...");
        for (int m = 0; m <= nK; m++)
            System.out.println(sY[m] + " cases have class " + m);
        System.out.println("\n Variable     Avg       SD    ");
        for (int j = 1; j <= nR; j++)
            System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4)
                    + Utils.doubleToString(xSD[j], 10, 4));
    }

    // Normalise input data
    // Columns with a zero standard deviation are left as they are.
    for (int i = 0; i < nC; i++) {
        for (int j = 0; j <= nR; j++) {
            if (xSD[j] != 0) {
                m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j];
            }
        }
    }

    if (m_Debug) {
        System.out.println("\nIteration History...");
    }

    // Flat parameter vector: nK consecutive groups of (nR + 1) values, the
    // first value of each group belonging to the constant column 0.
    double x[] = new double[(nR + 1) * nK];
    double[][] b = new double[2][x.length]; // Boundary constraints, N/A here

    // Initialize
    for (int p = 0; p < nK; p++) {
        int offset = p * (nR + 1);
        // Start from the log ratio of the (add-one) class counts of class p
        // and the last class; all other parameters start at zero.
        x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model
        // NaN is written into both bound rows for every parameter.
        b[0][offset] = Double.NaN;
        b[1][offset] = Double.NaN;
        for (int q = 1; q <= nR; q++) {
            x[offset + q] = 0.0;
            b[0][offset + q] = Double.NaN;
            b[1][offset + q] = Double.NaN;
        }
    }

    OptEng opt = new OptEng();
    opt.setDebug(m_Debug);
    opt.setWeights(weights);
    opt.setClassLabels(Y);

    if (m_MaxIts == -1) { // Search until convergence
        x = opt.findArgmin(x, b);
        // A null result is treated as "not converged yet": restart from the
        // optimiser's current variable values until a result is returned.
        while (x == null) {
            x = opt.getVarbValues();
            if (m_Debug)
                System.out.println("200 iterations finished, not enough!");
            x = opt.findArgmin(x, b);
        }
        if (m_Debug)
            System.out.println(" -------------<Converged>--------------");
    } else {
        opt.setMaxIteration(m_MaxIts);
        x = opt.findArgmin(x, b);
        if (x == null) // Not enough, but use the current value
            x = opt.getVarbValues();
    }

    m_LL = -opt.getMinFunction(); // Log-likelihood

    // Don't need data matrix anymore
    m_Data = null;

    // Convert coefficients back to non-normalized attribute units
    for (int i = 0; i < nK; i++) {
        m_Par[0][i] = x[i * (nR + 1)];
        for (int j = 1; j <= nR; j++) {
            m_Par[j][i] = x[i * (nR + 1) + j];
            // Undo the standardisation: rescale the coefficient by the
            // stddev and fold the mean shift into the column-0 parameter.
            if (xSD[j] != 0) {
                m_Par[j][i] /= xSD[j];
                m_Par[0][i] -= m_Par[j][i] * xMean[j];
            }
        }
    }
}

From source file:machine_learing_clasifier.MyC45.java

/**
 * Builds the classifier's tree from the given training data.
 * <p>
 * The class attribute must be nominal. Instances with a missing class value
 * are removed first; missing values in the remaining instances are then
 * replaced with the value returned by {@code fillMissingValue}, and the tree
 * is built with {@code makeTree}.
 *
 * @param i the training data
 * @throws Exception if the class attribute is not nominal, or if building
 *                   the tree fails
 */
@Override
public void buildClassifier(Instances i) throws Exception {
    if (!i.classAttribute().isNominal()) {
        throw new Exception("Class not nominal");
    }

    // Copy and drop unlabelled instances BEFORE imputing. Imputing first
    // would also fill in missing class values, which turns the deletion
    // into a no-op and trains on invented labels. It would also write the
    // imputed values into the caller's instances.
    // NOTE(review): this relies on the Instances copy constructor giving the
    // copy its own Instance objects - confirm for the Weka version in use.
    i = new Instances(i);
    i.deleteWithMissingClass();

    // Missing-value handling: replace every missing attribute value.
    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute attr = i.attribute(j);
        for (int k = 0; k < i.numInstances(); k++) {
            Instance inst = i.instance(k);
            if (inst.isMissing(attr)) {
                // The replacement is recomputed for every missing value;
                // performance could still be tuned here.
                inst.setValue(attr, fillMissingValue(i, attr));
            }
        }
    }

    makeTree(i);
}

From source file:machine_learing_clasifier.MyID3.java

/**
 * Builds the classifier's tree from the given training data.
 * <p>
 * The class attribute and all other attributes must be nominal. Instances
 * with a missing class value are removed; any missing value in the remaining
 * instances is rejected with an exception. The tree is then built with
 * {@code makeTree}.
 *
 * @param i the training data
 * @throws Exception if the class or any attribute is not nominal, if a
 *                   remaining instance has a missing value, or if building
 *                   the tree fails
 */
@Override
public void buildClassifier(Instances i) throws Exception {
    if (!i.classAttribute().isNominal()) {
        throw new Exception("Class not nominal");
    }

    // Copy and drop unlabelled instances BEFORE validating. The validation
    // loop below also covers the class attribute, so running it first would
    // reject a missing class value with "Missing value" and the deletion
    // could never remove anything.
    i = new Instances(i);
    i.deleteWithMissingClass();

    for (int j = 0; j < i.numAttributes(); j++) {
        Attribute attr = i.attribute(j);
        if (!attr.isNominal()) {
            throw new Exception("Attribute not nominal");
        }

        // Missing attribute values are not supported by this classifier.
        for (int k = 0; k < i.numInstances(); k++) {
            Instance inst = i.instance(k);
            if (inst.isMissing(attr)) {
                throw new Exception("Missing value");
            }
        }
    }

    makeTree(i);
}

From source file:main.NaiveBayes.java

License:Open Source License

/**
 * Generates the classifier.
 * <p>
 * Instances with a missing class value are removed, the data is optionally
 * discretized, one estimator is created per (attribute, class) pair, and
 * every training instance is then passed to {@code updateClassifier}.
 * Afterwards only an empty {@code Instances} object created from the
 * training data is kept in {@code m_Instances}.
 *
 * @param instances set of instances serving as training data
 * @exception Exception if the classifier has not been generated successfully
 */
@Override
public void buildClassifier(Instances instances) throws Exception {

    // Fail early if this classifier's capabilities do not cover the data.
    getCapabilities().testWithFail(instances);

    // Use a separate Instances object without missing-class instances.
    final Instances labelled = new Instances(instances);
    labelled.deleteWithMissingClass();

    m_NumClasses = labelled.numClasses();

    // Keep a separate Instances object for training.
    m_Instances = new Instances(labelled);

    // Discretize the training data if requested.
    if (!m_UseDiscretization) {
        m_Disc = null;
    } else {
        m_Disc = new weka.filters.supervised.attribute.Discretize();
        m_Disc.setInputFormat(m_Instances);
        m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc);
    }

    // Allocate one estimator slot per (attribute, class) pair.
    final int classCount = m_Instances.numClasses();
    m_Distributions = new Estimator[m_Instances.numAttributes() - 1][classCount];
    m_ClassDistribution = new DiscreteEstimator(classCount, true);

    int attIndex = 0;
    for (Enumeration<Attribute> attrs = m_Instances.enumerateAttributes(); attrs.hasMoreElements(); attIndex++) {
        final Attribute attribute = attrs.nextElement();
        final int attType = attribute.type();

        // For numeric attributes the estimator precision is the average gap
        // between adjacent distinct values; otherwise the default is used.
        double numPrecision = DEFAULT_NUM_PRECISION;
        if (attType == Attribute.NUMERIC) {
            m_Instances.sort(attribute);
            final int total = m_Instances.numInstances();
            if ((total > 0) && !m_Instances.instance(0).isMissing(attribute)) {
                double previous = m_Instances.instance(0).value(attribute);
                double gapSum = 0;
                int gapCount = 0;
                for (int row = 1; row < total; row++) {
                    final Instance candidate = m_Instances.instance(row);
                    // Stop scanning at the first missing value.
                    if (candidate.isMissing(attribute)) {
                        break;
                    }
                    final double value = candidate.value(attribute);
                    if (value != previous) {
                        gapSum += value - previous;
                        previous = value;
                        gapCount++;
                    }
                }
                if (gapCount > 0) {
                    numPrecision = gapSum / gapCount;
                }
            }
        }

        // Create the estimator matching the attribute type for each class.
        for (int cls = 0; cls < classCount; cls++) {
            if (attType == Attribute.NUMERIC) {
                if (m_UseKernelEstimator) {
                    m_Distributions[attIndex][cls] = new KernelEstimator(numPrecision);
                } else {
                    m_Distributions[attIndex][cls] = new NormalEstimator(numPrecision);
                }
            } else if (attType == Attribute.NOMINAL) {
                m_Distributions[attIndex][cls] = new DiscreteEstimator(attribute.numValues(), true);
            } else {
                throw new Exception("Attribute type unknown to NaiveBayes");
            }
        }
    }

    // Compute counts by feeding every training instance to the estimators.
    for (Enumeration<Instance> rows = m_Instances.enumerateInstances(); rows.hasMoreElements();) {
        updateClassifier(rows.nextElement());
    }

    // Save space: keep only an empty Instances object built from the data.
    m_Instances = new Instances(m_Instances, 0);
}