List of usage examples for weka.core Instances numClasses
public int numClasses()
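Before the full examples, a minimal sketch of the call itself (hedged: "iris.arff" is a placeholder path, and the choice of the last attribute as the class is an assumption, not taken from the examples below):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumClassesDemo {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder path for any nominal-class dataset.
        Instances data = DataSource.read("iris.arff");
        // numClasses() inspects the class attribute, so set the class index first.
        data.setClassIndex(data.numAttributes() - 1);
        // For a nominal class this is the number of class labels (3 for iris);
        // Weka returns 1 when the class attribute is numeric.
        System.out.println("numClasses = " + data.numClasses());
    }
}

The examples below all use the same idiom: size an array with numClasses() and index it with the (cast) class value of each instance.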
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Boosting method.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    super.buildClassifier(data);

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    m_NumClasses = data.numClasses();
    if (m_NumClasses != 2)
        System.err.println("Can only build model for binary class data");

    /* we do not use the method buildClassifierWithWeights as we think that
       some base learning algorithms could not deal with weighted data. */
    buildClassifierUsingResampling(data);
}
From source file:gyc.SMOTEBagging.java
License:Open Source License
/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    int b = 0;
    for (int j = 0; j < m_Classifiers.length; j++) {

        // class counts for the (binary, nominal) class attribute
        int classNum[] = data.attributeStats(data.classIndex()).nominalCounts;
        int minC, nMin = classNum[0];
        int majC, nMaj = classNum[1];
        if (nMin < nMaj) {
            minC = 0;
            majC = 1;
        } else {
            minC = 1;
            majC = 0;
            nMin = classNum[1];
            nMaj = classNum[0];
        }

        // increase the resampling rate each iteration to diversify the bags
        b = b + 10;
        Instances bagData = randomSampling(data, majC, minC, b, random);

        /*
        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = resampleWithWeights(data, random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
        */

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //classNum = bagData.attributeStats(bagData.classIndex()).nominalCounts;
        //System.out.println("after:" + classNum[0] + "-" + classNum[1]);
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;
                voteCount++;
                double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric)
                    votes[0] += pred;
                else
                    votes[(int) pred]++;
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                vote = Utils.maxIndex(votes); // majority vote
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue())
                        * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
From source file:hr.irb.fastRandomForest.NakedFastRfBagging.java
License:Open Source License
/**
 * Bagging method. Produces DataCache objects with bootstrap samples of the
 * original data, and feeds them to the base classifier (which can only be a
 * FastRandomTree).
 *
 * @param data         The training set to be used for generating the bagged
 *                     classifier.
 * @param numThreads   The number of simultaneous threads to use for
 *                     computation. Pass zero (0) for autodetection.
 * @param motherForest A reference to the FastRandomForest object that invoked this.
 *
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, final int numThreads,
        final NakedFastRandomForest motherForest) throws Exception {

    // can classifier handle the vals?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    if (!(m_Classifier instanceof NakedFastRandomTree))
        throw new IllegalArgumentException("The NakedFastRfBagging class accepts "
                + "only NakedFastRandomTree as its base classifier.");

    /*
     * We fill the m_Classifiers array by creating lots of trees with new()
     * because this is much faster than using serialization to deep-copy the
     * one tree in m_Classifier - this is what the super.buildClassifier(data)
     * normally does.
     */
    m_Classifiers = new Classifier[m_NumIterations];
    for (int i = 0; i < m_Classifiers.length; i++) {
        final NakedFastRandomTree curTree = new NakedFastRandomTree();
        // all parameters for training will be looked up in the motherForest
        // (maxDepth, k_Value)
        curTree.m_MotherForest = motherForest;
        // 0.99: reference to these arrays will get passed down all nodes so
        // the array can be re-used
        // 0.99: this array is of size two as now all splits are binary -
        // even categorical ones
        curTree.tempProps = new double[2];
        curTree.tempDists = new double[2][];
        curTree.tempDists[0] = new double[data.numClasses()];
        curTree.tempDists[1] = new double[data.numClasses()];
        curTree.tempDistsOther = new double[2][];
        curTree.tempDistsOther[0] = new double[data.numClasses()];
        curTree.tempDistsOther[1] = new double[data.numClasses()];
        m_Classifiers[i] = curTree;
    }

    // this was SLOW.. takes approx 1/2 time as training the forest afterwards (!!!)
    // super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    // sorting is performed inside this constructor
    final DataCache myData = new DataCache(data);

    final int bagSize = data.numInstances() * m_BagSizePercent / 100;
    final Random random = new Random(m_Seed);
    final boolean[][] inBag = new boolean[m_Classifiers.length][];

    // thread management
    final ExecutorService threadPool = Executors.newFixedThreadPool(
            numThreads > 0 ? numThreads : Runtime.getRuntime().availableProcessors());
    final List<Future<?>> futures = new ArrayList<Future<?>>(m_Classifiers.length);

    try {
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            // create the in-bag dataset (and be sure to remember what's in bag)
            // for computing the out-of-bag error later
            final DataCache bagData = myData.resample(bagSize, random);
            bagData.reusableRandomGenerator = bagData.getRandomNumberGenerator(random.nextInt());
            inBag[treeIdx] = bagData.inBag; // store later for OOB error calculation

            // build the classifier
            if (m_Classifiers[treeIdx] instanceof NakedFastRandomTree) {
                final FastRandomTree aTree = (FastRandomTree) m_Classifiers[treeIdx];
                aTree.data = bagData;
                final Future<?> future = threadPool.submit(aTree);
                futures.add(future);
            } else {
                throw new IllegalArgumentException("The FastRfBagging class accepts "
                        + "only NakedFastRandomTree as its base classifier.");
            }
        }

        // make sure all trees have been trained before proceeding
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            futures.get(treeIdx).get();
        }

        // [jhostetler] 'm_FeatureImportances' and 'computeOOBError()' are
        // private, so we'll just not compute them.
        // calc OOB error?
        // if( getCalcOutOfBag() || getComputeImportances() ) {
        //     // m_OutOfBagError = computeOOBError(data, inBag, threadPool);
        //     m_OutOfBagError = computeOOBError( myData, inBag, threadPool );
        // }
        // else {
        //     m_OutOfBagError = 0;
        // }
        //
        // // calc feature importances
        // m_FeatureImportances = null;
        // // m_FeatureNames = null;
        // if( getComputeImportances() ) {
        //     m_FeatureImportances = new double[data.numAttributes()];
        //     // m_FeatureNames = new String[data.numAttributes()];
        //     // Instances dataCopy = new Instances(data); // To scramble
        //     // int[] permutation = FastRfUtils.randomPermutation(data.numInstances(), random);
        //     for( int j = 0; j < data.numAttributes(); j++ ) {
        //         if( j != data.classIndex() ) {
        //             // double sError = computeOOBError(FastRfUtils.scramble(data, dataCopy, j, permutation), inBag, threadPool);
        //             // double sError = computeOOBError(data, inBag, threadPool, j, 0);
        //             final float[] unscrambled = myData.scrambleOneAttribute( j, random );
        //             final double sError = computeOOBError( myData, inBag, threadPool );
        //             myData.vals[j] = unscrambled; // restore the original state
        //             m_FeatureImportances[j] = sError - m_OutOfBagError;
        //         }
        //         // m_FeatureNames[j] = data.attribute(j).name();
        //     }
        // }

        threadPool.shutdown();
    } finally {
        threadPool.shutdownNow();
    }
}
From source file:id3.MyID3.java
/**
 * Decision-tree induction algorithm.
 * @param instances the training data
 * @param attributes the remaining attributes
 * @throws Exception
 */
public void buildMyID3(Instances instances, ArrayList<Attribute> attributes) throws Exception {
    // Check if no instances have reached this node.
    if (instances.numInstances() == 0) {
        classAttribute = null;
        classLabel = Instance.missingValue();
        classDistributionAmongInstances = new double[instances.numClasses()];
        return;
    }

    // Check if all instances only contain one class label
    if (computeEntropy(instances) == 0) {
        currentAttribute = null;
        classDistributionAmongInstances = classDistribution(instances);
        // Labelling process at node
        for (int i = 0; i < classDistributionAmongInstances.length; i++) {
            if (classDistributionAmongInstances[i] > 0) {
                classLabel = i;
                break;
            }
        }
        classAttribute = instances.classAttribute();
        Utils.normalize(classDistributionAmongInstances);
    } else {
        // Compute infogain for each attribute
        double[] infoGainAttribute = new double[instances.numAttributes()];
        for (int i = 0; i < instances.numAttributes(); i++) {
            infoGainAttribute[i] = computeIG(instances, instances.attribute(i));
        }

        // Choose attribute with maximum information gain
        int indexMaxInfoGain = 0;
        double maximumInfoGain = 0.0;
        for (int i = 0; i < (infoGainAttribute.length - 1); i++) {
            if (infoGainAttribute[i] > maximumInfoGain) {
                maximumInfoGain = infoGainAttribute[i];
                indexMaxInfoGain = i;
            }
        }
        currentAttribute = instances.attribute(indexMaxInfoGain);

        // Delete current attribute from remaining attributes
        ArrayList<Attribute> remainingAttributes = attributes;
        if (!remainingAttributes.isEmpty()) {
            int indexAttributeDeleted = 0;
            for (int i = 0; i < remainingAttributes.size(); i++) {
                if (remainingAttributes.get(i).index() == currentAttribute.index()) {
                    indexAttributeDeleted = i;
                }
            }
            remainingAttributes.remove(indexAttributeDeleted);
        }

        // Split instances based on currentAttribute (create branch new node)
        Instances[] instancesSplitBasedAttribute = splitData(instances, currentAttribute);
        subTree = new MyID3[currentAttribute.numValues()];
        for (int i = 0; i < currentAttribute.numValues(); i++) {
            if (instancesSplitBasedAttribute[i].numInstances() == 0) {
                // Handle empty examples at nodes: label with the majority class
                double[] currentClassDistribution = classDistribution(instances);
                classLabel = 0.0;
                double counterDistribution = 0.0;
                for (int j = 0; j < currentClassDistribution.length; j++) {
                    if (currentClassDistribution[j] > counterDistribution) {
                        counterDistribution = currentClassDistribution[j]; // track the running maximum
                        classLabel = j;
                    }
                }
                classAttribute = instances.classAttribute();
            } else {
                subTree[i] = new MyID3();
                subTree[i].buildMyID3(instancesSplitBasedAttribute[i], remainingAttributes);
            }
        }
    }
}
From source file:id3.MyID3.java
/**
 * Computes the class distribution over a set of instances.
 * @param instances
 * @return the class distribution counts
 */
public double[] classDistribution(Instances instances) {
    // Compute class distribution counter from instances
    double[] distributionClass = new double[instances.numClasses()];
    for (int i = 0; i < instances.numInstances(); i++) {
        distributionClass[(int) instances.instance(i).classValue()]++;
    }
    return distributionClass;
}
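A minimal usage sketch of this helper (hedged: assumes a loaded Instances object named data with its class index already set, as in the demo at the top of this page):

// Counts are index-aligned with the nominal class attribute's labels.
double[] dist = classDistribution(data);
for (int c = 0; c < data.numClasses(); c++) {
    System.out.println(data.classAttribute().value(c) + ": " + dist[c]);
}

Note that indexing by (int) classValue() assumes a nominal class; for a numeric class numClasses() returns 1 and every instance would collapse into the same bucket.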
From source file:imba.classifier.FFNNTubes.java
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);
    data.deleteWithMissingClass();

    nAttribute = data.numAttributes() - 1;
    nOutput = data.numClasses();
    nData = data.size();

    // set target data
    setTarget(data);

    // generate random initial weights
    generateRandomWeight();

    // normalize the data
    Normalize norm = new Normalize();
    Filter filter = new NominalToBinary();

    norm.setInputFormat(data);
    Instances filteredData = Filter.useFilter(data, norm);

    try {
        filter.setInputFormat(filteredData);
        for (Instance i1 : filteredData) {
            filter.input(i1);
        }
        filter.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    int z = 0;
    double valMSE = 100.0;
    while ((z <= nEpoch) && (valMSE >= 0.00001)) {
        for (int j = 0; j < nData; j++) {
            feedForward(filteredData.get(j));
            if (nHidden == 0) {
                updateWeight(target[j]);
            } else {
                backPropagation(target[j]);
            }
        }
        countError(filteredData);
        valMSE = countMSE(filteredData);
        System.out.println("ACCURACY " + z + " : " + accuracy);
        System.out.println("MSE " + z + " : " + valMSE);
        z++;
    }
}
From source file:imba.classifier.NBTubes.java
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;

    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal;
    int numAttr = data.numAttributes(); // includes the class, i.e. attributes + 1

    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;

    // apply the filter
    if (wasNumeric) {
        f = new Normalize();
        //Filter f = new NumericToNominal();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }

        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    //f = new NumericToNominal();
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }

    try {
        if (wasNumeric) {
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // building data structure
    classIdx = data.classIndex();
    dataSize = data.size();

    // fill the data and info classifiers with empty arrays
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());

            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }

            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());

                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // fill the data classifier (per-class counts) from the dataset
    sumClass = new int[data.numClasses()];
    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }
                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);
                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // convert the counts into per-class relative frequencies
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }

    /*
    // check whether any value of an attribute
    // represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            j++;
        }
        i++;
    }
    */
}
From source file:iris.ID3.java
public void makeLikeAWhat(Instances instances) {
    // Create storage for different info gains
    double[] infoGains = new double[instances.numAttributes()];

    // Enumerate through attributes to find the best gain
    Enumeration attributeEnum = instances.enumerateAttributes();
    while (attributeEnum.hasMoreElements()) {
        // Loop through attributes, adding gain to infoGains array
        Attribute att = (Attribute) attributeEnum.nextElement();
        infoGains[att.index()] = infoGain(instances, att);
    }

    // Use maxIndex to find the highest info gain in the array
    highestInfoGain = instances.attribute(Utils.maxIndex(infoGains));

    // Make a leaf if there is no more info to gain;
    // otherwise, create children
    if (Utils.eq(infoGains[highestInfoGain.index()], 0)) {
        highestInfoGain = null;

        // Instantiate maxDistribution
        maxDistribution = new double[instances.numClasses()];

        // Set up enumerator for instances
        Enumeration instanceEnum = instances.enumerateInstances();

        // Tally classes
        while (instanceEnum.hasMoreElements()) {
            Instance instance = (Instance) instanceEnum.nextElement();
            maxDistribution[(int) instance.classValue()]++;
        }

        // Normalize data for easier manipulation
        Utils.normalize(maxDistribution);

        // Get the max index of the distribution
        classValue = Utils.maxIndex(maxDistribution);

        // Save class attribute
        classAttribute = instances.classAttribute();
    }
    // Create children
    else {
        // Split best attribute into bins
        Instances[] bins = makeBins(instances, highestInfoGain);

        // Create nodes
        children = new ID3[highestInfoGain.numValues()];
        for (int i = 0; i < highestInfoGain.numValues(); i++) {
            children[i] = new ID3();
            children[i].makeLikeAWhat(bins[i]);
        }
    }
}
From source file:iris.ID3.java
public double calculateEntropy(Instances instances) {
    // Array to hold counts for each class
    double[] numInEachClass = new double[instances.numClasses()];

    // Loop through every instance in one bin
    for (int i = 0; i < instances.numInstances(); i++) {
        // Increment the count for the class that the instance belongs to
        numInEachClass[(int) instances.instance(i).classValue()]++;
    }

    // Instantiate the entropy value
    double entropy = 0;

    // Loop through number of classes to sum log operations
    for (int i = 0; i < instances.numClasses(); i++) {
        // Skip empty classes (the log of zero is undefined)
        if (numInEachClass[i] > 0) {
            // Shannon entropy term for this class
            entropy -= (numInEachClass[i] / instances.numInstances())
                    * Utils.log2(numInEachClass[i] / instances.numInstances());
        }
    }
    return entropy;
}
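As a quick sanity check of this computation (a worked example, not taken from the source): a bin holding 6 instances of one class and 2 of another gives -(6/8) * log2(6/8) - (2/8) * log2(2/8) ≈ 0.311 + 0.500 ≈ 0.811 bits; a pure bin gives 0, and an even two-class split gives exactly 1.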
From source file:j48.BinC45Split.java
License:Open Source License
/**
 * Creates split on enumerated attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleEnumeratedAttribute(Instances trainInstances) throws Exception {
    Distribution newDistribution, secondDistribution;
    int numAttValues;
    double currIG, currGR;
    Instance instance;
    int i;

    numAttValues = trainInstances.attribute(m_attIndex).numValues();
    newDistribution = new Distribution(numAttValues, trainInstances.numClasses());

    // Only Instances with known values are relevant.
    Enumeration enu = trainInstances.enumerateInstances();
    while (enu.hasMoreElements()) {
        instance = (Instance) enu.nextElement();
        if (!instance.isMissing(m_attIndex))
            newDistribution.add((int) instance.value(m_attIndex), instance);
    }
    m_distribution = newDistribution;

    // For all values
    for (i = 0; i < numAttValues; i++) {

        if (Utils.grOrEq(newDistribution.perBag(i), m_minNoObj)) {
            secondDistribution = new Distribution(newDistribution, i);

            // Check if minimum number of Instances in the two subsets.
            if (secondDistribution.check(m_minNoObj)) {
                m_numSubsets = 2;
                currIG = m_infoGainCrit.splitCritValue(secondDistribution, m_sumOfWeights);
                currGR = m_gainRatioCrit.splitCritValue(secondDistribution, m_sumOfWeights, currIG);
                if ((i == 0) || Utils.gr(currGR, m_gainRatio)) {
                    m_gainRatio = currGR;
                    m_infoGain = currIG;
                    m_splitPoint = (double) i;
                    m_distribution = secondDistribution;
                }
            }
        }
    }
}