Example usage for weka.core Instances resampleWithWeights

List of usage examples for weka.core Instances resampleWithWeights

Introduction

On this page you can find example usages of weka.core Instances resampleWithWeights.

Prototype

public Instances resampleWithWeights(Random random) 

Source Link

Document

Creates a new dataset of the same size as this dataset using random sampling with replacement according to the current instance weights.

Usage

From source file:fantail.algorithms.ARTForests.java

License:Open Source License

/**
 * Trains {@code m_T} {@link BinaryART} rankers, each on its own sample drawn
 * with {@code resampleWithWeights} from a working copy of the supplied data,
 * and stores them in {@code m_WeakRankers}.
 *
 * @param metaData the data the bagging samples are drawn from
 * @throws Exception propagated from the resample filter or from building a weak ranker
 */
@Override
public void buildRanker(Instances metaData) throws Exception {

    // One generator, seeded once, is shared by every bootstrap draw below.
    final Random rng = new Random(m_RandomSeed);
    final Instances source = new Instances(metaData);
    m_WeakRankers = new Ranker[m_T];

    for (int round = 0; round < m_T; round++) {
        final Instances bootstrap = source.resampleWithWeights(rng);

        // Below 100% the bootstrap is additionally sub-sampled (with replacement)
        // down to the configured percentage; otherwise it is used as drawn.
        final Instances trainingBag;
        if (m_BaggingPercentage < 100.0) {
            weka.filters.unsupervised.instance.Resample shrink =
                    new weka.filters.unsupervised.instance.Resample();
            shrink.setSampleSizePercent(m_BaggingPercentage);
            shrink.setNoReplacement(false);
            shrink.setInputFormat(bootstrap);
            trainingBag = Filter.useFilter(bootstrap, shrink);
        } else {
            trainingBag = bootstrap;
        }

        // Each weak ranker is seeded with its own position in the ensemble.
        BinaryART weak = new BinaryART();
        weak.setMiniLeaf(m_NumMinInstances);
        weak.setK(m_K);
        weak.setRandomSeed(round);
        weak.setUseMedian(m_UseMedian);

        // Stored before it is built, so the slot is filled even if building throws.
        m_WeakRankers[round] = weak;
        m_WeakRankers[round].buildRanker(trainingBag);
    }
}

From source file:mulan.classifier.transformation.EnsembleOfClassifierChains.java

License:Open Source License

/**
 * Builds {@code numOfModels} classifier chains, each trained on its own sample
 * of the training set and with its own shuffled label order, and stores them
 * in {@code ensemble}.
 *
 * <p>With {@code useSamplingWithReplacement} the sample is drawn via
 * {@code resampleWithWeights} and cut to {@code BagSizePercent} percent of the
 * data; otherwise a {@code RemovePercentage} filter (inverted selection,
 * {@code samplingPercentage}) picks the sample.
 *
 * @param trainingSet the multi-label data to train on; its label metadata is reused for every sample
 * @throws Exception propagated from filtering, dataset construction or chain building
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {

    Instances dataSet = new Instances(trainingSet.getDataSet());

    for (int i = 0; i < numOfModels; i++) {
        debug("ECC Building Model:" + (i + 1) + "/" + numOfModels);
        // 2013.12.13: progress is additionally echoed to stdout.
        System.out.println("ECC Building Model:" + (i + 1) + "/" + numOfModels);

        // Reshuffle before every draw; the sampler below is re-seeded with the
        // same constant for each model.
        dataSet.randomize(rand);

        final Instances sampledDataSet;
        if (useSamplingWithReplacement) {
            final int totalInstances = dataSet.numInstances();
            // Computed in long: the int product numInstances * BagSizePercent
            // can overflow for large datasets and yield a bogus bag size.
            final long bagSize = (long) totalInstances * BagSizePercent / 100;
            // create the in-bag dataset
            final Instances inBag = dataSet.resampleWithWeights(new Random(1));
            // bagSize < totalInstances guarantees the narrowing cast is lossless.
            sampledDataSet = bagSize < totalInstances
                    ? new Instances(inBag, 0, (int) bagSize)
                    : inBag;
        } else {
            RemovePercentage rmvp = new RemovePercentage();
            rmvp.setInvertSelection(true);
            rmvp.setPercentage(samplingPercentage);
            rmvp.setInputFormat(dataSet);
            sampledDataSet = Filter.useFilter(dataSet, rmvp);
        }
        MultiLabelInstances train = new MultiLabelInstances(sampledDataSet, trainingSet.getLabelsMetaData());

        // Start from the identity ordering 0..numLabels-1, then shuffle it.
        int[] chain = new int[numLabels];
        for (int j = 0; j < numLabels; j++) {
            chain[j] = j;
        }
        // NOTE(review): each position is swapped with one drawn from the whole
        // array, which is not a uniform (Fisher-Yates) shuffle. Left unchanged
        // so that results for a given seed stay reproducible.
        for (int j = 0; j < chain.length; j++) {
            int randomPosition = rand.nextInt(chain.length);
            int temp = chain[j];
            chain[j] = chain[randomPosition];
            chain[randomPosition] = temp;
        }
        debug(Arrays.toString(chain));
        System.out.println(Arrays.toString(chain));
        // Maybe we should check that the same ordering is not produced twice,
        // but the paper does not mention anything like that; it simply asks
        // for a random chain ordering of L.

        ensemble[i] = new ClassifierChain(baseClassifier, chain);
        ensemble[i].build(train);
    }

}