Example usage for weka.core Instances resampleWithWeights

Introduction

On this page you can find example usages of weka.core Instances.resampleWithWeights.

Prototype

public Instances resampleWithWeights(Random random, double[] weights) 

Document

Creates a new dataset of the same size as this dataset using random sampling with replacement according to the given weight vector.
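
A minimal sketch of calling the method directly, before the full examples below. The file name "data.arff" and the seed are placeholders, and the weight vector is normalized to sum to one, which matches how the usage examples on this page prepare it:

import java.util.Random;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ResampleDemo {
    public static void main(String[] args) throws Exception {
        // Load any ARFF file; "data.arff" is a placeholder path.
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Build a sampling distribution proportional to the current instance weights.
        double sum = data.sumOfWeights();
        double[] weights = new double[data.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = data.instance(i).weight() / sum;
        }

        // Draw a new dataset of the same size, sampling with replacement.
        Instances sample = data.resampleWithWeights(new Random(42), weights);
        System.out.println("Sampled " + sample.numInstances() + " instances");
    }
}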

Usage

From source file:BaggingImprove.java

/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the bagged
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // copy the data; note that deleteWithMissingClass() is left commented out
    // in this variant, so instances with a missing class are kept
    data = new Instances(data);
    //data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }
    //+
    System.out.println("Classifier length" + m_Classifiers.length);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    //+
    System.out.println("Bag Size " + bagSize);

    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag) {
        inBag = new boolean[m_Classifiers.length][];
    }

    //+
    //initialize the writer used to save each bootstrap sample to a file
    BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt"));

    for (int j = 0; j < m_Classifiers.length; j++) {

        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];

            //System.out.println("Inbag1 " + inBag[0][1]);
            //bagData = resampleWithWeights(data, random, inBag[j]);
            bagData = data.resampleWithWeights(random, inBag[j]);
            //System.out.println("num after resample " + bagData.numInstances());
            //+
            //                for (int k = 0; k < bagData.numInstances(); k++) {
            //                    System.out.println("Bag Data after resample [calc out bag]" + bagData.instance(k));
            //                }

        } else {
            //+
            System.out.println("Not m_Calc out of bag");
            System.out.println("Please configure code inside!");

            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        if (m_Classifier instanceof Randomizable) {
            //+
            System.out.println("Randomizable");
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }

        //write bootstrap into file
        writer.write("Bootstrap " + j);
        writer.newLine();
        writer.write(bagData.toString());
        writer.newLine();

        System.out.println("Berhasil menyimpan bootstrap ke file ");

        System.out.println("Bootstrap " + j + 1);
        //            textarea.append("\nBootsrap " + (j + 1));
        //System.out.println("num instance kedua kali "+bagData.numInstances());

        for (int b = 0; b < bagData.numInstances(); b++) { // start at 0 so the first instance is printed too
            System.out.println("" + bagData.instance(b));
            //                textarea.append("\n" + bagData.instance(b));
        }
        //            //+

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //            //+
        //            
        //            SerializationHelper serialization = new SerializationHelper();
        //            serialization.write("KnnData"+model+".model", m_Classifiers[j]);
        //            System.out.println("Finish write into model");
        //            model++;
    }

    writer.flush();
    writer.close();
    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric) {
                votes = new double[1];
            } else {
                votes = new double[data.numClasses()];
            }

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i]) {
                    continue;
                }
                voteCount++;
                // double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric) {
                    // votes[0] += pred;
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i)); // accumulate, then average below
                } else {
                    // votes[(int) pred]++;
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    //-
                    //                        for(double a : newProbs)
                    //                        {
                    //                            System.out.println("Double new probs %.f "+a);
                    //                        }
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }

                }
            }
            System.out.println("Vote count %d" + voteCount);

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
                //-
                System.out.println("Vote " + vote);

            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else if (vote != data.instance(i).classValue()) {
                //+
                System.out.println("Vote terakhir" + data.instance(i).classValue());
                errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
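
The example above uses the boolean[] overload, data.resampleWithWeights(random, inBag[j]), which records which instances were drawn so that the out-of-bag loop can skip them. A minimal sketch of that pattern in isolation (the class and method names are illustrative, not part of Weka):

import java.util.Random;

import weka.core.Instances;

public class OutOfBagDemo {

    // Returns the fraction of instances that were never drawn into the bag.
    static double oobFraction(Instances data, long seed) {
        boolean[] inBag = new boolean[data.numInstances()];
        data.resampleWithWeights(new Random(seed), inBag); // flags in-bag instances
        int oob = 0;
        for (boolean sampled : inBag) {
            if (!sampled) {
                oob++; // never sampled, so usable for out-of-bag evaluation
            }
        }
        return oob / (double) data.numInstances();
    }
}

With uniform weights this fraction tends toward 1 - 1/e (about 0.368) as the dataset grows, which is why the out-of-bag instances make a usable stand-in for a held-out test set.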

From source file:Pair.java

License:Open Source License

private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();
    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;
    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { //weight all equally
        weightSource = weightTarget = 1.0/*/numInstances*/;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight/*/numSourceInstances*/;
        weightTarget = 1.0 / totalWeight/*/numTargetInstances*/;
        initialSourceFraction = weightSource;
    }
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations/*m_NumIterations*/; it++) {

            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; //[same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;
            //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {

        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {

            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }

            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }

            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();

            buildClassifierWithWeights(data);

            System.out.println("Iteration " + it + ":" + getTestError());

            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);

        }

    }

}
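
The normalize-then-resample idiom above (divide each instance weight by sumOfWeights() before passing the vector in) recurs in every boosting example on this page. A small helper capturing it, using only the Weka calls already shown (the class and method names are illustrative):

import java.util.Random;

import weka.core.Instances;

public final class WeightedSampling {

    // Draws a bootstrap sample whose selection probabilities are
    // proportional to the current instance weights.
    public static Instances sampleByCurrentWeights(Instances data, Random random) {
        double sum = data.sumOfWeights();
        double[] weights = new double[data.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = data.instance(i).weight() / sum;
        }
        return data.resampleWithWeights(random, weights);
    }
}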

From source file:Pair.java

License:Open Source License

/**
 * Boosting method. Boosts any classifier that can handle weighted
 * instances.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @exception Exception if the classifier could not be built successfully
 */
protected void buildClassifierWithWeights(Instances data) throws Exception {

    Random randomInstance = new Random(0);
    double epsilon, reweight, beta = 0;
    Evaluation evaluation;
    Instances sample;
    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    int numSourceInstances = m_SourceInstances.numInstances();

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        // Build the classifier
        sample = null;
        if (!resample || m_NumIterationsPerformed == 0) {
            sample = data;
        } else {
            double sum = data.sumOfWeights();
            double[] weights = new double[data.numInstances()];
            for (int i = 0; i < weights.length; i++) {
                weights[i] = data.instance(i).weight() / sum;
            }
            sample = data.resampleWithWeights(randomInstance, weights);

            if (doSampleSize) {
                int effectiveInstances = (int) (sourceFraction * weights.length + numTargetInstances);
                if (effectiveInstances > numSourceInstances + numTargetInstances)
                    effectiveInstances = numSourceInstances + numTargetInstances;
                //System.out.println(effectiveInstances);               
                sample.randomize(randomInstance);
                Instances q = new Instances(sample, 0, effectiveInstances);
                sample = q;
            }
        }
        try {
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sample);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("E: " + e);
        }

        if (doBagging)
            beta = 0.4 / .6; //always same beta
        else
            beta = setWeights(data, m_Classifiers[m_NumIterationsPerformed], -1, numSourceInstances, true);

        // Stop if error too small or error too big and ignore this model
        if (beta < 0) { //setWeights indicates a problem with negative beta
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model

        m_Betas[m_NumIterationsPerformed] = Math.log(1 / beta);

    }

    betaSum = 0;

    for (int i = 0; i < m_NumIterationsPerformed; i++)
        betaSum += m_Betas[i];
}
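
Here each model's vote weight is Math.log(1 / beta), accumulated into betaSum. When beta takes the standard AdaBoost.M1 value epsilon / (1 - epsilon), this equals the log((1 - epsilon) / epsilon) weight used in the OverBoostM1 example below. A quick numeric check of that relationship (plain Java, no Weka dependency; the epsilon value is made up, since setWeights is not shown here):

public class BetaDemo {
    public static void main(String[] args) {
        double epsilon = 0.2;                  // weighted error rate of one model
        double beta = epsilon / (1 - epsilon); // AdaBoost.M1 beta = 0.25
        double vote = Math.log(1 / beta);      // model vote weight
        System.out.println(vote);              // prints ln(4), about 1.386
    }
}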

From source file:com.reactivetechnologies.analytics.core.eval.BaggingWithBuiltClassifiers.java

License:Open Source License

@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    /** Changed here: Use supplied classifier */
    //super.buildClassifier(data);
    /** End change */

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    int bagSize = (int) (data.numInstances() * (m_BagSizePercent / 100.0));
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = data.resampleWithWeights(random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        /** Changed here: Use supplied classifier */
        /*if (m_Classifier instanceof Randomizable) {
          ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
                
        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);*/
        /** End change */
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;

                voteCount++;
                // double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric) {
                    // votes[0] += pred;
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // votes[(int) pred]++;
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue()) * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
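
When the bag size is below 100%, both bagging examples shrink the bootstrap sample by randomizing it and copying a prefix. A minimal sketch of just that step (the class and method names are illustrative):

import java.util.Random;

import weka.core.Instances;

public class BagTruncate {

    // Keeps only the first bagSize instances of a shuffled bootstrap sample.
    static Instances truncate(Instances bagData, int bagSize, Random random) {
        if (bagSize >= bagData.numInstances()) {
            return bagData; // nothing to trim
        }
        bagData.randomize(random);                 // shuffle in place
        return new Instances(bagData, 0, bagSize); // copy the first bagSize instances
    }
}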

From source file:gyc.OverBoostM1.java

License:Open Source License

/**
 * Boosting method. Boosts using resampling
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            //
            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC="+nMin+"; majC="+nMaj);
            /*
             * balance the data that boosting generates for training the base classifier
             */
            //System.out.println("before:"+classNum[0]+"-"+classNum[1]);
            Instances sampleData = randomSampling(sample, majC, minC, nMaj, nMaj, randomInstance);
            //classNum =sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:"+classNum[0]+"-"+classNum[1]);

            // Build and evaluate classifier
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);

            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + "  beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}

From source file:gyc.UnderOverBoostM1.java

License:Open Source License

/**
 * Boosting method. Boosts using resampling
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    int b = 10; // only referenced by the commented-out schedule below
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            //
            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC="+nMin+"; majC="+nMaj);
            /*
             * balance the data that boosting generates for training the base classifier
             */
            //System.out.println("before:"+classNum[0]+"-"+classNum[1]);
            double pb = 100.0 * (nMin + nMaj) / 2 / nMaj;
            /* if (m_NumIterationsPerformed + 1 > (m_Classifiers.length / 10))
                 b += 10;
               (pb% of nMaj) instances are taken from each class */
            Instances sampleData = randomSampling(sample, majC, minC, (int) pb, randomInstance);

            //classNum =sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:"+classNum[0]+"-"+classNum[1]);

            // Build and evaluate classifier
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);

            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + "  beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
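
A quick check of the balancing percentage computed above: with nMin = 20 and nMaj = 80, pb = 100 * (20 + 80) / 2 / 80 = 62.5, so randomSampling draws 62.5% of the majority count, i.e. 50 instances from each class, yielding a balanced sample the same size as the original data.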