List of usage examples for weka.core Instances resampleWithWeights
public Instances resampleWithWeights(Random random, double[] weights)
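Before the full examples, a minimal self-contained sketch of the call itself may help. This is an illustration only: the file name "data.arff" and the class name ResampleDemo are placeholders, and uniform weights are used so the call reduces to a plain bootstrap.

import java.util.Random;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ResampleDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // one weight per instance; resampleWithWeights draws numInstances()
        // instances with replacement, with probability proportional to
        // these weights
        double[] weights = new double[data.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = 1.0 / weights.length; // uniform = plain bootstrap
        }

        Instances sample = data.resampleWithWeights(new Random(42), weights);
        System.out.println("Sample size: " + sample.numInstances());
    }
}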
From source file:BaggingImprove.java
/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the bagged
 * classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    //data.deleteWithMissingClass();
    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    System.out.println("Classifier length " + m_Classifiers.length);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    System.out.println("Bag size " + bagSize);

    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag) {
        inBag = new boolean[m_Classifiers.length][];
    }

    // writer used to dump each bootstrap sample to a file
    BufferedWriter writer = new BufferedWriter(new FileWriter("Bootstrap.txt"));

    for (int j = 0; j < m_Classifiers.length; j++) {

        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            // the boolean[] overload records which instances end up in the bag
            inBag[j] = new boolean[data.numInstances()];
            bagData = data.resampleWithWeights(random, inBag[j]);
        } else {
            System.out.println("Not m_CalcOutOfBag");
            System.out.println("Please configure code inside!");
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        if (m_Classifier instanceof Randomizable) {
            System.out.println("Randomizable");
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }

        // write the bootstrap sample into the file
        writer.write("Bootstrap " + j);
        writer.newLine();
        writer.write(bagData.toString());
        writer.newLine();
        System.out.println("Successfully saved the bootstrap to file");
        System.out.println("Bootstrap " + (j + 1)); // fixed: parenthesized (j + 1)

        for (int b = 1; b < bagData.numInstances(); b++) {
            System.out.println("" + bagData.instance(b));
        }

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
    }
    writer.flush();
    writer.close();

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric) {
                votes = new double[1];
            } else {
                votes = new double[data.numClasses()];
            }

            // determine predictions for the instance, using only the
            // classifiers whose bag did not contain it
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i]) {
                    continue;
                }
                voteCount++;

                if (numeric) {
                    // accumulate predictions (fixed: was "=", which discarded
                    // all but the last vote before averaging)
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // average the probability estimates
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }
            System.out.println("Vote count " + voteCount);

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
                System.out.println("Vote " + vote);
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue())
                        * data.instance(i).weight();
            } else if (vote != data.instance(i).classValue()) {
                System.out.println("Last vote " + data.instance(i).classValue());
                errorSum += data.instance(i).weight();
            }
        }
        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
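For reference, here is a minimal sketch of the in-bag bookkeeping the example above relies on. It is an illustration only: the file name "data.arff" and the class name InBagDemo are placeholders, and the seed value is arbitrary.

import java.util.Random;

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class InBagDemo {
    public static void main(String[] args) throws Exception {
        // "data.arff" is a placeholder path
        Instances data = DataSource.read("data.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // resampleWithWeights(Random, boolean[]) draws a bootstrap sample and
        // records, for each original index, whether that instance was drawn
        boolean[] inBag = new boolean[data.numInstances()];
        Instances bagData = data.resampleWithWeights(new Random(1), inBag);

        // the out-of-bag set is every index i with inBag[i] == false; those
        // instances serve as a held-out test set for this bag's classifier
        int outOfBag = 0;
        for (int i = 0; i < inBag.length; i++) {
            if (!inBag[i]) {
                outOfBag++;
            }
        }
        System.out.println("Bag size: " + bagData.numInstances());
        System.out.println("Out-of-bag instances: " + outOfBag);
    }
}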
From source file:Pair.java
License:Open Source License
private void doCV(Instances targetData) throws Exception {
    System.out.println();
    System.out.flush();

    int numSourceInstances = m_SourceInstances.numInstances();
    int numInstances = targetData.numInstances() + numSourceInstances;
    numTargetInstances = numInstances - numSourceInstances;

    double weightSource, weightTarget;
    double initialSourceFraction;
    double[] weights = new double[numInstances];
    Random randomInstance = new Random(1);

    Instances data = new Instances(m_SourceInstances, 0, numSourceInstances);
    // Now add the target data, shallow copying the instances as they are added
    // so it doesn't mess up the weights for anyone else
    Enumeration enumer = targetData.enumerateInstances();
    while (enumer.hasMoreElements()) {
        Instance instance = (Instance) enumer.nextElement();
        data.add(instance);
    }

    if (sourceRatio < 0) { // weight all equally
        weightSource = weightTarget = 1.0 /* /numInstances */;
        initialSourceFraction = numSourceInstances / (double) numInstances;
    } else {
        double totalWeight = 1 + sourceRatio;
        weightSource = sourceRatio / totalWeight /* /numSourceInstances */;
        weightTarget = 1.0 / totalWeight /* /numTargetInstances */;
        initialSourceFraction = weightSource;
    }
    for (int j = 0; j < numInstances; j++) {
        Instance instance = data.instance(j);
        if (j < numSourceInstances)
            instance.setWeight(weightSource);
        else
            instance.setWeight(weightTarget);
    }

    if (doFraction) {
        for (int it = 0; it < sourceIterations /* m_NumIterations */; it++) {

            sourceFraction = (1 - (it / (double) m_NumIterations)) * initialSourceFraction; // [same weights as regular]
            if (sourceFraction > .995)
                sourceFraction = .995;

            //double sourceWeight = (sourceFraction * numInstances) / numSourceInstances;
            double sourceWeight = (sourceFraction * numTargetInstances)
                    / (numSourceInstances * (1 - sourceFraction));
            for (int j = 0; j < numInstances; j++) {
                Instance instance = data.instance(j);
                if (j < numSourceInstances)
                    instance.setWeight(sourceWeight);
                else
                    instance.setWeight(1);
            }

            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
        }
    } else {
        for (int i = 0; i < numInstances; i++)
            weights[i] = data.instance(i).weight();
        buildClassifierWithWeights(data);
        System.out.println("Iteration -1:" + getTestError());
        for (int i = 0; i < numInstances; i++)
            data.instance(i).setWeight(weights[i]);

        for (int it = 0; it < sourceIterations; it++) {

            Instances sample = null;
            if (!resample || m_NumIterationsPerformed == 0) {
                sample = data;
            } else {
                // normalize the instance weights into a sampling distribution
                double sum = data.sumOfWeights();
                double[] sweights = new double[data.numInstances()];
                for (int i = 0; i < sweights.length; i++) {
                    sweights[i] = data.instance(i).weight() / sum;
                }
                sample = data.resampleWithWeights(randomInstance, sweights);
            }

            try {
                m_Classifiers[it].buildClassifier(sample);
            } catch (Exception e) {
                e.printStackTrace();
                System.out.println("E: " + e);
            }

            sourceFraction = initialSourceFraction * (1 - (it + 1) / (double) m_NumIterations);
            setWeights(data, m_Classifiers[it], sourceFraction, numSourceInstances, false);

            for (int i = 0; i < numInstances; i++)
                weights[i] = data.instance(i).weight();
            buildClassifierWithWeights(data);
            System.out.println("Iteration " + it + ":" + getTestError());
            for (int i = 0; i < numInstances; i++)
                data.instance(i).setWeight(weights[i]);
        }
    }
}
From source file:Pair.java
License:Open Source License
/**
 * Boosting method. Boosts any classifier that can handle weighted
 * instances.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @exception Exception if the classifier could not be built successfully
 */
protected void buildClassifierWithWeights(Instances data) throws Exception {

    Random randomInstance = new Random(0);
    double epsilon, reweight, beta = 0;
    Evaluation evaluation;
    Instances sample;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;
    int numSourceInstances = m_SourceInstances.numInstances();

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length;
            m_NumIterationsPerformed++) {

        // Build the classifier
        sample = null;
        if (!resample || m_NumIterationsPerformed == 0) {
            sample = data;
        } else {
            // normalize the instance weights into a sampling distribution
            double sum = data.sumOfWeights();
            double[] weights = new double[data.numInstances()];
            for (int i = 0; i < weights.length; i++) {
                weights[i] = data.instance(i).weight() / sum;
            }
            sample = data.resampleWithWeights(randomInstance, weights);

            if (doSampleSize) {
                int effectiveInstances = (int) (sourceFraction * weights.length + numTargetInstances);
                if (effectiveInstances > numSourceInstances + numTargetInstances)
                    effectiveInstances = numSourceInstances + numTargetInstances;
                //System.out.println(effectiveInstances);
                sample.randomize(randomInstance);
                Instances q = new Instances(sample, 0, effectiveInstances);
                sample = q;
            }
        }

        try {
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sample);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("E: " + e);
        }

        if (doBagging)
            beta = 0.4 / .6; // always the same beta
        else
            beta = setWeights(data, m_Classifiers[m_NumIterationsPerformed], -1, numSourceInstances, true);

        // Stop if error too small or error too big, and ignore this model
        if (beta < 0) { // setWeights indicates a problem with negative beta
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log(1 / beta);
    }

    betaSum = 0;
    for (int i = 0; i < m_NumIterationsPerformed; i++)
        betaSum += m_Betas[i];
}
From source file:com.reactivetechnologies.analytics.core.eval.BaggingWithBuiltClassifiers.java
License:Open Source License
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    /** Changed here: Use supplied classifier */
    //super.buildClassifier(data);
    /** End change */

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException(
                "Bag size needs to be 100% if " + "out-of-bag error is to be calculated!");
    }

    int bagSize = (int) (data.numInstances() * (m_BagSizePercent / 100.0));
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = data.resampleWithWeights(random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        /** Changed here: Use supplied classifier */
        /*if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);*/
        /** End change */
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;
                voteCount++;

                if (numeric) {
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // average the probability estimates
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue())
                        * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }
        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Boosting method. Boosts using resampling.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length;
            m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC=" + nMin + "; majC=" + nMaj);

            // balance the data which boosting generates for training the base classifier
            //System.out.println("before:" + classNum[0] + "-" + classNum[1]);
            Instances sampleData = randomSampling(sample, majC, minC, nMaj, nMaj, randomInstance);
            //classNum = sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:" + classNum[0] + "-" + classNum[1]);

            // Build and evaluate classifier
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);
            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = "
                    + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
From source file:gyc.UnderOverBoostM1.java
License:Open Source License
/**
 * Boosting method. Boosts using resampling.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    int b = 10;
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length;
            m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC=" + nMin + "; majC=" + nMaj);

            // balance the data which boosting generates for training the base classifier
            //System.out.println("before:" + classNum[0] + "-" + classNum[1]);
            double pb = 100.0 * (nMin + nMaj) / 2 / nMaj;
            /* if (m_NumIterationsPerformed + 1 > (m_Classifiers.length / 10))
                   b += 10;
               (b% * Nmaj) instances are taken from each class */
            Instances sampleData = randomSampling(sample, majC, minC, (int) pb, randomInstance);
            //classNum = sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:" + classNum[0] + "-" + classNum[1]);

            // Build and evaluate classifier
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);
            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = "
                    + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}