List of usage examples for weka.core Instances deleteWithMissingClass
public void deleteWithMissingClass()
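Every example below follows the same pattern: copy the dataset (so the caller's Instances object is not mutated), then call deleteWithMissingClass() before training. A minimal standalone sketch of that pattern (the file name and the class-index choice are illustrative assumptions, not taken from any example below):

import java.io.FileReader;
import weka.core.Instances;

public class DeleteWithMissingClassDemo {
    public static void main(String[] args) throws Exception {
        // load a dataset; the file name is a placeholder for illustration
        Instances data = new Instances(new FileReader("training.arff"));
        // deleteWithMissingClass() requires the class attribute to be set;
        // the last attribute is assumed to be the class here
        data.setClassIndex(data.numAttributes() - 1);

        // copy first, as most examples below do, so the original is untouched
        Instances train = new Instances(data);
        train.deleteWithMissingClass();

        System.out.println("before: " + data.numInstances()
                + "  after: " + train.numInstances());
    }
}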
From source file:dewaweebtreeclassifier.veranda.VerandaTree.java
/**
 * @param data the training data
 */
@Override
public void buildClassifier(Instances data) {
    // remove all instances with missing class value
    data.deleteWithMissingClass();
    buildTree(data);
}
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Boosting method.
 *
 * @param data the training data to be used for generating the
 *             boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    super.buildClassifier(data);

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    m_NumClasses = data.numClasses();
    if (m_NumClasses != 2)
        System.err.println("Can only build model for binary class data");

    /* We do not use the method buildClassifierWithWeights because some base
       learning algorithms might not be able to deal with weighted data. */
    buildClassifierUsingResampling(data);
}
From source file:gyc.SMOTEBagging.java
License:Open Source License
/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the
 *             bagged classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    int b = 0;
    for (int j = 0; j < m_Classifiers.length; j++) {

        int classNum[] = data.attributeStats(data.classIndex()).nominalCounts;
        int minC, nMin = classNum[0];
        int majC, nMaj = classNum[1];
        if (nMin < nMaj) {
            minC = 0;
            majC = 1;
        } else {
            minC = 1;
            majC = 0;
            nMin = classNum[1];
            nMaj = classNum[0];
        }

        b = b + 10;
        Instances bagData = randomSampling(data, majC, minC, b, random);

        /*
        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = resampleWithWeights(data, random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
        */

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //classNum = bagData.attributeStats(bagData.classIndex()).nominalCounts;
        //System.out.println("after:" + classNum[0] + "-" + classNum[1]);
    }

    // calc OOB error?
    // note: inBag[j] is only populated by the commented-out resampling code
    // above, so the out-of-bag computation below depends on that path
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;
                voteCount++;
                double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric)
                    votes[0] += pred;
                else
                    votes[(int) pred]++;
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                vote = Utils.maxIndex(votes); // majority vote
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue())
                        * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
From source file:hr.irb.fastRandomForest.FastRandomForest.java
License:Open Source License
/**
 * Builds a classifier for a set of instances.
 *
 * @param data the instances to train the classifier with
 *
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    /* Save header with attribute info. Can be accessed later by FastRfTrees
     * through their m_MotherForest field. */
    m_Info = new Instances(data, 0);

    m_bagger = new FastRfBagging();

    // Set up the tree options which are held in the motherForest.
    m_KValue = m_numFeatures;
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    FastRandomTree rTree = new FastRandomTree();
    rTree.m_MotherForest = this; // allows to retrieve KValue and MaxDepth

    // some temporary arrays which need to be separate for every tree, so
    // that the trees can be trained in parallel in different threads

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setComputeImportances(this.getComputeImportances());
    m_bagger.buildClassifier(data, m_NumThreads, this);
}
From source file:hr.irb.fastRandomForest.NakedFastRandomForest.java
License:Open Source License
/**
 * Builds a classifier for a set of instances.
 *
 * Copy-pasted from FastRandomForest, except that it uses
 * NakedFastRandomTree as the mother classifier.
 *
 * @param data the instances to train the classifier with
 *
 * @throws Exception if something goes wrong
 */
@Override
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    /* Save header with attribute info. Can be accessed later by FastRfTrees
     * through their m_MotherForest field. */
    m_Info = new Instances(data, 0);

    m_bagger = new NakedFastRfBagging();

    // Set up the tree options which are held in the motherForest.
    m_KValue = m_numFeatures;
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    // [jhostetler] This line is the only change from FastRandomForest.buildClassifier
    final FastRandomTree rTree = new NakedFastRandomTree();
    rTree.m_MotherForest = this; // allows to retrieve KValue and MaxDepth

    // some temporary arrays which need to be separate for every tree, so
    // that the trees can be trained in parallel in different threads

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setComputeImportances(this.getComputeImportances());
    ((NakedFastRfBagging) m_bagger).buildClassifier(data, m_NumThreads, this);
}
From source file:hr.irb.fastRandomForest.NakedFastRfBagging.java
License:Open Source License
/**
 * Bagging method. Produces DataCache objects with bootstrap samples of the
 * original data, and feeds them to the base classifier (which can only be a
 * FastRandomTree).
 *
 * @param data         The training set to be used for generating the bagged
 *                     classifier.
 * @param numThreads   The number of simultaneous threads to use for
 *                     computation. Pass zero (0) for autodetection.
 * @param motherForest A reference to the FastRandomForest object that
 *                     invoked this.
 *
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, final int numThreads,
        final NakedFastRandomForest motherForest) throws Exception {

    // can classifier handle the vals?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    if (!(m_Classifier instanceof NakedFastRandomTree))
        throw new IllegalArgumentException("The NakedFastRfBagging class accepts "
                + "only NakedFastRandomTree as its base classifier.");

    /* We fill the m_Classifiers array by creating lots of trees with new()
     * because this is much faster than using serialization to deep-copy the
     * one tree in m_Classifier - this is what the super.buildClassifier(data)
     * normally does. */
    m_Classifiers = new Classifier[m_NumIterations];
    for (int i = 0; i < m_Classifiers.length; i++) {
        final NakedFastRandomTree curTree = new NakedFastRandomTree();
        // all parameters for training will be looked up in the motherForest
        // (maxDepth, k_Value)
        curTree.m_MotherForest = motherForest;
        // 0.99: reference to these arrays will get passed down all nodes so
        // the array can be re-used
        // 0.99: this array is of size two as now all splits are binary -
        // even categorical ones
        curTree.tempProps = new double[2];
        curTree.tempDists = new double[2][];
        curTree.tempDists[0] = new double[data.numClasses()];
        curTree.tempDists[1] = new double[data.numClasses()];
        curTree.tempDistsOther = new double[2][];
        curTree.tempDistsOther[0] = new double[data.numClasses()];
        curTree.tempDistsOther[1] = new double[data.numClasses()];
        m_Classifiers[i] = curTree;
    }

    // this was SLOW.. takes approx 1/2 time as training the forest afterwards (!!!)
    // super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    // sorting is performed inside this constructor
    final DataCache myData = new DataCache(data);

    final int bagSize = data.numInstances() * m_BagSizePercent / 100;
    final Random random = new Random(m_Seed);

    final boolean[][] inBag = new boolean[m_Classifiers.length][];

    // thread management
    final ExecutorService threadPool = Executors.newFixedThreadPool(
            numThreads > 0 ? numThreads : Runtime.getRuntime().availableProcessors());
    final List<Future<?>> futures = new ArrayList<Future<?>>(m_Classifiers.length);

    try {
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            // create the in-bag dataset (and be sure to remember what's in bag)
            // for computing the out-of-bag error later
            final DataCache bagData = myData.resample(bagSize, random);
            bagData.reusableRandomGenerator =
                    bagData.getRandomNumberGenerator(random.nextInt());
            inBag[treeIdx] = bagData.inBag; // store later for OOB error calculation

            // build the classifier
            if (m_Classifiers[treeIdx] instanceof NakedFastRandomTree) {
                final FastRandomTree aTree = (FastRandomTree) m_Classifiers[treeIdx];
                aTree.data = bagData;
                final Future<?> future = threadPool.submit(aTree);
                futures.add(future);
            } else {
                throw new IllegalArgumentException("The FastRfBagging class accepts "
                        + "only NakedFastRandomTree as its base classifier.");
            }
        }

        // make sure all trees have been trained before proceeding
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            futures.get(treeIdx).get();
        }

        // [jhostetler] 'm_FeatureImportances' and 'computeOOBError()' are
        // private, so we'll just not compute them.

        // calc OOB error?
        // if( getCalcOutOfBag() || getComputeImportances() ) {
        //     // m_OutOfBagError = computeOOBError(data, inBag, threadPool);
        //     m_OutOfBagError = computeOOBError( myData, inBag, threadPool );
        // }
        // else {
        //     m_OutOfBagError = 0;
        // }

        // // calc feature importances
        // m_FeatureImportances = null;
        // // m_FeatureNames = null;
        // if( getComputeImportances() ) {
        //     m_FeatureImportances = new double[data.numAttributes()];
        //     // m_FeatureNames = new String[data.numAttributes()];
        //     // Instances dataCopy = new Instances(data); // To scramble
        //     // int[] permutation = FastRfUtils.randomPermutation(data.numInstances(), random);
        //     for( int j = 0; j < data.numAttributes(); j++ ) {
        //         if( j != data.classIndex() ) {
        //             // double sError = computeOOBError(FastRfUtils.scramble(data, dataCopy, j, permutation), inBag, threadPool);
        //             // double sError = computeOOBError(data, inBag, threadPool, j, 0);
        //             final float[] unscrambled = myData.scrambleOneAttribute( j, random );
        //             final double sError = computeOOBError( myData, inBag, threadPool );
        //             myData.vals[j] = unscrambled; // restore the original state
        //             m_FeatureImportances[j] = sError - m_OutOfBagError;
        //         }
        //         // m_FeatureNames[j] = data.attribute(j).name();
        //     }
        // }

        threadPool.shutdown();
    } finally {
        threadPool.shutdownNow();
    }
}
From source file:id3.MyID3.java
/**
 * Builds the decision tree.
 *
 * @param instances the training data
 * @throws Exception
 */
@Override
public void buildClassifier(Instances instances) throws Exception {
    // Check if classifier can handle the data
    getCapabilities().testWithFail(instances);

    // Remove instances with missing class value
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // Gather the list of attributes in the instances
    ArrayList<Attribute> remainingAttributes = new ArrayList<>();
    Enumeration enumAttributes = instances.enumerateAttributes();
    while (enumAttributes.hasMoreElements()) {
        remainingAttributes.add((Attribute) enumAttributes.nextElement());
    }

    // Start building the ID3 classifier
    buildMyID3(instances, remainingAttributes);
}
From source file:imba.classifier.FFNNTubes.java
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);

    data.deleteWithMissingClass();

    nAttribute = data.numAttributes() - 1;
    nOutput = data.numClasses();
    nData = data.size();

    // set target data
    setTarget(data);

    // generate random initial weights
    generateRandomWeight();

    // normalize the data
    Normalize norm = new Normalize();
    Filter filter = new NominalToBinary();

    norm.setInputFormat(data);
    Instances filteredData = Filter.useFilter(data, norm);

    try {
        filter.setInputFormat(filteredData);
        for (Instance i1 : filteredData) {
            filter.input(i1);
        }
        filter.batchFinished();
        // note: the NominalToBinary output is never retrieved; training below
        // uses the normalized (but not binarized) filteredData
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    int z = 0;
    double valMSE = 100.0;
    while ((z <= nEpoch) && (valMSE >= 0.00001)) {
        for (int j = 0; j < nData; j++) {
            feedForward(filteredData.get(j));

            if (nHidden == 0) {
                updateWeight(target[j]);
            } else {
                backPropagation(target[j]);
            }
        }

        countError(filteredData);
        valMSE = countMSE(filteredData);
        System.out.println("ACCURACY " + z + " : " + accuracy);
        System.out.println("MSE " + z + " : " + valMSE);
        z++;
    }
}
From source file:iris.ID3.java
@Override
public void buildClassifier(Instances instance) throws Exception {
    instance = new Instances(instance);
    // Removes instances with a missing class value
    instance.deleteWithMissingClass();
    makeLikeAWhat(instance);
}
From source file:j48.C45PruneableClassifierTree.java
License:Open Source License
/**
 * Method for building a pruneable classifier tree.
 *
 * @param data the data for building the tree
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier tree handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    buildTree(data, m_subtreeRaising || !m_cleanup);
    collapse();
    if (m_pruneTheTree) {
        prune();
    }
    if (m_cleanup) {
        cleanup(new Instances(data, 0));
    }
}