Usage examples for weka.core.Instances.deleteWithMissingClass()
public void deleteWithMissingClass()
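All of the listings below share the same defensive pattern: copy the dataset, then delete in place so the caller's data is untouched. Here is a minimal, self-contained sketch of that pattern; the ARFF path and the last-attribute class index are illustrative assumptions, and note that deleteWithMissingClass() throws UnassignedClassException when no class index has been set.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DeleteWithMissingClassExample {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("iris.arff");  // illustrative path
        data.setClassIndex(data.numAttributes() - 1);   // assume the class is the last attribute

        Instances copy = new Instances(data);           // copy first: deletion happens in place
        copy.deleteWithMissingClass();                  // drops every instance whose class value is missing

        System.out.println(data.numInstances() + " -> " + copy.numInstances());
    }
}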
From source file:com.reactivetechnologies.analytics.core.eval.AdaBoostM1WithBuiltClassifiers.java
License:Open Source License
@Override
public void buildClassifier(Instances data) throws Exception {
    /** Changed here: Using the provided classifiers */
    /** End */

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    m_NumClasses = data.numClasses();

    if ((!m_UseResampling) && (m_Classifier instanceof WeightedInstancesHandler)) {
        buildClassifierWithWeights(data);
    } else {
        buildClassifierUsingResampling(data);
    }
}
From source file:com.reactivetechnologies.analytics.core.eval.BaggingWithBuiltClassifiers.java
License:Open Source License
@Override
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    /** Changed here: Use supplied classifier */
    //super.buildClassifier(data);
    /** End change */

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    int bagSize = (int) (data.numInstances() * (m_BagSizePercent / 100.0));
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    for (int j = 0; j < m_Classifiers.length; j++) {
        Instances bagData = null;

        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = data.resampleWithWeights(random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }

        /** Changed here: Use supplied classifier */
        /*if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);*/
        /** End change */
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;
                voteCount++;

                // double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric) {
                    // votes[0] += pred;
                    votes[0] += m_Classifiers[j].classifyInstance(data.instance(i));
                } else {
                    // votes[(int) pred]++;
                    double[] newProbs = m_Classifiers[j].distributionForInstance(data.instance(i));
                    // average the probability estimates
                    for (int k = 0; k < newProbs.length; k++) {
                        votes[k] += newProbs[k];
                    }
                }
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                if (!Utils.eq(Utils.sum(votes), 0)) {
                    Utils.normalize(votes);
                }
                vote = Utils.maxIndex(votes); // predicted class
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue())
                        * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
From source file:com.reactivetechnologies.analytics.core.eval.StackingWithBuiltClassifiers.java
License:Open Source License
/**
 * buildClassifier selects a classifier from the set of classifiers
 * by minimising error on the training data.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    if (m_MetaClassifier == null) {
        throw new IllegalArgumentException("No meta classifier has been set");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    Instances newData = new Instances(data);
    m_BaseFormat = new Instances(data, 0);
    newData.deleteWithMissingClass();

    Random random = new Random(m_Seed);
    newData.randomize(random);
    if (newData.classAttribute().isNominal()) {
        newData.stratify(m_NumFolds);
    }

    // Create meta level
    generateMetaLevel(newData, random);

    /** Changed here */
    // DO NOT rebuild all the base classifiers on the full training data
    /*for (int i = 0; i < m_Classifiers.length; i++) {
        getClassifier(i).buildClassifier(newData);
    }*/
    /** End change */
}
From source file:com.reactivetechnologies.analytics.core.eval.VotingWithBuiltClassifiers.java
License:Open Source License
/**
 * buildClassifier selects a classifier from the set of classifiers
 * by minimising error on the training data.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
@Override
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    Instances newData = new Instances(data);
    newData.deleteWithMissingClass();

    m_Random = new Random(getSeed());

    /** Changed here */
    /*for (int i = 0; i < m_Classifiers.length; i++) {
        getClassifier(i).buildClassifier(newData);
    }*/
    /** End change */
}
From source file:com.tum.classifiertest.FastRandomForest.java
License:Open Source License
/**
 * Builds a classifier for a set of instances.
 *
 * @param data the instances to train the classifier with
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    /* Save header with attribute info. Can be accessed later by FastRfTrees
     * through their m_MotherForest field. */
    setM_Info(new Instances(data, 0));

    m_bagger = new FastRfBagging();

    // Set up the tree options which are held in the motherForest.
    m_KValue = m_numFeatures;
    if (m_KValue > data.numAttributes() - 1)
        m_KValue = data.numAttributes() - 1;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

    FastRandomTree rTree = new FastRandomTree();
    rTree.m_MotherForest = this; // allows to retrieve KValue and MaxDepth

    // some temporary arrays which need to be separate for every tree, so
    // that the trees can be trained in parallel in different threads

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setComputeImportances(this.getComputeImportances());
    m_bagger.buildClassifier(data, m_NumThreads, this);
}
From source file:com.tum.classifiertest.FastRfBagging.java
License:Open Source License
/**
 * Bagging method. Produces DataCache objects with bootstrap samples of
 * the original data, and feeds them to the base classifier (which can only
 * be a FastRandomTree).
 *
 * @param data The training set to be used for generating the
 * bagged classifier.
 * @param numThreads The number of simultaneous threads to use for
 * computation. Pass zero (0) for autodetection.
 * @param motherForest A reference to the FastRandomForest object that
 * invoked this.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, int numThreads, FastRandomForest motherForest)
        throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    if (!(m_Classifier instanceof FastRandomTree))
        throw new IllegalArgumentException("The FastRfBagging class accepts "
                + "only FastRandomTree as its base classifier.");

    /* We fill the m_Classifiers array by creating lots of trees with new()
     * because this is much faster than using serialization to deep-copy the
     * one tree in m_Classifier - this is what the super.buildClassifier(data)
     * normally does. */
    m_Classifiers = new Classifier[m_NumIterations];
    for (int i = 0; i < m_Classifiers.length; i++) {
        FastRandomTree curTree = new FastRandomTree();
        // all parameters for training will be looked up in the motherForest (maxDepth, k_Value)
        curTree.m_MotherForest = motherForest;
        // 0.99: reference to these arrays will get passed down all nodes so the array can be re-used
        // 0.99: this array is of size two as now all splits are binary - even categorical ones
        curTree.tempProps = new double[2];
        curTree.tempDists = new double[2][];
        curTree.tempDists[0] = new double[data.numClasses()];
        curTree.tempDists[1] = new double[data.numClasses()];
        curTree.tempDistsOther = new double[2][];
        curTree.tempDistsOther[0] = new double[data.numClasses()];
        curTree.tempDistsOther[1] = new double[data.numClasses()];
        m_Classifiers[i] = curTree;
    }

    // this was SLOW.. takes approx 1/2 time as training the forest afterwards (!!!)
    // super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    // sorting is performed inside this constructor
    DataCache myData = new DataCache(data);

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    Random random = new Random(m_Seed);
    boolean[][] inBag = new boolean[m_Classifiers.length][];

    // thread management
    ExecutorService threadPool = Executors.newFixedThreadPool(
            numThreads > 0 ? numThreads : Runtime.getRuntime().availableProcessors());
    List<Future<?>> futures = new ArrayList<Future<?>>(m_Classifiers.length);

    try {
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            // create the in-bag dataset (and be sure to remember what's in bag)
            // for computing the out-of-bag error later
            DataCache bagData = myData.resample(bagSize, random);
            bagData.reusableRandomGenerator = bagData.getRandomNumberGenerator(random.nextInt());
            inBag[treeIdx] = bagData.inBag; // store later for OOB error calculation

            // build the classifier
            if (m_Classifiers[treeIdx] instanceof FastRandomTree) {
                FastRandomTree aTree = (FastRandomTree) m_Classifiers[treeIdx];
                aTree.data = bagData;
                Future<?> future = threadPool.submit(aTree);
                futures.add(future);
            } else {
                throw new IllegalArgumentException("The FastRfBagging class accepts "
                        + "only FastRandomTree as its base classifier.");
            }
        }

        // make sure all trees have been trained before proceeding
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            futures.get(treeIdx).get();
        }

        // calc OOB error?
        if (getCalcOutOfBag() || getComputeImportances()) {
            //m_OutOfBagError = computeOOBError(data, inBag, threadPool);
            m_OutOfBagError = computeOOBError(myData, inBag, threadPool);
        } else {
            m_OutOfBagError = 0;
        }

        // calc feature importances
        m_FeatureImportances = null;
        //m_FeatureNames = null;
        if (getComputeImportances()) {
            m_FeatureImportances = new double[data.numAttributes()];
            //m_FeatureNames = new String[data.numAttributes()];
            //Instances dataCopy = new Instances(data); // to scramble
            //int[] permutation = FastRfUtils.randomPermutation(data.numInstances(), random);
            for (int j = 0; j < data.numAttributes(); j++) {
                if (j != data.classIndex()) {
                    //double sError = computeOOBError(FastRfUtils.scramble(data, dataCopy, j, permutation), inBag, threadPool);
                    //double sError = computeOOBError(data, inBag, threadPool, j, 0);
                    float[] unscrambled = myData.scrambleOneAttribute(j, random);
                    double sError = computeOOBError(myData, inBag, threadPool);
                    myData.vals[j] = unscrambled; // restore the original state
                    m_FeatureImportances[j] = sError - m_OutOfBagError;
                }
                //m_FeatureNames[j] = data.attribute(j).name();
            }
        }

        threadPool.shutdown();
    } finally {
        threadPool.shutdownNow();
    }
}
From source file:com.walmart.productgenome.matching.models.EMSRandomForest.java
License:Open Source License
/**
 * Builds a classifier for a set of instances.
 *
 * @param data the instances to train the classifier with
 * @throws Exception if something goes wrong
 */
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    m_bagger = new Bagging();
    RandomTree rTree = new RandomTree();

    // set up the random tree options
    m_KValue = m_numFeatures;
    if (m_KValue < 1)
        m_KValue = (int) Utils.log2(data.numAttributes()) + 1;
    rTree.setKValue(m_KValue);
    rTree.setMaxDepth(getMaxDepth());

    // set up the bagger and build the forest
    m_bagger.setClassifier(rTree);
    m_bagger.setSeed(m_randomSeed);
    m_bagger.setNumIterations(m_numTrees);
    m_bagger.setCalcOutOfBag(true);
    m_bagger.setNumExecutionSlots(m_numExecutionSlots);
    m_bagger.buildClassifier(data);
}
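EMSRandomForest above is a lightly modified copy of WEKA's own RandomForest, which performs the same copy-and-delete cleanup inside buildClassifier(). A hedged usage sketch for the stock class follows; the file path is illustrative, and the tree-count setter is setNumIterations in WEKA 3.8+ (older releases use setNumTrees).

import weka.classifiers.trees.RandomForest;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class StockRandomForestDemo {
    public static void main(String[] args) throws Exception {
        Instances train = DataSource.read("train.arff"); // illustrative path
        train.setClassIndex(train.numAttributes() - 1);  // assume the class is the last attribute

        RandomForest rf = new RandomForest();
        rf.setNumIterations(100);  // number of trees (WEKA 3.8+ API)
        rf.buildClassifier(train); // copies the data and calls deleteWithMissingClass() internally
    }
}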
From source file:core.classifier.MyFirstClassifier.java
License:Open Source License
/**
 * Method for building the classifier. Implements a one-against-one
 * wrapper for multi-class problems.
 *
 * @param insts the set of training instances
 * @throws Exception if the classifier can't be built successfully
 */
public void buildClassifier(Instances insts) throws Exception {
    if (!m_checksTurnedOff) {
        // can classifier handle the data?
        getCapabilities().testWithFail(insts);

        // remove instances with missing class
        insts = new Instances(insts);
        insts.deleteWithMissingClass();

        /* Removes all the instances with weight equal to 0.
           MUST be done since condition (8) of Keerthi's paper
           is made with the assertion Ci > 0 (see equation (3a)). */
        Instances data = new Instances(insts, insts.numInstances());
        for (int i = 0; i < insts.numInstances(); i++) {
            if (insts.instance(i).weight() > 0)
                data.add(insts.instance(i));
        }
        if (data.numInstances() == 0) {
            throw new Exception("No training instances left after removing "
                    + "instances with weight 0!");
        }
        insts = data;
    }

    if (!m_checksTurnedOff) {
        m_Missing = new ReplaceMissingValues();
        m_Missing.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Missing);
    } else {
        m_Missing = null;
    }

    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
        boolean onlyNumeric = true;
        if (!m_checksTurnedOff) {
            for (int i = 0; i < insts.numAttributes(); i++) {
                if (i != insts.classIndex()) {
                    if (!insts.attribute(i).isNumeric()) {
                        onlyNumeric = false;
                        break;
                    }
                }
            }
        }

        if (!onlyNumeric) {
            m_NominalToBinary = new NominalToBinary();
            m_NominalToBinary.setInputFormat(insts);
            insts = Filter.useFilter(insts, m_NominalToBinary);
        } else {
            m_NominalToBinary = null;
        }
    } else {
        m_NominalToBinary = null;
    }

    if (m_filterType == FILTER_STANDARDIZE) {
        m_Filter = new Standardize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else if (m_filterType == FILTER_NORMALIZE) {
        m_Filter = new Normalize();
        m_Filter.setInputFormat(insts);
        insts = Filter.useFilter(insts, m_Filter);
    } else {
        m_Filter = null;
    }

    m_classIndex = insts.classIndex();
    m_classAttribute = insts.classAttribute();
    m_KernelIsLinear = (m_kernel instanceof PolyKernel)
            && (((PolyKernel) m_kernel).getExponent() == 1.0);

    // Generate subsets representing each class
    Instances[] subsets = new Instances[insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i] = new Instances(insts, insts.numInstances());
    }
    for (int j = 0; j < insts.numInstances(); j++) {
        Instance inst = insts.instance(j);
        subsets[(int) inst.classValue()].add(inst);
    }
    for (int i = 0; i < insts.numClasses(); i++) {
        subsets[i].compactify();
    }

    // Build the binary classifiers
    Random rand = new Random(m_randomSeed);
    m_classifiers = new BinarySMO[insts.numClasses()][insts.numClasses()];
    for (int i = 0; i < insts.numClasses(); i++) {
        for (int j = i + 1; j < insts.numClasses(); j++) {
            m_classifiers[i][j] = new BinarySMO();
            m_classifiers[i][j].setKernel(Kernel.makeCopy(getKernel()));
            Instances data = new Instances(insts, insts.numInstances());
            for (int k = 0; k < subsets[i].numInstances(); k++) {
                data.add(subsets[i].instance(k));
            }
            for (int k = 0; k < subsets[j].numInstances(); k++) {
                data.add(subsets[j].instance(k));
            }
            data.compactify();
            data.randomize(rand);
            m_classifiers[i][j].buildClassifier(data, i, j,
                    m_fitLogisticModels, m_numFolds, m_randomSeed);
        }
    }
}
From source file:decisiontree.MyC45.java
public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // handle instances with missing class
    instances = new Instances(instances);
    instances.deleteWithMissingClass();

    // handle missing values
    Instances processedInstances = handleMissingValues(instances);
    makeTree(processedInstances);
}
From source file:decisiontree.MyID3.java
@Override
public void buildClassifier(Instances data) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    makeTree(data);
}
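The effect is easy to verify without any file I/O. Below is a self-contained sketch (relation and attribute names are illustrative) that builds a two-instance dataset in code; a DenseInstance starts with all values missing, so the second instance's class stays unset and deleteWithMissingClass() removes it.

import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class DeleteWithMissingClassDemo {
    public static void main(String[] args) {
        // two numeric predictors plus a nominal class
        ArrayList<String> labels = new ArrayList<String>();
        labels.add("yes");
        labels.add("no");
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        atts.add(new Attribute("x1"));
        atts.add(new Attribute("x2"));
        atts.add(new Attribute("class", labels));

        Instances data = new Instances("demo", atts, 2);
        data.setClassIndex(data.numAttributes() - 1);

        Instance labelled = new DenseInstance(3);
        labelled.setDataset(data);
        labelled.setValue(0, 1.0);
        labelled.setValue(1, 2.0);
        labelled.setValue(2, "yes"); // class value set
        data.add(labelled);

        Instance unlabelled = new DenseInstance(3);
        unlabelled.setDataset(data);
        unlabelled.setValue(0, 3.0);
        unlabelled.setValue(1, 4.0); // class left missing
        data.add(unlabelled);

        data.deleteWithMissingClass(); // drops the unlabelled instance
        System.out.println(data.numInstances()); // prints 1
    }
}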