List of usage examples for weka.core Instances numClasses
public int numClasses()
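Before the full examples, a minimal sketch of the call itself (hedged: "iris.arff" is a placeholder path, and the choice of the last attribute as the class is an assumption, not taken from the examples below):

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumClassesDemo {
    public static void main(String[] args) throws Exception {
        // "iris.arff" is a placeholder path for any nominal-class dataset.
        Instances data = DataSource.read("iris.arff");
        // numClasses() inspects the class attribute, so set the class index first.
        data.setClassIndex(data.numAttributes() - 1);
        // For a nominal class this is the number of class labels (3 for iris);
        // Weka returns 1 when the class attribute is numeric.
        System.out.println("numClasses = " + data.numClasses());
    }
}

The examples below all use the same idiom: size an array with numClasses() and index it with the (cast) class value of each instance.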
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Boosting method.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    super.buildClassifier(data);

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    // only class? -> build ZeroR model
    if (data.numAttributes() == 1) {
        System.err.println("Cannot build model (only class attribute present in data!), "
                + "using ZeroR model instead!");
        m_ZeroR = new weka.classifiers.rules.ZeroR();
        m_ZeroR.buildClassifier(data);
        return;
    } else {
        m_ZeroR = null;
    }

    m_NumClasses = data.numClasses();
    if (m_NumClasses != 2)
        System.err.println("Can only build model for binary class data");

    /* we do not use the method buildClassifierWithWeights as we think that
       some base learning algorithms could not deal with weighted data. */
    buildClassifierUsingResampling(data);
}
From source file:gyc.SMOTEBagging.java
License:Open Source License
/**
 * Bagging method.
 *
 * @param data the training data to be used for generating the
 * bagged classifier.
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    int bagSize = data.numInstances() * m_BagSizePercent / 100;
    Random random = new Random(m_Seed);

    boolean[][] inBag = null;
    if (m_CalcOutOfBag)
        inBag = new boolean[m_Classifiers.length][];

    int b = 0;
    for (int j = 0; j < m_Classifiers.length; j++) {

        // class counts for the (binary, nominal) class attribute
        int classNum[] = data.attributeStats(data.classIndex()).nominalCounts;
        int minC, nMin = classNum[0];
        int majC, nMaj = classNum[1];
        if (nMin < nMaj) {
            minC = 0;
            majC = 1;
        } else {
            minC = 1;
            majC = 0;
            nMin = classNum[1];
            nMaj = classNum[0];
        }

        // increase the resampling rate each iteration to diversify the bags
        b = b + 10;
        Instances bagData = randomSampling(data, majC, minC, b, random);

        /*
        // create the in-bag dataset
        if (m_CalcOutOfBag) {
            inBag[j] = new boolean[data.numInstances()];
            bagData = resampleWithWeights(data, random, inBag[j]);
        } else {
            bagData = data.resampleWithWeights(random);
            if (bagSize < data.numInstances()) {
                bagData.randomize(random);
                Instances newBagData = new Instances(bagData, 0, bagSize);
                bagData = newBagData;
            }
        }
        if (m_Classifier instanceof Randomizable) {
            ((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
        }
        */

        // build the classifier
        m_Classifiers[j].buildClassifier(bagData);
        //classNum = bagData.attributeStats(bagData.classIndex()).nominalCounts;
        //System.out.println("after:" + classNum[0] + "-" + classNum[1]);
    }

    // calc OOB error?
    if (getCalcOutOfBag()) {
        double outOfBagCount = 0.0;
        double errorSum = 0.0;
        boolean numeric = data.classAttribute().isNumeric();

        for (int i = 0; i < data.numInstances(); i++) {
            double vote;
            double[] votes;
            if (numeric)
                votes = new double[1];
            else
                votes = new double[data.numClasses()];

            // determine predictions for instance
            int voteCount = 0;
            for (int j = 0; j < m_Classifiers.length; j++) {
                if (inBag[j][i])
                    continue;
                voteCount++;
                double pred = m_Classifiers[j].classifyInstance(data.instance(i));
                if (numeric)
                    votes[0] += pred;
                else
                    votes[(int) pred]++;
            }

            // "vote"
            if (numeric) {
                vote = votes[0];
                if (voteCount > 0) {
                    vote /= voteCount; // average
                }
            } else {
                vote = Utils.maxIndex(votes); // majority vote
            }

            // error for instance
            outOfBagCount += data.instance(i).weight();
            if (numeric) {
                errorSum += StrictMath.abs(vote - data.instance(i).classValue())
                        * data.instance(i).weight();
            } else {
                if (vote != data.instance(i).classValue())
                    errorSum += data.instance(i).weight();
            }
        }

        m_OutOfBagError = errorSum / outOfBagCount;
    } else {
        m_OutOfBagError = 0;
    }
}
From source file:hr.irb.fastRandomForest.NakedFastRfBagging.java
License:Open Source License
/**
 * Bagging method. Produces DataCache objects with bootstrap samples of the
 * original data, and feeds them to the base classifier (which can only be a
 * FastRandomTree).
 *
 * @param data         The training set to be used for generating the bagged
 *                     classifier.
 * @param numThreads   The number of simultaneous threads to use for
 *                     computation. Pass zero (0) for autodetection.
 * @param motherForest A reference to the FastRandomForest object that invoked this.
 *
 * @throws Exception if the classifier could not be built successfully
 */
public void buildClassifier(Instances data, final int numThreads,
        final NakedFastRandomForest motherForest) throws Exception {

    // can classifier handle the vals?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    data = new Instances(data);
    data.deleteWithMissingClass();

    if (!(m_Classifier instanceof NakedFastRandomTree))
        throw new IllegalArgumentException("The NakedFastRfBagging class accepts "
                + "only NakedFastRandomTree as its base classifier.");

    /*
     * We fill the m_Classifiers array by creating lots of trees with new()
     * because this is much faster than using serialization to deep-copy the
     * one tree in m_Classifier - this is what the super.buildClassifier(data)
     * normally does.
     */
    m_Classifiers = new Classifier[m_NumIterations];
    for (int i = 0; i < m_Classifiers.length; i++) {
        final NakedFastRandomTree curTree = new NakedFastRandomTree();
        // all parameters for training will be looked up in the motherForest
        // (maxDepth, k_Value)
        curTree.m_MotherForest = motherForest;
        // 0.99: reference to these arrays will get passed down all nodes so
        // the array can be re-used
        // 0.99: this array is of size two as now all splits are binary -
        // even categorical ones
        curTree.tempProps = new double[2];
        curTree.tempDists = new double[2][];
        curTree.tempDists[0] = new double[data.numClasses()];
        curTree.tempDists[1] = new double[data.numClasses()];
        curTree.tempDistsOther = new double[2][];
        curTree.tempDistsOther[0] = new double[data.numClasses()];
        curTree.tempDistsOther[1] = new double[data.numClasses()];
        m_Classifiers[i] = curTree;
    }

    // this was SLOW.. takes approx 1/2 time as training the forest afterwards (!!!)
    // super.buildClassifier(data);

    if (m_CalcOutOfBag && (m_BagSizePercent != 100)) {
        throw new IllegalArgumentException("Bag size needs to be 100% if "
                + "out-of-bag error is to be calculated!");
    }

    // sorting is performed inside this constructor
    final DataCache myData = new DataCache(data);

    final int bagSize = data.numInstances() * m_BagSizePercent / 100;
    final Random random = new Random(m_Seed);
    final boolean[][] inBag = new boolean[m_Classifiers.length][];

    // thread management
    final ExecutorService threadPool = Executors.newFixedThreadPool(
            numThreads > 0 ? numThreads : Runtime.getRuntime().availableProcessors());
    final List<Future<?>> futures = new ArrayList<Future<?>>(m_Classifiers.length);

    try {
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            // create the in-bag dataset (and be sure to remember what's in bag)
            // for computing the out-of-bag error later
            final DataCache bagData = myData.resample(bagSize, random);
            bagData.reusableRandomGenerator = bagData.getRandomNumberGenerator(random.nextInt());
            inBag[treeIdx] = bagData.inBag; // store later for OOB error calculation

            // build the classifier
            if (m_Classifiers[treeIdx] instanceof NakedFastRandomTree) {
                final FastRandomTree aTree = (FastRandomTree) m_Classifiers[treeIdx];
                aTree.data = bagData;
                final Future<?> future = threadPool.submit(aTree);
                futures.add(future);
            } else {
                throw new IllegalArgumentException("The FastRfBagging class accepts "
                        + "only NakedFastRandomTree as its base classifier.");
            }
        }

        // make sure all trees have been trained before proceeding
        for (int treeIdx = 0; treeIdx < m_Classifiers.length; treeIdx++) {
            futures.get(treeIdx).get();
        }

        // [jhostetler] 'm_FeatureImportances' and 'computeOOBError()' are
        // private, so we'll just not compute them.
        // calc OOB error?
        // if( getCalcOutOfBag() || getComputeImportances() ) {
        //     // m_OutOfBagError = computeOOBError(data, inBag, threadPool);
        //     m_OutOfBagError = computeOOBError( myData, inBag, threadPool );
        // }
        // else {
        //     m_OutOfBagError = 0;
        // }
        //
        // // calc feature importances
        // m_FeatureImportances = null;
        // // m_FeatureNames = null;
        // if( getComputeImportances() ) {
        //     m_FeatureImportances = new double[data.numAttributes()];
        //     // m_FeatureNames = new String[data.numAttributes()];
        //     // Instances dataCopy = new Instances(data); // To scramble
        //     // int[] permutation = FastRfUtils.randomPermutation(data.numInstances(), random);
        //     for( int j = 0; j < data.numAttributes(); j++ ) {
        //         if( j != data.classIndex() ) {
        //             // double sError = computeOOBError(FastRfUtils.scramble(data, dataCopy, j, permutation), inBag, threadPool);
        //             // double sError = computeOOBError(data, inBag, threadPool, j, 0);
        //             final float[] unscrambled = myData.scrambleOneAttribute( j, random );
        //             final double sError = computeOOBError( myData, inBag, threadPool );
        //             myData.vals[j] = unscrambled; // restore the original state
        //             m_FeatureImportances[j] = sError - m_OutOfBagError;
        //         }
        //         // m_FeatureNames[j] = data.attribute(j).name();
        //     }
        // }

        threadPool.shutdown();
    } finally {
        threadPool.shutdownNow();
    }
}
From source file:id3.MyID3.java
/**
 * Decision-tree induction algorithm.
 * @param instances the training data
 * @param attributes the remaining attributes
 * @throws Exception
 */
public void buildMyID3(Instances instances, ArrayList<Attribute> attributes) throws Exception {
    // Check if no instances have reached this node.
    if (instances.numInstances() == 0) {
        classAttribute = null;
        classLabel = Instance.missingValue();
        classDistributionAmongInstances = new double[instances.numClasses()];
        return;
    }

    // Check if all instances only contain one class label
    if (computeEntropy(instances) == 0) {
        currentAttribute = null;
        classDistributionAmongInstances = classDistribution(instances);
        // Labelling process at node
        for (int i = 0; i < classDistributionAmongInstances.length; i++) {
            if (classDistributionAmongInstances[i] > 0) {
                classLabel = i;
                break;
            }
        }
        classAttribute = instances.classAttribute();
        Utils.normalize(classDistributionAmongInstances);
    } else {
        // Compute infogain for each attribute
        double[] infoGainAttribute = new double[instances.numAttributes()];
        for (int i = 0; i < instances.numAttributes(); i++) {
            infoGainAttribute[i] = computeIG(instances, instances.attribute(i));
        }

        // Choose attribute with maximum information gain
        int indexMaxInfoGain = 0;
        double maximumInfoGain = 0.0;
        for (int i = 0; i < (infoGainAttribute.length - 1); i++) {
            if (infoGainAttribute[i] > maximumInfoGain) {
                maximumInfoGain = infoGainAttribute[i];
                indexMaxInfoGain = i;
            }
        }
        currentAttribute = instances.attribute(indexMaxInfoGain);

        // Delete current attribute from remaining attributes
        ArrayList<Attribute> remainingAttributes = attributes;
        if (!remainingAttributes.isEmpty()) {
            int indexAttributeDeleted = 0;
            for (int i = 0; i < remainingAttributes.size(); i++) {
                if (remainingAttributes.get(i).index() == currentAttribute.index()) {
                    indexAttributeDeleted = i;
                }
            }
            remainingAttributes.remove(indexAttributeDeleted);
        }

        // Split instances based on currentAttribute (create branch new node)
        Instances[] instancesSplitBasedAttribute = splitData(instances, currentAttribute);
        subTree = new MyID3[currentAttribute.numValues()];
        for (int i = 0; i < currentAttribute.numValues(); i++) {
            if (instancesSplitBasedAttribute[i].numInstances() == 0) {
                // Handle empty examples at nodes: label with the majority class
                double[] currentClassDistribution = classDistribution(instances);
                classLabel = 0.0;
                double counterDistribution = 0.0;
                for (int j = 0; j < currentClassDistribution.length; j++) {
                    if (currentClassDistribution[j] > counterDistribution) {
                        counterDistribution = currentClassDistribution[j]; // track the running maximum
                        classLabel = j;
                    }
                }
                classAttribute = instances.classAttribute();
            } else {
                subTree[i] = new MyID3();
                subTree[i].buildMyID3(instancesSplitBasedAttribute[i], remainingAttributes);
            }
        }
    }
}
From source file:id3.MyID3.java
/**
 * Computes the class distribution over a set of instances.
 * @param instances
 * @return the class distribution counts
 */
public double[] classDistribution(Instances instances) {
    // Compute class distribution counter from instances
    double[] distributionClass = new double[instances.numClasses()];
    for (int i = 0; i < instances.numInstances(); i++) {
        distributionClass[(int) instances.instance(i).classValue()]++;
    }
    return distributionClass;
}
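A minimal usage sketch of this helper (hedged: assumes a loaded Instances object named data with its class index already set, as in the demo at the top of this page):

// Counts are index-aligned with the nominal class attribute's labels.
double[] dist = classDistribution(data);
for (int c = 0; c < data.numClasses(); c++) {
    System.out.println(data.classAttribute().value(c) + ": " + dist[c]);
}

Note that indexing by (int) classValue() assumes a nominal class; for a numeric class numClasses() returns 1 and every instance would collapse into the same bucket.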
From source file:imba.classifier.FFNNTubes.java
@Override
public void buildClassifier(Instances data) throws Exception {
    getCapabilities().testWithFail(data);
    data.deleteWithMissingClass();

    nAttribute = data.numAttributes() - 1;
    nOutput = data.numClasses();
    nData = data.size();

    // set target data
    setTarget(data);

    // generate random initial weights
    generateRandomWeight();

    // normalize the data
    Normalize norm = new Normalize();
    Filter filter = new NominalToBinary();

    norm.setInputFormat(data);
    Instances filteredData = Filter.useFilter(data, norm);

    try {
        filter.setInputFormat(filteredData);
        for (Instance i1 : filteredData) {
            filter.input(i1);
        }
        filter.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    int z = 0;
    double valMSE = 100.0;
    while ((z <= nEpoch) && (valMSE >= 0.00001)) {
        for (int j = 0; j < nData; j++) {
            feedForward(filteredData.get(j));
            if (nHidden == 0) {
                updateWeight(target[j]);
            } else {
                backPropagation(target[j]);
            }
        }
        countError(filteredData);
        valMSE = countMSE(filteredData);
        System.out.println("ACCURACY " + z + " : " + accuracy);
        System.out.println("MSE " + z + " : " + valMSE);
        z++;
    }
}
From source file:imba.classifier.NBTubes.java
@Override
public void buildClassifier(Instances data) {
    dataClassifier = new ArrayList<>();
    infoClassifier = new ArrayList<>();
    validAttribute = new ArrayList<>();
    dataset = null;
    sumClass = null;
    dataSize = 0;

    header_Instances = data;

    Filter f;
    int i, j, k, l, m;
    int sumVal;
    int numAttr = data.numAttributes(); // includes the class, i.e. attributes + 1

    i = 0;
    while (i < numAttr && wasNumeric == false) {
        if (i == classIdx) {
            i++;
        }
        if (i != numAttr && data.attribute(i).isNumeric()) {
            wasNumeric = true;
        }
        i++;
    }

    Instance p;

    // apply the filter
    if (wasNumeric) {
        f = new Normalize();
        //Filter f = new NumericToNominal();
        try {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
            f.batchFinished();
        } catch (Exception ex) {
            Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
        }

        dataset = f.getOutputFormat();
        while ((p = f.output()) != null) {
            dataset.add(p);
        }
    }

    //f = new NumericToNominal();
    if (filter.equals("Discretize")) {
        f = new Discretize();
    } else {
        f = new NumericToNominal();
    }

    try {
        if (wasNumeric) {
            f.setInputFormat(dataset);
            for (Instance i1 : dataset) {
                f.input(i1);
            }
        } else {
            f.setInputFormat(data);
            for (Instance i1 : data) {
                f.input(i1);
            }
        }
        f.batchFinished();
    } catch (Exception ex) {
        Logger.getLogger(NBTubes.class.getName()).log(Level.SEVERE, null, ex);
    }

    dataset = null;
    dataset = f.getOutputFormat();
    while ((p = f.output()) != null) {
        dataset.add(p);
    }

    // building data structure
    classIdx = data.classIndex();
    dataSize = data.size();

    // fill the data and info classifiers with empty arrays
    i = 0;
    j = i;
    while (j < numAttr) {
        if (i == classIdx) {
            i++;
        } else {
            dataClassifier.add(new ArrayList<>());
            infoClassifier.add(new ArrayList<>());

            if (j < i) {
                m = j - 1;
            } else {
                m = j;
            }

            k = 0;
            while (k < dataset.attribute(j).numValues()) {
                dataClassifier.get(m).add(new ArrayList<>());
                infoClassifier.get(m).add(new ArrayList<>());

                l = 0;
                while (l < dataset.attribute(classIdx).numValues()) {
                    dataClassifier.get(m).get(k).add(0);
                    infoClassifier.get(m).get(k).add(0.0);
                    l++;
                }
                k++;
            }
        }
        i++;
        j++;
    }

    // fill the data classifier (per-class counts) from the dataset
    sumClass = new int[data.numClasses()];
    i = 0;
    while (i < dataset.size()) {
        j = 0;
        k = j;
        while (k < dataset.numAttributes()) {
            if (j == classIdx) {
                j++;
            } else {
                if (k < j) {
                    m = k - 1;
                } else {
                    m = k;
                }
                dataClassifier.get(m).get((int) dataset.get(i).value(k)).set(
                        (int) dataset.get(i).value(classIdx),
                        dataClassifier.get(m).get((int) dataset.get(i).value(k))
                                .get((int) dataset.get(i).value(classIdx)) + 1);
                if (m == 0) {
                    sumClass[(int) dataset.get(i).value(classIdx)]++;
                }
            }
            k++;
            j++;
        }
        i++;
    }

    // convert the counts into per-class relative frequencies
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            k = 0;
            while (k < dataClassifier.get(i).get(j).size()) {
                infoClassifier.get(i).get(j).set(k,
                        (double) dataClassifier.get(i).get(j).get(k) / sumClass[k]);
                k++;
            }
            j++;
        }
        i++;
    }

    /*
    // check whether any value of an attribute
    // represents more than 80% of the data
    i = 0;
    while (i < dataClassifier.size()) {
        j = 0;
        while (j < dataClassifier.get(i).size()) {
            j++;
        }
        i++;
    }
    */
}
From source file:iris.ID3.java
public void makeLikeAWhat(Instances instances) {
    // Create storage for different info gains
    double[] infoGains = new double[instances.numAttributes()];

    // Enumerate through attributes to find the best gain
    Enumeration attributeEnum = instances.enumerateAttributes();
    while (attributeEnum.hasMoreElements()) {
        // Loop through attributes, adding gain to infoGains array
        Attribute att = (Attribute) attributeEnum.nextElement();
        infoGains[att.index()] = infoGain(instances, att);
    }

    // Use maxIndex to find the highest info gain in the array
    highestInfoGain = instances.attribute(Utils.maxIndex(infoGains));

    // Make a leaf if there is no more info to gain;
    // otherwise, create children
    if (Utils.eq(infoGains[highestInfoGain.index()], 0)) {
        highestInfoGain = null;

        // Instantiate maxDistribution
        maxDistribution = new double[instances.numClasses()];

        // Set up enumerator for instances
        Enumeration instanceEnum = instances.enumerateInstances();

        // Tally classes
        while (instanceEnum.hasMoreElements()) {
            Instance instance = (Instance) instanceEnum.nextElement();
            maxDistribution[(int) instance.classValue()]++;
        }

        // Normalize data for easier manipulation
        Utils.normalize(maxDistribution);

        // Get the max index of the distribution
        classValue = Utils.maxIndex(maxDistribution);

        // Save class attribute
        classAttribute = instances.classAttribute();
    }
    // Create children
    else {
        // Split best attribute into bins
        Instances[] bins = makeBins(instances, highestInfoGain);

        // Create nodes
        children = new ID3[highestInfoGain.numValues()];
        for (int i = 0; i < highestInfoGain.numValues(); i++) {
            children[i] = new ID3();
            children[i].makeLikeAWhat(bins[i]);
        }
    }
}
From source file:iris.ID3.java
public double calculateEntropy(Instances instances) {
    // Array to hold counts for each class
    double[] numInEachClass = new double[instances.numClasses()];

    // Loop through every instance in one bin
    for (int i = 0; i < instances.numInstances(); i++) {
        // Increment the count for the class that the instance belongs to
        numInEachClass[(int) instances.instance(i).classValue()]++;
    }

    // Instantiate the entropy value
    double entropy = 0;

    // Loop through number of classes to sum log operations
    for (int i = 0; i < instances.numClasses(); i++) {
        // Skip empty classes (the log of zero is undefined)
        if (numInEachClass[i] > 0) {
            // Shannon entropy term for this class
            entropy -= (numInEachClass[i] / instances.numInstances())
                    * Utils.log2(numInEachClass[i] / instances.numInstances());
        }
    }
    return entropy;
}
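As a quick sanity check of this computation (a worked example, not taken from the source): a bin holding 6 instances of one class and 2 of another gives -(6/8) * log2(6/8) - (2/8) * log2(2/8) ≈ 0.311 + 0.500 ≈ 0.811 bits; a pure bin gives 0, and an even two-class split gives exactly 1.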
From source file:j48.BinC45Split.java
License:Open Source License
/**
 * Creates split on enumerated attribute.
 *
 * @exception Exception if something goes wrong
 */
private void handleEnumeratedAttribute(Instances trainInstances) throws Exception {
    Distribution newDistribution, secondDistribution;
    int numAttValues;
    double currIG, currGR;
    Instance instance;
    int i;

    numAttValues = trainInstances.attribute(m_attIndex).numValues();
    newDistribution = new Distribution(numAttValues, trainInstances.numClasses());

    // Only Instances with known values are relevant.
    Enumeration enu = trainInstances.enumerateInstances();
    while (enu.hasMoreElements()) {
        instance = (Instance) enu.nextElement();
        if (!instance.isMissing(m_attIndex))
            newDistribution.add((int) instance.value(m_attIndex), instance);
    }
    m_distribution = newDistribution;

    // For all values
    for (i = 0; i < numAttValues; i++) {

        if (Utils.grOrEq(newDistribution.perBag(i), m_minNoObj)) {
            secondDistribution = new Distribution(newDistribution, i);

            // Check if minimum number of Instances in the two subsets.
            if (secondDistribution.check(m_minNoObj)) {
                m_numSubsets = 2;
                currIG = m_infoGainCrit.splitCritValue(secondDistribution, m_sumOfWeights);
                currGR = m_gainRatioCrit.splitCritValue(secondDistribution, m_sumOfWeights, currIG);
                if ((i == 0) || Utils.gr(currGR, m_gainRatio)) {
                    m_gainRatio = currGR;
                    m_infoGain = currIG;
                    m_splitPoint = (double) i;
                    m_distribution = secondDistribution;
                }
            }
        }
    }
}