List of usage examples for weka.core Instances deleteWithMissingClass
public void deleteWithMissingClass()
From source file:j48.C45PruneableClassifierTreeG.java
License:Open Source License
/** * Method for building a pruneable classifier tree. * * @param datathe data for building the tree * @throws Exception if something goes wrong *//*from w w w . jav a 2 s . co m*/ public void buildClassifier(Instances data) throws Exception { // can classifier tree handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); buildTree(data, m_subtreeRaising); collapse(); if (m_pruneTheTree) { prune(); } doGrafting(data); if (m_cleanup) { cleanup(new Instances(data, 0)); } }
From source file:j48.ClassifierTree.java
License:Open Source License
/** * Method for building a classifier tree. * * @param data the data to build the tree from * @throws Exception if something goes wrong *///from w w w .jav a 2s . co m public void buildClassifier(Instances data) throws Exception { // can classifier tree handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); buildTree(data, false); }
From source file:j48.PruneableClassifierTree.java
License:Open Source License
/** * Method for building a pruneable classifier tree. * * @param data the data to build the tree from * @throws Exception if tree can't be built successfully *//*from www. j av a 2 s. co m*/ public void buildClassifier(Instances data) throws Exception { // can classifier tree handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_seed); data.stratify(numSets); buildTree(data.trainCV(numSets, numSets - 1, random), data.testCV(numSets, numSets - 1), !m_cleanup); if (pruneTheTree) { prune(); } if (m_cleanup) { cleanup(new Instances(data, 0)); } }
From source file:learning.DMRandomForest.java
License:Open Source License
/** * Builds a classifier for a set of instances. * * @param data the instances to train the classifier with * @throws Exception if something goes wrong *///from ww w . j a v a 2s .com public void buildClassifier(Instances data) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); m_bagger = new DMBagging(); RandomTree rTree = new RandomTree(); // set up the random tree options m_KValue = m_numFeatures; if (m_KValue < 1) m_KValue = (int) Utils.log2(data.numAttributes()) + 1; rTree.setKValue(m_KValue); rTree.setMaxDepth(getMaxDepth()); // set up the bagger and build the forest m_bagger.setClassifier(rTree); m_bagger.setSeed(m_randomSeed); m_bagger.setNumIterations(m_numTrees); m_bagger.setCalcOutOfBag(true); m_bagger.buildClassifier(data); }
From source file:library.MikeC45PruneableClassifierTree.java
License:Open Source License
/** * Method for building a pruneable classifier tree. * * @param data the data for building the tree * @throws Exception if something goes wrong *//*from www . j ava 2 s . com*/ public void buildClassifier(Instances data) throws Exception { // can classifier tree handle the data? getCapabilities().testWithFail(data); // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); buildTree(data, m_subtreeRaising); collapse(); if (m_pruneTheTree) { prune(); } if (m_cleanup) { cleanup(new Instances(data, 0)); } }
From source file:LogReg.FilteredLogRegClassifier.java
License:Open Source License
/** * Build the classifier on the filtered data. * * @param data the training data/*from w ww .j a v a2 s. c o m*/ * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances data) throws Exception { if (m_Classifier == null) { throw new Exception("No base classifiers have been set!"); } // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); /* String fname = m_Filter.getClass().getName(); fname = fname.substring(fname.lastIndexOf('.') + 1); util.Timer t = util.Timer.getTimer("FilteredClassifier::" + fname); t.start(); */ m_Filter.setInputFormat(data); // filter capabilities are checked here data = Filter.useFilter(data, m_Filter); //t.stop(); // can classifier handle the data? getClassifier().getCapabilities().testWithFail(data); m_FilteredInstances = data.stringFreeStructure(); m_Classifier.buildClassifier(data); }
From source file:LogReg.Logistic.java
License:Open Source License
/** * Builds the classifier//from w w w. ja v a 2s . c om * * @param train the training data to be used for generating the * boosted classifier. * @throws Exception if the classifier could not be built successfully */ public void buildClassifier(Instances train) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(train); // remove instances with missing class train = new Instances(train); train.deleteWithMissingClass(); // Replace missing values m_ReplaceMissingValues = new ReplaceMissingValues(); m_ReplaceMissingValues.setInputFormat(train); train = Filter.useFilter(train, m_ReplaceMissingValues); // Remove useless attributes m_AttFilter = new RemoveUseless(); m_AttFilter.setInputFormat(train); train = Filter.useFilter(train, m_AttFilter); // Transform attributes m_NominalToBinary = new NominalToBinary(); m_NominalToBinary.setInputFormat(train); train = Filter.useFilter(train, m_NominalToBinary); // Save the structure for printing the model m_structure = new Instances(train, 0); // Extract data m_ClassIndex = train.classIndex(); m_NumClasses = train.numClasses(); int nK = m_NumClasses - 1; // Only K-1 class labels needed int nR = m_NumPredictors = train.numAttributes() - 1; int nC = train.numInstances(); m_Data = new double[nC][nR + 1]; // Data values int[] Y = new int[nC]; // Class labels double[] xMean = new double[nR + 1]; // Attribute means xSD = new double[nR + 1]; // Attribute stddev's double[] sY = new double[nK + 1]; // Number of classes double[] weights = new double[nC]; // Weights of instances double totWeights = 0; // Total weights of the instances m_Par = new double[nR + 1][nK]; // Optimized parameter values if (m_Debug) { System.out.println("Extracting data..."); } for (int i = 0; i < nC; i++) { // initialize X[][] Instance current = train.instance(i); Y[i] = (int) current.classValue(); // Class value starts from 0 weights[i] = current.weight(); // Dealing with weights totWeights += weights[i]; m_Data[i][0] = 1; int j = 1; for (int k = 0; k <= nR; k++) { if (k != m_ClassIndex) { double x = current.value(k); m_Data[i][j] = x; xMean[j] += weights[i] * x; xSD[j] += weights[i] * x * x; j++; } } // Class count sY[Y[i]]++; } if ((totWeights <= 1) && (nC > 1)) throw new Exception("Sum of weights of instances less than 1, please reweight!"); xMean[0] = 0; xSD[0] = 1; for (int j = 1; j <= nR; j++) { xMean[j] = xMean[j] / totWeights; if (totWeights > 1) xSD[j] = Math.sqrt(Math.abs(xSD[j] - totWeights * xMean[j] * xMean[j]) / (totWeights - 1)); else xSD[j] = 0; } if (m_Debug) { // Output stats about input data System.out.println("Descriptives..."); for (int m = 0; m <= nK; m++) System.out.println(sY[m] + " cases have class " + m); System.out.println("\n Variable Avg SD "); for (int j = 1; j <= nR; j++) System.out.println(Utils.doubleToString(j, 8, 4) + Utils.doubleToString(xMean[j], 10, 4) + Utils.doubleToString(xSD[j], 10, 4)); } // Normalise input data for (int i = 0; i < nC; i++) { for (int j = 0; j <= nR; j++) { if (xSD[j] != 0) { m_Data[i][j] = (m_Data[i][j] - xMean[j]) / xSD[j]; } } } if (m_Debug) { System.out.println("\nIteration History..."); } double x[] = new double[(nR + 1) * nK]; double[][] b = new double[2][x.length]; // Boundary constraints, N/A here // Initialize for (int p = 0; p < nK; p++) { int offset = p * (nR + 1); x[offset] = Math.log(sY[p] + 1.0) - Math.log(sY[nK] + 1.0); // Null model b[0][offset] = Double.NaN; b[1][offset] = Double.NaN; for (int q = 1; q <= nR; q++) { x[offset + q] = 0.0; b[0][offset + q] = Double.NaN; b[1][offset + q] = Double.NaN; } } OptEng opt = new OptEng(); opt.setDebug(m_Debug); opt.setWeights(weights); opt.setClassLabels(Y); if (m_MaxIts == -1) { // Search until convergence x = opt.findArgmin(x, b); while (x == null) { x = opt.getVarbValues(); if (m_Debug) System.out.println("200 iterations finished, not enough!"); x = opt.findArgmin(x, b); } if (m_Debug) System.out.println(" -------------<Converged>--------------"); } else { opt.setMaxIteration(m_MaxIts); x = opt.findArgmin(x, b); if (x == null) // Not enough, but use the current value x = opt.getVarbValues(); } m_LL = -opt.getMinFunction(); // Log-likelihood // Don't need data matrix anymore m_Data = null; // Convert coefficients back to non-normalized attribute units for (int i = 0; i < nK; i++) { m_Par[0][i] = x[i * (nR + 1)]; for (int j = 1; j <= nR; j++) { m_Par[j][i] = x[i * (nR + 1) + j]; if (xSD[j] != 0) { m_Par[j][i] /= xSD[j]; m_Par[0][i] -= m_Par[j][i] * xMean[j]; } } } }
From source file:machine_learing_clasifier.MyC45.java
@Override public void buildClassifier(Instances i) throws Exception { if (!i.classAttribute().isNominal()) { throw new Exception("Class not nominal"); }// w w w. jav a2s . co m //penanganan missing value for (int j = 0; j < i.numAttributes(); j++) { Attribute attr = i.attribute(j); for (int k = 0; k < i.numInstances(); k++) { Instance inst = i.instance(k); if (inst.isMissing(attr)) { inst.setValue(attr, fillMissingValue(i, attr)); //bisa dituning lagi performancenya } } } i = new Instances(i); i.deleteWithMissingClass(); makeTree(i); }
From source file:machine_learing_clasifier.MyID3.java
@Override public void buildClassifier(Instances i) throws Exception { if (!i.classAttribute().isNominal()) { throw new Exception("Class not nominal"); }//from ww w .ja v a 2s. c o m for (int j = 0; j < i.numAttributes(); j++) { Attribute attr = i.attribute(j); if (!attr.isNominal()) { throw new Exception("Attribute not nominal"); } for (int k = 0; k < i.numInstances(); k++) { Instance inst = i.instance(k); if (inst.isMissing(attr)) { throw new Exception("Missing value"); } } } i = new Instances(i); i.deleteWithMissingClass(); makeTree(i); }
From source file:main.NaiveBayes.java
License:Open Source License
/** * Generates the classifier.// www . j a va 2s .c om * * @param instances set of instances serving as training data * @exception Exception if the classifier has not been generated successfully */ @Override public void buildClassifier(Instances instances) throws Exception { // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class instances = new Instances(instances); instances.deleteWithMissingClass(); m_NumClasses = instances.numClasses(); // Copy the instances m_Instances = new Instances(instances); // Discretize instances if required if (m_UseDiscretization) { m_Disc = new weka.filters.supervised.attribute.Discretize(); m_Disc.setInputFormat(m_Instances); m_Instances = weka.filters.Filter.useFilter(m_Instances, m_Disc); } else { m_Disc = null; } // Reserve space for the distributions m_Distributions = new Estimator[m_Instances.numAttributes() - 1][m_Instances.numClasses()]; m_ClassDistribution = new DiscreteEstimator(m_Instances.numClasses(), true); int attIndex = 0; Enumeration<Attribute> enu = m_Instances.enumerateAttributes(); while (enu.hasMoreElements()) { Attribute attribute = enu.nextElement(); // If the attribute is numeric, determine the estimator // numeric precision from differences between adjacent values double numPrecision = DEFAULT_NUM_PRECISION; if (attribute.type() == Attribute.NUMERIC) { m_Instances.sort(attribute); if ((m_Instances.numInstances() > 0) && !m_Instances.instance(0).isMissing(attribute)) { double lastVal = m_Instances.instance(0).value(attribute); double currentVal, deltaSum = 0; int distinct = 0; for (int i = 1; i < m_Instances.numInstances(); i++) { Instance currentInst = m_Instances.instance(i); if (currentInst.isMissing(attribute)) { break; } currentVal = currentInst.value(attribute); if (currentVal != lastVal) { deltaSum += currentVal - lastVal; lastVal = currentVal; distinct++; } } if (distinct > 0) { numPrecision = deltaSum / distinct; } } } for (int j = 0; j < m_Instances.numClasses(); j++) { switch (attribute.type()) { case Attribute.NUMERIC: if (m_UseKernelEstimator) { m_Distributions[attIndex][j] = new KernelEstimator(numPrecision); } else { m_Distributions[attIndex][j] = new NormalEstimator(numPrecision); } break; case Attribute.NOMINAL: m_Distributions[attIndex][j] = new DiscreteEstimator(attribute.numValues(), true); break; default: throw new Exception("Attribute type unknown to NaiveBayes"); } } attIndex++; } // Compute counts Enumeration<Instance> enumInsts = m_Instances.enumerateInstances(); while (enumInsts.hasMoreElements()) { Instance instance = enumInsts.nextElement(); updateClassifier(instance); } // Save space m_Instances = new Instances(m_Instances, 0); }