List of usage examples for weka.classifiers Evaluation errorRate
public final double errorRate()
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Boosting method. Boosts using resampling.
 *
 * Runs AdaBoost.M1-style iterations, but each resampled bag is additionally
 * re-balanced between the two classes via randomSampling() before the base
 * classifier is trained (assumes a binary, nominal class — TODO confirm).
 *
 * @param data the training data to be used for generating the
 *             boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    // Normalize instance weights so they sum to 1.
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample until the base classifier has non-zero training error
        // (or MAX_NUM_RESAMPLING_ITERATIONS is hit).
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            // Class counts of the bootstrap sample; index 0/1 are the two class values.
            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            // Determine which class index is the minority (minC) and majority (majC).
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC="+nMin+"; majC="+nMaj);

            // Balance the data which boosting generated for training the base
            // classifier: both classes are sampled to nMaj instances
            // (presumably oversampling the minority class — TODO confirm
            // against randomSampling()'s contract).
            //System.out.println("before:"+classNum[0]+"-"+classNum[1]);
            Instances sampleData = randomSampling(sample, majC, minC, nMaj, nMaj, randomInstance);
            //classNum =sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:"+classNum[0]+"-"+classNum[1]);

            // Build and evaluate classifier; epsilon is the weighted error
            // on the full (normalized-weight) training copy.
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);
            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Boosting method. Boosts any classifier that can handle weighted
 * instances.
 *
 * AdaBoost.M1-style loop: no resampling here — the base classifier is
 * trained directly on the (possibly weight-quantile-filtered) weighted copy.
 *
 * @param data the training data to be used for generating the
 *             boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierWithWeights(Instances data) throws Exception {

    Instances trainData, training;
    double epsilon, reweight;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training, 0, numInstances);
        }

        // Build the classifier; seed randomizable base learners so each
        // iteration gets a different (but reproducible) randomization.
        if (m_Classifiers[m_NumIterationsPerformed] instanceof Randomizable)
            ((Randomizable) m_Classifiers[m_NumIterationsPerformed]).setSeed(randomInstance.nextInt());
        // this is the training data for building the base classifier
        m_Classifiers[m_NumIterationsPerformed].buildClassifier(trainData);

        // Evaluate the classifier: epsilon is the weighted error on the
        // full weighted training copy.
        evaluation = new Evaluation(data);
        evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
        epsilon = evaluation.errorRate();

        // Stop if error too small or error too big and ignore this model
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
From source file:gyc.UnderOverBoostM1.java
License:Open Source License
/**
 * Boosting method. Boosts using resampling.
 *
 * Like the OverBoostM1 variant, but each bootstrap bag is re-balanced by
 * taking a percentage pb of instances from each class, where
 * pb = 100 * (nMin + nMaj) / 2 / nMaj — i.e. both classes are brought to
 * the average class size (assumes a binary, nominal class — TODO confirm).
 *
 * @param data the training data to be used for generating the
 *             boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    // Normalize instance weights so they sum to 1.
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    // NOTE(review): b is never read — its only use is commented out below.
    int b = 10;
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length; m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample until the base classifier has non-zero training error
        // (or MAX_NUM_RESAMPLING_ITERATIONS is hit).
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            // Class counts of the bootstrap sample; index 0/1 are the two class values.
            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            // Determine which class index is the minority (minC) and majority (majC).
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC="+nMin+"; majC="+nMaj);

            // Balance the data which boosting generated for training the base classifier.
            //System.out.println("before:"+classNum[0]+"-"+classNum[1]);
            double pb = 100.0 * (nMin + nMaj) / 2 / nMaj;
            /* if (m_NumIterationsPerformed + 1 > (m_Classifiers.length / 10)) b += 10;
               (b% * Nmaj) instances are taken from each class */
            Instances sampleData = randomSampling(sample, majC, minC, (int) pb, randomInstance);
            //classNum =sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:"+classNum[0]+"-"+classNum[1]);

            // Build and evaluate classifier; epsilon is the weighted error
            // on the full (normalized-weight) training copy.
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);
            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
From source file:kfst.classifier.WekaClassifier.java
License:Open Source License
/**
 * Builds and evaluates a support vector machine (SVM) classifier.
 * SMO is used as the SVM implementation from the Weka software.
 *
 * @param pathTrainData the path of the train set (ARFF format)
 * @param pathTestData  the path of the test set (ARFF format)
 * @param svmKernel     the kernel to use: "Polynomial kernel", "RBF kernel",
 *                      or any other value for the Puk kernel
 *
 * @return the classification accuracy on the test set as a percentage
 *         (100 - errorRate * 100), or 0 if loading/training/evaluation failed
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        // try-with-resources: the readers are closed even if Instances parsing throws
        // (the original closed them only on the success path).
        Instances dataTrain;
        try (BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData))) {
            dataTrain = new Instances(readerTrain);
        }
        // By convention the class attribute is the last column.
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        Instances dataTest;
        try (BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData))) {
            dataTest = new Instances(readerTest);
        }
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        SMO svm = new SMO();
        // Direct construction instead of the deprecated Class.newInstance(),
        // which bypasses compile-time exception checking.
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(new weka.classifiers.functions.supportVector.RBFKernel());
        } else {
            svm.setKernel(new weka.classifiers.functions.supportVector.Puk());
        }
        svm.buildClassifier(dataTrain);

        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        // Best-effort: log and fall through to return 0 (matches existing contract).
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}
From source file:kfst.classifier.WekaClassifier.java
License:Open Source License
/** * This method builds and evaluates the naiveBayes(NB) classifier. * The naiveBayes are used as the NB classifier implemented in the Weka * software./*from www .j a va2s . co m*/ * * @param pathTrainData the path of the train set * @param pathTestData the path of the test set * * @return the classification accuracy */ public static double naiveBayes(String pathTrainData, String pathTestData) { double resultValue = 0; try { BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData)); Instances dataTrain = new Instances(readerTrain); readerTrain.close(); dataTrain.setClassIndex(dataTrain.numAttributes() - 1); BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData)); Instances dataTest = new Instances(readerTest); readerTest.close(); dataTest.setClassIndex(dataTest.numAttributes() - 1); NaiveBayes nb = new NaiveBayes(); nb.buildClassifier(dataTrain); Evaluation eval = new Evaluation(dataTest); eval.evaluateModel(nb, dataTest); resultValue = 100 - (eval.errorRate() * 100); } catch (Exception ex) { Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex); } return resultValue; }
From source file:kfst.classifier.WekaClassifier.java
License:Open Source License
/** * This method builds and evaluates the decision tree(DT) classifier. * The j48 are used as the DT classifier implemented in the Weka software. * * @param pathTrainData the path of the train set * @param pathTestData the path of the test set * @param confidenceValue The confidence factor used for pruning * @param minNumSampleInLeaf The minimum number of instances per leaf * /* w w w.j a va 2 s .c o m*/ * @return the classification accuracy */ public static double dTree(String pathTrainData, String pathTestData, double confidenceValue, int minNumSampleInLeaf) { double resultValue = 0; try { BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData)); Instances dataTrain = new Instances(readerTrain); readerTrain.close(); dataTrain.setClassIndex(dataTrain.numAttributes() - 1); BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData)); Instances dataTest = new Instances(readerTest); readerTest.close(); dataTest.setClassIndex(dataTest.numAttributes() - 1); J48 decisionTree = new J48(); decisionTree.setConfidenceFactor((float) confidenceValue); decisionTree.setMinNumObj(minNumSampleInLeaf); decisionTree.buildClassifier(dataTrain); Evaluation eval = new Evaluation(dataTest); eval.evaluateModel(decisionTree, dataTest); resultValue = 100 - (eval.errorRate() * 100); } catch (Exception ex) { Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex); } return resultValue; }
From source file:lu.lippmann.cdb.datasetview.tabs.RegressionTreeTabView.java
License:Open Source License
/**
 * {@inheritDoc}
 *
 * Rebuilds the panel with an attribute selector and a "Compute" button that
 * trains an unpruned REPTree on the selected class attribute, evaluates it on
 * the same data, and renders the resulting tree as an interactive graph view.
 */
@SuppressWarnings("unchecked")
@Override
public void update0(final Instances dataSet) throws Exception {
    this.panel.removeAll();

    //final Object[] attrNames=WekaDataStatsUtil.getNumericAttributesNames(dataSet).toArray();
    final Object[] attrNames = WekaDataStatsUtil.getAttributeNames(dataSet).toArray();

    final JComboBox xCombo = new JComboBox(attrNames);
    xCombo.setBorder(new TitledBorder("Attribute to evaluate"));

    final JXPanel comboPanel = new JXPanel();
    comboPanel.setLayout(new GridLayout(1, 2));
    comboPanel.add(xCombo);
    final JXButton jxb = new JXButton("Compute");
    comboPanel.add(jxb);
    this.panel.add(comboPanel, BorderLayout.NORTH);

    jxb.addActionListener(new ActionListener() {
        @Override
        public void actionPerformed(ActionEvent e) {
            try {
                // Drop the previous graph view, if any, before recomputing.
                if (gv != null)
                    panel.remove((Component) gv);

                // The selected attribute becomes the regression target.
                dataSet.setClassIndex(xCombo.getSelectedIndex());

                final REPTree rt = new REPTree();
                rt.setNoPruning(true);
                //rt.setMaxDepth(3);
                rt.buildClassifier(dataSet);

                /*final M5P rt=new M5P(); rt.buildClassifier(dataSet);*/

                // NOTE(review): evaluation is done on the training data itself,
                // so the reported error is a resubstitution estimate.
                final Evaluation eval = new Evaluation(dataSet);
                double[] d = eval.evaluateModel(rt, dataSet);
                System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(d));
                System.out.println(eval.errorRate());
                System.out.println(eval.sizeOfPredictedRegions());
                System.out.println(eval.toSummaryString("", true));

                // Convert Weka's dot-style tree description into the app's graph model.
                final GraphWithOperations gwo = GraphUtil
                        .buildGraphWithOperationsFromWekaRegressionString(rt.graph());
                final DecisionTree dt = new DecisionTree(gwo, eval.errorRate());

                gv = DecisionTreeToGraphViewHelper.buildGraphView(dt, eventPublisher, commandDispatcher);
                gv.addMetaInfo("Size=" + dt.getSize(), "");
                gv.addMetaInfo("Depth=" + dt.getDepth(), "");
                gv.addMetaInfo("MAE=" + FormatterUtil.DECIMAL_FORMAT.format(eval.meanAbsoluteError()) + "", "");
                gv.addMetaInfo("RMSE=" + FormatterUtil.DECIMAL_FORMAT.format(eval.rootMeanSquaredError()) + "", "");

                // Checkbox that toggles node/edge shape decoration on the graph view.
                final JCheckBox toggleDecisionTreeDetails = new JCheckBox("Toggle details");
                toggleDecisionTreeDetails.addActionListener(new ActionListener() {
                    @Override
                    public void actionPerformed(ActionEvent e) {
                        if (!tweakedGraph) {
                            final Object[] mapRep = WekaDataStatsUtil
                                    .buildNodeAndEdgeRepartitionMap(dt.getGraphWithOperations(), dataSet);
                            gv.updateVertexShapeTransformer((Map<CNode, Map<Object, Integer>>) mapRep[0]);
                            gv.updateEdgeShapeRenderer((Map<CEdge, Float>) mapRep[1]);
                        } else {
                            gv.resetVertexAndEdgeShape();
                        }
                        tweakedGraph = !tweakedGraph;
                    }
                });
                gv.addMetaInfoComponent(toggleDecisionTreeDetails);

                /*final JButton openInEditorButton = new JButton("Open in editor");
                openInEditorButton.addActionListener(new ActionListener() {
                    @Override
                    public void actionPerformed(ActionEvent e) {
                        GraphUtil.importDecisionTreeInEditor(dtFactory, dataSet, applicationContext,
                                eventPublisher, commandDispatcher);
                    }
                });
                this.gv.addMetaInfoComponent(openInEditorButton);*/

                // Button that shows the tree as a DSL string in a dialog.
                final JButton showTextButton = new JButton("In text");
                showTextButton.addActionListener(new ActionListener() {
                    @Override
                    public void actionPerformed(ActionEvent e) {
                        JOptionPane.showMessageDialog(null, graphDsl.getDslString(dt.getGraphWithOperations()));
                    }
                });
                gv.addMetaInfoComponent(showTextButton);

                panel.add(gv.asComponent(), BorderLayout.CENTER);
            } catch (Exception e1) {
                e1.printStackTrace();
                panel.add(new JXLabel("Error during computation: " + e1.getMessage()), BorderLayout.CENTER);
            }
        }
    });
}
From source file:lu.lippmann.cdb.dt.ModelTreeFactory.java
License:Open Source License
/**
 * Main method. Demo driver: trains an M5P model tree on a sample CSV dataset,
 * prints the model, its graph/DSL representations, and a resubstitution
 * evaluation.
 *
 * @param args command line arguments (unused)
 */
public static void main(final String[] args) {
    try {
        //final String f="./samples/csv/uci/winequality-red-simplified.csv";
        final String f = "./samples/csv/uci/winequality-white.csv";
        //final String f="./samples/arff/UCI/crimepredict.arff";
        final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f));
        // Sanity check: the class attribute is expected to be numeric (regression).
        System.out.println(dataSet.classAttribute().isNumeric());

        final M5P rt = new M5P();
        //rt.setUnpruned(true);
        rt.setMinNumInstances(1000);
        rt.buildClassifier(dataSet);
        System.out.println(rt);
        System.out.println(rt.graph());

        // Convert Weka's dot-style tree description into the app's graph model.
        final GraphWithOperations gwo = GraphUtil.buildGraphWithOperationsFromWekaRegressionString(rt.graph());
        System.out.println(gwo);
        System.out.println(new ASCIIGraphDsl().getDslString(gwo));

        final Evaluation eval = new Evaluation(dataSet);

        /*Field privateStringField = Evaluation.class.getDeclaredField("m_CoverageStatisticsAvailable");
        privateStringField.setAccessible(true);
        //privateStringField.get
        boolean fieldValue = privateStringField.getBoolean(eval);
        System.out.println("fieldValue = " + fieldValue);*/

        // NOTE(review): evaluation on the training data itself — resubstitution estimate.
        double[] d = eval.evaluateModel(rt, dataSet);
        System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(d));
        System.out.println(eval.errorRate());
        System.out.println(eval.sizeOfPredictedRegions());
        System.out.println(eval.toSummaryString("", true));

        System.out.println(new DecisionTree(gwo, eval.errorRate()));
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:lu.lippmann.cdb.dt.RegressionTreeFactory.java
License:Open Source License
/**
 * Main method. Demo driver: trains a depth-limited REPTree on a sample CSV
 * dataset, prints the model, its graph/DSL representations, and a
 * resubstitution evaluation.
 *
 * @param args command line arguments (unused)
 */
public static void main(final String[] args) {
    try {
        final String f = "./samples/csv/uci/winequality-red.csv";
        //final String f="./samples/arff/UCI/crimepredict.arff";
        final Instances dataSet = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f));
        // Sanity check: the class attribute is expected to be numeric (regression).
        System.out.println(dataSet.classAttribute().isNumeric());

        final REPTree rt = new REPTree();
        rt.setMaxDepth(3);
        rt.buildClassifier(dataSet);
        System.out.println(rt);
        //System.out.println(rt.graph());

        // Convert Weka's dot-style tree description into the app's graph model.
        final GraphWithOperations gwo = GraphUtil.buildGraphWithOperationsFromWekaRegressionString(rt.graph());
        System.out.println(gwo);
        System.out.println(new ASCIIGraphDsl().getDslString(gwo));

        final Evaluation eval = new Evaluation(dataSet);

        /*Field privateStringField = Evaluation.class.getDeclaredField("m_CoverageStatisticsAvailable");
        privateStringField.setAccessible(true);
        //privateStringField.get
        boolean fieldValue = privateStringField.getBoolean(eval);
        System.out.println("fieldValue = " + fieldValue);*/

        // NOTE(review): evaluation on the training data itself — resubstitution estimate.
        double[] d = eval.evaluateModel(rt, dataSet);
        System.out.println("PREDICTED -> " + FormatterUtil.buildStringFromArrayOfDoubles(d));
        System.out.println(eval.errorRate());
        System.out.println(eval.sizeOfPredictedRegions());
        System.out.println(eval.toSummaryString("", true));

        /*final String f2="./samples/csv/salary.csv";
        final Instances dataSet2=WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(new File(f2));
        final J48 j48=new J48();
        j48.buildClassifier(dataSet2);
        System.out.println(j48.graph());
        final GraphWithOperations gwo2=GraphUtil.buildGraphWithOperationsFromWekaString(j48.graph(),false);
        System.out.println(gwo2);*/

        System.out.println(new DecisionTree(gwo, eval.errorRate()));
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:trainableSegmentation.WekaSegmentation.java
License:GNU General Public License
/**
 * Get training error (from loaded data).
 *
 * Evaluates the current classifier on the loaded training set and returns
 * its error rate; -1 signals "no training header" or an evaluation failure.
 *
 * @param verbose option to display evaluation information in the log window
 * @return classifier error on the training data set, or -1 on failure
 */
public double getTrainingError(boolean verbose) {
    // Nothing has been trained/loaded yet.
    if (this.trainHeader == null) {
        return -1;
    }
    try {
        final Evaluation eval = new Evaluation(this.loadedTrainingData);
        eval.evaluateModel(classifier, this.loadedTrainingData);
        if (verbose) {
            IJ.log(eval.toSummaryString("\n=== Training set evaluation ===\n", false));
        }
        return eval.errorRate();
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    }
}