List of usage examples for weka.classifiers Evaluation evaluateModel
public static String evaluateModel(Classifier classifier, String[] options) throws Exception
public double[] evaluateModel(Classifier classifier, Instances data) throws Exception
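All of the examples below use the instance method: an Evaluation object is constructed from a reference dataset (which supplies the header and class priors), and evaluateModel then scores a built classifier on a test set. As a quick orientation, here is a minimal self-contained sketch of that pattern; the ARFF paths and the J48 classifier are illustrative assumptions, not part of any example below.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;

public class EvaluateModelExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical paths; any ARFF files with a nominal class work here
        Instances train = new Instances(new BufferedReader(new FileReader("data/train.arff")));
        Instances test = new Instances(new BufferedReader(new FileReader("data/test.arff")));
        train.setClassIndex(train.numAttributes() - 1);
        test.setClassIndex(test.numAttributes() - 1);

        // Build any classifier on the training set
        J48 classifier = new J48();
        classifier.buildClassifier(train);

        // The Evaluation constructor takes a dataset used only for the
        // header and priors; evaluateModel then scores the test set
        Evaluation eval = new Evaluation(train);
        eval.evaluateModel(classifier, test);
        System.out.println(eval.toSummaryString("\nResults\n", false));
    }
}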
From source file:gyc.OverBoostM1.java
License:Open Source License
/**
 * Boosting method. Boosts any classifier that can handle weighted
 * instances.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierWithWeights(Instances data) throws Exception {

    Instances trainData, training;
    double epsilon, reweight;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);

    // Do bootstrap iterations
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length;
            m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training, 0, numInstances);
        }

        // Build the classifier
        if (m_Classifiers[m_NumIterationsPerformed] instanceof Randomizable) {
            ((Randomizable) m_Classifiers[m_NumIterationsPerformed]).setSeed(randomInstance.nextInt());
        }
        // This is the training data for building the base classifier
        m_Classifiers[m_NumIterationsPerformed].buildClassifier(trainData);

        // Evaluate the classifier
        evaluation = new Evaluation(data);
        evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
        epsilon = evaluation.errorRate();

        // Stop if error too small or error too big and ignore this model
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
From source file:gyc.UnderOverBoostM1.java
License:Open Source License
/**
 * Boosting method. Boosts using resampling.
 *
 * @param data the training data to be used for generating the
 * boosted classifier.
 * @throws Exception if the classifier could not be built successfully
 */
protected void buildClassifierUsingResampling(Instances data) throws Exception {

    Instances trainData, sample, training;
    double epsilon, reweight, sumProbs;
    Evaluation evaluation;
    int numInstances = data.numInstances();
    Random randomInstance = new Random(m_Seed);
    int resamplingIterations = 0;

    // Initialize data
    m_Betas = new double[m_Classifiers.length];
    m_NumIterationsPerformed = 0;

    // Create a copy of the data so that when the weights are diddled
    // with it doesn't mess up the weights for anyone else
    training = new Instances(data, 0, numInstances);
    sumProbs = training.sumOfWeights();
    for (int i = 0; i < training.numInstances(); i++) {
        training.instance(i).setWeight(training.instance(i).weight() / sumProbs);
    }

    // Do bootstrap iterations
    int b = 10;
    for (m_NumIterationsPerformed = 0; m_NumIterationsPerformed < m_Classifiers.length;
            m_NumIterationsPerformed++) {
        if (m_Debug) {
            System.err.println("Training classifier " + (m_NumIterationsPerformed + 1));
        }

        // Select instances to train the classifier on
        if (m_WeightThreshold < 100) {
            trainData = selectWeightQuantile(training, (double) m_WeightThreshold / 100);
        } else {
            trainData = new Instances(training);
        }

        // Resample
        resamplingIterations = 0;
        double[] weights = new double[trainData.numInstances()];
        for (int i = 0; i < weights.length; i++) {
            weights[i] = trainData.instance(i).weight();
        }
        do {
            sample = trainData.resampleWithWeights(randomInstance, weights);

            int classNum[] = sample.attributeStats(sample.classIndex()).nominalCounts;
            int minC, nMin = classNum[0];
            int majC, nMaj = classNum[1];
            if (nMin < nMaj) {
                minC = 0;
                majC = 1;
            } else {
                minC = 1;
                majC = 0;
                nMin = classNum[1];
                nMaj = classNum[0];
            }
            //System.out.println("minC=" + nMin + "; majC=" + nMaj);

            // Balance the data which boosting generates for training the base classifier
            //System.out.println("before:" + classNum[0] + "-" + classNum[1]);
            double pb = 100.0 * (nMin + nMaj) / 2 / nMaj;
            // if (m_NumIterationsPerformed + 1 > (m_Classifiers.length / 10)) b += 10;
            // (b% * Nmaj) instances are taken from each class
            Instances sampleData = randomSampling(sample, majC, minC, (int) pb, randomInstance);
            //classNum = sampleData.attributeStats(sampleData.classIndex()).nominalCounts;
            //System.out.println("after:" + classNum[0] + "-" + classNum[1]);

            // Build and evaluate classifier
            m_Classifiers[m_NumIterationsPerformed].buildClassifier(sampleData);
            evaluation = new Evaluation(data);
            evaluation.evaluateModel(m_Classifiers[m_NumIterationsPerformed], training);
            epsilon = evaluation.errorRate();
            resamplingIterations++;
        } while (Utils.eq(epsilon, 0) && (resamplingIterations < MAX_NUM_RESAMPLING_ITERATIONS));

        // Stop if error too big or 0
        if (Utils.grOrEq(epsilon, 0.5) || Utils.eq(epsilon, 0)) {
            if (m_NumIterationsPerformed == 0) {
                m_NumIterationsPerformed = 1; // If we're the first we have to use it
            }
            break;
        }

        // Determine the weight to assign to this model
        m_Betas[m_NumIterationsPerformed] = Math.log((1 - epsilon) / epsilon);
        reweight = (1 - epsilon) / epsilon;
        if (m_Debug) {
            System.err.println("\terror rate = " + epsilon + " beta = " + m_Betas[m_NumIterationsPerformed]);
        }

        // Update instance weights
        setWeights(training, reweight);
    }
}
From source file:hurtowniedanych.FXMLController.java
public void trainAndTestKNN() throws FileNotFoundException, IOException, Exception {

    InstanceQuery instanceQuery = new InstanceQuery();
    instanceQuery.setUsername("postgres");
    instanceQuery.setPassword("szupek");
    // Point to the properties file with the PostgreSQL settings
    instanceQuery.setCustomPropsFile(new File("./src/data/DatabaseUtils.props"));

    String query = "select ks.wydawnictwo, ks.gatunek, kl.miasto\n"
            + "from zakupy z, ksiazki ks, klienci kl\n"
            + "where ks.id_ksiazka = z.id_ksiazka and kl.id_klient = z.id_klient";
    instanceQuery.setQuery(query);
    Instances data = instanceQuery.retrieveInstances();
    data.setClassIndex(data.numAttributes() - 1);
    data.randomize(new Random());

    // Split the data 70/30 into training and test sets
    double percent = 70.0;
    int trainSize = (int) Math.round(data.numInstances() * percent / 100);
    int testSize = data.numInstances() - trainSize;
    Instances trainData = new Instances(data, 0, trainSize);
    Instances testData = new Instances(data, trainSize, testSize);

    int lSasiadow = Integer.parseInt(textFieldKnn.getText()); // number of neighbours
    System.out.println(lSasiadow);

    IBk ibk = new IBk(lSasiadow);

    // Choose the distance function
    EuclideanDistance euclidean = new EuclideanDistance(); // Euclidean distance
    ManhattanDistance manhatan = new ManhattanDistance(); // city-block distance
    LinearNNSearch linearNN = new LinearNNSearch();
    if (comboboxOdleglosc.getSelectionModel().getSelectedItem().equals("Manhatan")) {
        linearNN.setDistanceFunction(manhatan);
    } else {
        linearNN.setDistanceFunction(euclidean);
    }
    ibk.setNearestNeighbourSearchAlgorithm(linearNN); // set the neighbour search method

    // Build the classifier and evaluate it on the test set
    ibk.buildClassifier(trainData);
    Evaluation eval = new Evaluation(trainData);
    eval.evaluateModel(ibk, testData);

    spr.setVisible(true);
    labelKnn.setVisible(true);
    labelOdleglosc.setVisible(true);
    labelKnn.setText(textFieldKnn.getText());
    labelOdleglosc.setText(comboboxOdleglosc.getSelectionModel().getSelectedItem().toString());
    spr.setText(eval.toSummaryString("Wynik:", true));
}
From source file:id3.MyID3.java
/**
 * Main method
 * @param args arguments
 */
public static void main(String[] args) {
    Instances instances;
    try {
        BufferedReader reader = new BufferedReader(
                new FileReader("D:\\Weka-3-6\\data\\weather.nominal.arff"));
        try {
            instances = new Instances(reader);
            instances.setClassIndex(instances.numAttributes() - 1);
            MyID3 id3 = new MyID3();
            try {
                id3.buildClassifier(instances);
            } catch (Exception e) {
                e.printStackTrace();
            }

            // Test class distribution
            double[] classDistribution = id3.classDistribution(instances);
            for (int i = 0; i < classDistribution.length; i++) {
                System.out.println(classDistribution[i]);
            }

            // Test entropy and information gain for each attribute
            System.out.println(id3.computeEntropy(instances));
            Enumeration attributes = instances.enumerateAttributes();
            while (attributes.hasMoreElements()) {
                System.out.println(id3.computeIG(instances, (Attribute) attributes.nextElement()));
            }

            // Test build classifier
            try {
                id3.buildClassifier(instances);
            } catch (Exception e) {
                e.printStackTrace();
            }
            System.out.println(id3.toString());

            // Evaluate model from the built classifier (full training set)
            Evaluation eval = null;
            try {
                eval = new Evaluation(instances);
            } catch (Exception e) {
                e.printStackTrace();
            }
            try {
                System.out.println(instances);
                eval.evaluateModel(id3, instances);
            } catch (Exception e) {
                e.printStackTrace();
            }
            System.out.println(eval.toSummaryString("\nResults Full-Training\n\n", false));

            // Test confusion matrix
            System.out.println("Confusion Matrix : ");
            double[][] cmMatrix = eval.confusionMatrix();
            for (int row_i = 0; row_i < cmMatrix.length; row_i++) {
                for (int col_i = 0; col_i < cmMatrix.length; col_i++) {
                    System.out.print(cmMatrix[row_i][col_i]);
                    System.out.print("|");
                }
                System.out.println();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}
From source file:id3classifier.Main.java
public static void main(String[] args) throws Exception {

    ConverterUtils.DataSource source = new ConverterUtils.DataSource(file);
    Instances dataSet = source.getDataSet();

    // Discretize the dataset
    Discretize filter = new Discretize();
    filter.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, filter);

    // Standardize the dataset
    Standardize standardizedData = new Standardize();
    standardizedData.setInputFormat(dataSet);
    dataSet = Filter.useFilter(dataSet, standardizedData);

    // Randomize the dataset
    dataSet.setClassIndex(dataSet.numAttributes() - 1);
    dataSet.randomize(new Debug.Random());

    // Get the sizes of the training and testing sets and split
    int trainingSize = (int) Math.round(dataSet.numInstances() * .7);
    int testSize = dataSet.numInstances() - trainingSize;
    Instances training = new Instances(dataSet, 0, trainingSize);
    Instances test = new Instances(dataSet, trainingSize, testSize);

    // Set up the ID3 classifier on the training data
    ID3Classifiers classifier = new ID3Classifiers();
    classifier.buildClassifier(training);

    // Set up the evaluation and test using the classifier and test set
    Evaluation eval = new Evaluation(dataSet);
    eval.evaluateModel(classifier, test);

    // Output and exit; important to exit here to stop JavaFX
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
    System.exit(0);
}
From source file:id3j48.WekaAccess.java
public static Evaluation testModel(Classifier classifier, Instances data, Instances test) throws Exception {
    Evaluation evaluation = new Evaluation(data);
    evaluation.evaluateModel(classifier, test);
    return evaluation;
}
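A hypothetical call site for the helper above (the J48 classifier and the trainData/testData variables are assumed for illustration; the classifier must already be built before it is passed in):

// Hypothetical usage; testModel evaluates an already-trained classifier
J48 j48 = new J48();
j48.buildClassifier(trainData);
Evaluation evaluation = WekaAccess.testModel(j48, trainData, testData);
System.out.println(evaluation.toSummaryString());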
From source file:irisdata.IrisData.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {

    String file = "/Users/paul/Desktop/BYU-Idaho/Spring2015/CS450/iris.csv";

    DataSource source = new DataSource(file);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }
    data.randomize(new Random(1));

    // set training set to 70%
    RemovePercentage remove = new RemovePercentage();
    remove.setPercentage(30);
    remove.setInputFormat(data);
    Instances trainingSet = Filter.useFilter(data, remove);

    // set the rest for the testing set
    remove.setInvertSelection(true);
    Instances testSet = Filter.useFilter(data, remove);

    // train classifier - kind of
    HardCodedClassifier classifier = new HardCodedClassifier();
    classifier.buildClassifier(trainingSet); // this does nothing right now

    // Evaluate classifier
    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(classifier, testSet);
    //eval.crossValidateModel(classifier, data, 10, new Random(1));

    // Print some statistics
    System.out.println("Results: " + eval.toSummaryString());
}
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier,
        Instances pInstances, String pModelName, String pClassifierName) throws Exception {

    // other options
    int folds = 10;

    // randomize data
    Random rand = new Random(42);
    Instances randData = new Instances(pInstances);
    randData.randomize(rand);
    if (randData.classAttribute().isNominal()) {
        randData.stratify(folds);
    }

    // perform cross-validation and add predictions
    Instances predictedData = null;
    Evaluation eval = new Evaluation(randData);
    int positiveValueIndexOfClassFeature = 0;
    for (int n = 0; n < folds; n++) {
        Instances train = randData.trainCV(folds, n);
        Instances test = randData.testCV(folds, n);
        // the above code is used by the StratifiedRemoveFolds filter, the
        // code below by the Explorer/Experimenter:
        // Instances train = randData.trainCV(folds, n, rand);

        int classFeatureIndex = 0;
        for (int i = 0; i < train.numAttributes(); i++) {
            if (train.attribute(i).name().equals("isBuggy")) {
                classFeatureIndex = i;
                break;
            }
        }

        Attribute classFeature = train.attribute(classFeatureIndex);
        for (int i = 0; i < classFeature.numValues(); i++) {
            if (classFeature.value(i).equals("TRUE")) {
                positiveValueIndexOfClassFeature = i;
            }
        }

        train.setClassIndex(classFeatureIndex);
        test.setClassIndex(classFeatureIndex);

        // build and evaluate classifier
        pClassifier.buildClassifier(train);
        eval.evaluateModel(pClassifier, test);

        // add predictions
        // AddClassification filter = new AddClassification();
        // filter.setClassifier(pClassifier);
        // filter.setOutputClassification(true);
        // filter.setOutputDistribution(true);
        // filter.setOutputErrorFlag(true);
        // filter.setInputFormat(train);
        // Filter.useFilter(train, filter);
        // Instances pred = Filter.useFilter(test, filter);
        // if (predictedData == null)
        //     predictedData = new Instances(pred, 0);
        // for (int j = 0; j < pred.numInstances(); j++)
        //     predictedData.add(pred.instance(j));
    }

    double accuracy = (eval.numTruePositives(positiveValueIndexOfClassFeature)
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature))
            / (eval.numTruePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalsePositives(positiveValueIndexOfClassFeature)
                    + eval.numFalseNegatives(positiveValueIndexOfClassFeature)
                    + eval.numTrueNegatives(positiveValueIndexOfClassFeature));

    double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature)
            * eval.recall(positiveValueIndexOfClassFeature))
            / (eval.precision(positiveValueIndexOfClassFeature)
                    + eval.recall(positiveValueIndexOfClassFeature)));

    File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv");
    PrintWriter pw1 = new PrintWriter(wekaOutput);
    pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature));
    pw1.close(); // close the writer so the output is actually flushed to disk

    System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";"
            + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";"
            + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";"
            + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";"
            + accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";"
            + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";"
            + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n");
}
From source file:j48.NBTreeNoSplit.java
License:Open Source License
/**
 * Utility method for fast 5-fold cross validation of a naive bayes
 * model.
 *
 * @param fullModel a <code>NaiveBayesUpdateable</code> value
 * @param trainingSet an <code>Instances</code> value
 * @param r a <code>Random</code> value
 * @return a <code>double</code> value
 * @exception Exception if an error occurs
 */
public static double crossValidate(NaiveBayesUpdateable fullModel, Instances trainingSet, Random r)
        throws Exception {

    // make some copies for fast evaluation of 5-fold xval
    Classifier[] copies = Classifier.makeCopies(fullModel, 5);
    Evaluation eval = new Evaluation(trainingSet);

    // make some splits
    for (int j = 0; j < 5; j++) {
        Instances test = trainingSet.testCV(5, j);
        // "unlearn" these test instances: updating the full model with a
        // negatively weighted instance removes its contribution
        for (int k = 0; k < test.numInstances(); k++) {
            test.instance(k).setWeight(-test.instance(k).weight());
            ((NaiveBayesUpdateable) copies[j]).updateClassifier(test.instance(k));
            // reset the weight back to its original value
            test.instance(k).setWeight(-test.instance(k).weight());
        }
        eval.evaluateModel(copies[j], test);
    }
    return eval.incorrect();
}
From source file:kfst.classifier.WekaClassifier.java
License:Open Source License
/**
 * This method builds and evaluates the support vector machine (SVM)
 * classifier. SMO is used as the SVM classifier, as implemented in the
 * Weka software.
 *
 * @param pathTrainData the path of the train set
 * @param pathTestData the path of the test set
 * @param svmKernel the kernel to use
 *
 * @return the classification accuracy
 */
public static double SVM(String pathTrainData, String pathTestData, String svmKernel) {
    double resultValue = 0;
    try {
        BufferedReader readerTrain = new BufferedReader(new FileReader(pathTrainData));
        Instances dataTrain = new Instances(readerTrain);
        readerTrain.close();
        dataTrain.setClassIndex(dataTrain.numAttributes() - 1);

        BufferedReader readerTest = new BufferedReader(new FileReader(pathTestData));
        Instances dataTest = new Instances(readerTest);
        readerTest.close();
        dataTest.setClassIndex(dataTest.numAttributes() - 1);

        SMO svm = new SMO();
        if (svmKernel.equals("Polynomial kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.PolyKernel.class.newInstance());
        } else if (svmKernel.equals("RBF kernel")) {
            svm.setKernel(weka.classifiers.functions.supportVector.RBFKernel.class.newInstance());
        } else {
            svm.setKernel(weka.classifiers.functions.supportVector.Puk.class.newInstance());
        }
        svm.buildClassifier(dataTrain);

        Evaluation eval = new Evaluation(dataTest);
        eval.evaluateModel(svm, dataTest);
        resultValue = 100 - (eval.errorRate() * 100);
    } catch (Exception ex) {
        Logger.getLogger(WekaClassifier.class.getName()).log(Level.SEVERE, null, ex);
    }
    return resultValue;
}