List of usage examples for weka.classifiers Classifier buildClassifier
public abstract void buildClassifier(Instances data) throws Exception;
From source file:gnusmail.learning.ClassifierManager.java
License:Open Source License
/**
 * Batch training: builds a NaiveBayesUpdateable model over the manager's
 * current {@code dataSet} and serializes it to {@code ConfigManager.MODEL_FILE}.
 */
public void trainModel() {
    Classifier model = new NaiveBayesUpdateable();
    try {
        model.buildClassifier(dataSet);
    } catch (Exception e) {
        // The original swallowed this silently and returned; log it so a
        // failed training run is visible to the operator.
        e.printStackTrace();
        return;
    }
    // try-with-resources guarantees the stream is closed even if writeObject
    // fails (the original leaked it on that path, and misnamed it "fis").
    try (ObjectOutputStream out =
            new ObjectOutputStream(new FileOutputStream(ConfigManager.MODEL_FILE))) {
        out.writeObject(model);
    } catch (FileNotFoundException e) {
        System.out.println("File " + ConfigManager.MODEL_FILE.getAbsolutePath() + " not found");
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:id3j48.WekaAccess.java
/**
 * Trains the given classifier on the supplied dataset and hands it back,
 * allowing fluent use such as {@code evaluate(buildClassifier(data, cls))}.
 *
 * @param data       training instances (class index must already be set)
 * @param classifier the untrained classifier to fit
 * @return the same classifier instance, now trained
 * @throws Exception if Weka's buildClassifier fails
 */
public static Classifier buildClassifier(Instances data, Classifier classifier) throws Exception {
    classifier.buildClassifier(data);
    return classifier;
}
From source file:id3j48.WekaAccess.java
public static Evaluation percentageSplit(Instances data, Classifier classifier, int percentage) throws Exception { Instances tempdata = new Instances(data); tempdata.randomize(new Random(1)); int trainSize = Math.round(tempdata.numInstances() * percentage / 100); int testSize = tempdata.numInstances() - trainSize; Instances train = new Instances(tempdata, 0, trainSize); Instances test = new Instances(tempdata, trainSize, testSize); classifier.buildClassifier(train); Evaluation eval = testModel(classifier, train, test); return eval;// w w w . j ava 2 s.co m }
From source file:it.unisa.gitdm.evaluation.WekaEvaluator.java
private static void evaluateModel(String baseFolderPath, String projectName, Classifier pClassifier, Instances pInstances, String pModelName, String pClassifierName) throws Exception { // other options int folds = 10; // randomize data Random rand = new Random(42); Instances randData = new Instances(pInstances); randData.randomize(rand);/* w w w .j av a 2 s .c om*/ if (randData.classAttribute().isNominal()) { randData.stratify(folds); } // perform cross-validation and add predictions Instances predictedData = null; Evaluation eval = new Evaluation(randData); int positiveValueIndexOfClassFeature = 0; for (int n = 0; n < folds; n++) { Instances train = randData.trainCV(folds, n); Instances test = randData.testCV(folds, n); // the above code is used by the StratifiedRemoveFolds filter, the // code below by the Explorer/Experimenter: // Instances train = randData.trainCV(folds, n, rand); int classFeatureIndex = 0; for (int i = 0; i < train.numAttributes(); i++) { if (train.attribute(i).name().equals("isBuggy")) { classFeatureIndex = i; break; } } Attribute classFeature = train.attribute(classFeatureIndex); for (int i = 0; i < classFeature.numValues(); i++) { if (classFeature.value(i).equals("TRUE")) { positiveValueIndexOfClassFeature = i; } } train.setClassIndex(classFeatureIndex); test.setClassIndex(classFeatureIndex); // build and evaluate classifier pClassifier.buildClassifier(train); eval.evaluateModel(pClassifier, test); // add predictions // AddClassification filter = new AddClassification(); // filter.setClassifier(pClassifier); // filter.setOutputClassification(true); // filter.setOutputDistribution(true); // filter.setOutputErrorFlag(true); // filter.setInputFormat(train); // Filter.useFilter(train, filter); // Instances pred = Filter.useFilter(test, filter); // if (predictedData == null) // predictedData = new Instances(pred, 0); // // for (int j = 0; j < pred.numInstances(); j++) // predictedData.add(pred.instance(j)); } double accuracy = 
(eval.numTruePositives(positiveValueIndexOfClassFeature) + eval.numTrueNegatives(positiveValueIndexOfClassFeature)) / (eval.numTruePositives(positiveValueIndexOfClassFeature) + eval.numFalsePositives(positiveValueIndexOfClassFeature) + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + eval.numTrueNegatives(positiveValueIndexOfClassFeature)); double fmeasure = 2 * ((eval.precision(positiveValueIndexOfClassFeature) * eval.recall(positiveValueIndexOfClassFeature)) / (eval.precision(positiveValueIndexOfClassFeature) + eval.recall(positiveValueIndexOfClassFeature))); File wekaOutput = new File(baseFolderPath + projectName + "/predictors.csv"); PrintWriter pw1 = new PrintWriter(wekaOutput); pw1.write(accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";" + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";" + eval.areaUnderROC(positiveValueIndexOfClassFeature)); System.out.println(projectName + ";" + pClassifierName + ";" + pModelName + ";" + eval.numTruePositives(positiveValueIndexOfClassFeature) + ";" + eval.numFalsePositives(positiveValueIndexOfClassFeature) + ";" + eval.numFalseNegatives(positiveValueIndexOfClassFeature) + ";" + eval.numTrueNegatives(positiveValueIndexOfClassFeature) + ";" + accuracy + ";" + eval.precision(positiveValueIndexOfClassFeature) + ";" + eval.recall(positiveValueIndexOfClassFeature) + ";" + fmeasure + ";" + eval.areaUnderROC(positiveValueIndexOfClassFeature) + "\n"); }
From source file:LeerArchivo.Leer.java
public void leerArchivoArff() { try {// ww w . j av a 2 s . c om // create J48 Classifier cls = new KStar(); // train Instances inst = new Instances(new BufferedReader(new FileReader("../datos.arff"))); inst.setClassIndex(inst.numAttributes() - 1); cls.buildClassifier(inst); // serialize model ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("./KStar.model")); oos.writeObject(cls); oos.flush(); oos.close(); } catch (IOException ex) { Logger.getLogger(Leer.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(Leer.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:LVCoref.WekaWrapper.java
License:Open Source License
public static void main(String[] args) { try {//from w ww .j a v a 2s. co m List<Document> docs = new LinkedList<Document>(); Document d = new Document(); d.readCONLL("data/pipeline/interview_16.lvsem.conll"); d.addAnnotationMMAX("data/interview_16_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_23.lvsem.conll"); d.addAnnotationMMAX("data/interview_23_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_27.lvsem.conll"); d.addAnnotationMMAX("data/interview_27_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_38.lvsem.conll"); d.addAnnotationMMAX("data/interview_38_coref_level.xml"); d.useGoldMentions(); docs.add(d); Instances train = toArff2(docs); train.setClassIndex(train.numAttributes() - 1); String[] options = { "-U" };//, "-C", "0.5"}; Classifier cls = new J48(); cls.setOptions(options); cls.buildClassifier(train); docs = new LinkedList<Document>(); d = new Document(); d.readCONLL("data/pipeline/interview_43.lvsem.conll"); d.addAnnotationMMAX("data/interview_43_coref_level.xml"); d.useGoldMentions(); docs.add(d); d = new Document(); d.readCONLL("data/pipeline/interview_46.lvsem.conll"); d.addAnnotationMMAX("data/interview_46_coref_level.xml"); d.useGoldMentions(); docs.add(d); Evaluation eval = new Evaluation(train); Instances data = toArff2(docs); data.setClassIndex(data.numAttributes() - 1); for (int i = 0; i < data.numInstances(); i++) { double clsLabel = cls.classifyInstance(data.instance(i)); //System.out.println(clsLabel); data.instance(i).setClassValue(clsLabel); System.out.println(data.instance(i).toString(data.classIndex())); } // eval.crossValidateModel(cls, train, 10, new Random(1)); // // generate curve // ThresholdCurve tc = new ThresholdCurve(); // //int classIndex = test.numAttributes()-1; // Instances result = tc.getCurve(eval.predictions());//, classIndex); // // // 
plot curve // ThresholdVisualizePanel vmc = new ThresholdVisualizePanel(); // vmc.setROCString("(Area under ROC = " + // weka.core.Utils.doubleToString(tc.getROCArea(result), 4) + ")"); // vmc.setName(result.relationName()); // PlotData2D tempd = new PlotData2D(result); // tempd.setPlotName(result.relationName()); // tempd.addInstanceNumberAttribute(); // // specify which points are connected // boolean[] cp = new boolean[result.numInstances()]; // for (int n = 1; n < cp.length; n++) // cp[n] = true; // tempd.setConnectPoints(cp); // // add plot // vmc.addPlot(tempd); // // // display curve // String plotName = vmc.getName(); // final javax.swing.JFrame jf = // new javax.swing.JFrame("Weka Classifier Visualize: "+plotName); // jf.setSize(500,400); // jf.getContentPane().setLayout(new BorderLayout()); // jf.getContentPane().add(vmc, BorderLayout.CENTER); // jf.addWindowListener(new java.awt.event.WindowAdapter() { // public void windowClosing(java.awt.event.WindowEvent e) { // jf.dispose(); // } // }); // jf.setVisible(true); // Instances test = toArff2(docs); // test.setClassIndex(test.numAttributes()-1); // // // Evaluation evals = new Evaluation(train); // // evals.evaluateModel(cls, test); // System.out.println(evals.toSummaryString("\nResults\n======\n", false)); // System.out.println(evals.toMatrixString()); // System.out.println(evals.toClassDetailsString()); // // System.out.println(cls); // //System.out.println(toArff2(docs)); } catch (Exception ex) { Logger.getLogger(WekaWrapper.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:machinelearningcw.MachineLearningCw.java
public static void timingExperiment(Classifier s, Instances[] data) throws Exception { /* get the biggest data set */ Instances largestData = data[0];//from ww w .ja v a 2 s . c o m for (int i = 0; i < data.length; i++) { if (largestData.numInstances() < data[i].numInstances()) { largestData = data[i]; } } for (int i = 1; i <= 7; i++) { int percent = i * 10; int train_size = (int) Math.round(largestData.numInstances() * percent / 100); int testSize = largestData.numInstances() - train_size; Instances train = new Instances(largestData, 0, train_size); Instances test = new Instances(largestData, train_size, testSize); long t1 = System.currentTimeMillis(); s.buildClassifier(train); for (Instance ins : test) { s.classifyInstance(ins); } long t2 = System.currentTimeMillis() - t1; //change to seconds System.out.println("TIME TAKEN " + i + ": " + t2); } System.out.println("\n"); }
From source file:machinelearning_cw.MachineLearning_CW.java
/**
 * Tests the accuracy of a classifier against a collection of datasets by
 * resampling: for each dataset pair, the train and test sets are merged and
 * randomly re-split {@code t} times, the classifier retrained on each split,
 * and the running average accuracy printed after each dataset.
 *
 * @param classifier       the classifier to be tested
 * @param trainingDatasets training data per dataset (cleared and refilled in place)
 * @param testDatasets     test data per dataset (cleared and refilled in place)
 * @param t                the number of times the data should be resampled
 * @throws Exception if training or evaluation fails
 */
public static void performClassifierAccuracyTests(Classifier classifier, ArrayList<Instances> trainingDatasets,
        ArrayList<Instances> testDatasets, int t) throws Exception {
    ArrayList<Double> accuracies = new ArrayList<Double>();
    Random randomGenerator = new Random();

    for (int i = 0; i < trainingDatasets.size(); i++) {
        Instances train = trainingDatasets.get(i);
        Instances test = testDatasets.get(i);

        /* Test by resampling: merge train and test, then re-split randomly. */
        for (int j = 0; j < t; j++) {
            Instances mergedDataSet = mergeDataSets(train, test);
            train.clear();
            test.clear();

            // Randomly sample half the merged data (without replacement)
            // to form the train set.
            int n = mergedDataSet.size() / 2;
            for (int k = 0; k < n; k++) {
                int indexToRemove = randomGenerator.nextInt(mergedDataSet.size());
                train.add(mergedDataSet.remove(indexToRemove));
            }

            // Reserve ALL remaining instances as test data. BUG FIX: the
            // original `for (k = 0; k < size(); k++) test.add(remove(k))`
            // skipped every other instance, because remove() shifts the
            // indices down while k keeps advancing.
            while (!mergedDataSet.isEmpty()) {
                test.add(mergedDataSet.remove(mergedDataSet.size() - 1));
            }

            /* Train classifier (recalculates k for k-NN style learners). */
            classifier.buildClassifier(train);

            // Record this split's accuracy.
            double accuracy = Helpers.findClassifierAccuracy(classifier, test);
            accuracies.add(accuracy);
        }
        // NOTE: as in the original, the list is never cleared between
        // datasets, so this is a running average over all splits so far.
        double accuracyAverage = average(accuracies);
        System.out.println(accuracyAverage);
    }
}
From source file:machinelearning_cw.MachineLearning_CW.java
/** * /* w ww. jav a2s.c o m*/ * Tests the speed of a classifier in milliseconds and prints it. * This is achieved by checking the time taken for the given classifier to * classify some given data. * * * @param classifier The classifier to be tested. * @param train The data with which to train the classifier. * @param test The data the classifier is to be tested against. * @param t The number of times the test should be carried out and averaged. * @throws Exception */ public static void timeClassifier(Classifier classifier, Instances train, Instances test, int t) throws Exception { ArrayList<Double> times = new ArrayList<Double>(); /* Carry out test t+1 times and average. * The first run is ignored to offset the effects of * the Java Garbage Collector and caching. */ for (int i = 0; i < t + 1; i++) { // Time the build and classifyInstance methods double t1 = System.nanoTime(); classifier.buildClassifier(train); for (Instance eachInstance : test) { classifier.classifyInstance(eachInstance); } double t2 = System.nanoTime() - t1; // Convert to ms double timeTaken = t2 / 1000000.0; if (i != 0) { times.add(timeTaken); } } double averageTime = average(times); System.out.println(averageTime); }
From source file:mao.datamining.ModelProcess.java
/**
 * Trains {@code classifier} on the final training dataset, evaluates it on the
 * final test dataset, writes a human-readable summary / class details /
 * confusion matrix to {@code testCaseSummaryOut}, and records timings and
 * metrics (AUC, precision, recall for class index 1) into {@code result}.
 * Does nothing when {@code finalTestDataSet} is null.
 */
private void testWithExtraDS(Classifier classifier, Instances finalTrainDataSet, Instances finalTestDataSet,
        FileOutputStream testCaseSummaryOut, TestResult result) {
    // Use final training dataset and final test dataset.
    double confusionMatrix[][] = null;
    long start, end, trainTime = 0, testTime = 0;
    if (finalTestDataSet != null) {
        try {
            // Measure training time.
            start = System.currentTimeMillis();
            classifier.buildClassifier(finalTrainDataSet);
            end = System.currentTimeMillis();
            trainTime += end - start;

            // Measure test (evaluation) time.
            start = System.currentTimeMillis();
            Evaluation testEvalOnly = new Evaluation(finalTrainDataSet);
            testEvalOnly.evaluateModel(classifier, finalTestDataSet);
            end = System.currentTimeMillis();
            testTime += end - start;

            // Append the evaluation report sections to the summary stream.
            // NOTE(review): getBytes() uses the platform default charset here —
            // confirm UTF-8 is intended if the summary file must be portable.
            testCaseSummaryOut.write("=====================================================\n".getBytes());
            testCaseSummaryOut.write((testEvalOnly.toSummaryString("=== Test Summary ===", true)).getBytes());
            testCaseSummaryOut.write("\n".getBytes());
            testCaseSummaryOut
                    .write((testEvalOnly.toClassDetailsString("=== Test Class Detail ===\n")).getBytes());
            testCaseSummaryOut.write("\n".getBytes());
            testCaseSummaryOut
                    .write((testEvalOnly.toMatrixString("=== Confusion matrix for Test ===\n")).getBytes());
            testCaseSummaryOut.flush();

            // Record the metrics; class index 1 is treated as the positive
            // class here (presumably the "true"/buggy label — verify against
            // the dataset definition).
            confusionMatrix = testEvalOnly.confusionMatrix();
            result.setConfusionMatrix4Test(confusionMatrix);
            result.setAUT(testEvalOnly.areaUnderROC(1));
            result.setPrecision(testEvalOnly.precision(1));
            result.setRecall(testEvalOnly.recall(1));
        } catch (Exception e) {
            ModelProcess.logging(null, e);
        }
        // Timings are recorded even if evaluation threw (they stay at the
        // values accumulated so far).
        result.setTrainingTime(trainTime);
        result.setTestTime(testTime);
    } // using test data set, end
}