List of usage examples for opennlp.tools.sentdetect SentenceDetectorEvaluator getFMeasure
public FMeasure getFMeasure()
From source file:com.civprod.writerstoolbox.OpenNLP.training.SentenceDetectorTrainer.java
private void cmdTrainSentenceDetectorActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainSentenceDetectorActionPerformed final SentenceDetectorTrainer tempThis = this; new Thread(() -> { textTestResults.setText(""); Charset charset = Charset.forName("UTF-8"); //read other models SentenceDetector stdDetector = null; try {//from ww w. j a v a 2 s. c o m stdDetector = OpenNLPUtils.createSentenceDetector(); } catch (IOException ex) { } List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel); File trainingFile = new File("en-sent.train"); File testFile = new File("en-sent.test"); SummaryStatistics curFStats = new SummaryStatistics(); SummaryStatistics curRecallStats = new SummaryStatistics(); SummaryStatistics curPrecisionStats = new SummaryStatistics(); SummaryStatistics stdFStats = new SummaryStatistics(); SummaryStatistics stdRecallStats = new SummaryStatistics(); SummaryStatistics stdPrecisionStats = new SummaryStatistics(); java.io.BufferedOutputStream trainingFileWriter = null; for (FileSplit curFileSplit : FileSplits) { try { //create training file trainingFileWriter = new java.io.BufferedOutputStream( new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : curFileSplit.getTrainingFiles()) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create test file java.io.BufferedOutputStream testFileWriter = null; try { //create training file testFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(testFile)); for (File curTrainingFile : curFileSplit.getTestFiles()) { String testingFileName = curTrainingFile.getCanonicalPath(); textTestResults .setText(textTestResults.getText() + "testing with " + testingFileName + "\n"); java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { int read = curTrainingFileReader.read(); testFileWriter.write(read); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } testFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testFileWriter != null) { try { testFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create SentenceDetectorFactory part of the training context SentenceDetectorFactory mySentenceDetectorFactory = new SentenceDetectorFactory("EN", cbUseTokenEnd.isSelected(), mAbbreviationDictionary, txtEosChars.getText().toCharArray()); ObjectStream<String> trainingLineStream = null; SentenceModel train = null; try { trainingLineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset); ObjectStream<SentenceSample> sampleStream = null; try { sampleStream = new SentenceSampleStream(trainingLineStream); train = SentenceDetectorME.train("EN", sampleStream, mySentenceDetectorFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingLineStream != null) { try { trainingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } trainingLineStream = null; if (train != null) { ObjectStream<String> testingLineStream = null; try { testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset); ObjectStream<SentenceSample> sampleStream = null; try { sampleStream = new SentenceSampleStream(testingLineStream); SentenceDetectorME testDetector = new SentenceDetectorME(train); SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(testDetector); evaluator.evaluate(sampleStream); FMeasure testFMeasure = evaluator.getFMeasure(); curFStats.addValue(testFMeasure.getFMeasure()); curRecallStats.addValue(testFMeasure.getRecallScore()); curPrecisionStats.addValue(testFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " " + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n"); if (stdDetector != null) { testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset); sampleStream = new SentenceSampleStream(testingLineStream); SentenceDetectorEvaluator stdEvaluator = new SentenceDetectorEvaluator(stdDetector); stdEvaluator.evaluate(sampleStream); FMeasure stdFMeasure = stdEvaluator.getFMeasure(); stdFStats.addValue(stdFMeasure.getFMeasure()); stdRecallStats.addValue(stdFMeasure.getRecallScore()); stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore()); textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " " + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n"); } textTestResults.setText(textTestResults.getText() + "\n"); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (testingLineStream != null) { try { testingLineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } } textTestResults.setText(textTestResults.getText() + "\n"); textTestResults.setText(textTestResults.getText() + "test model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev " + curFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev " + curRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean() + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "std model\n"); textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev " + stdFStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev " + stdRecallStats.getStandardDeviation() + "\n"); textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean() + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n"); //create combinded training file trainingFileWriter = null; try { trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile)); for (File curTrainingFile : mFileCollectionListModel) { java.io.BufferedInputStream curTrainingFileReader = null; try { curTrainingFileReader = new java.io.BufferedInputStream( new java.io.FileInputStream(curTrainingFile)); while (curTrainingFileReader.available() > 0) { trainingFileWriter.write(curTrainingFileReader.read()); } } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (curTrainingFileReader != null) { curTrainingFileReader.close(); } } } trainingFileWriter.write('\n'); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (trainingFileWriter != null) { try { trainingFileWriter.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } //create SentenceDetectorFactory part of the training context SentenceDetectorFactory mySentenceDetectorFactory = new SentenceDetectorFactory("EN", cbUseTokenEnd.isSelected(), mAbbreviationDictionary, txtEosChars.getText().toCharArray()); //create and train model ObjectStream<String> lineStream = null; this.createdObject = null; try { lineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset); ObjectStream<SentenceSample> sampleStream = null; try { sampleStream = new SentenceSampleStream(lineStream); this.createdObject = SentenceDetectorME.train("EN", sampleStream, mySentenceDetectorFactory, TrainingParameters.defaultParams()); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (sampleStream != null) { try { sampleStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } catch (FileNotFoundException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (lineStream != null) { try { lineStream.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } if (createdObject != null) { OutputStream modelOut = null; File modelFile = new File("en-fiction-sent.bin"); try { modelOut = new BufferedOutputStream(new FileOutputStream(modelFile)); createdObject.serialize(modelOut); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } finally { if (modelOut != null) { try { modelOut.close(); } catch (IOException ex) { Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex); } } } } textTestResults.setText(textTestResults.getText() + "done"); }).start(); }