List of usage examples for the opennlp.tools.util.TrainingParameters constructor TrainingParameters(InputStream)
public TrainingParameters(InputStream in) throws IOException
From source file: com.textocat.textokit.postagger.opennlp.OpenNLPPosTaggerTrainerCLI.java
public static void main(String[] args) throws Exception { OpenNLPPosTaggerTrainerCLI cli = new OpenNLPPosTaggerTrainerCLI(); new JCommander(cli, args); ////from w w w . j a va 2s. c om OpenNLPPosTaggerTrainer trainer = new OpenNLPPosTaggerTrainer(); trainer.setLanguageCode(cli.languageCode); trainer.setModelOutFile(cli.modelOutFile); // train params { FileInputStream fis = FileUtils.openInputStream(cli.trainParamsFile); TrainingParameters trainParams; try { trainParams = new TrainingParameters(fis); } finally { IOUtils.closeQuietly(fis); } trainer.setTrainingParameters(trainParams); } // feature extractors { FileInputStream fis = FileUtils.openInputStream(cli.extractorParams); Properties props = new Properties(); try { props.load(fis); } finally { IOUtils.closeQuietly(fis); } MorphDictionary morphDict = getMorphDictionaryAPI().getCachedInstance().getResource(); trainer.setTaggerFactory(new POSTaggerFactory(DefaultFeatureExtractors.from(props, morphDict))); } // input sentence stream { ExternalResourceDescription morphDictDesc = getMorphDictionaryAPI() .getResourceDescriptionForCachedInstance(); TypeSystemDescription tsd = createTypeSystemDescription( "com.textocat.textokit.commons.Commons-TypeSystem", TokenizerAPI.TYPESYSTEM_TOKENIZER, SentenceSplitterAPI.TYPESYSTEM_SENTENCES, PosTaggerAPI.TYPESYSTEM_POSTAGGER); CollectionReaderDescription colReaderDesc = CollectionReaderFactory.createReaderDescription( XmiCollectionReader.class, tsd, XmiCollectionReader.PARAM_INPUTDIR, cli.trainingXmiDir); AnalysisEngineDescription posTrimmerDesc = PosTrimmingAnnotator .createDescription(cli.gramCategories.toArray(new String[cli.gramCategories.size()])); bindExternalResource(posTrimmerDesc, PosTrimmingAnnotator.RESOURCE_GRAM_MODEL, morphDictDesc); AnalysisEngineDescription tagAssemblerDesc = TagAssembler.createDescription(); bindExternalResource(tagAssemblerDesc, GramModelBasedTagMapper.RESOURCE_GRAM_MODEL, morphDictDesc); AnalysisEngineDescription aeDesc = 
createEngineDescription(posTrimmerDesc, tagAssemblerDesc); Iterator<Sentence> sentIter = AnnotationIteratorOverCollection.createIterator(Sentence.class, colReaderDesc, aeDesc); SpanStreamOverCollection<Sentence> sentStream = new SpanStreamOverCollection<Sentence>(sentIter); trainer.setSentenceStream(sentStream); } trainer.train(); }