Example usage of the opennlp.tools.util.TrainingParameters constructor

List of usage examples for the opennlp.tools.util.TrainingParameters constructor

Introduction

On this page you can find example usage of the opennlp.tools.util.TrainingParameters constructor.

Prototype

public TrainingParameters(InputStream in) throws IOException 

Source Link

Usage

From source file:com.textocat.textokit.postagger.opennlp.OpenNLPPosTaggerTrainerCLI.java

/**
 * Command-line entry point: binds the arguments to an {@link OpenNLPPosTaggerTrainerCLI},
 * configures an {@link OpenNLPPosTaggerTrainer} from the resulting options and runs the training.
 *
 * <p>Configuration happens in this order: language code and model output file, training
 * parameters, tagger factory (feature extractors), and finally the input sentence stream.
 *
 * @param args command-line arguments handed to JCommander
 * @throws Exception if any configuration step or the training itself fails
 */
public static void main(String[] args) throws Exception {
    OpenNLPPosTaggerTrainerCLI options = new OpenNLPPosTaggerTrainerCLI();
    new JCommander(options, args);

    OpenNLPPosTaggerTrainer posTrainer = new OpenNLPPosTaggerTrainer();
    posTrainer.setLanguageCode(options.languageCode);
    posTrainer.setModelOutFile(options.modelOutFile);

    // Training parameters are read from the file given on the command line;
    // the stream is closed quietly whether or not reading succeeds.
    FileInputStream trainParamsIn = FileUtils.openInputStream(options.trainParamsFile);
    TrainingParameters trainingParameters;
    try {
        trainingParameters = new TrainingParameters(trainParamsIn);
    } finally {
        IOUtils.closeQuietly(trainParamsIn);
    }
    posTrainer.setTrainingParameters(trainingParameters);

    // Feature extractor settings are loaded as java.util.Properties and combined
    // with the cached morphological dictionary to build the tagger factory.
    FileInputStream extractorParamsIn = FileUtils.openInputStream(options.extractorParams);
    Properties extractorProps = new Properties();
    try {
        extractorProps.load(extractorParamsIn);
    } finally {
        IOUtils.closeQuietly(extractorParamsIn);
    }
    MorphDictionary dictionary = getMorphDictionaryAPI().getCachedInstance().getResource();
    POSTaggerFactory taggerFactory = new POSTaggerFactory(
            DefaultFeatureExtractors.from(extractorProps, dictionary));
    posTrainer.setTaggerFactory(taggerFactory);

    // Input sentences: XMI documents from the training directory are passed through
    // the POS trimmer and the tag assembler, then exposed as a stream of Sentence spans.
    ExternalResourceDescription dictionaryDesc = getMorphDictionaryAPI()
            .getResourceDescriptionForCachedInstance();
    TypeSystemDescription typeSystem = createTypeSystemDescription(
            "com.textocat.textokit.commons.Commons-TypeSystem",
            TokenizerAPI.TYPESYSTEM_TOKENIZER,
            SentenceSplitterAPI.TYPESYSTEM_SENTENCES,
            PosTaggerAPI.TYPESYSTEM_POSTAGGER);
    CollectionReaderDescription xmiReaderDesc = CollectionReaderFactory.createReaderDescription(
            XmiCollectionReader.class, typeSystem,
            XmiCollectionReader.PARAM_INPUTDIR, options.trainingXmiDir);

    String[] gramCategories = options.gramCategories
            .toArray(new String[options.gramCategories.size()]);
    AnalysisEngineDescription trimmerDesc = PosTrimmingAnnotator.createDescription(gramCategories);
    bindExternalResource(trimmerDesc, PosTrimmingAnnotator.RESOURCE_GRAM_MODEL, dictionaryDesc);

    AnalysisEngineDescription assemblerDesc = TagAssembler.createDescription();
    bindExternalResource(assemblerDesc, GramModelBasedTagMapper.RESOURCE_GRAM_MODEL, dictionaryDesc);

    AnalysisEngineDescription pipelineDesc = createEngineDescription(trimmerDesc, assemblerDesc);
    Iterator<Sentence> sentences = AnnotationIteratorOverCollection.createIterator(
            Sentence.class, xmiReaderDesc, pipelineDesc);
    SpanStreamOverCollection<Sentence> sentenceStream =
            new SpanStreamOverCollection<Sentence>(sentences);
    posTrainer.setSentenceStream(sentenceStream);

    posTrainer.train();
}