List of usage examples for the opennlp.tools.sentdetect.SentenceModel constructor
public SentenceModel(URL modelURL) throws IOException
From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpSegmenter.java
/**
 * Initializes the component and creates the two model providers: one producing a
 * {@code SentenceDetectorME} and one producing a {@code TokenizerME}.
 *
 * <p>Each provider registers default values for VERSION, GROUP_ID, ARTIFACT_ID, LOCATION and
 * VARIANT, then registers overrides for LOCATION, LANGUAGE and VARIANT. Both providers are
 * given the same {@code modelLocation}, {@code language} and {@code variant} values.
 *
 * @param aContext the UIMA context, passed on to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    sentenceModelProvider = new CasConfigurableProviderBase<SentenceDetectorME>() {
        {
            setDefault(VERSION, "20120616.0");
            setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
            setDefault(ARTIFACT_ID,
                    "de.tudarmstadt.ukp.dkpro.core.opennlp-model-sentence-${language}-${variant}");
            setDefault(LOCATION,
                    "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/"
                            + "sentence-${language}-${variant}.bin");
            setDefault(VARIANT, "maxent");

            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        /** Reads a sentence model from the given URL and wraps it in a detector. */
        @Override
        protected SentenceDetectorME produceResource(URL aUrl) throws IOException {
            InputStream modelStream = null;
            try {
                modelStream = aUrl.openStream();
                return new SentenceDetectorME(new SentenceModel(modelStream));
            } finally {
                // Runs on success and on failure; modelStream is still null if openStream() threw.
                closeQuietly(modelStream);
            }
        }
    };

    tokenModelProvider = new CasConfigurableProviderBase<TokenizerME>() {
        {
            setDefault(VERSION, "1.5");
            setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
            setDefault(ARTIFACT_ID,
                    "de.tudarmstadt.ukp.dkpro.core.opennlp-model-token-${language}-${variant}");
            setDefault(LOCATION,
                    "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/"
                            + "token-${language}-${variant}.bin");
            setDefault(VARIANT, "maxent");

            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        /** Reads a tokenizer model from the given URL and wraps it in a tokenizer. */
        @Override
        protected TokenizerME produceResource(URL aUrl) throws IOException {
            InputStream modelStream = null;
            try {
                modelStream = aUrl.openStream();
                return new TokenizerME(new TokenizerModel(modelStream));
            } finally {
                // Runs on success and on failure; modelStream is still null if openStream() threw.
                closeQuietly(modelStream);
            }
        }
    };
}
From source file:edu.stanford.muse.index.NER.java
public synchronized static void initialize() throws ClassCastException, IOException, ClassNotFoundException { if (pFinder != null) return;//www . ja v a2 s. com long startTimeMillis = System.currentTimeMillis(); log.info("Initializing NER models"); try { InputStream pis = Config.getResourceAsStream("models/en-ner-person.bin"); TokenNameFinderModel pmodel = new TokenNameFinderModel(pis); pFinder = new NameFinderME(pmodel); InputStream lis = Config.getResourceAsStream("models/en-ner-location.bin"); TokenNameFinderModel lmodel = new TokenNameFinderModel(lis); lFinder = new NameFinderME(lmodel); InputStream ois = Config.getResourceAsStream("models/en-ner-organization.bin"); TokenNameFinderModel omodel = new TokenNameFinderModel(ois); oFinder = new NameFinderME(omodel); } //dont bother about this, instead try not to use it catch (Exception e) { Util.print_exception(e, log); } try { InputStream modelIn = Config.getResourceAsStream("models/en-sent.bin"); SentenceModel model = new SentenceModel(modelIn); sFinder = new SentenceDetectorME(model); InputStream tokenStream = Config.getResourceAsStream("models/en-token.bin"); TokenizerModel modelTokenizer = new TokenizerModel(tokenStream); tokenizer = new TokenizerME(modelTokenizer); } catch (Exception e) { Util.print_exception(e); } long endTimeMillis = System.currentTimeMillis(); log.info("Done initializing NER model in " + Util.commatize(endTimeMillis - startTimeMillis) + "ms"); }
From source file:it.uniud.ailab.dcore.wrappers.external.OpenNlpBootstrapperAnnotator.java
/** * Loads a sentence model or retrieves it from cache if has been already * loaded before./* ww w . j a v a 2 s .c om*/ * * @param modelId the model to retrieve * @return the loaded model */ public static SentenceModel getSentenceModel(String modelId) { // if the model has not already been loaded, cache it if (!sentenceModelsCache.containsKey(modelId)) { // Split the text into sentences InputStream sentModelIn = null; SentenceModel sentModel = null; String sentPath = ""; try { sentPath = databasePaths.get(modelId); sentModelIn = new FileInputStream(sentPath); sentModel = new SentenceModel(sentModelIn); } catch (IOException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while loading the model file \"" + sentPath + "\".", e); } catch (NullPointerException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while looking for the model \"" + modelId + "\".", e); } finally { if (sentModelIn != null) { try { sentModelIn.close(); } catch (IOException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while loading the model file '\"" + modelId + "\".", e); } } } sentenceModelsCache.put(modelId, sentModel); return sentModel; } return sentenceModelsCache.get(modelId); }
From source file:org.dbpedia.spotlight.spot.OpenNLPUtil.java
protected static BaseModel loadgivenmodeltype(OpenNlpModels m, InputStream in) throws InvalidFormatException, IOException { BaseModel mdl = null;//w ww.ja v a2 s . c o m switch (m) { case TokenizerModel: { mdl = new TokenizerModel(in); LOG.debug("OpenNLP5 Tokenizer Model loaded: " + mdl); break; } case POSModel: { mdl = new POSModel(in); LOG.debug("OpenNLP5 POS Model loaded: " + mdl); break; } case SentenceModel: { mdl = new SentenceModel(in); LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl); break; } case ChunkModel: { mdl = new ChunkerModel(in); LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl); break; } case person: case organization: case location: { mdl = new TokenNameFinderModel(in); LOG.debug("OpenNLP5 TokenNameFinderModel Model loaded: " + mdl); break; } default: LOG.debug("Unknown Model Type!"); } return mdl; }
From source file:org.esipfed.eskg.nlp.OpenIE.java
public static void main(String[] args) throws IOException { SentenceDetector sentenceDetector = null; try {/*from ww w . jav a2 s.c o m*/ // need to change this to the resource folder InputStream modelIn = OpenIE.class.getClassLoader().getResourceAsStream("en-sent.bin"); final SentenceModel sentenceModel = new SentenceModel(modelIn); modelIn.close(); sentenceDetector = new SentenceDetectorME(sentenceModel); } catch (IOException ioe) { LOG.error("Error either reading 'en-sent.bin' file or creating SentanceModel: ", ioe); throw new IOException(ioe); } edu.knowitall.openie.OpenIE openIE = new edu.knowitall.openie.OpenIE( new ClearParser(new ClearPostagger(new ClearTokenizer())), new ClearSrl(), false, false); // any text file that contains English sentences would work File file = FileUtils.toFile(OpenIE.class.getClassLoader().getResource("test.txt")); String text = readFile(file.getAbsolutePath(), StandardCharsets.UTF_8); if (sentenceDetector != null) { String[] sentences = sentenceDetector.sentDetect(text); for (int i = 0; i < sentences.length; i++) { Seq<Instance> extractions = openIE.extract(sentences[i]); List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions); for (Instance instance : listExtractions) { StringBuilder sb = new StringBuilder(); sb.append(instance.confidence()).append('\t').append(instance.extr().context()).append('\t') .append(instance.extr().arg1().text()).append('\t').append(instance.extr().rel().text()) .append('\t'); List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s()); for (Argument argument : listArg2s) { sb.append(argument.text()).append("; "); } LOG.info(sb.toString()); } } } }
From source file:org.wso2.uima.collectionProccesingEngine.analysisEngines.LocationIdentifier.java
/**
 * Initializes the analysis engine by loading the sentence detector, tokenizer and location
 * name finder from the context resources "SentenceModel", "TokenizerModel" and
 * "TokenNameFinderModel".
 *
 * <p>The success message is logged only after all three models have been loaded. It used to
 * sit in the {@code finally} block and was therefore also printed when initialization failed.
 *
 * @param ctx the UIMA context, passed on to {@code super.initialize}
 * @throws ResourceInitializationException if any resource cannot be obtained or any model
 *         cannot be loaded; the underlying exception is attached as the cause
 */
@Override
public void initialize(UimaContext ctx) throws ResourceInitializationException {
    super.initialize(ctx);

    // try-with-resources closes every opened stream exactly once, on success and on failure.
    try (InputStream sentenceStream = getContext().getResourceAsStream("SentenceModel");
            InputStream tokenizerStream = getContext().getResourceAsStream("TokenizerModel");
            InputStream nameFinderStream = getContext().getResourceAsStream("TokenNameFinderModel")) {
        sentenceDetector = new SentenceDetectorME(new SentenceModel(sentenceStream));
        tokenizer = new TokenizerME(new TokenizerModel(tokenizerStream));
        locationFinder = new NameFinderME(new TokenNameFinderModel(nameFinderStream));
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }

    logger.info(LocationIdentifier.class.getSimpleName() + " Analysis Engine initialized successfully");
}