List of usage examples for opennlp.tools.tokenize TokenizerModel TokenizerModel
public TokenizerModel(URL modelURL) throws IOException
From source file:NLP.java
/**
 * Creates the NLP helper and loads the French POS and tokenizer models.
 *
 * <p>Both model files ({@code fr-pos.bin} and {@code fr-token.bin}) are looked up in the
 * parent directory of the location this class was loaded from.
 *
 * <p>A failure while loading the tokenizer model is reported on standard output and
 * leaves {@code tokenizer} unset; it does not abort construction.
 *
 * @throws FileNotFoundException declared by the original signature
 * @throws IOException declared by the original signature
 * @throws URISyntaxException if the code-source location cannot be converted to a URI
 */
public NLP() throws FileNotFoundException, IOException, URISyntaxException {
    itemsList = new HashMap<String, String>();

    // Directory that contains the jar / classes directory this class was loaded from.
    File codeLocation = new File(
            NLP.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath());
    File modelDir = codeLocation.getParentFile();

    // Resolve model files with File(parent, child) so the platform separator is used
    // instead of a hard-coded Windows backslash.
    model = new POSModelLoader().load(new File(modelDir, "fr-pos.bin"));
    perfMon = new PerformanceMonitor(System.err, "sent");
    tagger = new POSTaggerME(model);

    try (InputStream is = new FileInputStream(new File(modelDir, "fr-token.bin"))) {
        tokenizer = new TokenizerME(new TokenizerModel(is));
    } catch (Exception e) {
        // Best effort: the tokenizer is optional for construction, so only report the problem.
        System.out.println(e);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpSegmenter.java
@Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); sentenceModelProvider = new CasConfigurableProviderBase<SentenceDetectorME>() { {// w w w . j a v a2 s.com setDefault(VERSION, "20120616.0"); setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core"); setDefault(ARTIFACT_ID, "de.tudarmstadt.ukp.dkpro.core.opennlp-model-sentence-${language}-${variant}"); setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/" + "sentence-${language}-${variant}.bin"); setDefault(VARIANT, "maxent"); setOverride(LOCATION, modelLocation); setOverride(LANGUAGE, language); setOverride(VARIANT, variant); } @Override protected SentenceDetectorME produceResource(URL aUrl) throws IOException { InputStream is = null; try { is = aUrl.openStream(); SentenceModel model = new SentenceModel(is); return new SentenceDetectorME(model); } finally { closeQuietly(is); } } }; tokenModelProvider = new CasConfigurableProviderBase<TokenizerME>() { { setDefault(VERSION, "1.5"); setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core"); setDefault(ARTIFACT_ID, "de.tudarmstadt.ukp.dkpro.core.opennlp-model-token-${language}-${variant}"); setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/" + "token-${language}-${variant}.bin"); setDefault(VARIANT, "maxent"); setOverride(LOCATION, modelLocation); setOverride(LANGUAGE, language); setOverride(VARIANT, variant); } @Override protected TokenizerME produceResource(URL aUrl) throws IOException { InputStream is = null; try { is = aUrl.openStream(); TokenizerModel model = new TokenizerModel(is); return new TokenizerME(model); } finally { closeQuietly(is); } } }; }
From source file:hrpod.tools.nlp.NLPTools.java
public void setTokenModel() { try {// w ww .j ava 2s. c o m URL tmUrl = this.getClass().getResource(modelBasePath + "en-token.bin"); this.tokenModel = new TokenizerModel(new FileInputStream(new File(tmUrl.getFile()))); } catch (Exception e) { logger.error("Error is setTokenModel", e); } }
From source file:com.civis.utils.opennlp.models.address.AddressFinderMe.java
/**
 * {@inheritDoc}
 *
 * <p>Loads the tokenizer model referenced by {@code ModelPath.DE_TOKEN_BIN} through the
 * context class loader and delegates to {@code find(String, TokenizerModel)}. If anything
 * fails along the way, the error is logged and an empty list is returned.
 */
@Override
public List<AddressSpan> find(String text) {
    try {
        ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
        try (InputStream modelStream = contextLoader.getResourceAsStream(ModelPath.DE_TOKEN_BIN)) {
            TokenizerModel tokenizerModel = new TokenizerModel(modelStream);
            return find(text, tokenizerModel);
        }
    } catch (Exception e) {
        LOG.error("Tokenizer Models can not be loaded successfully!", e);
        return Collections.emptyList();
    }
}
From source file:edu.stanford.muse.index.NER.java
public synchronized static void initialize() throws ClassCastException, IOException, ClassNotFoundException { if (pFinder != null) return;//from w w w . j a va2 s. com long startTimeMillis = System.currentTimeMillis(); log.info("Initializing NER models"); try { InputStream pis = Config.getResourceAsStream("models/en-ner-person.bin"); TokenNameFinderModel pmodel = new TokenNameFinderModel(pis); pFinder = new NameFinderME(pmodel); InputStream lis = Config.getResourceAsStream("models/en-ner-location.bin"); TokenNameFinderModel lmodel = new TokenNameFinderModel(lis); lFinder = new NameFinderME(lmodel); InputStream ois = Config.getResourceAsStream("models/en-ner-organization.bin"); TokenNameFinderModel omodel = new TokenNameFinderModel(ois); oFinder = new NameFinderME(omodel); } //dont bother about this, instead try not to use it catch (Exception e) { Util.print_exception(e, log); } try { InputStream modelIn = Config.getResourceAsStream("models/en-sent.bin"); SentenceModel model = new SentenceModel(modelIn); sFinder = new SentenceDetectorME(model); InputStream tokenStream = Config.getResourceAsStream("models/en-token.bin"); TokenizerModel modelTokenizer = new TokenizerModel(tokenStream); tokenizer = new TokenizerME(modelTokenizer); } catch (Exception e) { Util.print_exception(e); } long endTimeMillis = System.currentTimeMillis(); log.info("Done initializing NER model in " + Util.commatize(endTimeMillis - startTimeMillis) + "ms"); }
From source file:it.uniud.ailab.dcore.wrappers.external.OpenNlpBootstrapperAnnotator.java
/** * Loads a tokenizer model or retrieves it from cache if has been already * loaded before./* w w w . j av a 2s . com*/ * * @param modelId the model to retrieve * @return the loaded model */ public static TokenizerModel getTokenizerModel(String modelId) { // if the model has not already been loaded, cache it if (!tokenizerModelsCache.containsKey(modelId)) { // Split the text into sentences InputStream tokenModelIn = null; TokenizerModel tokenizerModel = null; String sentPath = ""; try { sentPath = databasePaths.get(modelId); tokenModelIn = new FileInputStream(sentPath); tokenizerModel = new TokenizerModel(tokenModelIn); } catch (IOException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while loading the model file \"" + sentPath + "\".", e); } catch (NullPointerException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while looking for the model \"" + modelId + "\".", e); } finally { if (tokenModelIn != null) { try { tokenModelIn.close(); } catch (IOException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while loading the model file '\"" + modelId + "\".", e); } } } tokenizerModelsCache.put(modelId, tokenizerModel); return tokenizerModel; } return tokenizerModelsCache.get(modelId); }
From source file:edu.stanford.muse.index.NER.java
public static void testOpenNLP() { try {//from w w w . ja v a 2s. c o m String s = Util.readFile("/tmp/in"); /* List<Pair<String,Float>> pairs = NER.namesFromText(s); for (Pair<String,Float> p: pairs) { System.out.println (p); } System.out.println ("-----"); */ InputStream pis = Config.getResourceAsStream("en-ner-person.bin"); TokenNameFinderModel pmodel = new TokenNameFinderModel(pis); InputStream lis = Config.getResourceAsStream("en-ner-location.bin"); TokenNameFinderModel lmodel = new TokenNameFinderModel(lis); InputStream ois = Config.getResourceAsStream("en-ner-organization.bin"); TokenNameFinderModel omodel = new TokenNameFinderModel(ois); InputStream tokenStream = Config.getResourceAsStream("en-token.bin"); TokenizerModel modelTokenizer = new TokenizerModel(tokenStream); TokenizerME tokenizer = new TokenizerME(modelTokenizer); Span[] tokSpans = tokenizer.tokenizePos(s); // Util.tokenize(s).toArray(new String[0]); String tokens[] = new String[tokSpans.length]; for (int i = 0; i < tokSpans.length; i++) tokens[i] = s.substring(tokSpans[i].getStart(), tokSpans[i].getEnd()); NameFinderME pFinder = new NameFinderME(pmodel); Span[] pSpans = pFinder.find(tokens); NameFinderME lFinder = new NameFinderME(lmodel); Span[] lSpans = lFinder.find(tokens); NameFinderME oFinder = new NameFinderME(omodel); Span[] oSpans = oFinder.find(tokens); System.out.println("Names found:"); for (Span span : pSpans) { for (int i = span.getStart(); i < span.getEnd(); i++) System.out.print(tokens[i] + " "); System.out.println(); } System.out.println("Locations found:"); for (Span span : lSpans) { for (int i = span.getStart(); i < span.getEnd(); i++) System.out.print(tokens[i] + " "); System.out.println(); } System.out.println("Orgs found:"); for (Span span : oSpans) { for (int i = span.getStart(); i < span.getEnd(); i++) System.out.print(tokens[i] + " "); System.out.println(); } } catch (IOException e) { e.printStackTrace(); } }
From source file:org.dbpedia.spotlight.spot.OpenNLPUtil.java
protected static BaseModel loadgivenmodeltype(OpenNlpModels m, InputStream in) throws InvalidFormatException, IOException { BaseModel mdl = null;// w ww . j a v a 2 s. com switch (m) { case TokenizerModel: { mdl = new TokenizerModel(in); LOG.debug("OpenNLP5 Tokenizer Model loaded: " + mdl); break; } case POSModel: { mdl = new POSModel(in); LOG.debug("OpenNLP5 POS Model loaded: " + mdl); break; } case SentenceModel: { mdl = new SentenceModel(in); LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl); break; } case ChunkModel: { mdl = new ChunkerModel(in); LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl); break; } case person: case organization: case location: { mdl = new TokenNameFinderModel(in); LOG.debug("OpenNLP5 TokenNameFinderModel Model loaded: " + mdl); break; } default: LOG.debug("Unknown Model Type!"); } return mdl; }
From source file:org.wso2.uima.collectionProccesingEngine.analysisEngines.LocationIdentifier.java
@Override public void initialize(UimaContext ctx) throws ResourceInitializationException { super.initialize(ctx); InputStream sentenceStream = null; InputStream tokenizerStream = null; InputStream nameFinderStream = null; try {// w w w . j av a2 s . c o m sentenceStream = getContext().getResourceAsStream("SentenceModel"); SentenceModel sentenceModel = new SentenceModel(sentenceStream); sentenceDetector = new SentenceDetectorME(sentenceModel); sentenceStream.close(); tokenizerStream = getContext().getResourceAsStream("TokenizerModel"); TokenizerModel tokenModel = new TokenizerModel(tokenizerStream); tokenizer = new TokenizerME(tokenModel); tokenizerStream.close(); nameFinderStream = getContext().getResourceAsStream("TokenNameFinderModel"); TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(nameFinderStream); locationFinder = new NameFinderME(nameFinderModel); nameFinderStream.close(); } catch (Exception e) { throw new ResourceInitializationException(e); } finally { IOUtils.closeQuietly(nameFinderStream); IOUtils.closeQuietly(tokenizerStream); IOUtils.closeQuietly(sentenceStream); logger.info(LocationIdentifier.class.getSimpleName() + " Analysis Engine initialized successfully"); } }
From source file:os.Controller.java
public String tokenize(String teks) throws InvalidFormatException, IOException { InputStream is = new FileInputStream("en-token.bin"); TokenizerModel model = new TokenizerModel(is); Tokenizer tokenizer = new TokenizerME(model); String tokens[] = tokenizer.tokenize(teks); String result = ""; for (String a : tokens) { result = result + " " + a; }// w w w. j ava2s. c om is.close(); return result; }