List of usage examples for the opennlp.tools.sentdetect SentenceDetectorME constructor
public SentenceDetectorME(SentenceModel model)
From source file:org.dbpedia.spotlight.spot.OpenNLPNGramSpotter.java
/**Extracts noun-phrase n-grams from the given piece of input text. * @param text A Text object containing the input from where to extract NP n-grams * @return A list of SurfaceFormOccurrence objects. *//* w ww .ja v a2 s . co m*/ protected List<SurfaceFormOccurrence> extractNPNGrams(Text text) { String intext = text.text(); //System.out.println("\n\nRR- nextractNPNGrams(...) method called! with text: " + intext + "\n\n"); List<SurfaceFormOccurrence> npNgramSFLst = new ArrayList<SurfaceFormOccurrence>(); SentenceDetectorME sentenceDetector = new SentenceDetectorME((SentenceModel) sentenceModel); TokenizerME tokenizer = new TokenizerME((TokenizerModel) tokenModel); POSTaggerME posTagger = new POSTaggerME((POSModel) posModel); ChunkerME chunker = new ChunkerME((ChunkerModel) chunkModel); Span[] sentSpans = sentenceDetector.sentPosDetect(intext); for (Span sentSpan : sentSpans) { String sentence = sentSpan.getCoveredText(intext).toString(); int start = sentSpan.getStart(); Span[] tokSpans = tokenizer.tokenizePos(sentence); String[] tokens = new String[tokSpans.length]; // System.out.println("\n\nTokens:"); for (int i = 0; i < tokens.length; i++) { tokens[i] = tokSpans[i].getCoveredText(sentence).toString(); // System.out.println(tokens[i]); } String[] tags = posTagger.tag(tokens); Span[] chunks = chunker.chunkAsSpans(tokens, tags); for (Span chunk : chunks) { if ("NP".equals(chunk.getType())) { //Note: getStart()/getEnd() methods of Chunk spans only give the start and end token indexes of the chunk. //The actual Start/End positions of the chunk in the sentence need to be extracted from POS sentenceSpans. //They are offsets from the begining of the sentence in question. Need to add the start postion of the sentence //to compute the actual start/end offsets from the begining of the input text. 
int begin = tokSpans[chunk.getStart()].getStart(); int end = tokSpans[chunk.getEnd() - 1].getEnd(); List<Map<String, Integer>> ngrampos = extractNGramPos(chunk.getStart(), chunk.getEnd() + -1); extractNGrams(ngrampos, start, text, tokSpans, npNgramSFLst); } } } return npNgramSFLst; }
From source file:org.esipfed.eskg.nlp.OpenIE.java
public static void main(String[] args) throws IOException { SentenceDetector sentenceDetector = null; try {//ww w .jav a2 s .com // need to change this to the resource folder InputStream modelIn = OpenIE.class.getClassLoader().getResourceAsStream("en-sent.bin"); final SentenceModel sentenceModel = new SentenceModel(modelIn); modelIn.close(); sentenceDetector = new SentenceDetectorME(sentenceModel); } catch (IOException ioe) { LOG.error("Error either reading 'en-sent.bin' file or creating SentanceModel: ", ioe); throw new IOException(ioe); } edu.knowitall.openie.OpenIE openIE = new edu.knowitall.openie.OpenIE( new ClearParser(new ClearPostagger(new ClearTokenizer())), new ClearSrl(), false, false); // any text file that contains English sentences would work File file = FileUtils.toFile(OpenIE.class.getClassLoader().getResource("test.txt")); String text = readFile(file.getAbsolutePath(), StandardCharsets.UTF_8); if (sentenceDetector != null) { String[] sentences = sentenceDetector.sentDetect(text); for (int i = 0; i < sentences.length; i++) { Seq<Instance> extractions = openIE.extract(sentences[i]); List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions); for (Instance instance : listExtractions) { StringBuilder sb = new StringBuilder(); sb.append(instance.confidence()).append('\t').append(instance.extr().context()).append('\t') .append(instance.extr().arg1().text()).append('\t').append(instance.extr().rel().text()) .append('\t'); List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s()); for (Argument argument : listArg2s) { sb.append(argument.text()).append("; "); } LOG.info(sb.toString()); } } } }
From source file:org.wso2.uima.collectionProccesingEngine.analysisEngines.LocationIdentifier.java
/**
 * Loads the OpenNLP sentence, tokenizer and name-finder models from the
 * UIMA context and initializes the corresponding detector components.
 *
 * @param ctx the UIMA context supplying the model resource streams
 * @throws ResourceInitializationException if any model stream cannot be
 *         obtained or parsed
 */
@Override
public void initialize(UimaContext ctx) throws ResourceInitializationException {
    super.initialize(ctx);
    InputStream sentenceStream = null;
    InputStream tokenizerStream = null;
    InputStream nameFinderStream = null;
    try {
        sentenceStream = getContext().getResourceAsStream("SentenceModel");
        sentenceDetector = new SentenceDetectorME(new SentenceModel(sentenceStream));

        tokenizerStream = getContext().getResourceAsStream("TokenizerModel");
        tokenizer = new TokenizerME(new TokenizerModel(tokenizerStream));

        nameFinderStream = getContext().getResourceAsStream("TokenNameFinderModel");
        locationFinder = new NameFinderME(new TokenNameFinderModel(nameFinderStream));
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    } finally {
        // closeQuietly covers both the success path and a failure part-way
        // through loading; the original's explicit close() calls inside the
        // try block were redundant with these.
        IOUtils.closeQuietly(nameFinderStream);
        IOUtils.closeQuietly(tokenizerStream);
        IOUtils.closeQuietly(sentenceStream);
    }
    // Log success only after the try block completed without throwing. The
    // original logged this from finally, so "initialized successfully" was
    // emitted even when initialization had just failed.
    logger.info(LocationIdentifier.class.getSimpleName() + " Analysis Engine initialized successfully");
}