List of usage examples for the opennlp.tools.postag.POSTaggerME constructor POSTaggerME
public POSTaggerME(POSModel model)
From source file:org.sglover.nlp.CoreNLPEntityTagger.java
@Override protected Entities getEntitiesImpl(String content) { Entities namedEntities = Entities.empty(); SentenceModel sentenceModel = sentenceModels.get("en"); SentenceDetector sentenceDetector = new SentenceDetectorME(sentenceModel); String[] sentences = sentenceDetector.sentDetect(content); TokenizerModel tm = tokenizerModels.get("en"); TokenizerME wordBreaker = new TokenizerME(tm); for (String sentence : sentences) { String[] tokens = wordBreaker.tokenize(sentence); List<TextAnnotation> allTextAnnotations = new LinkedList<TextAnnotation>(); POSModel posModel = posModels.get("en"); POSTaggerME posme = new POSTaggerME(posModel); String[] posTags = posme.tag(tokens); List<String> npTokens = new LinkedList<>(); ChunkerModel chunkerModel = chunkerModels.get("en"); ChunkerME chunkerME = new ChunkerME(chunkerModel); Span[] chunks = chunkerME.chunkAsSpans(tokens, posTags); String[] chunkStrings = Span.spansToStrings(chunks, tokens); for (int i = 0; i < chunks.length; i++) { String chunkString = chunkStrings[i]; logger.info("Chunk = " + chunkString + ", type = " + chunks[i].getType()); if (chunks[i].getType().equals("NP")) { npTokens.add(chunkString); }/*ww w . j a v a 2s . c o m*/ } // findEntities(namedEntities, allTextAnnotations, // npTokens.toArray(new String[0])); findEntities(namedEntities, allTextAnnotations, tokens); } return namedEntities; }
From source file:org.apache.stanbol.commons.opennlp.OpenNLP.java
/**
 * Creates a "part-of-speech" tagger for the parsed language.
 *
 * @param language the language whose POS model should back the tagger
 * @return a new {@link POSTaggerME} built on that language's model, or <code>null</code>
 *         if no model data are found
 * @throws InvalidFormatException in case the found model data are in the wrong format
 * @throws IOException on any error while reading the model data
 */
public POSTagger getPartOfSpeechTagger(String language) throws IOException {
    POSModel model = getPartOfSpeechModel(language);
    if (model == null) {
        // Missing model is not an error here: report it at debug level and signal with null.
        log.debug("No POS Model for language '{}'", language);
        return null;
    }
    return new POSTaggerME(model);
}
From source file:org.dbpedia.spotlight.spot.OpenNLPNGramSpotter.java
/**Extracts noun-phrase n-grams from the given piece of input text. * @param text A Text object containing the input from where to extract NP n-grams * @return A list of SurfaceFormOccurrence objects. *///from w w w. jav a 2 s. c o m protected List<SurfaceFormOccurrence> extractNPNGrams(Text text) { String intext = text.text(); //System.out.println("\n\nRR- nextractNPNGrams(...) method called! with text: " + intext + "\n\n"); List<SurfaceFormOccurrence> npNgramSFLst = new ArrayList<SurfaceFormOccurrence>(); SentenceDetectorME sentenceDetector = new SentenceDetectorME((SentenceModel) sentenceModel); TokenizerME tokenizer = new TokenizerME((TokenizerModel) tokenModel); POSTaggerME posTagger = new POSTaggerME((POSModel) posModel); ChunkerME chunker = new ChunkerME((ChunkerModel) chunkModel); Span[] sentSpans = sentenceDetector.sentPosDetect(intext); for (Span sentSpan : sentSpans) { String sentence = sentSpan.getCoveredText(intext).toString(); int start = sentSpan.getStart(); Span[] tokSpans = tokenizer.tokenizePos(sentence); String[] tokens = new String[tokSpans.length]; // System.out.println("\n\nTokens:"); for (int i = 0; i < tokens.length; i++) { tokens[i] = tokSpans[i].getCoveredText(sentence).toString(); // System.out.println(tokens[i]); } String[] tags = posTagger.tag(tokens); Span[] chunks = chunker.chunkAsSpans(tokens, tags); for (Span chunk : chunks) { if ("NP".equals(chunk.getType())) { //Note: getStart()/getEnd() methods of Chunk spans only give the start and end token indexes of the chunk. //The actual Start/End positions of the chunk in the sentence need to be extracted from POS sentenceSpans. //They are offsets from the begining of the sentence in question. Need to add the start postion of the sentence //to compute the actual start/end offsets from the begining of the input text. 
int begin = tokSpans[chunk.getStart()].getStart(); int end = tokSpans[chunk.getEnd() - 1].getEnd(); List<Map<String, Integer>> ngrampos = extractNGramPos(chunk.getStart(), chunk.getEnd() + -1); extractNGrams(ngrampos, start, text, tokSpans, npNgramSFLst); } } } return npNgramSFLst; }
From source file:os.Controller.java
/**
 * POS-tags the given text line by line and returns the tagged output.
 *
 * <p>The model is loaded from the file "en-pos-maxent.bin" on every call. Each line of the
 * input is split with {@code WhitespaceTokenizer}, tagged, and rendered through
 * {@code POSSample.toString()}. The rendered lines are concatenated directly, with no
 * separator between them. The final string is also appended to {@code textList}.
 * Throughput statistics are printed to {@code System.err} by the performance monitor.
 *
 * @param a the text to tag; may contain several lines
 * @return the concatenated tagged representation of all lines
 * @throws IOException if reading from or closing the line stream fails
 */
public static String POSTag(String a) throws IOException {
    POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin"));
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    POSTaggerME tagger = new POSTaggerME(model);

    ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(a));
    // StringBuilder avoids re-copying the whole result for every input line.
    StringBuilder tagged = new StringBuilder();

    perfMon.start();
    try {
        String line;
        while ((line = lineStream.read()) != null) {
            String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
            String[] tags = tagger.tag(tokens);
            tagged.append(new POSSample(tokens, tags).toString());
            perfMon.incrementCounter();
        }
    } finally {
        // Always release the stream, including when tagging fails part-way through.
        lineStream.close();
    }
    perfMon.stopAndPrintFinalResult();

    String result = tagged.toString();
    textList.add(result);
    return result;
}
From source file:sentimental_analysis.pre.processor.WordBag.Tokenizer.PartsofSpeechFilter.java
/**
 * Initialises the {@code POSTagger} field with a {@link POSTaggerME} built from the model.
 *
 * <p>Problems are reported on standard output rather than thrown: a {@code null} model is
 * reported and leaves the field untouched, and any exception raised while constructing the
 * tagger is printed together with its stack trace.
 *
 * @param Pmodel the POS model to build the tagger from; {@code null} is reported and ignored
 */
private void getPOSTaggerME(POSModel Pmodel) {
    if (Pmodel == null) {
        System.out.println("POS Model is NULL!");
        // Nothing to build from; stop here instead of provoking a failure in the constructor.
        return;
    }
    try {
        POSTagger = new POSTaggerME(Pmodel);
    } catch (Exception ex) {
        System.out.println("Exception Thrown POSTaggerME (getPOSTaggerME) : " + ex.getMessage());
        System.out.println("Stack Trace");
        ex.printStackTrace();
    }
}