Usage examples for the OpenNLP method opennlp.tools.util.Span#getType()
public String getType()
From source file:com.civis.utils.opennlp.models.address.AddressFinderMe.java
/** * {@inheritDoc}/*from w w w . j a v a2 s .com*/ */ @Override public List<AddressSpan> find(String[] tokens) { Span[] spans = this.find(tokens, EMPTY); if (spans.length == 0) { // try to find address with zip code. return tryToFindAddressByZip(tokens); } else { List<Span> fullAddressSpans = new ArrayList<>(); for (Span span : spans) { String spanType = span.getType(); if (spanType.contains(AddressSpan.PREFIX_TYPE_ADDRESS)) { fullAddressSpans.add(span); } } //find probabilities for address double[] addressSpanProbs = this.probs(fullAddressSpans); //3. add founded contact persons to the result list List<AddressSpan> addressSpans = new ArrayList<>(); for (int i = 0; i < fullAddressSpans.size(); i++) { Span fullAddressSpan = fullAddressSpans.get(i); double probability = addressSpanProbs[i]; AddressSpan addressSpan = new AddressSpanBuilder(fullAddressSpan, probability, tokens) .setCountries(countries).setCsvAddressData(csvAddressDataList).build(); if (addressSpan.isValid()) { addressSpans.add(addressSpan); } } return removeDuplicated(addressSpans); } }
From source file:org.dbpedia.spotlight.spot.OpenNLPNGramSpotter.java
/**Extracts noun-phrase n-grams from the given piece of input text. * @param text A Text object containing the input from where to extract NP n-grams * @return A list of SurfaceFormOccurrence objects. *///from www .j a va 2s . c om protected List<SurfaceFormOccurrence> extractNPNGrams(Text text) { String intext = text.text(); //System.out.println("\n\nRR- nextractNPNGrams(...) method called! with text: " + intext + "\n\n"); List<SurfaceFormOccurrence> npNgramSFLst = new ArrayList<SurfaceFormOccurrence>(); SentenceDetectorME sentenceDetector = new SentenceDetectorME((SentenceModel) sentenceModel); TokenizerME tokenizer = new TokenizerME((TokenizerModel) tokenModel); POSTaggerME posTagger = new POSTaggerME((POSModel) posModel); ChunkerME chunker = new ChunkerME((ChunkerModel) chunkModel); Span[] sentSpans = sentenceDetector.sentPosDetect(intext); for (Span sentSpan : sentSpans) { String sentence = sentSpan.getCoveredText(intext).toString(); int start = sentSpan.getStart(); Span[] tokSpans = tokenizer.tokenizePos(sentence); String[] tokens = new String[tokSpans.length]; // System.out.println("\n\nTokens:"); for (int i = 0; i < tokens.length; i++) { tokens[i] = tokSpans[i].getCoveredText(sentence).toString(); // System.out.println(tokens[i]); } String[] tags = posTagger.tag(tokens); Span[] chunks = chunker.chunkAsSpans(tokens, tags); for (Span chunk : chunks) { if ("NP".equals(chunk.getType())) { //Note: getStart()/getEnd() methods of Chunk spans only give the start and end token indexes of the chunk. //The actual Start/End positions of the chunk in the sentence need to be extracted from POS sentenceSpans. //They are offsets from the begining of the sentence in question. Need to add the start postion of the sentence //to compute the actual start/end offsets from the begining of the input text. 
int begin = tokSpans[chunk.getStart()].getStart(); int end = tokSpans[chunk.getEnd() - 1].getEnd(); List<Map<String, Integer>> ngrampos = extractNGramPos(chunk.getStart(), chunk.getEnd() + -1); extractNGrams(ngrampos, start, text, tokSpans, npNgramSFLst); } } } return npNgramSFLst; }