List of usage examples for the opennlp.tools.sentdetect SentenceDetectorME constructor
public SentenceDetectorME(SentenceModel model)
From source file:org.dbpedia.spotlight.spot.OpenNLPNGramSpotter.java
/**Extracts noun-phrase n-grams from the given piece of input text. * @param text A Text object containing the input from where to extract NP n-grams * @return A list of SurfaceFormOccurrence objects. *//* w ww .ja v a2 s . co m*/ protected List<SurfaceFormOccurrence> extractNPNGrams(Text text) { String intext = text.text(); //System.out.println("\n\nRR- nextractNPNGrams(...) method called! with text: " + intext + "\n\n"); List<SurfaceFormOccurrence> npNgramSFLst = new ArrayList<SurfaceFormOccurrence>(); SentenceDetectorME sentenceDetector = new SentenceDetectorME((SentenceModel) sentenceModel); TokenizerME tokenizer = new TokenizerME((TokenizerModel) tokenModel); POSTaggerME posTagger = new POSTaggerME((POSModel) posModel); ChunkerME chunker = new ChunkerME((ChunkerModel) chunkModel); Span[] sentSpans = sentenceDetector.sentPosDetect(intext); for (Span sentSpan : sentSpans) { String sentence = sentSpan.getCoveredText(intext).toString(); int start = sentSpan.getStart(); Span[] tokSpans = tokenizer.tokenizePos(sentence); String[] tokens = new String[tokSpans.length]; // System.out.println("\n\nTokens:"); for (int i = 0; i < tokens.length; i++) { tokens[i] = tokSpans[i].getCoveredText(sentence).toString(); // System.out.println(tokens[i]); } String[] tags = posTagger.tag(tokens); Span[] chunks = chunker.chunkAsSpans(tokens, tags); for (Span chunk : chunks) { if ("NP".equals(chunk.getType())) { //Note: getStart()/getEnd() methods of Chunk spans only give the start and end token indexes of the chunk. //The actual Start/End positions of the chunk in the sentence need to be extracted from POS sentenceSpans. //They are offsets from the begining of the sentence in question. Need to add the start postion of the sentence //to compute the actual start/end offsets from the begining of the input text. 
int begin = tokSpans[chunk.getStart()].getStart(); int end = tokSpans[chunk.getEnd() - 1].getEnd(); List<Map<String, Integer>> ngrampos = extractNGramPos(chunk.getStart(), chunk.getEnd() + -1); extractNGrams(ngrampos, start, text, tokSpans, npNgramSFLst); } } } return npNgramSFLst; }
From source file:org.esipfed.eskg.nlp.OpenIE.java
public static void main(String[] args) throws IOException { SentenceDetector sentenceDetector = null; try {//ww w .jav a2 s .com // need to change this to the resource folder InputStream modelIn = OpenIE.class.getClassLoader().getResourceAsStream("en-sent.bin"); final SentenceModel sentenceModel = new SentenceModel(modelIn); modelIn.close(); sentenceDetector = new SentenceDetectorME(sentenceModel); } catch (IOException ioe) { LOG.error("Error either reading 'en-sent.bin' file or creating SentanceModel: ", ioe); throw new IOException(ioe); } edu.knowitall.openie.OpenIE openIE = new edu.knowitall.openie.OpenIE( new ClearParser(new ClearPostagger(new ClearTokenizer())), new ClearSrl(), false, false); // any text file that contains English sentences would work File file = FileUtils.toFile(OpenIE.class.getClassLoader().getResource("test.txt")); String text = readFile(file.getAbsolutePath(), StandardCharsets.UTF_8); if (sentenceDetector != null) { String[] sentences = sentenceDetector.sentDetect(text); for (int i = 0; i < sentences.length; i++) { Seq<Instance> extractions = openIE.extract(sentences[i]); List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions); for (Instance instance : listExtractions) { StringBuilder sb = new StringBuilder(); sb.append(instance.confidence()).append('\t').append(instance.extr().context()).append('\t') .append(instance.extr().arg1().text()).append('\t').append(instance.extr().rel().text()) .append('\t'); List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s()); for (Argument argument : listArg2s) { sb.append(argument.text()).append("; "); } LOG.info(sb.toString()); } } } }
From source file:org.wso2.uima.collectionProccesingEngine.analysisEngines.LocationIdentifier.java
/**
 * Loads the OpenNLP sentence, tokenizer and name-finder models from the
 * UIMA context and initializes the corresponding detector components.
 *
 * @param ctx the UIMA context supplying the model resource streams
 * @throws ResourceInitializationException if any model stream cannot be
 *         obtained or parsed
 */
@Override
public void initialize(UimaContext ctx) throws ResourceInitializationException {
    super.initialize(ctx);
    InputStream sentenceStream = null;
    InputStream tokenizerStream = null;
    InputStream nameFinderStream = null;
    try {
        sentenceStream = getContext().getResourceAsStream("SentenceModel");
        sentenceDetector = new SentenceDetectorME(new SentenceModel(sentenceStream));

        tokenizerStream = getContext().getResourceAsStream("TokenizerModel");
        tokenizer = new TokenizerME(new TokenizerModel(tokenizerStream));

        nameFinderStream = getContext().getResourceAsStream("TokenNameFinderModel");
        locationFinder = new NameFinderME(new TokenNameFinderModel(nameFinderStream));
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    } finally {
        // closeQuietly covers both the success path and a failure part-way
        // through loading; the original's explicit close() calls inside the
        // try block were redundant with these.
        IOUtils.closeQuietly(nameFinderStream);
        IOUtils.closeQuietly(tokenizerStream);
        IOUtils.closeQuietly(sentenceStream);
    }
    // Log success only after the try block completed without throwing. The
    // original logged this from finally, so "initialized successfully" was
    // emitted even when initialization had just failed.
    logger.info(LocationIdentifier.class.getSimpleName() + " Analysis Engine initialized successfully");
}