Example usage for opennlp.tools.tokenize WhitespaceTokenizer INSTANCE

List of usage examples for opennlp.tools.tokenize WhitespaceTokenizer INSTANCE

Introduction

On this page you can find example usages of opennlp.tools.tokenize WhitespaceTokenizer INSTANCE.

Prototype

WhitespaceTokenizer INSTANCE

To view the source code for opennlp.tools.tokenize WhitespaceTokenizer INSTANCE, click Source Link.

Document

Use this static reference to retrieve an instance of the WhitespaceTokenizer.

Usage

From source file:NLP.java

/**
 * Part-of-speech tags the first line of {@code input}.
 *
 * <p>The line is split with {@code WhitespaceTokenizer.INSTANCE} and the resulting tokens are
 * passed to the shared {@code tagger}. Only the first line is read; any further lines in
 * {@code input} are ignored.
 *
 * @param input text to tag
 * @return whatever {@code tagger.tag} returns for the first line's tokens, or an empty array
 *         when {@code input} contains no line at all
 * @throws IOException if reading from or closing the line stream fails
 */
public static String[] POSTag(String input) throws IOException {
    ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(input));
    try {
        String firstLine = lineStream.read();
        if (firstLine == null) {
            // read() signals "no more lines" with null; never hand that to the tokenizer.
            return new String[0];
        }
        return tagger.tag(WhitespaceTokenizer.INSTANCE.tokenize(firstLine));
    } finally {
        // Release the stream on every path, including the early return above.
        lineStream.close();
    }
}

From source file:com.geocode.service.impl.AddressServiceImpl.java

/**
 * Extracts location strings from {@code input} using part-of-speech tags.
 *
 * <p>Every line of {@code input} is whitespace-tokenized and tagged. The tokens and tags of the
 * last line read are then scanned left to right: consecutive qualifying tokens are merged into
 * one candidate via {@code addWordToLocation}, and a candidate is appended to the result list
 * whenever the run ends (including at the end of the token array). The collected candidates
 * are passed through {@code filterLocations} before being returned.
 *
 * <p>NOTE(review): only the last line's tokens are scanned; earlier lines are tagged and
 * counted but otherwise discarded. Confirm whether multi-line input should be scanned line by
 * line.
 *
 * @param input text to search for addresses
 * @return the result of {@code filterLocations} applied to the collected candidates
 * @throws IOException if reading from or closing the line stream fails
 */
@Override
public List<String> extractAddress(String input) throws IOException {
    List<String> locations = new ArrayList<String>();
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");

    ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(input));

    perfMon.start();

    // NOTE(review): the line stream above is built from the uncleaned input, so the string
    // returned here is never tagged. The call is kept because it receives `locations` and
    // may populate it; confirm the intended order of cleaning and tagging.
    cleanInputString(input, locations);

    String[] tags = null;
    POSSample sample = null;
    try {
        String line;
        while ((line = lineStream.read()) != null) {
            String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
            tags = tagger.tag(tokens);
            sample = new POSSample(tokens, tags);
            perfMon.incrementCounter();
        }
    } finally {
        // Release the stream even if tokenizing or tagging throws.
        lineStream.close();
    }

    if (sample != null && sample.getSentence() != null) {
        String[] sent = sample.getSentence();
        String nnp = null; // candidate currently being assembled, or null when none is open

        for (int i = 0; i < tags.length; i++) {
            String tag = tags[i];

            if (tag.equals("CD") && nnp == null) {
                // A number opens a candidate only when the following tag is in baseList;
                // otherwise the number is consumed without affecting anything.
                if (i + 1 < tags.length && baseList.contains(tags[i + 1])) {
                    nnp = addWordToLocation(nnp, sent[i]);
                }
            } else if (tag.equals("NN") && i > 0 && tags[i - 1].equals("NNP")) {
                // Denotes completion of one address; the NN token itself is not appended.
                // Guarded so that a missing candidate does not put null into the result.
                if (nnp != null) {
                    locations.add(nnp);
                }
                nnp = null;
            } else if (list.contains(tag) && !checkExcludedWords(sent[i])) {
                nnp = addWordToLocation(nnp, sent[i]);
            } else if (nnp != null) {
                // Any other token ends the current run.
                locations.add(nnp);
                nnp = null;
            }
        }

        // A candidate that runs to the last token has no following token to end it,
        // so flush it here instead of dropping it.
        if (nnp != null) {
            locations.add(nnp);
        }
    }

    // Stop the monitor on the empty-input path as well, not only when a sample exists.
    perfMon.stopAndPrintFinalResult();

    return filterLocations(locations);
}

From source file:os.Controller.java

/**
 * Part-of-speech tags every line of {@code a} and returns the tagged text.
 *
 * <p>The model is loaded from the file {@code en-pos-maxent.bin} on each call. Each line is
 * whitespace-tokenized and tagged, and the {@code POSSample} string form of each line is
 * appended to the result with no separator between lines. The final string is also added to
 * {@code textList} before it is returned.
 *
 * @param a text to tag, one sentence per line
 * @return the concatenated {@code POSSample} strings of all lines; empty when {@code a} has no
 *         lines
 * @throws IOException if reading from or closing the line stream fails
 */
public static String POSTag(String a) throws IOException {
    POSModel model = new POSModelLoader().load(new File("en-pos-maxent.bin"));
    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
    POSTaggerME tagger = new POSTaggerME(model);

    ObjectStream<String> lineStream = new PlainTextByLineStream(new StringReader(a));

    // Accumulate in a builder rather than re-copying the whole result for every line.
    StringBuilder tagged = new StringBuilder();

    perfMon.start();
    try {
        String line;
        while ((line = lineStream.read()) != null) {
            String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
            String[] tags = tagger.tag(tokens);

            tagged.append(new POSSample(tokens, tags).toString());

            perfMon.incrementCounter();
        }
    } finally {
        // Release the stream even if tokenizing or tagging throws.
        lineStream.close();
    }
    perfMon.stopAndPrintFinalResult();

    String result = tagged.toString();
    textList.add(result);
    return result;
}