Example usage for java.text BreakIterator getSentenceInstance

List of usage examples for java.text BreakIterator getSentenceInstance

Introduction

On this page you can find example usages of java.text.BreakIterator.getSentenceInstance.

Prototype

public static BreakIterator getSentenceInstance(Locale locale) 

Source Link

Document

Returns a new BreakIterator instance for sentence breaks for the given locale.

Usage

From source file:Main.java

/**
 * Prints every sentence-boundary offset of a fixed sample string, one offset per line.
 *
 * @param argv command-line arguments; not read by this method
 * @throws Exception declared by the signature
 */
public static void main(String[] argv) throws Exception {
    BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.CANADA);
    boundaries.setText("this is a test.");

    // Start at the first boundary and advance until the iterator reports DONE.
    int offset = boundaries.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = boundaries.next();
    }
}

From source file:Main.java

/**
 * Builds the frame: wraps {@code outputText} in a scroll pane, adds it to the content pane,
 * and fills it with a sample text in which a {@code |} is written after every sentence
 * reported by the default locale's sentence {@link BreakIterator}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";

    BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.getDefault());
    sentences.setText(text);
    outputText.setText("");

    int start = sentences.first();
    for (int end = sentences.next(); end != BreakIterator.DONE; end = sentences.next()) {
        outputText.append(text.substring(start, end) + "|");
        start = end;
    }
    // Anything after the last reported boundary is appended without a marker.
    outputText.append(text.substring(start));
}

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits {@code text} into sentences. The text is first passed through the project helper
 * {@code trim_and_replace_emptyspace}, then processed line by line with a sentence
 * {@link BreakIterator} for the locale derived from {@code language_code}. Each segment is
 * trimmed and segments that are empty after trimming are dropped.
 *
 * @param text          the text to split
 * @param language_code locale string handed to {@code LocaleUtils.toLocale}
 * @return the non-empty, trimmed sentences in order of appearance
 * @throws Exception declared by the signature
 */
@Override
public List<String> splitSentences(String text, String language_code) throws Exception {
    LOG.trace(String.format("Splitting sentences from text: %s", StringUtils.abbreviate(text, 200)));

    text = de.tudarmstadt.lt.utilities.StringUtils.trim_and_replace_emptyspace(text, " ");

    List<String> result = new ArrayList<String>();
    LineIterator lines = new LineIterator(new StringReader(text));
    while (lines.hasNext()) {
        String current = lines.nextLine();
        BreakIterator boundaries = BreakIterator.getSentenceInstance(LocaleUtils.toLocale(language_code));
        boundaries.setText(current);

        int from = boundaries.first();
        int to = boundaries.next();
        while (to != BreakIterator.DONE) {
            String candidate = de.tudarmstadt.lt.utilities.StringUtils.trim(current.substring(from, to));
            // Segments that are empty after trimming are neither stored nor logged.
            if (!candidate.isEmpty()) {
                result.add(candidate);
                LOG.trace(String.format("Current sentence: %s", StringUtils.abbreviate(candidate, 200)));
            }
            from = to;
            to = boundaries.next();
        }
    }
    LOG.trace(String.format("Split text '%s' into '%d' sentences.", StringUtils.abbreviate(text, 200),
            result.size()));
    return result;
}

From source file:IteratorTest.java

/**
 * Re-segments the content of {@code textArea} with the kind of {@link BreakIterator} chosen
 * by the mode buttons (character, word, line or sentence) for the locale selected in
 * {@code localeButton}, and shows the resulting pieces in {@code itemList}.
 *
 * <p>If none of the mode buttons is selected there is no iterator to run, so the list is
 * simply cleared instead of failing with a {@link NullPointerException}.
 */
protected void refreshDisplay() {
    String msgText = textArea.getText();
    Locale locale = (Locale) (localeButton.getSelectedItem());

    BreakIterator iterator = null;
    if (charButton.isSelected()) {
        iterator = BreakIterator.getCharacterInstance(locale);
    } else if (wordButton.isSelected()) {
        iterator = BreakIterator.getWordInstance(locale);
    } else if (lineButton.isSelected()) {
        iterator = BreakIterator.getLineInstance(locale);
    } else if (sentButton.isSelected()) {
        iterator = BreakIterator.getSentenceInstance(locale);
    }

    Vector<String> items = new Vector<String>();
    // iterator stays null when no mode button is selected; skip segmentation in that case.
    if (iterator != null) {
        iterator.setText(msgText);
        int startIndex = iterator.first();
        int nextIndex = iterator.next();
        while (nextIndex != BreakIterator.DONE) {
            items.addElement(msgText.substring(startIndex, nextIndex));
            startIndex = nextIndex;
            nextIndex = iterator.next();
        }
    }
    itemList.setListData(items);
}

From source file:com.cotrino.knowledgemap.db.Question.java

/**
 * Splits a text into sentences with the sentence {@link BreakIterator} of the given locale.
 * Approach taken from http://stackoverflow.com/questions/2103598/java-simple-sentence-parser
 *
 * @param text     the text to split
 * @param language language code passed to the {@link Locale} constructor
 * @param country  country code passed to the {@link Locale} constructor
 * @return the sentences in order of appearance; each element is the full span between two
 *         consecutive boundaries, so nothing is trimmed
 */
public static List<String> tokenize(String text, String language, String country) {
    BreakIterator boundaries = BreakIterator.getSentenceInstance(new Locale(language, country));
    boundaries.setText(text);

    List<String> sentences = new ArrayList<String>();
    int start = boundaries.first();
    for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
        sentences.add(text.substring(start, end));
        start = end;
    }
    return sentences;
}

From source file:graphene.util.StringUtils.java

/**
 * Breaks a string into the sentences reported by the sentence {@link BreakIterator} of the
 * given locale.
 *
 * @param input  the text to split
 * @param locale the locale whose sentence-break rules are used
 * @return the sentences in order of appearance; each element is the untrimmed span between
 *         two consecutive boundaries
 */
public static List<String> convertToSentences(final String input, final Locale locale) {
    final BreakIterator boundaries = BreakIterator.getSentenceInstance(locale);
    boundaries.setText(input);

    final List<String> result = new ArrayList<String>();
    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        result.add(input.substring(from, to));
        from = to;
        to = boundaries.next();
    }
    return result;
}

From source file:org.cloudgraph.examples.test.model.NLPWikiParseTest.java

/**
 * Splits the buffered text into sentences using the US-locale sentence {@link BreakIterator}
 * and logs each sentence together with the milliseconds elapsed around its parse step.
 *
 * <p>The per-sentence parse call is commented out, so the logged duration currently brackets
 * no work.
 *
 * @param buf buffer holding the text to split
 * @throws IOException declared by the signature
 */
private void parse(StringBuilder buf) throws IOException {
    String text = buf.toString();

    BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
    iterator.setText(text);

    int firstIndex = iterator.first();
    int lastIndex = iterator.next();
    while (lastIndex != BreakIterator.DONE) {
        String sentence = text.substring(firstIndex, lastIndex);
        long before = System.currentTimeMillis();
        //parse(sentence);
        long after = System.currentTimeMillis();
        log.info("time4: " + (after - before) + ": " + sentence);

        firstIndex = lastIndex;
        lastIndex = iterator.next();
    }
}

From source file:org.cloudgraph.examples.test.model.StanfordCoreNLPTest.java

/**
 * Splits the buffered text into sentences using the US-locale sentence {@link BreakIterator},
 * hands each sentence to {@code parse(String)}, and logs the sentence together with the
 * milliseconds that call took.
 *
 * @param buf buffer holding the text to split
 * @throws IOException declared by the signature
 */
private void parse(StringBuilder buf) throws IOException {
    String text = buf.toString();

    BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
    iterator.setText(text);

    int firstIndex = iterator.first();
    int lastIndex = iterator.next();
    while (lastIndex != BreakIterator.DONE) {
        String sentence = text.substring(firstIndex, lastIndex);
        long before = System.currentTimeMillis();
        parse(sentence);
        long after = System.currentTimeMillis();
        log.info("time4: " + (after - before) + ": " + sentence);

        firstIndex = lastIndex;
        lastIndex = iterator.next();
    }
}