Usage examples for java.text.BreakIterator.getSentenceInstance
public static BreakIterator getSentenceInstance(Locale locale)
Returns a new BreakIterator instance for sentence breaks for the given locale.
From source file:Main.java
public static void main(String[] argv) throws Exception { BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.CANADA); iterator.setText("this is a test."); for (int index = iterator.first(); index != BreakIterator.DONE; index = iterator.next()) { System.out.println(index); }// www . j a va2 s .c o m }
From source file:Main.java
/**
 * Builds the frame content: wraps {@code outputText} in a scroll pane and
 * fills it with a fixed sample text in which every sentence boundary
 * (default-locale rules) is marked with a {@code |} character.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.getDefault());
    final String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    sentences.setText(text);

    outputText.setText("");

    // Emit each sentence followed by a "|" marker.
    int start = sentences.first();
    for (int end = sentences.next(); end != BreakIterator.DONE; end = sentences.next()) {
        outputText.append(text.substring(start, end) + "|");
        start = end;
    }

    // Append whatever follows the last boundary that was reported.
    outputText.append(text.substring(start));
}
From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java
@Override public List<String> splitSentences(String text, String language_code) throws Exception { LOG.trace(String.format("Splitting sentences from text: %s", StringUtils.abbreviate(text, 200))); List<String> sentences = new ArrayList<String>(); text = de.tudarmstadt.lt.utilities.StringUtils.trim_and_replace_emptyspace(text, " "); for (LineIterator iter = new LineIterator(new StringReader(text)); iter.hasNext();) { String line = iter.nextLine(); BreakIterator sentence_bounds = BreakIterator.getSentenceInstance(LocaleUtils.toLocale(language_code)); sentence_bounds.setText(line);//from ww w.j av a 2 s. c o m int begin_s = sentence_bounds.first(); for (int end_s = sentence_bounds .next(); end_s != BreakIterator.DONE; begin_s = end_s, end_s = sentence_bounds.next()) { String sentence = de.tudarmstadt.lt.utilities.StringUtils.trim(line.substring(begin_s, end_s)); if (sentence.isEmpty()) continue; sentences.add(sentence); LOG.trace(String.format("Current sentence: %s", StringUtils.abbreviate(sentence, 200))); } } LOG.trace(String.format("Split text '%s' into '%d' sentences.", StringUtils.abbreviate(text, 200), sentences.size())); return sentences; }
From source file:IteratorTest.java
/**
 * Re-splits the current contents of {@code textArea} with the break
 * iterator that matches the selected button (character, word, line or
 * sentence) and the selected locale, then shows the pieces in
 * {@code itemList}.
 */
protected void refreshDisplay() {
    final Vector<String> pieces = new Vector<String>();
    final String msgText = textArea.getText();
    final Locale locale = (Locale) localeButton.getSelectedItem();

    // NOTE(review): if none of the four buttons is selected, the iterator
    // stays null and setText below throws a NullPointerException.
    final BreakIterator iterator;
    if (charButton.isSelected()) {
        iterator = BreakIterator.getCharacterInstance(locale);
    } else if (wordButton.isSelected()) {
        iterator = BreakIterator.getWordInstance(locale);
    } else if (lineButton.isSelected()) {
        iterator = BreakIterator.getLineInstance(locale);
    } else if (sentButton.isSelected()) {
        iterator = BreakIterator.getSentenceInstance(locale);
    } else {
        iterator = null;
    }

    iterator.setText(msgText);

    // Collect the substring between each pair of consecutive boundaries.
    int from = iterator.first();
    for (int to = iterator.next(); to != BreakIterator.DONE; to = iterator.next()) {
        pieces.addElement(msgText.substring(from, to));
        from = to;
    }

    itemList.setListData(pieces);
}
From source file:com.cotrino.knowledgemap.db.Question.java
/**
 * Splits a text into sentences using the sentence break rules of the
 * locale built from the given language and country. Each element is the
 * exact substring between two consecutive boundaries; no trimming is
 * applied.
 *
 * Based on http://stackoverflow.com/questions/2103598/java-simple-sentence-parser
 *
 * @param text     the text to split
 * @param language language code used to build the locale
 * @param country  country code used to build the locale
 * @return the sentences in order of appearance; empty when no boundary follows the start
 */
public static List<String> tokenize(String text, String language, String country) {
    final List<String> result = new ArrayList<String>();
    final BreakIterator boundaries = BreakIterator.getSentenceInstance(new Locale(language, country));
    boundaries.setText(text);

    // Position the iterator at the first boundary; slicing starts at offset 0.
    boundaries.first();
    int start = 0;
    for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
        result.add(text.substring(start, end));
        start = end;
    }
    return result;
}
From source file:graphene.util.StringUtils.java
/**
 * Breaks a string into sentences according to the sentence break rules
 * of the given locale. Each element is the exact substring between two
 * consecutive boundaries; no trimming is applied.
 *
 * @param input  the text to split
 * @param locale the locale whose sentence break rules are used
 * @return the sentences in order of appearance; empty when no boundary follows the start
 */
public static List<String> convertToSentences(final String input, final Locale locale) {
    final BreakIterator boundaries = BreakIterator.getSentenceInstance(locale);
    boundaries.setText(input);

    final ArrayList<String> result = new ArrayList<String>();
    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        result.add(input.substring(from, to));
        from = to;
        to = boundaries.next();
    }
    return result;
}
From source file:org.cloudgraph.examples.test.model.NLPWikiParseTest.java
private void parse(StringBuilder buf) throws IOException { BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); String text = buf.toString(); int counter = 0; iterator.setText(text);/*from w w w . j a v a 2s . co m*/ int lastIndex = iterator.first(); while (lastIndex != BreakIterator.DONE) { int firstIndex = lastIndex; lastIndex = iterator.next(); if (lastIndex != BreakIterator.DONE) { String sentence = text.substring(firstIndex, lastIndex); long before = System.currentTimeMillis(); //parse(sentence); long after = System.currentTimeMillis(); log.info("time4: " + String.valueOf(after - before) + ": " + sentence); counter++; } } }
From source file:org.cloudgraph.examples.test.model.StanfordCoreNLPTest.java
/**
 * Walks the text held in {@code buf} sentence by sentence (US sentence
 * break rules), passes each sentence to {@code parse(String)}, and logs
 * the sentence together with the wall-clock time that call took in
 * milliseconds.
 *
 * @param buf buffer holding the text to split
 * @throws IOException presumably propagated from {@code parse(String)}; confirm against that overload
 */
private void parse(StringBuilder buf) throws IOException {
    BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US);
    String text = buf.toString();
    iterator.setText(text);

    int firstIndex = iterator.first();
    for (int lastIndex = iterator.next(); lastIndex != BreakIterator.DONE; lastIndex = iterator.next()) {
        String sentence = text.substring(firstIndex, lastIndex);

        // Time only the per-sentence parse call.
        long before = System.currentTimeMillis();
        parse(sentence);
        long after = System.currentTimeMillis();

        log.info("time4: " + (after - before) + ": " + sentence);
        firstIndex = lastIndex;
    }
}