List of usage examples for java.text.BreakIterator.next()
/**
 * Advances the iterator to the next text boundary.
 *
 * <p>In the usage examples in this listing it is called repeatedly after
 * {@code first()}, and iteration stops as soon as it returns
 * {@code BreakIterator.DONE}.
 *
 * @return the offset of the next boundary, or {@code BreakIterator.DONE}
 *         once there are no further boundaries
 */
public abstract int next();
From source file:Main.java
/**
 * Prints every word-boundary offset of the text "a sentence", one offset per
 * line, using a word {@code BreakIterator} for the Canadian locale.
 *
 * @param argv ignored
 */
public static void main(String[] argv) throws Exception {
    BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.CANADA);
    wordBreaks.setText("a sentence");

    // Walk from the first boundary until next() signals exhaustion with DONE.
    int offset = wordBreaks.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = wordBreaks.next();
    }
}
From source file:Main.java
/**
 * Prints every character-boundary offset of the text "aString", one offset
 * per line, using a character {@code BreakIterator} for the Canadian locale.
 *
 * @param argv ignored
 */
public static void main(String[] argv) throws Exception {
    final String sample = "aString";
    BreakIterator charBreaks = BreakIterator.getCharacterInstance(Locale.CANADA);
    charBreaks.setText(sample);

    int position = charBreaks.first();
    while (position != BreakIterator.DONE) {
        System.out.println(position);
        // next() returns DONE after the final boundary has been reported.
        position = charBreaks.next();
    }
}
From source file:Main.java
/**
 * Prints every line-break offset of the two-line text "line1\nline2", one
 * offset per line, using a line {@code BreakIterator} for the Canadian locale.
 *
 * @param argv ignored
 */
public static void main(String[] argv) throws Exception {
    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.CANADA);
    lineBreaks.setText("line1\nline2");

    int breakAt = lineBreaks.first();
    while (BreakIterator.DONE != breakAt) {
        System.out.println(breakAt);
        breakAt = lineBreaks.next();
    }
}
From source file:Main.java
public static void main(String[] argv) throws Exception { BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.CANADA); iterator.setText("this is a test."); for (int index = iterator.first(); index != BreakIterator.DONE; index = iterator.next()) { System.out.println(index); }//from www.j a va2 s.c o m }
From source file:Main.java
/**
 * Prints, one per line, every segment of {@code target} delimited by
 * {@code wordIterator} whose first code point is a letter or a digit.
 * Segments that start with anything else (whitespace, punctuation, ...) are
 * skipped.
 *
 * @param target       the text to split; it is installed into the iterator
 * @param wordIterator the iterator that supplies the segment boundaries
 */
static void extractWords(String target, BreakIterator wordIterator) {
    wordIterator.setText(target);

    int start = wordIterator.first();
    for (int end = wordIterator.next(); end != BreakIterator.DONE; start = end, end = wordIterator.next()) {
        String word = target.substring(start, end);
        // Inspect the whole first code point, not just the first UTF-16 unit:
        // a lone high surrogate is never a letter, so words beginning with a
        // supplementary-plane letter would otherwise be dropped.
        if (Character.isLetterOrDigit(word.codePointAt(0))) {
            System.out.println(word);
        }
    }
}
From source file:HangulTextBoundaryDetection.java
static void printBoundaries(String source, BreakIterator bi) { bi.setText(source);// w w w . j a v a 2 s . co m int boundary = bi.first(); while (boundary != BreakIterator.DONE) { System.out.print(boundary + " "); boundary = bi.next(); } }
From source file:Main.java
public static String wordSpace(String source) { BreakIterator boundary = BreakIterator.getLineInstance(new Locale("th")); boundary.setText(source);/*w w w .j a va2s . co m*/ int start = boundary.first(); StringBuffer wordbuffer = new StringBuffer(""); for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { wordbuffer.append(source.substring(start, end) + "\u200b"); // wordbuffer.append(source.substring(start, end)+"\ufeff"); } return wordbuffer.toString(); }
From source file:com.cotrino.knowledgemap.db.Question.java
/**
 * Splits a text into sentences with a locale-specific sentence
 * {@code BreakIterator}. Each returned sentence is the exact substring
 * between two consecutive boundaries, so separating whitespace stays attached
 * to the sentence that precedes it.
 *
 * <p>Based on http://stackoverflow.com/questions/2103598/java-simple-sentence-parser
 *
 * @param text     the text to split
 * @param language language code used to build the locale
 * @param country  country code used to build the locale
 * @return the sentences in order of appearance; empty when no boundary
 *         follows the start of the text
 */
public static List<String> tokenize(String text, String language, String country) {
    List<String> result = new ArrayList<String>();
    BreakIterator splitter = BreakIterator.getSentenceInstance(new Locale(language, country));
    splitter.setText(text);

    int from = splitter.first();
    for (int to = splitter.next(); to != BreakIterator.DONE; to = splitter.next()) {
        result.add(text.substring(from, to));
        from = to;
    }
    return result;
}
From source file:com.conversantmedia.mapreduce.tool.RunJob.java
private static void splitLine(List<String> lines, String text, int maxLength) { BreakIterator boundary = BreakIterator.getLineInstance(); boundary.setText(text);//from www . j a v a2 s. c o m int start = boundary.first(); int end = boundary.next(); int lineLength = 0; StringBuilder buffer = new StringBuilder(); while (end != BreakIterator.DONE) { String word = text.substring(start, end); lineLength = lineLength + word.length(); if (lineLength > maxLength) { lineLength = word.length(); lines.add(buffer.toString()); buffer.setLength(0); } buffer.append(word); start = end; end = boundary.next(); } lines.add(buffer.toString()); }
From source file:eu.fbk.utils.lsa.util.Anvur.java
static String tokenize(String in) { //print each word in order BreakIterator boundary = BreakIterator.getWordInstance(); boundary.setText(in);/*from ww w. jav a 2s . co m*/ StringBuilder out = new StringBuilder(); int start = boundary.first(); for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { out.append(" "); out.append(in.substring(start, end)); } return out.toString(); }