Usage examples for java.text.BreakIterator.setText
public abstract void setText(CharacterIterator newText);
From source file:Main.java
/**
 * Prints each word-boundary offset of the sample text "a sentence", one per line,
 * using the word break iterator for {@link Locale#CANADA}.
 *
 * @param argv command-line arguments (unused)
 */
public static void main(String[] argv) throws Exception {
    BreakIterator words = BreakIterator.getWordInstance(Locale.CANADA);
    words.setText("a sentence");

    // first() yields the opening boundary; next() returns DONE once the text is exhausted.
    int offset = words.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = words.next();
    }
}
From source file:Main.java
/**
 * Prints each line-break offset of the two-line sample text, one per line,
 * using the line break iterator for {@link Locale#CANADA}.
 *
 * @param argv command-line arguments (unused)
 */
public static void main(String[] argv) throws Exception {
    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.CANADA);
    lineBreaks.setText("line1\nline2");

    // Walk the boundaries from the first one until the iterator reports DONE.
    int offset = lineBreaks.first();
    while (offset != BreakIterator.DONE) {
        System.out.println(offset);
        offset = lineBreaks.next();
    }
}
From source file:Main.java
public static void main(String[] argv) throws Exception { BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.CANADA); iterator.setText("this is a test."); for (int index = iterator.first(); index != BreakIterator.DONE; index = iterator.next()) { System.out.println(index); }//w w w .j a v a2s . c o m }
From source file:Main.java
public static void main(String[] argv) throws Exception { BreakIterator iterator = BreakIterator.getCharacterInstance(Locale.CANADA); iterator.setText("aString"); for (int index = iterator.first(); index != BreakIterator.DONE; index = iterator.next()) { System.out.println(index); }//from ww w . j ava 2 s.c o m }
From source file:HangulTextBoundaryDetection.java
/**
 * Writes every boundary offset that the given iterator finds in {@code source} to
 * standard output, each followed by a single space.
 *
 * @param source the text to analyse
 * @param bi     the break iterator to run over {@code source}; its text is replaced
 */
static void printBoundaries(String source, BreakIterator bi) {
    bi.setText(source);
    for (int offset = bi.first(); offset != BreakIterator.DONE; offset = bi.next()) {
        System.out.print(offset + " ");
    }
}
From source file:Main.java
/**
 * Prints, one per line, every segment of {@code target} delimited by the iterator's
 * boundaries whose first character is a letter or digit. Segments that start with
 * punctuation or whitespace are skipped.
 *
 * @param target       the text to split
 * @param wordIterator the break iterator used to find segment boundaries; its text is replaced
 */
static void extractWords(String target, BreakIterator wordIterator) {
    wordIterator.setText(target);
    int start = wordIterator.first();
    int end = wordIterator.next();
    while (end != BreakIterator.DONE) {
        String word = target.substring(start, end);
        // Inspect the full code point, not just the first UTF-16 unit: a word that starts
        // with a supplementary-plane letter begins with a surrogate, which is never a letter.
        if (Character.isLetterOrDigit(word.codePointAt(0))) {
            System.out.println(word);
        }
        start = end;
        end = wordIterator.next();
    }
}
From source file:Main.java
public static String wordSpace(String source) { BreakIterator boundary = BreakIterator.getLineInstance(new Locale("th")); boundary.setText(source); int start = boundary.first(); StringBuffer wordbuffer = new StringBuffer(""); for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { wordbuffer.append(source.substring(start, end) + "\u200b"); // wordbuffer.append(source.substring(start, end)+"\ufeff"); }/*from w ww . j a v a 2s. co m*/ return wordbuffer.toString(); }
From source file:org.yamj.common.tools.StringTools.java
/** * Check that the passed string is not longer than the required length and * trim it if necessary/*from w w w .ja va2 s . c o m*/ * * @param sourceString The string to check * @param requiredLength The required length (Maximum) * @param trimToWord Trim the source string to the last space to avoid * partial words * @param endingSuffix The ending to append if the string is longer than the * required length * @return */ public static String trimToLength(String sourceString, int requiredLength, boolean trimToWord, String endingSuffix) { String changedString = sourceString.trim(); if (StringUtils.isNotBlank(changedString)) { if (changedString.length() <= requiredLength) { // No need to do anything return changedString; } else if (trimToWord) { BreakIterator bi = BreakIterator.getWordInstance(); bi.setText(changedString); int biLength = bi.preceding(requiredLength - endingSuffix.length()); return changedString.substring(0, biLength).trim() + endingSuffix; } else { // We know that the source string is longer that the required length, so trim it to size return changedString.substring(0, requiredLength - endingSuffix.length()).trim() + endingSuffix; } } return changedString; }
From source file:com.cotrino.knowledgemap.db.Question.java
/** * http://stackoverflow.com/questions/2103598/java-simple-sentence-parser * @param text/*w w w . j a va 2 s . c om*/ * @param language * @param country * @return */ public static List<String> tokenize(String text, String language, String country) { List<String> sentences = new ArrayList<String>(); Locale currentLocale = new Locale(language, country); BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(currentLocale); sentenceIterator.setText(text); int boundary = sentenceIterator.first(); int lastBoundary = 0; while (boundary != BreakIterator.DONE) { boundary = sentenceIterator.next(); if (boundary != BreakIterator.DONE) { sentences.add(text.substring(lastBoundary, boundary)); } lastBoundary = boundary; } return sentences; }
From source file:StringUtils.java
/** * Reformats a string where lines that are longer than <tt>width</tt> * are split apart at the earliest wordbreak or at maxLength, whichever is * sooner. If the width specified is less than 5 or greater than the input * Strings length the string will be returned as is. * <p/>/* w w w .jav a2 s. co m*/ * Please note that this method can be lossy - trailing spaces on wrapped * lines may be trimmed. * * @param input the String to reformat. * @param width the maximum length of any one line. * @return a new String with reformatted as needed. */ public static String wordWrap(String input, int width, Locale locale) { // protect ourselves if (input == null) { return ""; } else if (width < 5) { return input; } else if (width >= input.length()) { return input; } StringBuilder buf = new StringBuilder(input); boolean endOfLine = false; int lineStart = 0; for (int i = 0; i < buf.length(); i++) { if (buf.charAt(i) == '\n') { lineStart = i + 1; endOfLine = true; } // handle splitting at width character if (i > lineStart + width - 1) { if (!endOfLine) { int limit = i - lineStart - 1; BreakIterator breaks = BreakIterator.getLineInstance(locale); breaks.setText(buf.substring(lineStart, i)); int end = breaks.last(); // if the last character in the search string isn't a space, // we can't split on it (looks bad). Search for a previous // break character if (end == limit + 1) { if (!Character.isWhitespace(buf.charAt(lineStart + end))) { end = breaks.preceding(end - 1); } } // if the last character is a space, replace it with a \n if (end != BreakIterator.DONE && end == limit + 1) { buf.replace(lineStart + end, lineStart + end + 1, "\n"); lineStart = lineStart + end; } // otherwise, just insert a \n else if (end != BreakIterator.DONE && end != 0) { buf.insert(lineStart + end, '\n'); lineStart = lineStart + end + 1; } else { buf.insert(i, '\n'); lineStart = i + 1; } } else { buf.insert(i, '\n'); lineStart = i + 1; endOfLine = false; } } } return buf.toString(); }