List of usage examples for java.text.BreakIterator.DONE
int DONE
To view the source code for java.text.BreakIterator.DONE,
click the Source Link.
From source file:com.redhat.rcm.version.Cli.java
private static void printKVLine(final String key, final String value, final String fmt, final int valMax, final PrintWriter pw) { final List<String> lines = new ArrayList<String>(); final BreakIterator iter = BreakIterator.getLineInstance(); iter.setText(value);// w ww. j a v a2 s .c o m int start = iter.first(); int end = BreakIterator.DONE; final StringBuilder currentLine = new StringBuilder(); String seg; while (start != BreakIterator.DONE && (end = iter.next()) != BreakIterator.DONE) { seg = value.substring(start, end); if (currentLine.length() + seg.length() > valMax) { lines.add(currentLine.toString()); currentLine.setLength(0); } currentLine.append(seg); start = end; } if (currentLine.length() > 0) { lines.add(currentLine.toString()); } pw.printf(fmt, key, lines.isEmpty() ? "" : lines.get(0)); if (lines.size() > 1) { for (int i = 1; i < lines.size(); i++) { // blank string to serve for indentation in format with two fields. pw.printf(fmt, "", lines.get(i)); } } }
From source file:net.sf.jasperreports.engine.fill.TextMeasurer.java
/**
 * Tries to place the configured truncate suffix at the end of the line that
 * spans from {@code prevMeasuredState.textOffset} to
 * {@code measuredState.textOffset}.
 *
 * <p>Each iteration resets {@code measuredState} to a clone of
 * {@code prevMeasuredState}, builds a candidate line made of a prefix of the
 * line text plus the suffix, and asks {@code renderNextLine} to render it.
 * When the candidate is not consumed in full, the prefix is shortened to the
 * preceding break position (character or line break, depending on
 * {@code isToTruncateAtChar()}) and the attempt is repeated.
 *
 * <p>Does nothing when {@code getTruncateSuffix()} returns {@code null}.
 *
 * @param lineWrapper the wrapper used to read the line text and to create the
 *                    wrapper for the candidate last line
 */
protected void appendTruncateSuffix(TextLineWrapper lineWrapper) {
    String truncateSuffx = getTruncateSuffix();
    if (truncateSuffx == null) {
        return;
    }

    int lineStart = prevMeasuredState.textOffset;

    //advance from the line start until the next line start or the first newline
    String lineText = lineWrapper.getLineText(lineStart, measuredState.textOffset);
    // start with the whole line as the prefix; it shrinks on every failed attempt
    int linePosition = lineText.length();

    //iterate to the beginning of the line
    boolean done = false;
    do {
        // every attempt starts over from the state before the last line
        measuredState = prevMeasuredState.cloneState();

        // candidate line: current prefix of the line followed by the suffix
        String text = lineText.substring(0, linePosition) + truncateSuffx;
        boolean truncateAtChar = isToTruncateAtChar();
        TextLineWrapper lastLineWrapper = lineWrapper.lastLineWrapper(text, measuredState.textOffset, linePosition, truncateAtChar);

        // break iterator over the candidate text, used to step the prefix backwards
        BreakIterator breakIterator = truncateAtChar ? BreakIterator.getCharacterInstance() : BreakIterator.getLineInstance();
        breakIterator.setText(text);

        // NOTE(review): renderNextLine presumably updates measuredState as a side effect
        // (measuredState.textOffset is read below) - confirm against its definition
        if (renderNextLine(lastLineWrapper, null, new int[] { 0 }, new TabStop[] { null }, new boolean[] { false })) {
            int lastPos = lastLineWrapper.paragraphPosition();
            //test if the entire suffix fit
            if (lastPos == linePosition + truncateSuffx.length()) {
                //subtract the suffix from the offset
                measuredState.textOffset -= truncateSuffx.length();
                measuredState.textSuffix = truncateSuffx;
                done = true;
            } else {
                // shorten the prefix to the previous break position and retry
                linePosition = breakIterator.preceding(linePosition);
                if (linePosition == BreakIterator.DONE) {
                    // no earlier break position is left
                    //if the text suffix did not fit the line, only the part of it that fits will show

                    //truncate the suffix
                    String actualSuffix = truncateSuffx.substring(0, measuredState.textOffset - prevMeasuredState.textOffset);
                    //if the last text char is not a new line
                    if (prevMeasuredState.textOffset > 0 && lineWrapper.charAt(prevMeasuredState.textOffset - 1) != '\n') {
                        //force a new line so that the suffix is displayed on the last line
                        actualSuffix = '\n' + actualSuffix;
                    }
                    measuredState.textSuffix = actualSuffix;

                    //restore the next to last line offset
                    measuredState.textOffset = prevMeasuredState.textOffset;
                    done = true;
                }
            }
        } else {
            //if the line did not fit, leave it empty
            done = true;
        }
    } while (!done);
}
From source file:com.glaf.core.util.StringTools.java
public static String[] toLowerCaseWordArray(String text) { if (text == null || text.length() == 0) { return new String[0]; }//www . ja v a2 s .c om List<String> wordList = new java.util.ArrayList<String>(); BreakIterator boundary = BreakIterator.getWordInstance(); boundary.setText(text); int start = 0; for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { String tmp = text.substring(start, end).trim(); tmp = replace(tmp, "+", ""); tmp = replace(tmp, "/", ""); tmp = replace(tmp, "\\", ""); tmp = replace(tmp, "#", ""); tmp = replace(tmp, "*", ""); tmp = replace(tmp, ")", ""); tmp = replace(tmp, "(", ""); tmp = replace(tmp, "&", ""); if (tmp.length() > 0) { wordList.add(tmp); } } return wordList.toArray(new String[wordList.size()]); }
From source file:com.juick.android.Utils.java
/**
 * Returns the first word-iterator segment that ends at or after the given
 * offset and is longer than one character.
 *
 * @param text   the text to scan
 * @param offset the offset the returned segment must reach
 * @return the matching segment, or {@code null} when no segment qualifies
 */
public static String getWordAtOffset(final String text, final int offset) {
    final BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(text);

    int segmentStart = boundaries.first();
    int segmentEnd = boundaries.next();
    while (segmentEnd != BreakIterator.DONE) {
        final boolean reachesOffset = segmentEnd >= offset;
        final boolean longerThanOneChar = segmentEnd - segmentStart > 1;
        if (reachesOffset && longerThanOneChar) {
            return text.substring(segmentStart, segmentEnd);
        }
        segmentStart = segmentEnd;
        segmentEnd = boundaries.next();
    }
    return null;
}
From source file:nl.gridline.free.taalserver.TokenizeMap.java
@Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { CollectionItem item = new CollectionItem(value.getFamilyMap(COLLUMN_INTR)); StringBuilder data = new StringBuilder(); if (useTitle && item.getTitle() != null) { data.append(item.getTitle()).append(' '); }//from w w w . j a v a 2 s . co m if (useDescription && item.getDescription() != null) { data.append(item.getDescription()).append(' '); } if (useKeywords) { data.append(getKeywords(item)); } Long itemid = item.getId(); if (itemid == null) { throw new IOException("item is invallid: " + item.toString()); } outKey.set(itemid.longValue()); // get title && description (if requested) // temp list for word tokens: List<String> result = new ArrayList<String>(); // split the text & add to list: splitter.setText(data.toString()); int start = splitter.first(); for (int end = splitter.next(); end != BreakIterator.DONE; start = end, end = splitter.next()) { result.add(data.substring(start, end)); } // LOG.info("text: " + data); LOG.info("vector size: " + result.size() + " text length: " + data.length()); if (result.size() > 0) { // write new word vector to the output. context.write(new WordVector(result), outKey); context.progress(); } }
From source file:org.cloudgraph.examples.test.model.NLPWikiParseTest.java
private void parse(StringBuilder buf) throws IOException { BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); String text = buf.toString(); int counter = 0; iterator.setText(text);//from w w w . j a v a2 s . co m int lastIndex = iterator.first(); while (lastIndex != BreakIterator.DONE) { int firstIndex = lastIndex; lastIndex = iterator.next(); if (lastIndex != BreakIterator.DONE) { String sentence = text.substring(firstIndex, lastIndex); long before = System.currentTimeMillis(); //parse(sentence); long after = System.currentTimeMillis(); log.info("time4: " + String.valueOf(after - before) + ": " + sentence); counter++; } } }
From source file:org.cloudgraph.examples.test.model.StanfordCoreNLPTest.java
private void parse(StringBuilder buf) throws IOException { BreakIterator iterator = BreakIterator.getSentenceInstance(Locale.US); String text = buf.toString(); int counter = 0; iterator.setText(text);//w ww. j a va2 s . c o m int lastIndex = iterator.first(); while (lastIndex != BreakIterator.DONE) { int firstIndex = lastIndex; lastIndex = iterator.next(); if (lastIndex != BreakIterator.DONE) { String sentence = text.substring(firstIndex, lastIndex); long before = System.currentTimeMillis(); parse(sentence); long after = System.currentTimeMillis(); log.info("time4: " + String.valueOf(after - before) + ": " + sentence); counter++; } } }
From source file:org.eclipse.fx.core.text.TextUtil.java
/**
 * Find the end offset of the word
 *
 * @param content
 *            the content
 * @param offset
 *            the offset to start the search from
 * @param pointAsBoundary
 *            whether a '.' is treated as a word boundary as well
 * @return the end offset or {@link BreakIterator#DONE} when there is no
 *         boundary following the offset
 */
public static int findWordEndOffset(IterableCharSequence content, int offset, boolean pointAsBoundary) {
    final BreakIterator words = BreakIterator.getWordInstance();
    words.setText(content.getIterator());

    final int boundary = words.following(offset);
    if (boundary == BreakIterator.DONE || !pointAsBoundary) {
        return boundary;
    }

    // stop at the first '.' between the offset and the word boundary, if any
    final int dot = content.subSequence(offset, boundary).toString().indexOf('.');
    final int end = dot >= 0 ? offset + dot : boundary;

    // never report the start offset itself as the end
    return end == offset ? offset + 1 : end;
}
From source file:org.eclipse.fx.core.text.TextUtil.java
/** * Find the start offset of the word/*from w w w . j av a 2s . com*/ * * @param content * the content * @param offset * the offset to start the search from * @param pointAsBoundary * should the '.' treated as word boundary * @return the start offset or or {@link BreakIterator#DONE} */ public static int findWordStartOffset(IterableCharSequence content, int offset, boolean pointAsBoundary) { BreakIterator wordInstance = BreakIterator.getWordInstance(); wordInstance.setText(content.getIterator()); int rv = wordInstance.preceding(offset); if (rv != BreakIterator.DONE && pointAsBoundary) { String s = content.subSequence(rv, offset).toString(); int idx = s.lastIndexOf('.'); if (idx > 0) { rv += idx + 1; } // move before the point if (rv == offset) { rv -= 1; } } return rv; }
From source file:org.eclipse.fx.core.text.TextUtil.java
/** * Find the bounds of the word/*from www .j a va2 s. c om*/ * * @param content * the content * @param offset * the offset * @param pointAsBoundary * should the '.' treated as word boundary * @return a tuple of value representing start and end */ public static IntTuple findWordBounds(IterableCharSequence content, int offset, boolean pointAsBoundary) { BreakIterator wordInstance = BreakIterator.getWordInstance(); wordInstance.setText(content.getIterator()); int previous = wordInstance.preceding(offset); int next = wordInstance.following(offset); if (pointAsBoundary && previous != BreakIterator.DONE && next != BreakIterator.DONE) { String preMatch = content.subSequence(previous, offset).toString(); String postMatch = content.subSequence(offset, next).toString(); int idx = preMatch.lastIndexOf('.'); if (idx > 0) { previous += idx + 1; } idx = postMatch.indexOf('.'); if (idx > 0) { next = offset + idx; } } return new IntTuple(previous, next); }