List of usage examples for the java.text.BreakIterator.DONE constant
int DONE
To view the source code for java.text.BreakIterator.DONE:
Click Source Link
From source file:Main.java
/**
 * Builds the frame: places {@code outputText} inside a scroll pane and fills it with a sample
 * text in which every boundary reported by the default locale's character
 * {@link BreakIterator} is marked with a {@code |}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    BreakIterator characterBreaks = BreakIterator.getCharacterInstance(Locale.getDefault());
    String text = "The quick, brown fox jump-ed\n"
            + "over the lazy \"dog.\" And then...what happened?";
    characterBreaks.setText(text);

    outputText.setText("");
    int segmentStart = characterBreaks.first();
    for (int segmentEnd = characterBreaks.next();
            segmentEnd != BreakIterator.DONE;
            segmentEnd = characterBreaks.next()) {
        // One segment per boundary pair, each followed by the visual separator.
        outputText.append(text.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Whatever follows the last reported boundary is appended without a separator.
    outputText.append(text.substring(segmentStart));
}
From source file:eu.fbk.utils.lsa.util.Anvur.java
static String tokenize(String in) { //print each word in order BreakIterator boundary = BreakIterator.getWordInstance(); boundary.setText(in);/*from w w w . j a v a2 s . c o m*/ StringBuilder out = new StringBuilder(); int start = boundary.first(); for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { out.append(" "); out.append(in.substring(start, end)); } return out.toString(); }
From source file:net.sf.jtmt.tokenizers.SentenceTokenizer.java
/**
 * Advances {@code breakIterator} by one boundary and returns the text between the current
 * {@code index} and that boundary, then moves {@code index} to the boundary.
 *
 * @return the next chunk of {@code text}, or {@code null} once the iterator reports
 *         {@link BreakIterator#DONE}
 */
public String nextSentence() {
    final int boundary = breakIterator.next();
    if (boundary != BreakIterator.DONE) {
        final String chunk = text.substring(index, boundary);
        index = boundary;
        return chunk;
    }
    return null;
}
From source file:net.sf.jtmt.tokenizers.ParagraphTokenizer.java
/**
 * Advances {@code breakIterator} by one boundary and returns the text between the current
 * {@code index} and that boundary, then moves {@code index} to the boundary.
 *
 * @return the next chunk of {@code text}, or {@code null} once the iterator reports
 *         {@link BreakIterator#DONE}
 */
public String nextParagraph() {
    final int boundary = breakIterator.next();
    if (boundary != BreakIterator.DONE) {
        final String chunk = text.substring(index, boundary);
        index = boundary;
        return chunk;
    }
    return null;
}
From source file:ezbake.training.TweetWordDivideWorker.java
/** * Performs processing on the Tweet object by dividing the tweet's text into words. * * @param visibility The Visibility containing the Accumulo visibility string representing the classification level * of the data contained in the incoming thrift data object. * @param data The incoming Thrift object to be processed. *///from w w w .j a v a 2 s. co m @Override public void process(Visibility visibility, Tweet data) { if (data != null && data.getText() != null) { BreakIterator wordIterator = BreakIterator.getWordInstance(); wordIterator.setText(data.getText()); int wordStart = wordIterator.first(); int wordEnd = wordIterator.next(); for (; wordEnd != BreakIterator.DONE; wordStart = wordEnd, wordEnd = wordIterator.next()) { String tweetTextWord = data.getText().substring(wordStart, wordEnd); if (StringUtils.isNotBlank(tweetTextWord)) { try { outputResultsToPipe(visibility, tweetTextWord); } catch (IOException e) { e.printStackTrace(); } } } } }
From source file:StringUtils.java
/** * Reformats a string where lines that are longer than <tt>width</tt> * are split apart at the earliest wordbreak or at maxLength, whichever is * sooner. If the width specified is less than 5 or greater than the input * Strings length the string will be returned as is. * <p/>//w w w . jav a 2s .c o m * Please note that this method can be lossy - trailing spaces on wrapped * lines may be trimmed. * * @param input the String to reformat. * @param width the maximum length of any one line. * @return a new String with reformatted as needed. */ public static String wordWrap(String input, int width, Locale locale) { // protect ourselves if (input == null) { return ""; } else if (width < 5) { return input; } else if (width >= input.length()) { return input; } StringBuilder buf = new StringBuilder(input); boolean endOfLine = false; int lineStart = 0; for (int i = 0; i < buf.length(); i++) { if (buf.charAt(i) == '\n') { lineStart = i + 1; endOfLine = true; } // handle splitting at width character if (i > lineStart + width - 1) { if (!endOfLine) { int limit = i - lineStart - 1; BreakIterator breaks = BreakIterator.getLineInstance(locale); breaks.setText(buf.substring(lineStart, i)); int end = breaks.last(); // if the last character in the search string isn't a space, // we can't split on it (looks bad). Search for a previous // break character if (end == limit + 1) { if (!Character.isWhitespace(buf.charAt(lineStart + end))) { end = breaks.preceding(end - 1); } } // if the last character is a space, replace it with a \n if (end != BreakIterator.DONE && end == limit + 1) { buf.replace(lineStart + end, lineStart + end + 1, "\n"); lineStart = lineStart + end; } // otherwise, just insert a \n else if (end != BreakIterator.DONE && end != 0) { buf.insert(lineStart + end, '\n'); lineStart = lineStart + end + 1; } else { buf.insert(i, '\n'); lineStart = i + 1; } } else { buf.insert(i, '\n'); lineStart = i + 1; endOfLine = false; } } } return buf.toString(); }
From source file:Utils.java
/** * Wrap multi-line strings (and get the individual lines). * /*ww w. j a v a 2s . c om*/ * @param original * the original string to wrap * @param width * the maximum width of lines * @param breakIterator * breaks original to chars, words, sentences, depending on what * instance you provide. * @param removeNewLines * if <code>true</code>, any newlines in the original string are * ignored * @return the lines after wrapping */ public static String[] wrapStringToArray(String original, int width, BreakIterator breakIterator, boolean removeNewLines) { if (original.length() == 0) { return new String[] { original }; } String[] workingSet; // substitute original newlines with spaces, // remove newlines from head and tail if (removeNewLines) { original = trimString(original); original = original.replace('\n', ' '); workingSet = new String[] { original }; } else { StringTokenizer tokens = new StringTokenizer(original, "\n"); // NOI18N int len = tokens.countTokens(); workingSet = new String[len]; for (int i = 0; i < len; i++) { workingSet[i] = tokens.nextToken(); } } if (width < 1) { width = 1; } if (original.length() <= width) { return workingSet; } widthcheck: { boolean ok = true; for (int i = 0; i < workingSet.length; i++) { ok = ok && (workingSet[i].length() < width); if (!ok) { break widthcheck; } } return workingSet; } java.util.ArrayList<String> lines = new java.util.ArrayList<String>(); int lineStart = 0; // the position of start of currently processed line in // the original string for (int i = 0; i < workingSet.length; i++) { if (workingSet[i].length() < width) { lines.add(workingSet[i]); } else { breakIterator.setText(workingSet[i]); int nextStart = breakIterator.next(); int prevStart = 0; do { while (((nextStart - lineStart) < width) && (nextStart != BreakIterator.DONE)) { prevStart = nextStart; nextStart = breakIterator.next(); } if (nextStart == BreakIterator.DONE) { nextStart = prevStart = workingSet[i].length(); } if (prevStart == 0) { prevStart = nextStart; 
} lines.add(workingSet[i].substring(lineStart, prevStart)); lineStart = prevStart; prevStart = 0; } while (lineStart < workingSet[i].length()); lineStart = 0; } } String[] s = new String[lines.size()]; return (String[]) lines.toArray(s); }
From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java
/**
 * Splits {@code text} into sentences. The text is first normalised with
 * {@code trim_and_replace_emptyspace}, then read line by line; each line is cut at the
 * boundaries of a sentence {@link BreakIterator} for the locale derived from
 * {@code language_code}. Every piece is trimmed and pieces that end up empty are dropped.
 *
 * @param text the text to split
 * @param language_code the locale string handed to {@code LocaleUtils.toLocale}
 * @return the non-empty, trimmed sentences in order of appearance
 */
@Override
public List<String> splitSentences(String text, String language_code) throws Exception {
    LOG.trace(String.format("Splitting sentences from text: %s", StringUtils.abbreviate(text, 200)));

    List<String> sentences = new ArrayList<String>();
    text = de.tudarmstadt.lt.utilities.StringUtils.trim_and_replace_emptyspace(text, " ");

    LineIterator lines = new LineIterator(new StringReader(text));
    while (lines.hasNext()) {
        String line = lines.nextLine();

        BreakIterator sentenceBounds = BreakIterator.getSentenceInstance(LocaleUtils.toLocale(language_code));
        sentenceBounds.setText(line);

        int begin = sentenceBounds.first();
        int end = sentenceBounds.next();
        while (end != BreakIterator.DONE) {
            String sentence = de.tudarmstadt.lt.utilities.StringUtils.trim(line.substring(begin, end));
            if (!sentence.isEmpty()) {
                sentences.add(sentence);
                LOG.trace(String.format("Current sentence: %s", StringUtils.abbreviate(sentence, 200)));
            }
            begin = end;
            end = sentenceBounds.next();
        }
    }

    LOG.trace(String.format("Split text '%s' into '%d' sentences.", StringUtils.abbreviate(text, 200),
            sentences.size()));
    return sentences;
}
From source file:IteratorTest.java
protected void refreshDisplay() { int startIndex, nextIndex; Vector items = new Vector(); String msgText = textArea.getText(); Locale locale = (Locale) (localeButton.getSelectedItem()); BreakIterator iterator = null; if (charButton.isSelected()) { iterator = BreakIterator.getCharacterInstance(locale); } else if (wordButton.isSelected()) { iterator = BreakIterator.getWordInstance(locale); } else if (lineButton.isSelected()) { iterator = BreakIterator.getLineInstance(locale); } else if (sentButton.isSelected()) { iterator = BreakIterator.getSentenceInstance(locale); }//w w w . j a v a2 s .c o m iterator.setText(msgText); startIndex = iterator.first(); nextIndex = iterator.next(); while (nextIndex != BreakIterator.DONE) { items.addElement(msgText.substring(startIndex, nextIndex)); startIndex = nextIndex; nextIndex = iterator.next(); } itemList.setListData(items); }
From source file:com.cotrino.knowledgemap.db.Question.java
/** * http://stackoverflow.com/questions/2103598/java-simple-sentence-parser * @param text/* www .j a v a 2 s . co m*/ * @param language * @param country * @return */ public static List<String> tokenize(String text, String language, String country) { List<String> sentences = new ArrayList<String>(); Locale currentLocale = new Locale(language, country); BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(currentLocale); sentenceIterator.setText(text); int boundary = sentenceIterator.first(); int lastBoundary = 0; while (boundary != BreakIterator.DONE) { boundary = sentenceIterator.next(); if (boundary != BreakIterator.DONE) { sentences.add(text.substring(lastBoundary, boundary)); } lastBoundary = boundary; } return sentences; }