List of usage examples for java.text.BreakIterator#setText(CharacterIterator)
/**
 * Sets the text that this break iterator will scan.
 *
 * <p>Declared {@code abstract}, so every concrete iterator supplies its own implementation.
 *
 * @param newText the character iterator that provides the text to analyse
 */
public abstract void setText(CharacterIterator newText);
From source file:net.nicoll.boot.metadata.AbstractMetadataFormatter.java
protected String extractTagLine(ConfigurationMetadataProperty property, String defaultValue) { String description = property.getDescription(); if (StringUtils.hasText(description)) { BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(description); return description.substring(breakIterator.first(), breakIterator.next()); }//from w w w . j a v a 2s. c om return defaultValue; }
From source file:ezbake.training.TweetWordDivideWorker.java
/**
 * Divides the text of a tweet into words and sends every non-blank word to the output pipe.
 *
 * @param visibility The Visibility containing the Accumulo visibility string representing the
 *                   classification level of the data contained in the incoming thrift data
 *                   object; it is passed along unchanged with every emitted word.
 * @param data The incoming Thrift object to be processed. Nothing happens when it is
 *             {@code null} or has no text.
 */
@Override
public void process(Visibility visibility, Tweet data) {
    if (data == null || data.getText() == null) {
        return;
    }

    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(data.getText());

    int from = words.first();
    int to = words.next();
    while (to != BreakIterator.DONE) {
        String candidate = data.getText().substring(from, to);
        if (StringUtils.isNotBlank(candidate)) {
            try {
                outputResultsToPipe(visibility, candidate);
            } catch (IOException e) {
                // A failure for one word is reported; the remaining words are still handled.
                e.printStackTrace();
            }
        }
        from = to;
        to = words.next();
    }
}
From source file:org.jivesoftware.util.StringUtils.java
/** * Converts a line of text into an array of lower case words using a * BreakIterator.wordInstance().<p> * * This method is under the Jive Open Source Software License and was * written by Mark Imbriaco.//www. j a va 2 s . c o m * * @param text a String of text to convert into an array of words * @return text broken up into an array of words. */ public static String[] toLowerCaseWordArray(String text) { if (text == null || text.length() == 0) { return new String[0]; } List<String> wordList = new ArrayList<>(); BreakIterator boundary = BreakIterator.getWordInstance(); boundary.setText(text); int start = 0; for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { String tmp = text.substring(start, end).trim(); // Remove characters that are not needed. tmp = replace(tmp, "+", ""); tmp = replace(tmp, "/", ""); tmp = replace(tmp, "\\", ""); tmp = replace(tmp, "#", ""); tmp = replace(tmp, "*", ""); tmp = replace(tmp, ")", ""); tmp = replace(tmp, "(", ""); tmp = replace(tmp, "&", ""); if (tmp.length() > 0) { wordList.add(tmp); } } return wordList.toArray(new String[wordList.size()]); }
From source file:Main.java
/**
 * Builds the frame: puts the output area into a scroll pane and fills it with a sample text
 * cut at the line-break boundaries of the default locale, each piece followed by a
 * {@code |} marker.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.getDefault());
    String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    lineBreaks.setText(sample);

    outputText.setText("");
    int from = lineBreaks.first();
    for (int to = lineBreaks.next(); to != BreakIterator.DONE; to = lineBreaks.next()) {
        outputText.append(sample.substring(from, to) + "|");
        from = to;
    }
    // Append whatever follows the last boundary the iterator reported.
    outputText.append(sample.substring(from));
}
From source file:Main.java
/**
 * Creates the frame, wraps the output area in a scroll pane and shows a sample text split at
 * the word boundaries of the default locale. A {@code |} is written after every segment.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final Locale locale = Locale.getDefault();
    final BreakIterator wordBreaks = BreakIterator.getWordInstance(locale);
    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    wordBreaks.setText(sample);

    outputText.setText("");
    int segmentStart = wordBreaks.first();
    int segmentEnd = wordBreaks.next();
    while (segmentEnd != BreakIterator.DONE) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
        segmentEnd = wordBreaks.next();
    }
    // The text after the final reported boundary is appended without a marker.
    outputText.append(sample.substring(segmentStart));
}
From source file:Main.java
/**
 * Sets up the frame with a scrollable output area and writes a sample text into it, divided
 * at the sentence boundaries of the default locale; a {@code |} follows each sentence.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    Locale defaultLocale = Locale.getDefault();
    BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(defaultLocale);
    String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    sentenceBreaks.setText(sample);
    outputText.setText("");

    int cursor = sentenceBreaks.first();
    while (true) {
        int boundary = sentenceBreaks.next();
        if (boundary == BreakIterator.DONE) {
            break;
        }
        outputText.append(sample.substring(cursor, boundary) + "|");
        cursor = boundary;
    }
    // Finish with the remainder that lies behind the last boundary.
    outputText.append(sample.substring(cursor));
}
From source file:net.nicoll.boot.metadata.ConsoleMetadataFormatter.java
@Override public String formatMetadata(ConfigurationMetadataRepository repository) { StringBuilder out = new StringBuilder(); int noDotInDescription = 0; List<String> keysMissingDescription = new ArrayList<String>(); List<ConfigurationMetadataGroup> groups = sortGroups(repository.getAllGroups().values()); for (ConfigurationMetadataGroup group : groups) { out.append("========================================").append(NEW_LINE); StringBuilder sb = new StringBuilder(); for (ConfigurationMetadataSource source : group.getSources().values()) { sb.append(source.getType()).append(" "); }//from w ww .j av a2s. c o m out.append("Group --- ").append(group.getId()).append("(").append(sb.toString().trim()).append(")") .append(NEW_LINE).append("========================================").append(NEW_LINE); List<ConfigurationMetadataProperty> properties = sortProperties(group.getProperties().values()); for (ConfigurationMetadataProperty property : properties) { StringBuilder item = new StringBuilder(property.getId()).append("="); Object defaultValue = property.getDefaultValue(); if (defaultValue != null) { if (defaultValue instanceof Object[]) { item.append(StringUtils.arrayToCommaDelimitedString((Object[]) defaultValue)); } else { item.append(defaultValue); } } item.append(" # (").append(property.getType()).append(")"); String description = property.getDescription(); if (StringUtils.hasText(description)) { item.append(" - "); int dot = description.indexOf("."); if (dot != -1) { BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(description); item.append(description.substring(breakIterator.first(), breakIterator.next())); } else { item.append(description).append(" --- NO DOT FOUND!"); noDotInDescription++; } } else { keysMissingDescription.add(property.getId()); } out.append(item.toString()).append(NEW_LINE); } } out.append("-------- Stats --------").append(NEW_LINE).append("Not dot in description: ") 
.append(noDotInDescription).append(NEW_LINE).append("Missing description:").append(NEW_LINE); StringBuilder desc = new StringBuilder(); for (String s : keysMissingDescription) { desc.append("\t").append(s).append("\n"); } out.append(desc.toString()); return out.toString(); }
From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java
@Override public List<String> tokenizeSentence_intern(String sentence, String language_code) { ArrayList<String> tokens = new ArrayList<String>(); BreakIterator token_bounds = BreakIterator.getWordInstance(LocaleUtils.toLocale(language_code)); token_bounds.setText(sentence.trim()); int begin_t = token_bounds.first(); for (int end_t = token_bounds.next(); end_t != BreakIterator.DONE; begin_t = end_t, end_t = token_bounds .next()) {//w w w . j a v a2 s.c om String token = de.tudarmstadt.lt.utilities.StringUtils .trim_and_replace_emptyspace(sentence.substring(begin_t, end_t), "_"); if (!token.isEmpty()) { // add token iff token is not empty tokens.add(token); } } return tokens; }
From source file:org.jivesoftware.util.StringUtils.java
/**
 * Reformats a string where lines that are longer than <tt>width</tt>
 * are split apart by newline characters. A split is placed at a line-break
 * position found by a {@link BreakIterator} for the given locale or, when no
 * usable position is found, directly at the character that exceeds the width.
 * If the input is <tt>null</tt> an empty string is returned. If the width
 * specified is less than 5, or greater than or equal to the input String's
 * length, the string will be returned as is.
 * <p>
 * Please note that this method can be lossy - trailing spaces on wrapped
 * lines may be trimmed.</p>
 *
 * @param input the String to reformat.
 * @param width the maximum length of any one line.
 * @param locale the locale used to obtain the line BreakIterator; when
 *      <tt>null</tt> the value of <tt>JiveGlobals.getLocale()</tt> is used.
 * @return a new String with reformatted as needed.
 */
public static String wordWrap(String input, int width, Locale locale) {
    // protect ourselves
    if (input == null) {
        return "";
    } else if (width < 5) {
        return input;
    } else if (width >= input.length()) {
        return input;
    }

    // default locale
    if (locale == null) {
        locale = JiveGlobals.getLocale();
    }

    // The buffer is modified in place while it is being scanned; lineStart is the
    // index in buf at which the line currently being measured begins.
    StringBuilder buf = new StringBuilder(input);
    boolean endOfLine = false;
    int lineStart = 0;

    for (int i = 0; i < buf.length(); i++) {
        // A newline that is already in the text starts a new line right behind it.
        if (buf.charAt(i) == '\n') {
            lineStart = i + 1;
            endOfLine = true;
        }

        // handle splitting at width character
        if (i > lineStart + width - 1) {
            if (!endOfLine) {
                // limit + 1 is the length of the slice [lineStart, i) examined below.
                int limit = i - lineStart - 1;
                BreakIterator breaks = BreakIterator.getLineInstance(locale);
                breaks.setText(buf.substring(lineStart, i));
                int end = breaks.last();

                // end == limit + 1 means the last break sits at the very end of the
                // slice, i.e. directly in front of the character at index i. If that
                // character isn't whitespace we can't split on it (looks bad), so
                // search for a previous break position.
                if (end == limit + 1) {
                    if (!Character.isWhitespace(buf.charAt(lineStart + end))) {
                        end = breaks.preceding(end - 1);
                    }
                }

                // if the break is still at the end of the slice, the character at
                // index i is whitespace: replace it with a \n
                if (end != BreakIterator.DONE && end == limit + 1) {
                    buf.replace(lineStart + end, lineStart + end + 1, "\n");
                    lineStart = lineStart + end;
                }
                // otherwise, just insert a \n at the break that was found
                else if (end != BreakIterator.DONE && end != 0) {
                    buf.insert(lineStart + end, '\n');
                    lineStart = lineStart + end + 1;
                }
                // no usable break in the slice: insert the \n at index i
                else {
                    buf.insert(i, '\n');
                    lineStart = i + 1;
                }
            } else {
                // NOTE(review): once a '\n' has been seen, the next over-long line is cut
                // at index i without consulting the BreakIterator, and only then is the
                // flag cleared - confirm that this is intended.
                buf.insert(i, '\n');
                lineStart = i + 1;
                endOfLine = false;
            }
        }
    }
    return buf.toString();
}
From source file:com.juick.android.Utils.java
/**
 * Returns the first word of a text that ends at or after the given character offset.
 *
 * <p>The text is scanned with a word {@link BreakIterator} for the default locale. Segments
 * of a single character are skipped, and so are segments that consist of whitespace only
 * (a run of several blanks is reported by the iterator as one segment and would otherwise
 * be returned as if it were a word).
 *
 * @param text the text to search; may be {@code null}
 * @param offset the character offset into {@code text}
 * @return the matching word, or {@code null} when {@code text} is {@code null} or no
 *         segment of more than one character ends at or after {@code offset}
 */
public static String getWordAtOffset(final String text, final int offset) {
    if (text == null) {
        return null;
    }

    BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText(text);
    int start = wordIterator.first();
    for (int end = wordIterator.next(); end != BreakIterator.DONE; start = end, end = wordIterator.next()) {
        if ((end >= offset) && (end - start > 1)) {
            String candidate = text.substring(start, end);
            // A whitespace-only segment is a gap between words, not a word.
            if (candidate.trim().length() > 0) {
                return candidate;
            }
        }
    }
    return null;
}