Example usage for java.text BreakIterator setText

Introduction

On this page you can find example usages of java.text.BreakIterator.setText.

Prototype

public abstract void setText(CharacterIterator newText);

Source Link

Document

Set a new text for scanning.

Usage

From source file:net.nicoll.boot.metadata.AbstractMetadataFormatter.java

/**
 * Extracts a short tag line from the description of a property.
 *
 * @param property the property whose description is inspected
 * @param defaultValue the value to return when {@code StringUtils.hasText} rejects the description
 * @return the span of the description between the first and second sentence boundary,
 *         or {@code defaultValue} when there is no usable description
 */
protected String extractTagLine(ConfigurationMetadataProperty property, String defaultValue) {
    final String text = property.getDescription();
    if (!StringUtils.hasText(text)) {
        return defaultValue;
    }
    BreakIterator sentences = BreakIterator.getSentenceInstance();
    sentences.setText(text);
    // Boundaries are read in order: start of the text, then the end of the first sentence.
    int from = sentences.first();
    int to = sentences.next();
    return text.substring(from, to);
}

From source file:ezbake.training.TweetWordDivideWorker.java

/**
 * Performs processing on the Tweet object by dividing the tweet's text into words
 * and sending every non-blank word to the output pipe.
 *
 * @param visibility The Visibility containing the Accumulo visibility string representing the
 * classification level of the data contained in the incoming thrift data object.
 * @param data The incoming Thrift object to be processed; ignored when it is {@code null}
 * or carries no text.
 */
@Override
public void process(Visibility visibility, Tweet data) {
    if (data == null || data.getText() == null) {
        return;
    }

    BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(data.getText());

    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        String candidate = data.getText().substring(from, to);
        // Whitespace-only segments between words are skipped.
        if (StringUtils.isNotBlank(candidate)) {
            try {
                outputResultsToPipe(visibility, candidate);
            } catch (IOException e) {
                // A failed write is reported and the remaining words are still processed.
                e.printStackTrace();
            }
        }
        from = to;
        to = boundaries.next();
    }
}

From source file:org.jivesoftware.util.StringUtils.java

/**
 * Converts a line of text into an array of lower case words using a
 * BreakIterator.wordInstance().<p>
 *
 * Each segment reported by the word iterator is trimmed, lower-cased and stripped of the
 * characters {@code + / \ # * ) ( &}; segments that end up empty are dropped.<p>
 *
 * This method is under the Jive Open Source Software License and was
 * written by Mark Imbriaco.
 *
 * @param text a String of text to convert into an array of words
 * @return text broken up into an array of lower case words; an empty array when
 *         {@code text} is {@code null} or empty.
 */
public static String[] toLowerCaseWordArray(String text) {
    if (text == null || text.isEmpty()) {
        return new String[0];
    }

    List<String> wordList = new ArrayList<>();
    BreakIterator boundary = BreakIterator.getWordInstance();
    boundary.setText(text);
    int start = boundary.first();

    for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
        // Lower-case with a fixed locale so the result does not depend on the JVM default
        // (e.g. the Turkish dotless i).
        String word = text.substring(start, end).trim().toLowerCase(java.util.Locale.ROOT);
        // Remove characters that are not needed.
        word = word.replace("+", "")
                .replace("/", "")
                .replace("\\", "")
                .replace("#", "")
                .replace("*", "")
                .replace(")", "")
                .replace("(", "")
                .replace("&", "");
        if (!word.isEmpty()) {
            wordList.add(word);
        }
    }
    return wordList.toArray(new String[0]);
}

From source file:Main.java

/**
 * Builds the frame and fills {@code outputText} with a sample text in which every
 * line-break opportunity reported for the default locale is marked with a {@code |}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.getDefault());

    String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    lineBreaks.setText(sample);
    outputText.setText("");

    int segmentStart = lineBreaks.first();
    for (int segmentEnd = lineBreaks.next(); segmentEnd != BreakIterator.DONE; segmentEnd = lineBreaks.next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Append whatever follows the last boundary.
    outputText.append(sample.substring(segmentStart));
}

From source file:Main.java

/**
 * Builds the frame and fills {@code outputText} with a sample text in which every
 * word boundary reported for the default locale is marked with a {@code |}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.getDefault());

    String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    wordBreaks.setText(sample);
    outputText.setText("");

    int segmentStart = wordBreaks.first();
    for (int segmentEnd = wordBreaks.next(); segmentEnd != BreakIterator.DONE; segmentEnd = wordBreaks.next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Append whatever follows the last boundary.
    outputText.append(sample.substring(segmentStart));
}

From source file:Main.java

/**
 * Builds the frame and fills {@code outputText} with a sample text in which every
 * sentence boundary reported for the default locale is marked with a {@code |}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.getDefault());

    String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    sentenceBreaks.setText(sample);
    outputText.setText("");

    int segmentStart = sentenceBreaks.first();
    for (int segmentEnd = sentenceBreaks.next(); segmentEnd != BreakIterator.DONE; segmentEnd = sentenceBreaks
            .next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Append whatever follows the last boundary.
    outputText.append(sample.substring(segmentStart));
}

From source file:net.nicoll.boot.metadata.ConsoleMetadataFormatter.java

/**
 * Renders the repository as plain text: one banner per group, one line per property
 * ({@code id=default # (type) - first sentence of the description}), followed by a
 * statistics section listing descriptions without a dot and properties without a description.
 *
 * @param repository the metadata repository to render
 * @return the formatted report
 */
@Override
public String formatMetadata(ConfigurationMetadataRepository repository) {
    StringBuilder report = new StringBuilder();
    List<String> undocumentedKeys = new ArrayList<String>();
    int descriptionsWithoutDot = 0;

    List<ConfigurationMetadataGroup> sortedGroups = sortGroups(repository.getAllGroups().values());
    for (ConfigurationMetadataGroup group : sortedGroups) {
        report.append("========================================").append(NEW_LINE);

        // Space-separated list of the source types contributing to this group.
        StringBuilder sourceTypes = new StringBuilder();
        for (ConfigurationMetadataSource source : group.getSources().values()) {
            sourceTypes.append(source.getType()).append(" ");
        }
        report.append("Group --- ").append(group.getId());
        report.append("(").append(sourceTypes.toString().trim()).append(")").append(NEW_LINE);
        report.append("========================================").append(NEW_LINE);

        List<ConfigurationMetadataProperty> sortedProperties = sortProperties(group.getProperties().values());
        for (ConfigurationMetadataProperty property : sortedProperties) {
            StringBuilder line = new StringBuilder(property.getId()).append("=");

            Object value = property.getDefaultValue();
            if (value instanceof Object[]) {
                line.append(StringUtils.arrayToCommaDelimitedString((Object[]) value));
            } else if (value != null) {
                line.append(value);
            }
            line.append(" # (").append(property.getType()).append(")");

            String description = property.getDescription();
            if (!StringUtils.hasText(description)) {
                undocumentedKeys.add(property.getId());
            } else {
                line.append(" - ");
                if (description.indexOf(".") == -1) {
                    line.append(description).append(" --- NO DOT FOUND!");
                    descriptionsWithoutDot++;
                } else {
                    // Only the first sentence of the description is shown.
                    BreakIterator sentences = BreakIterator.getSentenceInstance();
                    sentences.setText(description);
                    int from = sentences.first();
                    int to = sentences.next();
                    line.append(description.substring(from, to));
                }
            }
            report.append(line.toString()).append(NEW_LINE);
        }
    }

    report.append("-------- Stats --------").append(NEW_LINE);
    report.append("Not dot in description: ").append(descriptionsWithoutDot).append(NEW_LINE);
    report.append("Missing description:").append(NEW_LINE);
    for (String key : undocumentedKeys) {
        report.append("\t").append(key).append("\n");
    }
    return report.toString();
}

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits a sentence into tokens using the word {@link BreakIterator} of the given language.
 *
 * The boundaries are computed on the trimmed sentence, so the tokens are cut from that same
 * trimmed string; slicing the untrimmed input would shift every token by the amount of
 * leading whitespace.
 *
 * @param sentence the sentence to tokenize; must not be {@code null}
 * @param language_code the locale string handed to {@code LocaleUtils.toLocale}
 * @return the non-empty tokens in order of appearance, each passed through
 *         {@code trim_and_replace_emptyspace(token, "_")}
 */
@Override
public List<String> tokenizeSentence_intern(String sentence, String language_code) {
    List<String> tokens = new ArrayList<String>();
    BreakIterator token_bounds = BreakIterator.getWordInstance(LocaleUtils.toLocale(language_code));
    String trimmed = sentence.trim();
    token_bounds.setText(trimmed);
    int begin_t = token_bounds.first();
    for (int end_t = token_bounds.next(); end_t != BreakIterator.DONE; begin_t = end_t, end_t = token_bounds
            .next()) {
        // Indices from token_bounds refer to the trimmed text, not to the raw sentence.
        String token = de.tudarmstadt.lt.utilities.StringUtils
                .trim_and_replace_emptyspace(trimmed.substring(begin_t, end_t), "_");
        if (!token.isEmpty()) { // add token iff token is not empty
            tokens.add(token);
        }
    }
    return tokens;
}

From source file:org.jivesoftware.util.StringUtils.java

/**
 * Reformats a string where lines that are longer than <tt>width</tt>
 * are split apart at the earliest wordbreak or at maxLength, whichever is
 * sooner. If the width specified is less than 5 or greater than the input
 * Strings length the string will be returned as is.
 * <p>/*from  w ww . j a  v  a2  s .c  om*/
 * Please note that this method can be lossy - trailing spaces on wrapped
 * lines may be trimmed.</p>
 *
 * @param input the String to reformat.
 * @param width the maximum length of any one line.
 * @return a new String with reformatted as needed.
 */
public static String wordWrap(String input, int width, Locale locale) {
    // protect ourselves
    if (input == null) {
        return "";
    } else if (width < 5) {
        return input;
    } else if (width >= input.length()) {
        return input;
    }

    // default locale
    if (locale == null) {
        locale = JiveGlobals.getLocale();
    }

    StringBuilder buf = new StringBuilder(input);
    boolean endOfLine = false;
    int lineStart = 0;

    for (int i = 0; i < buf.length(); i++) {
        if (buf.charAt(i) == '\n') {
            lineStart = i + 1;
            endOfLine = true;
        }

        // handle splitting at width character
        if (i > lineStart + width - 1) {
            if (!endOfLine) {
                int limit = i - lineStart - 1;
                BreakIterator breaks = BreakIterator.getLineInstance(locale);
                breaks.setText(buf.substring(lineStart, i));
                int end = breaks.last();

                // if the last character in the search string isn't a space,
                // we can't split on it (looks bad). Search for a previous
                // break character
                if (end == limit + 1) {
                    if (!Character.isWhitespace(buf.charAt(lineStart + end))) {
                        end = breaks.preceding(end - 1);
                    }
                }

                // if the last character is a space, replace it with a \n
                if (end != BreakIterator.DONE && end == limit + 1) {
                    buf.replace(lineStart + end, lineStart + end + 1, "\n");
                    lineStart = lineStart + end;
                }
                // otherwise, just insert a \n
                else if (end != BreakIterator.DONE && end != 0) {
                    buf.insert(lineStart + end, '\n');
                    lineStart = lineStart + end + 1;
                } else {
                    buf.insert(i, '\n');
                    lineStart = i + 1;
                }
            } else {
                buf.insert(i, '\n');
                lineStart = i + 1;
                endOfLine = false;
            }
        }
    }

    return buf.toString();
}

From source file:com.juick.android.Utils.java

/**
 * Finds the first word-iterator segment that ends at or after {@code offset} and is
 * longer than one character.
 *
 * @param text the text to scan
 * @param offset the character offset the segment has to reach
 * @return the matching segment, or {@code null} when no segment qualifies
 */
public static String getWordAtOffset(final String text, final int offset) {
    BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(text);

    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        boolean reachesOffset = to >= offset;
        // Single-character segments (e.g. a space or punctuation mark) are never returned.
        boolean longerThanOneChar = to - from > 1;
        if (reachesOffset && longerThanOneChar) {
            return text.substring(from, to);
        }
        from = to;
        to = boundaries.next();
    }
    return null;
}