Example usage for java.text BreakIterator setText

List of usage examples for java.text BreakIterator setText

Introduction

On this page you can find usage examples for the setText method of java.text.BreakIterator.

Prototype

public abstract void setText(CharacterIterator newText);

Source Link

Document

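Sets a new text to be scanned. Most of the examples below call the convenience overload setText(String) rather than the CharacterIterator overload shown in the prototype.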

Usage

From source file:net.nicoll.boot.metadata.AbstractMetadataFormatter.java

/**
 * Returns the first sentence of the property's description, or the supplied
 * fallback when the description contains no text.
 *
 * @param property the metadata property whose description is inspected
 * @param defaultValue the value returned when the description has no text
 * @return the span of the description between the first two boundaries reported by a
 *         sentence {@code BreakIterator}, or {@code defaultValue}
 */
protected String extractTagLine(ConfigurationMetadataProperty property, String defaultValue) {
    final String text = property.getDescription();
    if (!StringUtils.hasText(text)) {
        return defaultValue;
    }
    final BreakIterator sentences = BreakIterator.getSentenceInstance();
    sentences.setText(text);
    final int sentenceStart = sentences.first();
    final int sentenceEnd = sentences.next();
    return text.substring(sentenceStart, sentenceEnd);
}

From source file:ezbake.training.TweetWordDivideWorker.java

/**
 * Splits the tweet's text at word boundaries and hands every non-blank fragment
 * to {@code outputResultsToPipe}. An {@code IOException} raised for one fragment is
 * printed via {@code printStackTrace()} and the remaining fragments are still processed.
 *
 * @param visibility The Visibility containing the Accumulo visibility string representing the classification level
 * of the data contained in the incoming thrift data object.
 * @param data The incoming Thrift object to be processed; nothing happens when it, or its text, is null.
 */
@Override
public void process(Visibility visibility, Tweet data) {
    if (data == null || data.getText() == null) {
        return;
    }

    BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(data.getText());

    int fragmentStart = boundaries.first();
    int fragmentEnd = boundaries.next();
    while (fragmentEnd != BreakIterator.DONE) {
        String fragment = data.getText().substring(fragmentStart, fragmentEnd);
        if (StringUtils.isNotBlank(fragment)) {
            try {
                outputResultsToPipe(visibility, fragment);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        // Slide the window to the next pair of boundaries.
        fragmentStart = fragmentEnd;
        fragmentEnd = boundaries.next();
    }
}

From source file:org.jivesoftware.util.StringUtils.java

/**
 * Converts a line of text into an array of lower case words using a
 * word-instance {@code BreakIterator}.
 * <p>
 * Each fragment between two word boundaries is trimmed, stripped of the
 * characters {@code + / \ # * ) ( &}, and lower-cased; fragments that end up
 * empty (for example pure whitespace) are dropped.
 * <p>
 * This method is under the Jive Open Source Software License and was
 * written by Mark Imbriaco.
 *
 * @param text a String of text to convert into an array of words
 * @return text broken up into an array of lower case words; an empty array
 *         when {@code text} is {@code null} or empty.
 */
public static String[] toLowerCaseWordArray(String text) {
    if (text == null || text.isEmpty()) {
        return new String[0];
    }

    List<String> wordList = new ArrayList<>();
    BreakIterator boundary = BreakIterator.getWordInstance();
    boundary.setText(text);

    int start = boundary.first();
    for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
        // Remove characters that are not needed, then normalise the case.
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String word = text.substring(start, end).trim()
                .replace("+", "")
                .replace("/", "")
                .replace("\\", "")
                .replace("#", "")
                .replace("*", "")
                .replace(")", "")
                .replace("(", "")
                .replace("&", "")
                .toLowerCase(java.util.Locale.ROOT);
        if (!word.isEmpty()) {
            wordList.add(word);
        }
    }
    return wordList.toArray(new String[0]);
}

From source file:Main.java

/**
 * Adds {@code outputText} (wrapped in a scroll pane) to the content pane and fills it
 * with a sample text split at the line-break boundaries of the default locale; every
 * segment is followed by a {@code |} marker.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    final BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.getDefault());
    lineBreaks.setText(sample);
    outputText.setText("");

    int segmentStart = lineBreaks.first();
    for (int segmentEnd = lineBreaks.next(); segmentEnd != BreakIterator.DONE; segmentEnd = lineBreaks.next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Append whatever follows the last boundary that was consumed.
    outputText.append(sample.substring(segmentStart));
}

From source file:Main.java

/**
 * Adds {@code outputText} (wrapped in a scroll pane) to the content pane and fills it
 * with a sample text split at the word boundaries of the default locale; every
 * segment is followed by a {@code |} marker.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    final BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.getDefault());
    wordBreaks.setText(sample);
    outputText.setText("");

    int segmentStart = wordBreaks.first();
    for (int segmentEnd = wordBreaks.next(); segmentEnd != BreakIterator.DONE; segmentEnd = wordBreaks.next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Append whatever follows the last boundary that was consumed.
    outputText.append(sample.substring(segmentStart));
}

From source file:Main.java

/**
 * Adds {@code outputText} (wrapped in a scroll pane) to the content pane and fills it
 * with a sample text split at the sentence boundaries of the default locale; every
 * segment is followed by a {@code |} marker.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    final BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.getDefault());
    sentenceBreaks.setText(sample);
    outputText.setText("");

    int segmentStart = sentenceBreaks.first();
    for (int segmentEnd = sentenceBreaks.next(); segmentEnd != BreakIterator.DONE; segmentEnd = sentenceBreaks.next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }
    // Append whatever follows the last boundary that was consumed.
    outputText.append(sample.substring(segmentStart));
}

From source file:net.nicoll.boot.metadata.ConsoleMetadataFormatter.java

/**
 * Renders the repository as plain text: one banner per group (id plus the types of its
 * sources), one {@code id=default # (type) - first sentence} line per property, and a
 * closing statistics section listing how many descriptions contain no dot and which
 * property ids have no description at all.
 *
 * @param repository the metadata repository to render
 * @return the formatted report
 */
@Override
public String formatMetadata(ConfigurationMetadataRepository repository) {
    StringBuilder report = new StringBuilder();
    int descriptionsWithoutDot = 0;
    List<String> undocumentedKeys = new ArrayList<String>();

    List<ConfigurationMetadataGroup> sortedGroups = sortGroups(repository.getAllGroups().values());
    for (ConfigurationMetadataGroup group : sortedGroups) {
        // Group banner: separator, "Group --- id(types)", separator.
        report.append("========================================").append(NEW_LINE);
        StringBuilder sourceTypes = new StringBuilder();
        for (ConfigurationMetadataSource source : group.getSources().values()) {
            sourceTypes.append(source.getType()).append(" ");
        }
        report.append("Group --- ").append(group.getId());
        report.append("(").append(sourceTypes.toString().trim()).append(")").append(NEW_LINE);
        report.append("========================================").append(NEW_LINE);

        List<ConfigurationMetadataProperty> sortedProperties = sortProperties(group.getProperties().values());
        for (ConfigurationMetadataProperty property : sortedProperties) {
            StringBuilder line = new StringBuilder(property.getId()).append("=");

            // Arrays are flattened to a comma-delimited list; a null default prints nothing.
            Object defaultValue = property.getDefaultValue();
            if (defaultValue instanceof Object[]) {
                line.append(StringUtils.arrayToCommaDelimitedString((Object[]) defaultValue));
            } else if (defaultValue != null) {
                line.append(defaultValue);
            }
            line.append(" # (").append(property.getType()).append(")");

            String description = property.getDescription();
            if (!StringUtils.hasText(description)) {
                undocumentedKeys.add(property.getId());
            } else {
                line.append(" - ");
                if (description.contains(".")) {
                    // Keep only the first sentence of the description.
                    BreakIterator sentences = BreakIterator.getSentenceInstance();
                    sentences.setText(description);
                    int sentenceStart = sentences.first();
                    int sentenceEnd = sentences.next();
                    line.append(description.substring(sentenceStart, sentenceEnd));
                } else {
                    line.append(description).append(" --- NO DOT FOUND!");
                    descriptionsWithoutDot++;
                }
            }
            report.append(line.toString()).append(NEW_LINE);
        }
    }

    // Statistics footer.
    report.append("-------- Stats --------").append(NEW_LINE);
    report.append("Not dot in description: ").append(descriptionsWithoutDot).append(NEW_LINE);
    report.append("Missing description:").append(NEW_LINE);
    for (String key : undocumentedKeys) {
        report.append("\t").append(key).append("\n");
    }
    return report.toString();
}

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits a sentence into tokens at the word boundaries reported by a word-instance
 * {@code BreakIterator} for the given language.
 * <p>
 * The sentence is trimmed once and both the boundary scan and the substring extraction
 * operate on that trimmed text. (Previously the iterator scanned the trimmed text while
 * substrings were cut from the untrimmed one, so any leading whitespace shifted every
 * token.)
 *
 * @param sentence the sentence to tokenize
 * @param language_code the language code converted to a locale via {@code LocaleUtils.toLocale}
 * @return the non-empty tokens in order of appearance
 */
@Override
public List<String> tokenizeSentence_intern(String sentence, String language_code) {
    // Boundary offsets are relative to this exact string, so slice it and nothing else.
    final String trimmed = sentence.trim();
    List<String> tokens = new ArrayList<String>();
    BreakIterator token_bounds = BreakIterator.getWordInstance(LocaleUtils.toLocale(language_code));
    token_bounds.setText(trimmed);
    int begin_t = token_bounds.first();
    for (int end_t = token_bounds.next(); end_t != BreakIterator.DONE; begin_t = end_t, end_t = token_bounds
            .next()) {
        // NOTE(review): presumably trims the fragment and replaces inner whitespace with "_" - confirm against the helper.
        String token = de.tudarmstadt.lt.utilities.StringUtils
                .trim_and_replace_emptyspace(trimmed.substring(begin_t, end_t), "_");
        if (!token.isEmpty()) { // add token iff token is not empty
            tokens.add(token);
        }
    }
    return tokens;
}

From source file:org.jivesoftware.util.StringUtils.java

/**
 * Reformats a string so that lines longer than {@code width} are split apart,
 * either at a line-break opportunity found by a {@code BreakIterator} or, when
 * none is usable, directly at the position where the width is exceeded.
 * The input is returned unchanged when {@code width} is less than 5 or is
 * greater than or equal to the input's length; a {@code null} input yields
 * an empty string.
 * <p>
 * Please note that this method can be lossy - trailing spaces on wrapped
 * lines may be trimmed.</p>
 *
 * @param input the String to reformat.
 * @param width the maximum length of any one line.
 * @param locale the locale used to obtain the line-instance BreakIterator; when
 *               {@code null}, {@code JiveGlobals.getLocale()} is used.
 * @return a new String with reformatted as needed.
 */
public static String wordWrap(String input, int width, Locale locale) {
    // protect ourselves
    if (input == null) {
        return "";
    } else if (width < 5) {
        return input;
    } else if (width >= input.length()) {
        return input;
    }

    // default locale
    if (locale == null) {
        locale = JiveGlobals.getLocale();
    }

    // Work on a mutable copy: newlines are inserted/replaced in place while scanning,
    // so buf.length() (re-read on every iteration) grows as breaks are inserted.
    StringBuilder buf = new StringBuilder(input);
    boolean endOfLine = false;
    int lineStart = 0;

    for (int i = 0; i < buf.length(); i++) {
        // An existing newline starts a fresh line right after it.
        if (buf.charAt(i) == '\n') {
            lineStart = i + 1;
            endOfLine = true;
        }

        // handle splitting at width character
        if (i > lineStart + width - 1) {
            if (!endOfLine) {
                // Offsets returned by the iterator are relative to lineStart because
                // only the current line's text (lineStart inclusive, i exclusive) is scanned.
                int limit = i - lineStart - 1;
                BreakIterator breaks = BreakIterator.getLineInstance(locale);
                breaks.setText(buf.substring(lineStart, i));
                int end = breaks.last();

                // if the last character in the search string isn't a space,
                // we can't split on it (looks bad). Search for a previous
                // break character
                if (end == limit + 1) {
                    if (!Character.isWhitespace(buf.charAt(lineStart + end))) {
                        end = breaks.preceding(end - 1);
                    }
                }

                // if the last character is a space, replace it with a \n
                if (end != BreakIterator.DONE && end == limit + 1) {
                    buf.replace(lineStart + end, lineStart + end + 1, "\n");
                    lineStart = lineStart + end;
                }
                // otherwise, just insert a \n
                else if (end != BreakIterator.DONE && end != 0) {
                    buf.insert(lineStart + end, '\n');
                    lineStart = lineStart + end + 1;
                } else {
                    // No usable break opportunity: hard-break at the current index.
                    buf.insert(i, '\n');
                    lineStart = i + 1;
                }
            } else {
                // NOTE(review): endOfLine stays set after an existing newline until the next
                // over-width position, where a hard break is inserted at i without consulting
                // the BreakIterator - confirm this is intended.
                buf.insert(i, '\n');
                lineStart = i + 1;
                endOfLine = false;
            }
        }
    }

    return buf.toString();
}

From source file:com.juick.android.Utils.java

/**
 * Returns the first word-boundary segment of {@code text} that ends at or after
 * {@code offset} and is longer than one character (single-character segments such as
 * spaces or punctuation are skipped).
 *
 * @param text the text to scan; {@code null} is treated as containing no words
 * @param offset the character offset the returned segment must reach
 * @return the matching segment, or {@code null} when {@code text} is {@code null} or no
 *         segment qualifies
 */
public static String getWordAtOffset(final String text, final int offset) {
    // BreakIterator.setText(String) throws on null; report "no word" instead.
    if (text == null) {
        return null;
    }
    BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText(text);
    int start = wordIterator.first();
    int end = wordIterator.next();
    while (end != BreakIterator.DONE) {
        boolean reachesOffset = end >= offset;
        boolean longerThanOneChar = end - start > 1;
        if (reachesOffset && longerThanOneChar) {
            return text.substring(start, end);
        }
        start = end;
        end = wordIterator.next();
    }
    return null;
}