Example usage for java.text BreakIterator DONE

Introduction

This page shows example usages of the java.text.BreakIterator.DONE constant.

Prototype

int DONE

To view the source code for java.text.BreakIterator.DONE, click the Source Link.

Document

DONE is returned by previous(), next(), next(int), preceding(int) and following(int) when either the first or last text boundary has been reached.

Usage

From source file:Main.java

/**
 * Builds the frame: places the output text area inside a scroll pane and fills it
 * with a sample text cut at the character boundaries of the default locale, each
 * segment followed by a '|'.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final BreakIterator charBoundaries = BreakIterator.getCharacterInstance(Locale.getDefault());
    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    charBoundaries.setText(sample);
    outputText.setText("");

    // Walk boundary to boundary, appending each segment plus a separator.
    int segmentStart = charBoundaries.first();
    for (int segmentEnd = charBoundaries.next();
            segmentEnd != BreakIterator.DONE;
            segmentEnd = charBoundaries.next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
    }

    // Append whatever follows the last boundary that was reported.
    outputText.append(sample.substring(segmentStart));
}

From source file:eu.fbk.utils.lsa.util.Anvur.java

/**
 * Splits the input at the word boundaries of the default locale and joins the
 * resulting segments, each one prefixed by a single space.
 *
 * @param in the text to split
 * @return every segment between two consecutive boundaries, each preceded by " "
 */
static String tokenize(String in) {
    final BreakIterator words = BreakIterator.getWordInstance();
    words.setText(in);

    final StringBuilder spaced = new StringBuilder();
    int from = words.first();
    int to = words.next();
    while (to != BreakIterator.DONE) {
        // One separator, then the segment [from, to) copied straight from the input.
        spaced.append(' ').append(in, from, to);
        from = to;
        to = words.next();
    }
    return spaced.toString();
}

From source file:net.sf.jtmt.tokenizers.SentenceTokenizer.java

/**
 * Advances the break iterator by one boundary and returns the text between the
 * current index and that boundary.
 *
 * @return the next segment of the text, or {@code null} once the iterator
 *         reports {@link BreakIterator#DONE}
 */
public String nextSentence() {
    final int boundary = breakIterator.next();
    if (boundary != BreakIterator.DONE) {
        final String segment = text.substring(index, boundary);
        // Remember where this segment stopped so the next call continues from there.
        index = boundary;
        return segment;
    }
    return null;
}

From source file:net.sf.jtmt.tokenizers.ParagraphTokenizer.java

/**
 * Moves the break iterator to its next boundary and hands back the slice of
 * text that lies between the stored index and that boundary.
 *
 * @return the next paragraph slice, or {@code null} when the iterator has
 *         returned {@link BreakIterator#DONE}
 */
public String nextParagraph() {
    final int stop = breakIterator.next();
    if (stop == BreakIterator.DONE) {
        return null;
    }

    final String paragraph = text.substring(index, stop);
    index = stop; // the following call starts where this slice ended
    return paragraph;
}

From source file:ezbake.training.TweetWordDivideWorker.java

/**
 * Divides the text of the incoming Tweet at word boundaries and passes every
 * non-blank piece, together with the visibility, to {@code outputResultsToPipe}.
 * Nothing happens when the Tweet or its text is {@code null}.
 *
 * @param visibility The Visibility containing the Accumulo visibility string representing the classification level
 * of the data contained in the incoming thrift data object.
 * @param data The incoming Thrift object to be processed.
 */
@Override
public void process(Visibility visibility, Tweet data) {
    if (data == null || data.getText() == null) {
        return;
    }

    final BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(data.getText());

    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        final String piece = data.getText().substring(from, to);
        // Blank pieces (e.g. the whitespace between words) are skipped.
        if (StringUtils.isNotBlank(piece)) {
            try {
                outputResultsToPipe(visibility, piece);
            } catch (IOException e) {
                // A failed write is reported and the remaining pieces are still processed.
                e.printStackTrace();
            }
        }
        from = to;
        to = boundaries.next();
    }
}

From source file:StringUtils.java

/**
 * Reformats a string where lines that are longer than <tt>width</tt>
 * are split apart at the earliest wordbreak or at maxLength, whichever is
 * sooner. If the width specified is less than 5, or greater than or equal
 * to the input String's length, the string will be returned as is. A
 * <tt>null</tt> input yields the empty string.
 * <p/>
 * Please note that this method can be lossy - trailing spaces on wrapped
 * lines may be trimmed.
 *
 * @param input the String to reformat.
 * @param width the maximum length of any one line.
 * @param locale the locale used to obtain the line-break iterator.
 * @return the reformatted String, or the input itself when one of the
 *         early-return conditions above applies.
 */
public static String wordWrap(String input, int width, Locale locale) {
    // protect ourselves
    if (input == null) {
        return "";
    } else if (width < 5) {
        return input;
    } else if (width >= input.length()) {
        return input;
    }

    // All edits are made in place on this buffer, so its length (and the
    // loop bound below) grows whenever a '\n' is inserted.
    StringBuilder buf = new StringBuilder(input);
    boolean endOfLine = false;
    int lineStart = 0;

    for (int i = 0; i < buf.length(); i++) {
        // An existing newline starts a new line right after it; the flag
        // records that a newline has been seen.
        if (buf.charAt(i) == '\n') {
            lineStart = i + 1;
            endOfLine = true;
        }

        // handle splitting at width character
        if (i > lineStart + width - 1) {
            if (!endOfLine) {
                // limit is the index, relative to lineStart, of the last
                // character of the candidate line buf[lineStart, i).
                int limit = i - lineStart - 1;
                BreakIterator breaks = BreakIterator.getLineInstance(locale);
                breaks.setText(buf.substring(lineStart, i));
                int end = breaks.last();

                // if the last character in the search string isn't a space,
                // we can't split on it (looks bad). Search for a previous
                // break character
                // Note: when end == limit + 1, lineStart + end equals i, so the
                // character tested here is the one just past the candidate line.
                if (end == limit + 1) {
                    if (!Character.isWhitespace(buf.charAt(lineStart + end))) {
                        end = breaks.preceding(end - 1);
                    }
                }

                // if the last character is a space, replace it with a \n
                if (end != BreakIterator.DONE && end == limit + 1) {
                    buf.replace(lineStart + end, lineStart + end + 1, "\n");
                    lineStart = lineStart + end;
                }
                // otherwise, just insert a \n
                else if (end != BreakIterator.DONE && end != 0) {
                    buf.insert(lineStart + end, '\n');
                    lineStart = lineStart + end + 1;
                } else {
                    // No usable earlier break (DONE or 0): break hard at i.
                    buf.insert(i, '\n');
                    lineStart = i + 1;
                }
            } else {
                // The flag is still set from an earlier '\n': break hard at i
                // without searching for a word boundary, then clear the flag.
                // NOTE(review): this means the first wrap after any existing
                // newline ignores word breaks - confirm this is intended.
                buf.insert(i, '\n');
                lineStart = i + 1;
                endOfLine = false;
            }
        }
    }

    return buf.toString();
}

From source file:Utils.java

/**
 * Wrap multi-line strings (and get the individual lines).
 * /*ww  w.  j  a v  a  2s .  c om*/
 * @param original
 *          the original string to wrap
 * @param width
 *          the maximum width of lines
 * @param breakIterator
 *          breaks original to chars, words, sentences, depending on what
 *          instance you provide.
 * @param removeNewLines
 *          if <code>true</code>, any newlines in the original string are
 *          ignored
 * @return the lines after wrapping
 */
public static String[] wrapStringToArray(String original, int width, BreakIterator breakIterator,
        boolean removeNewLines) {
    if (original.length() == 0) {
        return new String[] { original };
    }

    String[] workingSet;

    // substitute original newlines with spaces,
    // remove newlines from head and tail
    if (removeNewLines) {
        original = trimString(original);
        original = original.replace('\n', ' ');
        workingSet = new String[] { original };
    } else {
        StringTokenizer tokens = new StringTokenizer(original, "\n"); // NOI18N
        int len = tokens.countTokens();
        workingSet = new String[len];

        for (int i = 0; i < len; i++) {
            workingSet[i] = tokens.nextToken();
        }
    }

    if (width < 1) {
        width = 1;
    }

    if (original.length() <= width) {
        return workingSet;
    }

    widthcheck: {
        boolean ok = true;

        for (int i = 0; i < workingSet.length; i++) {
            ok = ok && (workingSet[i].length() < width);

            if (!ok) {
                break widthcheck;
            }
        }

        return workingSet;
    }

    java.util.ArrayList<String> lines = new java.util.ArrayList<String>();

    int lineStart = 0; // the position of start of currently processed line in
                       // the original string

    for (int i = 0; i < workingSet.length; i++) {
        if (workingSet[i].length() < width) {
            lines.add(workingSet[i]);
        } else {
            breakIterator.setText(workingSet[i]);

            int nextStart = breakIterator.next();
            int prevStart = 0;

            do {
                while (((nextStart - lineStart) < width) && (nextStart != BreakIterator.DONE)) {
                    prevStart = nextStart;
                    nextStart = breakIterator.next();
                }

                if (nextStart == BreakIterator.DONE) {
                    nextStart = prevStart = workingSet[i].length();
                }

                if (prevStart == 0) {
                    prevStart = nextStart;
                }

                lines.add(workingSet[i].substring(lineStart, prevStart));

                lineStart = prevStart;
                prevStart = 0;
            } while (lineStart < workingSet[i].length());

            lineStart = 0;
        }
    }

    String[] s = new String[lines.size()];

    return (String[]) lines.toArray(s);
}

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits the given text into sentences. The text is first normalized with
 * {@code trim_and_replace_emptyspace}, then read line by line; each line is cut at
 * the sentence boundaries of the locale derived from {@code language_code}, and
 * every trimmed, non-empty piece is collected.
 *
 * @param text the text to split
 * @param language_code locale string handed to {@code LocaleUtils.toLocale}
 * @return the collected sentences in order of appearance
 */
@Override
public List<String> splitSentences(String text, String language_code) throws Exception {
    LOG.trace(String.format("Splitting sentences from text: %s", StringUtils.abbreviate(text, 200)));
    final List<String> collected = new ArrayList<String>();

    text = de.tudarmstadt.lt.utilities.StringUtils.trim_and_replace_emptyspace(text, " ");

    final LineIterator lineSource = new LineIterator(new StringReader(text));
    while (lineSource.hasNext()) {
        final String line = lineSource.nextLine();
        final BreakIterator bounds = BreakIterator.getSentenceInstance(LocaleUtils.toLocale(language_code));
        bounds.setText(line);

        int from = bounds.first();
        int to = bounds.next();
        while (to != BreakIterator.DONE) {
            final String candidate = de.tudarmstadt.lt.utilities.StringUtils.trim(line.substring(from, to));
            // Pieces that are empty after trimming are dropped.
            if (!candidate.isEmpty()) {
                collected.add(candidate);
                LOG.trace(String.format("Current sentence: %s", StringUtils.abbreviate(candidate, 200)));
            }
            from = to;
            to = bounds.next();
        }
    }

    LOG.trace(String.format("Split text '%s' into '%d' sentences.", StringUtils.abbreviate(text, 200),
            collected.size()));
    return collected;
}

From source file:IteratorTest.java

/**
 * Re-splits the text of the text area with the break iterator matching the
 * selected button (character, word, line or sentence) and the selected locale,
 * and shows the resulting segments in the item list.
 */
protected void refreshDisplay() {
    final String message = textArea.getText();
    final Locale chosenLocale = (Locale) (localeButton.getSelectedItem());

    // Stays null when none of the four buttons is selected.
    BreakIterator boundaries = null;
    if (charButton.isSelected()) {
        boundaries = BreakIterator.getCharacterInstance(chosenLocale);
    } else if (wordButton.isSelected()) {
        boundaries = BreakIterator.getWordInstance(chosenLocale);
    } else if (lineButton.isSelected()) {
        boundaries = BreakIterator.getLineInstance(chosenLocale);
    } else if (sentButton.isSelected()) {
        boundaries = BreakIterator.getSentenceInstance(chosenLocale);
    }
    boundaries.setText(message);

    final Vector<String> segments = new Vector<String>();
    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next()) {
        segments.addElement(message.substring(from, to));
        from = to;
    }
    itemList.setListData(segments);
}

From source file:com.cotrino.knowledgemap.db.Question.java

/**
 * http://stackoverflow.com/questions/2103598/java-simple-sentence-parser
 * @param text/* www .j a  v  a  2  s . co  m*/
 * @param language
 * @param country
 * @return
 */
public static List<String> tokenize(String text, String language, String country) {
    List<String> sentences = new ArrayList<String>();
    Locale currentLocale = new Locale(language, country);
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(currentLocale);
    sentenceIterator.setText(text);
    int boundary = sentenceIterator.first();
    int lastBoundary = 0;
    while (boundary != BreakIterator.DONE) {
        boundary = sentenceIterator.next();
        if (boundary != BreakIterator.DONE) {
            sentences.add(text.substring(lastBoundary, boundary));
        }
        lastBoundary = boundary;
    }
    return sentences;
}