Example usage for java.text.BreakIterator.first()

List of usage examples for java.text.BreakIterator.first()

Introduction

This page collects usage examples for the first() method of java.text.BreakIterator.

Prototype

public abstract int first();

Source Link

Document

Returns the first boundary.

Usage

From source file:graphene.util.StringUtils.java

/**
 * Splits a string into sentences using the sentence-boundary rules of the
 * given locale.
 *
 * <p>Each element is the exact substring between two consecutive boundaries
 * reported by the iterator; nothing is trimmed.
 *
 * @param input  the text to split
 * @param locale the locale whose sentence-break rules are applied
 * @return the sentences in order of appearance; empty when the iterator
 *         reports no boundary after the first one
 */
public static List<String> convertToSentences(final String input, final Locale locale) {
    final BreakIterator boundaries = BreakIterator.getSentenceInstance(locale);
    boundaries.setText(input);

    final ArrayList<String> result = new ArrayList<String>();
    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        result.add(input.substring(from, to));
        // The end of this sentence is the start of the next one.
        from = to;
        to = boundaries.next();
    }
    return result;
}

From source file:ezbake.training.TweetWordDivideWorker.java

/**
 * Splits the text of the given tweet at word boundaries and writes every
 * non-blank piece to the output pipe.
 *
 * <p>Nothing happens when the tweet or its text is {@code null}. An
 * {@link IOException} raised while emitting a piece has its stack trace
 * printed, and processing continues with the next piece.
 *
 * @param visibility The Visibility containing the Accumulo visibility string representing the classification level
 * of the data contained in the incoming thrift data object.
 * @param data The incoming Thrift object to be processed.
 */
@Override
public void process(Visibility visibility, Tweet data) {
    if (data == null || data.getText() == null) {
        return;
    }

    BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(data.getText());

    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        String piece = data.getText().substring(from, to);
        // Segments made up only of whitespace are not forwarded.
        if (StringUtils.isNotBlank(piece)) {
            try {
                outputResultsToPipe(visibility, piece);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        from = to;
        to = boundaries.next();
    }
}

From source file:net.nicoll.boot.metadata.AbstractMetadataFormatter.java

/**
 * Returns the first sentence of the property's description, as delimited by
 * a sentence {@link BreakIterator} for the default locale.
 *
 * @param property     the property whose description is inspected
 * @param defaultValue the value returned when the description has no text
 * @return the text between the first two sentence boundaries of the
 *         description, or {@code defaultValue} when there is no description text
 */
protected String extractTagLine(ConfigurationMetadataProperty property, String defaultValue) {
    String description = property.getDescription();
    if (!StringUtils.hasText(description)) {
        return defaultValue;
    }

    BreakIterator sentences = BreakIterator.getSentenceInstance();
    sentences.setText(description);
    int from = sentences.first();
    int to = sentences.next();
    return description.substring(from, to);
}

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits a sentence into tokens at the word boundaries defined for the locale
 * named by {@code language_code}.
 *
 * <p>The sentence is trimmed first. Each segment between two consecutive
 * boundaries is passed through {@code trim_and_replace_emptyspace(segment, "_")},
 * and segments that come back empty are discarded.
 *
 * @param sentence      the sentence to tokenize
 * @param language_code the language code converted to a locale via {@code LocaleUtils.toLocale}
 * @return the non-empty tokens in order of appearance
 */
@Override
public List<String> tokenizeSentence_intern(String sentence, String language_code) {
    ArrayList<String> tokens = new ArrayList<String>();
    // The boundaries are computed on the trimmed text, so the segments must be cut from
    // that same text. Slicing the untrimmed input would shift every token by the amount
    // of leading whitespace.
    String trimmed = sentence.trim();
    BreakIterator token_bounds = BreakIterator.getWordInstance(LocaleUtils.toLocale(language_code));
    token_bounds.setText(trimmed);
    int begin_t = token_bounds.first();
    for (int end_t = token_bounds.next(); end_t != BreakIterator.DONE; begin_t = end_t, end_t = token_bounds
            .next()) {
        String token = de.tudarmstadt.lt.utilities.StringUtils
                .trim_and_replace_emptyspace(trimmed.substring(begin_t, end_t), "_");
        if (!token.isEmpty()) { // add token iff token is not empty
            tokens.add(token);
        }
    }
    return tokens;
}

From source file:Main.java

/**
 * Builds the frame and fills {@code outputText} with a sample text in which
 * a '|' is written after every segment reported by a line-break
 * {@link BreakIterator} for the default locale.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    BreakIterator boundaries = BreakIterator.getLineInstance(Locale.getDefault());
    boundaries.setText(text);
    outputText.setText("");

    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next()) {
        outputText.append(text.substring(from, to) + "|");
        from = to;
    }
    // Append whatever follows the last reported boundary.
    outputText.append(text.substring(from));
}

From source file:Main.java

/**
 * Builds the frame and fills {@code outputText} with a sample text in which
 * a '|' is written after every segment reported by a word
 * {@link BreakIterator} for the default locale.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    BreakIterator boundaries = BreakIterator.getWordInstance(Locale.getDefault());
    boundaries.setText(text);
    outputText.setText("");

    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next()) {
        outputText.append(text.substring(from, to) + "|");
        from = to;
    }
    // Append whatever follows the last reported boundary.
    outputText.append(text.substring(from));
}

From source file:com.juick.android.Utils.java

/**
 * Scans the text with a word {@link BreakIterator} for the default locale and
 * returns the first segment that is longer than one character and ends at or
 * after the given offset.
 *
 * @param text   the text to scan
 * @param offset the character offset the returned segment must reach
 * @return the matching segment, or {@code null} when no segment qualifies
 */
public static String getWordAtOffset(final String text, final int offset) {
    final BreakIterator boundaries = BreakIterator.getWordInstance();
    boundaries.setText(text);

    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        final boolean reachesOffset = to >= offset;
        final boolean longerThanOneChar = to - from > 1;
        if (reachesOffset && longerThanOneChar) {
            return text.substring(from, to);
        }
        from = to;
        to = boundaries.next();
    }
    return null;
}

From source file:Main.java

/**
 * Builds the frame and fills {@code outputText} with a sample text in which
 * a '|' is written after every segment reported by a sentence
 * {@link BreakIterator} for the default locale.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    BreakIterator boundaries = BreakIterator.getSentenceInstance(Locale.getDefault());
    boundaries.setText(text);
    outputText.setText("");

    int from = boundaries.first();
    for (int to = boundaries.next(); to != BreakIterator.DONE; to = boundaries.next()) {
        outputText.append(text.substring(from, to) + "|");
        from = to;
    }
    // Append whatever follows the last reported boundary.
    outputText.append(text.substring(from));
}

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits a text into sentences.
 *
 * <p>The text is first passed through {@code trim_and_replace_emptyspace(text, " ")}
 * and then read line by line. Each line is segmented with a sentence
 * {@link BreakIterator} for the locale derived from {@code language_code}.
 * Every segment is trimmed, and segments that are empty after trimming are
 * skipped. Progress is written to the trace log.
 *
 * @param text          the text to split
 * @param language_code the language code converted to a locale via {@code LocaleUtils.toLocale}
 * @return the non-empty, trimmed sentences in order of appearance
 * @throws Exception declared by the signature; no checked exception is thrown explicitly in this method
 */
@Override
public List<String> splitSentences(String text, String language_code) throws Exception {
    LOG.trace(String.format("Splitting sentences from text: %s", StringUtils.abbreviate(text, 200)));

    String normalized = de.tudarmstadt.lt.utilities.StringUtils.trim_and_replace_emptyspace(text, " ");

    List<String> sentences = new ArrayList<String>();
    LineIterator lines = new LineIterator(new StringReader(normalized));
    while (lines.hasNext()) {
        String line = lines.nextLine();
        BreakIterator bounds = BreakIterator.getSentenceInstance(LocaleUtils.toLocale(language_code));
        bounds.setText(line);

        int begin = bounds.first();
        int end = bounds.next();
        while (end != BreakIterator.DONE) {
            String sentence = de.tudarmstadt.lt.utilities.StringUtils.trim(line.substring(begin, end));
            if (!sentence.isEmpty()) {
                sentences.add(sentence);
                LOG.trace(String.format("Current sentence: %s", StringUtils.abbreviate(sentence, 200)));
            }
            begin = end;
            end = bounds.next();
        }
    }

    LOG.trace(String.format("Split text '%s' into '%d' sentences.", StringUtils.abbreviate(normalized, 200),
            sentences.size()));
    return sentences;
}

From source file:net.nicoll.boot.metadata.ConsoleMetadataFormatter.java

/**
 * Renders the repository's metadata as a plain-text report.
 *
 * <p>For every group (in the order given by {@code sortGroups}) a banner with
 * the group id and its source types is written, followed by one line per
 * property (in the order given by {@code sortProperties}) of the form
 * {@code id=default # (type) - first sentence of the description}. A
 * description without a '.' is written in full and flagged; a property whose
 * description has no text is recorded as missing. The report ends with a
 * statistics section listing both.
 *
 * @param repository the metadata repository to render
 * @return the formatted report
 */
@Override
public String formatMetadata(ConfigurationMetadataRepository repository) {
    final String rule = "========================================";
    StringBuilder report = new StringBuilder();
    List<String> undocumentedKeys = new ArrayList<String>();
    int descriptionsWithoutDot = 0;

    for (ConfigurationMetadataGroup group : sortGroups(repository.getAllGroups().values())) {
        report.append(rule).append(NEW_LINE);

        // Source types of the group, separated by single spaces.
        StringBuilder sourceTypes = new StringBuilder();
        for (ConfigurationMetadataSource source : group.getSources().values()) {
            sourceTypes.append(source.getType()).append(" ");
        }
        report.append("Group --- ").append(group.getId());
        report.append("(").append(sourceTypes.toString().trim()).append(")").append(NEW_LINE);
        report.append(rule).append(NEW_LINE);

        for (ConfigurationMetadataProperty property : sortProperties(group.getProperties().values())) {
            StringBuilder line = new StringBuilder(property.getId()).append("=");

            Object defaultValue = property.getDefaultValue();
            if (defaultValue instanceof Object[]) {
                line.append(StringUtils.arrayToCommaDelimitedString((Object[]) defaultValue));
            } else if (defaultValue != null) {
                line.append(defaultValue);
            }
            line.append(" # (").append(property.getType()).append(")");

            String description = property.getDescription();
            if (!StringUtils.hasText(description)) {
                undocumentedKeys.add(property.getId());
            } else {
                line.append(" - ");
                if (!description.contains(".")) {
                    line.append(description).append(" --- NO DOT FOUND!");
                    descriptionsWithoutDot++;
                } else {
                    // Keep only the text between the first two sentence boundaries.
                    BreakIterator sentences = BreakIterator.getSentenceInstance();
                    sentences.setText(description);
                    int from = sentences.first();
                    int to = sentences.next();
                    line.append(description.substring(from, to));
                }
            }
            report.append(line.toString()).append(NEW_LINE);
        }
    }

    report.append("-------- Stats --------").append(NEW_LINE);
    report.append("Not dot in description: ").append(descriptionsWithoutDot).append(NEW_LINE);
    report.append("Missing description:").append(NEW_LINE);
    for (String key : undocumentedKeys) {
        report.append("\t").append(key).append("\n");
    }
    return report.toString();
}