Example usage for java.text BreakIterator setText

List of usage examples for java.text BreakIterator setText

Introduction

On this page you can find example usages of java.text.BreakIterator.setText.

Prototype

public abstract void setText(CharacterIterator newText);

Source Link

Documentation

Set a new text for scanning.

Usage

From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java

/**
 * Splits {@code text} into sentences, one input line at a time.
 *
 * <p>The text is first passed through {@code trim_and_replace_emptyspace(text, " ")}; every
 * resulting line is then scanned with a sentence {@link BreakIterator} created for the locale
 * derived from {@code language_code}. Each segment is trimmed, and segments that are empty
 * after trimming are dropped.
 *
 * @param text          the text to split
 * @param language_code locale code handed to {@code LocaleUtils.toLocale}
 * @return the non-empty, trimmed sentences in order of appearance
 * @throws Exception declared by the overridden method
 */
@Override
public List<String> splitSentences(String text, String language_code) throws Exception {
    LOG.trace(String.format("Splitting sentences from text: %s", StringUtils.abbreviate(text, 200)));
    final List<String> result = new ArrayList<String>();

    text = de.tudarmstadt.lt.utilities.StringUtils.trim_and_replace_emptyspace(text, " ");

    final LineIterator lines = new LineIterator(new StringReader(text));
    while (lines.hasNext()) {
        final String line = lines.nextLine();
        final BreakIterator bounds = BreakIterator.getSentenceInstance(LocaleUtils.toLocale(language_code));
        bounds.setText(line);

        int from = bounds.first();
        int to = bounds.next();
        while (to != BreakIterator.DONE) {
            final String sentence = de.tudarmstadt.lt.utilities.StringUtils.trim(line.substring(from, to));
            if (!sentence.isEmpty()) {
                result.add(sentence);
                LOG.trace(String.format("Current sentence: %s", StringUtils.abbreviate(sentence, 200)));
            }
            // The end of this segment is the start of the next one.
            from = to;
            to = bounds.next();
        }
    }

    LOG.trace(String.format("Split text '%s' into '%d' sentences.", StringUtils.abbreviate(text, 200),
            result.size()));
    return result;
}

From source file:Main.java

/**
 * Builds the frame: wraps {@code outputText} in a scroll pane and fills it with a sample
 * sentence in which a {@code '|'} follows every segment reported by the default-locale
 * character {@link BreakIterator}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";

    final BreakIterator charBreaks = BreakIterator.getCharacterInstance(Locale.getDefault());
    charBreaks.setText(text);
    outputText.setText("");

    int from = charBreaks.first();
    for (int to = charBreaks.next(); to != BreakIterator.DONE; to = charBreaks.next()) {
        outputText.append(text.substring(from, to) + "|");
        from = to;
    }
    // Whatever follows the last boundary is appended without a trailing separator.
    outputText.append(text.substring(from));
}

From source file:be.idamediafoundry.sofa.livecycle.dsc.util.AbstractQDoxComponentInfoExtractor.java

/**
 * Extracts the first sentence of {@code text} using the default-locale sentence
 * {@link BreakIterator}.
 *
 * @param text the text to inspect; may be {@code null}
 * @return the trimmed first sentence; {@code text} itself when it is {@code null} or the
 *         iterator reports no boundary after the start (e.g. the empty string)
 */
final protected String getFirstSentence(String text) {
    if (text == null) {
        return null;
    }

    final BreakIterator sentences = BreakIterator.getSentenceInstance();
    sentences.setText(text);

    final int begin = sentences.first();
    final int finish = sentences.next();
    return finish == BreakIterator.DONE ? text : text.substring(begin, finish).trim();
}

From source file:com.norconex.importer.handler.tagger.impl.TextStatisticsTagger.java

/**
 * Collects simple text statistics from {@code input} and stores them in {@code metadata}.
 *
 * <p>The input is read line by line. Each line is trimmed; blank lines are skipped and every
 * remaining line counts as one paragraph. Per line, the method adds the trimmed length to the
 * character count, counts {@code PATTERN_WORD} matches as words, and counts the segments
 * reported by the default-locale sentence {@link BreakIterator} as sentences.
 *
 * <p>Results are written under {@code "document.stat."}, followed by the trimmed
 * {@code fieldName} and a dot when {@code fieldName} is not blank. Averages are stored as
 * strings produced by {@code divide(...)}.
 *
 * @param reference document reference (not used by this method)
 * @param input     text to analyse
 * @param metadata  receives the computed statistics
 * @param parsed    whether the document was parsed (not used by this method)
 * @throws ImporterHandlerException declared by the overridden method
 */
@Override
protected void tagTextDocument(String reference, Reader input, ImporterMetadata metadata, boolean parsed)
        throws ImporterHandlerException {
    long charCount = 0;
    long wordCharCount = 0;
    long wordCount = 0;
    long sentenceCount = 0;
    long sentenceCharCount = 0;
    long paragraphCount = 0;

    // A single sentence iterator serves every line: setText() re-targets it, so there is
    // no need to build a new instance per line.
    final BreakIterator boundary = BreakIterator.getSentenceInstance();

    //TODO make this more efficient, by doing all this in one pass.
    LineIterator it = IOUtils.lineIterator(input);
    while (it.hasNext()) {
        String line = it.nextLine().trim();
        if (StringUtils.isBlank(line)) {
            continue;
        }

        // Paragraph: every non-blank line counts as one.
        paragraphCount++;

        // Character: length of the trimmed line.
        charCount += line.length();

        // Word
        Matcher matcher = PATTERN_WORD.matcher(line);
        while (matcher.find()) {
            int wordLength = matcher.end() - matcher.start();
            wordCount++;
            wordCharCount += wordLength;
        }

        // Sentence
        boundary.setText(line);
        int start = boundary.first();
        for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
            sentenceCharCount += (end - start);
            sentenceCount++;
        }
    }

    String field = StringUtils.EMPTY;
    if (StringUtils.isNotBlank(fieldName)) {
        field = fieldName.trim() + ".";
    }

    //--- Add fields ---
    metadata.addLong("document.stat." + field + "characterCount", charCount);
    metadata.addLong("document.stat." + field + "wordCount", wordCount);
    metadata.addLong("document.stat." + field + "sentenceCount", sentenceCount);
    metadata.addLong("document.stat." + field + "paragraphCount", paragraphCount);
    metadata.addString("document.stat." + field + "averageWordCharacterCount",
            divide(wordCharCount, wordCount));
    metadata.addString("document.stat." + field + "averageSentenceCharacterCount",
            divide(sentenceCharCount, sentenceCount));
    metadata.addString("document.stat." + field + "averageSentenceWordCount", divide(wordCount, sentenceCount));
    metadata.addString("document.stat." + field + "averageParagraphCharacterCount",
            divide(charCount, paragraphCount));
    metadata.addString("document.stat." + field + "averageParagraphSentenceCount",
            divide(sentenceCount, paragraphCount));
    metadata.addString("document.stat." + field + "averageParagraphWordCount",
            divide(wordCount, paragraphCount));

}

From source file:com.glaf.core.util.StringTools.java

/**
 * Breaks {@code text} into word tokens using the default-locale word {@link BreakIterator}.
 *
 * <p>Each segment is trimmed and then passed through the {@code replace} helper once for each
 * of the symbols {@code + / \ # * ) ( &} with an empty replacement; tokens that end up empty
 * are discarded.
 *
 * <p>NOTE(review): despite the method name, nothing here changes letter case; confirm
 * whether callers are expected to lower-case the input themselves.
 *
 * @param text the text to tokenize; may be {@code null}
 * @return the remaining tokens in order, or an empty array for {@code null} or empty input
 */
public static String[] toLowerCaseWordArray(String text) {
    if (text == null || text.length() == 0) {
        return new String[0];
    }

    // Symbols stripped from every token, in the order they are applied.
    final String[] unwanted = { "+", "/", "\\", "#", "*", ")", "(", "&" };

    final List<String> words = new java.util.ArrayList<String>();
    final BreakIterator wordBreaks = BreakIterator.getWordInstance();
    wordBreaks.setText(text);

    int from = 0;
    int to = wordBreaks.next();
    while (to != BreakIterator.DONE) {
        String token = text.substring(from, to).trim();
        for (String symbol : unwanted) {
            token = replace(token, symbol, "");
        }
        if (token.length() > 0) {
            words.add(token);
        }
        from = to;
        to = wordBreaks.next();
    }
    return words.toArray(new String[words.size()]);
}

From source file:org.cloudgraph.examples.test.model.StanfordCoreNLPTest.java

/**
 * Splits the buffer content into sentences with a {@link Locale#US} sentence
 * {@link BreakIterator}, hands each sentence to {@code parse(String)}, and logs the elapsed
 * wall-clock milliseconds together with the sentence.
 *
 * @param buf the text to process
 * @throws IOException declared for the per-sentence parse step
 */
private void parse(StringBuilder buf) throws IOException {
    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
    final String text = buf.toString();
    sentences.setText(text);

    // Tallied per sentence but never read within this method.
    int counter = 0;
    int from = sentences.first();
    for (int to = sentences.next(); to != BreakIterator.DONE; from = to, to = sentences.next()) {
        final String sentence = text.substring(from, to);

        final long before = System.currentTimeMillis();
        parse(sentence);
        final long elapsed = System.currentTimeMillis() - before;

        log.info("time4: " + elapsed + ": " + sentence);
        counter++;
    }

}

From source file:com.amazon.android.ui.widget.EllipsizedTextView.java

/**
 * Finds the last break index that lies strictly before {@code ellipsisIndex}.
 *
 * <p>The supplied iterator is re-targeted at {@code displayText}, so the kind of break found
 * (word, character, ...) depends on the iterator the caller passes in.
 *
 * @param displayText   the text to scan
 * @param ellipsisIndex the offset the returned boundary must precede
 * @param iterator      the break iterator to use; its text is replaced by this call
 * @return the value of {@link BreakIterator#preceding(int)} for {@code ellipsisIndex}
 */
private int breakBefore(final String displayText, final int ellipsisIndex, final BreakIterator iterator) {
    iterator.setText(displayText);
    final int boundary = iterator.preceding(ellipsisIndex);
    return boundary;
}

From source file:org.cloudgraph.examples.test.model.NLPWikiParseTest.java

/**
 * Splits the buffer content into sentences with a {@link Locale#US} sentence
 * {@link BreakIterator} and logs each sentence together with a measured duration in
 * milliseconds.
 *
 * <p>The per-sentence parse call is disabled in this version, so the logged duration only
 * spans two consecutive clock reads.
 *
 * @param buf the text to process
 * @throws IOException declared in the signature; nothing in this body throws it
 */
private void parse(StringBuilder buf) throws IOException {
    final BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
    final String text = buf.toString();
    sentences.setText(text);

    // Tallied per sentence but never read within this method.
    int counter = 0;
    int from = sentences.first();
    for (int to = sentences.next(); to != BreakIterator.DONE; from = to, to = sentences.next()) {
        final String sentence = text.substring(from, to);

        final long before = System.currentTimeMillis();
        // Per-sentence parsing is switched off here; only the timing scaffold remains.
        final long elapsed = System.currentTimeMillis() - before;

        log.info("time4: " + elapsed + ": " + sentence);
        counter++;
    }

}

From source file:com.redhat.rcm.version.Cli.java

/**
 * Word-wraps {@code line} and prints each wrapped line prefixed with {@code indent}.
 *
 * <p>The text is cut at the break opportunities reported by the default-locale line
 * {@link BreakIterator}. Segments are accumulated until adding the next one would exceed
 * {@code max} characters, at which point the accumulated line is emitted. A single segment
 * longer than {@code max} is printed on a line of its own, unbroken. Every output line is
 * left-justified and padded to {@code max} characters, then terminated with {@code '\n'}.
 *
 * @param line   the text to wrap
 * @param indent prefix written before every output line
 * @param max    target line width, also used as the padding width in the format string
 * @param pw     destination writer
 */
private static void printTextLine(final String line, final String indent, final int max, final PrintWriter pw) {
    final String fmt = "%s%-" + max + "s\n";

    final List<String> lines = new ArrayList<String>();

    final BreakIterator iter = BreakIterator.getLineInstance();
    iter.setText(line);

    final StringBuilder currentLine = new StringBuilder();
    int start = iter.first();
    for (int end = iter.next(); end != BreakIterator.DONE; start = end, end = iter.next()) {
        final String seg = line.substring(start, end);
        // Flush only when something has been accumulated; otherwise a segment that is
        // longer than max on its own would push out a spurious empty line first.
        if (currentLine.length() > 0 && currentLine.length() + seg.length() > max) {
            lines.add(currentLine.toString());
            currentLine.setLength(0);
        }

        currentLine.append(seg);
    }

    if (currentLine.length() > 0) {
        lines.add(currentLine.toString());
    }

    for (final String ln : lines) {
        pw.printf(fmt, indent, ln);
    }
}

From source file:com.redhat.rcm.version.Cli.java

/**
 * Prints a key/value pair, word-wrapping the value across as many lines as needed.
 *
 * <p>The value is cut at the break opportunities reported by the default-locale line
 * {@link BreakIterator}. Segments are accumulated until adding the next one would exceed
 * {@code valMax} characters. A single segment longer than {@code valMax} stays unbroken on
 * its own line. The first wrapped line is printed next to {@code key}; continuation lines
 * are printed with an empty key so that the two-field format keeps them aligned. An empty
 * value still produces one line containing the key.
 *
 * @param key    the key, printed on the first line only
 * @param value  the value text to wrap
 * @param fmt    a format string with two fields (key, value line)
 * @param valMax target width of the value column
 * @param pw     destination writer
 */
private static void printKVLine(final String key, final String value, final String fmt, final int valMax,
        final PrintWriter pw) {
    final List<String> lines = new ArrayList<String>();

    final BreakIterator iter = BreakIterator.getLineInstance();
    iter.setText(value);

    final StringBuilder currentLine = new StringBuilder();
    int start = iter.first();
    for (int end = iter.next(); end != BreakIterator.DONE; start = end, end = iter.next()) {
        final String seg = value.substring(start, end);
        // Flush only when something has been accumulated; otherwise an over-long first
        // segment would leave the key beside an empty value and push the text down a line.
        if (currentLine.length() > 0 && currentLine.length() + seg.length() > valMax) {
            lines.add(currentLine.toString());
            currentLine.setLength(0);
        }

        currentLine.append(seg);
    }

    if (currentLine.length() > 0) {
        lines.add(currentLine.toString());
    }

    pw.printf(fmt, key, lines.isEmpty() ? "" : lines.get(0));
    for (int i = 1; i < lines.size(); i++) {
        // blank string to serve for indentation in format with two fields.
        pw.printf(fmt, "", lines.get(i));
    }
}