List of usage examples for java.text.BreakIterator.first()
/**
 * Declaration of {@code BreakIterator.first()}, the method the examples in this listing demonstrate.
 * The examples use the returned {@code int} as the begin index of the first text segment
 * (for instance as the first argument of {@code String.substring}).
 */
public abstract int first();
From source file:graphene.util.StringUtils.java
/**
 * Breaks a string into sentence segments using the sentence rules of the given locale.
 *
 * <p>Each returned element is the exact substring between two consecutive sentence
 * boundaries, so trailing whitespace belonging to a sentence is preserved.
 *
 * @param input  the text to split
 * @param locale the locale whose sentence-break rules are applied
 * @return the sentence segments in their original order; empty when {@code input} is empty
 */
public static List<String> convertToSentences(final String input, final Locale locale) {
    final BreakIterator boundaries = BreakIterator.getSentenceInstance(locale);
    boundaries.setText(input);

    final List<String> result = new ArrayList<String>();
    int from = boundaries.first();
    int to = boundaries.next();
    while (to != BreakIterator.DONE) {
        result.add(input.substring(from, to));
        from = to;
        to = boundaries.next();
    }
    return result;
}
From source file:ezbake.training.TweetWordDivideWorker.java
/** * Performs processing on the Tweet object by dividing the tweet's text into words. * * @param visibility The Visibility containing the Accumulo visibility string representing the classification level * of the data contained in the incoming thrift data object. * @param data The incoming Thrift object to be processed. */// www.jav a2 s .com @Override public void process(Visibility visibility, Tweet data) { if (data != null && data.getText() != null) { BreakIterator wordIterator = BreakIterator.getWordInstance(); wordIterator.setText(data.getText()); int wordStart = wordIterator.first(); int wordEnd = wordIterator.next(); for (; wordEnd != BreakIterator.DONE; wordStart = wordEnd, wordEnd = wordIterator.next()) { String tweetTextWord = data.getText().substring(wordStart, wordEnd); if (StringUtils.isNotBlank(tweetTextWord)) { try { outputResultsToPipe(visibility, tweetTextWord); } catch (IOException e) { e.printStackTrace(); } } } } }
From source file:net.nicoll.boot.metadata.AbstractMetadataFormatter.java
/**
 * Produces a short tag line for a property: the first sentence of its description.
 *
 * @param property     the property whose description is inspected
 * @param defaultValue the value returned when the description has no text
 * @return the text between the first two sentence boundaries of the description, or
 *         {@code defaultValue} when {@code StringUtils.hasText} rejects the description
 */
protected String extractTagLine(ConfigurationMetadataProperty property, String defaultValue) {
    final String description = property.getDescription();
    if (!StringUtils.hasText(description)) {
        return defaultValue;
    }

    final BreakIterator sentences = BreakIterator.getSentenceInstance();
    sentences.setText(description);
    final int sentenceStart = sentences.first();
    final int sentenceEnd = sentences.next();
    return description.substring(sentenceStart, sentenceEnd);
}
From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java
@Override public List<String> tokenizeSentence_intern(String sentence, String language_code) { ArrayList<String> tokens = new ArrayList<String>(); BreakIterator token_bounds = BreakIterator.getWordInstance(LocaleUtils.toLocale(language_code)); token_bounds.setText(sentence.trim()); int begin_t = token_bounds.first(); for (int end_t = token_bounds.next(); end_t != BreakIterator.DONE; begin_t = end_t, end_t = token_bounds .next()) {/* w w w .j a va 2s. co m*/ String token = de.tudarmstadt.lt.utilities.StringUtils .trim_and_replace_emptyspace(sentence.substring(begin_t, end_t), "_"); if (!token.isEmpty()) { // add token iff token is not empty tokens.add(token); } } return tokens; }
From source file:Main.java
/**
 * Builds the frame and fills the output area with a sample text in which every line-break
 * opportunity for the default locale is marked by a trailing {@code |}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    final BreakIterator lineBreaks = BreakIterator.getLineInstance(Locale.getDefault());
    lineBreaks.setText(sample);

    outputText.setText("");
    int segmentStart = lineBreaks.first();
    for (int segmentEnd = lineBreaks.next(); segmentEnd != BreakIterator.DONE;
            segmentStart = segmentEnd, segmentEnd = lineBreaks.next()) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
    }
    // Appends whatever follows the last reported boundary.
    outputText.append(sample.substring(segmentStart));
}
From source file:Main.java
/**
 * Builds the frame and fills the output area with a sample text in which every word
 * boundary for the default locale is marked by a trailing {@code |}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    final BreakIterator wordBreaks = BreakIterator.getWordInstance(Locale.getDefault());
    wordBreaks.setText(sample);

    outputText.setText("");
    int segmentStart = wordBreaks.first();
    int segmentEnd = wordBreaks.next();
    while (segmentEnd != BreakIterator.DONE) {
        outputText.append(sample.substring(segmentStart, segmentEnd) + "|");
        segmentStart = segmentEnd;
        segmentEnd = wordBreaks.next();
    }
    // Appends whatever follows the last reported boundary.
    outputText.append(sample.substring(segmentStart));
}
From source file:com.juick.android.Utils.java
public static String getWordAtOffset(final String text, final int offset) { BreakIterator wordIterator = BreakIterator.getWordInstance(); wordIterator.setText(text);//from w w w.jav a 2 s. c o m int start = wordIterator.first(); for (int end = wordIterator.next(); end != BreakIterator.DONE; start = end, end = wordIterator.next()) { if ((end >= offset) && (end - start > 1)) { return text.substring(start, end); } } return null; }
From source file:Main.java
/**
 * Builds the frame and fills the output area with a sample text in which every sentence
 * boundary for the default locale is marked by a trailing {@code |}.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    final String sample = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    final BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.getDefault());
    sentenceBreaks.setText(sample);

    outputText.setText("");
    int segmentStart = sentenceBreaks.first();
    for (int segmentEnd = sentenceBreaks.next(); segmentEnd != BreakIterator.DONE;
            segmentStart = segmentEnd, segmentEnd = sentenceBreaks.next()) {
        final String segment = sample.substring(segmentStart, segmentEnd);
        outputText.append(segment + "|");
    }
    // Appends whatever follows the last reported boundary.
    outputText.append(sample.substring(segmentStart));
}
From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java
@Override public List<String> splitSentences(String text, String language_code) throws Exception { LOG.trace(String.format("Splitting sentences from text: %s", StringUtils.abbreviate(text, 200))); List<String> sentences = new ArrayList<String>(); text = de.tudarmstadt.lt.utilities.StringUtils.trim_and_replace_emptyspace(text, " "); for (LineIterator iter = new LineIterator(new StringReader(text)); iter.hasNext();) { String line = iter.nextLine(); BreakIterator sentence_bounds = BreakIterator.getSentenceInstance(LocaleUtils.toLocale(language_code)); sentence_bounds.setText(line);//w w w . ja va2 s . c o m int begin_s = sentence_bounds.first(); for (int end_s = sentence_bounds .next(); end_s != BreakIterator.DONE; begin_s = end_s, end_s = sentence_bounds.next()) { String sentence = de.tudarmstadt.lt.utilities.StringUtils.trim(line.substring(begin_s, end_s)); if (sentence.isEmpty()) continue; sentences.add(sentence); LOG.trace(String.format("Current sentence: %s", StringUtils.abbreviate(sentence, 200))); } } LOG.trace(String.format("Split text '%s' into '%d' sentences.", StringUtils.abbreviate(text, 200), sentences.size())); return sentences; }
From source file:net.nicoll.boot.metadata.ConsoleMetadataFormatter.java
/**
 * Renders the repository as a plain-text console report.
 *
 * <p>For every group (in {@code sortGroups} order) a banner with the group id and its source
 * types is written, followed by one line per property (in {@code sortProperties} order) of
 * the form {@code id=default # (type) - first sentence of description}. A trailing "Stats"
 * section reports how many descriptions contain no dot and lists the ids of the properties
 * that have no description text.
 *
 * @param repository the metadata repository to render
 * @return the complete report
 */
@Override
public String formatMetadata(ConfigurationMetadataRepository repository) {
    final String banner = "========================================";
    final StringBuilder report = new StringBuilder();
    final List<String> undocumentedIds = new ArrayList<String>();
    int descriptionsWithoutDot = 0;

    for (ConfigurationMetadataGroup group : sortGroups(repository.getAllGroups().values())) {
        report.append(banner).append(NEW_LINE);

        // Space-separated source types; the trailing separator is removed by trim() below.
        final StringBuilder sourceTypes = new StringBuilder();
        for (ConfigurationMetadataSource source : group.getSources().values()) {
            sourceTypes.append(source.getType()).append(" ");
        }

        report.append("Group --- ").append(group.getId());
        report.append("(").append(sourceTypes.toString().trim()).append(")").append(NEW_LINE);
        report.append(banner).append(NEW_LINE);

        for (ConfigurationMetadataProperty property : sortProperties(group.getProperties().values())) {
            final StringBuilder line = new StringBuilder(property.getId()).append("=");

            final Object defaultValue = property.getDefaultValue();
            if (defaultValue instanceof Object[]) {
                line.append(StringUtils.arrayToCommaDelimitedString((Object[]) defaultValue));
            } else if (defaultValue != null) {
                line.append(defaultValue);
            }
            line.append(" # (").append(property.getType()).append(")");

            final String description = property.getDescription();
            if (!StringUtils.hasText(description)) {
                undocumentedIds.add(property.getId());
            } else {
                line.append(" - ");
                if (description.indexOf(".") == -1) {
                    line.append(description).append(" --- NO DOT FOUND!");
                    descriptionsWithoutDot++;
                } else {
                    // Keep only the first sentence of the description.
                    final BreakIterator sentences = BreakIterator.getSentenceInstance();
                    sentences.setText(description);
                    final int sentenceStart = sentences.first();
                    final int sentenceEnd = sentences.next();
                    line.append(description.substring(sentenceStart, sentenceEnd));
                }
            }
            report.append(line.toString()).append(NEW_LINE);
        }
    }

    report.append("-------- Stats --------").append(NEW_LINE);
    report.append("Not dot in description: ").append(descriptionsWithoutDot).append(NEW_LINE);
    report.append("Missing description:").append(NEW_LINE);
    for (String id : undocumentedIds) {
        report.append("\t").append(id).append("\n");
    }
    return report.toString();
}