List of usage examples for java.text BreakIterator getSentenceInstance
public static BreakIterator getSentenceInstance()
Returns a new BreakIterator instance for sentence breaks for the default locale (see Locale#getDefault()).
From source file: net.nicoll.boot.metadata.AbstractMetadataFormatter.java
protected String extractTagLine(ConfigurationMetadataProperty property, String defaultValue) { String description = property.getDescription(); if (StringUtils.hasText(description)) { BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(description); return description.substring(breakIterator.first(), breakIterator.next()); }//from ww w.java2 s .com return defaultValue; }
From source file:net.nicoll.boot.metadata.ConsoleMetadataFormatter.java
@Override public String formatMetadata(ConfigurationMetadataRepository repository) { StringBuilder out = new StringBuilder(); int noDotInDescription = 0; List<String> keysMissingDescription = new ArrayList<String>(); List<ConfigurationMetadataGroup> groups = sortGroups(repository.getAllGroups().values()); for (ConfigurationMetadataGroup group : groups) { out.append("========================================").append(NEW_LINE); StringBuilder sb = new StringBuilder(); for (ConfigurationMetadataSource source : group.getSources().values()) { sb.append(source.getType()).append(" "); }//from w w w . j a v a2 s. c om out.append("Group --- ").append(group.getId()).append("(").append(sb.toString().trim()).append(")") .append(NEW_LINE).append("========================================").append(NEW_LINE); List<ConfigurationMetadataProperty> properties = sortProperties(group.getProperties().values()); for (ConfigurationMetadataProperty property : properties) { StringBuilder item = new StringBuilder(property.getId()).append("="); Object defaultValue = property.getDefaultValue(); if (defaultValue != null) { if (defaultValue instanceof Object[]) { item.append(StringUtils.arrayToCommaDelimitedString((Object[]) defaultValue)); } else { item.append(defaultValue); } } item.append(" # (").append(property.getType()).append(")"); String description = property.getDescription(); if (StringUtils.hasText(description)) { item.append(" - "); int dot = description.indexOf("."); if (dot != -1) { BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(description); item.append(description.substring(breakIterator.first(), breakIterator.next())); } else { item.append(description).append(" --- NO DOT FOUND!"); noDotInDescription++; } } else { keysMissingDescription.add(property.getId()); } out.append(item.toString()).append(NEW_LINE); } } out.append("-------- Stats --------").append(NEW_LINE).append("Not dot in description: ") 
.append(noDotInDescription).append(NEW_LINE).append("Missing description:").append(NEW_LINE); StringBuilder desc = new StringBuilder(); for (String s : keysMissingDescription) { desc.append("\t").append(s).append("\n"); } out.append(desc.toString()); return out.toString(); }
From source file:com.norconex.importer.handler.tagger.impl.TextStatisticsTagger.java
@Override protected void tagTextDocument(String reference, Reader input, ImporterMetadata metadata, boolean parsed) throws ImporterHandlerException { long charCount = 0; long wordCharCount = 0; long wordCount = 0; long sentenceCount = 0; long sentenceCharCount = 0; long paragraphCount = 0; //TODO make this more efficient, by doing all this in one pass. LineIterator it = IOUtils.lineIterator(input); while (it.hasNext()) { String line = it.nextLine().trim(); if (StringUtils.isBlank(line)) { continue; }/*from w ww . ja v a2s .co m*/ // Paragraph paragraphCount++; // Character charCount += line.length(); // Word Matcher matcher = PATTERN_WORD.matcher(line); while (matcher.find()) { int wordLength = matcher.end() - matcher.start(); wordCount++; wordCharCount += wordLength; } // Sentence BreakIterator boundary = BreakIterator.getSentenceInstance(); boundary.setText(line); int start = boundary.first(); for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { sentenceCharCount += (end - start); sentenceCount++; } } String field = StringUtils.EMPTY; if (StringUtils.isNotBlank(fieldName)) { field = fieldName.trim() + "."; } //--- Add fields --- metadata.addLong("document.stat." + field + "characterCount", charCount); metadata.addLong("document.stat." + field + "wordCount", wordCount); metadata.addLong("document.stat." + field + "sentenceCount", sentenceCount); metadata.addLong("document.stat." + field + "paragraphCount", paragraphCount); metadata.addString("document.stat." + field + "averageWordCharacterCount", divide(wordCharCount, wordCount)); metadata.addString("document.stat." + field + "averageSentenceCharacterCount", divide(sentenceCharCount, sentenceCount)); metadata.addString("document.stat." + field + "averageSentenceWordCount", divide(wordCount, sentenceCount)); metadata.addString("document.stat." + field + "averageParagraphCharacterCount", divide(charCount, paragraphCount)); metadata.addString("document.stat." 
+ field + "averageParagraphSentenceCount", divide(sentenceCount, paragraphCount)); metadata.addString("document.stat." + field + "averageParagraphWordCount", divide(wordCount, paragraphCount)); }
From source file:com.bellman.bible.service.device.speak.SpeakTextProvider.java
private StartPos getPrevTextStartPos(String text, float fraction) { StartPos retVal = new StartPos(); int allTextLength = text.length(); int nextTextOffset = (int) (Math.min(1, fraction) * allTextLength); BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(text);/* ww w . j a v a 2s. com*/ int startPos = 0; try { // this can rarely throw an Exception startPos = breakIterator.preceding(nextTextOffset); } catch (Exception e) { Log.e(TAG, "Error finding previous sentence start", e); } retVal.found = startPos >= 0; if (retVal.found) { retVal.startPosition = startPos; // because we don't return an exact fraction, but go to the beginning of a sentence, we need to update the fractionAlreadySpoken retVal.actualFractionOfWhole = ((float) retVal.startPosition) / allTextLength; retVal.text = text.substring(retVal.startPosition); } return retVal; }
From source file:net.bible.service.device.speak.SpeakTextProvider.java
private StartPos getPrevTextStartPos(String text, float fraction) { StartPos retVal = new StartPos(); int allTextLength = text.length(); int nextTextOffset = (int) (Math.min(1, fraction) * allTextLength); BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(text);/* ww w.ja v a 2 s.co m*/ int startPos = 0; try { // this can rarely throw an Exception startPos = breakIterator.preceding(nextTextOffset); } catch (Exception e) { Log.e(TAG, "Error finding previous sentence start", e); } retVal.found = startPos >= 0; if (retVal.found) { retVal.startPosition = startPos; // because we don't return an exact fraction, but go to the beginning of a sentence, we need to update the fractionAlreadySpoken retVal.actualFractionOfWhole = ((float) retVal.startPosition) / allTextLength; retVal.text = text.substring(retVal.startPosition); } return retVal; }
From source file:be.idamediafoundry.sofa.livecycle.dsc.util.AbstractQDoxComponentInfoExtractor.java
final protected String getFirstSentence(String text) { String result = text;// w w w . ja v a 2 s . c om if (text != null) { BreakIterator iterator = BreakIterator.getSentenceInstance(); iterator.setText(text); int start = iterator.first(); int end = iterator.next(); if (end != BreakIterator.DONE) { result = text.substring(start, end).trim(); } } return result; }
From source file:com.bellman.bible.service.device.speak.SpeakTextProvider.java
private StartPos getForwardTextStartPos(String text, float fraction) { StartPos retVal = new StartPos(); int allTextLength = text.length(); int nextTextOffset = (int) (Math.min(1, fraction) * allTextLength); BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(text);// w ww .ja v a 2 s. c om int startPos = 0; try { // this can rarely throw an Exception startPos = breakIterator.following(nextTextOffset); } catch (Exception e) { Log.e(TAG, "Error finding next sentence start", e); } retVal.found = startPos >= 0; if (retVal.found) { // nudge the startPos past the beginning of sentence so this sentence start is found when searching for previous block in getNextSentence retVal.startPosition = startPos < text.length() - 1 - 1 ? startPos + 1 : startPos; // because we don't return an exact fraction, but go to the beginning of a sentence, we need to update the fractionAlreadySpoken retVal.actualFractionOfWhole = ((float) retVal.startPosition) / allTextLength; retVal.text = text.substring(retVal.startPosition); } return retVal; }
From source file:net.bible.service.device.speak.SpeakTextProvider.java
private StartPos getForwardTextStartPos(String text, float fraction) { StartPos retVal = new StartPos(); int allTextLength = text.length(); int nextTextOffset = (int) (Math.min(1, fraction) * allTextLength); BreakIterator breakIterator = BreakIterator.getSentenceInstance(); breakIterator.setText(text);// ww w.ja v a 2 s.co m int startPos = 0; try { // this can rarely throw an Exception startPos = breakIterator.following(nextTextOffset); } catch (Exception e) { Log.e(TAG, "Error finding next sentence start", e); } retVal.found = startPos >= 0; if (retVal.found) { // nudge the startPos past the beginning of sentence so this sentence start is found when searching for previous block in getNextSentence retVal.startPosition = startPos < text.length() - 1 - 1 ? startPos + 1 : startPos; // because we don't return an exact fraction, but go to the beginning of a sentence, we need to update the fractionAlreadySpoken retVal.actualFractionOfWhole = ((float) retVal.startPosition) / allTextLength; retVal.text = text.substring(retVal.startPosition); } return retVal; }