List of usage examples for java.text.BreakIterator.getWordInstance
public static BreakIterator getWordInstance(Locale locale)
Returns a new BreakIterator instance for word breaks for the given locale.
From source file:Main.java
public static void main(String[] argv) throws Exception { BreakIterator iterator = BreakIterator.getWordInstance(Locale.CANADA); iterator.setText("a sentence"); for (int index = iterator.first(); index != BreakIterator.DONE; index = iterator.next()) { System.out.println(index); }// ww w . jav a 2 s . c o m }
From source file:Main.java
public static void main(String[] args) { String text = "this is a test(this is a test)."; BreakIterator wordIterator = BreakIterator.getWordInstance(Locale.getDefault()); extractWords(text, wordIterator);//w ww .j a va 2 s . com }
From source file:HangulTextBoundaryDetection.java
public static void main(String s[]) { String hangul = "\u1112\u1161\u11ab\u1100\u1173\u11af"; BreakIterator ci = BreakIterator.getCharacterInstance(Locale.KOREAN); BreakIterator wi = BreakIterator.getWordInstance(Locale.KOREAN); System.out.print("Character Boundaries: "); printBoundaries(hangul, ci);//from w w w .jav a 2s. c om System.out.print("\nWord Boundaries:"); printBoundaries(hangul, wi); }
From source file:Main.java
/**
 * Creates the frame: wraps {@code outputText} in a scroll pane, splits a fixed
 * sample text at the default locale's word boundaries and writes the pieces
 * into {@code outputText}, each followed by a {@code |} separator.
 */
public TextBoundaryFrame() {
    getContentPane().add(new JScrollPane(outputText));

    Locale currentLocale = Locale.getDefault();
    BreakIterator currentBreakIterator = BreakIterator.getWordInstance(currentLocale);
    String text = "The quick, brown fox jump-ed\n" + "over the lazy \"dog.\" And then...what happened?";
    currentBreakIterator.setText(text);

    outputText.setText("");
    int from = currentBreakIterator.first();
    int to;
    // Each pass emits the segment between two consecutive boundaries.
    while ((to = currentBreakIterator.next()) != BreakIterator.DONE) {
        outputText.append(text.substring(from, to) + "|");
        from = to;
    }
    // Emits whatever follows the last boundary reached by the loop.
    outputText.append(text.substring(from));
}
From source file:pt.ua.ri.tokenizer.WordTokenizer.java
private WordTokenizer(String locale) { checkNotNull(locale);/*from w w w .j a va 2 s . c om*/ bi = BreakIterator.getWordInstance(Locale.forLanguageTag(locale)); tokens = new PatriciaTrie<>(); bf = new StringBuilder(); }
From source file:nl.gridline.free.taalserver.TokenizeMap.java
@Override protected void setup(Context context) throws IOException, InterruptedException { String loc = context.getConfiguration().get(TaskConfig.TEXT_LANGUAGE); if (loc == null) { splitter = BreakIterator.getWordInstance(new Locale("nl_NL")); } else {//from ww w .ja v a 2 s . co m splitter = BreakIterator.getWordInstance(new Locale(loc)); } String useDescStr = context.getConfiguration().get(TaskConfig.TEXT_ANALYSIS_USE_DESCRIPTION); if (useDescStr != null && !useDescStr.isEmpty()) { useDescription = Boolean.parseBoolean(useDescStr); } else { LOG.warn("parameter missing <" + TaskConfig.TEXT_ANALYSIS_USE_DESCRIPTION + "> defaulting to: true"); useDescription = true; } String useTitleStr = context.getConfiguration().get(TaskConfig.TEXT_ANALYSIS_USE_TITLE); if (useTitleStr != null && !useTitleStr.isEmpty()) { useTitle = Boolean.parseBoolean(useTitleStr); } else { LOG.warn("parameter missing <" + TaskConfig.TEXT_ANALYSIS_USE_TITLE + "> defaulting to: true"); useTitle = true; } String useKeyWordsStr = context.getConfiguration().get(TaskConfig.TEXT_ANALYSIS_USE_KEYWORDS); if (useKeyWordsStr != null && !useKeyWordsStr.isEmpty()) { useKeywords = Boolean.parseBoolean(useKeyWordsStr); } }
From source file:IteratorTest.java
protected void refreshDisplay() { int startIndex, nextIndex; Vector items = new Vector(); String msgText = textArea.getText(); Locale locale = (Locale) (localeButton.getSelectedItem()); BreakIterator iterator = null; if (charButton.isSelected()) { iterator = BreakIterator.getCharacterInstance(locale); } else if (wordButton.isSelected()) { iterator = BreakIterator.getWordInstance(locale); } else if (lineButton.isSelected()) { iterator = BreakIterator.getLineInstance(locale); } else if (sentButton.isSelected()) { iterator = BreakIterator.getSentenceInstance(locale); }//from ww w . j av a 2 s . co m iterator.setText(msgText); startIndex = iterator.first(); nextIndex = iterator.next(); while (nextIndex != BreakIterator.DONE) { items.addElement(msgText.substring(startIndex, nextIndex)); startIndex = nextIndex; nextIndex = iterator.next(); } itemList.setListData(items); }
From source file:forge.view.arcane.util.OutlinedLabel.java
/** {@inheritDoc} */ @Override//from w ww . java2s .c o m public final void paint(final Graphics g) { if (getText().length() == 0) { return; } Dimension size = getSize(); // // if( size.width < 50 ) { // g.setColor(Color.cyan); // g.drawRect(0, 0, size.width-1, size.height-1); // } Graphics2D g2d = (Graphics2D) g; g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); g2d.setRenderingHint(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON); int textX = outlineSize, textY = 0; int wrapWidth = Math.max(0, wrap ? size.width - outlineSize * 2 : Integer.MAX_VALUE); final String text = getText(); AttributedString attributedString = new AttributedString(text); if (!StringUtils.isEmpty(text)) { attributedString.addAttribute(TextAttribute.FONT, getFont()); } AttributedCharacterIterator charIterator = attributedString.getIterator(); FontRenderContext fontContext = g2d.getFontRenderContext(); LineBreakMeasurer measurer = new LineBreakMeasurer(charIterator, BreakIterator.getWordInstance(Locale.ENGLISH), fontContext); int lineCount = 0; while (measurer.getPosition() < charIterator.getEndIndex()) { measurer.nextLayout(wrapWidth); lineCount++; if (lineCount > 2) { break; } } charIterator.first(); // Use char wrap if word wrap would cause more than two lines of text. if (lineCount > 2) { measurer = new LineBreakMeasurer(charIterator, BreakIterator.getCharacterInstance(Locale.ENGLISH), fontContext); } else { measurer.setPosition(0); } while (measurer.getPosition() < charIterator.getEndIndex()) { TextLayout textLayout = measurer.nextLayout(wrapWidth); float ascent = textLayout.getAscent(); textY += ascent; // Move down to baseline. 
g2d.setColor(outlineColor); g2d.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 0.8f)); textLayout.draw(g2d, textX + outlineSize, textY - outlineSize); textLayout.draw(g2d, textX + outlineSize, textY + outlineSize); textLayout.draw(g2d, textX - outlineSize, textY - outlineSize); textLayout.draw(g2d, textX - outlineSize, textY + outlineSize); g2d.setColor(getForeground()); g2d.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, 1.0f)); textLayout.draw(g2d, textX, textY); // Move down to top of next line. textY += textLayout.getDescent() + textLayout.getLeading(); } }
From source file:de.tudarmstadt.lt.lm.service.BreakIteratorStringProvider.java
@Override public List<String> tokenizeSentence_intern(String sentence, String language_code) { ArrayList<String> tokens = new ArrayList<String>(); BreakIterator token_bounds = BreakIterator.getWordInstance(LocaleUtils.toLocale(language_code)); token_bounds.setText(sentence.trim()); int begin_t = token_bounds.first(); for (int end_t = token_bounds.next(); end_t != BreakIterator.DONE; begin_t = end_t, end_t = token_bounds .next()) {/*from w w w . j ava2s . c o m*/ String token = de.tudarmstadt.lt.utilities.StringUtils .trim_and_replace_emptyspace(sentence.substring(begin_t, end_t), "_"); if (!token.isEmpty()) { // add token iff token is not empty tokens.add(token); } } return tokens; }