Here you can find the source of tokenizeAutocomplete(String text, int minSize)
Parameter | Description |
---|---|
text | the text to tokenize |
minSize | the minimum size of the tokens/auto-complete; if a word is shorter than this size, it will still be added as a token. |
public static String tokenizeAutocomplete(String text, int minSize)
//package com.java2s;
//License from project: Open Source License

public class Main {
    /**
     * Splits words into smaller parts so these can be auto-completed.
     *
     * <p>The text is first split into words. For every word longer than
     * {@code minSize}, each substring whose length is at least {@code minSize}
     * is emitted as a token, ordered by start position and then by length.
     * Words that are not longer than {@code minSize} are emitted unchanged.
     * Every token, including the last one, is followed by a single space.
     *
     * @param text the text to tokenize
     * @param minSize the minimum size of the tokens/auto-complete, if a word is shorter than this size
     * it will still be added as a token.
     * @return auto-complete compatible tokenized text; empty when the text contains no words
     * @throws IllegalArgumentException if {@code minSize} is not higher than 0
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static String tokenizeAutocomplete(String text, int minSize) {
        if (minSize <= 0) {
            throw new IllegalArgumentException("minSize has to be higher than 0");
        }

        // A word of length n yields O(n^2) tokens, so accumulate in a builder
        // rather than re-copying an ever-growing String on each append.
        StringBuilder tokens = new StringBuilder();
        for (String word : splitTextToWords(text)) {
            int length = word.length();
            if (length > minSize) {
                for (int start = 0; start <= length - minSize; ++start) {
                    for (int end = start + minSize; end <= length; ++end) {
                        tokens.append(word, start, end).append(' ');
                    }
                }
            } else {
                tokens.append(word).append(' ');
            }
        }
        return tokens.toString();
    }

    /**
     * Splits a text into words.
     *
     * <p>A word is a maximal run of ASCII letters, digits and apostrophes;
     * every other character acts as a separator. Empty strings are never
     * reported as words.
     *
     * @param text the text to split into words
     * @return all words in the text; an empty array when there are none
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public static String[] splitTextToWords(String text) {
        String[] parts = text.split("[^0-9a-zA-Z']+");

        // String.split drops trailing empty strings but keeps a leading one:
        // it appears when the text starts with a separator or is empty.
        // Only the first element can therefore be empty; strip it.
        if (parts.length > 0 && parts[0].isEmpty()) {
            String[] words = new String[parts.length - 1];
            System.arraycopy(parts, 1, words, 0, words.length);
            return words;
        }
        return parts;
    }
}