Java examples for java.lang:String Split
Splits the provided text into an array, using whitespace as the separator, preserving all tokens, including empty tokens created by adjacent separators.
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Null-safe String helpers centred on <code>splitPreserveAllTokens</code>,
 * plus the small <code>substring</code>, <code>indexOf</code>,
 * <code>isWhitespace</code> and <code>length</code> utilities.
 */
public class Main {

    /**
     * Demo entry point: splits a sample String on whitespace and prints the
     * resulting array.
     *
     * @param argv command line arguments, unused
     */
    public static void main(String[] argv) {
        String str = "java2s.com";
        System.out.println(java.util.Arrays.toString(splitPreserveAllTokens(str)));
    }

    /** Shared zero-length result returned for empty input Strings. */
    private static final String[] EMPTY_STRING_ARRAY = new String[0];

    /**
     * The empty String <code>""</code>.
     * @since 2.0
     */
    public static final String EMPTY = "";

    /**
     * <p>Splits the provided text into an array, using whitespace as the
     * separator, preserving all tokens, including empty tokens created by
     * adjacent separators.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * splitPreserveAllTokens(null)       = null
     * splitPreserveAllTokens("")         = []
     * splitPreserveAllTokens("abc def")  = ["abc", "def"]
     * splitPreserveAllTokens("abc  def") = ["abc", "", "def"]
     * splitPreserveAllTokens(" abc ")    = ["", "abc", ""]
     * </pre>
     *
     * @param str  the String to parse, may be <code>null</code>
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(String str) {
        return splitWorker(str, null, -1, true);
    }

    /**
     * <p>Splits the provided text into an array, separator specified,
     * preserving all tokens, including empty tokens created by adjacent
     * separators.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * splitPreserveAllTokens(null, *)         = null
     * splitPreserveAllTokens("", *)           = []
     * splitPreserveAllTokens("a.b.c", '.')    = ["a", "b", "c"]
     * splitPreserveAllTokens("a..b.c", '.')   = ["a", "", "b", "c"]
     * splitPreserveAllTokens("a:b:c", '.')    = ["a:b:c"]
     * splitPreserveAllTokens("a b c", ' ')    = ["a", "b", "c"]
     * splitPreserveAllTokens("a b c ", ' ')   = ["a", "b", "c", ""]
     * splitPreserveAllTokens("a b c  ", ' ')  = ["a", "b", "c", "", ""]
     * splitPreserveAllTokens(" a b c", ' ')   = ["", "a", "b", "c"]
     * splitPreserveAllTokens("  a b c", ' ')  = ["", "", "a", "b", "c"]
     * splitPreserveAllTokens(" a b c ", ' ')  = ["", "a", "b", "c", ""]
     * </pre>
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separatorChar  the character used as the delimiter
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(String str, char separatorChar) {
        return splitWorker(str, separatorChar, true);
    }

    /**
     * <p>Splits the provided text into an array, separators specified,
     * preserving all tokens, including empty tokens created by adjacent
     * separators.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.</p>
     *
     * <pre>
     * splitPreserveAllTokens(null, *)           = null
     * splitPreserveAllTokens("", *)             = []
     * splitPreserveAllTokens("abc def", null)   = ["abc", "def"]
     * splitPreserveAllTokens("abc def", " ")    = ["abc", "def"]
     * splitPreserveAllTokens("abc  def", " ")   = ["abc", "", "def"]
     * splitPreserveAllTokens("ab:cd:ef", ":")   = ["ab", "cd", "ef"]
     * splitPreserveAllTokens("ab:cd:ef:", ":")  = ["ab", "cd", "ef", ""]
     * splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
     * splitPreserveAllTokens("ab::cd:ef", ":")  = ["ab", "", "cd", "ef"]
     * splitPreserveAllTokens(":cd:ef", ":")     = ["", "cd", "ef"]
     * splitPreserveAllTokens("::cd:ef", ":")    = ["", "", "cd", "ef"]
     * splitPreserveAllTokens(":cd:ef:", ":")    = ["", "cd", "ef", ""]
     * </pre>
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separatorChars  the characters used as the delimiters,
     *  <code>null</code> splits on whitespace
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(String str, String separatorChars) {
        return splitWorker(str, separatorChars, -1, true);
    }

    /**
     * <p>Splits the provided text into an array with a maximum length,
     * separators specified, preserving all tokens, including empty tokens
     * created by adjacent separators.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.</p>
     *
     * <p>If more than <code>max</code> delimited substrings are found, the last
     * returned string includes all characters after the first <code>max - 1</code>
     * returned strings (including separator characters).</p>
     *
     * <pre>
     * splitPreserveAllTokens(null, *, *)            = null
     * splitPreserveAllTokens("", *, *)              = []
     * splitPreserveAllTokens("ab de fg", null, 0)   = ["ab", "de", "fg"]
     * splitPreserveAllTokens("ab   de fg", null, 0) = ["ab", "", "", "de", "fg"]
     * splitPreserveAllTokens("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
     * splitPreserveAllTokens("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
     * splitPreserveAllTokens("ab   de fg", null, 2) = ["ab", "  de fg"]
     * splitPreserveAllTokens("ab   de fg", null, 3) = ["ab", "", " de fg"]
     * splitPreserveAllTokens("ab   de fg", null, 4) = ["ab", "", "", "de fg"]
     * </pre>
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separatorChars  the characters used as the delimiters,
     *  <code>null</code> splits on whitespace
     * @param max  the maximum number of elements to include in the
     *  array. A zero or negative value implies no limit
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(String str, String separatorChars, int max) {
        return splitWorker(str, separatorChars, max, true);
    }

    /**
     * Performs the logic for the <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods that do not take a
     * maximum array length.
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separatorChar  the separator character
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     * treated as empty token separators; if <code>false</code>, adjacent
     * separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    private static String[] splitWorker(String str, char separatorChar, boolean preserveAllTokens) {
        if (str == null) {
            return null;
        }
        int len = str.length();
        if (len == 0) {
            return EMPTY_STRING_ARRAY;
        }
        List<String> list = new ArrayList<String>();
        int i = 0;
        int start = 0;
        // match: the current token contains at least one non-separator char.
        boolean match = false;
        // lastMatch: the previous char was a separator that closed a token,
        // so a trailing empty token may still be owed at end of input.
        boolean lastMatch = false;
        while (i < len) {
            if (str.charAt(i) == separatorChar) {
                if (match || preserveAllTokens) {
                    list.add(str.substring(start, i));
                    match = false;
                    lastMatch = true;
                }
                start = ++i;
                continue;
            }
            lastMatch = false;
            match = true;
            i++;
        }
        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return list.toArray(new String[list.size()]);
    }

    /**
     * Performs the logic for the <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods that take a maximum array
     * length.
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separatorChars the separator characters, <code>null</code>
     *  means whitespace
     * @param max  the maximum number of elements to include in the
     *  array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     * treated as empty token separators; if <code>false</code>, adjacent
     * separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    private static String[] splitWorker(String str, String separatorChars, int max,
            boolean preserveAllTokens) {
        if (str == null) {
            return null;
        }
        int len = str.length();
        if (len == 0) {
            return EMPTY_STRING_ARRAY;
        }
        List<String> list = new ArrayList<String>();
        // One more than the number of tokens emitted so far; compared with max.
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        boolean match = false;
        boolean lastMatch = false;
        while (i < len) {
            if (isSeparator(str.charAt(i), separatorChars)) {
                if (match || preserveAllTokens) {
                    lastMatch = true;
                    if (sizePlus1++ == max) {
                        // Limit reached: the final token swallows the rest of
                        // the input, separators included, and the loop ends.
                        i = len;
                        lastMatch = false;
                    }
                    list.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
                continue;
            }
            lastMatch = false;
            match = true;
            i++;
        }
        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return list.toArray(new String[list.size()]);
    }

    /**
     * Tells whether a character is a separator for the given separator set.
     *
     * @param ch  the character to test
     * @param separatorChars  the separator characters, <code>null</code>
     *  means any {@link Character#isWhitespace(char)} character
     * @return <code>true</code> if <code>ch</code> is a separator
     */
    private static boolean isSeparator(char ch, String separatorChars) {
        if (separatorChars == null) {
            return Character.isWhitespace(ch);
        }
        return separatorChars.indexOf(ch) >= 0;
    }

    /**
     * Checks whether a String is <code>null</code> or has length zero.
     *
     * @param str  the String to check, may be null
     * @return <code>true</code> if the String is <code>null</code> or empty
     */
    private static boolean isEmpty(String str) {
        return str == null || str.length() == 0;
    }

    /**
     * Gets a String's length or <code>0</code> if the String is <code>null</code>.
     *
     * @param str
     *            a String or <code>null</code>
     * @return String length or <code>0</code> if the String is <code>null</code>.
     * @since 2.4
     */
    public static int length(String str) {
        return str == null ? 0 : str.length();
    }

    /**
     * <p>Gets a substring from the specified String avoiding exceptions.</p>
     *
     * <p>A negative start position can be used to start <code>n</code>
     * characters from the end of the String.</p>
     *
     * <p>A <code>null</code> String will return <code>null</code>.
     * An empty ("") String will return "".</p>
     *
     * <pre>
     * substring(null, *)   = null
     * substring("", *)     = ""
     * substring("abc", 0)  = "abc"
     * substring("abc", 2)  = "c"
     * substring("abc", 4)  = ""
     * substring("abc", -2) = "bc"
     * substring("abc", -4) = "abc"
     * </pre>
     *
     * @param str  the String to get the substring from, may be null
     * @param start  the position to start from, negative means
     *  count back from the end of the String by this many characters
     * @return substring from start position, <code>null</code> if null String input
     */
    public static String substring(String str, int start) {
        if (str == null) {
            return null;
        }
        // A negative start counts back from the end of the String.
        if (start < 0) {
            start = str.length() + start;
        }
        // Still negative: the offset reached past the beginning, so clamp.
        if (start < 0) {
            start = 0;
        }
        if (start > str.length()) {
            return EMPTY;
        }
        return str.substring(start);
    }

    /**
     * <p>Gets a substring from the specified String avoiding exceptions.</p>
     *
     * <p>A negative start position can be used to start/end <code>n</code>
     * characters from the end of the String.</p>
     *
     * <p>The returned substring starts with the character in the <code>start</code>
     * position and ends before the <code>end</code> position. All position counting is
     * zero-based -- i.e., to start at the beginning of the string use
     * <code>start = 0</code>. Negative start and end positions can be used to
     * specify offsets relative to the end of the String.</p>
     *
     * <p>If <code>start</code> is not strictly to the left of <code>end</code>, ""
     * is returned.</p>
     *
     * <pre>
     * substring(null, *, *)    = null
     * substring("", * ,  *)    = ""
     * substring("abc", 0, 2)   = "ab"
     * substring("abc", 2, 0)   = ""
     * substring("abc", 2, 4)   = "c"
     * substring("abc", 4, 6)   = ""
     * substring("abc", 2, 2)   = ""
     * substring("abc", -2, -1) = "b"
     * substring("abc", -4, 2)  = "ab"
     * </pre>
     *
     * @param str  the String to get the substring from, may be null
     * @param start  the position to start from, negative means
     *  count back from the end of the String by this many characters
     * @param end  the position to end at (exclusive), negative means
     *  count back from the end of the String by this many characters
     * @return substring from start position to end position,
     *  <code>null</code> if null String input
     */
    public static String substring(String str, int start, int end) {
        if (str == null) {
            return null;
        }
        // Negative positions count back from the end of the String.
        if (end < 0) {
            end = str.length() + end;
        }
        if (start < 0) {
            start = str.length() + start;
        }
        // Clamp an end that runs past the String.
        if (end > str.length()) {
            end = str.length();
        }
        if (start > end) {
            return EMPTY;
        }
        // Clamp offsets that reached past the beginning of the String.
        if (start < 0) {
            start = 0;
        }
        if (end < 0) {
            end = 0;
        }
        return str.substring(start, end);
    }

    /**
     * <p>Checks if the String contains only whitespace.</p>
     *
     * <p><code>null</code> will return <code>false</code>.
     * An empty String ("") will return <code>true</code>.</p>
     *
     * <pre>
     * isWhitespace(null)   = false
     * isWhitespace("")     = true
     * isWhitespace("  ")   = true
     * isWhitespace("abc")  = false
     * isWhitespace("ab2c") = false
     * isWhitespace("ab-c") = false
     * </pre>
     *
     * @param str  the String to check, may be null
     * @return <code>true</code> if only contains whitespace, and is non-null
     * @since 2.0
     */
    public static boolean isWhitespace(String str) {
        if (str == null) {
            return false;
        }
        int sz = str.length();
        for (int i = 0; i < sz; i++) {
            if (!Character.isWhitespace(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * <p>Finds the first index within a String, handling <code>null</code>.
     * This method uses {@link String#indexOf(int)}.</p>
     *
     * <p>A <code>null</code> or empty ("") String will return <code>-1</code>.</p>
     *
     * <pre>
     * indexOf(null, *)         = -1
     * indexOf("", *)           = -1
     * indexOf("aabaabaa", 'a') = 0
     * indexOf("aabaabaa", 'b') = 2
     * </pre>
     *
     * @param str  the String to check, may be null
     * @param searchChar  the character to find
     * @return the first index of the search character,
     *  -1 if no match or <code>null</code> string input
     * @since 2.0
     */
    public static int indexOf(String str, char searchChar) {
        if (isEmpty(str)) {
            return -1;
        }
        return str.indexOf(searchChar);
    }

    /**
     * <p>Finds the first index within a String from a start position,
     * handling <code>null</code>.
     * This method uses {@link String#indexOf(int, int)}.</p>
     *
     * <p>A <code>null</code> or empty ("") String will return <code>-1</code>.
     * A negative start position is treated as zero.
     * A start position greater than the string length returns <code>-1</code>.</p>
     *
     * <pre>
     * indexOf(null, *, *)          = -1
     * indexOf("", *, *)            = -1
     * indexOf("aabaabaa", 'b', 0)  = 2
     * indexOf("aabaabaa", 'b', 3)  = 5
     * indexOf("aabaabaa", 'b', 9)  = -1
     * indexOf("aabaabaa", 'b', -1) = 2
     * </pre>
     *
     * @param str  the String to check, may be null
     * @param searchChar  the character to find
     * @param startPos  the start position, negative treated as zero
     * @return the first index of the search character,
     *  -1 if no match or <code>null</code> string input
     * @since 2.0
     */
    public static int indexOf(String str, char searchChar, int startPos) {
        if (isEmpty(str)) {
            return -1;
        }
        return str.indexOf(searchChar, startPos);
    }

    /**
     * <p>Finds the first index within a String, handling <code>null</code>.
     * This method uses {@link String#indexOf(String)}.</p>
     *
     * <p>A <code>null</code> String will return <code>-1</code>.</p>
     *
     * <pre>
     * indexOf(null, *)          = -1
     * indexOf(*, null)          = -1
     * indexOf("", "")           = 0
     * indexOf("aabaabaa", "a")  = 0
     * indexOf("aabaabaa", "b")  = 2
     * indexOf("aabaabaa", "ab") = 1
     * indexOf("aabaabaa", "")   = 0
     * </pre>
     *
     * @param str  the String to check, may be null
     * @param searchStr  the String to find, may be null
     * @return the first index of the search String,
     *  -1 if no match or <code>null</code> string input
     * @since 2.0
     */
    public static int indexOf(String str, String searchStr) {
        if (str == null || searchStr == null) {
            return -1;
        }
        return str.indexOf(searchStr);
    }

    /**
     * <p>Finds the first index within a String, handling <code>null</code>.
     * This method uses {@link String#indexOf(String, int)}.</p>
     *
     * <p>A <code>null</code> String will return <code>-1</code>.
     * A negative start position is treated as zero.
     * An empty ("") search String always matches.
     * A start position greater than the string length only matches
     * an empty search String.</p>
     *
     * <pre>
     * indexOf(null, *, *)          = -1
     * indexOf(*, null, *)          = -1
     * indexOf("", "", 0)           = 0
     * indexOf("aabaabaa", "a", 0)  = 0
     * indexOf("aabaabaa", "b", 0)  = 2
     * indexOf("aabaabaa", "ab", 0) = 1
     * indexOf("aabaabaa", "b", 3)  = 5
     * indexOf("aabaabaa", "b", 9)  = -1
     * indexOf("aabaabaa", "b", -1) = 2
     * indexOf("aabaabaa", "", 2)   = 2
     * indexOf("abc", "", 9)        = 3
     * </pre>
     *
     * @param str  the String to check, may be null
     * @param searchStr  the String to find, may be null
     * @param startPos  the start position, negative treated as zero
     * @return the first index of the search String,
     *  -1 if no match or <code>null</code> string input
     * @since 2.0
     */
    public static int indexOf(String str, String searchStr, int startPos) {
        if (str == null || searchStr == null) {
            return -1;
        }
        // An empty search String at or past the end is answered here with the
        // String's length, so the result does not depend on how the JDK
        // treats an out-of-range start position.
        if (searchStr.length() == 0 && startPos >= str.length()) {
            return str.length();
        }
        return str.indexOf(searchStr, startPos);
    }
}