Splits the provided text into an array with a maximum length, separators specified, preserving all tokens, including empty tokens created by adjacent separators.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
public class Main {
    /**
     * <p>Splits the provided text into an array with a maximum length,
     * separators specified, preserving all tokens, including empty tokens
     * created by adjacent separators.</p>
     *
     * <p>The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens, so
     * every separator occurrence starts a new (possibly empty) token.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separatorChars splits on whitespace.
     * An empty separatorChars matches no character, so the whole input is
     * returned as a single token.</p>
     *
     * <p>If more than <code>max</code> delimited substrings are found, the last
     * returned string includes all characters after the first <code>max - 1</code>
     * returned strings (including separator characters).</p>
     *
     * <pre>
     * splitPreserveAllTokens(null, *, *)             = null
     * splitPreserveAllTokens("", *, *)               = []
     * splitPreserveAllTokens("ab de fg", null, 0)    = ["ab", "de", "fg"]
     * splitPreserveAllTokens("ab   de fg", null, 0)  = ["ab", "", "", "de", "fg"]
     * splitPreserveAllTokens("ab:cd:ef", ":", 0)     = ["ab", "cd", "ef"]
     * splitPreserveAllTokens("ab:cd:ef", ":", 2)     = ["ab", "cd:ef"]
     * splitPreserveAllTokens("ab   de fg", null, 2)  = ["ab", "  de fg"]
     * splitPreserveAllTokens("ab   de fg", null, 3)  = ["ab", "", " de fg"]
     * splitPreserveAllTokens("ab   de fg", null, 4)  = ["ab", "", "", "de fg"]
     * </pre>
     *
     * @param str the String to parse, may be <code>null</code>
     * @param separatorChars the characters used as the delimiters,
     *        <code>null</code> splits on whitespace
     * @param max the maximum number of elements to include in the
     *        array. A zero or negative value implies no limit
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(String str, String separatorChars, int max) {
        return splitWorker(str, separatorChars, max, true);
    }

    /**
     * Performs the logic for the <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods that return a maximum array
     * length.
     *
     * @param str the String to parse, may be <code>null</code>
     * @param separatorChars the set of characters that act as delimiters;
     *        <code>null</code> means split on whitespace
     * @param max the maximum number of elements to include in the
     *        array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     *        treated as empty token separators; if <code>false</code>, adjacent
     *        separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) {
        if (str == null) {
            return null;
        }
        int len = str.length();
        if (len == 0) {
            return new String[0];
        }
        List<String> list = new ArrayList<String>();
        // One more than the number of tokens emitted so far; compared against max
        // so that the max-th token swallows the remainder of the input.
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        // match: at least one non-separator character seen since the last token start.
        boolean match = false;
        // lastMatch: the most recently examined character was a separator that ended a
        // token, so a trailing empty token is still owed when preserving all tokens.
        boolean lastMatch = false;
        while (i < len) {
            if (isSeparator(str.charAt(i), separatorChars)) {
                if (match || preserveAllTokens) {
                    lastMatch = true;
                    if (sizePlus1++ == max) {
                        // Limit reached: jump to the end so the substring below takes
                        // everything that is left, separators included.
                        i = len;
                        lastMatch = false;
                    }
                    list.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
                continue;
            }
            lastMatch = false;
            match = true;
            i++;
        }
        // Flush the final token: either pending characters, or the empty token that
        // follows a trailing separator when all tokens are preserved.
        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return list.toArray(new String[list.size()]);
    }

    /**
     * Tells whether <code>ch</code> is a delimiter for the given separator set.
     *
     * @param ch the character to test
     * @param separatorChars the delimiter characters, <code>null</code> for whitespace
     * @return <code>true</code> if <code>ch</code> is whitespace (null set) or is
     *         contained in <code>separatorChars</code>
     */
    private static boolean isSeparator(char ch, String separatorChars) {
        if (separatorChars == null) {
            return Character.isWhitespace(ch);
        }
        return separatorChars.indexOf(ch) >= 0;
    }
}
Related examples in the same category
1. | The string passed to the split method is a regular expression | | |
2. | String.split(): " s".split(" ") -> {"","","s"} | | |
3. | String.split(): "".split("") (one empty string value array) | | |
4. | String.split(): " ".split(" ") -> {} (Empty array) | | |
5. | String.split(): " ".split(" ") ->(Empty array too) | | |
6. | String.split(): " s ".split(" ") -> {"","","s"} (!) (space before and after) | | |
7. | " ".split(" ") generates a NullPointerException | | |
8. | Using split() with a space can be a problem | | |
9. | String.split() is based on regular expression | | |
10. | Split a string using String.split() | | |
11. | Split by dot | | |
12. | Special character needs to be escaped with a \ | | |
13. | Escape special character with a \ | | |
14. | Keep the empty strings | | |
15. | Specify a regular expression to match one or more spaces | | |
16. | Split a String | | |
17. | Using second argument in the String.split() method to control the maximum number of substrings generated by splitting a string. | | |
18. | Special characters need to be escaped when they are used as delimiters, such as "." and "|". | | |
19. | Control the maximum number of substrings generated by splitting a string. | | |
20. | Split with regular expression | | |
21. | Pattern Splitting for space splitter | | |
22. | Split Strings with Patterns: split("[-/%]") | | |
23. | Use split() to extract substrings from a string. | | |
24. | Split on word boundaries. | | |
25. | Split same string on commas and zero or more spaces. | | |
26. | Split on word boundaries, but allow embedded periods and @. | | |
27. | Split on various punctuation and zero or more trailing spaces. | | |
28. | Parsing Character-Separated Data with a Regular Expression | | |
29. | Parse a line whose separator is a comma followed by a space | | |
30. | Parse a line with and's and or's | | |
31. | Split up a string into multiple strings based on a delimiter | | |
32. | Splits a String by Character type as returned by java.lang.Character.getType(char) | | |
33. | Splits a String by char: Groups of contiguous characters of the same type are returned as complete tokens. | | |
34. | Splits a string around matches of the given delimiter character. | | |
35. | Splits the provided text into an array with a maximum length, separators specified. | | |
36. | Splits the provided text into an array, separator specified, preserving all tokens, including empty tokens created by adjacent separators. | | |
37. | Splits the provided text into an array, separator specified. | | |
38. | Splits the provided text into an array, separator string specified. Returns a maximum of max substrings. | | |
39. | Splits the provided text into an array, separators specified, preserving all tokens, including empty tokens created by adjacent separators. | | |
40. | Splits the provided text into an array, separators specified. This is an alternative to using StringTokenizer. | | |
41. | Splits the provided text into an array, using whitespace as the separator, preserving all tokens, including empty tokens created by adjacent separators. | | |
42. | Break a string into tokens | | |
43. | String split on multicharacter delimiter | | |
44. | Split the source into two strings at the first occurrence of the splitter. Subsequent occurrences are not treated specially, and may be part of the second string. | | |
45. | Returns the first substring that is enclosed by the specified delimiters. | | |
46. | Split strings | | |
47. | Splits the provided text into a list, based on a given separator. | | |