Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang.text;

import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;

/**
 * Tokenizes a string based on delimiters (separators) and supporting
 * quoting and ignored character concepts.
 * <p>
 * This class can split a String into many smaller strings. It aims to do a
 * similar job to {@link java.util.StringTokenizer StringTokenizer}, however it
 * offers much more control and flexibility including implementing the
 * <code>ListIterator</code> interface. By default, it is set up like
 * <code>StringTokenizer</code>.
 * <p>
 * The input String is split into a number of <i>tokens</i>. Each token is
 * separated from the next String by a <i>delimiter</i>. One or more delimiter
 * characters must be specified.
 * <p>
 * Each token may be surrounded by quotes. The <i>quote</i> matcher specifies
 * the quote character(s). A quote may be escaped within a quoted section by
 * duplicating itself.
 * <p>
 * Between each token and the delimiter are potentially characters that need
 * trimming. The <i>trimmer</i> matcher specifies these characters. One usage
 * might be to trim whitespace characters.
 * <p>
 * At any point outside the quotes there might potentially be invalid
 * characters. The <i>ignored</i> matcher specifies these characters to be
 * removed. One usage might be to remove new line characters.
 * <p>
 * Empty tokens may be removed or returned as null.
 *
 * <pre>
 * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
 * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)
 * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
 * </pre>
 * <p>
 *
 * This tokenizer has the following properties and options:
 *
 * <table>
 * <tr>
 * <th>Property</th>
 * <th>Type</th>
 * <th>Default</th>
 * </tr>
 * <tr>
 * <td>delim</td>
 * <td>CharSetMatcher</td>
 * <td>{ \t\n\r\f}</td>
 * </tr>
 * <tr>
 * <td>quote</td>
 * <td>NoneMatcher</td>
 * <td>{}</td>
 * </tr>
 * <tr>
 * <td>ignore</td>
 * <td>NoneMatcher</td>
 * <td>{}</td>
 * </tr>
 * <tr>
 * <td>emptyTokenAsNull</td>
 * <td>boolean</td>
 * <td>false</td>
 * </tr>
 * <tr>
 * <td>ignoreEmptyTokens</td>
 * <td>boolean</td>
 * <td>true</td>
 * </tr>
 * </table>
 * <P>
 * NOTE: Modified for Android(TM) platform. by DUMAPIC
 * (http://twitter.com/#!/dumapick)
 *
 * @modified 2011-05-15 version 1.0.0
 * </P>
 *
 * @author Apache Software Foundation
 * @author Matthew Inger
 * @author Gary D. Gregory
 * @since 2.2
 * @version $Id: StrTokenizer.java 907631 2010-02-08 12:22:48Z sebb $
 */
public class StrTokenizer implements ListIterator<Object>, Cloneable {

    private static final StrTokenizer CSV_TOKENIZER_PROTOTYPE;
    private static final StrTokenizer TSV_TOKENIZER_PROTOTYPE;

    static {
        CSV_TOKENIZER_PROTOTYPE = new StrTokenizer();
        CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.commaMatcher());
        CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
        CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());
        CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());
        CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
        CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);

        TSV_TOKENIZER_PROTOTYPE = new StrTokenizer();
        TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.tabMatcher());
        TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
        TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());
        TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());
        TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
        TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
    }

    /** The text to work on. */
    private char[] chars;
    /** The parsed tokens, or null if tokenization has not happened yet. */
    private String[] tokens;
    /** The current iteration position */
    private int tokenPos;

    /** The delimiter matcher */
    private StrMatcher delimMatcher = StrMatcher.splitMatcher();
    /** The quote matcher */
    private StrMatcher quoteMatcher = StrMatcher.noneMatcher();
    /** The ignored matcher */
    private StrMatcher ignoredMatcher = StrMatcher.noneMatcher();
    /** The trimmer matcher */
    private StrMatcher trimmerMatcher = StrMatcher.noneMatcher();

    /** Whether to return empty tokens as null */
    private boolean emptyAsNull = false;
    /** Whether to ignore empty tokens */
    private boolean ignoreEmptyTokens = true;

    // -----------------------------------------------------------------------

    /**
     * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
     *
     * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
     */
    private static StrTokenizer getCSVClone() {
        return (StrTokenizer) CSV_TOKENIZER_PROTOTYPE.clone();
    }

    /**
     * Gets a new tokenizer instance which parses Comma Separated Value
     * strings. The default for CSV processing will be trim whitespace from
     * both ends (which can be overridden with
     * {@link #setTrimmerMatcher(StrMatcher)}).
     * <p>
     * You must call a "reset" method to set the string which you want to parse.
     *
     * @return a new tokenizer instance which parses Comma Separated Value
     *         strings
     */
    public static StrTokenizer getCSVInstance() {
        return getCSVClone();
    }

    /**
     * Gets a new tokenizer instance which parses Comma Separated Value strings
     * initializing it with the given input. The default for CSV processing will
     * be trim whitespace from both ends (which can be overridden with
     * {@link #setTrimmerMatcher(StrMatcher)}).
     *
     * @param input
     *            the text to parse
     * @return a new tokenizer instance which parses Comma Separated Value
     *         strings
     */
    public static StrTokenizer getCSVInstance(String input) {
        StrTokenizer tok = getCSVClone();
        tok.reset(input);
        return tok;
    }

    /**
     * Gets a new tokenizer instance which parses Comma Separated Value strings
     * initializing it with the given input. The default for CSV processing will
     * be trim whitespace from both ends (which can be overridden with
     * {@link #setTrimmerMatcher(StrMatcher)}).
     *
     * @param input
     *            the text to parse
     * @return a new tokenizer instance which parses Comma Separated Value
     *         strings
     */
    public static StrTokenizer getCSVInstance(char[] input) {
        StrTokenizer tok = getCSVClone();
        tok.reset(input);
        return tok;
    }

    /**
     * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
     *
     * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
     */
    private static StrTokenizer getTSVClone() {
        return (StrTokenizer) TSV_TOKENIZER_PROTOTYPE.clone();
    }

    /**
     * Gets a new tokenizer instance which parses Tab Separated Value strings.
     * The default for TSV processing will be trim whitespace from both ends
     * (which can be overridden with {@link #setTrimmerMatcher(StrMatcher)}).
     * <p>
     * You must call a "reset" method to set the string which you want to parse.
     *
     * @return a new tokenizer instance which parses Tab Separated Value
     *         strings.
     */
    public static StrTokenizer getTSVInstance() {
        return getTSVClone();
    }

    /**
     * Gets a new tokenizer instance which parses Tab Separated Value strings
     * initializing it with the given input. The default for TSV processing will
     * be trim whitespace from both ends (which can be overridden with
     * {@link #setTrimmerMatcher(StrMatcher)}).
     *
     * @param input
     *            the string to parse
     * @return a new tokenizer instance which parses Tab Separated Value
     *         strings.
     */
    public static StrTokenizer getTSVInstance(String input) {
        StrTokenizer tok = getTSVClone();
        tok.reset(input);
        return tok;
    }

    /**
     * Gets a new tokenizer instance which parses Tab Separated Value strings
     * initializing it with the given input. The default for TSV processing will
     * be trim whitespace from both ends (which can be overridden with
     * {@link #setTrimmerMatcher(StrMatcher)}).
     *
     * @param input
     *            the string to parse
     * @return a new tokenizer instance which parses Tab Separated Value
     *         strings.
     */
    public static StrTokenizer getTSVInstance(char[] input) {
        StrTokenizer tok = getTSVClone();
        tok.reset(input);
        return tok;
    }

    // -----------------------------------------------------------------------

    /**
     * Constructs a tokenizer splitting on space, tab, newline and formfeed as
     * per StringTokenizer, but with no text to tokenize.
     * <p>
     * This constructor is normally used with {@link #reset(String)}.
     */
    public StrTokenizer() {
        super();
        this.chars = null;
    }

    /**
     * Constructs a tokenizer splitting on space, tab, newline and formfeed as
     * per StringTokenizer.
     *
     * @param input
     *            the string which is to be parsed, null means no text
     */
    public StrTokenizer(String input) {
        super();
        this.chars = (input != null) ? input.toCharArray() : null;
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter character.
     *
     * @param input
     *            the string which is to be parsed
     * @param delim
     *            the field delimiter character
     */
    public StrTokenizer(String input, char delim) {
        this(input);
        setDelimiterChar(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter string.
     *
     * @param input
     *            the string which is to be parsed
     * @param delim
     *            the field delimiter string
     */
    public StrTokenizer(String input, String delim) {
        this(input);
        setDelimiterString(delim);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher.
     *
     * @param input
     *            the string which is to be parsed
     * @param delim
     *            the field delimiter matcher
     */
    public StrTokenizer(String input, StrMatcher delim) {
        this(input);
        setDelimiterMatcher(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter character and
     * handling quotes using the specified quote character.
     *
     * @param input
     *            the string which is to be parsed
     * @param delim
     *            the field delimiter character
     * @param quote
     *            the field quoted string character
     */
    public StrTokenizer(String input, char delim, char quote) {
        this(input, delim);
        setQuoteChar(quote);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher
     * and handling quotes using the specified quote matcher.
     *
     * @param input
     *            the string which is to be parsed
     * @param delim
     *            the field delimiter matcher
     * @param quote
     *            the field quoted string matcher
     */
    public StrTokenizer(String input, StrMatcher delim, StrMatcher quote) {
        this(input, delim);
        setQuoteMatcher(quote);
    }

    /**
     * Constructs a tokenizer splitting on space, tab, newline and formfeed as
     * per StringTokenizer.
     * <p>
     * The input character array is not cloned, and must not be altered after
     * passing in to this method.
     *
     * @param input
     *            the string which is to be parsed, not cloned
     */
    public StrTokenizer(char[] input) {
        super();
        this.chars = input;
    }

    /**
     * Constructs a tokenizer splitting on the specified character.
     * <p>
     * The input character array is not cloned, and must not be altered after
     * passing in to this method.
     *
     * @param input
     *            the string which is to be parsed, not cloned
     * @param delim
     *            the field delimiter character
     */
    public StrTokenizer(char[] input, char delim) {
        this(input);
        setDelimiterChar(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified string.
     * <p>
     * The input character array is not cloned, and must not be altered after
     * passing in to this method.
     *
     * @param input
     *            the string which is to be parsed, not cloned
     * @param delim
     *            the field delimiter string
     */
    public StrTokenizer(char[] input, String delim) {
        this(input);
        setDelimiterString(delim);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher.
     * <p>
     * The input character array is not cloned, and must not be altered after
     * passing in to this method.
     *
     * @param input
     *            the string which is to be parsed, not cloned
     * @param delim
     *            the field delimiter matcher
     */
    public StrTokenizer(char[] input, StrMatcher delim) {
        this(input);
        setDelimiterMatcher(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter character and
     * handling quotes using the specified quote character.
     * <p>
     * The input character array is not cloned, and must not be altered after
     * passing in to this method.
     *
     * @param input
     *            the string which is to be parsed, not cloned
     * @param delim
     *            the field delimiter character
     * @param quote
     *            the field quoted string character
     */
    public StrTokenizer(char[] input, char delim, char quote) {
        this(input, delim);
        setQuoteChar(quote);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher
     * and handling quotes using the specified quote matcher.
     * <p>
     * The input character array is not cloned, and must not be altered after
     * passing in to this method.
     *
     * @param input
     *            the string which is to be parsed, not cloned
     * @param delim
     *            the field delimiter matcher
     * @param quote
     *            the field quoted string matcher
     */
    public StrTokenizer(char[] input, StrMatcher delim, StrMatcher quote) {
        this(input, delim);
        setQuoteMatcher(quote);
    }

    // API
    // -----------------------------------------------------------------------

    /**
     * Gets the number of tokens found in the String.
     *
     * @return the number of matched tokens
     */
    public int size() {
        checkTokenized();
        return tokens.length;
    }

    /**
     * Gets the next token from the String. Equivalent to {@link #next()} except
     * it returns null rather than throwing {@link NoSuchElementException} when
     * no tokens remain.
     *
     * @return the next sequential token, or null when no more tokens are found
     */
    public String nextToken() {
        if (hasNext()) {
            return tokens[tokenPos++];
        }
        return null;
    }

    /**
     * Gets the previous token from the String.
     *
     * @return the previous sequential token, or null when no more tokens are
     *         found
     */
    public String previousToken() {
        if (hasPrevious()) {
            return tokens[--tokenPos];
        }
        return null;
    }

    /**
     * Gets a copy of the full token list as an independent modifiable array.
     *
     * @return the tokens as a String array
     */
    public String[] getTokenArray() {
        checkTokenized();
        return tokens.clone();
    }

    /**
     * Gets a copy of the full token list as an independent modifiable list.
     *
     * @return the tokens as a new String list
     */
    public List<String> getTokenList() {
        checkTokenized();
        List<String> list = new ArrayList<String>(tokens.length);
        for (String token : tokens) {
            list.add(token);
        }
        return list;
    }

    /**
     * Resets this tokenizer, forgetting all parsing and iteration already
     * completed.
     * <p>
     * This method allows the same tokenizer to be reused for the same String.
     *
     * @return this, to enable chaining
     */
    public StrTokenizer reset() {
        tokenPos = 0;
        tokens = null;
        return this;
    }

    /**
     * Reset this tokenizer, giving it a new input string to parse. In this
     * manner you can re-use a tokenizer with the same settings on multiple
     * input lines.
     *
     * @param input
     *            the new string to tokenize, null sets no text to parse
     * @return this, to enable chaining
     */
    public StrTokenizer reset(String input) {
        reset();
        this.chars = (input != null) ? input.toCharArray() : null;
        return this;
    }

    /**
     * Reset this tokenizer, giving it a new input string to parse. In this
     * manner you can re-use a tokenizer with the same settings on multiple
     * input lines.
     * <p>
     * The input character array is not cloned, and must not be altered after
     * passing in to this method.
     *
     * @param input
     *            the new character array to tokenize, not cloned, null sets no
     *            text to parse
     * @return this, to enable chaining
     */
    public StrTokenizer reset(char[] input) {
        reset();
        this.chars = input;
        return this;
    }

    // ListIterator
    // -----------------------------------------------------------------------

    /**
     * Checks whether there are any more tokens.
     *
     * @return true if there are more tokens
     */
    public boolean hasNext() {
        checkTokenized();
        return tokenPos < tokens.length;
    }

    /**
     * Gets the next token.
     *
     * @return the next String token
     * @throws NoSuchElementException
     *             if there are no more elements
     */
    public Object next() {
        if (hasNext()) {
            return tokens[tokenPos++];
        }
        throw new NoSuchElementException();
    }

    /**
     * Gets the index of the next token to return.
     *
     * @return the next token index
     */
    public int nextIndex() {
        return tokenPos;
    }

    /**
     * Checks whether there are any previous tokens that can be iterated to.
     *
     * @return true if there are previous tokens
     */
    public boolean hasPrevious() {
        checkTokenized();
        return tokenPos > 0;
    }

    /**
     * Gets the token previous to the last returned token.
     *
     * @return the previous token
     * @throws NoSuchElementException
     *             if there is no previous element
     */
    public Object previous() {
        if (hasPrevious()) {
            return tokens[--tokenPos];
        }
        throw new NoSuchElementException();
    }

    /**
     * Gets the index of the previous token.
     *
     * @return the previous token index
     */
    public int previousIndex() {
        return tokenPos - 1;
    }

    /**
     * Unsupported ListIterator operation.
     *
     * @throws UnsupportedOperationException
     *             always
     */
    public void remove() {
        throw new UnsupportedOperationException("remove() is unsupported");
    }

    /**
     * Unsupported ListIterator operation.
     *
     * @param obj
     *            this parameter ignored.
     * @throws UnsupportedOperationException
     *             always
     */
    public void set(Object obj) {
        throw new UnsupportedOperationException("set() is unsupported");
    }

    /**
     * Unsupported ListIterator operation.
     *
     * @param obj
     *            this parameter ignored.
     * @throws UnsupportedOperationException
     *             always
     */
    public void add(Object obj) {
        throw new UnsupportedOperationException("add() is unsupported");
    }

    // Implementation
    // -----------------------------------------------------------------------

    /**
     * Checks if tokenization has been done, and if not then do it.
     */
    private void checkTokenized() {
        if (tokens == null) {
            // still call tokenize when there is no text, as a subclass may do
            // some work
            List<String> split = (chars == null)
                    ? tokenize(null, 0, 0)
                    : tokenize(chars, 0, chars.length);
            tokens = split.toArray(new String[split.size()]);
        }
    }

    /**
     * Internal method to performs the tokenization.
     * <p>
     * Most users of this class do not need to call this method. This method
     * will be called automatically by other (public) methods when required.
     * <p>
     * This method exists to allow subclasses to add code before or after the
     * tokenization. For example, a subclass could alter the character array,
     * offset or count to be parsed, or call the tokenizer multiple times on
     * multiple strings. It is also be possible to filter the results.
     * <p>
     * <code>StrTokenizer</code> will always pass a zero offset and a count
     * equal to the length of the array to this method, however a subclass may
     * pass other values, or even an entirely different array.
     *
     * @param chars
     *            the character array being tokenized, may be null
     * @param offset
     *            the start position within the character array, must be valid
     * @param count
     *            the number of characters to tokenize, must be valid
     * @return a new modifiable list of String tokens, empty if the array is
     *         null or the count is zero
     */
    protected List<String> tokenize(char[] chars, int offset, int count) {
        if (chars == null || count == 0) {
            return new ArrayList<String>(0);
        }
        StrBuilder buf = new StrBuilder();
        List<String> tokens = new ArrayList<String>();
        // count is a length, so the exclusive end of the region is relative
        // to offset (identical to count when offset is zero)
        int end = offset + count;
        int pos = offset;

        // loop around the entire region
        while (pos >= 0 && pos < end) {
            // find next token
            pos = readNextToken(chars, pos, end, buf, tokens);

            // handle case where end of string is a delimiter
            if (pos >= end) {
                addToken(tokens, "");
            }
        }
        return tokens;
    }

    /**
     * Adds a token to a list, paying attention to the parameters we've set.
     * <p>
     * A null or empty token is dropped when empty tokens are ignored, and
     * otherwise stored as null when empty tokens are returned as null.
     *
     * @param list
     *            the list to add to
     * @param tok
     *            the token to add
     */
    private void addToken(List<String> list, String tok) {
        if (tok == null || tok.length() == 0) {
            if (isIgnoreEmptyTokens()) {
                return;
            }
            if (isEmptyTokenAsNull()) {
                tok = null;
            }
        }
        list.add(tok);
    }

    /**
     * Reads character by character through the String to get the next token.
     *
     * @param chars
     *            the character array being tokenized
     * @param start
     *            the first character of field
     * @param len
     *            the exclusive end index of the region being tokenized
     * @param workArea
     *            a temporary work area
     * @param tokens
     *            the list of parsed tokens
     * @return the starting position of the next field (the character
     *         immediately after the delimiter), or -1 if end of string found
     */
    private int readNextToken(char[] chars, int start, int len,
            StrBuilder workArea, List<String> tokens) {
        // skip all leading whitespace, unless it is the
        // field delimiter or the quote character
        while (start < len) {
            int removeLen = Math.max(
                    getIgnoredMatcher().isMatch(chars, start, start, len),
                    getTrimmerMatcher().isMatch(chars, start, start, len));
            if (removeLen == 0
                    || getDelimiterMatcher().isMatch(chars, start, start, len) > 0
                    || getQuoteMatcher().isMatch(chars, start, start, len) > 0) {
                break;
            }
            start += removeLen;
        }

        // handle reaching end
        if (start >= len) {
            addToken(tokens, "");
            return -1;
        }

        // handle empty token
        int delimLen = getDelimiterMatcher().isMatch(chars, start, start, len);
        if (delimLen > 0) {
            addToken(tokens, "");
            return start + delimLen;
        }

        // handle found token
        int quoteLen = getQuoteMatcher().isMatch(chars, start, start, len);
        if (quoteLen > 0) {
            return readWithQuotes(chars, start + quoteLen, len, workArea,
                    tokens, start, quoteLen);
        }
        return readWithQuotes(chars, start, len, workArea, tokens, 0, 0);
    }

    /**
     * Reads a possibly quoted string token.
     *
     * @param chars
     *            the character array being tokenized
     * @param start
     *            the first character of field
     * @param len
     *            the exclusive end index of the region being tokenized
     * @param workArea
     *            a temporary work area
     * @param tokens
     *            the list of parsed tokens
     * @param quoteStart
     *            the start position of the matched quote, 0 if no quoting
     * @param quoteLen
     *            the length of the matched quote, 0 if no quoting
     * @return the starting position of the next field (the character
     *         immediately after the delimiter), or -1 if end of string found
     */
    private int readWithQuotes(char[] chars, int start, int len,
            StrBuilder workArea, List<String> tokens, int quoteStart,
            int quoteLen) {
        // Loop until we've found the end of the quoted
        // string or the end of the input
        workArea.clear();
        int pos = start;
        boolean quoting = (quoteLen > 0);
        // length of workArea content to keep; trailing trimmable characters
        // copied after this point are dropped when the token ends
        int trimStart = 0;

        while (pos < len) {
            // quoting mode can occur several times throughout a string
            // we must switch between quoting and non-quoting until we
            // encounter a non-quoted delimiter, or end of string
            if (quoting) {
                // In quoting mode

                // If we've found a quote character, see if it's
                // followed by a second quote. If so, then we need
                // to actually put the quote character into the token
                // rather than end the token.
                if (isQuote(chars, pos, len, quoteStart, quoteLen)) {
                    if (isQuote(chars, pos + quoteLen, len, quoteStart, quoteLen)) {
                        // matched pair of quotes, thus an escaped quote
                        workArea.append(chars, pos, quoteLen);
                        pos += (quoteLen * 2);
                        trimStart = workArea.size();
                        continue;
                    }

                    // end of quoting
                    quoting = false;
                    pos += quoteLen;
                    continue;
                }

                // copy regular character from inside quotes
                workArea.append(chars[pos++]);
                trimStart = workArea.size();

            } else {
                // Not in quoting mode

                // check for delimiter, and thus end of token
                int delimLen = getDelimiterMatcher().isMatch(chars, pos, start, len);
                if (delimLen > 0) {
                    // return condition when end of token found
                    addToken(tokens, workArea.substring(0, trimStart));
                    return pos + delimLen;
                }

                // check for quote, and thus back into quoting mode
                if (quoteLen > 0) {
                    if (isQuote(chars, pos, len, quoteStart, quoteLen)) {
                        quoting = true;
                        pos += quoteLen;
                        continue;
                    }
                }

                // check for ignored (outside quotes), and ignore
                int ignoredLen = getIgnoredMatcher().isMatch(chars, pos, start, len);
                if (ignoredLen > 0) {
                    pos += ignoredLen;
                    continue;
                }

                // check for trimmed character
                // don't yet know if its at the end, so copy to workArea
                // use trimStart to keep track of trim at the end
                int trimmedLen = getTrimmerMatcher().isMatch(chars, pos, start, len);
                if (trimmedLen > 0) {
                    workArea.append(chars, pos, trimmedLen);
                    pos += trimmedLen;
                    continue;
                }

                // copy regular character from outside quotes
                workArea.append(chars[pos++]);
                trimStart = workArea.size();
            }
        }

        // return condition when end of string found
        addToken(tokens, workArea.substring(0, trimStart));
        return -1;
    }

    /**
     * Checks if the characters at the index specified match the quote already
     * matched in readNextToken().
     *
     * @param chars
     *            the character array being tokenized
     * @param pos
     *            the position to check for a quote
     * @param len
     *            the exclusive end index of the region being tokenized
     * @param quoteStart
     *            the start position of the matched quote, 0 if no quoting
     * @param quoteLen
     *            the length of the matched quote, 0 if no quoting
     * @return true if a quote is matched
     */
    private boolean isQuote(char[] chars, int pos, int len, int quoteStart,
            int quoteLen) {
        for (int i = 0; i < quoteLen; i++) {
            if ((pos + i) >= len || chars[pos + i] != chars[quoteStart + i]) {
                return false;
            }
        }
        return true;
    }

    // Delimiter
    // -----------------------------------------------------------------------

    /**
     * Gets the field delimiter matcher.
     *
     * @return the delimiter matcher in use
     */
    public StrMatcher getDelimiterMatcher() {
        return this.delimMatcher;
    }

    /**
     * Sets the field delimiter matcher.
     * <p>
     * The delimiter is used to separate one token from another.
     *
     * @param delim
     *            the delimiter matcher to use, null means match nothing
     * @return this, to enable chaining
     */
    public StrTokenizer setDelimiterMatcher(StrMatcher delim) {
        if (delim == null) {
            this.delimMatcher = StrMatcher.noneMatcher();
        } else {
            this.delimMatcher = delim;
        }
        return this;
    }

    /**
     * Sets the field delimiter character.
     *
     * @param delim
     *            the delimiter character to use
     * @return this, to enable chaining
     */
    public StrTokenizer setDelimiterChar(char delim) {
        return setDelimiterMatcher(StrMatcher.charMatcher(delim));
    }

    /**
     * Sets the field delimiter string.
     *
     * @param delim
     *            the delimiter string to use
     * @return this, to enable chaining
     */
    public StrTokenizer setDelimiterString(String delim) {
        return setDelimiterMatcher(StrMatcher.stringMatcher(delim));
    }

    // Quote
    // -----------------------------------------------------------------------

    /**
     * Gets the quote matcher currently in use.
     * <p>
     * The quote character is used to wrap data between the tokens. This enables
     * delimiters to be entered as data. A tokenizer created through a
     * constructor starts with a matcher that matches nothing (no quoting); the
     * CSV and TSV factory instances use the double quote.
     *
     * @return the quote matcher in use
     */
    public StrMatcher getQuoteMatcher() {
        return quoteMatcher;
    }

    /**
     * Set the quote matcher to use.
     * <p>
     * The quote character is used to wrap data between the tokens. This enables
     * delimiters to be entered as data.
     *
     * @param quote
     *            the quote matcher to use, null ignored
     * @return this, to enable chaining
     */
    public StrTokenizer setQuoteMatcher(StrMatcher quote) {
        if (quote != null) {
            this.quoteMatcher = quote;
        }
        return this;
    }

    /**
     * Sets the quote character to use.
     * <p>
     * The quote character is used to wrap data between the tokens. This enables
     * delimiters to be entered as data.
     *
     * @param quote
     *            the quote character to use
     * @return this, to enable chaining
     */
    public StrTokenizer setQuoteChar(char quote) {
        return setQuoteMatcher(StrMatcher.charMatcher(quote));
    }

    // Ignored
    // -----------------------------------------------------------------------

    /**
     * Gets the ignored character matcher.
     * <p>
     * These characters are ignored when parsing the String, unless they are
     * within a quoted region. The default value is not to ignore anything.
     *
     * @return the ignored matcher in use
     */
    public StrMatcher getIgnoredMatcher() {
        return ignoredMatcher;
    }

    /**
     * Set the matcher for characters to ignore.
     * <p>
     * These characters are ignored when parsing the String, unless they are
     * within a quoted region.
     *
     * @param ignored
     *            the ignored matcher to use, null ignored
     * @return this, to enable chaining
     */
    public StrTokenizer setIgnoredMatcher(StrMatcher ignored) {
        if (ignored != null) {
            this.ignoredMatcher = ignored;
        }
        return this;
    }

    /**
     * Set the character to ignore.
     * <p>
     * This character is ignored when parsing the String, unless it is within a
     * quoted region.
     *
     * @param ignored
     *            the ignored character to use
     * @return this, to enable chaining
     */
    public StrTokenizer setIgnoredChar(char ignored) {
        return setIgnoredMatcher(StrMatcher.charMatcher(ignored));
    }

    // Trimmer
    // -----------------------------------------------------------------------

    /**
     * Gets the trimmer character matcher.
     * <p>
     * These characters are trimmed off on each side of the delimiter until the
     * token or quote is found. The default value is not to trim anything.
     *
     * @return the trimmer matcher in use
     */
    public StrMatcher getTrimmerMatcher() {
        return trimmerMatcher;
    }

    /**
     * Sets the matcher for characters to trim.
     * <p>
     * These characters are trimmed off on each side of the delimiter until the
     * token or quote is found.
     *
     * @param trimmer
     *            the trimmer matcher to use, null ignored
     * @return this, to enable chaining
     */
    public StrTokenizer setTrimmerMatcher(StrMatcher trimmer) {
        if (trimmer != null) {
            this.trimmerMatcher = trimmer;
        }
        return this;
    }

    // -----------------------------------------------------------------------

    /**
     * Gets whether the tokenizer currently returns empty tokens as null. The
     * default for this property is false.
     *
     * @return true if empty tokens are returned as null
     */
    public boolean isEmptyTokenAsNull() {
        return this.emptyAsNull;
    }

    /**
     * Sets whether the tokenizer should return empty tokens as null. The
     * default for this property is false.
     *
     * @param emptyAsNull
     *            whether empty tokens are returned as null
     * @return this, to enable chaining
     */
    public StrTokenizer setEmptyTokenAsNull(boolean emptyAsNull) {
        this.emptyAsNull = emptyAsNull;
        return this;
    }

    // -----------------------------------------------------------------------

    /**
     * Gets whether the tokenizer currently ignores empty tokens. The default
     * for this property is true.
     *
     * @return true if empty tokens are not returned
     */
    public boolean isIgnoreEmptyTokens() {
        return ignoreEmptyTokens;
    }

    /**
     * Sets whether the tokenizer should ignore and not return empty tokens. The
     * default for this property is true.
     *
     * @param ignoreEmptyTokens
     *            whether empty tokens are not returned
     * @return this, to enable chaining
     */
    public StrTokenizer setIgnoreEmptyTokens(boolean ignoreEmptyTokens) {
        this.ignoreEmptyTokens = ignoreEmptyTokens;
        return this;
    }

    // -----------------------------------------------------------------------

    /**
     * Gets the String content that the tokenizer is parsing.
     *
     * @return the string content being parsed, or null if no text is set
     */
    public String getContent() {
        if (chars == null) {
            return null;
        }
        return new String(chars);
    }

    // -----------------------------------------------------------------------

    /**
     * Creates a new instance of this Tokenizer. The new instance is reset so
     * that it will be at the start of the token list. If a
     * {@link CloneNotSupportedException} is caught, return <code>null</code>.
     *
     * @return a new instance of this Tokenizer which has been reset.
     */
    @Override
    public Object clone() {
        try {
            return cloneReset();
        } catch (CloneNotSupportedException ex) {
            // documented contract: signal the failure with null
            return null;
        }
    }

    /**
     * Creates a new instance of this Tokenizer. The new instance is reset so
     * that it will be at the start of the token list.
     *
     * @return a new instance of this Tokenizer which has been reset.
     * @throws CloneNotSupportedException
     *             if there is a problem cloning
     */
    Object cloneReset() throws CloneNotSupportedException {
        // this method exists to enable 100% test coverage
        StrTokenizer cloned = (StrTokenizer) super.clone();
        if (cloned.chars != null) {
            // give the copy its own character array
            cloned.chars = cloned.chars.clone();
        }
        cloned.reset();
        return cloned;
    }

    // -----------------------------------------------------------------------

    /**
     * Gets a debug representation of this tokenizer: the token list when the
     * text has already been tokenized, otherwise a fixed placeholder. Calling
     * this method never triggers tokenization.
     *
     * @return the string representation of this tokenizer
     */
    @Override
    public String toString() {
        if (tokens == null) {
            return "StrTokenizer[not tokenized yet]";
        }
        return "StrTokenizer" + getTokenList();
    }

}