/*
* Static String formatting and query routines.
* Copyright (C) 2001-2005 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Java+Utilities
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
import java.util.HashMap;
import java.util.regex.Pattern;
/**
* Utilities for String formatting, manipulation, and queries.
* More information about this class is available from <a target="_top" href=
* "http://ostermiller.org/utils/StringHelper.html">ostermiller.org</a>.
*
* @author Stephen Ostermiller http://ostermiller.org/contact.pl?regarding=Java+Utilities
* @since ostermillerutils 1.00.00
*/
public class StringHelper {
/**
* Split the given String into tokens.
*
* This method is meant to be similar to the split
* function in other programming languages but it does
* not use regular expressions. Rather the String is
* split on a single String literal.
*
* Unlike java.util.StringTokenizer which accepts
* multiple character tokens as delimiters, the delimiter
* here is a single String literal.
*
* Each null token is returned as an empty String.
* Delimiters are never returned as tokens.
*
* If there is no delimiter because it is either empty or
* null, the only element in the result is the original String.
*
* StringHelper.split("1-2-3", "-");<br>
* result: {"1","2","3"}<br>
* StringHelper.split("-1--2-", "-");<br>
* result: {"","1","","2",""}<br>
* StringHelper.split("123", "");<br>
* result: {"123"}<br>
* StringHelper.split("1-2---3----4", "--");<br>
* result: {"1-2","-3","","4"}<br>
*
* @param s String to be split.
* @param delimiter String literal on which to split.
* @return an array of tokens.
* @throws NullPointerException if s is null.
*
* @since ostermillerutils 1.00.00
*/
public static String[] split(String s, String delimiter){
int delimiterLength;
// the next statement has the side effect of throwing a null pointer
// exception if s is null.
int stringLength = s.length();
if (delimiter == null || (delimiterLength = delimiter.length()) == 0){
// it is not inherently clear what to do if there is no delimiter
// On one hand it would make sense to return each character because
// the null String can be found between each pair of characters in
// a String. However, it can be found many times there and we don'
// want to be returning multiple null tokens.
// returning the whole String will be defined as the correct behavior
// in this instance.
return new String[] {s};
}
// a two pass solution is used because a one pass solution would
// require the possible resizing and copying of memory structures
// In the worst case it would have to be resized n times with each
// resize having a O(n) copy leading to an O(n^2) algorithm.
int count;
int start;
int end;
// Scan s and count the tokens.
count = 0;
start = 0;
while((end = s.indexOf(delimiter, start)) != -1){
count++;
start = end + delimiterLength;
}
count++;
// allocate an array to return the tokens,
// we now know how big it should be
String[] result = new String[count];
// Scan s again, but this time pick out the tokens
count = 0;
start = 0;
while((end = s.indexOf(delimiter, start)) != -1){
result[count] = (s.substring(start, end));
count++;
start = end + delimiterLength;
}
end = stringLength;
result[count] = s.substring(start, end);
return (result);
}
/**
* Split the given String into tokens. Delimiters will
* be returned as tokens.
*
* This method is meant to be similar to the split
* function in other programming languages but it does
* not use regular expressions. Rather the String is
* split on a single String literal.
*
* Unlike java.util.StringTokenizer which accepts
* multiple character tokens as delimiters, the delimiter
* here is a single String literal.
*
* Each null token is returned as an empty String.
* Delimiters are never returned as tokens.
*
* If there is no delimiter because it is either empty or
* null, the only element in the result is the original String.
*
* StringHelper.split("1-2-3", "-");<br>
* result: {"1","-","2","-","3"}<br>
* StringHelper.split("-1--2-", "-");<br>
* result: {"","-","1","-","","-","2","-",""}<br>
* StringHelper.split("123", "");<br>
* result: {"123"}<br>
* StringHelper.split("1-2--3---4----5", "--");<br>
* result: {"1-2","--","3","--","-4","--","","--","5"}<br>
*
* @param s String to be split.
* @param delimiter String literal on which to split.
* @return an array of tokens.
* @throws NullPointerException if s is null.
*
* @since ostermillerutils 1.05.00
*/
public static String[] splitIncludeDelimiters(String s, String delimiter){
int delimiterLength;
// the next statement has the side effect of throwing a null pointer
// exception if s is null.
int stringLength = s.length();
if (delimiter == null || (delimiterLength = delimiter.length()) == 0){
// it is not inherently clear what to do if there is no delimiter
// On one hand it would make sense to return each character because
// the null String can be found between each pair of characters in
// a String. However, it can be found many times there and we don'
// want to be returning multiple null tokens.
// returning the whole String will be defined as the correct behavior
// in this instance.
return new String[] {s};
}
// a two pass solution is used because a one pass solution would
// require the possible resizing and copying of memory structures
// In the worst case it would have to be resized n times with each
// resize having a O(n) copy leading to an O(n^2) algorithm.
int count;
int start;
int end;
// Scan s and count the tokens.
count = 0;
start = 0;
while((end = s.indexOf(delimiter, start)) != -1){
count+=2;
start = end + delimiterLength;
}
count++;
// allocate an array to return the tokens,
// we now know how big it should be
String[] result = new String[count];
// Scan s again, but this time pick out the tokens
count = 0;
start = 0;
while((end = s.indexOf(delimiter, start)) != -1){
result[count] = (s.substring(start, end));
count++;
result[count] = delimiter;
count++;
start = end + delimiterLength;
}
end = stringLength;
result[count] = s.substring(start, end);
return (result);
}
}