A collection of File, URL and filename utility methods
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
/**
 * A collection of <code>File</code>, <code>URL</code> and filename
 * utility methods.
 *
 * <p>All methods are static and work on plain strings; none of them touches
 * the file system or the network.</p>
 *
 * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
 * @version CVS $Revision: 1.1 $ $Date: 2002/03/17 13:37:13 $
 */
public class NetUtils {

    /**
     * The characters that {@link #encodePath(String)} copies through
     * unescaped: the 'lowalpha', 'hialpha', 'digit', 'safe' and 'extra' sets
     * of RFC 1738 plus the separators <code>/ : @ &amp; =</code>.
     */
    private static final BitSet safeCharacters = new BitSet(256);

    /** Upper-case hexadecimal digits used to build <code>%XX</code> escapes. */
    private static final char[] hexadecimal =
        {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
         'A', 'B', 'C', 'D', 'E', 'F'};

    static {
        int i;
        // 'lowalpha' rule
        for (i = 'a'; i <= 'z'; i++) {
            safeCharacters.set(i);
        }
        // 'hialpha' rule
        for (i = 'A'; i <= 'Z'; i++) {
            safeCharacters.set(i);
        }
        // 'digit' rule
        for (i = '0'; i <= '9'; i++) {
            safeCharacters.set(i);
        }
        // 'safe' rule
        safeCharacters.set('$');
        safeCharacters.set('-');
        safeCharacters.set('_');
        safeCharacters.set('.');
        safeCharacters.set('+');
        // 'extra' rule
        safeCharacters.set('!');
        safeCharacters.set('*');
        safeCharacters.set('\'');
        safeCharacters.set('(');
        safeCharacters.set(')');
        safeCharacters.set(',');
        // special characters common to http: file: and ftp: URLs ('fsegment' and 'hsegment' rules)
        safeCharacters.set('/');
        safeCharacters.set(':');
        safeCharacters.set('@');
        safeCharacters.set('&');
        safeCharacters.set('=');
    }

    /**
     * Decode a path, i.e. the inverse of {@link #encodePath(String)}.
     *
     * <p><code>%XX</code> escapes are interpreted as UTF-8 bytes, matching
     * what <code>encodePath</code> produces. A literal <code>'+'</code> is
     * kept as is: it is a legal, unescaped path character and only means
     * "space" in form-encoded data.</p>
     *
     * @param path the path to decode
     * @return the decoded path
     * @throws Exception kept in the signature for backward compatibility;
     *         the underlying <code>URLDecoder</code> may reject malformed
     *         escapes with an <code>IllegalArgumentException</code>
     */
    public static String decodePath(String path) throws Exception {
        // URLDecoder implements form decoding, where '+' stands for a space.
        // Escaping every '+' first makes it come out of the decoder unchanged.
        StringBuffer escaped = new StringBuffer(path.length());
        for (int i = 0; i < path.length(); i++) {
            char c = path.charAt(i);
            if (c == '+') {
                escaped.append("%2B");
            } else {
                escaped.append(c);
            }
        }
        return java.net.URLDecoder.decode(escaped.toString(), "UTF-8");
    }

    /**
     * Encode a path as required by the URL specification (<a href="http://www.ietf.org/rfc/rfc1738.txt">
     * RFC 1738</a>). This differs from <code>java.net.URLEncoder.encode()</code> which encodes according
     * to the <code>x-www-form-urlencoded</code> MIME format: here <code>' '</code> becomes
     * <code>"%20"</code> and <code>'/'</code> is not escaped.
     *
     * <p>Every character outside the safe set is converted to UTF-8 and each
     * resulting byte is written as <code>%XX</code> with upper-case hex
     * digits. A surrogate pair is encoded as the single code point it
     * represents.</p>
     *
     * @param path the path to encode
     * @return the encoded path
     */
    public static String encodePath(String path) {
        int length = path.length();
        StringBuffer rewrittenPath = new StringBuffer(length);
        int i = 0;
        while (i < length) {
            char c = path.charAt(i);
            if (safeCharacters.get(c)) {
                rewrittenPath.append(c);
                i++;
                continue;
            }
            // Keep a high/low surrogate pair together so that it is encoded
            // as one supplementary code point rather than two broken halves.
            int end = i + 1;
            if (c >= '\uD800' && c <= '\uDBFF' && end < length) {
                char next = path.charAt(end);
                if (next >= '\uDC00' && next <= '\uDFFF') {
                    end++;
                }
            }
            byte[] encoded = toUtf8(path.substring(i, end));
            for (int j = 0; j < encoded.length; j++) {
                int value = encoded[j] & 0xff;
                rewrittenPath.append('%');
                rewrittenPath.append(hexadecimal[value >> 4]);
                rewrittenPath.append(hexadecimal[value & 0x0f]);
            }
            i = end;
        }
        return rewrittenPath.toString();
    }

    /** Returns the UTF-8 bytes of the given string. */
    private static byte[] toUtf8(String s) {
        try {
            return s.getBytes("UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 encoding is not available", e);
        }
    }

    /**
     * Returns the path of the given resource.
     *
     * <p>This is everything before the last <code>'/'</code>. When the uri
     * contains no slash, the part after the first <code>':'</code> is
     * returned, or the empty string if there is no colon either.</p>
     *
     * @param uri the resource
     * @return the resource path
     */
    public static String getPath(String uri) {
        int slash = uri.lastIndexOf('/');
        if (slash > -1) {
            return uri.substring(0, slash);
        }
        int colon = uri.indexOf(':');
        return (colon > -1) ? uri.substring(colon + 1) : "";
    }

    /**
     * Remove path and file information from a filename returning only its
     * extension component.
     *
     * <p>A fragment (<code>#...</code>) and a query string
     * (<code>?...</code>) are discarded before the extension is looked up,
     * so dots inside them are never mistaken for the extension.</p>
     *
     * @param uri The filename or uri
     * @return The filename extension (with starting dot!), or
     *         <code>null</code> if the last path segment has no dot
     */
    public static String getExtension(String uri) {
        int cut = uri.indexOf('#');
        if (cut > -1) {
            uri = uri.substring(0, cut);
        }
        cut = uri.indexOf('?');
        if (cut > -1) {
            uri = uri.substring(0, cut);
        }
        int dot = uri.lastIndexOf('.');
        // A slash after the last dot means the dot belongs to a directory name.
        if (dot == -1 || uri.indexOf('/', dot) > -1) {
            return null;
        }
        return uri.substring(dot);
    }

    /**
     * Absolutize a relative resource on the given absolute path.
     *
     * @param path the absolute path; when <code>null</code> or empty the
     *        resource is returned unchanged
     * @param relativeResource the relative resource; when it already starts
     *        with <code>'/'</code> it is returned unchanged, when it is
     *        <code>null</code> the path itself is returned
     * @return the absolutized resource
     */
    public static String absolutize(String path, String relativeResource) {
        if (path == null || path.length() == 0) {
            return relativeResource;
        }
        if (relativeResource == null) {
            return path;
        }
        // startsWith (rather than charAt(0)) is safe for an empty resource.
        if (relativeResource.startsWith("/")) {
            // resource is already absolute
            return relativeResource;
        }
        StringBuffer b = new StringBuffer(path.length() + 1 + relativeResource.length());
        b.append(path);
        if (path.charAt(path.length() - 1) != '/') {
            b.append('/');
        }
        b.append(relativeResource);
        return b.toString();
    }

    /**
     * Relativize an absolute resource on a given absolute path.
     *
     * <p>The path is always treated as a directory, whether or not it ends
     * with a slash. A resource below that directory is returned relative to
     * it; any other resource is reached by climbing with one
     * <code>"../"</code> per directory level that the two do not share.</p>
     *
     * @param path the absolute path; when <code>null</code> or empty the
     *        resource is returned unchanged
     * @param absoluteResource the absolute resource
     * @return the resource relative to the given path
     */
    public static String relativize(String path, String absoluteResource) {
        if (path == null || path.length() == 0) {
            return absoluteResource;
        }
        String base = path.endsWith("/") ? path : path + "/";
        if (absoluteResource.startsWith(base)) {
            // resource is a descendant: comparing against the slash-terminated
            // base keeps "/foobar" from being mistaken for a child of "/foo"
            return absoluteResource.substring(base.length());
        }

        // Length of the common prefix, cut back to the last shared '/'.
        int common = 0;
        int limit = Math.min(base.length(), absoluteResource.length());
        for (int i = 0; i < limit && base.charAt(i) == absoluteResource.charAt(i); i++) {
            if (base.charAt(i) == '/') {
                common = i + 1;
            }
        }

        // One "../" for every directory of the base that is not shared.
        StringBuffer b = new StringBuffer();
        for (int i = common; i < base.length(); i++) {
            if (base.charAt(i) == '/') {
                b.append("../");
            }
        }
        b.append(absoluteResource.substring(common));
        return b.toString();
    }

    /**
     * Normalize a uri containing ../ and ./ paths.
     *
     * <p><code>"."</code> segments are dropped and each <code>".."</code>
     * removes the segment before it. Leading <code>".."</code> segments of a
     * relative uri are kept because there is nothing they could remove. For
     * a uri starting with <code>'/'</code> the root is always preserved and
     * a <code>".."</code> that would climb above it is discarded. Empty
     * segments (from <code>"//"</code> or a trailing slash) are kept.</p>
     *
     * @param uri The uri path to normalize
     * @return The normalized uri
     */
    public static String normalize(String uri) {
        boolean absolute = uri.startsWith("/");

        // The number of segments is bounded by the number of slashes plus one.
        int capacity = 1;
        for (int i = 0; i < uri.length(); i++) {
            if (uri.charAt(i) == '/') {
                capacity++;
            }
        }
        String[] segments = new String[capacity];
        int depth = 0;

        // For an absolute uri the leading slash is handled separately so
        // that the root can never be popped by a "..".
        int start = absolute ? 1 : 0;
        while (true) {
            int slash = uri.indexOf('/', start);
            String segment = (slash == -1) ? uri.substring(start) : uri.substring(start, slash);
            if (".".equals(segment)) {
                // ignore
            } else if ("..".equals(segment)) {
                if (depth > 0 && !"..".equals(segments[depth - 1])) {
                    depth--;
                } else if (!absolute) {
                    segments[depth++] = segment;
                }
                // else: already at the root of an absolute uri, drop it
            } else {
                segments[depth++] = segment;
            }
            if (slash == -1) {
                break;
            }
            start = slash + 1;
        }

        StringBuffer b = new StringBuffer(uri.length());
        if (absolute) {
            b.append('/');
        }
        for (int i = 0; i < depth; i++) {
            if (i > 0) {
                b.append('/');
            }
            b.append(segments[i]);
        }
        return b.toString();
    }

    /**
     * Remove parameters from a uri.
     *
     * <p>The query string starts at the first <code>'?'</code>. Every
     * <code>name=value</code> pair in it is stored in the given map, with
     * name and value taken verbatim (no unescaping). Entries without an
     * <code>'='</code> are skipped.</p>
     *
     * @param uri The uri path to deparameterize.
     * @param parameters The map that collects parameters.
     * @return The cleaned uri
     */
    public static String deparameterize(String uri, Map parameters) {
        int mark = uri.indexOf('?');
        if (mark == -1) {
            return uri;
        }
        String query = uri.substring(mark + 1);
        int start = 0;
        while (true) {
            int amp = query.indexOf('&', start);
            int end = (amp == -1) ? query.length() : amp;
            int eq = query.indexOf('=', start);
            if (eq != -1 && eq < end) {
                parameters.put(query.substring(start, eq), query.substring(eq + 1, end));
            }
            if (amp == -1) {
                break;
            }
            start = amp + 1;
        }
        return uri.substring(0, mark);
    }

    /**
     * Append parameters to a uri.
     *
     * <p>The entries are written as <code>name=value</code> joined with
     * <code>'&amp;'</code>, in the iteration order of the map and without
     * any escaping. They are introduced with <code>'?'</code>, or appended
     * to the existing query string if the uri already contains one.</p>
     *
     * @param uri The uri to extend.
     * @param parameters The parameters to append; when empty the uri is
     *        returned unchanged.
     * @return The uri followed by the parameters
     */
    public static String parameterize(String uri, Map parameters) {
        if (parameters.size() == 0) {
            return uri;
        }
        StringBuffer buffer = new StringBuffer(uri);
        if (uri.indexOf('?') == -1) {
            buffer.append('?');
        } else if (!uri.endsWith("?") && !uri.endsWith("&")) {
            // the uri already has a query string: continue it
            buffer.append('&');
        }
        for (Iterator i = parameters.entrySet().iterator(); i.hasNext();) {
            Map.Entry entry = (Map.Entry) i.next();
            buffer.append(entry.getKey());
            buffer.append('=');
            buffer.append(entry.getValue());
            if (i.hasNext()) {
                buffer.append('&');
            }
        }
        return buffer.toString();
    }
}
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
/**
 * A collection of <code>String</code> handling utility methods.
 *
 * @author <a href="mailto:ricardo@apache.org">Ricardo Rocha</a>
 * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
 * @version CVS $Revision: 1.1 $ $Date: 2002/03/17 13:37:13 $
 */
class StringUtils {

    /**
     * Split a string as an array using whitespace as separator
     *
     * @param line The string to be split
     * @return An array of whitespace-separated tokens
     */
    public static String[] split(String line) {
        return split(line, " \t\n\r");
    }

    /**
     * Split a string as an array using a given set of separators.
     * The work is delegated to <code>Tokenizer.tokenize</code> with
     * delimiters not being returned as tokens.
     *
     * @param line The string to be split
     * @param delimiter A string containing token separators
     * @return An array of token
     */
    public static String[] split(String line, String delimiter) {
        return Tokenizer.tokenize(line, delimiter, false);
    }

    /**
     * Tests whether a given character is alphabetic, numeric or
     * underscore. Only the ASCII letters and digits are recognized.
     *
     * @param c The character to be tested
     * @return whether the given character is alphameric or not
     */
    public static boolean isAlphaNumeric(char c) {
        return c == '_' ||
            (c >= 'a' && c <= 'z') ||
            (c >= 'A' && c <= 'Z') ||
            (c >= '0' && c <= '9');
    }

    /**
     * Counts the occurrence of the given char in the string.
     *
     * @param str The string to be tested
     * @param c the char to be counted
     * @return the occurrence of the character in the string.
     */
    public static int count(String str, char c) {
        int occurrences = 0;
        for (int i = 0; i < str.length(); i++) {
            if (str.charAt(i) == c) {
                occurrences++;
            }
        }
        return occurrences;
    }

    /**
     * Matches two strings.
     *
     * @param a The first string
     * @param b The second string
     * @return the index where the two strings stop matching starting from 0,
     *         i.e. the length of their common prefix
     */
    public static int matchStrings(String a, String b) {
        int len = Math.min(a.length(), b.length());
        int i = 0;
        while (i < len && a.charAt(i) == b.charAt(i)) {
            i++;
        }
        return i;
    }

    /**
     * Replaces every <code>${name}</code> token in the input with the value
     * of the system property <code>name</code>.
     *
     * <p>A token whose property is not defined, an empty token
     * (<code>${}</code>) and an unterminated <code>${</code> are copied to
     * the result unchanged. Replacement values are not scanned again for
     * further tokens.</p>
     *
     * @param s The string containing the tokens
     * @return the string with all resolvable tokens replaced; the input
     *         itself when it contains no token
     */
    public static String replaceToken(String s) {
        int startToken = s.indexOf("${");
        if (startToken == -1) {
            // nothing to replace
            return s;
        }
        StringBuffer value = new StringBuffer(s.length());
        int copied = 0;     // everything before this index is already in 'value'
        while (startToken != -1) {
            int endToken = s.indexOf('}', startToken + 2);
            if (endToken == -1) {
                // unterminated token: the rest is copied verbatim below
                break;
            }
            String token = s.substring(startToken + 2, endToken);
            // System.getProperty rejects an empty key, so do not ask for it.
            String replacement = (token.length() == 0) ? null : System.getProperty(token);
            value.append(s.substring(copied, startToken));
            if (replacement != null) {
                value.append(replacement);
            } else {
                value.append(s.substring(startToken, endToken + 1));
            }
            copied = endToken + 1;
            startToken = s.indexOf("${", copied);
        }
        value.append(s.substring(copied));
        return value.toString();
    }
}
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
/**
 * Replacement for StringTokenizer in java.util, because of bug in the
 * Sun's implementation.
 *
 * <p>Unlike <code>java.util.StringTokenizer</code> this class reports the
 * empty tokens found before a leading delimiter, between two adjacent
 * delimiters and after a trailing delimiter. A string containing
 * <i>n</i> delimiter characters therefore yields <i>n</i> + 1 tokens, plus
 * the <i>n</i> delimiters themselves when they are returned as tokens.</p>
 *
 * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek</A>
 */
class Tokenizer implements Enumeration {

    /**
     * Default delimiters " \t\n\r\f":
     * the space character, the tab character, the newline character,
     * the carriage-return character, and the form-feed character.
     */
    public static final String DEFAULT_DELIMITERS = " \t\n\r\f";

    /**
     * String to tokenize.
     */
    private String str = null;

    /**
     * Delimiters.
     */
    private String delim = null;

    /**
     * Flag indicating whether to return the delimiters as tokens.
     */
    private boolean returnTokens = false;

    /**
     * Start position of the previous call to nextToken, -1 before the
     * first call.
     */
    private int previous = -1;

    /**
     * Current position in str string.
     */
    private int current = 0;

    /**
     * Maximal position in str string.
     */
    private int max = 0;

    /**
     * Constructs a string tokenizer for the specified string. All characters
     * in the delim argument are the delimiters for separating tokens.
     * If the returnTokens flag is true, then the delimiter characters are
     * also returned as tokens. Each delimiter is returned as a string of
     * length one. If the flag is false, the delimiter characters are skipped
     * and only serve as separators between tokens.
     *
     * @param str a string to be parsed
     * @param delim the delimiters
     * @param returnTokens flag indicating whether to return the delimiters
     * as tokens
     */
    public Tokenizer(String str, String delim, boolean returnTokens) {
        this.str = str;
        this.delim = delim;
        this.returnTokens = returnTokens;
        max = str.length();
    }

    /**
     * Constructs a string tokenizer for the specified string. The characters
     * in the delim argument are the delimiters for separating tokens.
     * Delimiter characters themselves will not be treated as tokens.
     *
     * @param str a string to be parsed
     * @param delim the delimiters
     */
    public Tokenizer(String str, String delim) {
        this(str, delim, false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The character
     * in the delim argument is the delimiter for separating tokens.
     * Delimiter character themselves will not be treated as token.
     *
     * @param str a string to be parsed
     * @param delim the delimiter
     */
    public Tokenizer(String str, char delim) {
        this(str, String.valueOf(delim), false);
    }

    /**
     * Constructs a string tokenizer for the specified string. The tokenizer
     * uses the default delimiter set, which is " \t\n\r\f": the space
     * character, the tab character, the newline character, the carriage-return
     * character, and the form-feed character. Delimiter characters themselves
     * will not be treated as tokens.
     *
     * @param str a string to be parsed
     */
    public Tokenizer(String str) {
        this(str, DEFAULT_DELIMITERS, false);
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * If this method returns true, then a subsequent call to nextToken with
     * no argument will successfully return a token.
     *
     * @return true if and only if there is at least one token in the string
     * after the current position; false otherwise.
     */
    public boolean hasMoreTokens() {
        if (current < max) {
            return true;
        }
        // At the very end one more (empty) token is due for the empty string
        // and, when delimiters are returned, after a trailing delimiter.
        return current == max
            && (max == 0
                || (returnTokens && delim.indexOf(str.charAt(previous)) >= 0));
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return the next token from this string tokenizer
     *
     * @exception NoSuchElementException if there are no more tokens in this
     * tokenizer's string
     */
    public String nextToken() throws NoSuchElementException {
        if (current == max
            && (max == 0
                || (returnTokens && delim.indexOf(str.charAt(previous)) >= 0))) {
            // final empty token, see hasMoreTokens()
            current++;
            return "";
        }
        if (current >= max) {
            throw new NoSuchElementException();
        }

        int start = current;
        String result = null;
        if (delim.indexOf(str.charAt(start)) >= 0) {
            if (previous == -1 || (returnTokens && previous != current
                    && delim.indexOf(str.charAt(previous)) >= 0)) {
                // Empty token in front of a leading delimiter or between two
                // adjacent delimiters. The delimiter itself is NOT consumed
                // here; the next call deals with it. Consuming it as well
                // would swallow the token that follows a leading delimiter.
                result = "";
            } else if (returnTokens) {
                // the delimiter is the token
                result = str.substring(start, ++current);
            } else {
                // skip the delimiter, the token after it is collected below
                current++;
            }
        }

        previous = start;
        start = current;
        if (result == null) {
            while (current < max && delim.indexOf(str.charAt(current)) < 0) {
                current++;
            }
            result = str.substring(start, current);
        }
        return result;
    }

    /**
     * Returns the next token in this string tokenizer's string. First, the
     * set of characters considered to be delimiters by this Tokenizer
     * object is changed to be the characters in the string delim.
     * Then the next token in the string after the current position is
     * returned. The current position is advanced beyond the recognized token.
     * The new delimiter set remains the default after this call.
     *
     * @param delim the new delimiters
     *
     * @return the next token, after switching to the new delimiter set
     *
     * @exception NoSuchElementException if there are no more tokens in this
     * tokenizer's string.
     */
    public String nextToken(String delim) throws NoSuchElementException {
        this.delim = delim;
        return nextToken();
    }

    /**
     * Returns the same value as the hasMoreTokens method. It exists so that
     * this class can implement the Enumeration interface.
     *
     * @return true if there are more tokens; false otherwise.
     */
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the nextToken method, except that its
     * declared return value is Object rather than String. It exists so that
     * this class can implement the Enumeration interface.
     *
     * @return the next token in the string
     *
     * @exception NoSuchElementException if there are no more tokens in this
     * tokenizer's string
     */
    public Object nextElement() {
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's nextToken method
     * can be called before it generates an exception. The current position
     * is not advanced.
     *
     * @return the number of tokens remaining in the string using the
     * current delimiter set
     */
    public int countTokens() {
        // Run the real tokenizing logic on a saved position so that the
        // count always agrees with what nextToken() will deliver.
        int savedPrevious = previous;
        int savedCurrent = current;
        int count = 0;
        try {
            while (hasMoreTokens()) {
                nextToken();
                count++;
            }
        } finally {
            previous = savedPrevious;
            current = savedCurrent;
        }
        return count;
    }

    /**
     * Resets this tokenizer's state so the tokenizing starts from the begin.
     */
    public void reset() {
        previous = -1;
        current = 0;
    }

    /**
     * Constructs a string tokenizer for the specified string. All characters
     * in the delim argument are the delimiters for separating tokens.
     * If the returnTokens flag is true, then the delimiter characters are
     * also returned as tokens. Each delimiter is returned as a string of
     * length one. If the flag is false, the delimiter characters are skipped
     * and only serve as separators between tokens. Then tokenizes the str
     * and return an String[] array with tokens.
     *
     * @param str a string to be parsed
     * @param delim the delimiters
     * @param returnTokens flag indicating whether to return the delimiters
     * as tokens
     *
     * @return array with tokens; it has exactly one slot per token
     */
    public static String[] tokenize(String str, String delim,
                                    boolean returnTokens) {
        Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens);
        String[] tokens = new String[tokenizer.countTokens()];
        for (int i = 0; i < tokens.length; i++) {
            tokens[i] = tokenizer.nextToken();
        }
        return tokens;
    }
}
Related examples in the same category