A collection of File, URL and filename utility methods

    
/*****************************************************************************
 * Copyright (C) The Apache Software Foundation. All rights reserved.        *
 * ------------------------------------------------------------------------- *
 * This software is published under the terms of the Apache Software License *
 * version 1.1, a copy of which has been included  with this distribution in *
 * the LICENSE file.                                                         *
 *****************************************************************************/


import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;

/**
 * A collection of <code>File</code>, <code>URL</code> and filename
 * utility methods
 *
 * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
 * @version CVS $Revision: 1.1 $ $Date: 2002/03/17 13:37:13 $
 */

public class NetUtils {

    /**
     * Characters that {@link #encodePath(String)} copies through unescaped:
     * the alphanumeric, 'safe' and 'extra' characters of RFC 1738 plus the
     * delimiters shared by http:, file: and ftp: paths.
     */
    private static final BitSet safeCharacters = new BitSet(256);

    /**
     * Upper-case hexadecimal digits, indexed by nibble value, used to build
     * <code>%XX</code> escapes.
     */
    private static final char[] hexadecimal =
    {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
     'A', 'B', 'C', 'D', 'E', 'F'};

    static {
        int i;
        // 'lowalpha' rule
        for (i = 'a'; i <= 'z'; i++) {
            safeCharacters.set(i);
        }
        // 'hialpha' rule
        for (i = 'A'; i <= 'Z'; i++) {
            safeCharacters.set(i);
        }
        // 'digit' rule
        for (i = '0'; i <= '9'; i++) {
            safeCharacters.set(i);
        }

        // 'safe' rule
        safeCharacters.set('$');
        safeCharacters.set('-');
        safeCharacters.set('_');
        safeCharacters.set('.');
        safeCharacters.set('+');

        // 'extra' rule
        safeCharacters.set('!');
        safeCharacters.set('*');
        safeCharacters.set('\'');
        safeCharacters.set('(');
        safeCharacters.set(')');
        safeCharacters.set(',');

        // special characters common to http: file: and ftp: URLs ('fsegment' and 'hsegment' rules)
        safeCharacters.set('/');
        safeCharacters.set(':');
        safeCharacters.set('@');
        safeCharacters.set('&');
        safeCharacters.set('=');
    }

    /**
     * Decode a path, interpreting <code>%XX</code> escapes as UTF-8 bytes,
     * i.e. with the same charset that {@link #encodePath(String)} writes.
     * Decoding is delegated to <code>java.net.URLDecoder</code>, which also
     * turns every '+' into a space.
     *
     * @param path the path to decode
     * @return the decoded path
     * @throws Exception if the path cannot be decoded (for example when it
     *         contains a malformed escape sequence)
     */
    public static String decodePath(String path) throws Exception {
        return java.net.URLDecoder.decode(path, "UTF-8");
    }

    /**
     * Encode a path as required by the URL specification (<a href="http://www.ietf.org/rfc/rfc1738.txt">
     * RFC 1738</a>). This differs from <code>java.net.URLEncoder.encode()</code> which encodes according
     * to the <code>x-www-form-urlencoded</code> MIME format: here ' ' becomes
     * "%20" and '/' is left alone.
     *
     * Every character outside the safe set is converted to its UTF-8 bytes
     * and each byte is written as <code>%XX</code>. A surrogate pair is
     * encoded as one code point.
     *
     * @param path the path to encode
     * @return the encoded path
     */
    public static String encodePath(String path) {
        int length = path.length();
        StringBuffer rewrittenPath = new StringBuffer(length);

        for (int i = 0; i < length; i++) {
            char c = path.charAt(i);
            if (safeCharacters.get(c)) {
                rewrittenPath.append(c);
                continue;
            }

            // Keep a high surrogate together with its low surrogate so the
            // pair is encoded as a single code point.
            int end = i + 1;
            if (c >= 0xD800 && c <= 0xDBFF && end < length) {
                char next = path.charAt(end);
                if (next >= 0xDC00 && next <= 0xDFFF) {
                    end++;
                }
            }

            byte[] encoded;
            try {
                encoded = path.substring(i, end).getBytes("UTF-8");
            } catch (java.io.UnsupportedEncodingException e) {
                // Every Java platform is required to support UTF-8.
                throw new RuntimeException("UTF-8 encoding is not available", e);
            }
            for (int j = 0; j < encoded.length; j++) {
                rewrittenPath.append('%');
                rewrittenPath.append(hexadecimal[(encoded[j] >> 4) & 0x0f]);
                rewrittenPath.append(hexadecimal[encoded[j] & 0x0f]);
            }
            i = end - 1;
        }

        return rewrittenPath.toString();
    }

    /**
     * Returns the path of the given resource: everything before the last
     * '/'. When the uri has no '/', whatever follows the first ':' is
     * returned, or the empty string if there is no ':' either.
     *
     * @param uri the resource
     * @return the resource path
     */
    public static String getPath(String uri) {
        int i = uri.lastIndexOf('/');
        if (i > -1) {
            return uri.substring(0, i);
        }
        i = uri.indexOf(':');
        return (i > -1) ? uri.substring(i + 1) : "";
    }

    /**
     * Remove path and file information from a filename returning only its
     * extension component. A query string ('?...') or fragment ('#...') is
     * cut off before the extension is looked up, so dots and slashes inside
     * them are ignored.
     *
     * @param uri The filename
     * @return The filename extension (with starting dot!), or
     *         <code>null</code> if the last path segment contains no dot
     */
    public static String getExtension(String uri) {
        // The path part ends at the first '?' or '#', whichever comes first.
        int end = uri.length();
        int mark = uri.indexOf('?');
        if (mark > -1) {
            end = mark;
        }
        int sharp = uri.indexOf('#');
        if (sharp > -1 && sharp < end) {
            end = sharp;
        }

        int dot = uri.lastIndexOf('.', end - 1);
        if (dot == -1) {
            return null;
        }
        // A slash after the dot means the dot belongs to a directory name.
        if (uri.lastIndexOf('/', end - 1) > dot) {
            return null;
        }
        return uri.substring(dot, end);
    }

    /**
     * Absolutize a relative resource on the given absolute path.
     *
     * @param path the absolute path; when <code>null</code> or empty the
     *        resource is returned unchanged
     * @param relativeResource the relative resource; when <code>null</code>
     *        or empty the path is returned unchanged
     * @return the absolutized resource
     */
    public static String absolutize(String path, String relativeResource) {
        if (path == null || "".equals(path)) {
            return relativeResource;
        }
        if (relativeResource == null || relativeResource.length() == 0) {
            return path;
        }
        if (relativeResource.charAt(0) == '/') {
            // resource is already absolute
            return relativeResource;
        }

        StringBuffer b = new StringBuffer(path.length() + 1 + relativeResource.length());
        b.append(path);
        if (path.charAt(path.length() - 1) != '/') {
            b.append('/');
        }
        b.append(relativeResource);
        return b.toString();
    }

    /**
     * Relativize an absolute resource on a given absolute path. The path is
     * treated as a directory whether or not it ends with '/'.
     *
     * @param path the absolute path; when <code>null</code> or empty the
     *        resource is returned unchanged
     * @param absoluteResource the absolute resource
     * @return the resource relative to the given path
     */
    public static String relativize(String path, String absoluteResource) {
        if (path == null || "".equals(path)) {
            return absoluteResource;
        }
        // Compare against the directory form so that "/a/b" is not mistaken
        // for a parent of "/a/bc".
        if (path.charAt(path.length() - 1) != '/') {
            path += "/";
        }
        if (absoluteResource.startsWith(path)) {
            // resource is a descendant of path
            return absoluteResource.substring(path.length());
        }

        // resource is not a descendant: back the common prefix up to the last
        // complete segment, then climb out of what remains of path.
        int index = StringUtils.matchStrings(path, absoluteResource);
        if (index > 0 && path.charAt(index - 1) != '/') {
            index = path.lastIndexOf('/', index - 1) + 1;
        }
        int levels = StringUtils.count(path.substring(index), '/');
        StringBuffer b = new StringBuffer();
        for (int i = 0; i < levels; i++) {
            b.append("../");
        }
        b.append(absoluteResource.substring(index));
        return b.toString();
    }

    /**
     * Normalize a uri containing ../ and ./ paths. "." segments are dropped
     * and each ".." removes the segment before it. Leading ".." segments of
     * a relative uri are kept; in a uri that starts with '/' a ".." cannot
     * climb above the root and is dropped, so "/../a" becomes "/a".
     *
     * @param uri The uri path to normalize
     * @return The normalized uri
     */
    public static String normalize(String uri) {
        String[] segments = StringUtils.split(uri, "/");
        // For an absolute uri the first segment is the empty string that
        // precedes the leading '/'; it stands for the root.
        boolean absolute = uri.length() > 0 && uri.charAt(0) == '/';
        String[] kept = new String[segments.length];
        int depth = 0;

        for (int i = 0; i < segments.length; i++) {
            String segment = segments[i];
            if (segment == null) {
                // Defensive: treat a null entry as the end of the list.
                break;
            }
            if (".".equals(segment)) {
                continue;
            }
            if ("..".equals(segment) && depth > 0 && !"..".equals(kept[depth - 1])) {
                if (!(absolute && depth == 1)) {
                    depth--;
                }
                continue;
            }
            kept[depth++] = segment;
        }

        if (absolute && depth == 1) {
            // Only the root is left.
            return "/";
        }
        StringBuffer b = new StringBuffer(uri.length());
        for (int i = 0; i < depth; i++) {
            if (i > 0) {
                b.append('/');
            }
            b.append(kept[i]);
        }
        return b.toString();
    }

    /**
     * Remove parameters from a uri. The text after the last '?' is split on
     * '&' and each <code>name=value</code> pair is stored in the map as two
     * strings, without any decoding. Processing stops at the first
     * parameter that contains no '='.
     *
     * @param uri The uri path to deparameterize.
     * @param parameters The map that collects parameters.
     * @return The cleaned uri
     */
    public static String deparameterize(String uri, Map parameters) {
        int i = uri.lastIndexOf('?');
        if (i == -1) {
            return uri;
        }
        String[] params = StringUtils.split(uri.substring(i + 1), "&");
        for (int j = 0; j < params.length; j++) {
            String p = params[j];
            int k = p.indexOf('=');
            if (k == -1) {
                break;
            }
            parameters.put(p.substring(0, k), p.substring(k + 1));
        }
        return uri.substring(0, i);
    }

    /**
     * Append parameters to a uri as a query string. Keys and values are
     * appended as they are, without any encoding. If the uri already has a
     * query string the parameters are added to it with '&amp;' instead of
     * starting a second one.
     *
     * @param uri The uri to extend.
     * @param parameters The parameters to append, in the map's iteration order.
     * @return The uri followed by the parameters, or the uri unchanged when
     *         the map is empty
     */
    public static String parameterize(String uri, Map parameters) {
        if (parameters.size() == 0) {
            return uri;
        }
        StringBuffer buffer = new StringBuffer(uri);
        int mark = uri.indexOf('?');
        if (mark == -1) {
            buffer.append('?');
        } else if (mark < uri.length() - 1 && !uri.endsWith("&")) {
            buffer.append('&');
        }
        for (Iterator i = parameters.entrySet().iterator(); i.hasNext();) {
            Map.Entry entry = (Map.Entry) i.next();
            buffer.append(entry.getKey());
            buffer.append('=');
            buffer.append(entry.getValue());
            if (i.hasNext()) {
                buffer.append('&');
            }
        }
        return buffer.toString();
    }
}

/*****************************************************************************
 * Copyright (C) The Apache Software Foundation. All rights reserved.        *
 * ------------------------------------------------------------------------- *
 * This software is published under the terms of the Apache Software License *
 * version 1.1, a copy of which has been included  with this distribution in *
 * the LICENSE file.                                                         *
 *****************************************************************************/




/**
 * A collection of <code>String</code> handling utility methods.
 *
 * @author <a href="mailto:ricardo@apache.org">Ricardo Rocha</a>
 * @author <a href="mailto:stefano@apache.org">Stefano Mazzocchi</a>
 * @version CVS $Revision: 1.1 $ $Date: 2002/03/17 13:37:13 $
 */
 class StringUtils {

    /**
     * Split a string as an array using whitespace as separator
     *
     * @param line The string to be split
     * @return An array of whitespace-separated tokens
     */
    public static String[] split(String line) {
        return split(line, " \t\n\r");
    }

    /**
     * Split a string as an array using a given set of separators. The work
     * is delegated to <code>Tokenizer.tokenize</code> with delimiters not
     * returned as tokens.
     *
     * @param line The string to be split
     * @param delimiter A string containing token separators
     * @return An array of tokens
     */
    public static String[] split(String line, String delimiter) {
        return Tokenizer.tokenize(line, delimiter, false);
    }

    /**
     * Tests whether a given character is alphabetic, numeric or
     * underscore (ASCII ranges only)
     *
     * @param c The character to be tested
     * @return whether the given character is alphanumeric or not
     */
    public static boolean isAlphaNumeric(char c) {
        return c == '_' ||
            (c >= 'a' && c <= 'z') ||
            (c >= 'A' && c <= 'Z') ||
            (c >= '0' && c <= '9');
    }

    /**
     * Counts the occurrence of the given char in the string.
     *
     * @param str The string to be tested
     * @param c the char to be counted
     * @return the occurrence of the character in the string.
     */
    public static int count(String str, char c) {
        int occurrences = 0;
        for (int i = 0; i < str.length(); i++) {
            if (str.charAt(i) == c) {
                occurrences++;
            }
        }
        return occurrences;
    }

    /**
     * Matches two strings.
     *
     * @param a The first string
     * @param b The second string
     * @return the index where the two strings stop matching starting from 0,
     *         i.e. the length of their common prefix
     */
    public static int matchStrings(String a, String b) {
        int len = (a.length() < b.length()) ? a.length() : b.length();
        int i = 0;
        while (i < len && a.charAt(i) == b.charAt(i)) {
            i++;
        }
        return i;
    }

    /**
     * Replaces every <code>${name}</code> token in the input with the value
     * of <code>System.getProperty(name)</code>. A property that is not set
     * is replaced by the text "null". Input without any complete token is
     * returned unchanged; substituted values are not scanned again.
     *
     * @param s The string containing the tokens
     * @return the string with all complete tokens replaced
     */
    public static String replaceToken(String s) {
        int startToken = s.indexOf("${");
        if (startToken == -1) {
            return s;
        }
        StringBuffer value = new StringBuffer(s.length());
        int copied = 0; // everything before this index is already in value
        while (startToken != -1) {
            int endToken = s.indexOf('}', startToken + 2);
            if (endToken == -1) {
                // unterminated token: keep the remainder literally
                break;
            }
            value.append(s.substring(copied, startToken));
            value.append(System.getProperty(s.substring(startToken + 2, endToken)));
            copied = endToken + 1;
            startToken = s.indexOf("${", copied);
        }
        value.append(s.substring(copied));
        return value.toString();
    }
}

 /*****************************************************************************
  * Copyright (C) The Apache Software Foundation. All rights reserved.        *
  * ------------------------------------------------------------------------- *
  * This software is published under the terms of the Apache Software License *
  * version 1.1, a copy of which has been included  with this distribution in *
  * the LICENSE file.                                                         *
  *****************************************************************************/



 /**
  * Replacement for StringTokenizer in java.util, because of a bug in
  * Sun's implementation.
  *
  * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek</A>
  */
  class Tokenizer implements Enumeration {

   /**
    * Constructs a string tokenizer for the specified string. All characters
    * in the delim argument are the delimiters for separating tokens.
    * If the returnTokens flag is true, then the delimiter characters are
    * also returned as tokens. Each delimiter is returned as a string of
    * length one. If the flag is false, the delimiter characters are skipped
    * and only serve as separators between tokens. Unlike
    * java.util.StringTokenizer, empty tokens between adjacent delimiters
    * are returned as empty strings.
    *
    * @param str           a string to be parsed
    * @param delim         the delimiters
    * @param returnTokens  flag indicating whether to return the delimiters
    *                      as tokens
    */
   public Tokenizer(String str, String delim, boolean returnTokens) {
     this.str = str;
     this.delim = delim;
     this.returnTokens = returnTokens;

     max = str.length();
   }

   /**
    * Constructs a string tokenizer for the specified string. The characters
    * in the delim argument are the delimiters for separating tokens.
    * Delimiter characters themselves will not be treated as tokens.
    *
    * @param str          a string to be parsed
    * @param delim        the delimiters
    */
   public Tokenizer(String str, String delim) {
     this(str, delim, false);
   }

   /**
    * Constructs a string tokenizer for the specified string. The character
    * in the delim argument is the delimiter for separating tokens.
    * The delimiter character itself will not be treated as a token.
    *
    * @param str          a string to be parsed
    * @param delim        the delimiter
    */
   public Tokenizer(String str, char delim) {
     this(str, String.valueOf(delim), false);
   }

   /**
    * Constructs a string tokenizer for the specified string. The tokenizer
    * uses the default delimiter set, which is " \t\n\r\f": the space
    * character, the tab character, the newline character, the carriage-return
    * character, and the form-feed character. Delimiter characters themselves
    * will not be treated as tokens.
    *
    * @param str          a string to be parsed
    */
   public Tokenizer(String str) {
     this(str, DEFAULT_DELIMITERS, false);
   }

   /**
    * Tests if there are more tokens available from this tokenizer's string.
    * If this method returns true, then a subsequent call to nextToken with
    * no argument will successfully return a token.
    *
    * @return true if and only if there is at least one token in the string
    * after the current position; false otherwise.
    */
   public boolean hasMoreTokens() {
     return current < max || atFinalEmptyToken();
   }

   /**
    * Tells whether the only thing left to return is a final empty token:
    * either the string is empty, or delimiters are returned as tokens and
    * the token returned last started at a delimiter.
    */
   private boolean atFinalEmptyToken() {
     return current == max
       && (max == 0
       || (returnTokens && delim.indexOf(str.charAt(previous)) >= 0));
   }

   /**
    * Returns the next token from this string tokenizer.
    *
    * @return the next token from this string tokenizer
    *
    * @exception NoSuchElementException  if there are no more tokens in this
    *                                    tokenizer's string
    */
   public String nextToken() throws NoSuchElementException {
     if (atFinalEmptyToken()) {
       // Step past max so that the final empty token is returned only once.
       current++;
       return "";
     }

     if (current >= max) {
       throw new NoSuchElementException();
     }

     int start = current;
     String result = null;

     if (delim.indexOf(str.charAt(start)) >= 0) {
       if (previous == -1 || (returnTokens && previous != current
         && delim.indexOf(str.charAt(previous)) >= 0)) {
         // Empty token in front of a leading delimiter, or between two
         // adjacent delimiters that are themselves returned as tokens.
         result = "";
       }
       else if (returnTokens) {
         result = str.substring(start, ++current);
       }

       // Skip the delimiter only when it separates the previous token from
       // the one read below. When the empty token in front of a leading
       // delimiter was just produced, the delimiter must stay in place so
       // that the token after it (possibly empty as well) is not lost.
       if (!returnTokens && result == null) {
         current++;
       }
     }

     previous = start;
     start = current;

     if (result == null) {
       while (current < max && delim.indexOf(str.charAt(current)) < 0) {
         current++;
       }
     }

     return result == null ? str.substring(start, current) : result;
   }

   /**
    * Returns the next token in this string tokenizer's string. First, the
    * set of characters considered to be delimiters by this Tokenizer
    * object is changed to be the characters in the string delim.
    * Then the next token in the string after the current position is
    * returned. The current position is advanced beyond the recognized token.
    * The new delimiter set remains the default after this call.
    *
    * @param delim the new delimiters
    *
    * @return the next token, after switching to the new delimiter set
    *
    * @exception NoSuchElementException  if there are no more tokens in this
    *                                    tokenizer's string.
    */
   public String nextToken(String delim) throws NoSuchElementException {
     this.delim = delim;
     return nextToken();
   }

   /**
    * Returns the same value as the hasMoreTokens method. It exists so that
    * this class can implement the Enumeration interface.
    *
    * @return true if there are more tokens; false otherwise.
    */
   public boolean hasMoreElements() {
     return hasMoreTokens();
   }

   /**
    * Returns the same value as the nextToken method, except that its
    * declared return value is Object rather than String. It exists so that
    * this class can implement the Enumeration interface.
    *
    * @return the next token in the string
    *
    * @exception NoSuchElementException  if there are no more tokens in this
    *                                    tokenizer's string
    */
   public Object nextElement() {
     return nextToken();
   }

   /**
    * Calculates the number of times that this tokenizer's nextToken method
    * can be called before it generates an exception. The current position
    * is not advanced.
    *
    * @return  the number of tokens remaining in the string using the
    *          current delimiter set
    */
   public int countTokens() {
     // Count by running the tokenizer itself and restoring its state, so
     // the result always agrees with what nextToken will deliver.
     int savedPrevious = previous;
     int savedCurrent = current;
     int count = 0;

     while (hasMoreTokens()) {
       nextToken();
       count++;
     }

     previous = savedPrevious;
     current = savedCurrent;
     return count;
   }

   /**
    * Resets this tokenizer's state so the tokenizing starts from the
    * beginning.
    */
   public void reset() {
     previous = -1;
     current = 0;
   }

   /**
    * Tokenizes the given string in one call. All characters in the delim
    * argument are the delimiters for separating tokens.
    * If the returnTokens flag is true, then the delimiter characters are
    * also returned as tokens. Each delimiter is returned as a string of
    * length one. If the flag is false, the delimiter characters are skipped
    * and only serve as separators between tokens.
    *
    * @param str           a string to be parsed
    * @param delim         the delimiters
    * @param returnTokens  flag indicating whether to return the delimiters
    *                      as tokens
    *
    * @return array with tokens; it has exactly one entry per token
    */
   public static String[] tokenize(String str, String delim,
     boolean returnTokens) {

     Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens);
     String[] tokens = new String[tokenizer.countTokens()];

     for (int i = 0; tokenizer.hasMoreTokens(); i++) {
       tokens[i] = tokenizer.nextToken();
     }

     return tokens;
   }

   /**
    * Default delimiters " \t\n\r\f":
    * the space character, the tab character, the newline character,
    * the carriage-return character, and the form-feed character.
    */
   public static final String DEFAULT_DELIMITERS = " \t\n\r\f";

   /**
    * String to tokenize.
    */
   private String str = null;

   /**
    * Delimiters.
    */
   private String delim = null;

   /**
    * Flag indicating whether to return the delimiters as tokens.
    */
   private boolean returnTokens = false;

   /**
    * Start position of the token returned last, or -1 before the first
    * token has been read.
    */
   private int previous = -1;

   /**
    * Current position in str string.
    */
   private int current = 0;

   /**
    * Maximal position in str string.
    */
   private int max = 0;
 }
Related examples in the same category

1.	Creating a URL with a single string.
2.	Creating a URL With components
3.	Converting Between a Filename Path and a URL
4.	URL Constructor Test
5.	URL Encode Test
6.	Get URL Content
7.	Get URL Parts
8.	Read from a URL
9.	Convert a URL to a URI
10.	Converting Between a URL and a URI
11.	Convert an absolute URI to a URL
12.	URL Equality
13.	Parsing a URL
14.	URL Request
15.	URL Get
16.	A URL Retrieval Example
17.	URL Reader
18.	URL Connection Reader
19.	Using URLConnection
20.	Parse URL
21.	Resolve a relative URL
22.	sends e-mail using a mailto: URL
23.	Convert the absolute URI to a URL object
24.	Convert URI to URL
25.	Get parts of a url
26.	Checks, whether the URL uses a file based protocol.
27.	Add Parameter to URL
28.	Returns the anchor value of the given URL
29.	Extracts the file name from the URL.
30.	Creates a relative url by stripping the common parts of the url.
31.	Checks, whether the URL points to the same service. A service is equal if the protocol, host and port are equal.
32.	Extracts the base URL from the given URL by stripping the query and anchor part.
33.	Returns true if the URL represents a path, and false otherwise.
34.	Parse Port
35.	Parse Host
36.	Given a URL, check if it is a jar url (jar:!/archive) and, if it is, extract the archive entry into the given dest directory and return a file URL to its location
37.	check the validity of url pattern according to the spec.
38.	Build Relative URL Path
39.	Checks that the protocol://host:port part of two URLs are equal
40.	Create valid URL from a system id
41.	Extract URL File Name
42.	Extract the URL page name from the given path
43.	Get Domain Name
44.	Get Locale From String
45.	Get URL Last Modified
46.	Get the name of the parent of the given URL path
47.	Get the parent of the given URL path
48.	Has URLContent Changed
49.	Is URL a local file
50.	Normalize an URL
51.	Normalizes an URL
52.	Resolve a relative URL string against an absolute URL string
53.	ResourceBundle String manager
54.	Save URL contents to a file
55.	URL Path: standardize the creation of mutation of path-like structures
56.	Utility class for building URLs
57.	Add Default Port to a URL If Missing
58.	Get Relative Path To URL
59.	Download from a URL and save to a file
A collection of File, URL and filename utility methods : URL « Network Protocol « Java

Related examples in the same category