Java URL to Host Name getHostAndPortFromUrl(String url)

Here you can find the source of getHostAndPortFromUrl(String url)

Description

Parse the string as a URL and extract the host name and port number from it.

License

LGPL

Parameter

Parameter Description
url The string as a url

Return

A two-element array holding the host name and the port number (as a string), or null if the URL's scheme does not support host names

Declaration

public static String[] getHostAndPortFromUrl(String url) throws MalformedURLException 

Method Source Code

//package com.java2s;
/*
 * Copyright (c) 1999 - 2012 The Virtual Light Company
 *                            http://www.vlc.com.au/
 *
 * This code is licensed under the GNU Library GPL v2.1. Please read docs/LICENSE.txt
 * for the full details. A copy of the LGPL may be found at
 *
 * http://www.gnu.org/copyleft/lgpl.html
 *
 * The code is distributed as-is and contains no warranty or guarantee for fitnesse of
 * purpose. Use it at your own risk.
 */

import java.net.MalformedURLException;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

public class Main {
    /**
     * Default port for every scheme that carries a <CODE>host[:port]</CODE>
     * authority, keyed by the lower-case scheme name. A scheme that is absent
     * from this map is treated as having no host component.
     * <P>
     * NOTE(review): the table that originally filled this map is not part of
     * this file; the entries below are the RFC 1738 defaults plus https and
     * ldap. Extend it if callers rely on other schemes.
     */
    private static final Map<String, Integer> protocol_ports = new HashMap<String, Integer>();

    static {
        protocol_ports.put("ftp", Integer.valueOf(21));
        protocol_ports.put("telnet", Integer.valueOf(23));
        protocol_ports.put("gopher", Integer.valueOf(70));
        protocol_ports.put("http", Integer.valueOf(80));
        protocol_ports.put("nntp", Integer.valueOf(119));
        protocol_ports.put("wais", Integer.valueOf(210));
        protocol_ports.put("ldap", Integer.valueOf(389));
        protocol_ports.put("https", Integer.valueOf(443));
        protocol_ports.put("prospero", Integer.valueOf(1525));
    }

    /**
     * Parse the string as a URL and extract the host name and port from it.
     * <P>
     * The scheme is looked up in the table of known default ports; if it is
     * missing there (or no scheme can be found at all) the URL is assumed not
     * to carry a host and <CODE>null</CODE> is returned.
     * <P>
     * Otherwise the authority part is taken to be everything after the scheme's
     * ':' and any run of '/' characters, up to the first '/', '?' or '#'.
     * Inside the authority anything up to and including the first '@' is
     * skipped as user information, and the remainder is split at the first ':'
     * into host and port. A host that starts with '[' (an IPv6 literal) is kept
     * whole, brackets included, so the colons inside it are not mistaken for
     * the port separator.
     * <P>
     * If the URL does not explicitly set the port number then the value of
     * {@link #getDefaultPort(String)} for the scheme is returned as a string.
     * The port text is not validated; it is returned exactly as written.
     *
     * @param url The string as a url
     * @return A two element array, host name first and port second, or
     *    <CODE>null</CODE> if the scheme is missing or has no host part
     * @exception MalformedURLException Declared for callers that already
     *    handle it; this implementation does not raise it
     */
    public static String[] getHostAndPortFromUrl(String url) throws MalformedURLException {
        char[] url_chars = url.toCharArray();
        int size = url_chars.length;

        String scheme = getScheme(url_chars);

        // Only schemes with a registered default port are known to have a host.
        if ((scheme == null) || !protocol_ports.containsKey(scheme)) {
            return null;
        }

        // Step over the ':' that terminates the scheme and then over every
        // leading '/', so "http:/host" is accepted as leniently as "http://host".
        // NOTE(review): for a "URL:"/"URI:" prefixed input getScheme() returns
        // the prefix as part of the scheme, so such input never reaches here.
        int authority_start = scheme.length() + 1;
        while ((authority_start < size) && (url_chars[authority_start] == '/')) {
            authority_start++;
        }

        int authority_end = authority_start;
        while ((authority_end < size) && !isAuthorityEnd(url_chars[authority_end])) {
            authority_end++;
        }

        // Skip "user:password@" if present; the host begins after the first '@'.
        int host_start = authority_start;
        for (int i = authority_start; i < authority_end; i++) {
            if (url_chars[i] == '@') {
                host_start = i + 1;
                break;
            }
        }

        // A bracketed IPv6 literal contains ':' itself, so begin looking for
        // the port separator only after the closing ']'. An unterminated '['
        // falls back to the plain first-colon split.
        int host_end = host_start;
        if ((host_end < authority_end) && (url_chars[host_end] == '[')) {
            int close = host_end + 1;
            while ((close < authority_end) && (url_chars[close] != ']')) {
                close++;
            }
            if (close < authority_end) {
                host_end = close + 1;
            }
        }

        while ((host_end < authority_end) && (url_chars[host_end] != ':')) {
            host_end++;
        }

        String hostname = new String(url_chars, host_start, host_end - host_start);

        // host_end sits on the ':' (or on authority_end when there is none),
        // so an explicit port exists only if something follows that position.
        int port_start = host_end + 1;
        String port;
        if (port_start < authority_end) {
            port = new String(url_chars, port_start, authority_end - port_start);
        } else {
            port = Integer.toString(getDefaultPort(scheme));
        }

        return new String[] { hostname, port };
    }

    /**
     * Get the scheme from the url string that we've been given. The scheme is
     * determined by the regex <CODE>(([^:/?#]+):)?</CODE> and is returned in
     * lower case.
     *
     * @param uri The uri as a string
     * @return A string representing the scheme, or null if it can't be found
     *    or <CODE>uri</CODE> is null
     */
    public static String getScheme(String uri) {
        if (uri == null) {
            return null;
        }

        // we do this the hard way using char arrays rather than using a
        // regex package to reduce core dependencies
        return getScheme(uri.toCharArray());
    }

    /**
     * Character array based version of the getScheme method.
     *
     * @param uri The URI to parse
     * @return The lower-case scheme, or null when the leading run of scheme
     *    characters is empty or is not terminated by ':'
     */
    private static String getScheme(char[] uri) {
        int length = getSchemeCharLength(uri);

        if ((length > 0) && (length < uri.length) && (uri[length] == ':')) {
            // Locale.ROOT keeps the result stable under locales with special
            // casing rules (e.g. Turkish dotless i).
            return new String(uri, 0, length).toLowerCase(Locale.ROOT);
        }

        return null;
    }

    /**
     * Return the default port used by a given protocol. The name is trimmed
     * and lower-cased before the lookup.
     *
     * @param protocol the protocol
     * @return the port number, or 0 if unknown or <CODE>protocol</CODE> is null
     */
    public static int getDefaultPort(String protocol) {
        if (protocol == null) {
            return 0;
        }

        Integer port = protocol_ports.get(protocol.trim().toLowerCase(Locale.ROOT));

        return (port == null) ? 0 : port.intValue();
    }

    /**
     * Variant on the getScheme method that tells how long the scheme is
     * in terms of characters from the start of the array. A leading
     * "URL:" or "URI:" marker (any case) is counted as part of that length.
     *
     * @param uri The characters of the URI
     * @return The index of the first ':', '/', '?' or '#' after the optional
     *    marker, or the array length if none is found
     */
    private static int getSchemeCharLength(char[] uri) {
        int size = uri.length;
        int index = 0;

        // quick case insensitive compare to see if the URI starts with either
        // "URL:" or "URI:". The length guard keeps short input from indexing
        // past the end of the array.
        if ((size > 3) && (uri[3] == ':') && (uri[0] == 'u' || uri[0] == 'U')
                && (uri[1] == 'r' || uri[1] == 'R')
                && (uri[2] == 'i' || uri[2] == 'I' || uri[2] == 'l' || uri[2] == 'L')) {
            index = 4;
        }

        while ((index < size) && (uri[index] != ':') && !isAuthorityEnd(uri[index])) {
            index++;
        }

        return index;
    }

    /**
     * Tell whether the character terminates the authority part of a URL.
     *
     * @param c The character to test
     * @return true for '/', '?' and '#'
     */
    private static boolean isAuthorityEnd(char c) {
        return (c == '/') || (c == '?') || (c == '#');
    }
}

Related

  1. getHost(String urlString)
  2. getHost(String urlString)
  3. getHost(URL url)
  4. getHostAddressAsBytes(String url)
  5. getHostAndPort(final URL url)
  6. getHostFromURL(final String urlSpec)
  7. getHostname(String completeUrl)
  8. getHostName(String url)
  9. getHostName(String url)