Here you can find the source of getHostAndPortFromUrl(String url)
Parameter | Description |
---|---|
url | The string as a url |
public static String[] getHostAndPortFromUrl(String url) throws MalformedURLException
//package com.java2s;
/*
 * Copyright (c) 1999 - 2012 The Virtual Light Company
 *                            http://www.vlc.com.au/
 *
 * This code is licensed under the GNU Library GPL v2.1. Please read docs/LICENSE.txt
 * for the full details. A copy of the LGPL may be found at
 *
 * http://www.gnu.org/copyleft/lgpl.html
 *
 * The code is distributed as-is and contains no warranty or guarantee for fitnesse of
 * purpose. Use it at your own risk.
 */

import java.net.MalformedURLException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Small hand-rolled URL helpers: scheme extraction, default-port lookup and
 * host/port extraction, implemented on char arrays without regex support.
 */
public class Main {
    /**
     * Default port for every scheme that is known to carry a host in its
     * authority part, keyed by lower-case scheme name. A scheme that is not
     * in this table is treated as having no host.
     * <P>
     * NOTE(review): this snippet never populated the table, so the entries
     * below are the well-known registry ports for common host-based schemes;
     * confirm against the table of the library this was extracted from.
     */
    private static final Map<String, Integer> PROTOCOL_PORTS = new HashMap<String, Integer>();

    static {
        PROTOCOL_PORTS.put("ftp", Integer.valueOf(21));
        PROTOCOL_PORTS.put("ssh", Integer.valueOf(22));
        PROTOCOL_PORTS.put("telnet", Integer.valueOf(23));
        PROTOCOL_PORTS.put("gopher", Integer.valueOf(70));
        PROTOCOL_PORTS.put("http", Integer.valueOf(80));
        PROTOCOL_PORTS.put("ws", Integer.valueOf(80));
        PROTOCOL_PORTS.put("nntp", Integer.valueOf(119));
        PROTOCOL_PORTS.put("imap", Integer.valueOf(143));
        PROTOCOL_PORTS.put("ldap", Integer.valueOf(389));
        PROTOCOL_PORTS.put("https", Integer.valueOf(443));
        PROTOCOL_PORTS.put("wss", Integer.valueOf(443));
        PROTOCOL_PORTS.put("rtsp", Integer.valueOf(554));
        PROTOCOL_PORTS.put("ldaps", Integer.valueOf(636));
    }

    /**
     * Parse the string as a URL and extract the host name and port from it.
     * The first element of the returned array is the host name and the
     * second is the port number as a string.
     * <P>
     * Any user-info (<CODE>user:password@</CODE>) in front of the host is
     * skipped. A bracketed IPv6 literal such as <CODE>[::1]</CODE> is returned
     * with its brackets, and the colons inside it are not mistaken for the
     * port separator.
     * <P>
     * If the URL does not explicitly set the port number (no colon, or a
     * colon followed by nothing) the port is the string form of
     * {@link #getDefaultPort(String)} for the scheme.
     *
     * @param url The string as a url
     * @return A two element array of host name and port, or <CODE>null</CODE>
     *    if the URL has no scheme or the scheme has no known default port
     * @exception MalformedURLException The url passed was <CODE>null</CODE>
     */
    public static String[] getHostAndPortFromUrl(String url) throws MalformedURLException {
        if (url == null) {
            throw new MalformedURLException("URL is null");
        }

        char[] chars = url.toCharArray();
        int size = chars.length;

        // Only schemes with a registered default port are assumed to carry a
        // host; anything else (including "no scheme at all") has no answer.
        String scheme = getScheme(chars);
        if (scheme == null || !PROTOCOL_PORTS.containsKey(scheme)) {
            return null;
        }

        // Step over "scheme:" and then any run of '/' that introduces the
        // authority part.
        int authorityStart = scheme.length() + 1;
        while (authorityStart < size && chars[authorityStart] == '/') {
            authorityStart++;
        }

        // The authority runs up to the first '/', '?' or '#', or to the end.
        int authorityEnd = authorityStart;
        while (authorityEnd < size
                && chars[authorityEnd] != '/'
                && chars[authorityEnd] != '?'
                && chars[authorityEnd] != '#') {
            authorityEnd++;
        }

        // Skip the user-info, if any: the host starts after the first '@'.
        int hostStart = authorityStart;
        int at = indexOf(chars, '@', authorityStart, authorityEnd);
        if (at < authorityEnd) {
            hostStart = at + 1;
        }

        // For a bracketed IPv6 literal, start looking for the port separator
        // only after the closing bracket.
        int colonSearchStart = hostStart;
        if (hostStart < authorityEnd && chars[hostStart] == '[') {
            int closing = indexOf(chars, ']', hostStart, authorityEnd);
            if (closing < authorityEnd) {
                colonSearchStart = closing + 1;
            }
        }

        // hostEnd is the position of the ':' or, when there is none, the end
        // of the authority.
        int hostEnd = indexOf(chars, ':', colonSearchStart, authorityEnd);
        String hostname = new String(chars, hostStart, hostEnd - hostStart);

        int portStart = hostEnd + 1;
        String port;
        if (portStart < authorityEnd) {
            port = new String(chars, portStart, authorityEnd - portStart);
        } else {
            port = Integer.toString(getDefaultPort(scheme));
        }

        return new String[] { hostname, port };
    }

    /**
     * Get the scheme from the url string that we've been given. The scheme is
     * everything in front of the first ':' provided no '/', '?' or '#' comes
     * before it, converted to lower case. A leading <CODE>url:</CODE> or
     * <CODE>uri:</CODE> prefix is kept as part of the returned value.
     *
     * @param uri The uri as a string
     * @return A string representing the scheme, or null if it can't be found
     */
    public static String getScheme(String uri) {
        // we do this the hard way using char arrays rather than using a
        // regex package to reduce core dependencies
        return getScheme(uri.toCharArray());
    }

    /**
     * Character array based version of the getScheme method.
     *
     * @param uri The URI to parse
     * @return The lower-case scheme, or null if the scheme characters are
     *    empty or are not terminated by a ':'
     */
    private static String getScheme(char[] uri) {
        int length = getSchemeCharLength(uri);

        if (length > 0 && length < uri.length && uri[length] == ':') {
            // Locale.ROOT keeps the result independent of the default locale
            return new String(uri, 0, length).toLowerCase(Locale.ROOT);
        }

        return null;
    }

    /**
     * Return the default port used by a given protocol. The name is trimmed
     * and compared case-insensitively.
     *
     * @param protocol the protocol
     * @return the port number, or 0 if unknown (including a null protocol)
     */
    public static int getDefaultPort(String protocol) {
        if (protocol == null) {
            return 0;
        }

        Integer port = PROTOCOL_PORTS.get(protocol.trim().toLowerCase(Locale.ROOT));

        return (port == null) ? 0 : port.intValue();
    }

    /**
     * Variant on the getScheme method that tells how long the scheme is in
     * terms of characters from the start of the array. A leading
     * <CODE>url:</CODE> or <CODE>uri:</CODE> prefix (any case) is counted as
     * part of that length.
     *
     * @param uri The characters of the URI
     * @return The index of the first ':', '/', '?' or '#' after the optional
     *    prefix, or the array length if there is none
     */
    private static int getSchemeCharLength(char[] uri) {
        int size = uri.length;
        int index = 0;

        // quick case insensitive compare to see if the URI starts with either
        // "URL:" or "URI:"; the length guard keeps short input from
        // overrunning the array
        if (size > 3
                && uri[3] == ':'
                && (uri[0] == 'u' || uri[0] == 'U')
                && (uri[1] == 'r' || uri[1] == 'R')
                && (uri[2] == 'i' || uri[2] == 'I' || uri[2] == 'l' || uri[2] == 'L')) {
            index = 4;
        }

        while (index < size
                && uri[index] != ':'
                && uri[index] != '/'
                && uri[index] != '?'
                && uri[index] != '#') {
            index++;
        }

        return index;
    }

    /**
     * Find the first occurrence of a character in the half-open range
     * [from, to) of the array.
     *
     * @param chars The characters to search
     * @param target The character to look for
     * @param from The first index to examine
     * @param to The index one past the last one to examine
     * @return The index of the character, or <CODE>to</CODE> if not present
     */
    private static int indexOf(char[] chars, char target, int from, int to) {
        int index = from;

        while (index < to && chars[index] != target) {
            index++;
        }

        return index;
    }
}