Description
Normalizes a URI as specified in section 6.2.2 of RFC 3986
License
Apache License
Parameter
| Parameter | Description |
| --- | --- |
| uri | the URI to normalize, given as a string |
Exception
| Exception | Description |
| --- | --- |
| URISyntaxException | if the given string cannot be parsed as a URI |
| UnsupportedEncodingException | if the UTF-8 encoding is not supported by the platform |
Return
an RFC 3986 URI normalized according to section 6.2.2.
Declaration
public static URI normalizeURI(String uri) throws URISyntaxException,
UnsupportedEncodingException
Method Source Code
//package com.java2s;
/* Copyright 2014 MITRE Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
public class Main {
    /** Punctuation that, together with ASCII letters and digits, forms the RFC 3986 "unreserved" set. */
    private static final String UNRESERVED_PUNCTUATION = "-._~";

    /** Upper-case hexadecimal digits, as RFC 3986 section 6.2.2.1 requires for percent-escapes. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Normalizes a URI as specified in section 6.2.2 of RFC 3986.
     *
     * @param uri a URI in string form
     * @return an RFC 3986 URI normalized according to section 6.2.2.
     * @throws URISyntaxException if {@code uri} cannot be parsed as a URI
     * @throws UnsupportedEncodingException if the UTF-8 encoding is unavailable
     */
    public static URI normalizeURI(String uri) throws URISyntaxException,
            UnsupportedEncodingException {
        return normalizeURI(new URI(uri));
    }

    /**
     * Normalizes a URI as specified in section 6.2.2 of RFC 3986.
     * At present, this does nothing for opaque URIs (such as URNs, and mailto:foo@bar.com). For
     * non-opaque URIs it lower-cases the scheme and host, drops an empty or scheme-default port,
     * upper-cases the hex digits of percent-escapes, decodes escapes of unreserved characters,
     * percent-encodes non-ASCII characters as UTF-8, alphabetizes query string parameters, and
     * removes dot segments from the path.
     *
     * <p>The method works on the <em>raw</em> (still escaped) components and re-parses the assembled
     * string with the single-argument {@link URI} constructor. The multi-argument constructors always
     * quote the {@code %} character, so handing them text that is already percent-encoded would
     * encode it a second time.
     *
     * @param uri a URI
     * @return an RFC 3986 URI normalized according to section 6.2.2.
     * @throws URISyntaxException if the normalized text cannot be re-parsed as a URI
     * @throws UnsupportedEncodingException if the UTF-8 encoding is unavailable
     */
    public static URI normalizeURI(URI uri) throws URISyntaxException,
            UnsupportedEncodingException {
        if (uri.isOpaque()) {
            return uri;
        }

        String scheme = uri.getScheme();
        if (scheme != null) {
            scheme = scheme.toLowerCase(Locale.ROOT);
        }
        String authority = normalizeAuthority(uri, scheme);
        String path = normalizeEscapes(uri.getRawPath());
        // An empty path is equivalent to "/" only when an authority is present; for a relative
        // reference such as "?q=1" adding a slash would change what it resolves to.
        if (authority != null && (path == null || path.length() == 0)) {
            path = "/";
        }
        String query = normalizeQueryString(uri.getRawQuery());
        String fragment = normalizeEscapes(uri.getRawFragment());

        StringBuilder text = new StringBuilder();
        if (scheme != null) {
            text.append(scheme).append(':');
        }
        if (authority != null) {
            text.append("//").append(authority);
        }
        if (path != null) {
            text.append(path);
        }
        if (query != null) {
            text.append('?').append(query);
        }
        if (fragment != null) {
            text.append('#').append(fragment);
        }
        // Dot segments are removed last so that segments written as "%2E%2E" are also resolved
        // once their escapes have been decoded.
        return new URI(text.toString()).normalize();
    }

    /**
     * Builds the normalized authority component of a hierarchical URI.
     *
     * @param uri the URI whose authority is wanted
     * @param scheme the already lower-cased scheme, or {@code null} if the URI has none
     * @return the normalized authority, or {@code null} if the URI has no authority
     */
    private static String normalizeAuthority(URI uri, String scheme)
            throws UnsupportedEncodingException {
        String host = uri.getHost();
        if (host == null) {
            // Either there is no authority at all, or it is registry-based and cannot be split
            // into user-info, host and port; keep it, normalizing only its escapes.
            return normalizeEscapes(uri.getRawAuthority());
        }
        StringBuilder authority = new StringBuilder();
        String userInfo = uri.getRawUserInfo();
        if (userInfo != null) {
            authority.append(normalizeEscapes(userInfo)).append('@');
        }
        authority.append(host.toLowerCase(Locale.ROOT));
        int port = uri.getPort(); // -1 when the URI does not specify a port
        Integer defaultPort = getPortForScheme(scheme);
        if (port != -1 && (defaultPort == null || defaultPort.intValue() != port)) {
            authority.append(':').append(port);
        }
        return authority.toString();
    }

    /**
     * See http://www.iana.org/assignments/port-numbers. This is a partial list of only the most common.
     *
     * @param scheme a scheme within a URI (such as http, ftp, ssh, etc); may be {@code null}
     * @return the standard port number for that scheme, or {@code null} if the scheme is
     *         {@code null} or not in the list.
     */
    private static Integer getPortForScheme(String scheme) {
        if (scheme == null) {
            return null;
        }
        scheme = scheme.toLowerCase(Locale.ROOT);
        if ("http".equals(scheme)) {
            return 80;
        }
        if ("https".equals(scheme)) {
            return 443;
        }
        if ("ftp".equals(scheme)) {
            return 21;
        }
        if ("ssh".equals(scheme)) {
            return 22;
        }
        if ("telnet".equals(scheme)) {
            return 23;
        }
        if ("gopher".equals(scheme)) {
            return 70;
        }
        if ("http-alt".equals(scheme)) {
            return 8080;
        }
        if ("radan-http".equals(scheme)) {
            return 8088;
        }
        if ("dnsix".equals(scheme)) {
            return 90;
        }
        if ("echo".equals(scheme)) {
            return 7;
        }
        if ("daytime".equals(scheme)) {
            return 13;
        }
        if ("smtp".equals(scheme)) {
            return 25;
        }
        if ("time".equals(scheme)) {
            return 37;
        }
        return null;
    }

    /**
     * Given a raw (still percent-escaped) URI query string, returns it with normalized escapes and
     * with its parameters sorted by name, ignoring case. The string is split on the raw text, so an
     * escaped {@code %26} or {@code %3D} inside a name or value is never mistaken for a delimiter.
     * Parameters that share a name are all kept, in their original relative order.
     *
     * @param queryString the raw query component; may be {@code null} or empty
     * @return the normalized query string; {@code null} or empty input is returned unchanged
     */
    private static String normalizeQueryString(String queryString)
            throws UnsupportedEncodingException {
        if (queryString == null || queryString.length() == 0) {
            return queryString;
        }
        // Each entry is {name, value}; value is null for a parameter written without "=".
        List<String[]> parameters = new ArrayList<String[]>();
        for (String piece : queryString.split("&")) {
            if (piece.length() == 0) {
                continue; // "a=1&&b=2": an empty piece carries no parameter
            }
            String[] nameAndValue = piece.split("=", 2);
            String name = normalizeEscapes(nameAndValue[0]);
            String value = nameAndValue.length == 2 ? normalizeEscapes(nameAndValue[1]) : null;
            parameters.add(new String[] { name, value });
        }
        // Collections.sort is stable, so parameters with identical names keep their input order.
        Collections.sort(parameters, new Comparator<String[]>() {
            @Override
            public int compare(String[] left, String[] right) {
                int byName = left[0].compareToIgnoreCase(right[0]);
                // Names differing only in case get a fixed order, independent of the input order.
                return byName != 0 ? byName : left[0].compareTo(right[0]);
            }
        });
        StringBuilder builder = new StringBuilder();
        for (int index = 0; index < parameters.size(); index++) {
            String[] parameter = parameters.get(index);
            if (index > 0) {
                builder.append('&');
            }
            builder.append(parameter[0]);
            if (parameter[1] != null) {
                builder.append('=').append(parameter[1]);
            }
        }
        return builder.toString();
    }

    /**
     * Normalizes the percent-escapes of one raw URI component: hex digits are upper-cased, escapes
     * of unreserved characters are decoded, and non-ASCII characters are percent-encoded as UTF-8.
     * All other ASCII characters are copied unchanged, because the component was already accepted
     * by the {@link URI} parser.
     *
     * @param raw a raw URI component, or {@code null}
     * @return the normalized component, or {@code null} if {@code raw} is {@code null}
     */
    private static String normalizeEscapes(String raw) throws UnsupportedEncodingException {
        if (raw == null) {
            return null;
        }
        StringBuilder out = new StringBuilder(raw.length());
        int index = 0;
        while (index < raw.length()) {
            char current = raw.charAt(index);
            if (current == '%' && index + 2 < raw.length()
                    && hexValue(raw.charAt(index + 1)) >= 0
                    && hexValue(raw.charAt(index + 2)) >= 0) {
                int octet = (hexValue(raw.charAt(index + 1)) << 4)
                        | hexValue(raw.charAt(index + 2));
                if (isUnreserved(octet)) {
                    out.append((char) octet);
                } else {
                    appendEscaped(out, octet);
                }
                index += 3;
            } else if (current < 0x80) {
                out.append(current);
                index++;
            } else {
                // Encode the whole code point so that surrogate pairs are not split.
                int length = Character.charCount(raw.codePointAt(index));
                byte[] utf8 = raw.substring(index, index + length).getBytes("UTF-8");
                for (byte b : utf8) {
                    appendEscaped(out, b & 0xFF);
                }
                index += length;
            }
        }
        return out.toString();
    }

    /** Returns whether the octet is an RFC 3986 unreserved character (ALPHA / DIGIT / "-" / "." / "_" / "~"). */
    private static boolean isUnreserved(int octet) {
        return (octet >= 'a' && octet <= 'z')
                || (octet >= 'A' && octet <= 'Z')
                || (octet >= '0' && octet <= '9')
                || UNRESERVED_PUNCTUATION.indexOf(octet) >= 0;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if the character is not one. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /** Appends the octet (0-255) as a percent-escape with upper-case hex digits. */
    private static void appendEscaped(StringBuilder out, int octet) {
        out.append('%').append(HEX_DIGITS[octet >> 4]).append(HEX_DIGITS[octet & 0x0F]);
    }
}
Related
- normalizedSetCookiePath(final String path, final URI originUri)
- normalizedUri(URI uri)
- normalizeGitRepoLocation(URI location)
- normalizeLink(String link, URI parent, boolean removeParams)
- normalizeURI(final URI uri)
- normalizeURIPath(String uri)
- normalizeUriPath(String uriPath)