com.feilong.core.net.URIUtil.java Source code

Java tutorial

Introduction

Here is the source code for com.feilong.core.net.URIUtil.java

Source

/*
 * Copyright (C) 2008 feilong
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.feilong.core.net;

import static org.apache.commons.lang3.StringUtils.EMPTY;
import static org.apache.commons.lang3.StringUtils.INDEX_NOT_FOUND;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.feilong.core.CharsetType;
import com.feilong.core.URIComponents;
import com.feilong.core.lang.StringUtil;
import com.feilong.tools.slf4j.Slf4jUtil;

import static com.feilong.core.URIComponents.QUESTIONMARK;
import static com.feilong.core.Validator.isNullOrEmpty;

/**
 * Utilities for handling {@link java.net.URI} (Uniform Resource Identifier) and {@link java.net.URL} (Uniform Resource Locator)
 * strings.
 * 
 * <h3>{@link java.net.URI} versus {@link java.net.URL}:</h3>
 * 
 * <blockquote>
 * 
 * <table border="1" cellspacing="0" cellpadding="4" summary="">
 * <tr style="background-color:#ccccff">
 * <th align="left">term</th>
 * <th align="left">description</th>
 * </tr>
 * 
 * <tr valign="top">
 * <td>{@link java.net.URI URI}<br>
 * Uniform Resource Identifier</td>
 * <td>Uniquely identifies a resource.<br>
 * There are two types of URIs: URLs and URNs. <br>
 * See RFC 1630: Universal Resource Identifiers in WWW: A Unifying Syntax for the Expression of Names and Addresses of Objects on the
 * Network as used in the WWW.</td>
 * </tr>
 * 
 * <tr valign="top" style="background-color:#eeeeff">
 * <td>{@link java.net.URL URL}<br>
 * Uniform Resource Locator</td>
 * <td>A concrete kind of URI: besides identifying a resource, a URL also states how to locate it. <br>
 * See RFC 1738: Uniform Resource Locators (URL)</td>
 * </tr>
 * 
 * <tr valign="top">
 * <td>URN<br>
 * uniform resource name</td>
 * <td>Identifies a resource by name, for example <br>
 * mailto:java-net@java.sun.com.</td>
 * </tr>
 * </table>
 * 
 * <p>
 * In other words, {@link java.net.URI URI} is the abstract, high-level notion of resource identification, while URL and URN are concrete
 * ways of identifying a resource. Every URL and every URN is a URI.
 * </p>
 * 
 * <p>
 * A {@link URI} may be absolute or relative, as long as it follows the URI syntax rules;<br>
 * a {@link URL} additionally carries the information needed to locate the resource,
 * <span style="color:red">so it cannot be relative and its scheme must be specified</span>.
 * </p>
 * </blockquote>
 * 
 * <h3>Related {@link URI} accessors:</h3>
 * 
 * <blockquote>
 * <ul>
 * <li>getQuery() returns the decoded query component of the URI.</li>
 * <li>getRawQuery() returns the raw query component of the URI. The query component of a URI, if defined, only contains legal URI
 * characters.</li>
 * </ul>
 * </blockquote>
 * 
 * <h3>URI path parameters (Matrix URIs) <a href="http://www.w3.org/DesignIssues/MatrixURIs.html">MatrixURIs</a>:</h3>
 * 
 * <blockquote>
 * 
 * <p style="color:red">
 * Note: relative Matrix URLs are not generally implemented, so this is just a theoretical discussion.
 * </p>
 * 
 * <p>
 * A URI path parameter is part of a path segment that occurs after its name. <br>
 * </p>
 * <ul>
 * <li>Path parameters offer a unique opportunity to control the representations of resources</li>
 * <li>Since they can't be manipulated by standard Web forms, they have to be constructed out of band</li>
 * <li>Since they're part of the path, they're sequential, unlike query strings</li>
 * <li>Most importantly, however, their behaviour is not explicitly defined.</li>
 * </ul>
 * 
 * <p>
 * When defining constraints for the syntax of path parameters, we can take these characteristics into account, and define parameters that
 * stack sequentially, and each take multiple values.
 * </p>
 * 
 * <p>
 * In the last paragraph of section 3.3, The URI specification suggests employing the semicolon ;, equals = and comma , characters for this
 * task. Therefore:
 * </p>
 * <blockquote>
 * <table border="1" cellspacing="0" cellpadding="4" summary="">
 * <tr style="background-color:#ccccff">
 * <th align="left">characters</th>
 * <th align="left">meaning</th>
 * <th align="left">example</th>
 * </tr>
 * <tr valign="top">
 * <td>semicolon ;</td>
 * <td>will delimit the parameters themselves.<br>
 * That is, anything in a path segment to the right of a semicolon will be treated as a new parameter</td>
 * <td>like this: <span style="color:green">/path/name;param1;p2;p3</span></td>
 * </tr>
 * <tr valign="top" style="background-color:#eeeeff">
 * <td>equals sign =</td>
 * <td>will separate parameter names from their values, should a given parameter take values.<br>
 * That is, within a path parameter, everything to the right of an equals sign is treated as a value,</td>
 * <td>like this: <span style="color:green">param=value;p2</span></td>
 * </tr>
 * <tr valign="top">
 * <td>comma ,</td>
 * <td>will separate individual values passed into a single parameter,</td>
 * <td>like this: <span style="color:green">;param=val1,val2,val3</span></td>
 * </tr>
 * </table>
 * </blockquote>
 * <p>
 * This means that although it may be visually confusing, parameter names can take commas but no equals signs, values can take equals signs
 * but no commas, and no part of the path segment can take semicolons literally. All other sub-delimiters should be percent-encoded. This
 * also means that one's opportunities for self-expression with URI paths are further constrained.
 * </p>
 * 
 * <p>
 * Each path segment may carry path parameters (also called matrix parameters). They are separated from the segment by ";" and use "="
 * between name and value; for example, in "/file;p=1" the path segment is "file" and it carries the path parameter "p" with value "1".<br>
 * This form of parameter is not common, but the Yahoo RESTful API uses matrix parameters in GET requests, see
 * <a href="https://developer.yahoo.com/social/rest_api_guide/partial-resources.html#paging-collection">paging collections</a>.
 * Matrix parameters may appear on any path segment of a URI, not only on the last one.
 * </p>
 * </blockquote>
 * 
 * @author <a href="http://feitianbenyue.iteye.com/">feilong</a>
 * @see java.net.URI
 * @see java.net.URL
 * @see URIComponents
 * @see <a href="http://www.w3.org/DesignIssues/MatrixURIs.html">MatrixURIs</a>
 * @since 1.0.0
 */
public final class URIUtil {

    /** The Constant LOGGER. */
    private static final Logger LOGGER = LoggerFactory.getLogger(URIUtil.class);

    /** Don't let anyone instantiate this class. */
    private URIUtil() {
        // Throwing AssertionError also stops accidental instantiation from inside this class.
        // see Effective Java 2nd
        throw new AssertionError("No " + getClass().getName() + " instances for you!");
    }

    /**
     * Creates a {@link URI} from the given string by delegating to {@link java.net.URI#create(String)}.
     * 
     * <h3>{@link URI#URI(String)} versus {@link URI#create(String)}:</h3>
     * <blockquote>
     * <p>
     * {@link URI#URI(String)} throws the checked {@link URISyntaxException} when the string violates the URI syntax (RFC 2396).<br>
     * {@link java.net.URI#create(String)} is the convenience variant for strings that are expected to be valid: it reports the same
     * failure as an unchecked {@link IllegalArgumentException}, which this method rethrows as {@link URIParseException}.
     * </p>
     * </blockquote>
     * 
     * @param uri
     *            the uri string to parse
     * @return the parsed {@link URI}
     * @throws NullPointerException
     *             if <code>uri</code> is null
     * @throws IllegalArgumentException
     *             if <code>uri</code> is blank
     * @throws URIParseException
     *             if <code>uri</code> cannot be parsed; the original exception is kept as the cause
     * @see java.net.URI#URI(String)
     * @since 1.8.0
     */
    public static URI create(String uri) {
        Validate.notBlank(uri, "uri can't be blank!");
        try {
            return URI.create(uri);
        } catch (IllegalArgumentException e) {
            // URI.create reports every syntax problem as IllegalArgumentException; uri is already known to be non-null here.
            throw new URIParseException(Slf4jUtil.format("uri:[{}]", uri), e);
        }
    }

    /**
     * Encodes <code>uriString</code> with {@link #encodeUri(String, String)} and creates a {@link URI} from the result.
     * 
     * @param uriString
     *            the uri string
     * @param charsetType
     *            the charset used to process the query string parameters; if null or empty the uri string is used as is and the caller
     *            has to deal with compatibility itself<br>
     *            (the original note refers to IE and Chrome sending non-ASCII characters in URLs differently, see the linked question)
     * @return the created {@link URI}
     * @throws NullPointerException
     *             if <code>uriString</code> is null
     * @throws IllegalArgumentException
     *             if <code>uriString</code> is blank
     * @throws URIParseException
     *             if the encoded string cannot be parsed as a URI
     * @see URI#create(String)
     * @see #encodeUri(String, String)
     * @see <a
     *      href="http://stackoverflow.com/questions/15004593/java-request-getquerystring-value-different-between-chrome-and-ie-browser">
     *      java-request-getquerystring-value-different-between-chrome-and-ie-browser</a>
     */
    public static URI create(String uriString, String charsetType) {
        return create(encodeUri(uriString, charsetType));
    }

    /**
     * Re-builds <code>uriString</code> with its query string parameters processed using <code>charsetType</code>.
     * 
     * <p>
     * The query string is converted to a map by {@link ParamUtil#toSafeArrayValueMap(String, String)} and attached again by
     * {@link ParamUtil#addParameterArrayValueMap(String, Map, String)}; the exact decoding and encoding is done by those helpers.
     * </p>
     * 
     * @param uriString
     *            the uri string
     * @param charsetType
     *            the charset used to process the query string parameters; if null or empty, <code>uriString</code> is returned
     *            unchanged and the caller has to deal with compatibility itself<br>
     *            (the original note refers to IE and Chrome sending non-ASCII characters in URLs differently, see the linked question)
     * @return <code>uriString</code> unchanged if it has no query string or if <code>charsetType</code> is null or empty;<br>
     *         otherwise the uri string produced by {@link ParamUtil#addParameterArrayValueMap(String, Map, String)}
     * @throws NullPointerException
     *             if <code>uriString</code> is null
     * @throws IllegalArgumentException
     *             if <code>uriString</code> is blank
     * @see <a
     *      href="http://stackoverflow.com/questions/15004593/java-request-getquerystring-value-different-between-chrome-and-ie-browser">
     *      java-request-getquerystring-value-different-between-chrome-and-ie-browser</a>
     * @see URI#create(String)
     * @see ParamUtil#addParameterArrayValueMap(String, Map, String)
     * @since 1.4.0
     */
    public static String encodeUri(String uriString, String charsetType) {
        Validate.notBlank(uriString, "uriString can't be null/empty!");
        LOGGER.trace("input uriString:[{}],charsetType:{}", uriString, charsetType);

        if (!hasQueryString(uriString) || isNullOrEmpty(charsetType)) {
            return uriString;// nothing to process: no parameters, or no charset to process them with
        }

        Map<String, String[]> safeArrayValueMap = ParamUtil.toSafeArrayValueMap(getQueryString(uriString),
                charsetType);
        String encodeUrl = ParamUtil.addParameterArrayValueMap(uriString, safeArrayValueMap, charsetType);
        LOGGER.trace("input uriString:[{}],charsetType:[{}],after url:[{}]", uriString, charsetType, encodeUrl);
        return encodeUrl;
    }

    /**
     * Returns the part of <code>uriString</code> before the first question mark, that is the full path
     * <span style="color:red">without the question mark and without the query string</span>.
     *
     * @param uriString
     *            the uri
     * @return {@link StringUtils#EMPTY} if <code>uriString</code> is null or empty;<br>
     *         <code>uriString</code> itself if it contains no question mark;<br>
     *         otherwise everything before the first question mark
     * @since 1.8.0 change to default
     */
    static String getFullPathWithoutQueryString(String uriString) {
        if (isNullOrEmpty(uriString)) {
            return EMPTY;
        }
        // XXX only the first question mark is considered; a fragment ("#...") is not treated specially
        int index = uriString.indexOf(QUESTIONMARK);
        return index == INDEX_NOT_FOUND ? uriString : uriString.substring(0, index);
    }

    /**
     * Returns the query string of <code>uriString</code>, that is everything after the first question mark.
     * 
     * <h3>Example:</h3>
     * <blockquote>
     * 
     * <pre class="code">
     * URIUtil.getQueryString({@code "http://127.0.0.1/cmens/t-b-f-a-c-s-f-p-g-e-i-o.htm?a=1&a=2"})
     * </pre>
     * 
     * <b>Returns:</b> {@code a=1&a=2}
     * </blockquote>
     * 
     * @param uriString
     *            the uri
     * @return {@link StringUtils#EMPTY} if <code>uriString</code> is null or empty;<br>
     *         {@link StringUtils#EMPTY} if <code>uriString</code> contains no question mark;<br>
     *         otherwise the text after the first question mark
     * @since 1.8.0 change to default
     */
    static String getQueryString(String uriString) {
        if (isNullOrEmpty(uriString)) {
            return EMPTY;
        }
        // XXX only the first question mark is considered; a fragment ("#...") is not treated specially
        int index = uriString.indexOf(QUESTIONMARK);
        return index == INDEX_NOT_FOUND ? EMPTY : StringUtil.substring(uriString, index + 1);
    }

    /**
     * Tells whether <code>uriString</code> has a query string, judged only by the presence of a question mark.
     *
     * @param uriString
     *            the uri string
     * @return false if <code>uriString</code> is null or empty;<br>
     *         otherwise true exactly when <code>uriString</code> contains {@link URIComponents#QUESTIONMARK}
     * @since 1.8.0 change to private
     */
    // XXX a question mark anywhere in the string counts, no URI parsing is done
    private static boolean hasQueryString(String uriString) {
        return !isNullOrEmpty(uriString) && StringUtils.contains(uriString, QUESTIONMARK);
    }

    // [start] encode/decode

    /**
     * Encodes <code>value</code> with {@link URLEncoder#encode(String, String)} using the given charset.
     * 
     * <p style="color:red">
     * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} are meant for individual values (such as parameter values), not for a
     * complete URL: reserved characters like ":" and "/" would be escaped as well.
     * </p>
     * 
     * <p>
     * Translates a string into <code>application/x-www-form-urlencoded</code> format using a specific encoding scheme. This method uses the
     * supplied encoding scheme to obtain the bytes for unsafe characters.
     * </p>
     * 
     * <p>
     * Encoding rules of {@link URLEncoder}:
     * </p>
     * 
     * <ul>
     * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.</li>
     * <li>The special characters ".", "-", "*" and "_" remain the same.</li>
     * <li>The space character " " is converted into a plus sign "+".</li>
     * <li>All other characters are unsafe and are first converted into one or more bytes using the encoding scheme.<br>
     * Each byte is then represented by the 3-character string "%xy", where xy is the two-digit hexadecimal representation of the byte.<br>
     * The recommended encoding scheme is UTF-8.</li>
     * </ul>
     *
     * @param value
     *            the value
     * @param charsetType
     *            charsetType {@link CharsetType}
     * @return {@link StringUtils#EMPTY} if <code>value</code> is null or empty;<br>
     *         <code>value</code> unchanged if <code>charsetType</code> is null or empty;<br>
     *         otherwise the encoded value
     * @throws URIParseException
     *             if <code>charsetType</code> is not a supported encoding
     * @see URLEncoder#encode(String, String)
     */
    public static String encode(String value, String charsetType) {
        return encodeOrDecode(value, charsetType, true);
    }

    /**
     * Decodes <code>value</code> with {@link java.net.URLDecoder#decode(String, String)} using the given charset.
     * 
     * <p style="color:red">
     * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} are meant for individual values (such as parameter values), not for a
     * complete URL.
     * </p>
     * 
     * <p>
     * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific encoding scheme. The supplied encoding is used to
     * determine what characters are represented by any consecutive sequences of the form "<code>%<i>xy</i></code>".
     * </p>
     * 
     * <p>
     * <em><strong>Note:</strong> The
     * <a href="http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">World Wide Web Consortium Recommendation</a> states that
     * UTF-8 should be used. Not doing so may introduce incompatibilities.</em>
     * </p>
     * 
     * <h3>URLDecoder: Incomplete trailing escape (%) pattern:</h3>
     * <blockquote>
     * 
     * {@link URLDecoder} throws an {@link IllegalArgumentException} with this message when the value ends with an incomplete "%" escape,
     * for example when the last char is a "%" sign. <br>
     * This method does not catch that exception, so it reaches the caller unchanged.
     * 
     * </blockquote>
     *
     * @param value
     *            the value to decode
     * @param charsetType
     *            charsetType {@link CharsetType}
     * @return {@link StringUtils#EMPTY} if <code>value</code> is null or empty;<br>
     *         <code>value</code> unchanged if <code>charsetType</code> is null or empty;<br>
     *         otherwise the decoded value
     * @throws URIParseException
     *             if <code>charsetType</code> is not a supported encoding
     * @throws IllegalArgumentException
     *             if {@link URLDecoder} rejects <code>value</code> as malformed
     * @see <a href="http://dwr.2114559.n2.nabble.com/Exception-URLDecoder-Incomplete-trailing-escape-pattern-td5396332.html">Exception ::
     *      URLDecoder: Incomplete trailing escape (%) pattern</a>
     * @see java.net.URLDecoder#decode(String, String)
     * @see "org.springframework.web.util.UriUtils#decode(String, String)"
     */
    public static String decode(String value, String charsetType) {
        return encodeOrDecode(value, charsetType, false);
    }

    /**
     * Shared implementation of {@link #encode(String, String)} and {@link #decode(String, String)}.
     *
     * @param value
     *            the value
     * @param charsetType
     *            the charset type
     * @param encodeOrDecode
     *            true encode,false decode
     * @return {@link StringUtils#EMPTY} if <code>value</code> is null or empty;<br>
     *         <code>value</code> unchanged if <code>charsetType</code> is null or empty;<br>
     *         otherwise the encoded or decoded value
     * @throws URIParseException
     *             if <code>charsetType</code> is not a supported encoding; the message names the operation, value and charset
     * @since 1.6.2
     */
    private static String encodeOrDecode(String value, String charsetType, boolean encodeOrDecode) {
        if (isNullOrEmpty(value)) {
            return EMPTY;
        }
        if (isNullOrEmpty(charsetType)) {
            return value;// no charset given: hand the value back untouched
        }
        try {
            return encodeOrDecode ? URLEncoder.encode(value, charsetType) : URLDecoder.decode(value, charsetType);
        } catch (UnsupportedEncodingException e) {
            // Only the unsupported-charset case is translated; IllegalArgumentException from URLDecoder still propagates.
            String action = encodeOrDecode ? "encode" : "decode";
            throw new URIParseException(action + " value:[" + value + "],charsetType:[" + charsetType + "]", e);
        }
    }

    // [end]
}