Java tutorial
/* * Copyright (C) 2008 feilong * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sunchenbin.store.feilong.core.net; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.sunchenbin.store.feilong.core.lang.CharsetType; import com.sunchenbin.store.feilong.core.lang.StringUtil; import com.sunchenbin.store.feilong.core.tools.slf4j.Slf4jUtil; import com.sunchenbin.store.feilong.core.util.Validator; /** * ?{@link java.net.URI}(Uniform Resource Locator) {@link java.net.URL}(Uniform Resource Identifier) . * * <h3>{@link java.net.URI} {@link java.net.URL}:</h3> * * <blockquote> * * <table border="1" cellspacing="0" cellpadding="4"> * <tr style="background-color:#ccccff"> * <th align="left"></th> * <th align="left"></th> * </tr> * * <tr valign="top"> * <td>{@link java.net.URI URI}<br> * Uniform Resource Identifier</td> * <td>?,??<br> * There are two types of URIs: URLs and URNs. <br> * See RFC 1630: Universal Resource Identifiers in WWW: A Unifying Syntax for the Expression of Names and Addresses of Objects on the * Network as used in the WWW.</td> * </tr> * * <tr valign="top" style="background-color:#eeeeff"> * <td>{@link java.net.URL URL}<br> * Uniform Resource Locator</td> * <td>??,?URI,?URL???,locate? <br> * See RFC 1738: Uniform Resource Locators (URL)</td> * </tr> * * <tr valign="top"> * <td>URN<br> * uniform resource name</td> * <td>???,????, <br> * mailto:java-net@java.sun.com</td> * </tr> * </table> * * <p> * ,{@link java.net.URI URI}?,?,URLURN??URLURN?URI * </p> * * <p> * {@link URI}??,?,???URI;<br> * {@link URL}??,????,<span style="color:red">?,schema</span> * </p> * </blockquote> * * <h3> {@link URI} ?:</h3> * * <blockquote> * <ul> * <li>getQuery() URI ??</li> * <li>getRawQuery() URI ? URI ???? URI </li> * </ul> * </blockquote> * * <h3> URI path parameter(Matrix URIs) {@link <a href="http://www.w3.org/DesignIssues/MatrixURIs.html">MatrixURIs</a>}:</h3> * * <blockquote> * * <p style="color:red"> * Note: relative Matrix URLs are notgenerally implemented so this is just a theoretical discussion. * </p> * * <p> * A URI path parameter is part of a path segment that occurs after its name. <br> * </p> * <ul> * <li>Path parameters offer a unique opportunity to control the representations of resources</li> * <li>Since they can't be manipulated by standard Web forms, they have to be constructed out of band</li> * <li>Since they're part of the path, they're sequential, unlike query strings</li> * <li>Most importantly, however, their behaviour is not explicitly defined.</li> * </ul> * * <p> * When defining constraints for the syntax of path parameters, we can take these characteristics into account, and define parameters that * stack sequentially, and each take multiple values. * </p> * * <p> * In the last paragraph of section 3.3, The URI specification suggests employing the semicolon ;, equals = and comma , characters for this * task. Therefore: * </p> * <blockquote> * <table border="1" cellspacing="0" cellpadding="4"> * <tr style="background-color:#ccccff"> * <th align="left">characters</th> * <th align="left"></th> * <th align="left">example</th> * </tr> * <tr valign="top"> * <td>semicolon ;</td> * <td>will delimit the parameters themselves.<br> * That is, anything in a path segment to the right of a semicolon will be treated as a new parameter</td> * <td>like this: <span style="color:green">/path/name;param1;p2;p3</span></td> * </tr> * <tr valign="top" style="background-color:#eeeeff"> * <td>equals sign =</td> * <td>will separate parameter names from their values, should a given parameter take values.<br> * That is, within a path parameter, everything to the right of an equals sign is treated as a value,</td> * <td>like this: <span style="color:green">param=value;p2</span></td> * </tr> * <tr valign="top"> * <td>comma ,</td> * <td>will separate individual values passed into a single parameter,</td> * <td>like this: <span style="color:green">;param=val1,val2,val3</span></td> * </tr> * </table> * </blockquote> * <p> * This means that although it may be visually confusing, parameter names can take commas but no equals signs, values can take equals signs * but no commas, and no part of the path segment can take semicolons literally. All other sub-delimiters should be percent-encoded. This * also means that one's opportunities for self-expression with URI paths are further constrained. * </p> * * <p> * ?path??path?(?matrix?),path?";"????"=",?"/file;p=1",path"file"path?"p","1"<br> * ?? ,YahooRESTfulAPI ? Matrix???GET???? * {@link <a href="https://developer.yahoo.com/social/rest_api_guide/partial-resources.html#paging-collection">?</a>} * matrix???URI?path,?path * </p> * </blockquote> * * @author feilong * @version 1.0.0 2010-6-11 ?02:06:43 * @see java.net.URI * @see java.net.URL * @see URIComponents * @see <a href="http://www.w3.org/DesignIssues/MatrixURIs.html">MatrixURIs</a> * @since 1.0.0 */ public final class URIUtil { /** The Constant LOGGER. */ private static final Logger LOGGER = LoggerFactory.getLogger(URIUtil.class); /** Don't let anyone instantiate this class. */ private URIUtil() { //AssertionError?. ?????. ???. //see Effective Java 2nd throw new AssertionError("No " + getClass().getName() + " instances for you!"); } /** * call {@link java.net.URI#URI(String)}. * * <p> * StringURI???RFC 2396, {@link URISyntaxException}.<br> * ?URI,? {@link URISyntaxException},??? {@link java.net.URI#create(String)} * </p> * * @param uri * the uri * @return {@link java.net.URI#URI(String)} * @see java.net.URI#URI(String) * @since 1.3.0 */ public static URI newURI(String uri) { try { return new URI(uri); } catch (URISyntaxException e) { LOGGER.error(Slf4jUtil.formatMessage("uri:[{}]", uri), e); throw new URIParseException(e); } } /** * uricharset {@link URI}. * * <p> * {@link URI#create(String)} * </p> * * <p> * uriString????, {@link URI#create(String)}<br> * uriString???, {@link ParamUtil#addParameterArrayValueMap(String, Map, String)}url,? {@link URI#create(String)} * * </p> * * @param uriString * the uri string * @param charsetType * ??,null empty,?,?<br> * ??,??,ie?chrome ? url , ? * @return if isNullOrEmpty(uri),return null;<br> * if Exception,throw URIParseException * @see URI#create(String) * @see #encodeUri(String, String) * @see <a * href="http://stackoverflow.com/questions/15004593/java-request-getquerystring-value-different-between-chrome-and-ie-browser">java-request-getquerystring-value-different-between-chrome-and-ie-browser</a> */ public static URI create(String uriString, String charsetType) { try { String encodeUrl = encodeUri(uriString, charsetType); return URI.create(encodeUrl); } catch (Exception e) { LOGGER.error("Exception:", e); throw new URIParseException(e); } } /** * ?path??. * * <p> * ( {@link java.net.URI#isAbsolute()},?{@code url's scheme !=null} ). * </p> * * @param uriString * * @return <tt>true</tt> if, and only if, this URI is absolute * @see java.net.URI#isAbsolute() */ public static boolean isAbsolutePath(String uriString) { URI uri = newURI(uriString); return null == uri ? false : uri.isAbsolute(); } /** * uriStringcharset {@link URI}. * * <p> * {@link URI#create(String)} * </p> * * <p> * uriString????,{@link URI#create(String)}<br> * uriString???,{@link ParamUtil#addParameterArrayValueMap(String, Map, String)}url,? {@link URI#create(String)} * </p> * * @param uriString * the uri string * @param charsetType * ??,null empty,?,?<br> * ??,??,ie?chrome ? url , ? * @return the string * @see <a * href="http://stackoverflow.com/questions/15004593/java-request-getquerystring-value-different-between-chrome-and-ie-browser">java-request-getquerystring-value-different-between-chrome-and-ie-browser</a> * @see URI#create(String) * @see ParamUtil#addParameterArrayValueMap(String, Map, String) * @since 1.4.0 */ public static String encodeUri(String uriString, String charsetType) { if (Validator.isNullOrEmpty(uriString)) { throw new NullPointerException("the url is null or empty!"); } LOGGER.debug("in uriString:[{}],charsetType:{}", uriString, charsetType); if (!hasQueryString(uriString)) { return uriString;// ?? ??? } // XXX ?? ? // cmens/t-b-f-a-c-s-f-p400-600,0-200,200-400,600-up-gCold Gear-eBase Layer-i1-o.htm // ??? String queryString = StringUtil.substring(uriString, URIComponents.QUESTIONMARK, 1); Map<String, String[]> map = ParamUtil.toSafeArrayValueMap(queryString, charsetType); String encodeUrl = ParamUtil.addParameterArrayValueMap(uriString, map, charsetType); LOGGER.debug("after url:{}", encodeUrl); return encodeUrl; } /** * ??queryStringpath,????( <span style="color:red">???</span>). * * @param uri * the uri * @return if isNullOrEmpty(url),renturn {@link StringUtils#EMPTY} * @since 1.4.0 */ public static String getFullPathWithoutQueryString(String uri) { if (Validator.isNullOrEmpty(uri)) { return StringUtils.EMPTY; } // url??? XXX int index = uri.indexOf(URIComponents.QUESTIONMARK); if (index == -1) { return uri; } return uri.substring(0, index); } /** * queryString. * * <p> * uriString isNullOrEmpty, uriString ???, empty,????? * </p> * * @param uriString * the uri * @return the query string * @since 1.4.0 */ public static String getQueryString(String uriString) { if (Validator.isNullOrEmpty(uriString)) { return StringUtils.EMPTY; } // url??? XXX int index = uriString.indexOf(URIComponents.QUESTIONMARK); if (index == -1) { return StringUtils.EMPTY; } return StringUtil.substring(uriString, index + 1); } /** * ?queryString. * * @param uriString * the uri string * @return true, if checks for query string * @since 1.4.0 */ // XXX public static boolean hasQueryString(String uriString) { // url??? return Validator.isNullOrEmpty(uriString) ? false : -1 != uriString.indexOf(URIComponents.QUESTIONMARK); } // [start] encode/decode /** * iso-8859-1??. * * <p> * iso-8859-1JAVA * </p> * * @param str * * @param charsetType * ?,see {@link CharsetType} * @return ?,if isNullOrEmpty(str) return "" * @see "org.apache.commons.codec.net.URLCodec#encode(String, String)" */ public static String decodeISO88591String(String str, String charsetType) { if (Validator.isNullOrEmpty(str)) { return StringUtils.EMPTY; } byte[] bytes = StringUtil.getBytes(str, CharsetType.ISO_8859_1); return StringUtil.newString(bytes, charsetType); } /** * ?,?? . * * <p style="color:red"> * ?? {@link java.net.URLEncoder} {@link java.net.URLDecoder}??URL,???. * </p> * * <p> * Translates a string into <code>application/x-www-form-urlencoded</code> format using a specific encoding scheme. This method uses the * supplied encoding scheme to obtain the bytes for unsafe characters. * </p> * * <p> * * </p> * * <ul> * <li>? "a" "z"?"A" "Z" "0" "9" ????.</li> * <li> "."?"-"?"*" "_" ????.</li> * <li> " " ?? "+".</li> * <li>?,??.<br> * ??? 3 "%xy" , xy ????.<br> * ??? UTF-8.<br> * ,,??,??.</li> * </ul> * * @param value * the value * @param charsetType * charsetType {@link CharsetType} * @return {@link java.net.URLEncoder#encode(String, String)}<br> * if isNullOrEmpty(charsetType), value<br> * @see URLEncoder#encode(String, String) * @see CharsetType */ public static String encode(String value, String charsetType) { if (Validator.isNullOrEmpty(value)) { return StringUtils.EMPTY; } if (Validator.isNullOrEmpty(charsetType)) { return value; } try { return URLEncoder.encode(value, charsetType); } catch (UnsupportedEncodingException e) { LOGGER.error("UnsupportedEncodingException:", e); throw new URIParseException(e); } } /** * ?,??. * * <p style="color:red"> * ?? {@link java.net.URLEncoder} {@link java.net.URLDecoder}??URL,???. * </p> * * <p> * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific encoding scheme. The supplied encoding is used to * determine what characters are represented by any consecutive sequences of the form "<code>%<i>xy</i></code>". * </p> * * <p> * Not doing so may introduce incompatibilites.<br> * <em><strong>Note:</strong> * <a href="http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">World Wide Web Consortium Recommendation</a>,UTF-8. ????.</em> * </p> * * @param value * ?? * @param charsetType * charsetType {@link CharsetType} * @return the newly {@link java.net.URLDecoder#decode(String, String)} ??<br> * if isNullOrEmpty(charsetType) , value<br> * @see URLEncoder#encode(java.lang.String, java.lang.String) * @see CharsetType */ public static String decode(String value, String charsetType) { if (Validator.isNullOrEmpty(value)) { return StringUtils.EMPTY; } if (Validator.isNullOrEmpty(charsetType)) { return value; } try { return URLDecoder.decode(value, charsetType); } catch (UnsupportedEncodingException e) { LOGGER.error("UnsupportedEncodingException:", e); throw new URIParseException(e); } } // [end] }