Description
Decodes a component of a URL encoded by a browser.
License
Apache License
Parameter
Parameter | Description |
---|
s | The string to decode (can be empty). |
charset | The charset to use to decode the string (should really be Charsets#UTF_8). |
Exception
Parameter | Description |
---|
IllegalArgumentException | if the string contains a malformed escape sequence. |
Return
The decoded string, or s if there's nothing to decode. If the string to decode is null, returns an empty string.
Declaration
@SuppressWarnings("fallthrough")
public static String decodeComponent(final String s, final Charset charset)
Method Source Code
//package com.java2s;
/* (from www.java2s.com)
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.nio.charset.Charset;
public class Main {
    /**
     * Decodes a component of a URL encoded by a browser.
     * <p>
     * The string is expected to be encoded as per RFC 3986, Section 2.
     * This is the encoding used by JavaScript functions {@code encodeURI}
     * and {@code encodeURIComponent}, but not {@code escape}. For example
     * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
     * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
     * <p>
     * Decoding rules applied by this method:
     * <ul>
     *   <li>{@code +} becomes a space;</li>
     *   <li>{@code %%} becomes a single {@code %};</li>
     *   <li>each run of consecutive {@code %XX} escapes is collected as raw
     *       bytes and decoded with {@code charset};</li>
     *   <li>every other character is copied to the result unchanged, so
     *       unescaped non-ASCII characters (including surrogate pairs)
     *       survive decoding instead of being truncated to a single
     *       byte.</li>
     * </ul>
     * If the string contains neither {@code %} nor {@code +}, nothing is
     * allocated and the argument itself is returned.
     *
     * @param s       The string to decode (can be empty).
     * @param charset The charset to use to decode the escaped bytes (should
     *                really be {@link java.nio.charset.StandardCharsets#UTF_8}).
     * @return The decoded string, or {@code s} if there's nothing to decode.
     *         If the string to decode is {@code null}, returns an empty string.
     * @throws IllegalArgumentException if the string contains a malformed
     *                                  escape sequence.
     * @throws NullPointerException     if {@code charset} is {@code null} and
     *                                  the string contains a {@code %} or
     *                                  {@code +}.
     */
    public static String decodeComponent(final String s, final Charset charset) {
        if (s == null) {
            return "";
        }
        // Fast path: without '%' or '+' there is nothing to decode.
        if (s.indexOf('%') < 0 && s.indexOf('+') < 0) {
            return s;
        }
        // The byte-buffer implementation always failed on a null charset once
        // decoding was required; keep that contract explicit.
        if (charset == null) {
            throw new NullPointerException("charset");
        }

        final int size = s.length();
        final StringBuilder out = new StringBuilder(size);
        // Every %XX escape consumes three input chars, so at most size / 3
        // escaped bytes can ever be pending.
        final byte[] escaped = new byte[size / 3];
        int pending = 0; // number of valid bytes in `escaped'.

        for (int i = 0; i < size; i++) {
            final char c = s.charAt(i);
            if (c != '%') {
                // Literal character: emit pending escaped bytes first so the
                // output order matches the input order.
                pending = flushEscaped(out, escaped, pending, charset);
                out.append(c == '+' ? ' ' : c); // "+" -> " "
                continue;
            }
            if (i == size - 1) {
                throw new IllegalArgumentException("unterminated escape" + " sequence at end of string: " + s);
            }
            final char first = s.charAt(++i);
            if (first == '%') {
                pending = flushEscaped(out, escaped, pending, charset);
                out.append('%'); // "%%" -> "%"
                continue;
            }
            if (i == size - 1) {
                throw new IllegalArgumentException("partial escape" + " sequence at end of string: " + s);
            }
            final char second = s.charAt(++i);
            final char hi = decodeHexNibble(first);
            final char lo = decodeHexNibble(second);
            if (hi == Character.MAX_VALUE || lo == Character.MAX_VALUE) {
                // `i' is on the second hex digit, so the '%' sits at i - 2.
                throw new IllegalArgumentException("invalid escape sequence `%" + first + second
                        + "' at index " + (i - 2) + " of: " + s);
            }
            escaped[pending++] = (byte) (hi * 16 + lo);
        }
        flushEscaped(out, escaped, pending, charset);
        return out.toString();
    }

    /**
     * Decodes the first {@code length} bytes of {@code escaped} with
     * {@code charset} and appends the result to {@code out}.
     *
     * @param out     The builder receiving the decoded characters.
     * @param escaped The buffer holding the pending escaped bytes.
     * @param length  The number of pending bytes; nothing is appended when
     *                this is zero.
     * @param charset The charset used to decode the bytes.
     * @return Always {@code 0}, the new number of pending bytes.
     */
    private static int flushEscaped(final StringBuilder out, final byte[] escaped, final int length,
            final Charset charset) {
        if (length > 0) {
            out.append(new String(escaped, 0, length, charset));
        }
        return 0;
    }

    /**
     * Helper to decode half of a hexadecimal number from a string.
     *
     * @param c The ASCII character of the hexadecimal number to decode.
     *          Must be in the range {@code [0-9a-fA-F]}.
     * @return The hexadecimal value represented in the ASCII character
     *         given, or {@link Character#MAX_VALUE} if the character is invalid.
     */
    private static char decodeHexNibble(final char c) {
        if ('0' <= c && c <= '9') {
            return (char) (c - '0');
        } else if ('a' <= c && c <= 'f') {
            return (char) (c - 'a' + 10);
        } else if ('A' <= c && c <= 'F') {
            return (char) (c - 'A' + 10);
        } else {
            return Character.MAX_VALUE;
        }
    }
}
Related
- decode(String s, Charset encoding)
- decode(String url, Charset charset)
- decode(String value, Charset charset)
- decode(String value, Charset charset)
- decodeCharset(String value, String charset)
- decodeFormFields(final String content, final Charset charset)
- decodeURL(@Nullable String str, Charset charSet)
- decodeWithDefaultCharSet(String urlToDecode)
- getCharsetDecoder(String charsetName)