Java String Decode by Charset decodeComponent(final String s, final Charset charset)

Description

Decodes a URL component that was percent-encoded by a browser.

License

Apache License

Parameter

Parameter	Description
s	The string to decode (can be empty).
charset	The charset to use to decode the string (should really be StandardCharsets#UTF_8).

Exception

Exception	Description
IllegalArgumentException	if the string contains a malformed escape sequence.

Return

The decoded string, or s if there's nothing to decode. If the string to decode is null, returns an empty string.

Declaration

@SuppressWarnings("fallthrough")
public static String decodeComponent(final String s, final Charset charset)

Method Source Code


//package com.java2s;
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.ByteArrayOutputStream;
import java.nio.charset.Charset;

public class Main {
    /**
     * Decodes a URL component that was percent-encoded by a browser.
     * <p>
     * The string is expected to be encoded as per RFC 3986, Section 2.
     * This is the encoding used by JavaScript functions {@code encodeURI}
     * and {@code encodeURIComponent}, but not {@code escape}.  For example
     * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
     * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
     * <p>
     * Decoding rules:
     * <ul>
     *   <li>{@code +} becomes a space;</li>
     *   <li>{@code %%} becomes a single {@code %};</li>
     *   <li>{@code %XY} (two hexadecimal digits) becomes the byte
     *       {@code 0xXY}, which is then interpreted using {@code charset};</li>
     *   <li>every other character is kept as is.  Literal characters above
     *       {@code U+007F} are encoded with {@code charset} before the final
     *       decoding step so that they survive the round trip instead of
     *       being truncated to a single byte.</li>
     * </ul>
     * <p>
     * If the string contains neither {@code %} nor {@code +}, nothing is
     * allocated and the argument itself is returned.
     *
     * @param s       The string to decode (can be empty).
     * @param charset The charset used to interpret the decoded bytes (should
     *                really be {@link java.nio.charset.StandardCharsets#UTF_8}).
     *                Literal ASCII characters are written as single bytes, so
     *                an ASCII-compatible charset is expected.
     * @return The decoded string, or {@code s} if there's nothing to decode.
     * If the string to decode is {@code null}, returns an empty string.
     * @throws IllegalArgumentException if the string contains a malformed
     *                                  escape sequence.
     */
    public static String decodeComponent(final String s, final Charset charset) {
        if (s == null) {
            return "";
        }
        // Fast path: without '%' or '+' there is nothing to decode, so hand
        // the argument back untouched and allocate nothing.
        if (s.indexOf('%') < 0 && s.indexOf('+') < 0) {
            return s;
        }
        final int size = s.length();
        // A growable buffer is required: a literal non-ASCII character may
        // need more than one byte once encoded with `charset'.
        final ByteArrayOutputStream buf = new ByteArrayOutputStream(size);
        int i = 0;
        while (i < size) {
            final char c = s.charAt(i);
            if (c == '+') {
                buf.write(' '); // "+" -> " "
                i++;
            } else if (c == '%') {
                i = decodeEscape(s, i, buf);
            } else if (c < 0x80) {
                buf.write(c); // Plain ASCII maps to exactly one byte.
                i++;
            } else {
                // Encode the whole run of non-ASCII characters at once so
                // that surrogate pairs are never split.  Casting such a char
                // to a byte would silently drop its high bits.
                int end = i + 1;
                while (end < size && s.charAt(end) >= 0x80) {
                    end++;
                }
                final byte[] encoded = s.substring(i, end).getBytes(charset);
                buf.write(encoded, 0, encoded.length);
                i = end;
            }
        }
        return new String(buf.toByteArray(), charset);
    }

    /**
     * Decodes the escape sequence starting at the {@code '%'} found at
     * {@code index} and appends the resulting byte to {@code out}.
     *
     * @param s     The string being decoded.
     * @param index The index of the {@code '%'} character in {@code s}.
     * @param out   The buffer receiving the decoded byte.
     * @return The index of the first character after the escape sequence.
     * @throws IllegalArgumentException if the escape sequence is truncated
     *                                  or contains a non-hexadecimal digit.
     */
    private static int decodeEscape(final String s, final int index, final ByteArrayOutputStream out) {
        final int last = s.length() - 1;
        if (index == last) {
            throw new IllegalArgumentException("unterminated escape" + " sequence at end of string: " + s);
        }
        final char first = s.charAt(index + 1);
        if (first == '%') {
            out.write('%'); // "%%" -> "%"
            return index + 2;
        }
        if (index + 1 == last) {
            throw new IllegalArgumentException("partial escape" + " sequence at end of string: " + s);
        }
        final char second = s.charAt(index + 2);
        final char high = decodeHexNibble(first);
        final char low = decodeHexNibble(second);
        if (high == Character.MAX_VALUE || low == Character.MAX_VALUE) {
            throw new IllegalArgumentException("invalid escape sequence `%" + first + second
                    + "' at index " + index + " of: " + s);
        }
        out.write(high * 16 + low);
        return index + 3;
    }

    /**
     * Helper to decode half of a hexadecimal number from a string.
     *
     * @param c The ASCII character of the hexadecimal number to decode.
     *          Must be in the range {@code [0-9a-fA-F]}.
     * @return The hexadecimal value represented in the ASCII character
     * given, or {@link Character#MAX_VALUE} if the character is invalid.
     */
    private static char decodeHexNibble(final char c) {
        if ('0' <= c && c <= '9') {
            return (char) (c - '0');
        } else if ('a' <= c && c <= 'f') {
            return (char) (c - 'a' + 10);
        } else if ('A' <= c && c <= 'F') {
            return (char) (c - 'A' + 10);
        } else {
            return Character.MAX_VALUE;
        }
    }
}

Java String Decode by Charset decodeComponent(final String s, final Charset charset)

Description

License

Parameter

Exception

Return

Declaration

Method Source Code

Related