Java UTF8 utf8BytesToString(byte[] bytes, int start, int length)

Description

Converts an array of UTF-8 bytes into a string.

License

Apache License

Parameter

Parameter	Description
bytes	non-null; the bytes to convert
start	the start index of the utf8 string to convert
length	the length of the utf8 string to convert, not including any null-terminator that might be present

Return

non-null; the converted string

Declaration

public static String utf8BytesToString(byte[] bytes, int start,
        int length)

Method Source Code

//package com.java2s;
/*/*from   w w w .ja va 2s .  c  o  m*/
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class Main {
    private static char[] tempBuffer = null;

    /**
     * Converts an array of UTF-8 bytes into a string.
     * 
     * This method uses a global buffer to avoid having to allocate one every time, so it is *not* thread-safe
     * 
     * @param bytes
     *            non-null; the bytes to convert
     * @param start
     *            the start index of the utf8 string to convert
     * @param length
     *            the length of the utf8 string to convert, not including any null-terminator that might be present
     * @return non-null; the converted string
     */
    public static String utf8BytesToString(byte[] bytes, int start,
            int length) {
        if (tempBuffer == null || tempBuffer.length < length) {
            tempBuffer = new char[length];
        }
        char[] chars = tempBuffer;
        int outAt = 0;

        for (int at = start; length > 0; /* at */) {
            int v0 = bytes[at] & 0xFF;
            char out;
            switch (v0 >> 4) {
            case 0x00:
            case 0x01:
            case 0x02:
            case 0x03:
            case 0x04:
            case 0x05:
            case 0x06:
            case 0x07: {
                // 0XXXXXXX -- single-byte encoding
                length--;
                if (v0 == 0) {
                    // A single zero byte is illegal.
                    return throwBadUtf8(v0, at);
                }
                out = (char) v0;
                at++;
                break;
            }
            case 0x0c:
            case 0x0d: {
                // 110XXXXX -- two-byte encoding
                length -= 2;
                if (length < 0) {
                    return throwBadUtf8(v0, at);
                }
                int v1 = bytes[at + 1] & 0xFF;
                if ((v1 & 0xc0) != 0x80) {
                    return throwBadUtf8(v1, at + 1);
                }
                int value = ((v0 & 0x1f) << 6) | (v1 & 0x3f);
                if ((value != 0) && (value < 0x80)) {
                    /*
                     * This should have been represented with one-byte encoding.
                     */
                    return throwBadUtf8(v1, at + 1);
                }
                out = (char) value;
                at += 2;
                break;
            }
            case 0x0e: {
                // 1110XXXX -- three-byte encoding
                length -= 3;
                if (length < 0) {
                    return throwBadUtf8(v0, at);
                }
                int v1 = bytes[at + 1] & 0xFF;
                if ((v1 & 0xc0) != 0x80) {
                    return throwBadUtf8(v1, at + 1);
                }
                int v2 = bytes[at + 2] & 0xFF;
                if ((v1 & 0xc0) != 0x80) {
                    return throwBadUtf8(v2, at + 2);
                }
                int value = ((v0 & 0x0f) << 12) | ((v1 & 0x3f) << 6)
                        | (v2 & 0x3f);
                if (value < 0x800) {
                    /*
                     * This should have been represented with one- or two-byte encoding.
                     */
                    return throwBadUtf8(v2, at + 2);
                }
                out = (char) value;
                at += 3;
                break;
            }
            default: {
                // 10XXXXXX, 1111XXXX -- illegal
                return throwBadUtf8(v0, at);
            }
            }
            chars[outAt] = out;
            outAt++;
        }

        return new String(chars, 0, outAt);
    }

    /**
     * Helper for {@link #utf8BytesToString}, which throws the right exception for a bogus utf-8 byte.
     * 
     * @param value
     *            the byte value
     * @param offset
     *            the file offset
     * @return never
     * @throws IllegalArgumentException
     *             always thrown
     */
    private static String throwBadUtf8(int value, int offset) {
        throw new IllegalArgumentException("bad utf-8 byte "
                + String.format("%02x", value) + " at offset "
                + String.format("%08x", offset));
    }
}

Java UTF8 utf8BytesToString(byte[] bytes, int start, int length)

Description

License

Parameter

Return

Declaration

Method Source Code

Related