Here you can find the source of bytes2StringUTF8(byte[] buf, int bufOffset, int bufLength, boolean bigEndian)
Parameter | Description |
---|---|
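buf | the byte array holding the UTF-8 encoded data |
bufOffset | index in buf of the first byte to decode |
bufLength | number of bytes to decode, starting at bufOffset |
bigEndian | if true, each decoded char is stored high byte first; if false, the two bytes of each char are swapped |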
public static String bytes2StringUTF8(byte[] buf, int bufOffset, int bufLength, boolean bigEndian)
//package com.java2s;

public class Main {
    /** Character emitted in place of a malformed or truncated multi-byte sequence. */
    private static final char REPLACEMENT = '?';

    /**
     * Decodes a range of UTF-8 encoded bytes into a {@code String}.
     *
     * <p>Only the bytes in {@code [bufOffset, bufOffset + bufLength)} are examined. A multi-byte
     * sequence that is malformed, or that is cut off by the end of the range, is replaced by a
     * single {@code '?'}. Stray continuation bytes and the invalid lead bytes 0xF8..0xFF are
     * skipped without producing output. Four-byte sequences are decoded to a surrogate pair.
     *
     * @param buf the bytes to decode
     * @param bufOffset index of the first byte to decode
     * @param bufLength number of bytes to decode; a non-positive value yields an empty string
     * @param bigEndian when {@code true} every decoded 16-bit unit is stored as is; when
     *     {@code false} the two bytes of every unit (including the {@code '?'} replacement)
     *     are swapped
     * @return the decoded string
     * @throws ArrayIndexOutOfBoundsException if the requested range lies outside {@code buf}
     */
    public static String bytes2StringUTF8(byte[] buf, int bufOffset, int bufLength, boolean bigEndian) {
        // bytesUTF8len is an upper bound; the decoder reports how many chars it really wrote.
        char[] cbuf = new char[bytesUTF8len(buf, bufOffset, bufLength)];
        int len = bytes2charsUTF8(buf, bufOffset, bufLength, cbuf, bigEndian);
        return new String(cbuf, 0, len);
    }

    /**
     * Decodes the whole array as UTF-8, storing every decoded unit without byte swapping.
     *
     * @param buf the bytes to decode
     * @return the decoded string
     */
    public static String bytes2StringUTF8(byte[] buf) {
        return bytes2StringUTF8(buf, 0, buf.length, true);
    }

    /**
     * Computes an upper bound on the number of chars the range decodes to.
     *
     * <p>Every byte that can start a sequence (0xxxxxxx or 11xxxxxx) counts once, except a
     * four-byte lead (11110xxx), which counts twice because it may decode to a surrogate pair.
     *
     * @param buf the bytes to inspect
     * @param bufOffset index of the first byte to inspect
     * @param bufLength number of bytes to inspect
     * @return the maximum number of chars the decoder can produce for this range
     */
    private static int bytesUTF8len(byte[] buf, int bufOffset, int bufLength) {
        int end = bufOffset + bufLength;
        int len = 0;
        for (int i = bufOffset; i < end; i++) {
            int b = buf[i] & 0xff;
            if ((b & 0x80) == 0x00 || (b & 0xc0) == 0xc0) {
                len += ((b & 0xf8) == 0xf0) ? 2 : 1;
            }
        }
        return len;
    }

    /**
     * Decodes UTF-8 bytes into {@code cbuf}.
     *
     * @param buf the bytes to decode
     * @param bufOffset index of the first byte to decode
     * @param bufLength number of bytes to decode
     * @param cbuf destination; must hold at least {@code bytesUTF8len(buf, bufOffset, bufLength)}
     *     chars
     * @param bigEndian when {@code false} the two bytes of every stored char are swapped
     * @return the number of chars written to {@code cbuf}
     */
    private static int bytes2charsUTF8(byte[] buf, int bufOffset, int bufLength, char[] cbuf,
            boolean bigEndian) {
        final int end = bufOffset + bufLength;
        int cpos = 0;
        int pos = bufOffset;

        while (pos < end) {
            int lead = buf[pos] & 0xff;
            int expected; // number of continuation bytes the lead byte announces
            int codePoint; // payload bits accumulated so far

            if (lead < 0x80) {
                // U+0000 - U+007F: 0xxxxxxx
                expected = 0;
                codePoint = lead;
            } else if ((lead & 0xe0) == 0xc0) {
                // U+0080 - U+07FF: 110xxxxx 10xxxxxx
                expected = 1;
                codePoint = lead & 0x1f;
            } else if ((lead & 0xf0) == 0xe0) {
                // U+0800 - U+FFFF: 1110xxxx 10xxxxxx 10xxxxxx
                expected = 2;
                codePoint = lead & 0x0f;
            } else if ((lead & 0xf8) == 0xf0) {
                // U+10000 - U+10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                expected = 3;
                codePoint = lead & 0x07;
            } else {
                // Stray continuation byte or invalid lead byte: dropped without output.
                pos++;
                continue;
            }

            // Consume continuation bytes, never looking past the end of the requested range.
            int found = 0;
            while (found < expected
                    && pos + 1 + found < end
                    && isContinuation(buf[pos + 1 + found])) {
                codePoint = (codePoint << 6) | (buf[pos + 1 + found] & 0x3f);
                found++;
            }
            // Skip the lead byte plus whatever valid continuation bytes followed it, so the
            // next iteration resumes at the first byte that did not belong to this sequence.
            pos += 1 + found;

            if (found < expected || codePoint > Character.MAX_CODE_POINT) {
                cbuf[cpos++] = orderBytes(REPLACEMENT, bigEndian);
            } else if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
                int offset = codePoint - Character.MIN_SUPPLEMENTARY_CODE_POINT;
                cbuf[cpos++] = orderBytes(
                        (char) (Character.MIN_HIGH_SURROGATE + (offset >> 10)), bigEndian);
                cbuf[cpos++] = orderBytes(
                        (char) (Character.MIN_LOW_SURROGATE + (offset & 0x3ff)), bigEndian);
            } else {
                cbuf[cpos++] = orderBytes((char) codePoint, bigEndian);
            }
        }
        return cpos;
    }

    /** Returns whether {@code b} has the UTF-8 continuation-byte form 10xxxxxx. */
    private static boolean isContinuation(byte b) {
        return (b & 0xc0) == 0x80;
    }

    /** Returns {@code unit} unchanged when {@code bigEndian}, otherwise with its bytes swapped. */
    private static char orderBytes(char unit, boolean bigEndian) {
        return bigEndian ? unit : Character.reverseBytes(unit);
    }
}