Java examples for java.lang:String UTF
Generates a text string from a UTF-8 byte array.
/*
Written in 2013 by Peter O.
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/
If you like this, you should donate to Peter O.
at: http://upokecenter.dreamhosters.com/articles/donate-now-2/
 */

/**
 * Static helpers that decode UTF-8 encoded bytes into Java strings.
 *
 * <p>The decoder rejects overlong encodings, encoded surrogate code points
 * (U+D800 to U+DFFF) and code points above U+10FFFF by restricting the
 * allowed range of the first continuation byte after certain lead bytes.
 */
public class Main {
  /** Character appended in place of each invalid sequence when replacing. */
  private static final char REPLACEMENT_CHAR = (char) 0xfffd;

  /** Default lowest value of a continuation byte. */
  private static final int CONTINUATION_MIN = 0x80;

  /** Default highest value of a continuation byte. */
  private static final int CONTINUATION_MAX = 0xbf;

  /**
   * Generates a text string from a UTF-8 byte array.
   *
   * @param bytes A byte array containing text encoded in UTF-8.
   * @param replace If true, replaces invalid encoding with the replacement
   *     character (U+FFFD). If false, stops processing when invalid UTF-8
   *     is seen.
   * @return A string represented by the UTF-8 byte array.
   * @throws NullPointerException The parameter {@code bytes} is null.
   * @throws IllegalArgumentException The string is not valid UTF-8 and
   *     {@code replace} is false.
   */
  public static String GetUtf8String(byte[] bytes, boolean replace) {
    if (bytes == null) {
      throw new NullPointerException("bytes");
    }
    StringBuilder b = new StringBuilder();
    if (ReadUtf8FromBytes(bytes, 0, bytes.length, b, replace) != 0) {
      throw new IllegalArgumentException("Invalid UTF-8");
    }
    return b.toString();
  }

  /**
   * Generates a text string from a portion of a UTF-8 byte array.
   *
   * @param bytes A byte array containing text encoded in UTF-8.
   * @param offset Offset into the byte array to start reading.
   * @param bytesCount Length, in bytes, of the UTF-8 string.
   * @param replace If true, replaces invalid encoding with the replacement
   *     character (U+FFFD). If false, stops processing when invalid UTF-8
   *     is seen.
   * @return A string represented by the selected portion of the byte array.
   * @throws NullPointerException The parameter {@code bytes} is null.
   * @throws IllegalArgumentException The portion of the byte array is not
   *     valid UTF-8 and {@code replace} is false; or {@code offset} is less
   *     than 0, {@code bytesCount} is less than 0, or offset plus bytesCount
   *     is greater than the length of {@code bytes}.
   */
  public static String GetUtf8String(byte[] bytes, int offset,
      int bytesCount, boolean replace) {
    if (bytes == null) {
      throw new NullPointerException("bytes");
    }
    if (offset < 0) {
      throw new IllegalArgumentException("offset (" + offset
          + ") is less than 0");
    }
    if (offset > bytes.length) {
      throw new IllegalArgumentException("offset (" + offset
          + ") is more than " + bytes.length);
    }
    if (bytesCount < 0) {
      throw new IllegalArgumentException("bytesCount (" + bytesCount
          + ") is less than 0");
    }
    if (bytesCount > bytes.length) {
      throw new IllegalArgumentException("bytesCount (" + bytesCount
          + ") is more than " + bytes.length);
    }
    // Written as a subtraction so that offset + bytesCount cannot overflow.
    if (bytes.length - offset < bytesCount) {
      throw new IllegalArgumentException("bytes's length minus " + offset
          + " (" + (bytes.length - offset) + ") is less than " + bytesCount);
    }
    StringBuilder b = new StringBuilder();
    if (ReadUtf8FromBytes(bytes, offset, bytesCount, b, replace) != 0) {
      throw new IllegalArgumentException("Invalid UTF-8");
    }
    return b.toString();
  }

  /**
   * Reads a string in UTF-8 encoding from a byte array and appends the
   * decoded text to a string builder.
   *
   * <p>When {@code replace} is false and invalid UTF-8 is found, text decoded
   * before the error has already been appended to {@code builder}.
   *
   * @param data A byte array containing a UTF-8 string.
   * @param offset Offset into the byte array to start reading.
   * @param bytesCount Length, in bytes, of the UTF-8 string.
   * @param builder A string builder object where the resulting string will
   *     be stored.
   * @param replace If true, replaces invalid encoding with the replacement
   *     character (U+FFFD). If false, stops processing when invalid UTF-8
   *     is seen.
   * @return 0 if the entire string was read without errors, or -1 if the
   *     string is not valid UTF-8 and {@code replace} is false.
   * @throws NullPointerException The parameter {@code data} is null or
   *     {@code builder} is null.
   * @throws IllegalArgumentException The parameter {@code offset} is less
   *     than 0, {@code bytesCount} is less than 0, or offset plus bytesCount
   *     is greater than the length of {@code data}.
   */
  public static int ReadUtf8FromBytes(byte[] data, int offset,
      int bytesCount, StringBuilder builder, boolean replace) {
    if (data == null) {
      throw new NullPointerException("data");
    }
    if (offset < 0) {
      throw new IllegalArgumentException("offset (" + offset
          + ") is less than 0");
    }
    if (offset > data.length) {
      throw new IllegalArgumentException("offset (" + offset
          + ") is more than " + data.length);
    }
    if (bytesCount < 0) {
      throw new IllegalArgumentException("bytesCount (" + bytesCount
          + ") is less than 0");
    }
    if (bytesCount > data.length) {
      throw new IllegalArgumentException("bytesCount (" + bytesCount
          + ") is more than " + data.length);
    }
    // Written as a subtraction so that offset + bytesCount cannot overflow.
    if (data.length - offset < bytesCount) {
      throw new IllegalArgumentException("data.length minus offset ("
          + (data.length - offset) + ") is less than " + bytesCount);
    }
    if (builder == null) {
      throw new NullPointerException("builder");
    }

    int cp = 0;           // code point accumulated so far
    int bytesSeen = 0;    // continuation bytes consumed for the current sequence
    int bytesNeeded = 0;  // continuation bytes the current lead byte requires
    // Allowed range for the NEXT continuation byte. Only the first
    // continuation byte of a sequence ever uses a narrowed range.
    int lower = CONTINUATION_MIN;
    int upper = CONTINUATION_MAX;
    int pointer = offset;
    int endpointer = offset + bytesCount;

    while (pointer < endpointer) {
      int b = data[pointer] & 0xff;
      ++pointer;

      if (bytesNeeded == 0) {
        // Expecting the start of a new sequence.
        if (b < 0x80) {
          builder.append((char) b);
        } else if (b >= 0xc2 && b <= 0xdf) {
          // Two-byte sequence; 0xc0 and 0xc1 would only encode overlong forms.
          bytesNeeded = 1;
          cp = (b - 0xc0) << 6;
        } else if (b >= 0xe0 && b <= 0xef) {
          // Three-byte sequence; 0xe0 must avoid overlong forms and 0xed
          // must avoid the surrogate range.
          lower = (b == 0xe0) ? 0xa0 : CONTINUATION_MIN;
          upper = (b == 0xed) ? 0x9f : CONTINUATION_MAX;
          bytesNeeded = 2;
          cp = (b - 0xe0) << 12;
        } else if (b >= 0xf0 && b <= 0xf4) {
          // Four-byte sequence; 0xf0 must avoid overlong forms and 0xf4
          // must stay at or below U+10FFFF.
          lower = (b == 0xf0) ? 0x90 : CONTINUATION_MIN;
          upper = (b == 0xf4) ? 0x8f : CONTINUATION_MAX;
          bytesNeeded = 3;
          cp = (b - 0xf0) << 18;
        } else if (replace) {
          // Stray continuation byte or a byte that can never start a sequence.
          builder.append(REPLACEMENT_CHAR);
        } else {
          return -1;
        }
        continue;
      }

      if (b < lower || b > upper) {
        // The pending sequence is broken; discard it.
        cp = 0;
        bytesNeeded = 0;
        bytesSeen = 0;
        lower = CONTINUATION_MIN;
        upper = CONTINUATION_MAX;
        if (!replace) {
          return -1;
        }
        // Step back so the offending byte is re-examined as a possible
        // start of a new sequence.
        --pointer;
        builder.append(REPLACEMENT_CHAR);
        continue;
      }

      // Valid continuation byte: later bytes use the default range.
      lower = CONTINUATION_MIN;
      upper = CONTINUATION_MAX;
      ++bytesSeen;
      cp += (b - 0x80) << (6 * (bytesNeeded - bytesSeen));
      if (bytesSeen != bytesNeeded) {
        continue;
      }

      // Sequence complete; appendCodePoint emits a surrogate pair for
      // code points above U+FFFF and a single char otherwise.
      builder.appendCodePoint(cp);
      cp = 0;
      bytesSeen = 0;
      bytesNeeded = 0;
    }

    if (bytesNeeded != 0) {
      // Input ended in the middle of a multi-byte sequence.
      if (!replace) {
        return -1;
      }
      builder.append(REPLACEMENT_CHAR);
    }
    return 0;
  }
}