Java examples for File Path IO:UTF
Reads a string in UTF-8 encoding from a data stream in full and returns that string.
/*
Written in 2013 by Peter O.
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/
If you like this, you should donate to Peter O.
at: http://upokecenter.dreamhosters.com/articles/donate-now-2/
 */
import java.io.*;

/**
 * Utility methods that decode UTF-8 text from an {@link InputStream}.
 */
public class Main {
    /** Character appended in place of each invalid UTF-8 sequence (U+FFFD). */
    private static final char REPLACEMENT_CHAR = (char) 0xfffd;

    /**
     * Reads a string in UTF-8 encoding from a data stream in full and returns
     * that string. Replaces invalid encoding with the replacement character
     * (U+FFFD).
     *
     * @param stream A readable data stream.
     * @return The string read.
     * @throws java.io.IOException An I/O error occurred.
     * @throws NullPointerException The parameter {@code stream} is null.
     */
    public static String ReadUtf8ToString(InputStream stream)
            throws java.io.IOException {
        return ReadUtf8ToString(stream, -1, true);
    }

    /**
     * Reads a string in UTF-8 encoding from a data stream and returns that
     * string. If the stream ends before {@code bytesCount} bytes were read,
     * the text decoded so far is returned without an error.
     *
     * @param stream A readable data stream.
     * @param bytesCount The length, in bytes, of the string. If this is less
     *     than 0, this function will read until the end of the stream.
     * @param replace If true, replaces invalid encoding with the replacement
     *     character (U+FFFD). If false, throws an error when invalid UTF-8 is
     *     seen.
     * @return The string read.
     * @throws java.io.IOException An I/O error occurred; or, the string is not
     *     valid UTF-8 and {@code replace} is false.
     * @throws NullPointerException The parameter {@code stream} is null.
     */
    public static String ReadUtf8ToString(InputStream stream,
            int bytesCount, boolean replace) throws java.io.IOException {
        StringBuilder builder = new StringBuilder();
        // Only -1 (invalid encoding) is treated as an error; -2 (premature
        // end of stream) deliberately yields whatever was decoded so far.
        int retval = ReadUtf8(stream, bytesCount, builder, replace);
        if (retval == -1) {
            throw new IOException("Unpaired surrogate code point found.",
                    new java.nio.charset.MalformedInputException(1));
        }
        return builder.toString();
    }

    /**
     * Reads a string in UTF-8 encoding from a data stream, one byte at a
     * time, appending the decoded text to {@code builder}.
     *
     * @param stream A readable data stream.
     * @param bytesCount The length, in bytes, of the string. If this is less
     *     than 0, this function will read until the end of the stream.
     * @param builder A string builder object where the resulting string will
     *     be stored.
     * @param replace If true, replaces invalid encoding with the replacement
     *     character (U+FFFD). If false, stops processing when invalid UTF-8
     *     is seen.
     * @return 0 if the entire string was read without errors, -1 if the
     *     string is not valid UTF-8 and {@code replace} is false, or -2 if
     *     the end of the stream was reached before {@code bytesCount} bytes
     *     were read (which is only the case if {@code bytesCount} is 0 or
     *     greater).
     * @throws java.io.IOException An I/O error occurred.
     * @throws NullPointerException The parameter {@code stream} is null or
     *     {@code builder} is null.
     */
    public static int ReadUtf8(InputStream stream, int bytesCount,
            StringBuilder builder, boolean replace)
            throws java.io.IOException {
        if (stream == null) {
            throw new NullPointerException("stream");
        }
        if (builder == null) {
            throw new NullPointerException("builder");
        }
        int cp = 0;           // code point being assembled
        int bytesSeen = 0;    // continuation bytes consumed so far
        int bytesNeeded = 0;  // continuation bytes the lead byte calls for
        int lower = 0x80;     // allowed range of the next continuation byte
        int upper = 0xbf;
        int pointer = 0;      // bytes consumed (tracked only if bytesCount > 0)
        while (pointer < bytesCount || bytesCount < 0) {
            int b = stream.read();
            if (b < 0) {
                // End of stream. A partially read character is an error.
                if (bytesNeeded != 0) {
                    if (!replace) {
                        return -1;
                    }
                    builder.append(REPLACEMENT_CHAR);
                }
                // With an explicit length the stream ended too early.
                return bytesCount >= 0 ? -2 : 0;
            }
            if (bytesCount > 0) {
                ++pointer;
            }
            if (bytesNeeded != 0) {
                boolean validContinuation = b >= lower && b <= upper;
                // Only the first continuation byte has a restricted range.
                lower = 0x80;
                upper = 0xbf;
                if (validContinuation) {
                    ++bytesSeen;
                    cp += (b - 0x80) << (6 * (bytesNeeded - bytesSeen));
                    if (bytesSeen == bytesNeeded) {
                        // Emits one char, or a surrogate pair above U+FFFF.
                        builder.appendCodePoint(cp);
                        cp = 0;
                        bytesSeen = 0;
                        bytesNeeded = 0;
                    }
                    continue;
                }
                // Invalid continuation: abandon the pending sequence.
                cp = 0;
                bytesSeen = 0;
                bytesNeeded = 0;
                if (!replace) {
                    return -1;
                }
                builder.append(REPLACEMENT_CHAR);
                // Fall through so the offending byte is re-read as the
                // possible start of a new sequence.
            }
            if (b < 0x80) {
                builder.append((char) b);
            } else if (b >= 0xc2 && b <= 0xdf) {
                bytesNeeded = 1;
                cp = (b - 0xc0) << 6;
            } else if (b >= 0xe0 && b <= 0xef) {
                // 0xe0 forbids overlong forms; 0xed forbids surrogates.
                lower = (b == 0xe0) ? 0xa0 : 0x80;
                upper = (b == 0xed) ? 0x9f : 0xbf;
                bytesNeeded = 2;
                cp = (b - 0xe0) << 12;
            } else if (b >= 0xf0 && b <= 0xf4) {
                // 0xf0 forbids overlong forms; 0xf4 caps at U+10FFFF.
                lower = (b == 0xf0) ? 0x90 : 0x80;
                upper = (b == 0xf4) ? 0x8f : 0xbf;
                bytesNeeded = 3;
                cp = (b - 0xf0) << 18;
            } else if (replace) {
                // Byte that can never start a sequence.
                builder.append(REPLACEMENT_CHAR);
            } else {
                return -1;
            }
        }
        // The byte budget ran out in the middle of a character.
        if (bytesNeeded != 0) {
            if (!replace) {
                return -1;
            }
            builder.append(REPLACEMENT_CHAR);
        }
        return 0;
    }
}