Java tutorial
//package com.java2s;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Main {
    /** Number of leading bytes inspected when guessing the encoding. */
    private static final int SNIFF_SIZE = 1024;

    /** Matches the {@code encoding} pseudo-attribute; group 1 is the encoding name. */
    private static final Pattern XML_ENCODING =
            Pattern.compile("encoding\\s*=\\s*['\"]([^'\"]+)");

    /**
     * Creates a reader allowing to read the contents of the specified text source.
     * <p>The encoding is chosen from the first of the following that applies:
     * <ol>
     * <li>a UTF-16BE, UTF-16LE or UTF-8 byte order mark (BOM);</li>
     * <li>the {@code encoding} pseudo-attribute of a leading {@code <?xml ...?>}
     * declaration, or UTF-8 when the declaration does not specify one;</li>
     * <li>{@code fallbackEncoding};</li>
     * <li>UTF-8.</li>
     * </ol>
     * Only the first bytes returned by a single read (at most 1024) are inspected.
     *
     * @param in the text source
     * @param fallbackEncoding the encoding to use when none can be detected.
     * May be <code>null</code>, in which case UTF-8 is used.
     * @param encoding if not <code>null</code> and not empty, the chosen encoding
     * is stored in its first element.
     * @return a reader allowing to read the contents of the text source.
     * This reader will automatically skip the BOM if any.
     * @exception IOException if there is an I/O problem or if the chosen
     * encoding is not supported
     */
    public static Reader createReader(InputStream in, String fallbackEncoding, String[] encoding)
            throws IOException {
        byte[] head = new byte[SNIFF_SIZE];
        PushbackInputStream pushback = new PushbackInputStream(in, head.length);

        int headLength = -1;
        try {
            headLength = pushback.read(head, 0, head.length);
        } catch (IOException ignored) {
            // Best effort: a failed probe only means nothing could be sniffed, so the
            // fallback encoding is used. Nothing was consumed from our point of view.
        }

        String charset = null;
        int bomLength = 0;
        if (headLength >= 2) {
            int b0 = head[0] & 0xFF;
            int b1 = head[1] & 0xFF;
            switch ((b0 << 8) | b1) {
                case 0xFEFF:
                    charset = "UTF-16BE";
                    bomLength = 2;
                    break;
                case 0xFFFE:
                    charset = "UTF-16LE";
                    bomLength = 2;
                    break;
                case 0xEFBB:
                    if (headLength >= 3 && (head[2] & 0xFF) == 0xBF) {
                        charset = "UTF-8";
                        bomLength = 3;
                    }
                    break;
                default:
                    break;
            }
        }

        // Give back everything that was probed except the BOM, so that the
        // returned reader never sees the BOM.
        if (headLength > bomLength) {
            pushback.unread(head, bomLength, headLength - bomLength);
        }

        if (charset == null && headLength > 0) {
            charset = detectXmlEncoding(head, headLength);
        }
        if (charset == null) {
            charset = (fallbackEncoding != null) ? fallbackEncoding : "UTF-8";
        }

        if (encoding != null && encoding.length > 0) {
            encoding[0] = charset;
        }
        return new InputStreamReader(pushback, charset);
    }

    /**
     * Returns the encoding announced by a leading XML declaration, "UTF-8" if the
     * text starts with {@code <?xml} but announces no encoding, or {@code null} if
     * the text does not start with {@code <?xml}.
     */
    private static String detectXmlEncoding(byte[] head, int length) {
        // Non-ASCII bytes are replaced by U+FFFD, which is harmless here.
        String text = new String(head, 0, length, StandardCharsets.US_ASCII);
        if (!text.startsWith("<?xml")) {
            return null;
        }

        // Look only inside the declaration itself: an encoding="..." attribute
        // appearing later in the document body must not be mistaken for it.
        // If the closing "?>" is not within the probed bytes, search all of them.
        int declarationEnd = text.indexOf("?>");
        String declaration = (declarationEnd >= 0) ? text.substring(0, declarationEnd) : text;

        Matcher matcher = XML_ENCODING.matcher(declaration);
        return matcher.find() ? matcher.group(1) : "UTF-8";
    }
}