Main.java Source code

Java tutorial

Introduction

Here is the source code for Main.java

Source

//package com.java2s;
import java.io.IOException;

import java.io.InputStream;

import java.io.PushbackInputStream;
import java.io.Reader;
import java.io.InputStreamReader;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

public class Main {
    /** Maximum number of leading bytes inspected when detecting the encoding. */
    private static final int LOOKAHEAD_SIZE = 1024;

    /**
     * Captures the value of an <code>encoding</code> pseudo-attribute.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern XML_ENCODING_PATTERN =
            Pattern.compile("encoding\\s*=\\s*['\"]([^'\"]+)");

    /**
     * Creates a reader allowing to read the contents of specified text source.
     * <p>This method implements the detection of the encoding, trying in order:
     * <ol>
     * <li>a UTF-16BE, UTF-16LE or UTF-8 byte order mark (BOM);
     * <li>the <code>encoding</code> pseudo-attribute of a leading
     * <code>&lt;?xml ... ?&gt;</code> declaration (UTF-8 if the declaration
     * does not specify one);
     * <li><code>fallbackEncoding</code>;
     * <li>UTF-8.
     * </ol>
     * <p>Up to 1024 leading bytes are read ahead for the detection and then
     * pushed back, so the returned reader still delivers them. The read-ahead
     * continues until that many bytes have been read or the end of the stream
     * is reached, hence it may block on a source which delivers fewer bytes
     * without being closed.
     *
     * @param in the text source
     * @param fallbackEncoding the encoding to use when neither a BOM nor an
     * XML declaration is found. May be <code>null</code>, in which case
     * UTF-8 is used.
     * @param encoding the detected encoding is copied to its first element.
     * May be <code>null</code> or empty, in which case nothing is copied.
     * @return a reader allowing to read the contents of the text source.
     * This reader will automatically skip the BOM if any.
     * @exception IOException if the selected encoding is not supported.
     * An I/O error occurring while reading ahead is not reported here:
     * the detection then proceeds with the bytes read so far.
     */
    public static Reader createReader(InputStream in, String fallbackEncoding, String[] encoding)
            throws IOException {
        byte[] head = new byte[LOOKAHEAD_SIZE];
        PushbackInputStream in2 = new PushbackInputStream(in, head.length);
        int headLength = readHead(in2, head);

        String charset = null;
        int bomLength = 0;

        if (headLength >= 2) {
            // Use BOM ---
            int b0 = (head[0] & 0xFF);
            int b1 = (head[1] & 0xFF);

            switch ((b0 << 8) | b1) {
            case 0xFEFF:
                charset = "UTF-16BE";
                bomLength = 2;
                break;
            case 0xFFFE:
                charset = "UTF-16LE";
                bomLength = 2;
                break;
            case 0xEFBB:
                if (headLength >= 3 && (head[2] & 0xFF) == 0xBF) {
                    charset = "UTF-8";
                    bomLength = 3;
                }
                break;
            }
        }

        if (charset == null && headLength > 0) {
            charset = detectXmlEncoding(head, headLength);
        }

        if (charset == null) {
            charset = (fallbackEncoding != null) ? fallbackEncoding : "UTF-8";
        }

        if (encoding != null && encoding.length > 0) {
            encoding[0] = charset;
        }

        // We don't want to read the BOM: push back only what follows it.
        int remaining = headLength - bomLength;
        if (remaining > 0) {
            in2.unread(head, bomLength, remaining);
        }
        return new InputStreamReader(in2, charset);
    }

    /**
     * Fills <code>buffer</code> from <code>in</code> until it is full or
     * the end of the stream is reached.
     * <p>A single call to <code>read</code> may return fewer bytes than are
     * available, which would hide a BOM or an XML declaration delivered in
     * several small chunks; hence the loop.
     *
     * @return the number of bytes actually stored in <code>buffer</code>
     */
    private static int readHead(InputStream in, byte[] buffer) {
        int total = 0;
        try {
            while (total < buffer.length) {
                int count = in.read(buffer, total, buffer.length - total);
                if (count <= 0) {
                    // End of stream (or a stream which makes no progress).
                    break;
                }
                total += count;
            }
        } catch (IOException ignored) {
            // Deliberate best effort: detection falls back on what has been
            // read so far. These bytes are kept for the returned reader.
        }
        return total;
    }

    /**
     * Returns the encoding specified by the XML declaration found at the very
     * beginning of <code>head</code>, <code>"UTF-8"</code> if this declaration
     * does not specify an encoding, or <code>null</code> if <code>head</code>
     * does not start with <code>&lt;?xml</code>.
     */
    private static String detectXmlEncoding(byte[] head, int length) throws IOException {
        // Unsupported characters are replaced by U+FFFD.
        String text = new String(head, 0, length, "US-ASCII");
        if (!text.startsWith("<?xml")) {
            return null;
        }

        // Restrict the search to the declaration itself, otherwise an
        // ordinary attribute named "encoding" found further in the document
        // would be mistaken for the encoding of the document.
        int end = text.indexOf("?>");
        String declaration = (end >= 0) ? text.substring(0, end) : text;

        Matcher matcher = XML_ENCODING_PATTERN.matcher(declaration);
        return matcher.find() ? matcher.group(1) : "UTF-8";
    }
}