Main.java Source code

Java tutorial

Introduction

Here is the source code for Main.java

Source

//package com.java2s;
/*
 * The following methods come from a library written by Tom Fennelly.
 * Here was the header of the licence.
 */

import java.io.IOException;
import java.io.InputStream;

import java.nio.charset.Charset;

/**
 * Holder for {@link #detectEncoding(InputStream)}, which sniffs the character encoding of an
 * XML document from its leading bytes.
 */
public class Main {
    /** Maximum number of bytes inspected; also the read-ahead limit handed to mark(). */
    private static final int LOOKAHEAD = 400;

    /** Bytes needed to recognise a byte-order mark or the first characters of the document. */
    private static final int PREFIX_LENGTH = 4;

    /**
     * detectEncoding.java - Returns the character encoding of an input stream containing an XML file.<br/>
     * Copyright (c) 2009 Alexander Hristov .
     *
     * Licensed under the LGPL License - http://www.gnu.org/licenses/lgpl.txt
     *
     * <p>The encoding is detected using the guidelines specified in the
     * <a href='http://www.w3.org/TR/xml/#sec-guessing'>XML W3C Specification</a>:
     * a byte-order mark wins; otherwise the byte pattern of the leading {@code <?xm}
     * selects an encoding family that is used to decode the XML declaration, whose
     * {@code encoding} pseudo-attribute is then returned verbatim. Only an {@code encoding}
     * that appears inside the XML declaration (before the first {@code ?>}) is honoured.
     * When nothing can be determined, the platform default ({@code file.encoding}) is returned.
     *
     * <p>A sample use would be
     * <pre>
     * InputStream in = ...;
     * String encoding = detectEncoding(in);
     * BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding));
     * </pre>
     * and from that point you can happily read text from the input stream.
     *
     * @param in
     *          Stream containing the data to be read. The stream must support
     *          mark()/reset(), otherwise the caller should wrap that stream in a
     *          {@link java.io.BufferedInputStream} before invoking the method. After the
     *          call, the stream is positioned at the &lt; character (this means
     *          that if there were any byte-order-marks, they are skipped).
     *
     * @return Detected encoding: either one of the java.io names chosen from the byte
     *         pattern (see <a
     *         href=
     *         'http://java.sun.com/j2se/1.4.2/docs/guide/intl/encoding.doc.html'>Supported
     *         Encodings</a> ), the name written in the XML declaration, or the platform default.
     *
     * @throws IOException
     *           if the stream does not support mark()/reset(), if reading fails, or if the
     *           XML declaration contains a malformed {@code encoding} pseudo-attribute
     *           (in which case the stream is not repositioned).
     *
     * @author Alexander Hristov
     */
    public static String detectEncoding(InputStream in) throws IOException {
        // Fail before consuming anything: without mark/reset the bytes read below would be lost.
        if (!in.markSupported()) {
            throw new IOException(
                    "Stream does not support mark()/reset(); wrap it in a BufferedInputStream");
        }
        in.mark(LOOKAHEAD);
        byte[] buffer = new byte[LOOKAHEAD];
        int filled = readPrefix(in, buffer);

        String encoding = null;
        int ignoreBytes = 0;          // length of the byte-order mark to skip, if any
        boolean readDeclaration = false;

        // Order matters: the UTF-32LE mark (FF FE 00 00) must be tried before UTF-16LE (FF FE).
        if (startsWith(buffer, filled, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF_32BE";
            ignoreBytes = 4;
        } else if (startsWith(buffer, filled, 0x00, 0x00, 0x00, 0x3C)) {
            encoding = "UTF_32BE";
            readDeclaration = true;
        } else if (startsWith(buffer, filled, 0x00, 0x3C, 0x00, 0x3F)) {
            encoding = "UnicodeBigUnmarked";
            readDeclaration = true;
        } else if (startsWith(buffer, filled, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF_32LE";
            ignoreBytes = 4;
        } else if (startsWith(buffer, filled, 0xFF, 0xFE)) {
            encoding = "UnicodeLittleUnmarked";
            ignoreBytes = 2;
        } else if (startsWith(buffer, filled, 0x3C, 0x00, 0x00, 0x00)) {
            encoding = "UTF_32LE";
            readDeclaration = true;
        } else if (startsWith(buffer, filled, 0x3C, 0x00, 0x3F, 0x00)) {
            encoding = "UnicodeLittleUnmarked";
            readDeclaration = true;
        } else if (startsWith(buffer, filled, 0x3C, 0x3F, 0x78, 0x6D)) {
            // "<?xm" in an ASCII-compatible encoding: the declaration names the real one.
            encoding = "ASCII";
            readDeclaration = true;
        } else if (startsWith(buffer, filled, 0xFE, 0xFF)) {
            encoding = "UnicodeBigUnmarked";
            ignoreBytes = 2;
        } else if (startsWith(buffer, filled, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF8";
            ignoreBytes = 3;
        } else if (startsWith(buffer, filled, 0x4C, 0x6F, 0xA7, 0x94)) {
            // "<?xm" in EBCDIC.
            encoding = "CP037";
        }
        // Anything else (including a document that starts with '<' but has no XML
        // declaration) carries no usable hint, so the platform default is reported.

        if (encoding == null) {
            encoding = platformEncoding();
        }
        if (readDeclaration) {
            String declared = readDeclaredEncoding(in, buffer, filled, Charset.forName(encoding));
            encoding = (declared != null) ? declared : platformEncoding();
        }

        in.reset();
        for (int skipped = 0; skipped < ignoreBytes; skipped++) {
            in.read();
        }
        return encoding;
    }

    /** Reads up to PREFIX_LENGTH bytes into the start of buffer, tolerating short reads; returns the count. */
    private static int readPrefix(InputStream in, byte[] buffer) throws IOException {
        int filled = 0;
        while (filled < PREFIX_LENGTH) {
            int n = in.read(buffer, filled, PREFIX_LENGTH - filled);
            if (n <= 0) {
                break; // end of stream before four bytes were available
            }
            filled += n;
        }
        return filled;
    }

    /** Tells whether the first bytes actually read (not zero padding) equal the expected unsigned values. */
    private static boolean startsWith(byte[] data, int available, int... expected) {
        if (available < expected.length) {
            return false;
        }
        for (int i = 0; i < expected.length; i++) {
            if ((data[i] & 0xFF) != expected[i]) {
                return false;
            }
        }
        return true;
    }

    /** Returns the file.encoding system property, or the default charset name if it is unset. */
    private static String platformEncoding() {
        String name = System.getProperty("file.encoding");
        return (name != null) ? name : Charset.defaultCharset().name();
    }

    /**
     * Decodes the bytes following the four-byte prefix and extracts the value of the
     * encoding pseudo-attribute of the XML declaration; returns null when the declaration
     * is absent, unterminated within the look-ahead window, or names no encoding.
     */
    private static String readDeclaredEncoding(InputStream in, byte[] buffer, int filled, Charset cs)
            throws IOException {
        if (filled < PREFIX_LENGTH) {
            return null;
        }
        String header = "";
        // Keep reading until the first '>' shows up (end of the declaration), the
        // look-ahead window is full, or the stream ends; a single read may be short.
        while (filled < buffer.length) {
            int n = in.read(buffer, filled, buffer.length - filled);
            if (n <= 0) {
                break;
            }
            filled += n;
            header = new String(buffer, PREFIX_LENGTH, filled - PREFIX_LENGTH, cs);
            if (header.indexOf('>') >= 0) {
                break;
            }
        }

        // The XML declaration ends at the first '>' and that '>' must be part of "?>".
        int close = header.indexOf('>');
        if (close < 1 || header.charAt(close - 1) != '?') {
            return null;
        }
        String declaration = header.substring(0, close - 1);
        int pos = declaration.indexOf("encoding");
        if (pos == -1) {
            return null;
        }

        // Expect: encoding, optional blanks, '=', optional blanks, quoted value.
        int i = skipWhitespace(declaration, pos + "encoding".length());
        if (i >= declaration.length() || declaration.charAt(i) != '=') {
            throw new IOException("Encoding error: missing '=' in XML declaration: " + declaration);
        }
        i = skipWhitespace(declaration, i + 1);
        if (i >= declaration.length()) {
            throw new IOException("Encoding error: missing value in XML declaration: " + declaration);
        }
        char delim = declaration.charAt(i);
        if (delim != '\'' && delim != '"') {
            throw new IOException("Encoding error: unquoted value in XML declaration: " + declaration);
        }
        int end = declaration.indexOf(delim, i + 1);
        if (end == -1) {
            throw new IOException("Encoding error: unterminated value in XML declaration: " + declaration);
        }
        return declaration.substring(i + 1, end);
    }

    /** Returns the index of the first non-whitespace character of text at or after from. */
    private static int skipWhitespace(String text, int from) {
        int i = from;
        while (i < text.length() && Character.isWhitespace(text.charAt(i))) {
            i++;
        }
        return i;
    }
}