Java Text File Read by Charset createBufferedReaderWithGuessedCharset(File file)

Here you can find the source of createBufferedReaderWithGuessedCharset(File file)

Description

Guess the file's character set and create BufferedReader

License

Open Source License

Parameter

Parameter Description
file input text file

Exception

Parameter Description
IOException any IOException during guessing character set or creating FileInputStream

Return

BufferedReader using guessed character set

Declaration

public static BufferedReader createBufferedReaderWithGuessedCharset(File file) throws IOException 

Method Source Code

//package com.java2s;
/* From www.java2s.com
 * License information at https://github.com/Caltech-IPAC/firefly/blob/master/License.txt
 */

import java.io.*;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;

public class Main {
    /**
     * Character sets tried, in order, by {@link #guessCharset(File)}; the first one that decodes
     * the sample without error wins. UTF-8 is listed first because ISO-8859-1 assigns a character
     * to every byte value and therefore accepts any input: with ISO-8859-1 in front, no other
     * candidate could ever be selected.
     */
    private static final String[] CHARSETS_TO_BE_TESTED = { "UTF-8", "ISO-8859-1", "windows-1252", "windows-1253",
            "UTF-16", };

    /** Maximum number of leading bytes of the file that are examined during detection. */
    private static final int SAMPLE_SIZE = 5120;

    /**
     * Guess the file's character set and create BufferedReader, falling back to UTF-8 when no
     * character set can be guessed.
     * @param file input text file
     * @return BufferedReader using guessed character set
     * @throws IOException any IOException during guessing character set or creating FileInputStream
     */
    public static BufferedReader createBufferedReaderWithGuessedCharset(File file) throws IOException {
        return createBufferedReaderWithGuessedCharset(file, "UTF-8");
    }

    /**
     * Guess the file's character set and create BufferedReader
     * @param file input text file
     * @param defaultCharset Default character set: US-ASCII, ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16.
     *                       Used when none of the candidate character sets matches the file.
     * @return BufferedReader using guessed character set, or {@code defaultCharset} if guessing found none
     * @throws IOException any IOException during guessing character set or creating FileInputStream;
     *                     also thrown when guessing fails for another reason (the failure is attached as
     *                     the cause) or when neither a guessed nor a default character set is available
     */
    public static BufferedReader createBufferedReaderWithGuessedCharset(File file, String defaultCharset)
            throws IOException {
        String charset;
        try {
            charset = guessCharset(file);
        } catch (IOException e) {
            throw e;
        } catch (Exception e) {
            // Report the failure instead of silently continuing with no character set.
            throw new IOException("Unable to guess the character set of " + file, e);
        }
        if (charset == null) {
            charset = defaultCharset;
        }
        if (charset == null) {
            throw new IOException("No character set could be determined for " + file);
        }

        FileInputStream stream = new FileInputStream(file);
        try {
            return new BufferedReader(new InputStreamReader(stream, charset));
        } catch (IOException | RuntimeException e) {
            // The reader was never handed to the caller, so the stream must be released here.
            try {
                stream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Guess the character set of a file by testing the built-in candidate list against its
     * leading bytes.
     * @param file input text file
     * @return canonical name of the first matching candidate, or {@code null} if none matches
     * @throws Exception any failure while reading the file or looking up a candidate character set
     */
    public static String guessCharset(File file) throws Exception {
        Charset detected = detectCharset(file, CHARSETS_TO_BE_TESTED);
        return detected == null ? null : detected.name();
    }

    /**
     * Find the first character set, in the given order, that decodes the leading bytes of a file
     * (at most {@code SAMPLE_SIZE} of them) without a malformed-input or unmappable-character error.
     * Only the bytes actually read from the file are tested.
     * @param f input file
     * @param charsets names of the candidate character sets, most preferred first
     * @return the first matching character set, or {@code null} if none matches
     * @throws Exception any IOException while reading the file, or the exception raised by
     *                   {@link Charset#forName(String)} for an illegal or unsupported name
     */
    public static Charset detectCharset(File f, String[] charsets) throws Exception {
        byte[] sample = new byte[SAMPLE_SIZE];
        int length = 0;
        boolean endOfInput;
        try (InputStream input = new FileInputStream(f)) {
            int read;
            // A single read() may return fewer bytes than requested; keep going until full or EOF.
            while (length < sample.length
                    && (read = input.read(sample, length, sample.length - length)) != -1) {
                length += read;
            }
            // If the sample is full, probe one more byte to learn whether the file continues.
            endOfInput = length < sample.length || input.read() == -1;
        }

        for (String charsetName : charsets) {
            Charset candidate = Charset.forName(charsetName);
            if (identify(sample, length, endOfInput, candidate.newDecoder())) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Check whether the decoder accepts the first {@code length} bytes of {@code bytes}.
     * @param bytes sample buffer
     * @param length number of valid bytes at the start of {@code bytes}
     * @param endOfInput {@code true} if the sample is the complete file; {@code false} if the file
     *                   continues, in which case an incomplete trailing byte sequence is tolerated
     * @param decoder freshly created decoder of the character set under test
     * @return {@code true} if decoding reported no error
     */
    private static boolean identify(byte[] bytes, int length, boolean endOfInput, CharsetDecoder decoder) {
        ByteBuffer in = ByteBuffer.wrap(bytes, 0, length);
        // The decoded text is discarded; the buffer is just scratch space reused on overflow.
        CharBuffer out = CharBuffer.allocate(1024);
        while (true) {
            CoderResult result = decoder.decode(in, out, endOfInput);
            if (result.isError()) {
                return false;
            }
            if (result.isUnderflow()) {
                break;
            }
            out.clear();
        }
        if (endOfInput) {
            // flush() is only legal once the decoder has been told the input is complete.
            while (true) {
                CoderResult result = decoder.flush(out);
                if (result.isError()) {
                    return false;
                }
                if (result.isUnderflow()) {
                    break;
                }
                out.clear();
            }
        }
        return true;
    }
}

Related

  1. asReader(InputStream input, Charset charset)
  2. convert(File file, Charset from, String toEncoding, ByteArrayOutputStream bytearray, boolean headersOn, int totalLinesToRead)
  3. copy(Reader input, OutputStream output, Charset encoding)
  4. createBOMStrippedReader(InputStream stream, String defaultCharset)
  5. createInputStreamReader(File file, String charsetName)
  6. createReader(Path p, Charset cs)
  7. getDecoder(Charset charset, ThreadLocal> localDecoder)
  8. getFileContent(IFile file, String charset)