Description
Guess the file's character set and create BufferedReader
License
Open Source License
Parameter
| Parameter | Description |
|---|---|
| file | input text file |
Exception
| Exception | Description |
|---|---|
| IOException | any IOException during guessing character set or creating FileInputStream |
Return
BufferedReader using guessed character set
Declaration
public static BufferedReader createBufferedReaderWithGuessedCharset(File file) throws IOException
Method Source Code
//package com.java2s;
/*//from w w w . j ava2 s. c o m
* License information at https://github.com/Caltech-IPAC/firefly/blob/master/License.txt
*/
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
public class Main {
    /**
     * Candidate character sets, tried in this order; the first one whose decoder accepts
     * the sampled bytes wins.
     * NOTE(review): ISO-8859-1 assigns a character to every byte value, so it appears to
     * always match first and the later entries look unreachable with this ordering.
     * The order is kept as-is because changing it would change results for existing
     * callers - confirm the intended priority before reordering.
     */
    private static final String[] CHARSETS_TO_BE_TESTED = { "ISO-8859-1", "windows-1252", "windows-1253", "UTF-8",
            "UTF-16", };

    /** Maximum number of leading bytes of the file that are sampled for detection. */
    private static final int SAMPLE_SIZE = 5120;

    /** Character set used when nothing was detected and the caller supplied no default. */
    private static final String FALLBACK_CHARSET = "UTF-8";

    /**
     * Guess the file's character set and create BufferedReader.
     * Uses UTF-8 when no candidate character set matches.
     *
     * @param file input text file
     * @return BufferedReader using guessed character set
     * @throws IOException any IOException during guessing character set or creating FileInputStream
     */
    public static BufferedReader createBufferedReaderWithGuessedCharset(File file) throws IOException {
        return createBufferedReaderWithGuessedCharset(file, FALLBACK_CHARSET);
    }

    /**
     * Guess the file's character set and create BufferedReader.
     *
     * @param file input text file
     * @param defaultCharset character set name used when no candidate matches, e.g.
     *        US-ASCII, ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16; {@code null} means UTF-8
     * @return BufferedReader using the guessed character set, or {@code defaultCharset}
     *         when the guess fails
     * @throws IOException any IOException during guessing character set or creating
     *         FileInputStream, or when the default character set is needed but not supported
     */
    public static BufferedReader createBufferedReaderWithGuessedCharset(File file, String defaultCharset)
            throws IOException {
        Charset charset = detectCharset(file, CHARSETS_TO_BE_TESTED);
        if (charset == null) {
            // Resolve the fallback before opening the stream so a bad name cannot leak a file handle.
            charset = resolveDefaultCharset(defaultCharset);
        }
        // The Charset overload cannot fail on an unknown name, unlike the String overload.
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), charset));
    }

    /**
     * Guess the character set of a file using the built-in candidate list.
     *
     * @param file input text file
     * @return canonical name of the first candidate character set that decodes the sample,
     *         or {@code null} when none does
     * @throws IOException if the file cannot be opened or read
     */
    public static String guessCharset(File file) throws IOException {
        Charset charset = detectCharset(file, CHARSETS_TO_BE_TESTED);
        return (charset == null) ? null : charset.name();
    }

    /**
     * Return the first of the given character sets that can decode the leading bytes of a file.
     * Only the bytes actually read (at most {@value #SAMPLE_SIZE}) are tested. If the file is
     * longer than the sample, a multi-byte sequence cut at the sample boundary is reported as
     * a mismatch for that character set.
     *
     * @param f input file
     * @param charsets character set names to try, in priority order
     * @return the first matching character set, or {@code null} when none matches
     * @throws IOException if the file cannot be opened or read
     */
    public static Charset detectCharset(File f, String[] charsets) throws IOException {
        byte[] sample = new byte[SAMPLE_SIZE];
        int length = 0;
        try (InputStream input = new FileInputStream(f)) {
            // A single read() may return fewer bytes than requested, so keep reading
            // until the sample is full or the end of the file is reached.
            int count;
            while (length < sample.length
                    && (count = input.read(sample, length, sample.length - length)) != -1) {
                length += count;
            }
        }
        for (String charsetName : charsets) {
            Charset candidate = Charset.forName(charsetName);
            if (canDecode(sample, length, candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Tell whether the first {@code length} bytes of {@code bytes} are valid in {@code charset}.
     */
    private static boolean canDecode(byte[] bytes, int length, Charset charset) {
        try {
            // A fresh decoder reports malformed or unmappable input by throwing.
            CharsetDecoder decoder = charset.newDecoder();
            decoder.decode(ByteBuffer.wrap(bytes, 0, length));
            return true;
        } catch (CharacterCodingException e) {
            return false;
        } catch (RuntimeException e) {
            // Best effort: a charset that cannot provide a working decoder is treated as no match.
            return false;
        }
    }

    /**
     * Turn the caller-supplied default character set name into a Charset.
     */
    private static Charset resolveDefaultCharset(String defaultCharset) throws IOException {
        String name = (defaultCharset == null) ? FALLBACK_CHARSET : defaultCharset;
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException e) {
            // Covers both illegal and unsupported character set names.
            throw new IOException("Unsupported default character set: " + name, e);
        }
    }
}
Related
- asReader(InputStream input, Charset charset)
- convert(File file, Charset from, String toEncoding, ByteArrayOutputStream bytearray, boolean headersOn, int totalLinesToRead)
- copy(Reader input, OutputStream output, Charset encoding)
- createBOMStrippedReader(InputStream stream, String defaultCharset)
- createInputStreamReader(File file, String charsetName)
- createReader(Path p, Charset cs)
- getDecoder(Charset charset, ThreadLocal> localDecoder)
- getFileContent(IFile file, String charset)