Description
Try to guess the file encoding.
License
Apache License
Parameter
Parameter | Description |
---|
file | The file needed to guess the encoding. |
Exception
Parameter | Description |
---|
IOException | if the file cannot be opened or read |
Return
The guessed encoding name, or null if the file does not start with a recognized byte-order mark.
Declaration
public static String guessEncoding(File file) throws IOException
Method Source Code
/**
* Copyright 2009 Welocalize, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
/**
 * File-reading helpers plus a byte-order-mark based encoding guesser.
 */
public class Main {
    public static final String UTF8 = "UTF-8";
    public static final String UTF16LE = "UTF-16LE";
    public static final String UTF16BE = "UTF-16BE";

    /** Chunk size used when draining a stream of unknown length. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Tries to guess the file encoding from its leading byte-order mark.
     * <p>
     * Only guesses encodings of "UTF-8", "UTF-16LE" or "UTF-16BE".
     *
     * @param file
     *            The file whose encoding should be guessed.
     * @return The encoding, or {@code null} if no known byte-order mark is
     *         found.
     * @throws IOException
     *             if the file cannot be opened or read.
     */
    public static String guessEncoding(File file) throws IOException {
        // The result always has length 3; bytes past the end of a shorter
        // file are zero, which never matches any of the marks below.
        byte[] b = readFile(file, 3);
        if (b[0] == (byte) 0xef && b[1] == (byte) 0xbb && b[2] == (byte) 0xbf) {
            return UTF8;
        }
        if (b[0] == (byte) 0xff && b[1] == (byte) 0xfe) {
            return UTF16LE;
        }
        if (b[0] == (byte) 0xfe && b[1] == (byte) 0xff) {
            return UTF16BE;
        }
        return null;
    }

    /**
     * Reads up to {@code size} leading bytes of the given file.
     *
     * @param file
     *            The file to read.
     * @param size
     *            The number of bytes to read.
     * @return An array of exactly {@code size} bytes; positions beyond the end
     *         of the file are left as zero.
     * @throws IOException
     *             if the file cannot be opened or read.
     */
    public static byte[] readFile(File file, int size) throws IOException {
        return readFile(new FileInputStream(file), size);
    }

    /**
     * Reads up to {@code size} bytes from the given input stream and closes it.
     *
     * @param in
     *            The stream to read; it is closed before this method returns.
     * @param size
     *            The number of bytes to read.
     * @return An array of exactly {@code size} bytes; positions beyond the end
     *         of the stream are left as zero.
     * @throws IOException
     *             if reading or closing the stream fails.
     */
    public static byte[] readFile(InputStream in, int size)
            throws IOException {
        try {
            byte[] b = new byte[size];
            int filled = 0;
            // A single read() may return fewer bytes than requested even when
            // more data follows, so keep reading until full or end of stream.
            while (filled < size) {
                int n = in.read(b, filled, size - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            return b;
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Reads the whole file and decodes it with the platform default charset.
     *
     * @param file
     *            The file to read.
     * @return The file content as a string.
     * @throws IOException
     *             if the file cannot be opened or read.
     */
    public static String readFile(File file) throws IOException {
        FileInputStream in = null;
        try {
            in = new FileInputStream(file);
            return new String(drain(in));
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Reads the whole file and decodes it with the given encoding.
     *
     * @param file
     *            The file to read.
     * @param encoding
     *            The name of the charset used to decode the bytes.
     * @return The file content as a string.
     * @throws IOException
     *             if the file cannot be opened or read, or the encoding is
     *             not supported.
     */
    public static String readFile(File file, String encoding)
            throws IOException {
        return readFile(new FileInputStream(file), encoding);
    }

    /**
     * Reads the given input stream to its end, decodes the bytes with the
     * given encoding, and closes the stream.
     *
     * @param in
     *            The stream to read; it is closed before this method returns.
     * @param encoding
     *            The name of the charset used to decode the bytes.
     * @return The stream content as a string.
     * @throws IOException
     *             if reading or closing the stream fails, or the encoding is
     *             not supported.
     */
    public static String readFile(InputStream in, String encoding)
            throws IOException {
        try {
            return new String(drain(in), encoding);
        } finally {
            if (in != null) {
                in.close();
            }
        }
    }

    /**
     * Reads all remaining bytes of the stream without closing it.
     * available() is only an estimate, so the stream is read until
     * read() reports end of stream instead of sizing a buffer up front.
     */
    private static byte[] drain(InputStream in) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] chunk = new byte[BUFFER_SIZE];
        int n;
        while ((n = in.read(chunk, 0, chunk.length)) >= 0) {
            out.write(chunk, 0, n);
        }
        return out.toByteArray();
    }
}
Related
- guessEncoding(File file)
- guessEncoding(InputStream inputStream)
- getEncodingOfXml(File file)