Here you can find the source of obtainEncodingStringFromInputStream(InputStream bis)
Parameter | Description |
---|---|
bis | file to parse |
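Exception | Description |
---|---|
IOException | if there is a problem reading from the file |
IllegalArgumentException | if the InputStream does not support mark and reset |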
public static String obtainEncodingStringFromInputStream(InputStream bis) throws IOException
//package com.java2s;

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Main {
    /**
     * Read limit passed to {@link InputStream#mark(int)}. It is also the hard
     * cap on the number of bytes consumed while sniffing, so the mark can
     * never be invalidated before {@link InputStream#reset()} is called.
     */
    private static final int ENOUGH = 200;

    /**
     * Greedy pattern matching everything up to and including the last
     * {@code encoding="} on a line. Not used by this class any more; kept
     * unchanged for subclasses that reference it.
     */
    protected static final String EXTRACT_ENCODING_REGEX = ".*encoding=\"";

    /**
     * Whole-line pattern for an XML 1.0 declaration carrying an encoding.
     * Note that the trailing {@code "?} is an optional quote rather than a
     * literal question mark. Not used by this class any more; kept unchanged
     * for subclasses that reference it.
     */
    protected static final String XML_FIRST_LINE_REGEX = "<\\?xml version=\"1\\.0\" encoding=\"(.*)\"?>";

    /**
     * Start of an XML 1.0 declaration with a non-empty, properly closed
     * encoding value. Matched as a prefix so that a first line truncated at
     * {@link #ENOUGH} bytes is still recognised.
     */
    private static final Pattern DECLARATION_PREFIX =
            Pattern.compile("<\\?xml version=\"1\\.0\" encoding=\"[^\"]+\"");

    /** Captures the text following the first {@code encoding="} up to the next quote or end of line. */
    private static final Pattern ENCODING_VALUE = Pattern.compile("encoding=\"([^\"]*)");

    /**
     * Helper method to obtain the content encoding from an input stream.
     *
     * <p>At most {@link #ENOUGH} bytes of the first line are inspected, and the
     * stream is reset to its original position before returning, so it can be
     * handed to a parser afterwards.
     *
     * @param bis stream to inspect; must support mark and reset
     * @return the declared encoding in lower-case if the stream starts with an
     *         XML 1.0 declaration that carries one, else the default value
     *         expected by the expat parser, i.e. "UTF-8"
     * @throws IOException if there is a problem reading from or resetting the stream
     * @throws IllegalArgumentException if the InputStream does not support
     *         mark and reset
     */
    public static String obtainEncodingStringFromInputStream(InputStream bis) throws IOException {
        if (!bis.markSupported()) {
            throw new IllegalArgumentException(
                    "Error in the program, InputStream needs to support markSupported()");
        }

        // Read the first line after setting the mark, then reset before the
        // caller passes the stream on to the parser.
        bis.mark(ENOUGH);
        // Accept single-quoted attributes by normalising them to double quotes.
        String line = readFirstLine(bis).replace('\'', '"');
        bis.reset();

        if (DECLARATION_PREFIX.matcher(line).lookingAt()) {
            return extractEncoding(line);
        }
        return "UTF-8";
    }

    /**
     * Reads bytes up to the first line terminator, end of stream, or
     * {@link #ENOUGH} bytes, whichever comes first. Each byte is widened to a
     * char as-is, which is sufficient for the ASCII-only XML declaration.
     *
     * @param in stream to read from
     * @return the bytes read as a string; empty if the stream is already at its end
     * @throws IOException if reading fails
     */
    private static String readFirstLine(InputStream in) throws IOException {
        StringBuilder sb = new StringBuilder(ENOUGH);
        for (int i = 0; i < ENOUGH; i++) {
            int b = in.read();
            if (b < 0 || b == '\n' || b == '\r') {
                break;
            }
            sb.append((char) b);
        }
        return sb.toString();
    }

    /**
     * Helper method to extract the XML file encoding.
     *
     * @param line the first line of an XML file, using double-quoted attributes
     * @return the value of the first {@code encoding} attribute, in lower-case
     *         (locale-independent)
     * @throws IllegalArgumentException if the line contains no
     *         {@code encoding="} attribute
     */
    protected static String extractEncoding(String line) {
        Matcher m = ENCODING_VALUE.matcher(line);
        if (!m.find()) {
            throw new IllegalArgumentException("No encoding attribute found in: " + line);
        }
        // Locale.ROOT keeps charset names ASCII regardless of the default locale.
        return m.group(1).toLowerCase(Locale.ROOT);
    }
}