Here you can find the source of getEncodingFromHTML(InputStream is)
public static String getEncodingFromHTML(InputStream is) throws IOException
//package com.java2s;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers for sniffing the character encoding declared in an HTML document's
 * {@code <meta>} tag.
 */
public class Main {
    /** Maximum number of leading bytes inspected when looking for the charset declaration. */
    private static final int FIND_CHARSET_CACHE_SIZE = 4 * 1024;

    /**
     * Matches a {@code charset=...} declaration inside a single {@code <meta ...>} tag.
     * {@code [^>]*?} keeps the match inside one tag (and lets it span line breaks);
     * the value may be unquoted, double-quoted or single-quoted. Group 1 is the charset name.
     */
    private static final Pattern CHARSET_PATTERN = Pattern.compile(
            "<meta[^>]*?charset\\s*=\\s*[\"']?([a-zA-Z0-9_/-]+)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Looks for a charset declaration in the first 4 KiB of an HTML stream.
     *
     * <p>If {@code is} supports {@link InputStream#mark(int)}, the stream is marked before
     * reading and reset afterwards, so the caller can re-read the document from the same
     * position (any mark previously set by the caller is replaced). If it does not support
     * marks, the inspected bytes are consumed and cannot be restored.
     *
     * @param is the HTML byte stream; it is not closed by this method
     * @return the declared charset name, or {@code null} if none is found in the inspected prefix
     * @throws IOException if {@code is} is {@code null} or reading/resetting the stream fails
     */
    public static String getEncodingFromHTML(InputStream is) throws IOException {
        if (is == null) {
            // The original surfaced a null stream as an IOException; keep that contract.
            throw new IOException("Stream closed");
        }

        final boolean rewindable = is.markSupported();
        if (rewindable) {
            is.mark(FIND_CHARSET_CACHE_SIZE);
        }

        byte[] cache = new byte[FIND_CHARSET_CACHE_SIZE];
        int length = readFully(is, cache);

        if (rewindable) {
            is.reset();
        }

        // ISO-8859-1 maps every byte to exactly one char, so the ASCII markup we are
        // searching for is preserved regardless of the document's real encoding, and the
        // result does not depend on the platform default charset. Only the bytes that
        // were actually read are decoded.
        return getHtmlCharset(new String(cache, 0, length, StandardCharsets.ISO_8859_1));
    }

    /**
     * Extracts the charset name from the first {@code <meta>} tag that declares one.
     *
     * @param content HTML text to search; may be {@code null}
     * @return the charset name exactly as written in the document, or {@code null} if
     *         {@code content} is {@code null} or contains no declaration
     */
    public static String getHtmlCharset(String content) {
        if (content == null) {
            return null;
        }
        Matcher m = CHARSET_PATTERN.matcher(content);
        return m.find() ? m.group(1) : null;
    }

    /**
     * Reads until {@code buf} is full or the stream ends; a single {@code read} call may
     * legitimately return fewer bytes than requested.
     *
     * @return the number of bytes stored in {@code buf}
     */
    private static int readFully(InputStream in, byte[] buf) throws IOException {
        int total = 0;
        while (total < buf.length) {
            int n = in.read(buf, total, buf.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }
}