Here you can find the source of getCharsetFromContent(URL url)
public static String getCharsetFromContent(URL url) throws IOException
//package com.java2s;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for sniffing the character set declared inside an HTML document.
 */
public class Main {

    /** Number of leading bytes of the document that are inspected for a meta tag. */
    private static final int SNIFF_LIMIT = 2048;

    /**
     * Matches {@code <meta http-equiv="content-type" content="text/html; charset=NAME"}
     * (case-insensitively) and captures {@code NAME} in group 1.
     */
    private static final Pattern META_CHARSET = Pattern.compile(
            "\\<meta\\s*http-equiv=[\\\"\\']content-type[\\\"\\']\\s*content\\s*=\\s*[\"']text/html\\s*;\\s*charset=([a-z\\d\\-]*)[\\\"\\'\\>]",
            Pattern.CASE_INSENSITIVE);

    /**
     * Reads the first {@value #SNIFF_LIMIT} bytes behind {@code url} and looks for an
     * HTML {@code <meta http-equiv="content-type" ...>} tag that declares a charset.
     *
     * @param url location of the document to inspect
     * @return the charset name exactly as written in the document if it is declared
     *         and supported by this JVM; {@code null} if the document is empty, no
     *         declaration is found, or the declared name is illegal or unsupported
     * @throws IOException if the stream cannot be opened or read
     */
    public static String getCharsetFromContent(URL url) throws IOException {
        byte[] chunk = new byte[SNIFF_LIMIT];
        int bytesRead;
        // try-with-resources guarantees the connection is released even when read fails.
        try (InputStream stream = url.openStream()) {
            bytesRead = readUpTo(stream, chunk);
        }
        if (bytesRead <= 0) {
            return null;
        }

        // Decode only the bytes actually read. ISO-8859-1 maps every byte to exactly one
        // char, so the ASCII meta tag is found regardless of the platform default charset
        // or of the document's real encoding.
        String startContent = new String(chunk, 0, bytesRead, StandardCharsets.ISO_8859_1);

        Matcher matcher = META_CHARSET.matcher(startContent);
        if (!matcher.find()) {
            return null;
        }

        String charset = matcher.group(1);
        try {
            return Charset.isSupported(charset) ? charset : null;
        } catch (IllegalCharsetNameException e) {
            // The pattern can capture an empty name or one starting with '-', which
            // Charset rejects with an exception; treat that as "no usable charset".
            return null;
        }
    }

    /**
     * Fills {@code buffer} from {@code in} until it is full or the stream ends, because a
     * single {@code read} call may legitimately return fewer bytes than are available.
     *
     * @return the number of bytes stored in {@code buffer}, possibly 0
     */
    private static int readUpTo(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }
}