Here you can find the source of getEncoding(String text)
private static Charset getEncoding(String text)
//package com.java2s;
/*---------------------------------------------------------------
 *  Copyright 2005 by the Radiological Society of North America
 *
 *  This source software is released under the terms of the
 *  RSNA Public License (http://mirc.rsna.org/rsnapubliclicense)
 *----------------------------------------------------------------*/

import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Detects the character encoding declared inside the text of an XML or
 * HTML document, falling back to UTF-8 when no usable declaration is found.
 */
public class Main {

    /**
     * The default charset returned when no declaration is recognized.
     * Left non-final so that existing code assigning to this public field
     * continues to compile.
     */
    public static Charset utf8 = Charset.forName("UTF-8");

    /**
     * Matches an XML declaration and captures the (still quoted) value of its
     * encoding pseudo-attribute. Both double- and single-quoted values are
     * accepted. Compiled once because a Pattern is immutable and reusable.
     */
    private static final Pattern XML_ENCODING = Pattern.compile(
            "^\\s*<\\?xml\\s+[^>]*\\s*encoding\\s*=\\s*(\"[^\"]*\"|'[^']*')",
            Pattern.DOTALL | Pattern.MULTILINE);

    /**
     * Matches an html element followed by a meta element carrying a charset
     * value, and captures that value. The value may be double-quoted,
     * single-quoted, or unquoted; an unquoted value stops at whitespace, a
     * quote, ';', '/' or '>' so that trailing markup is never captured as
     * part of the charset name. Tag and attribute names match in any case.
     */
    private static final Pattern HTML_CHARSET = Pattern.compile(
            "^\\s*<html.*<meta\\s+[^>]*\\s*charset\\s*=\\s*(\"[^\"]*\"|'[^']*'|[^\"'\\s;/>]*)",
            Pattern.DOTALL | Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);

    /**
     * Determines the charset declared in a document's text.
     *
     * @param text the document text; may be null
     * @return the charset named by an XML encoding declaration if one is
     *         present, otherwise the charset named by an HTML meta charset
     *         declaration if one is present, otherwise {@link #utf8}.
     *         {@link #utf8} is also returned when the declared name is
     *         illegal or unsupported, or when text is null.
     */
    private static Charset getEncoding(String text) {
        if (text == null) {
            return utf8;
        }

        //See if this is an xml document with an encoding declaration.
        Matcher xmlMatcher = XML_ENCODING.matcher(text);
        if (xmlMatcher.find()) {
            return getEncoding(xmlMatcher);
        }

        //See if this is an html document with a charset declaration.
        Matcher htmlMatcher = HTML_CHARSET.matcher(text);
        if (htmlMatcher.find()) {
            return getEncoding(htmlMatcher);
        }

        //We don't recognize this document declaration; use UTF-8.
        //Maybe this should actually be ISO-8859-1 since
        //that is the web default encoding, but it is probably
        //better to default to UTF-8 because that will be better
        //for sites in the Far East, and the pain for the Europeans
        //will be minimal.
        return utf8;
    }

    /**
     * Converts the charset name captured by a successful match into a Charset.
     * The name is taken from the matcher's last capturing group; one pair of
     * surrounding quotes and any surrounding whitespace are removed first.
     *
     * @param matcher a matcher on which a match has already succeeded
     * @return the named charset, or {@link #utf8} if the name is missing,
     *         empty, illegal, or not supported by this JVM
     */
    private static Charset getEncoding(Matcher matcher) {
        String name = matcher.group(matcher.groupCount());
        if (name == null) {
            return utf8;
        }

        //Remove one matching pair of surrounding quotes, if present.
        int length = name.length();
        if (length >= 2) {
            char first = name.charAt(0);
            if ((first == '"' || first == '\'') && name.charAt(length - 1) == first) {
                name = name.substring(1, length - 1);
            }
        }

        name = name.trim();
        if (name.isEmpty()) {
            return utf8;
        }

        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException ex) {
            //Covers IllegalCharsetNameException and UnsupportedCharsetException:
            //an unusable declaration is treated the same as no declaration.
            return utf8;
        }
    }
}