Java Charset Create getCharsetFromContent(URL url)

Description

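Reads the first bytes (up to 2048) of the content at the given URL and returns the charset name declared in an HTML `<meta http-equiv="content-type" ...>` tag, or null if no such declaration is found or the declared charset is not supported by the JVM.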

License

Open Source License

Declaration

public static String getCharsetFromContent(URL url) throws IOException

Method Source Code


//package com.java2s;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helper for discovering the character set an HTML document declares for itself
 * through a {@code <meta http-equiv="content-type" ...>} tag.
 */
public class Main {
    /** Number of leading bytes of the document that are inspected for the meta tag. */
    private static final int SNIFF_LIMIT = 2048;

    /**
     * Matches a {@code content-type} meta tag; capturing group 1 holds the charset name.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern META_CHARSET = Pattern.compile(
            "\\<meta\\s*http-equiv=[\\\"\\']content-type[\\\"\\']\\s*content\\s*=\\s*[\"']text/html\\s*;\\s*charset=([a-z\\d\\-]*)[\\\"\\'\\>]",
            Pattern.CASE_INSENSITIVE);

    /**
     * Looks for a charset declaration in the first {@value #SNIFF_LIMIT} bytes of the
     * content served at {@code url}.
     *
     * @param url location of the document to inspect
     * @return the charset name exactly as written in the document, or {@code null} when
     *         the content is empty, no matching meta tag is found, or the declared name
     *         is illegal or not supported by this JVM
     * @throws IOException if the stream cannot be opened or read
     */
    public static String getCharsetFromContent(URL url) throws IOException {
        byte[] chunk = new byte[SNIFF_LIMIT];
        int bytesRead;
        // try-with-resources guarantees the stream is closed even if reading fails.
        try (InputStream stream = url.openStream()) {
            bytesRead = readUpTo(stream, chunk);
        }
        if (bytesRead <= 0) {
            return null;
        }

        // ISO-8859-1 maps every byte to exactly one char, so the ASCII meta tag can be
        // searched without depending on the platform default charset; only the bytes
        // that were actually read are decoded.
        String startContent = new String(chunk, 0, bytesRead, StandardCharsets.ISO_8859_1);
        Matcher matcher = META_CHARSET.matcher(startContent);
        if (!matcher.find()) {
            return null;
        }

        String charset = matcher.group(1);
        try {
            return Charset.isSupported(charset) ? charset : null;
        } catch (IllegalArgumentException ignored) {
            // The pattern admits empty names and names starting with '-', which
            // Charset.isSupported rejects with IllegalCharsetNameException; treat such a
            // declaration as "no usable charset" instead of failing.
            return null;
        }
    }

    /**
     * Fills {@code buffer} from {@code in} until it is full or the stream ends, because
     * a single {@code read} call may return fewer bytes than are available.
     *
     * @return the number of bytes stored in {@code buffer} (0 if the stream was empty)
     */
    private static int readUpTo(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n <= 0) {
                break;
            }
            total += n;
        }
        return total;
    }
}

Java Charset Create getCharsetFromContent(URL url)

Description

License

Declaration

Method Source Code

Related