Here you can find the source of getFileText(File file, Charset charset)
Parameter | Description |
---|---|
file | the file to read. |
charset | the character set to use for the encoding of the file. |
public static String getFileText(File file, Charset charset)
//package com.java2s;
/*---------------------------------------------------------------
 *  Copyright 2005 by the Radiological Society of North America
 *
 *  This source software is released under the terms of the
 *  RSNA Public License (http://mirc.rsna.org/rsnapubliclicense)
 *----------------------------------------------------------------*/

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for reading a whole text file into a String, optionally
 * detecting the character set from an XML or HTML declaration inside the file.
 * All read methods are best-effort: they return an empty string instead of
 * throwing when the file cannot be read.
 */
public class Main {

    /** The UTF-8 charset, used as the default and as the fallback encoding. */
    public static Charset utf8 = Charset.forName("UTF-8");

    /**
     * Matches an XML declaration carrying an encoding pseudo-attribute.
     * The value may be delimited by double or single quotes (both are legal XML);
     * the last capture group holds the value including its quotes.
     */
    private static final Pattern XML_ENCODING = Pattern.compile(
            "^\\s*<\\?xml\\s+[^>]*\\s*encoding\\s*=\\s*(\"[^\"]*\"|'[^']*')",
            Pattern.DOTALL | Pattern.MULTILINE);

    /**
     * Matches an HTML document containing a meta element with a charset declaration.
     * The value may be double-quoted, single-quoted, or unquoted; an unquoted value
     * stops at whitespace, a quote, '>', '/' or ';' so that trailing markup is not
     * captured as part of the charset name. Tag and attribute names are matched
     * case-insensitively. The last capture group holds the value.
     */
    private static final Pattern HTML_CHARSET = Pattern.compile(
            "^\\s*<html.*<meta\\s+[^>]*\\s*charset\\s*=\\s*(\"[^\"]*\"|'[^']*'|[^\"'\\s>;/]*)",
            Pattern.DOTALL | Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);

    /**
     * Reads a text file completely, trying to obtain the charset from the
     * file itself, and defaulting to UTF-8 if it fails.
     * The file is first decoded as UTF-8; if it declares a different charset
     * it is read a second time using that charset.
     * @param file the file to read.
     * @return the text of the file, or an empty string if an error occurred.
     */
    public static String getFileText(File file) {
        String text = getFileText(file, utf8);
        Charset declared = getEncoding(text);
        if (declared.equals(utf8)) {
            return text;
        }
        return getFileText(file, declared);
    }

    /**
     * Reads a text file completely, using the specified encoding, or
     * UTF-8 if the specified encoding is null, illegal, or not supported.
     * @param file the file to read.
     * @param encoding the name of the charset to use.
     * @return the text of the file, or an empty string if an error occurred.
     */
    public static String getFileText(File file, String encoding) {
        Charset charset;
        try {
            charset = Charset.forName(encoding);
        } catch (IllegalArgumentException ex) {
            // Covers a null name, an illegal name, and an unsupported charset.
            charset = utf8;
        }
        return getFileText(file, charset);
    }

    /**
     * Reads a text file completely, using the specified encoding.
     * @param file the file to read.
     * @param charset the character set to use for the encoding of the file.
     * @return the text of the file, or an empty string if the file or charset
     * is null, the file does not exist, or any error occurred while reading.
     */
    public static String getFileText(File file, Charset charset) {
        if (file == null || charset == null) {
            return "";
        }
        try {
            if (!file.exists()) {
                return "";
            }
            // The stream is declared as its own resource so it is closed even
            // if constructing the reader around it fails.
            try (FileInputStream in = new FileInputStream(file);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset))) {
                StringBuilder text = new StringBuilder();
                char[] buffer = new char[1024];
                int count;
                while ((count = reader.read(buffer, 0, buffer.length)) != -1) {
                    text.append(buffer, 0, count);
                }
                return text.toString();
            }
        } catch (Exception ex) {
            // Deliberately best-effort: any failure (I/O error, security
            // restriction) yields an empty string, as documented.
            return "";
        }
    }

    /**
     * Determines the charset declared inside a document's text.
     * An XML encoding declaration is tried first, then an HTML meta charset
     * declaration.
     * @param text the document text, as decoded with a provisional charset.
     * @return the declared charset, or UTF-8 if none is declared or recognized.
     */
    private static Charset getEncoding(String text) {
        //See if this is an xml document with an encoding declaration.
        Matcher xmlMatcher = XML_ENCODING.matcher(text);
        if (xmlMatcher.find()) {
            return getEncoding(xmlMatcher);
        }

        //See if this is an html document with a charset declaration.
        Matcher htmlMatcher = HTML_CHARSET.matcher(text);
        if (htmlMatcher.find()) {
            return getEncoding(htmlMatcher);
        }

        //We don't recognize this document declaration; use UTF-8.
        //Maybe this should actually be ISO-8859-1 since
        //that is the web default encoding, but it is probably
        //better to default to UTF-8 because that will be better
        //for sites in the Far East, and the pain for the Europeans
        //will be minimal.
        return utf8;
    }

    /**
     * Converts the charset name captured by a successful match into a Charset.
     * The name is taken from the matcher's last capture group and any
     * surrounding pair of double or single quotes is removed.
     * @param matcher a matcher on which find() has already succeeded.
     * @return the named charset, or UTF-8 if the name is missing, illegal,
     * or not supported.
     */
    private static Charset getEncoding(Matcher matcher) {
        String name = matcher.group(matcher.groupCount());
        if (name == null) {
            return utf8;
        }
        name = name.trim();
        if (name.length() >= 2) {
            char first = name.charAt(0);
            char last = name.charAt(name.length() - 1);
            if ((first == '"' || first == '\'') && last == first) {
                name = name.substring(1, name.length() - 1).trim();
            }
        }
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException ex) {
            // Illegal or unsupported charset name: fall back to the default.
            return utf8;
        }
    }
}