Java tutorial
/*
 * Copyright 2012 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Revision History
 * Author           Date              Description
 * ---------------  ----------------  ------------
 * Sang-cheon Park  2012. 9. 20.      First Draft.
 */
package com.athena.chameleon.engine.threadpool.task;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Locale;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;

import com.ibm.icu.text.CharsetDetector;

/**
 * <pre>
 * Runnable Task that re-encodes a single file in place.
 *
 * The file's bytes are decoded using the charset guessed by ICU's
 * CharsetDetector and written back in the configured target encoding.
 * For web/markup files (see TARGET_SUFFIX) every occurrence of a known
 * charset name in the text is additionally replaced by the target encoding.
 * </pre>
 *
 * @author Sang-cheon Park
 * @version 1.0
 */
public class FileEncodingConvertTask extends BaseTask {

    /**
     * Charset names that are replaced by the target encoding inside the file contents.
     * Holds every ICU-detectable charset name plus "MS949" and "KSC5601", followed by
     * the lower-cased form of each of those names.
     */
    private static final String[] SEARCH_CHAR_SET;

    /** File extensions (lower case) whose contents get charset names rewritten. */
    private static final String[] TARGET_SUFFIX = { "html", "htm", "jsp", "xml", "js", "css" };

    static {
        String[] canonical = CharsetDetector.getAllDetectableCharsets();
        canonical = (String[]) ArrayUtils.add(canonical, "MS949");
        canonical = (String[]) ArrayUtils.add(canonical, "KSC5601");

        // Names as given first, then their lower-cased forms, in the same order.
        String[] all = new String[canonical.length * 2];
        for (int i = 0; i < canonical.length; i++) {
            all[i] = canonical[i];
            // Locale.ROOT: charset names are ASCII identifiers and must not be
            // lower-cased with locale-specific rules (e.g. Turkish dotless i).
            all[canonical.length + i] = canonical[i].toLowerCase(Locale.ROOT);
        }
        SEARCH_CHAR_SET = all;
    }

    private File file;
    private String defaultEncoding = "UTF-8";
    private String extension;

    /**
     * Creates a task named after the file's absolute path.
     *
     * @param file            the file to convert
     * @param defaultEncoding the target encoding; "UTF-8" is used when empty or null
     */
    public FileEncodingConvertTask(File file, String defaultEncoding) {
        this(file.getAbsolutePath() + " Convert Task", file, defaultEncoding);
    }

    /**
     * Creates a task with an explicit name.
     *
     * @param taskName        the task name passed to the base task
     * @param file            the file to convert
     * @param defaultEncoding the target encoding; "UTF-8" is used when empty or null
     */
    public FileEncodingConvertTask(String taskName, File file, String defaultEncoding) {
        super(taskName);
        setFile(file);

        if (StringUtils.isNotEmpty(defaultEncoding)) {
            setDefaultEncoding(defaultEncoding);
        }
    }

    /**
     * Sets the file to convert, requests write permission on it and records its
     * lower-cased extension. A name without a dot has an empty extension.
     *
     * @param file the file to set
     */
    public void setFile(File file) {
        this.file = file;
        // Result intentionally not checked; a read-only file surfaces as an
        // IOException when the task tries to write it.
        this.file.setWritable(true);

        String name = file.getName();
        int dot = name.lastIndexOf('.');
        // Without this guard a file literally named "html" would be treated as
        // having the extension "html".
        this.extension = (dot < 0) ? "" : name.substring(dot + 1).toLowerCase(Locale.ROOT);
    }

    /**
     * @param defaultEncoding the defaultEncoding to set
     */
    public void setDefaultEncoding(String defaultEncoding) {
        this.defaultEncoding = defaultEncoding;
    }

    /**
     * Reads the file, decodes it with the detected charset and writes it back in the
     * target encoding. The file is left untouched when its charset cannot be detected
     * or when the target encoding is not supported.
     */
    @Override
    protected void taskRun() {
        try {
            String fileContents = decode(readFile());

            if (fileContents == null) {
                // Detection failed; keep the original file as it is.
                return;
            }

            if (ArrayUtils.contains(TARGET_SUFFIX, extension)) {
                fileContents = replace(fileContents);
            }

            // Encode BEFORE opening the file for writing: opening truncates it, so an
            // unsupported target encoding must be reported while the data is intact.
            byte[] converted = fileContents.getBytes(defaultEncoding);
            writeFile(converted);
        } catch (UnsupportedEncodingException e) {
            logger.error("UnsupportedEncodingException has occurred : ", e);
        } catch (FileNotFoundException e) {
            logger.error("FileNotFoundException has occurred : ", e);
        } catch (IOException e) {
            logger.error("IOException has occurred : ", e);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Reads the whole file into memory, always releasing the input stream. */
    private byte[] readFile() throws IOException {
        InputStream input = null;
        try {
            input = new FileInputStream(file);
            return IOUtils.toByteArray(input, file.length());
        } finally {
            IOUtils.closeQuietly(input);
        }
    }

    /**
     * Decodes the bytes using the charset guessed by ICU, with the target encoding
     * given to the detector as the declared-encoding hint.
     *
     * @return the decoded text, or {@code null} when detection or decoding fails
     */
    private String decode(byte[] data) {
        try {
            CharsetDetector detector = new CharsetDetector();
            detector.setDeclaredEncoding(defaultEncoding);
            detector.setText(data);

            com.ibm.icu.text.CharsetMatch match = detector.detect();
            if (match == null) {
                logger.warn("Could not detect the character set of " + file.getAbsolutePath());
                return null;
            }
            return match.getString();
        } catch (Exception e) {
            // Best effort: an undecodable file is skipped, not treated as a task failure.
            logger.warn("Could not decode " + file.getAbsolutePath() + " : ", e);
            return null;
        }
    }

    /** Overwrites the file with the given bytes, surfacing write and close failures. */
    private void writeFile(byte[] bytes) throws IOException {
        FileOutputStream output = null;
        try {
            output = new FileOutputStream(file);
            output.write(bytes);
            // Explicit close so a failure is reported instead of silently swallowed.
            output.close();
        } finally {
            IOUtils.closeQuietly(output);
        }
    }

    /**
     * Replaces every occurrence of a known charset name with the target encoding.
     * Matching is literal and case-sensitive against SEARCH_CHAR_SET; occurrences are
     * replaced wherever they appear in the text, not only in charset declarations.
     *
     * @param str the file contents
     * @return the contents with charset names rewritten
     */
    private String replace(String str) {
        for (String charSet : SEARCH_CHAR_SET) {
            if (!charSet.equals(defaultEncoding)) {
                // Literal replace: neither the name nor the replacement is a regex.
                str = str.replace(charSet, defaultEncoding);
            }
        }

        return str;
    }
    //end of replace()
}
//end of FileEncodingConvertTask.java