// Java tutorial
//package com.java2s;
/*
 * $Id: XMLUtilities.java,v 1.28 2005/09/05 13:58:15 gmcgoldrick Exp $
 *
 * Copyright (C) 2002, Cladonia Ltd. All rights reserved.
 *
 * This software is the proprietary information of Cladonia Ltd.
 * Use is subject to license terms.
 */

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.SortedMap;

/**
 * Guesses the character encoding of a byte stream from its first bytes.
 *
 * <p>The byte patterns checked are byte order marks and the byte sequences
 * that the characters {@code <?xm} / {@code <?} / {@code <} produce in the
 * various encodings (the same patterns listed in the encoding auto-detection
 * appendix of the XML 1.0 recommendation).
 */
public class Main {
    private static final String UTF_8 = "UTF-8";
    private static final String UTF_16BE = "UTF-16BE";
    private static final String UTF_16LE = "UTF-16LE";
    private static final String EBCDIC = "EBCDIC-CP-US";
    private static final String UCS_4BE = "UCS-4BE";
    private static final String UCS_4LE = "UCS-4LE";

    /** Result for input that is not recognised, including the unusual UCS-4 octet orders. */
    private static final String UNKNOWN = null;

    /**
     * Reads up to four bytes from {@code stream} and returns the encoding name
     * they indicate.
     *
     * <p>Recognised patterns (hex):
     * <ul>
     *   <li>{@code 00 00 FE FF}, {@code 00 00 00 3C} - "UCS-4BE"</li>
     *   <li>{@code FF FE 00 00}, {@code 3C 00 00 00} - "UCS-4LE"</li>
     *   <li>{@code FE FF} (not followed by {@code 00 00}), {@code 00 3C 00 3F} - "UTF-16BE"</li>
     *   <li>{@code FF FE} (not followed by {@code 00 00}), {@code 3C 00 3F 00} - "UTF-16LE"</li>
     *   <li>{@code EF BB BF}, {@code 3C 3F 78 6D} - "UTF-8"</li>
     *   <li>{@code 4C 6F A7 94} - "EBCDIC-CP-US"</li>
     * </ul>
     *
     * <p>The bytes that were inspected stay consumed; the method never rewinds
     * the stream. Reading stops at the first byte that rules out every
     * remaining pattern, so between one and four bytes are consumed. When a
     * UTF-8 byte order mark is found, {@code stream.mark(1024)} is called
     * directly after it (presumably so that a caller can {@code reset()} to the
     * first byte after the mark - confirm against callers).
     *
     * @param stream the stream to inspect; must not be {@code null}
     * @return the detected encoding name, or {@code null} when the leading
     *         bytes match no supported pattern (including an empty stream)
     * @throws IOException if reading from {@code stream} fails
     */
    public static String getStreamEncoding(InputStream stream) throws IOException {
        switch (stream.read()) {
            case 0x00:
                return detectAfterZero(stream);
            case 0x3C:
                return detectAfterLessThan(stream);
            case 0xFF:
                return detectAfterByteOrderMark(stream, 0xFE, UTF_16LE, UCS_4LE);
            case 0xFE:
                // FE FF 00 00 is a UCS-4 byte order mark in an unusual octet order.
                return detectAfterByteOrderMark(stream, 0xFF, UTF_16BE, UNKNOWN);
            case 0xEF:
                if (nextBytesAre(stream, new int[] {0xBB, 0xBF})) {
                    // Mark the position right after the UTF-8 byte order mark.
                    stream.mark(1024);
                    return UTF_8;
                }
                return UNKNOWN;
            case 0x4C:
                return nextBytesAre(stream, new int[] {0x6F, 0xA7, 0x94}) ? EBCDIC : UNKNOWN;
            default:
                // Covers end of stream (-1) and every other lead byte.
                return UNKNOWN;
        }
    }

    /** Handles input whose first byte was 0x00: big-endian UCS-4 and UTF-16 patterns. */
    private static String detectAfterZero(InputStream stream) throws IOException {
        int second = stream.read();
        if (second == 0x00) {
            switch (stream.read()) {
                case 0xFE:
                    // 00 00 FE FF: UCS-4 big-endian byte order mark.
                    return stream.read() == 0xFF ? UCS_4BE : UNKNOWN;
                case 0x00:
                    // 00 00 00 3C: '<' in UCS-4 big-endian.
                    return stream.read() == 0x3C ? UCS_4BE : UNKNOWN;
                case 0xFF:
                case 0x3C:
                    // 00 00 FF FE / 00 00 3C 00: unusual octet order, reported as
                    // unknown. The fourth byte is still consumed so that the stream
                    // position afterwards is the same whatever that byte is.
                    stream.read();
                    return UNKNOWN;
                default:
                    return UNKNOWN;
            }
        }
        if (second == 0x3C && stream.read() == 0x00) {
            // 00 3C 00 3F: "<?" in UTF-16 big-endian. 00 3C 00 00 is an unusual
            // UCS-4 octet order and falls through to unknown.
            return stream.read() == 0x3F ? UTF_16BE : UNKNOWN;
        }
        return UNKNOWN;
    }

    /** Handles input whose first byte was 0x3C ('<'): little-endian and 8-bit patterns. */
    private static String detectAfterLessThan(InputStream stream) throws IOException {
        int second = stream.read();
        if (second == 0x00) {
            int third = stream.read();
            if (third == 0x00) {
                // 3C 00 00 00: '<' in UCS-4 little-endian.
                return stream.read() == 0x00 ? UCS_4LE : UNKNOWN;
            }
            if (third == 0x3F) {
                // 3C 00 3F 00: "<?" in UTF-16 little-endian.
                return stream.read() == 0x00 ? UTF_16LE : UNKNOWN;
            }
            return UNKNOWN;
        }
        if (second == 0x3F) {
            // 3C 3F 78 6D: "<?xm" in an ASCII-compatible encoding, reported as UTF-8.
            return nextBytesAre(stream, new int[] {0x78, 0x6D}) ? UTF_8 : UNKNOWN;
        }
        return UNKNOWN;
    }

    /**
     * Handles a two-byte UTF-16 byte order mark whose first byte has already
     * been read. If the mark is followed by 00 00 the input is a four-byte
     * UCS-4 mark instead and {@code ucs4Name} is returned.
     */
    private static String detectAfterByteOrderMark(InputStream stream, int secondMarkByte,
            String utf16Name, String ucs4Name) throws IOException {
        if (stream.read() != secondMarkByte) {
            return UNKNOWN;
        }
        // Short-circuit evaluation: the fourth byte is read only when the third is 0x00.
        if (stream.read() == 0x00 && stream.read() == 0x00) {
            return ucs4Name;
        }
        return utf16Name;
    }

    /**
     * Reads bytes one at a time and compares them with {@code expected},
     * stopping (without reading further) at the first mismatch.
     */
    private static boolean nextBytesAre(InputStream stream, int[] expected) throws IOException {
        for (int i = 0; i < expected.length; i++) {
            if (stream.read() != expected[i]) {
                return false;
            }
        }
        return true;
    }
}