// Entity Decoder
/**
* Copyright (C) 2003 Manfred Andres
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
import java.util.StringTokenizer;
/**
 * Static helpers that translate between three text representations:
 * percent-escaped text ("%F6"), plain characters, and HTML entity
 * references ("&amp;ouml;").
 *
 * The tables {@link #entities}, {@link #charsHtml} and {@link #chars} are
 * parallel: slot <code>i</code> of each one describes the same character.
 * All string tables are written with ASCII-only literals so that the result
 * does not depend on the encoding used to compile this file.
 */
public class EntityDecoder {
    /**
     * HTML replacement for the character codes 128 to 255, indexed by
     * <code>code - 128</code>. Slots 0 to 31 follow the Windows-1252 layout
     * (slot 0 is the euro sign); codes that have no character there map to
     * the empty string, so {@link #charToHtml(String)} drops them.
     */
    public final static String[] htmlEntities = {
        // 0x80 - 0x8F
        "&euro;", "", "&sbquo;", "&fnof;", "&bdquo;", "&hellip;", "&dagger;", "&Dagger;",
        "&circ;", "&permil;", "&Scaron;", "&lsaquo;", "&OElig;", "", "&#381;", "",
        // 0x90 - 0x9F
        "", "&lsquo;", "&rsquo;", "&ldquo;", "&rdquo;", "&bull;", "&ndash;", "&mdash;",
        "&tilde;", "&trade;", "&scaron;", "&rsaquo;", "&oelig;", "", "&#382;", "&Yuml;",
        // 0xA0 - 0xAF
        "&nbsp;", "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;",
        "&uml;", "&copy;", "&ordf;", "&laquo;", "&not;", "&shy;", "&reg;", "&macr;",
        // 0xB0 - 0xBF
        "&deg;", "&plusmn;", "&sup2;", "&sup3;", "&acute;", "&micro;", "&para;", "&middot;",
        "&cedil;", "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;", "&frac34;", "&iquest;",
        // 0xC0 - 0xCF
        "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;",
        "&Egrave;", "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;",
        // 0xD0 - 0xDF
        "&ETH;", "&Ntilde;", "&Ograve;", "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;",
        "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;", "&Yacute;", "&THORN;", "&szlig;",
        // 0xE0 - 0xEF
        "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;", "&aelig;", "&ccedil;",
        "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;",
        // 0xF0 - 0xFF
        "&eth;", "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;",
        "&oslash;", "&ugrave;", "&uacute;", "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;"
    };

    /**
     * Two-digit upper-case hex codes recognised after a '%' sign. Slot
     * <code>i</code> is decoded to <code>chars[i]</code> or
     * <code>charsHtml[i]</code>. The first matching slot wins, so every
     * code appears only once.
     */
    public final static String[] entities = {
        /*  0- 6 */ "F6", "E4", "FC", "D6", "C4", "DC", "DF",
        /*  7-16 */ "3F", "5C", "2C", "3A", "3B", "23", "2B", "7E", "21", "22",
        /* 17-30 */ "A7", "24", "25", "26", "28", "29", "3D", "3C", "3E", "7B", "5B", "5D", "7D", "2F",
        /* 31-35 */ "E2", "EA", "EE", "F4", "FB",
        /* 36-40 */ "C2", "CA", "CE", "D4", "DB",
        /* 41-45 */ "E1", "E9", "ED", "F3", "FA",
        /* 46-50 */ "C1", "C9", "CD", "D3", "DA",
        /* 51-55 */ "E0", "E8", "EC", "F2", "F9",
        /* 56-60 */ "C0", "C8", "CC", "D2", "D9",
        /* 61-64 */ "B0", "B3", "B2", "80",
        /* 65-73 */ "7C", "5E", "60", "B4", "27", "20", "40", "98", "2A"
    };

    /**
     * HTML form of each slot of {@link #entities}. Non-ASCII characters and
     * the angle brackets are entity references; '&amp;' and the other ASCII
     * characters are kept literal, matching {@link #charToHtml(String)}.
     * No entry is empty, which {@link #htmlToChar(String)} relies on.
     */
    public final static String[] charsHtml = {
        /*  0- 6 */ "&ouml;", "&auml;", "&uuml;", "&Ouml;", "&Auml;", "&Uuml;", "&szlig;",
        /*  7-16 */ "?", "\\", ",", ":", ";", "#", "+", "&tilde;", "!", "\"",
        /* 17-30 */ "&sect;", "$", "%", "&", "(", ")", "=", "&lt;", "&gt;", "{", "[", "]", "}", "/",
        /* 31-35 */ "&acirc;", "&ecirc;", "&icirc;", "&ocirc;", "&ucirc;",
        /* 36-40 */ "&Acirc;", "&Ecirc;", "&Icirc;", "&Ocirc;", "&Ucirc;",
        /* 41-45 */ "&aacute;", "&eacute;", "&iacute;", "&oacute;", "&uacute;",
        /* 46-50 */ "&Aacute;", "&Eacute;", "&Iacute;", "&Oacute;", "&Uacute;",
        /* 51-55 */ "&agrave;", "&egrave;", "&igrave;", "&ograve;", "&ugrave;",
        /* 56-60 */ "&Agrave;", "&Egrave;", "&Igrave;", "&Ograve;", "&Ugrave;",
        /* 61-64 */ "&deg;", "&sup3;", "&sup2;", "&euro;",
        /* 65-73 */ "|", "&circ;", "`", "&acute;", "'", " ", "@", "~", "*"
    };

    /**
     * Plain-character form of each slot of {@link #entities}. Non-ASCII
     * characters are written as unicode escapes so no character can be lost
     * to a source-encoding mismatch.
     */
    public final static String[] chars = {
        /*  0- 6 */ "\u00F6", "\u00E4", "\u00FC", "\u00D6", "\u00C4", "\u00DC", "\u00DF",
        /*  7-16 */ "?", "\\", ",", ":", ";", "#", "+", "~", "!", "\"",
        /* 17-30 */ "\u00A7", "$", "%", "&", "(", ")", "=", "<", ">", "{", "[", "]", "}", "/",
        /* 31-35 */ "\u00E2", "\u00EA", "\u00EE", "\u00F4", "\u00FB",
        /* 36-40 */ "\u00C2", "\u00CA", "\u00CE", "\u00D4", "\u00DB",
        /* 41-45 */ "\u00E1", "\u00E9", "\u00ED", "\u00F3", "\u00FA",
        /* 46-50 */ "\u00C1", "\u00C9", "\u00CD", "\u00D3", "\u00DA",
        /* 51-55 */ "\u00E0", "\u00E8", "\u00EC", "\u00F2", "\u00F9",
        /* 56-60 */ "\u00C0", "\u00C8", "\u00CC", "\u00D2", "\u00D9",
        /* 61-64 */ "\u00B0", "\u00B3", "\u00B2", "\u20AC",
        /* 65-73 */ "|", "^", "`", "\u00B4", "'", " ", "@", "~", "*"
    };

    /**
     * Decodes percent-escaped text into plain characters.
     *
     * @param raw the escaped text; may be null
     * @return the decoded text, or null if <code>raw</code> is null
     */
    public static String entityToChar (String raw) {
        return entityTo (raw, chars);
    }

    /**
     * Decodes percent-escaped text into plain characters and then encodes
     * the result for HTML with {@link #charToHtml(String)}.
     *
     * @param raw the escaped text; may be null
     * @return the HTML-encoded text, or null if <code>raw</code> is null
     */
    public static String entityToHtml (String raw) {
        return charToHtml (entityTo (raw, chars));
    }

    /**
     * Replaces every entry of {@link #charsHtml} found in the text with the
     * matching entry of {@link #chars}.
     *
     * @param raw the HTML text; may be null
     * @return the converted text, or null if <code>raw</code> is null
     */
    public static String htmlToChar (String raw) {
        return convert (raw, charsHtml, chars);
    }

    /**
     * Encodes plain text for HTML output. The angle brackets become
     * "&amp;lt;" and "&amp;gt;", other characters below 128 (including
     * '&amp;') are copied unchanged, characters 128 to 255 are looked up in
     * {@link #htmlEntities}, and everything else becomes a decimal numeric
     * reference.
     *
     * @param raw the plain text; may be null
     * @return the encoded text, or null if <code>raw</code> is null
     */
    public static String charToHtml (String raw) {
        if (raw == null)
            return null;
        StringBuffer encoded = new StringBuffer (raw.length ());
        for (int i = 0; i < raw.length (); i++) {
            char c = raw.charAt (i);
            if (c == '<')
                encoded.append ("&lt;");
            else if (c == '>')
                encoded.append ("&gt;");
            else if (c < 128)
                encoded.append (c);
            else if (c < 256)
                encoded.append (htmlEntities[c - 128]);
            else
                encoded.append ("&#").append ((int) c).append (';');
        }
        return encoded.toString ();
    }

    /**
     * Decodes percent-escaped text using the given target table. Every '+'
     * is first turned into a space. After a '%', a token that starts with
     * one of the codes in {@link #entities} has that code replaced by the
     * matching entry of <code>tc</code>; an unknown code is copied without
     * the '%'. When <code>tc</code> is the {@link #charsHtml} table itself,
     * literal angle brackets in the input are escaped as well.
     *
     * @param raw the escaped text; may be null
     * @param tc  target table parallel to {@link #entities}
     * @return the decoded text, or null if <code>raw</code> is null
     */
    public static String entityTo (String raw, String[] tc) {
        if (raw == null)
            return null;
        // Identity comparison on purpose: only the HTML table switches on escaping.
        boolean htmlMode = tc == charsHtml;
        String delimiters = htmlMode ? "%<>" : "%";
        StringBuffer sb = new StringBuffer (raw.length ());
        boolean afterPercent = false;
        StringTokenizer st = new StringTokenizer (raw.replace ('+', ' '), delimiters, true);
        while (st.hasMoreTokens ()) {
            String token = st.nextToken ();
            if (afterPercent) {
                afterPercent = false;
                int slot = indexOfEntity (token);
                if (slot >= 0) {
                    sb.append (tc[slot]);
                    sb.append (token.substring (2));
                } else {
                    // Unknown code: keep the token, but never let a bare
                    // angle bracket through unescaped in HTML mode.
                    appendLiteral (sb, token, htmlMode);
                }
            } else if (token.equals ("%")) {
                afterPercent = true;
            } else {
                appendLiteral (sb, token, htmlMode);
            }
        }
        return sb.toString ();
    }

    /**
     * Replaces, table slot by table slot, every occurrence of
     * <code>from[i]</code> with <code>to[i]</code>. The slots are applied
     * one after another, so a later slot also sees text produced by an
     * earlier one. Null or empty search strings are skipped.
     *
     * @param raw  the text to convert; may be null
     * @param from search strings
     * @param to   replacements, parallel to <code>from</code>
     * @return the converted text, or null if <code>raw</code> is null
     */
    public static String convert (String raw, String[] from, String[] to) {
        if (raw == null)
            return null;
        String result = raw;
        for (int i = 0; i < from.length; i++) {
            String needle = from[i];
            // An empty search string matches at every position without
            // consuming input, which would keep the loop below running forever.
            if (needle == null || needle.length () == 0)
                continue;
            int idx = result.indexOf (needle);
            if (idx < 0)
                continue;
            StringBuffer sb = new StringBuffer (result.length ());
            int start = 0;
            while (idx > -1) {
                sb.append (result.substring (start, idx));
                sb.append (to[i]);
                start = idx + needle.length ();
                idx = result.indexOf (needle, start);
            }
            sb.append (result.substring (start));
            result = sb.toString ();
        }
        return result;
    }

    /** Returns the first slot of {@link #entities} the token starts with, or -1. */
    private static int indexOfEntity (String token) {
        for (int i = 0; i < entities.length; i++) {
            if (token.startsWith (entities[i]))
                return i;
        }
        return -1;
    }

    /** Appends a token that needs no decoding, escaping lone angle brackets in HTML mode. */
    private static void appendLiteral (StringBuffer sb, String token, boolean htmlMode) {
        if (htmlMode && token.equals ("<"))
            sb.append ("&lt;");
        else if (htmlMode && token.equals (">"))
            sb.append ("&gt;");
        else
            sb.append (token);
    }
}
// Related examples in the same category