// Java tutorial
//package com.java2s;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Replaces entity references of the form {@code &name;} in a string with the
 * character each one stands for.
 *
 * <p>Besides the five XML built-ins, the table in {@link #translate(String)}
 * covers the Latin-1 names, a number of typographic and mathematical symbols,
 * Greek letters under both their HTML names and the older {@code Xgr}-style
 * names, and a set of accented Latin letters that are folded to plain ASCII.
 */
public class Main {
    /**
     * Matches an ampersand, the shortest non-empty run of characters that
     * follows it (line terminators excluded), and the closing semicolon.
     */
    static final Pattern xmlEscapingPattern = Pattern.compile("\\&.+?;");

    /**
     * Returns a copy of {@code s} in which every span matched by
     * {@link #xmlEscapingPattern} has been replaced.
     *
     * <ul>
     *   <li>A numeric reference ({@code &#65;} or {@code &#x41;}) that denotes
     *       a character permitted by XML 1.0 becomes that character.</li>
     *   <li>A known entity name becomes the character listed in
     *       {@link #translate(String)}.</li>
     *   <li>Every other matched span becomes a single space. Because the
     *       pattern accepts any characters between the ampersand and the next
     *       semicolon, this includes spans that begin at a bare ampersand.</li>
     * </ul>
     *
     * @param s the text to unescape; must not be {@code null}
     * @return the unescaped text
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String unescapeStringForXML(String s) {
        StringBuilder result = new StringBuilder(s.length());
        Matcher m = xmlEscapingPattern.matcher(s);
        int end = 0;
        while (m.find()) {
            // Copy the literal text between the previous match and this one.
            result.append(s, end, m.start());
            end = m.end();

            String entity = m.group();
            // The pattern guarantees at least "&", one character and ";".
            int codePoint = entity.charAt(1) == '#' ? decodeNumericReference(entity) : -1;
            if (codePoint >= 0) {
                // appendCodePoint emits a surrogate pair when one is needed.
                result.appendCodePoint(codePoint);
            } else {
                result.append(translate(entity));
            }
        }
        result.append(s, end, s.length());
        return result.toString();
    }

    /**
     * Decodes a numeric character reference.
     *
     * @param entity a string starting with {@code &#} and ending with {@code ;}
     * @return the referenced code point, or {@code -1} if the digits are
     *     missing or malformed, or the value is not a legal XML 1.0 character
     */
    private static int decodeNumericReference(String entity) {
        int stop = entity.length() - 1; // index of the closing ';'
        int pos = 2;                    // first character after "&#"
        int radix = 10;
        if (pos < stop && entity.charAt(pos) == 'x') {
            radix = 16;
            pos++;
        }
        if (pos >= stop) {
            return -1; // no digits at all
        }
        int codePoint = 0;
        for (; pos < stop; pos++) {
            char c = entity.charAt(pos);
            // Only ASCII digits are accepted; Character.digit alone would
            // also accept digits from other scripts.
            int digit = c < 0x80 ? Character.digit(c, radix) : -1;
            if (digit < 0) {
                return -1;
            }
            codePoint = codePoint * radix + digit;
            if (codePoint > Character.MAX_CODE_POINT) {
                return -1; // bail out before the accumulator can overflow
            }
        }
        return isXmlChar(codePoint) ? codePoint : -1;
    }

    /** Tells whether {@code codePoint} is within the XML 1.0 {@code Char} production. */
    private static boolean isXmlChar(int codePoint) {
        return codePoint == 0x9
                || codePoint == 0xA
                || codePoint == 0xD
                || (codePoint >= 0x20 && codePoint <= 0xD7FF)
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
    }

    /**
     * Maps one entity reference to its replacement character.
     *
     * <p>The lookup is done on the bare name (the text between the ampersand
     * and the semicolon) and is case-sensitive.
     *
     * @param s the complete reference, including the leading ampersand and
     *     the trailing semicolon
     * @return the replacement character, or a space when {@code s} is not a
     *     well-formed reference or its name is not in the table
     */
    private static char translate(String s) {
        int last = s.length() - 1;
        if (last < 2 || s.charAt(0) != '&' || s.charAt(last) != ';') {
            return ' ';
        }
        // NOTE(review): the labels below were rebuilt from a copy of this file in
        // which the entity names had been replaced by the characters they decode to.
        // Where several names decode to the same character the ISO name was chosen
        // (ast, sharp, equals, percnt, plus, hyphen, dollar, lcub, rcub, lsqb, rsqb,
        // ape, Lt, Gt) - confirm against real input data.
        switch (s.substring(1, last)) {
            // --- XML built-ins and ASCII punctuation ---
            case "amp": return '&';
            case "lt": case "Lt": return '<';
            case "gt": case "Gt": return '>';
            case "quot": return '"';
            case "apos": return '\'';
            case "ast": case "sharp": return '-';
            case "equals": return '=';
            case "percnt": return '%';
            case "plus": return '+';
            case "hyphen": return '-';
            case "dollar": return '$';
            case "lsqb": return '[';
            case "rsqb": return ']';

            // --- Latin-1 supplement, U+00A0 to U+00FF ---
            case "nbsp": return (char) 0xA0;
            case "iexcl": return (char) 0xA1;
            case "cent": case "shilling": return (char) 0xA2;
            case "pound": return (char) 0xA3;
            case "curren": return (char) 0xA4;
            case "yen": return (char) 0xA5;
            case "brvbar": return (char) 0xA6;
            case "sect": return (char) 0xA7;
            case "uml": return (char) 0xA8;
            case "copy": return (char) 0xA9;
            case "ordf": return (char) 0xAA;
            case "laquo": return (char) 0xAB;
            case "not": return (char) 0xAC;
            case "shy": return (char) 0xAD;
            case "reg": return (char) 0xAE;
            case "macr": return (char) 0xAF;
            case "deg": return (char) 0xB0;
            case "plusmn": return (char) 0xB1;
            case "sup2": return (char) 0xB2;
            case "sup3": return (char) 0xB3;
            case "acute": return (char) 0xB4;
            case "micro": return (char) 0xB5;
            case "middot": return (char) 0xB7;
            case "cedil": return (char) 0xB8;
            case "sup1": return (char) 0xB9;
            case "ordm": return (char) 0xBA;
            case "raquo": return (char) 0xBB;
            case "frac14": return (char) 0xBC;
            case "frac12": return (char) 0xBD;
            case "frac34": return (char) 0xBE;
            case "iquest": return (char) 0xBF;
            case "Agrave": return (char) 0xC0;
            case "Aacute": return (char) 0xC1;
            case "Acirc": return (char) 0xC2;
            case "Atilde": return (char) 0xC3;
            case "Auml": return (char) 0xC4;
            case "Aring": return (char) 0xC5;
            case "AElig": return (char) 0xC6;
            case "Ccedil": return (char) 0xC7;
            case "Egrave": return (char) 0xC8;
            case "Eacute": return (char) 0xC9;
            case "Ecirc": return (char) 0xCA;
            case "Euml": return (char) 0xCB;
            case "Igrave": return (char) 0xCC;
            case "Iacute": return (char) 0xCD;
            case "Icirc": return (char) 0xCE;
            case "Iuml": return (char) 0xCF;
            case "ETH": return (char) 0xD0;
            case "Ntilde": return (char) 0xD1;
            case "Ograve": return (char) 0xD2;
            case "Oacute": return (char) 0xD3;
            case "Ocirc": return (char) 0xD4;
            case "Otilde": return (char) 0xD5;
            case "Ouml": return (char) 0xD6;
            case "times": return (char) 0xD7;
            case "Oslash": return (char) 0xD8;
            case "Ugrave": return (char) 0xD9;
            case "Uacute": return (char) 0xDA;
            case "Ucirc": return (char) 0xDB;
            case "Uuml": return (char) 0xDC;
            case "Yacute": return (char) 0xDD;
            case "THORN": return (char) 0xDE;
            case "szlig": return (char) 0xDF;
            case "agrave": return (char) 0xE0;
            case "aacute": return (char) 0xE1;
            case "acirc": return (char) 0xE2;
            case "atilde": return (char) 0xE3;
            case "auml": return (char) 0xE4;
            case "aring": return (char) 0xE5;
            case "aelig": return (char) 0xE6;
            case "ccedil": return (char) 0xE7;
            case "egrave": return (char) 0xE8;
            case "eacute": return (char) 0xE9;
            case "ecirc": return (char) 0xEA;
            case "euml": return (char) 0xEB;
            case "igrave": return (char) 0xEC;
            case "iacute": return (char) 0xED;
            case "icirc": return (char) 0xEE;
            case "iuml": return (char) 0xEF;
            case "eth": return (char) 0xF0;
            case "ntilde": return (char) 0xF1;
            case "ograve": return (char) 0xF2;
            case "oacute": return (char) 0xF3;
            case "ocirc": return (char) 0xF4;
            case "otilde": return (char) 0xF5;
            case "ouml": return (char) 0xF6;
            case "divide": return (char) 0xF7;
            case "oslash": return (char) 0xF8;
            case "ugrave": return (char) 0xF9;
            case "uacute": return (char) 0xFA;
            case "ucirc": return (char) 0xFB;
            case "uuml": return (char) 0xFC;
            case "yacute": return (char) 0xFD;
            case "thorn": return (char) 0xFE;
            case "yuml": return (char) 0xFF;

            // --- Latin extended and spacing modifiers kept as-is ---
            case "OElig": return (char) 0x152;
            case "oelig": return (char) 0x153;
            case "Scaron": return (char) 0x160;
            case "scaron": return (char) 0x161;
            case "Yuml": return (char) 0x178;
            case "circ": return (char) 0x2C6;
            case "tilde": return (char) 0x2DC;

            // --- General punctuation, arrows, math, misc symbols ---
            case "lrm": return (char) 0x200E;
            case "rlm": return (char) 0x200F;
            case "ndash": return (char) 0x2013;
            case "mdash": return (char) 0x2014;
            case "lsquo": return (char) 0x2018;
            case "rsquo": return (char) 0x2019;
            case "sbquo": return (char) 0x201A;
            case "ldquo": case "bquo": case "bq": return (char) 0x201C;
            case "rdquo": case "equo": return (char) 0x201D;
            case "bdquo": return (char) 0x201E;
            case "bull": return (char) 0x2022;
            case "hellip": return (char) 0x2026;
            case "prime": return (char) 0x2032;
            case "Prime": case "ins": return (char) 0x2033;
            case "trade": return (char) 0x2122;
            case "larr": return (char) 0x2190;
            case "rarr": return (char) 0x2192;
            case "darr": return (char) 0x2193;
            case "radic": return (char) 0x221A;
            case "infin": return (char) 0x221E;
            case "sim": return (char) 0x223C;
            case "le": return (char) 0x2264;
            case "ge": return (char) 0x2265;
            // The curly-brace names share the subset/superset targets, as found.
            case "sub": case "lcub": return (char) 0x2282;
            case "sup": case "rcub": return (char) 0x2283;
            case "hearts": return (char) 0x2665;

            // --- Greek capitals (HTML name, then Xgr-style alias) ---
            case "Alpha": case "Agr": return (char) 0x391;
            case "Beta": case "Bgr": return (char) 0x392;
            case "Gamma": case "Ggr": return (char) 0x393;
            case "Delta": case "Dgr": return (char) 0x394;
            case "Epsilon": case "Egr": return (char) 0x395;
            case "Zeta": case "Zgr": return (char) 0x396;
            case "Eta": return (char) 0x397;
            case "Theta": case "THgr": return (char) 0x398;
            case "Iota": case "Igr": return (char) 0x399;
            case "Kappa": case "Kgr": return (char) 0x39A;
            case "Lambda": case "Lgr": return (char) 0x39B;
            case "Mu": case "Mgr": return (char) 0x39C;
            case "Nu": case "Ngr": return (char) 0x39D;
            case "Xi": case "Xgr": return (char) 0x39E;
            case "Omicron": case "Ogr": return (char) 0x39F;
            case "Pi": case "Pgr": return (char) 0x3A0;
            case "Rho": case "Rgr": return (char) 0x3A1;
            case "Sigma": case "Sgr": return (char) 0x3A3;
            case "Tau": case "Tgr": return (char) 0x3A4;
            case "Upsilon": case "Ugr": return (char) 0x3A5;
            case "Phi": case "PHgr": return (char) 0x3A6;
            case "Chi": case "KHgr": return (char) 0x3A7;
            case "Psi": case "PSgr": return (char) 0x3A8;
            case "Omega": case "OHgr": return (char) 0x3A9;

            // --- Greek small letters ---
            case "alpha": case "agr": return (char) 0x3B1;
            case "beta": case "bgr": return (char) 0x3B2;
            case "gamma": case "ggr": return (char) 0x3B3;
            case "delta": case "dgr": return (char) 0x3B4;
            case "epsilon": case "egr": return (char) 0x3B5;
            case "zeta": case "zgr": return (char) 0x3B6;
            case "eta": case "eegr": return (char) 0x3B7;
            case "theta": case "thgr": return (char) 0x3B8;
            case "iota": case "igr": return (char) 0x3B9;
            case "kappa": case "kgr": return (char) 0x3BA;
            case "lambda": case "lgr": return (char) 0x3BB;
            case "mu": case "mgr": return (char) 0x3BC;
            case "nu": case "ngr": return (char) 0x3BD;
            case "xi": case "xgr": return (char) 0x3BE;
            case "omicron": case "ogr": return (char) 0x3BF;
            case "pi": case "pgr": return (char) 0x3C0;
            case "rho": case "rgr": return (char) 0x3C1;
            case "sigma": case "sgr": return (char) 0x3C3;
            case "tau": case "tgr": return (char) 0x3C4;
            case "upsilon": case "ugr": return (char) 0x3C5;
            case "phi": case "phgr": return (char) 0x3C6;
            case "chi": case "khgr": return (char) 0x3C7;
            case "psi": case "psgr": return (char) 0x3C8;
            case "omega": case "ohgr": return (char) 0x3C9;

            // --- Accented Latin letters folded to their ASCII base letter ---
            case "abreve": case "amacr": case "ape": case "aogon": return 'a';
            case "Amacr": return 'A';
            case "cacute": case "ccaron": case "ccirc": return 'c';
            case "Ccaron": return 'C';
            case "dcaron": return 'd';
            case "ecaron": case "emacr": case "eogon": return 'e';
            case "Emacr": case "Ecaron": return 'E';
            case "imacr": return 'i';
            case "lacute": return 'l';
            case "Lacute": return 'L';
            case "nacute": case "ncaron": case "ncedil": return 'n';
            case "omacr": return 'o';
            case "rcaron": case "racute": return 'r';
            case "Rcaron": return 'R';
            case "sacute": case "scedil": case "scirc": return 's';
            case "Sacute": case "Scedil": return 'S';
            case "tcaron": case "tcedil": return 't';
            case "umacr": case "uring": return 'u';
            case "wcirc": return 'w';
            case "Ycirc": return 'Y';
            case "ycirc": return 'y';
            case "zcaron": case "zacute": return 'z';
            case "Zcaron": return 'Z';

            default:
                // Unknown names are blanked out rather than passed through.
                return ' ';
        }
    }
}