// Java tutorial
import java.util.HashMap;
import java.util.Map;

/* Copyright (c) 2008 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//package com.google.gdata.util.common.base;

/**
 * Some common string manipulation utilities.
 */
public class Util {

  /** Number of named character entities defined by HTML 4. */
  private static final int ENTITY_COUNT = 252;

  /**
   * Maps each complete HTML 4 named character reference (ampersand, entity
   * name, semicolon) to the single character it stands for.
   */
  static Map<String, Character> escapeStrings;

  static {
    // HTML character entity references as defined in HTML 4
    // see http://www.w3.org/TR/REC-html40/sgml/entities.html
    //
    // Sized so that all entries fit under the default 0.75 load factor
    // without triggering a rehash while the table is being filled.
    escapeStrings = new HashMap<String, Character>(ENTITY_COUNT * 4 / 3 + 1);

    // The keys are assembled from bare entity names by define() instead of
    // being spelled out as literals. That keeps the table intact even if this
    // file passes through tooling that HTML-decodes its text.

    // ISO 8859-1 (Latin-1) characters.
    define("nbsp", '\u00A0');
    define("iexcl", '\u00A1');
    define("cent", '\u00A2');
    define("pound", '\u00A3');
    define("curren", '\u00A4');
    define("yen", '\u00A5');
    define("brvbar", '\u00A6');
    define("sect", '\u00A7');
    define("uml", '\u00A8');
    define("copy", '\u00A9');
    define("ordf", '\u00AA');
    define("laquo", '\u00AB');
    define("not", '\u00AC');
    define("shy", '\u00AD');
    define("reg", '\u00AE');
    define("macr", '\u00AF');
    define("deg", '\u00B0');
    define("plusmn", '\u00B1');
    define("sup2", '\u00B2');
    define("sup3", '\u00B3');
    define("acute", '\u00B4');
    define("micro", '\u00B5');
    define("para", '\u00B6');
    define("middot", '\u00B7');
    define("cedil", '\u00B8');
    define("sup1", '\u00B9');
    define("ordm", '\u00BA');
    define("raquo", '\u00BB');
    define("frac14", '\u00BC');
    define("frac12", '\u00BD');
    define("frac34", '\u00BE');
    define("iquest", '\u00BF');
    define("Agrave", '\u00C0');
    define("Aacute", '\u00C1');
    define("Acirc", '\u00C2');
    define("Atilde", '\u00C3');
    define("Auml", '\u00C4');
    define("Aring", '\u00C5');
    define("AElig", '\u00C6');
    define("Ccedil", '\u00C7');
    define("Egrave", '\u00C8');
    define("Eacute", '\u00C9');
    define("Ecirc", '\u00CA');
    define("Euml", '\u00CB');
    define("Igrave", '\u00CC');
    define("Iacute", '\u00CD');
    define("Icirc", '\u00CE');
    define("Iuml", '\u00CF');
    define("ETH", '\u00D0');
    define("Ntilde", '\u00D1');
    define("Ograve", '\u00D2');
    define("Oacute", '\u00D3');
    define("Ocirc", '\u00D4');
    define("Otilde", '\u00D5');
    define("Ouml", '\u00D6');
    define("times", '\u00D7');
    define("Oslash", '\u00D8');
    define("Ugrave", '\u00D9');
    define("Uacute", '\u00DA');
    define("Ucirc", '\u00DB');
    define("Uuml", '\u00DC');
    define("Yacute", '\u00DD');
    define("THORN", '\u00DE');
    define("szlig", '\u00DF');
    define("agrave", '\u00E0');
    define("aacute", '\u00E1');
    define("acirc", '\u00E2');
    define("atilde", '\u00E3');
    define("auml", '\u00E4');
    define("aring", '\u00E5');
    define("aelig", '\u00E6');
    define("ccedil", '\u00E7');
    define("egrave", '\u00E8');
    define("eacute", '\u00E9');
    define("ecirc", '\u00EA');
    define("euml", '\u00EB');
    define("igrave", '\u00EC');
    define("iacute", '\u00ED');
    define("icirc", '\u00EE');
    define("iuml", '\u00EF');
    define("eth", '\u00F0');
    define("ntilde", '\u00F1');
    define("ograve", '\u00F2');
    define("oacute", '\u00F3');
    define("ocirc", '\u00F4');
    define("otilde", '\u00F5');
    define("ouml", '\u00F6');
    define("divide", '\u00F7');
    define("oslash", '\u00F8');
    define("ugrave", '\u00F9');
    define("uacute", '\u00FA');
    define("ucirc", '\u00FB');
    define("uuml", '\u00FC');
    define("yacute", '\u00FD');
    define("thorn", '\u00FE');
    define("yuml", '\u00FF');

    // Symbols, mathematical symbols, and Greek letters.
    define("fnof", '\u0192');
    define("Alpha", '\u0391');
    define("Beta", '\u0392');
    define("Gamma", '\u0393');
    define("Delta", '\u0394');
    define("Epsilon", '\u0395');
    define("Zeta", '\u0396');
    define("Eta", '\u0397');
    define("Theta", '\u0398');
    define("Iota", '\u0399');
    define("Kappa", '\u039A');
    define("Lambda", '\u039B');
    define("Mu", '\u039C');
    define("Nu", '\u039D');
    define("Xi", '\u039E');
    define("Omicron", '\u039F');
    define("Pi", '\u03A0');
    define("Rho", '\u03A1');
    define("Sigma", '\u03A3');
    define("Tau", '\u03A4');
    define("Upsilon", '\u03A5');
    define("Phi", '\u03A6');
    define("Chi", '\u03A7');
    define("Psi", '\u03A8');
    define("Omega", '\u03A9');
    define("alpha", '\u03B1');
    define("beta", '\u03B2');
    define("gamma", '\u03B3');
    define("delta", '\u03B4');
    define("epsilon", '\u03B5');
    define("zeta", '\u03B6');
    define("eta", '\u03B7');
    define("theta", '\u03B8');
    define("iota", '\u03B9');
    define("kappa", '\u03BA');
    define("lambda", '\u03BB');
    define("mu", '\u03BC');
    define("nu", '\u03BD');
    define("xi", '\u03BE');
    define("omicron", '\u03BF');
    define("pi", '\u03C0');
    define("rho", '\u03C1');
    define("sigmaf", '\u03C2');
    define("sigma", '\u03C3');
    define("tau", '\u03C4');
    define("upsilon", '\u03C5');
    define("phi", '\u03C6');
    define("chi", '\u03C7');
    define("psi", '\u03C8');
    define("omega", '\u03C9');
    define("thetasym", '\u03D1');
    define("upsih", '\u03D2');
    define("piv", '\u03D6');
    define("bull", '\u2022');
    define("hellip", '\u2026');
    define("prime", '\u2032');
    define("Prime", '\u2033');
    define("oline", '\u203E');
    define("frasl", '\u2044');
    define("weierp", '\u2118');
    define("image", '\u2111');
    define("real", '\u211C');
    define("trade", '\u2122');
    define("alefsym", '\u2135');
    define("larr", '\u2190');
    define("uarr", '\u2191');
    define("rarr", '\u2192');
    define("darr", '\u2193');
    define("harr", '\u2194');
    define("crarr", '\u21B5');
    define("lArr", '\u21D0');
    define("uArr", '\u21D1');
    define("rArr", '\u21D2');
    define("dArr", '\u21D3');
    define("hArr", '\u21D4');
    define("forall", '\u2200');
    define("part", '\u2202');
    define("exist", '\u2203');
    define("empty", '\u2205');
    define("nabla", '\u2207');
    define("isin", '\u2208');
    define("notin", '\u2209');
    define("ni", '\u220B');
    define("prod", '\u220F');
    define("sum", '\u2211');
    define("minus", '\u2212');
    define("lowast", '\u2217');
    define("radic", '\u221A');
    define("prop", '\u221D');
    define("infin", '\u221E');
    define("ang", '\u2220');
    define("and", '\u2227');
    define("or", '\u2228');
    define("cap", '\u2229');
    define("cup", '\u222A');
    define("int", '\u222B');
    define("there4", '\u2234');
    define("sim", '\u223C');
    define("cong", '\u2245');
    define("asymp", '\u2248');
    define("ne", '\u2260');
    define("equiv", '\u2261');
    define("le", '\u2264');
    define("ge", '\u2265');
    define("sub", '\u2282');
    define("sup", '\u2283');
    define("nsub", '\u2284');
    define("sube", '\u2286');
    define("supe", '\u2287');
    define("oplus", '\u2295');
    define("otimes", '\u2297');
    define("perp", '\u22A5');
    define("sdot", '\u22C5');
    define("lceil", '\u2308');
    define("rceil", '\u2309');
    define("lfloor", '\u230A');
    define("rfloor", '\u230B');
    define("lang", '\u2329');
    define("rang", '\u232A');
    define("loz", '\u25CA');
    define("spades", '\u2660');
    define("clubs", '\u2663');
    define("hearts", '\u2665');
    define("diams", '\u2666');

    // Markup-significant and internationalization characters.
    define("quot", '\u0022');
    define("amp", '\u0026');
    define("lt", '\u003C');
    define("gt", '\u003E');
    define("OElig", '\u0152');
    define("oelig", '\u0153');
    define("Scaron", '\u0160');
    define("scaron", '\u0161');
    define("Yuml", '\u0178');
    define("circ", '\u02C6');
    define("tilde", '\u02DC');
    define("ensp", '\u2002');
    define("emsp", '\u2003');
    define("thinsp", '\u2009');
    define("zwnj", '\u200C');
    define("zwj", '\u200D');
    define("lrm", '\u200E');
    define("rlm", '\u200F');
    define("ndash", '\u2013');
    define("mdash", '\u2014');
    define("lsquo", '\u2018');
    define("rsquo", '\u2019');
    define("sbquo", '\u201A');
    define("ldquo", '\u201C');
    define("rdquo", '\u201D');
    define("bdquo", '\u201E');
    define("dagger", '\u2020');
    define("Dagger", '\u2021');
    define("permil", '\u2030');
    define("lsaquo", '\u2039');
    define("rsaquo", '\u203A');
    define("euro", '\u20AC');
  }

  /**
   * Registers one named entity in {@link #escapeStrings}, wrapping the bare
   * name in the leading ampersand and trailing semicolon used as lookup key.
   *
   * @param name the entity name without ampersand or semicolon
   * @param value the character the entity stands for
   */
  private static void define(String name, char value) {
    escapeStrings.put("&" + name + ";", Character.valueOf(value));
  }

  /**
   * Replace all the occurrences of HTML escape strings with the
   * respective characters.
   *
   * <p>Three forms are recognized, each of which must end in a semicolon:
   * named HTML 4 entities, decimal references of the form {@code &#NNN;},
   * and hexadecimal references of the form {@code &#xHHH;}. Numeric
   * references may denote any code point from 1 up to
   * {@link Character#MAX_CODE_POINT}; values above U+FFFF are emitted as a
   * surrogate pair. Anything that is not a recognized reference is copied
   * to the result unchanged.
   *
   * @param s a <code>String</code> value
   * @return a <code>String</code> value
   * @throws NullPointerException if {@code s} is null
   */
  public static final String unescapeHTML(String s) {
    int firstAmp = s.indexOf('&');
    if (firstAmp < 0) {
      // No ampersand means there is nothing to decode.
      return s;
    }

    int length = s.length();
    StringBuilder out = new StringBuilder(length);
    out.append(s, 0, firstAmp);

    int i = firstAmp;
    while (i < length) {
      char c = s.charAt(i);
      if (c != '&') {
        out.append(c);
        i++;
        continue;
      }

      // Scan the candidate reference: an optional '#' followed by a run of
      // letters or digits. Afterwards j indexes the first character that is
      // not part of that run (or equals length).
      int j = i + 1;
      if (j < length && s.charAt(j) == '#') {
        j++;
      }
      while (j < length && Character.isLetterOrDigit(s.charAt(j))) {
        j++;
      }

      boolean replaced = false;
      if (j < length && s.charAt(j) == ';') {
        if (s.charAt(i + 1) == '#') {
          int codePoint = parseNumericReference(s, i + 2, j);
          if (codePoint > 0) {
            out.appendCodePoint(codePoint);
            replaced = true;
          }
        } else {
          Character repl = escapeStrings.get(s.substring(i, j + 1));
          if (repl != null) {
            out.append(repl.charValue());
            replaced = true;
          }
        }
        j++; // Skip over ';'
      }

      if (!replaced) {
        // Not a recognized escape sequence, leave as-is.
        out.append(s, i, j);
      }
      i = j;
    }
    return out.toString();
  }

  /**
   * Parses the digits of a numeric character reference.
   *
   * @param s the string being unescaped
   * @param start index of the first character after the {@code #}
   * @param end index of the terminating semicolon (exclusive bound)
   * @return the referenced code point, or -1 if the text is not a valid
   *     decimal or hexadecimal number in the range 1 to
   *     {@link Character#MAX_CODE_POINT}
   */
  private static int parseNumericReference(String s, int start, int end) {
    if (start >= end) {
      return -1;
    }
    char first = s.charAt(start);
    try {
      long code;
      if (first == 'x' || first == 'X') {
        code = Long.parseLong(s.substring(start + 1, end), 16);
      } else if (Character.isDigit(first)) {
        code = Long.parseLong(s.substring(start, end));
      } else {
        return -1;
      }
      if (code > 0 && code <= Character.MAX_CODE_POINT) {
        return (int) code;
      }
      return -1;
    } catch (NumberFormatException ignored) {
      // Malformed or overflowing number: the caller leaves the text as-is.
      return -1;
    }
  }
}