Back to project page ara-twitter.
The source code is released under:
Apache License
If you think the Android project ara-twitter listed on this page is inappropriate, for example because it contains malicious code/tools or violates copyright, please email info at java2s dot com. Thanks.
/* * Copyright 2007 Yusuke Yamamoto/* w ww. j a v a 2s . c om*/ * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bakingcode.io.twitter.tools; import java.util.HashMap; import java.util.Map; public final class HTMLEntity { public static String escape(String original) { StringBuilder buf = new StringBuilder(original); escape(buf); return buf.toString(); } public static void escape(StringBuilder original) { int index = 0; String escaped; while (index < original.length()) { escaped = entityEscapeMap.get(original.substring(index, index + 1)); if (escaped != null) { original.replace(index, index + 1, escaped); index += escaped.length(); } else { index++; } } } public static String unescape(String original) { String returnValue = null; if (original != null) { StringBuilder buf = new StringBuilder(original); unescape(buf); returnValue = buf.toString(); } return returnValue; } public static void unescape(StringBuilder original) { int index = 0; int semicolonIndex; String escaped; String entity; while (index < original.length()) { index = original.indexOf("&", index); if (-1 == index) { break; } semicolonIndex = original.indexOf(";", index); if (-1 != semicolonIndex) { escaped = original.substring(index, semicolonIndex + 1); entity = escapeEntityMap.get(escaped); if (entity != null) { original.replace(index, semicolonIndex + 1, entity); } index++; } else { break; } } } private static Map<String, String> entityEscapeMap = new HashMap<String, String>(); private static 
Map<String, String> escapeEntityMap = new HashMap<String, String>(); static { String[][] entities = {{" ", " "/* no-break space = non-breaking space */, "\u00A0"} , {"¡", "¡"/* inverted exclamation mark */, "\u00A1"} , {"¢", "¢"/* cent sign */, "\u00A2"} , {"£", "£"/* pound sign */, "\u00A3"} , {"¤", "¤"/* currency sign */, "\u00A4"} , {"¥", "¥"/* yen sign = yuan sign */, "\u00A5"} , {"¦", "¦"/* broken bar = broken vertical bar */, "\u00A6"} , {"§", "§"/* section sign */, "\u00A7"} , {"¨", "¨"/* diaeresis = spacing diaeresis */, "\u00A8"} , {"©", "©"/* copyright sign */, "\u00A9"} , {"ª", "ª"/* feminine ordinal indicator */, "\u00AA"} , {"«", "«"/* left-pointing double angle quotation mark = left pointing guillemet */, "\u00AB"} , {"¬", "¬"/* not sign = discretionary hyphen */, "\u00AC"} , {"­", "­"/* soft hyphen = discretionary hyphen */, "\u00AD"} , {"®", "®"/* registered sign = registered trade mark sign */, "\u00AE"} , {"¯", "¯"/* macron = spacing macron = overline = APL overbar */, "\u00AF"} , {"°", "°"/* degree sign */, "\u00B0"} , {"±", "±"/* plus-minus sign = plus-or-minus sign */, "\u00B1"} , {"²", "²"/* superscript two = superscript digit two = squared */, "\u00B2"} , {"³", "³"/* superscript three = superscript digit three = cubed */, "\u00B3"} , {"´", "´"/* acute accent = spacing acute */, "\u00B4"} , {"µ", "µ"/* micro sign */, "\u00B5"} , {"¶", "¶"/* pilcrow sign = paragraph sign */, "\u00B6"} , {"·", "·"/* middle dot = Georgian comma = Greek middle dot */, "\u00B7"} , {"¸", "¸"/* cedilla = spacing cedilla */, "\u00B8"} , {"¹", "¹"/* superscript one = superscript digit one */, "\u00B9"} , {"º", "º"/* masculine ordinal indicator */, "\u00BA"} , {"»", "»"/* right-pointing double angle quotation mark = right pointing guillemet */, "\u00BB"} , {"¼", "¼"/* vulgar fraction one quarter = fraction one quarter */, "\u00BC"} , {"½", "½"/* vulgar fraction one half = fraction one half */, "\u00BD"} , {"¾", "¾"/* vulgar fraction three quarters = fraction three 
quarters */, "\u00BE"} , {"¿", "¿"/* inverted question mark = turned question mark */, "\u00BF"} , {"À", "À"/* latin capital letter A with grave = latin capital letter A grave */, "\u00C0"} , {"Á", "Á"/* latin capital letter A with acute */, "\u00C1"} , {"Â", "Â"/* latin capital letter A with circumflex */, "\u00C2"} , {"Ã", "Ã"/* latin capital letter A with tilde */, "\u00C3"} , {"Ä", "Ä"/* latin capital letter A with diaeresis */, "\u00C4"} , {"Å", "Å"/* latin capital letter A with ring above = latin capital letter A ring */, "\u00C5"} , {"Æ", "Æ"/* latin capital letter AE = latin capital ligature AE */, "\u00C6"} , {"Ç", "Ç"/* latin capital letter C with cedilla */, "\u00C7"} , {"È", "È"/* latin capital letter E with grave */, "\u00C8"} , {"É", "É"/* latin capital letter E with acute */, "\u00C9"} , {"Ê", "Ê"/* latin capital letter E with circumflex */, "\u00CA"} , {"Ë", "Ë"/* latin capital letter E with diaeresis */, "\u00CB"} , {"Ì", "Ì"/* latin capital letter I with grave */, "\u00CC"} , {"Í", "Í"/* latin capital letter I with acute */, "\u00CD"} , {"Î", "Î"/* latin capital letter I with circumflex */, "\u00CE"} , {"Ï", "Ï"/* latin capital letter I with diaeresis */, "\u00CF"} , {"Ð", "Ð"/* latin capital letter ETH */, "\u00D0"} , {"Ñ", "Ñ"/* latin capital letter N with tilde */, "\u00D1"} , {"Ò", "Ò"/* latin capital letter O with grave */, "\u00D2"} , {"Ó", "Ó"/* latin capital letter O with acute */, "\u00D3"} , {"Ô", "Ô"/* latin capital letter O with circumflex */, "\u00D4"} , {"Õ", "Õ"/* latin capital letter O with tilde */, "\u00D5"} , {"Ö", "Ö"/* latin capital letter O with diaeresis */, "\u00D6"} , {"×", "×"/* multiplication sign */, "\u00D7"} , {"Ø", "Ø"/* latin capital letter O with stroke = latin capital letter O slash */, "\u00D8"} , {"Ù", "Ù"/* latin capital letter U with grave */, "\u00D9"} , {"Ú", "Ú"/* latin capital letter U with acute */, "\u00DA"} , {"Û", "Û"/* latin capital letter U with circumflex */, "\u00DB"} , {"Ü", "Ü"/* latin capital 
letter U with diaeresis */, "\u00DC"} , {"Ý", "Ý"/* latin capital letter Y with acute */, "\u00DD"} , {"Þ", "Þ"/* latin capital letter THORN */, "\u00DE"} , {"ß", "ß"/* latin small letter sharp s = ess-zed */, "\u00DF"} , {"à", "à"/* latin small letter a with grave = latin small letter a grave */, "\u00E0"} , {"á", "á"/* latin small letter a with acute */, "\u00E1"} , {"â", "â"/* latin small letter a with circumflex */, "\u00E2"} , {"ã", "ã"/* latin small letter a with tilde */, "\u00E3"} , {"ä", "ä"/* latin small letter a with diaeresis */, "\u00E4"} , {"å", "å"/* latin small letter a with ring above = latin small letter a ring */, "\u00E5"} , {"æ", "æ"/* latin small letter ae = latin small ligature ae */, "\u00E6"} , {"ç", "ç"/* latin small letter c with cedilla */, "\u00E7"} , {"è", "è"/* latin small letter e with grave */, "\u00E8"} , {"é", "é"/* latin small letter e with acute */, "\u00E9"} , {"ê", "ê"/* latin small letter e with circumflex */, "\u00EA"} , {"ë", "ë"/* latin small letter e with diaeresis */, "\u00EB"} , {"ì", "ì"/* latin small letter i with grave */, "\u00EC"} , {"í", "í"/* latin small letter i with acute */, "\u00ED"} , {"î", "î"/* latin small letter i with circumflex */, "\u00EE"} , {"ï", "ï"/* latin small letter i with diaeresis */, "\u00EF"} , {"ð", "ð"/* latin small letter eth */, "\u00F0"} , {"ñ", "ñ"/* latin small letter n with tilde */, "\u00F1"} , {"ò", "ò"/* latin small letter o with grave */, "\u00F2"} , {"ó", "ó"/* latin small letter o with acute */, "\u00F3"} , {"ô", "ô"/* latin small letter o with circumflex */, "\u00F4"} , {"õ", "õ"/* latin small letter o with tilde */, "\u00F5"} , {"ö", "ö"/* latin small letter o with diaeresis */, "\u00F6"} , {"÷", "÷"/* division sign */, "\u00F7"} , {"ø", "ø"/* latin small letter o with stroke = latin small letter o slash */, "\u00F8"} , {"ù", "ù"/* latin small letter u with grave */, "\u00F9"} , {"ú", "ú"/* latin small letter u with acute */, "\u00FA"} , {"û", "û"/* latin small letter u with 
circumflex */, "\u00FB"} , {"ü", "ü"/* latin small letter u with diaeresis */, "\u00FC"} , {"ý", "ý"/* latin small letter y with acute */, "\u00FD"} , {"þ", "þ"/* latin small letter thorn with */, "\u00FE"} , {"ÿ", "ÿ"/* latin small letter y with diaeresis */, "\u00FF"} , {"ƒ", "ƒ"/* latin small f with hook = function = florin */, "\u0192"} /* Greek */ , {"Α", "Α"/* greek capital letter alpha */, "\u0391"} , {"Β", "Β"/* greek capital letter beta */, "\u0392"} , {"Γ", "Γ"/* greek capital letter gamma */, "\u0393"} , {"Δ", "Δ"/* greek capital letter delta */, "\u0394"} , {"Ε", "Ε"/* greek capital letter epsilon */, "\u0395"} , {"Ζ", "Ζ"/* greek capital letter zeta */, "\u0396"} , {"Η", "Η"/* greek capital letter eta */, "\u0397"} , {"Θ", "Θ"/* greek capital letter theta */, "\u0398"} , {"Ι", "Ι"/* greek capital letter iota */, "\u0399"} , {"Κ", "Κ"/* greek capital letter kappa */, "\u039A"} , {"Λ", "Λ"/* greek capital letter lambda */, "\u039B"} , {"Μ", "Μ"/* greek capital letter mu */, "\u039C"} , {"Ν", "Ν"/* greek capital letter nu */, "\u039D"} , {"Ξ", "Ξ"/* greek capital letter xi */, "\u039E"} , {"Ο", "Ο"/* greek capital letter omicron */, "\u039F"} , {"Π", "Π"/* greek capital letter pi */, "\u03A0"} , {"Ρ", "Ρ"/* greek capital letter rho */, "\u03A1"} /* there is no Sigmaf and no \u03A2 */ , {"Σ", "Σ"/* greek capital letter sigma */, "\u03A3"} , {"Τ", "Τ"/* greek capital letter tau */, "\u03A4"} , {"Υ", "Υ"/* greek capital letter upsilon */, "\u03A5"} , {"Φ", "Φ"/* greek capital letter phi */, "\u03A6"} , {"Χ", "Χ"/* greek capital letter chi */, "\u03A7"} , {"Ψ", "Ψ"/* greek capital letter psi */, "\u03A8"} , {"Ω", "Ω"/* greek capital letter omega */, "\u03A9"} , {"α", "α"/* greek small letter alpha */, "\u03B1"} , {"β", "β"/* greek small letter beta */, "\u03B2"} , {"γ", "γ"/* greek small letter gamma */, "\u03B3"} , {"δ", "δ"/* greek small letter delta */, "\u03B4"} , {"ε", "ε"/* greek small letter epsilon */, "\u03B5"} , {"ζ", "ζ"/* greek small letter zeta 
*/, "\u03B6"} , {"η", "η"/* greek small letter eta */, "\u03B7"} , {"θ", "θ"/* greek small letter theta */, "\u03B8"} , {"ι", "ι"/* greek small letter iota */, "\u03B9"} , {"κ", "κ"/* greek small letter kappa */, "\u03BA"} , {"λ", "λ"/* greek small letter lambda */, "\u03BB"} , {"μ", "μ"/* greek small letter mu */, "\u03BC"} , {"ν", "ν"/* greek small letter nu */, "\u03BD"} , {"ξ", "ξ"/* greek small letter xi */, "\u03BE"} , {"ο", "ο"/* greek small letter omicron */, "\u03BF"} , {"π", "π"/* greek small letter pi */, "\u03C0"} , {"ρ", "ρ"/* greek small letter rho */, "\u03C1"} , {"ς", "ς"/* greek small letter final sigma */, "\u03C2"} , {"σ", "σ"/* greek small letter sigma */, "\u03C3"} , {"τ", "τ"/* greek small letter tau */, "\u03C4"} , {"υ", "υ"/* greek small letter upsilon */, "\u03C5"} , {"φ", "φ"/* greek small letter phi */, "\u03C6"} , {"χ", "χ"/* greek small letter chi */, "\u03C7"} , {"ψ", "ψ"/* greek small letter psi */, "\u03C8"} , {"ω", "ω"/* greek small letter omega */, "\u03C9"} , {"ϑ", "ϑ"/* greek small letter theta symbol */, "\u03D1"} , {"ϒ", "ϒ"/* greek upsilon with hook symbol */, "\u03D2"} , {"ϖ", "ϖ"/* greek pi symbol */, "\u03D6"} /* General Punctuation */ , {"•", "•"/* bullet = black small circle */, "\u2022"} /* bullet is NOT the same as bullet operator ,"\u2219*/ , {"…", "…"/* horizontal ellipsis = three dot leader */, "\u2026"} , {"′", "′"/* prime = minutes = feet */, "\u2032"} , {"″", "″"/* double prime = seconds = inches */, "\u2033"} , {"‾", "‾"/* overline = spacing overscore */, "\u203E"} , {"⁄", "⁄"/* fraction slash */, "\u2044"} /* Letterlike Symbols */ , {"℘", "℘"/* script capital P = power set = Weierstrass p */, "\u2118"} , {"ℑ", "ℑ"/* blackletter capital I = imaginary part */, "\u2111"} , {"ℜ", "ℜ"/* blackletter capital R = real part symbol */, "\u211C"} , {"™", "™"/* trade mark sign */, "\u2122"} , {"ℵ", "ℵ"/* alef symbol = first transfinite cardinal */, "\u2135"} /* alef symbol is NOT the same as hebrew letter alef ,"\u05D0"}*/ 
/* Arrows */ , {"←", "←"/* leftwards arrow */, "\u2190"} , {"↑", "↑"/* upwards arrow */, "\u2191"} , {"→", "→"/* rightwards arrow */, "\u2192"} , {"↓", "↓"/* downwards arrow */, "\u2193"} , {"↔", "↔"/* left right arrow */, "\u2194"} , {"↵", "↵"/* downwards arrow with corner leftwards = carriage return */, "\u21B5"} , {"⇐", "⇐"/* leftwards double arrow */, "\u21D0"} /* Unicode does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests */ , {"⇑", "⇑"/* upwards double arrow */, "\u21D1"} , {"⇒", "⇒"/* rightwards double arrow */, "\u21D2"} /* Unicode does not say this is the 'implies' character but does not have another character with this function so ? rArr can be used for 'implies' as ISOtech suggests */ , {"⇓", "⇓"/* downwards double arrow */, "\u21D3"} , {"⇔", "⇔"/* left right double arrow */, "\u21D4"} /* Mathematical Operators */ , {"∀", "∀"/* for all */, "\u2200"} , {"∂", "∂"/* partial differential */, "\u2202"} , {"∃", "∃"/* there exists */, "\u2203"} , {"∅", "∅"/* empty set = null set = diameter */, "\u2205"} , {"∇", "∇"/* nabla = backward difference */, "\u2207"} , {"∈", "∈"/* element of */, "\u2208"} , {"∉", "∉"/* not an element of */, "\u2209"} , {"∋", "∋"/* contains as member */, "\u220B"} /* should there be a more memorable name than 'ni'? 
*/ , {"∏", "∏"/* n-ary product = product sign */, "\u220F"} /* prod is NOT the same character as ,"\u03A0"}*/ , {"∑", "∑"/* n-ary sumation */, "\u2211"} /* sum is NOT the same character as ,"\u03A3"}*/ , {"−", "−"/* minus sign */, "\u2212"} , {"∗", "∗"/* asterisk operator */, "\u2217"} , {"√", "√"/* square root = radical sign */, "\u221A"} , {"∝", "∝"/* proportional to */, "\u221D"} , {"∞", "∞"/* infinity */, "\u221E"} , {"∠", "∠"/* angle */, "\u2220"} , {"∧", "∧"/* logical and = wedge */, "\u2227"} , {"∨", "∨"/* logical or = vee */, "\u2228"} , {"∩", "∩"/* intersection = cap */, "\u2229"} , {"∪", "∪"/* union = cup */, "\u222A"} , {"∫", "∫"/* integral */, "\u222B"} , {"∴", "∴"/* therefore */, "\u2234"} , {"∼", "∼"/* tilde operator = varies with = similar to */, "\u223C"} /* tilde operator is NOT the same character as the tilde ,"\u007E"}*/ , {"≅", "≅"/* approximately equal to */, "\u2245"} , {"≈", "≈"/* almost equal to = asymptotic to */, "\u2248"} , {"≠", "≠"/* not equal to */, "\u2260"} , {"≡", "≡"/* identical to */, "\u2261"} , {"≤", "≤"/* less-than or equal to */, "\u2264"} , {"≥", "≥"/* greater-than or equal to */, "\u2265"} , {"⊂", "⊂"/* subset of */, "\u2282"} , {"⊃", "⊃"/* superset of */, "\u2283"} /* note that nsup 'not a superset of ,"\u2283"}*/ , {"⊆", "⊆"/* subset of or equal to */, "\u2286"} , {"⊇", "⊇"/* superset of or equal to */, "\u2287"} , {"⊕", "⊕"/* circled plus = direct sum */, "\u2295"} , {"⊗", "⊗"/* circled times = vector product */, "\u2297"} , {"⊥", "⊥"/* up tack = orthogonal to = perpendicular */, "\u22A5"} , {"⋅", "⋅"/* dot operator */, "\u22C5"} /* dot operator is NOT the same character as ,"\u00B7"} /* Miscellaneous Technical */ , {"⌈", "⌈"/* left ceiling = apl upstile */, "\u2308"} , {"⌉", "⌉"/* right ceiling */, "\u2309"} , {"⌊", "⌊"/* left floor = apl downstile */, "\u230A"} , {"⌋", "⌋"/* right floor */, "\u230B"} , {"⟨", "〈"/* left-pointing angle bracket = bra */, "\u2329"} /* lang is NOT the same character as ,"\u003C"}*/ , {"⟩", 
"〉"/* right-pointing angle bracket = ket */, "\u232A"} /* rang is NOT the same character as ,"\u003E"}*/ /* Geometric Shapes */ , {"◊", "◊"/* lozenge */, "\u25CA"} /* Miscellaneous Symbols */ , {"♠", "♠"/* black spade suit */, "\u2660"} /* black here seems to mean filled as opposed to hollow */ , {"♣", "♣"/* black club suit = shamrock */, "\u2663"} , {"♥", "♥"/* black heart suit = valentine */, "\u2665"} , {"♦", "♦"/* black diamond suit */, "\u2666"} , {""", """ /* quotation mark = APL quote */, "\""} , {"&", "&" /* ampersand */, "\u0026"} , {"<", "<" /* less-than sign */, "\u003C"} , {">", ">" /* greater-than sign */, "\u003E"} /* Latin Extended-A */ , {"Œ", "Œ" /* latin capital ligature OE */, "\u0152"} , {"œ", "œ" /* latin small ligature oe */, "\u0153"} /* ligature is a misnomer this is a separate character in some languages */ , {"Š", "Š" /* latin capital letter S with caron */, "\u0160"} , {"š", "š" /* latin small letter s with caron */, "\u0161"} , {"Ÿ", "Ÿ" /* latin capital letter Y with diaeresis */, "\u0178"} /* Spacing Modifier Letters */ , {"ˆ", "ˆ" /* modifier letter circumflex accent */, "\u02C6"} , {"˜", "˜" /* small tilde */, "\u02DC"} /* General Punctuation */ , {" ", " "/* en space */, "\u2002"} , {" ", " "/* em space */, "\u2003"} , {" ", " "/* thin space */, "\u2009"} , {"‌", "‌"/* zero width non-joiner */, "\u200C"} , {"‍", "‍"/* zero width joiner */, "\u200D"} , {"‎", "‎"/* left-to-right mark */, "\u200E"} , {"‏", "‏"/* right-to-left mark */, "\u200F"} , {"–", "–"/* en dash */, "\u2013"} , {"—", "—"/* em dash */, "\u2014"} , {"‘", "‘"/* left single quotation mark */, "\u2018"} , {"’", "’"/* right single quotation mark */, "\u2019"} , {"‚", "‚"/* single low-9 quotation mark */, "\u201A"} , {"“", "“"/* left double quotation mark */, "\u201C"} , {"”", "”"/* right double quotation mark */, "\u201D"} , {"„", "„"/* double low-9 quotation mark */, "\u201E"} , {"†", "†"/* dagger */, "\u2020"} , {"‡", "‡"/* double dagger */, "\u2021"} , {"‰", "‰"/* per 
mille sign */, "\u2030"} , {"‹", "‹"/* single left-pointing angle quotation mark */, "\u2039"} /* lsaquo is proposed but not yet ISO standardized */ , {"›", "›"/* single right-pointing angle quotation mark */, "\u203A"} /* rsaquo is proposed but not yet ISO standardized */ , {"€", "€" /* euro sign */, "\u20AC"}}; for (String[] entity : entities) { entityEscapeMap.put(entity[2], entity[0]); escapeEntityMap.put(entity[0], entity[2]); escapeEntityMap.put(entity[1], entity[2]); } } }