Util.java Source code

Java tutorial

Introduction

Here is the source code for Util.java

Source

import java.util.HashMap;
import java.util.Map;

/* Copyright (c) 2008 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
//package com.google.gdata.util.common.base;

/**
 * Some common string manipulation utilities.
 *
 * <p>Currently this class offers {@link #unescapeHTML(String)}, which turns
 * HTML 4 named entities and numeric character references back into the
 * characters they stand for.
 */
public class Util {

    /**
     * Maps each HTML 4 named entity, written in full with its leading
     * {@code '&'} and trailing {@code ';'}, to the character it denotes.
     * Filled once by the static initializer below and only read afterwards
     * by this class.
     */
    static final Map<String, Character> escapeStrings =
            // 252 entries; presized so the default 0.75 load factor never forces a rehash.
            new HashMap<String, Character>(252 * 4 / 3 + 1);

    static {
        // HTML character entity references as defined in HTML 4
        // see http://www.w3.org/TR/REC-html40/sgml/entities.html

        // ISO 8859-1 (Latin-1) characters
        escapeStrings.put("&nbsp;", '\u00A0');
        escapeStrings.put("&iexcl;", '\u00A1');
        escapeStrings.put("&cent;", '\u00A2');
        escapeStrings.put("&pound;", '\u00A3');
        escapeStrings.put("&curren;", '\u00A4');
        escapeStrings.put("&yen;", '\u00A5');
        escapeStrings.put("&brvbar;", '\u00A6');
        escapeStrings.put("&sect;", '\u00A7');
        escapeStrings.put("&uml;", '\u00A8');
        escapeStrings.put("&copy;", '\u00A9');
        escapeStrings.put("&ordf;", '\u00AA');
        escapeStrings.put("&laquo;", '\u00AB');
        escapeStrings.put("&not;", '\u00AC');
        escapeStrings.put("&shy;", '\u00AD');
        escapeStrings.put("&reg;", '\u00AE');
        escapeStrings.put("&macr;", '\u00AF');
        escapeStrings.put("&deg;", '\u00B0');
        escapeStrings.put("&plusmn;", '\u00B1');
        escapeStrings.put("&sup2;", '\u00B2');
        escapeStrings.put("&sup3;", '\u00B3');
        escapeStrings.put("&acute;", '\u00B4');
        escapeStrings.put("&micro;", '\u00B5');
        escapeStrings.put("&para;", '\u00B6');
        escapeStrings.put("&middot;", '\u00B7');
        escapeStrings.put("&cedil;", '\u00B8');
        escapeStrings.put("&sup1;", '\u00B9');
        escapeStrings.put("&ordm;", '\u00BA');
        escapeStrings.put("&raquo;", '\u00BB');
        escapeStrings.put("&frac14;", '\u00BC');
        escapeStrings.put("&frac12;", '\u00BD');
        escapeStrings.put("&frac34;", '\u00BE');
        escapeStrings.put("&iquest;", '\u00BF');
        escapeStrings.put("&Agrave;", '\u00C0');
        escapeStrings.put("&Aacute;", '\u00C1');
        escapeStrings.put("&Acirc;", '\u00C2');
        escapeStrings.put("&Atilde;", '\u00C3');
        escapeStrings.put("&Auml;", '\u00C4');
        escapeStrings.put("&Aring;", '\u00C5');
        escapeStrings.put("&AElig;", '\u00C6');
        escapeStrings.put("&Ccedil;", '\u00C7');
        escapeStrings.put("&Egrave;", '\u00C8');
        escapeStrings.put("&Eacute;", '\u00C9');
        escapeStrings.put("&Ecirc;", '\u00CA');
        escapeStrings.put("&Euml;", '\u00CB');
        escapeStrings.put("&Igrave;", '\u00CC');
        escapeStrings.put("&Iacute;", '\u00CD');
        escapeStrings.put("&Icirc;", '\u00CE');
        escapeStrings.put("&Iuml;", '\u00CF');
        escapeStrings.put("&ETH;", '\u00D0');
        escapeStrings.put("&Ntilde;", '\u00D1');
        escapeStrings.put("&Ograve;", '\u00D2');
        escapeStrings.put("&Oacute;", '\u00D3');
        escapeStrings.put("&Ocirc;", '\u00D4');
        escapeStrings.put("&Otilde;", '\u00D5');
        escapeStrings.put("&Ouml;", '\u00D6');
        escapeStrings.put("&times;", '\u00D7');
        escapeStrings.put("&Oslash;", '\u00D8');
        escapeStrings.put("&Ugrave;", '\u00D9');
        escapeStrings.put("&Uacute;", '\u00DA');
        escapeStrings.put("&Ucirc;", '\u00DB');
        escapeStrings.put("&Uuml;", '\u00DC');
        escapeStrings.put("&Yacute;", '\u00DD');
        escapeStrings.put("&THORN;", '\u00DE');
        escapeStrings.put("&szlig;", '\u00DF');
        escapeStrings.put("&agrave;", '\u00E0');
        escapeStrings.put("&aacute;", '\u00E1');
        escapeStrings.put("&acirc;", '\u00E2');
        escapeStrings.put("&atilde;", '\u00E3');
        escapeStrings.put("&auml;", '\u00E4');
        escapeStrings.put("&aring;", '\u00E5');
        escapeStrings.put("&aelig;", '\u00E6');
        escapeStrings.put("&ccedil;", '\u00E7');
        escapeStrings.put("&egrave;", '\u00E8');
        escapeStrings.put("&eacute;", '\u00E9');
        escapeStrings.put("&ecirc;", '\u00EA');
        escapeStrings.put("&euml;", '\u00EB');
        escapeStrings.put("&igrave;", '\u00EC');
        escapeStrings.put("&iacute;", '\u00ED');
        escapeStrings.put("&icirc;", '\u00EE');
        escapeStrings.put("&iuml;", '\u00EF');
        escapeStrings.put("&eth;", '\u00F0');
        escapeStrings.put("&ntilde;", '\u00F1');
        escapeStrings.put("&ograve;", '\u00F2');
        escapeStrings.put("&oacute;", '\u00F3');
        escapeStrings.put("&ocirc;", '\u00F4');
        escapeStrings.put("&otilde;", '\u00F5');
        escapeStrings.put("&ouml;", '\u00F6');
        escapeStrings.put("&divide;", '\u00F7');
        escapeStrings.put("&oslash;", '\u00F8');
        escapeStrings.put("&ugrave;", '\u00F9');
        escapeStrings.put("&uacute;", '\u00FA');
        escapeStrings.put("&ucirc;", '\u00FB');
        escapeStrings.put("&uuml;", '\u00FC');
        escapeStrings.put("&yacute;", '\u00FD');
        escapeStrings.put("&thorn;", '\u00FE');
        escapeStrings.put("&yuml;", '\u00FF');

        // Symbols, mathematical symbols and Greek letters
        escapeStrings.put("&fnof;", '\u0192');
        escapeStrings.put("&Alpha;", '\u0391');
        escapeStrings.put("&Beta;", '\u0392');
        escapeStrings.put("&Gamma;", '\u0393');
        escapeStrings.put("&Delta;", '\u0394');
        escapeStrings.put("&Epsilon;", '\u0395');
        escapeStrings.put("&Zeta;", '\u0396');
        escapeStrings.put("&Eta;", '\u0397');
        escapeStrings.put("&Theta;", '\u0398');
        escapeStrings.put("&Iota;", '\u0399');
        escapeStrings.put("&Kappa;", '\u039A');
        escapeStrings.put("&Lambda;", '\u039B');
        escapeStrings.put("&Mu;", '\u039C');
        escapeStrings.put("&Nu;", '\u039D');
        escapeStrings.put("&Xi;", '\u039E');
        escapeStrings.put("&Omicron;", '\u039F');
        escapeStrings.put("&Pi;", '\u03A0');
        escapeStrings.put("&Rho;", '\u03A1');
        escapeStrings.put("&Sigma;", '\u03A3');
        escapeStrings.put("&Tau;", '\u03A4');
        escapeStrings.put("&Upsilon;", '\u03A5');
        escapeStrings.put("&Phi;", '\u03A6');
        escapeStrings.put("&Chi;", '\u03A7');
        escapeStrings.put("&Psi;", '\u03A8');
        escapeStrings.put("&Omega;", '\u03A9');
        escapeStrings.put("&alpha;", '\u03B1');
        escapeStrings.put("&beta;", '\u03B2');
        escapeStrings.put("&gamma;", '\u03B3');
        escapeStrings.put("&delta;", '\u03B4');
        escapeStrings.put("&epsilon;", '\u03B5');
        escapeStrings.put("&zeta;", '\u03B6');
        escapeStrings.put("&eta;", '\u03B7');
        escapeStrings.put("&theta;", '\u03B8');
        escapeStrings.put("&iota;", '\u03B9');
        escapeStrings.put("&kappa;", '\u03BA');
        escapeStrings.put("&lambda;", '\u03BB');
        escapeStrings.put("&mu;", '\u03BC');
        escapeStrings.put("&nu;", '\u03BD');
        escapeStrings.put("&xi;", '\u03BE');
        escapeStrings.put("&omicron;", '\u03BF');
        escapeStrings.put("&pi;", '\u03C0');
        escapeStrings.put("&rho;", '\u03C1');
        escapeStrings.put("&sigmaf;", '\u03C2');
        escapeStrings.put("&sigma;", '\u03C3');
        escapeStrings.put("&tau;", '\u03C4');
        escapeStrings.put("&upsilon;", '\u03C5');
        escapeStrings.put("&phi;", '\u03C6');
        escapeStrings.put("&chi;", '\u03C7');
        escapeStrings.put("&psi;", '\u03C8');
        escapeStrings.put("&omega;", '\u03C9');
        escapeStrings.put("&thetasym;", '\u03D1');
        escapeStrings.put("&upsih;", '\u03D2');
        escapeStrings.put("&piv;", '\u03D6');
        escapeStrings.put("&bull;", '\u2022');
        escapeStrings.put("&hellip;", '\u2026');
        escapeStrings.put("&prime;", '\u2032');
        escapeStrings.put("&Prime;", '\u2033');
        escapeStrings.put("&oline;", '\u203E');
        escapeStrings.put("&frasl;", '\u2044');
        escapeStrings.put("&weierp;", '\u2118');
        escapeStrings.put("&image;", '\u2111');
        escapeStrings.put("&real;", '\u211C');
        escapeStrings.put("&trade;", '\u2122');
        escapeStrings.put("&alefsym;", '\u2135');
        escapeStrings.put("&larr;", '\u2190');
        escapeStrings.put("&uarr;", '\u2191');
        escapeStrings.put("&rarr;", '\u2192');
        escapeStrings.put("&darr;", '\u2193');
        escapeStrings.put("&harr;", '\u2194');
        escapeStrings.put("&crarr;", '\u21B5');
        escapeStrings.put("&lArr;", '\u21D0');
        escapeStrings.put("&uArr;", '\u21D1');
        escapeStrings.put("&rArr;", '\u21D2');
        escapeStrings.put("&dArr;", '\u21D3');
        escapeStrings.put("&hArr;", '\u21D4');
        escapeStrings.put("&forall;", '\u2200');
        escapeStrings.put("&part;", '\u2202');
        escapeStrings.put("&exist;", '\u2203');
        escapeStrings.put("&empty;", '\u2205');
        escapeStrings.put("&nabla;", '\u2207');
        escapeStrings.put("&isin;", '\u2208');
        escapeStrings.put("&notin;", '\u2209');
        escapeStrings.put("&ni;", '\u220B');
        escapeStrings.put("&prod;", '\u220F');
        escapeStrings.put("&sum;", '\u2211');
        escapeStrings.put("&minus;", '\u2212');
        escapeStrings.put("&lowast;", '\u2217');
        escapeStrings.put("&radic;", '\u221A');
        escapeStrings.put("&prop;", '\u221D');
        escapeStrings.put("&infin;", '\u221E');
        escapeStrings.put("&ang;", '\u2220');
        escapeStrings.put("&and;", '\u2227');
        escapeStrings.put("&or;", '\u2228');
        escapeStrings.put("&cap;", '\u2229');
        escapeStrings.put("&cup;", '\u222A');
        escapeStrings.put("&int;", '\u222B');
        escapeStrings.put("&there4;", '\u2234');
        escapeStrings.put("&sim;", '\u223C');
        escapeStrings.put("&cong;", '\u2245');
        escapeStrings.put("&asymp;", '\u2248');
        escapeStrings.put("&ne;", '\u2260');
        escapeStrings.put("&equiv;", '\u2261');
        escapeStrings.put("&le;", '\u2264');
        escapeStrings.put("&ge;", '\u2265');
        escapeStrings.put("&sub;", '\u2282');
        escapeStrings.put("&sup;", '\u2283');
        escapeStrings.put("&nsub;", '\u2284');
        escapeStrings.put("&sube;", '\u2286');
        escapeStrings.put("&supe;", '\u2287');
        escapeStrings.put("&oplus;", '\u2295');
        escapeStrings.put("&otimes;", '\u2297');
        escapeStrings.put("&perp;", '\u22A5');
        escapeStrings.put("&sdot;", '\u22C5');
        escapeStrings.put("&lceil;", '\u2308');
        escapeStrings.put("&rceil;", '\u2309');
        escapeStrings.put("&lfloor;", '\u230A');
        escapeStrings.put("&rfloor;", '\u230B');
        escapeStrings.put("&lang;", '\u2329');
        escapeStrings.put("&rang;", '\u232A');
        escapeStrings.put("&loz;", '\u25CA');
        escapeStrings.put("&spades;", '\u2660');
        escapeStrings.put("&clubs;", '\u2663');
        escapeStrings.put("&hearts;", '\u2665');
        escapeStrings.put("&diams;", '\u2666');

        // Markup-significant and internationalization characters
        escapeStrings.put("&quot;", '\u0022');
        escapeStrings.put("&amp;", '\u0026');
        escapeStrings.put("&lt;", '\u003C');
        escapeStrings.put("&gt;", '\u003E');
        escapeStrings.put("&OElig;", '\u0152');
        escapeStrings.put("&oelig;", '\u0153');
        escapeStrings.put("&Scaron;", '\u0160');
        escapeStrings.put("&scaron;", '\u0161');
        escapeStrings.put("&Yuml;", '\u0178');
        escapeStrings.put("&circ;", '\u02C6');
        escapeStrings.put("&tilde;", '\u02DC');
        escapeStrings.put("&ensp;", '\u2002');
        escapeStrings.put("&emsp;", '\u2003');
        escapeStrings.put("&thinsp;", '\u2009');
        escapeStrings.put("&zwnj;", '\u200C');
        escapeStrings.put("&zwj;", '\u200D');
        escapeStrings.put("&lrm;", '\u200E');
        escapeStrings.put("&rlm;", '\u200F');
        escapeStrings.put("&ndash;", '\u2013');
        escapeStrings.put("&mdash;", '\u2014');
        escapeStrings.put("&lsquo;", '\u2018');
        escapeStrings.put("&rsquo;", '\u2019');
        escapeStrings.put("&sbquo;", '\u201A');
        escapeStrings.put("&ldquo;", '\u201C');
        escapeStrings.put("&rdquo;", '\u201D');
        escapeStrings.put("&bdquo;", '\u201E');
        escapeStrings.put("&dagger;", '\u2020');
        escapeStrings.put("&Dagger;", '\u2021');
        escapeStrings.put("&permil;", '\u2030');
        escapeStrings.put("&lsaquo;", '\u2039');
        escapeStrings.put("&rsaquo;", '\u203A');
        escapeStrings.put("&euro;", '\u20AC');
    }

    /**
     * Replaces all occurrences of HTML escape sequences with the characters
     * they represent.
     *
     * <p>Three forms are recognized, each of which must be terminated by a
     * semicolon: named HTML 4 entities such as {@code &amp;}, decimal
     * references such as {@code &#65;}, and hexadecimal references such as
     * {@code &#x41;} or {@code &#X41;}. Numeric references may denote any
     * code point from 1 up to {@link Character#MAX_CODE_POINT}; code points
     * above the Basic Multilingual Plane are emitted as a surrogate pair.
     * Anything that is not a recognized sequence (unknown name, missing
     * semicolon, malformed or out-of-range number) is copied to the result
     * unchanged.
     *
     * @param s the text to unescape; must not be {@code null}
     * @return a copy of {@code s} with every recognized escape sequence
     *         replaced by its character
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static final String unescapeHTML(String s) {
        final int length = s.length();
        // Every replacement is shorter than the sequence it replaces, so the
        // result never outgrows the input.
        final StringBuilder out = new StringBuilder(length);

        int i = 0;
        while (i < length) {
            char current = s.charAt(i);
            if (current != '&') {
                out.append(current);
                i++;
                continue;
            }

            // Allow e.g. &#123;
            int j = i + 1;
            boolean numeric = j < length && s.charAt(j) == '#';
            if (numeric) {
                j++;
            }

            // Scan until we find a char that is not a letter or digit.
            while (j < length && Character.isLetterOrDigit(s.charAt(j))) {
                j++;
            }

            boolean replaced = false;
            if (j < length && s.charAt(j) == ';') {
                if (numeric) {
                    // The digits sit between the '#' and the ';'.
                    int codePoint = parseNumericReference(s, i + 2, j);
                    if (codePoint > 0 && Character.isValidCodePoint(codePoint)) {
                        out.appendCodePoint(codePoint);
                        replaced = true;
                    }
                } else {
                    Character repl = escapeStrings.get(s.substring(i, j + 1));
                    if (repl != null) {
                        out.append(repl.charValue());
                        replaced = true;
                    }
                }
                j++; // Skip over ';'
            }

            if (!replaced) {
                // Not a recognized escape sequence, leave as-is.
                out.append(s, i, j);
            }
            i = j;
        }
        return out.toString();
    }

    /**
     * Parses the numeric part of a character reference, i.e. the text
     * between {@code &#} and the closing semicolon.
     *
     * @param s     the string containing the reference
     * @param start index of the first character after the {@code '#'}
     * @param end   index of the terminating {@code ';'} (exclusive bound)
     * @return the parsed value, or {@code -1} if the text is empty, is
     *         neither a decimal number nor an {@code x}-prefixed hexadecimal
     *         number, or does not fit in an {@code int}
     */
    private static int parseNumericReference(String s, int start, int end) {
        if (start >= end) {
            return -1;
        }
        char first = s.charAt(start);
        try {
            if (first == 'x' || first == 'X') {
                return Integer.parseInt(s.substring(start + 1, end), 16);
            }
            if (Character.isDigit(first)) {
                return Integer.parseInt(s.substring(start, end));
            }
        } catch (NumberFormatException ignored) {
            // Malformed or overly large number: treated as "not a reference".
        }
        return -1;
    }
}