Java HTML Unescape unescapeHTML(String s)

Here you can find the source of unescapeHTML(String s)

Description

Turn any HTML escape entities in the string into characters and return the resulting string.

License

Open Source License

Parameter

Parameter Description
s String to be unescaped.

Exception

Exception Description
NullPointerException if s is null.

Return

unescaped String.

Declaration

public static String unescapeHTML(String s) 

Method Source Code


//package com.java2s;
/*
 * Static String formatting and query routines.
 * Copyright (C) 2001,2002 Stephen Ostermiller
 * http://ostermiller.org/contact.pl?regarding=Java+Utilities
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See COPYING.TXT for details.
 */

import java.util.HashMap;

/**
 * Static helper for decoding HTML character references.
 */
public class Main {
    /** Code point of the first Latin-1 entity ({@code &nbsp;}). */
    private static final int FIRST_LATIN1_ENTITY = 160;

    /** Highest valid Unicode code point. */
    private static final int MAX_CODE_POINT = 0x10FFFF;

    /**
     * Names of the HTML 4 Latin-1 entities in code-point order: the entry at
     * index {@code i} stands for code point {@code 160 + i} (160..255).
     */
    private static final String[] LATIN1_ENTITY_NAMES = {
        "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
        "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
        "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
        "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
        "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
        "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
        "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
        "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
        "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
        "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
        "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
    };

    /**
     * Entity name (without the leading {@code &} and trailing {@code ;})
     * mapped to its Unicode code point. Populated once in the static
     * initializer and only read afterwards.
     */
    private static final HashMap<String, Integer> htmlEntities = new HashMap<String, Integer>();

    static {
        // Markup-significant characters.
        htmlEntities.put("quot", Integer.valueOf(34));
        htmlEntities.put("amp", Integer.valueOf(38));
        htmlEntities.put("apos", Integer.valueOf(39));
        htmlEntities.put("lt", Integer.valueOf(60));
        htmlEntities.put("gt", Integer.valueOf(62));

        // Latin-1 block, 160..255.
        for (int i = 0; i < LATIN1_ENTITY_NAMES.length; i++) {
            htmlEntities.put(LATIN1_ENTITY_NAMES[i], Integer.valueOf(FIRST_LATIN1_ENTITY + i));
        }

        // A few frequently used typographic entities.
        htmlEntities.put("ndash", Integer.valueOf(8211));
        htmlEntities.put("mdash", Integer.valueOf(8212));
        htmlEntities.put("lsquo", Integer.valueOf(8216));
        htmlEntities.put("rsquo", Integer.valueOf(8217));
        htmlEntities.put("ldquo", Integer.valueOf(8220));
        htmlEntities.put("rdquo", Integer.valueOf(8221));
        htmlEntities.put("bull", Integer.valueOf(8226));
        htmlEntities.put("hellip", Integer.valueOf(8230));
        htmlEntities.put("euro", Integer.valueOf(8364));
        htmlEntities.put("trade", Integer.valueOf(8482));
    }

    /**
     * Turn any HTML escape entities in the string into
     * characters and return the resulting string.
     * <p>
     * Recognised forms are decimal references ({@code &#65;}), hexadecimal
     * references ({@code &#x41;} / {@code &#X41;}) and the named entities in
     * the built-in table. The input is scanned once, so {@code &amp;lt;}
     * becomes {@code &lt;}, not {@code <}. Anything that is not a
     * recognised escape (unknown name, malformed number, code point above
     * U+10FFFF, or an ampersand with no closing semicolon before the next
     * ampersand) is copied to the output unchanged.
     *
     * @param s String to be unescaped.
     * @return unescaped String.
     * @throws NullPointerException if s is null.
     */
    public static String unescapeHTML(String s) {
        StringBuilder result = new StringBuilder(s.length());
        int lastEnd = 0; // index of the first character not yet copied to result
        int ampInd = s.indexOf('&');
        while (ampInd >= 0) {
            int nextAmp = s.indexOf('&', ampInd + 1);
            int nextSemi = s.indexOf(';', ampInd + 1);
            // Only treat "&...;" as a candidate when no other '&' intervenes;
            // otherwise this '&' is literal and the next one gets its own turn.
            if (nextSemi != -1 && (nextAmp == -1 || nextSemi < nextAmp)) {
                int value = decodeEntity(s.substring(ampInd + 1, nextSemi));
                result.append(s, lastEnd, ampInd);
                if (value >= 0) {
                    // appendCodePoint emits a surrogate pair above U+FFFF.
                    result.appendCodePoint(value);
                } else {
                    // Not a recognised escape: keep the original text verbatim.
                    result.append(s, ampInd, nextSemi + 1);
                }
                lastEnd = nextSemi + 1;
            }
            ampInd = nextAmp;
        }
        result.append(s, lastEnd, s.length());
        return result.toString();
    }

    /**
     * Resolve the text between {@code &} and {@code ;} to a code point.
     *
     * @param escape entity body, e.g. {@code "amp"}, {@code "#65"} or {@code "#x41"}.
     * @return the code point in the range 0..0x10FFFF, or -1 if the body is
     *         not a recognised entity.
     */
    private static int decodeEntity(String escape) {
        if (!escape.startsWith("#")) {
            Integer named = htmlEntities.get(escape);
            return named == null ? -1 : named.intValue();
        }
        try {
            int value;
            if (escape.startsWith("#x") || escape.startsWith("#X")) {
                value = Integer.parseInt(escape.substring(2), 16);
            } else {
                value = Integer.parseInt(escape.substring(1), 10);
            }
            return (value >= 0 && value <= MAX_CODE_POINT) ? value : -1;
        } catch (NumberFormatException ignored) {
            // Malformed numeric reference: report it as unrecognised so the
            // caller leaves the original text in place.
            return -1;
        }
    }
}

Related

  1. unescapeHTML(String html)
  2. unescapeHtml(String s)
  3. unescapeHTML(String s)
  4. unescapeHtml(String s)
  5. unescapeHTML(String s)
  6. unescapeHTML(String source)
  7. unescapeHTML(String source)
  8. unescapeHTML(String source, int start)
  9. unescapeHTML(String str)