Here you can find the source of unescapeHTML(String source)
Parameter | Description |
---|---|
source | a String possibly containing escaped HTML characters |
public static final String unescapeHTML(String source)
//package com.java2s;

/*
 * Copyright 2009 David Jurgens
 *
 * This file is part of the S-Space package and is covered under the terms and
 * conditions therein.
 *
 * The S-Space package is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation and distributed hereunder to you.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
 * EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
 * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
 * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
 * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
 * RIGHTS.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

import java.util.HashMap;
import java.util.Map;

public class Main {

    /**
     * A mapping from HTML codes for escaped special characters (named
     * entities such as {@code &lt;}) to their Unicode character equivalents.
     */
    private static final Map<String, String> HTML_CODES = new HashMap<String, String>();

    /**
     * A mapping from decimal numeric character references (such as
     * {@code &#233;}) to their Unicode character equivalents.
     */
    private static final Map<String, String> LATIN1_CODES = new HashMap<String, String>();

    /** Code point of the first entry of {@link #LATIN1_ENTITY_NAMES}. */
    private static final int LATIN1_FIRST_CODE_POINT = 160;

    /**
     * Names of the HTML 4 Latin-1 entities, ordered by code point: the entry
     * at index {@code i} names the character
     * {@code LATIN1_FIRST_CODE_POINT + i} (U+00A0 through U+00FF).
     */
    private static final String[] LATIN1_ENTITY_NAMES = {
        "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
        "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
        "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
        "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
        "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
        "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
        "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
        "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
        "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
        "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
        "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
    };

    // Populate the lookup tables once at class-load time.  Without this the
    // maps stay empty and both unescapeHTML overloads never replace anything.
    static {
        // The predefined markup entities.
        HTML_CODES.put("&quot;", "\"");
        HTML_CODES.put("&amp;", "&");
        HTML_CODES.put("&apos;", "'");
        HTML_CODES.put("&lt;", "<");
        HTML_CODES.put("&gt;", ">");

        // The named Latin-1 entities (&nbsp; maps to U+00A0, the no-break
        // space, not to an ordinary space).
        for (int i = 0; i < LATIN1_ENTITY_NAMES.length; i++) {
            String decoded = String.valueOf((char) (LATIN1_FIRST_CODE_POINT + i));
            HTML_CODES.put("&" + LATIN1_ENTITY_NAMES[i] + ";", decoded);
        }

        // Decimal numeric references for the printable Latin-1 characters.
        for (int code = 32; code <= 255; code++) {
            if (code >= 127 && code < LATIN1_FIRST_CODE_POINT) {
                // DEL and the C1 control range have no printable equivalent
                continue;
            }
            String decoded = String.valueOf((char) code);
            LATIN1_CODES.put("&#" + code + ";", decoded);
            if (code < 100) {
                // also accept the common zero-padded form, e.g. &#039;
                LATIN1_CODES.put("&#0" + code + ";", decoded);
            }
        }
    }

    /**
     * Looks up the replacement text for a single escaped sequence.
     *
     * @param encoded the candidate sequence, including the leading
     *        {@code &} and the trailing {@code ;}
     * @return the decoded text, or {@code null} if the sequence is not a
     *         recognized HTML or Latin-1 code
     */
    private static String decode(String encoded) {
        String decoded = HTML_CODES.get(encoded);
        // if encoded form wasn't in the HTML codes, try checking to see if
        // it was a Latin-1 code
        return (decoded != null) ? decoded : LATIN1_CODES.get(encoded);
    }

    /**
     * Returns the provided string where all recognized HTML special
     * characters (e.g. {@code &nbsp;}) have been replaced with their Unicode
     * equivalents.  Unrecognized sequences and stray {@code &} characters are
     * left untouched.  Decoded text is never decoded a second time, so
     * {@code &amp;lt;} becomes {@code &lt;}.
     *
     * @param source a String possibly containing escaped HTML characters
     * @return the unescaped text, or {@code source} itself when nothing was
     *         replaced
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static final String unescapeHTML(String source) {
        // created lazily so that input without any entity costs no allocation
        StringBuilder sb = null;
        // the position just past the last escaped HTML character
        int last = 0;

        int start = source.indexOf('&');
        while (start > -1) {
            int end = source.indexOf(';', start);
            if (end < 0) {
                // no terminator left, so no further entity can exist
                break;
            }
            // A code never contains a second '&', so the only candidate is
            // the last '&' before the ';'.  Anchoring there keeps a stray
            // '&' (as in "R&D &lt; 5") from hiding the real entity.
            start = source.lastIndexOf('&', end);

            String decoded = decode(source.substring(start, end + 1));
            if (decoded != null) {
                if (sb == null) {
                    sb = new StringBuilder(source.length());
                }
                // append the text between the previous escaped character
                // and this one, followed by the replacement
                sb.append(source, last, start).append(decoded);
                last = end + 1;
            }
            start = source.indexOf('&', end + 1);
        }

        // if there weren't any substitutions, don't bother to create a new
        // String
        if (sb == null) {
            return source;
        }
        // otherwise finish the substitution by appending all the text from
        // the last substitution until the end of the string
        sb.append(source, last, source.length());
        return sb.toString();
    }

    /**
     * Modifies the provided {@link StringBuilder} in place by replacing all
     * recognized HTML special characters (e.g. {@code &nbsp;}) with their
     * Unicode equivalents.  Unrecognized sequences and stray {@code &}
     * characters are left untouched.  Decoded text is never decoded a second
     * time, so {@code &amp;lt;} becomes {@code &lt;}.
     *
     * @param source a builder possibly containing escaped HTML characters
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public static final void unescapeHTML(StringBuilder source) {
        int start = source.indexOf("&");
        while (start > -1) {
            int end = source.indexOf(";", start);
            if (end < 0) {
                // no terminator left, so no further entity can exist
                return;
            }
            // only the last '&' before the ';' can begin a valid code
            start = source.lastIndexOf("&", end);

            String decoded = decode(source.substring(start, end + 1));
            if (decoded != null) {
                source.replace(start, end + 1, decoded);
                // Resume after the inserted text rather than at 'end': the
                // replacement is usually shorter than the code, and it must
                // be skipped in case the decoded character was itself '&'.
                start = source.indexOf("&", start + decoded.length());
            } else {
                start = source.indexOf("&", end + 1);
            }
        }
    }
}