Description
Turn any HTML escape entities in the string into characters and return the resulting string.
License
Open Source License
Parameter
| Parameter | Description |
|---|---|
| s | String to be un-escaped. |
Exception
| Exception | Description |
|---|---|
| NullPointerException | if s is null. |
Return
un-escaped String.
Declaration
public static String unescapeHTML(String s)
Method Source Code
//package com.java2s;
/*
* Copyright (C) 2001-2011 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Java+Utilities
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See LICENSE.txt for details.
*/
import java.util.HashMap;
public class Main {
    /**
     * Lookup table from HTML entity name (without the leading {@code &} and
     * trailing {@code ;}) to the Unicode code point it stands for.
     * <p>
     * This is a deliberately small subset of the HTML entity list: the
     * markup-significant characters plus some common typographic symbols.
     * Names that are not listed here are left untouched by
     * {@link #unescapeHTML(String)}. The table is filled once in the static
     * initializer below and only read afterwards.
     */
    private static final HashMap<String, Integer> htmlEntities = new HashMap<String, Integer>();

    static {
        // Markup-significant characters.
        htmlEntities.put("quot", 34);
        htmlEntities.put("amp", 38);
        htmlEntities.put("apos", 39);
        htmlEntities.put("lt", 60);
        htmlEntities.put("gt", 62);
        // Common Latin-1 symbols.
        htmlEntities.put("nbsp", 160);
        htmlEntities.put("cent", 162);
        htmlEntities.put("pound", 163);
        htmlEntities.put("yen", 165);
        htmlEntities.put("sect", 167);
        htmlEntities.put("copy", 169);
        htmlEntities.put("laquo", 171);
        htmlEntities.put("reg", 174);
        htmlEntities.put("deg", 176);
        htmlEntities.put("plusmn", 177);
        htmlEntities.put("middot", 183);
        htmlEntities.put("raquo", 187);
        htmlEntities.put("times", 215);
        htmlEntities.put("divide", 247);
        // Common typographic punctuation and symbols.
        htmlEntities.put("ndash", 8211);
        htmlEntities.put("mdash", 8212);
        htmlEntities.put("lsquo", 8216);
        htmlEntities.put("rsquo", 8217);
        htmlEntities.put("ldquo", 8220);
        htmlEntities.put("rdquo", 8221);
        htmlEntities.put("bull", 8226);
        htmlEntities.put("hellip", 8230);
        htmlEntities.put("euro", 8364);
        htmlEntities.put("trade", 8482);
    }

    /**
     * Turn any HTML escape entities in the string into
     * characters and return the resulting string.
     * <p>
     * Three forms are recognized between an ampersand and the next semicolon
     * (provided no other ampersand comes first): decimal references such as
     * {@code &#65;}, hexadecimal references such as {@code &#x41;}, and the
     * entity names present in the internal lookup table. Anything that cannot
     * be decoded is copied to the output verbatim.
     *
     * @param s String to be un-escaped.
     * @return un-escaped String.
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.00.00
     */
    public static String unescapeHTML(String s) {
        StringBuilder result = new StringBuilder(s.length());
        int ampInd = s.indexOf('&');
        int lastEnd = 0; // index just past the last character already copied
        while (ampInd >= 0) {
            int nextAmp = s.indexOf('&', ampInd + 1);
            int nextSemi = s.indexOf(';', ampInd + 1);
            // Only treat "&...;" as a candidate when the semicolon comes
            // before the next ampersand; otherwise this '&' is plain text.
            if (nextSemi != -1 && (nextAmp == -1 || nextSemi < nextAmp)) {
                String escape = s.substring(ampInd + 1, nextSemi);
                int value = decodeEntity(escape);
                result.append(s, lastEnd, ampInd);
                lastEnd = nextSemi + 1;
                if (value >= 0) {
                    // appendCodePoint emits a surrogate pair when needed, so
                    // references above U+FFFF are decoded as well.
                    result.appendCodePoint(value);
                } else {
                    // Unknown or malformed entity: output it verbatim.
                    result.append('&').append(escape).append(';');
                }
            }
            ampInd = nextAmp;
        }
        result.append(s, lastEnd, s.length());
        return result.toString();
    }

    /**
     * Resolve the text between {@code &} and {@code ;} to a code point.
     *
     * @param escape entity body, e.g. {@code "amp"}, {@code "#65"} or {@code "#x41"}
     * @return the code point, or -1 if the entity is unknown, malformed, or
     *         not a valid Unicode code point
     */
    private static int decodeEntity(String escape) {
        if (escape.startsWith("#")) {
            boolean hex = escape.startsWith("#x") || escape.startsWith("#X");
            try {
                int value = Integer.parseInt(escape.substring(hex ? 2 : 1), hex ? 16 : 10);
                return Character.isValidCodePoint(value) ? value : -1;
            } catch (NumberFormatException ignored) {
                // Not a number: the caller writes the entity back unchanged.
                return -1;
            }
        }
        Integer named = htmlEntities.get(escape);
        return named != null ? named.intValue() : -1;
    }

    /**
     * Liberal parse method for integer values. If the input string is a representation of
     * an integer, that value will be returned. Otherwise the default value is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * If the number starts with a base prefix ("0x" for hex, "0b" for binary, "0c" for
     * octal), it will be parsed with that radix. Otherwise, the number will be parsed in
     * base 10 radix.
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @param defaultValue value returned when s is null or cannot be parsed
     * @return parsed integer value or the default value
     * @since ostermillerutils 1.07.01
     */
    public static int parseInt(String s, int defaultValue) {
        Integer integer = parseInteger(s);
        return integer != null ? integer.intValue() : defaultValue;
    }

    /**
     * Liberal parse method for integer values. If the input string is a representation of
     * an integer, that value will be returned. Otherwise the default value is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @param radix number base used during parsing
     * @param defaultValue value returned when s is null or cannot be parsed
     * @return parsed integer value or the default value
     * @since ostermillerutils 1.07.01
     */
    public static int parseInt(String s, int radix, int defaultValue) {
        Integer integer = parseInteger(s, radix);
        return integer != null ? integer.intValue() : defaultValue;
    }

    /**
     * Liberal parse method for integer values. If the input string is a representation of
     * an integer, that value will be returned. Otherwise null is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * If the number starts with a base prefix ("0x" for hex, "0b" for binary, "0c" for
     * octal), it will be parsed with that radix. Otherwise, the number will be parsed in
     * base 10 radix. At most one prefix is consumed, so the digits that follow a
     * hex prefix are never mistaken for a second prefix (e.g. "0x0b1" is hex 0b1).
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @return parsed integer value, or null if s is null or cannot be parsed
     * @since ostermillerutils 1.07.01
     */
    public static Integer parseInteger(String s) {
        if (s == null) {
            return null;
        }
        s = s.trim();
        int radix = 10;
        // else-if chain: strip exactly one prefix. Independent ifs would
        // re-examine the remainder and misread hex digits such as "0b"/"0c".
        if (s.startsWith("0x") || s.startsWith("0X")) {
            radix = 16;
            s = s.substring(2);
        } else if (s.startsWith("0c") || s.startsWith("0C")) {
            radix = 8;
            s = s.substring(2);
        } else if (s.startsWith("0b") || s.startsWith("0B")) {
            radix = 2;
            s = s.substring(2);
        }
        return parseInteger(s, radix);
    }

    /**
     * Liberal parse method for integer values. If the input string is a representation of
     * an integer, that value will be returned. Otherwise null is returned.
     * Surrounding white space is NOT significant.
     * <p>
     * This method does NOT throw number format exceptions.
     *
     * @param s String containing a integer value to be parsed
     * @param radix number base used during parsing
     * @return parsed integer value, or null if s is null or cannot be parsed
     * @since ostermillerutils 1.07.01
     */
    public static Integer parseInteger(String s, int radix) {
        if (s == null) {
            return null;
        }
        try {
            return Integer.valueOf(s.trim(), radix);
        } catch (NumberFormatException ignored) {
            // Unparseable input is reported as null rather than an exception.
            return null;
        }
    }

    /**
     * Trim any of the characters contained in the second
     * string from the beginning and end of the first.
     *
     * @param s String to be trimmed.
     * @param c list of characters to trim from s; if null or empty, s is
     *        returned unchanged.
     * @return trimmed String (empty if every character of s is trim-able).
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.00.00
     */
    public static String trim(String s, String c) {
        // Dereference s first so a null s fails even when c is null.
        int length = s.length();
        if (c == null || c.length() == 0) {
            return s;
        }
        // Advance past leading trim-able characters.
        int start = 0;
        while (start < length && c.indexOf(s.charAt(start)) >= 0) {
            start++;
        }
        if (start == length) {
            // Every character was trim-able (or s was empty).
            return "";
        }
        // Back up over trailing trim-able characters; stops at or after start.
        int end = length;
        while (end > start && c.indexOf(s.charAt(end - 1)) >= 0) {
            end--;
        }
        return s.substring(start, end);
    }
}
Related
- unEscapeHTML(final String escapedHTML)
- unescapeHtml(final String input)
- unescapeHTML(String comment)
- unescapeHTML(String html)
- unescapeHtml(String s)
- unescapeHtml(String s)
- unescapeHTML(String s)
- unescapeHTML(String s)
- unescapeHTML(String source)