Escape HTML
/**
* Berlin Brown
* Dec 26, 2006
*/
//package org.bresearch.websec.utils.botlist.xml;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
/**
* This is class is used by botverse.
* @author Berlin Brown
*
*/
public final class EscapeHTML {
/**
* Synonym for <tt>URLEncoder.encode(String, "UTF-8")</tt>.
*
* <P>Used to ensure that HTTP query strings are in proper form, by escaping
* special characters such as spaces.
*
* <P>An example use case for this method is a login scheme in which, after successful
* login, the user is redirected to the "original" target destination. Such a target
* might be passed around as a request parameter. Such a request parameter
* will have a URL as its value, as in "LoginTarget=Blah.jsp?this=that&blah=boo", and
* would need to be URL-encoded in order to escape its special characters.
*
* <P>It is important to note that if a query string appears in an <tt>HREF</tt>
* attribute, then there are two issues - ensuring the query string is valid HTTP
* (it is URL-encoded), and ensuring it is valid HTML (ensuring the ampersand is escaped).
*/
public String escapeURL(String aURLFragment) {
String result = null;
try {
result = URLEncoder.encode(aURLFragment, "UTF-8");
}
catch (UnsupportedEncodingException ex){
throw new RuntimeException("UTF-8 not supported", ex);
}
return result;
}
/**
* Replace characters having special meaning <em>inside</em> HTML tags
* with their escaped equivalents, using character entities such as <tt>'&'</tt>.
*
* <P>The escaped characters are :
* <ul>
* <li> <
* <li> >
* <li> "
* <li> '
* <li> \
* <li> &
* </ul>
*
* <P>This method ensures that arbitrary text appearing inside a tag does not "confuse"
* the tag. For example, <tt>HREF='Blah.do?Page=1&Sort=ASC'</tt>
* does not comply with strict HTML because of the ampersand, and should be changed to
* <tt>HREF='Blah.do?Page=1&Sort=ASC'</tt>. This is commonly seen in building
* query strings. (In JSTL, the c:url tag performs this task automatically.)
*/
public String escapeHTMLTag(String aTagFragment) {
final StringBuffer result = new StringBuffer();
final StringCharacterIterator iterator = new StringCharacterIterator(aTagFragment);
char character = iterator.current();
while (character != CharacterIterator.DONE ){
if (character == '<') {
result.append("<");
}
else if (character == '>') {
result.append(">");
}
else if (character == '\"') {
result.append(""");
}
else if (character == '\'') {
result.append("'");
}
else if (character == '\\') {
result.append("\");
}
else if (character == '&') {
result.append("&");
}
else {
//the char is not a special one
//add it to the result as is
result.append(character);
}
character = iterator.next();
}
return result.toString();
}
/**
* Return <tt>aText</tt> with all start-of-tag and end-of-tag characters
* replaced by their escaped equivalents.
*
* <P>If user input may contain tags which must be disabled, then call
* this method, not {@link #forHTMLTag}. This method is used for text appearing
* <em>outside</em> of a tag, while {@link #forHTMLTag} is used for text appearing
* <em>inside</em> an HTML tag.
*
* <P>It is not uncommon to see text on a web page presented erroneously, because
* <em>all</em> special characters are escaped (as in {@link #forHTMLTag}), instead of
* just the start-of-tag and end-of-tag characters. In
* particular, the ampersand character is often escaped not once but <em>twice</em> :
* once when the original input occurs, and then a second time when the same item is
* retrieved from the database. This occurs because the ampersand is the only escaped
* character which appears in a character entity.
*/
public String escapeDisableTags(String aText) {
final StringBuffer result = new StringBuffer();
final StringCharacterIterator iterator = new StringCharacterIterator(aText);
char character = iterator.current();
while (character != CharacterIterator.DONE ){
if (character == '<') {
result.append("<");
}
else if (character == '>') {
result.append(">");
}
else {
//the char is not a special one
//add it to the result as is
result.append(character);
}
character = iterator.next();
}
return result.toString();
}
}
Related examples in the same category