Write code to replace characters that may be confused by an HTML parser with their equivalent character entity references.
/*
 * Static String formatting and query routines.
 * Copyright (C) 2001-2005 Stephen Ostermiller
 * http://ostermiller.org/contact.pl?regarding=Java+Utilities
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * See COPYING.TXT for details.
 */

/**
 * Small demo wrapper around {@link #escapeHTML(String)}.
 */
public class Main {

    /**
     * Prints the HTML-escaped form of a sample string to standard output.
     *
     * @param argv command line arguments (unused)
     */
    public static void main(String[] argv) {
        String s = "book2s.com";
        System.out.println(escapeHTML(s));
    }

    /**
     * Replaces characters that may be confused by an HTML
     * parser with their equivalent character entity references.
     * <p>
     * Any data that will appear as text on a web page should
     * be escaped. This is especially important for data
     * that comes from untrusted sources such as Internet users.
     * A common mistake in CGI programming is to ask a user for
     * data and then put that data on a web page. For example:<pre>
     * Server: What is your name?
     * User: &lt;b&gt;Joe&lt;b&gt;
     * Server: Hello &lt;b&gt;Joe&lt;/b&gt;, Welcome</pre>
     * If the name is put on the page without checking that it doesn't
     * contain HTML code or without sanitizing that HTML code, the user
     * could reformat the page, insert scripts, and control the
     * content on your web server.
     * <p>
     * This method will replace HTML characters such as &gt; with their
     * HTML entity reference (&amp;gt;) so that the HTML parser will
     * be sure to interpret them as plain text rather than HTML or script.
     * The full mapping is:
     * <ul>
     * <li>{@code "} becomes {@code &quot;}</li>
     * <li>{@code '} becomes {@code &#39;}</li>
     * <li>{@code &} becomes {@code &amp;}</li>
     * <li>{@code <} becomes {@code &lt;}</li>
     * <li>{@code >} becomes {@code &gt;}</li>
     * <li>control characters below code 32, other than carriage return,
     *     line feed, tab and form feed, are removed</li>
     * </ul>
     * <p>
     * This method should be used for both data to be displayed as text
     * in the HTML document, and data put in form elements (attribute
     * values), which is why both quote characters are escaped.
     * <p>
     * When nothing needs to be escaped or removed, the very same
     * {@code String} instance is returned and no copy is made.
     *
     * @param s String to be escaped
     * @return escaped String
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.00.00
     */
    public static String escapeHTML(String s) {
        int length = s.length(); // throws NullPointerException for null input, as documented
        int newLength = length;
        boolean someCharacterEscaped = false;

        // First pass: detect whether any work is needed and compute the
        // exact length of the result so the builder never has to grow.
        for (int i = 0; i < length; i++) {
            String replacement = replacementFor(s.charAt(i));
            if (replacement != null) {
                // The replacement takes the place of exactly one source char.
                newLength += replacement.length() - 1;
                someCharacterEscaped = true;
            }
        }
        if (!someCharacterEscaped) {
            // Nothing to escape in the string: hand back the original instance.
            return s;
        }

        // Second pass: build the escaped string.
        StringBuilder sb = new StringBuilder(newLength);
        for (int i = 0; i < length; i++) {
            char c = s.charAt(i);
            String replacement = replacementFor(c);
            if (replacement == null) {
                sb.append(c);
            } else {
                sb.append(replacement);
            }
        }
        return sb.toString();
    }

    /**
     * Returns the text that must replace {@code c} in escaped output:
     * an entity reference for HTML-significant characters, the empty
     * string for control characters that are dropped, or {@code null}
     * when the character is copied through unchanged.
     */
    private static String replacementFor(char c) {
        switch (c) {
            case '"':
                return "&quot;";
            case '\'':
                // Numeric reference: &apos; is not defined in HTML 4.
                return "&#39;";
            case '&':
                return "&amp;";
            case '<':
                return "&lt;";
            case '>':
                return "&gt;";
            case '\r':
            case '\n':
            case '\t':
            case '\f':
                // Whitespace control characters are kept as they are.
                return null;
            default:
                // Any other control character (below 32) is removed.
                return c < 32 ? "" : null;
        }
    }
}