Java tutorial
//package com.java2s;

/**
 * String helpers for reducing an HTML fragment to plain text.
 */
public class Main {

    /**
     * Entity / replacement pairs decoded by {@link #removeHTML(String, boolean)}.
     * {@code &amp;} must stay last: decoding it earlier would turn an escaped
     * entity such as {@code &amp;lt;} into {@code &lt;} and then into {@code <}.
     */
    private static final String[][] ENTITIES = {
        {"&nbsp;", " "},
        {"&lt;", "<"},
        {"&gt;", ">"},
        {"&amp;", "&"},
    };

    /**
     * Removes HTML tags from a string and decodes a small set of entities
     * ({@code &nbsp;}, {@code &lt;}, {@code &gt;}, {@code &amp;}).
     *
     * <p>A text that contains a {@code <} but no {@code >} at all is treated as
     * plain text and returned untouched. Otherwise everything from each
     * {@code <} up to and including the next {@code >} is dropped; a tag that
     * is never closed is dropped up to the end of the string. If the result
     * differs from the input, it is trimmed and runs of space characters are
     * collapsed into a single space (other whitespace, including inserted line
     * breaks, is kept).
     *
     * @param text The string to remove HTML tags from; may be {@code null}
     * @param replaceNewlines {@code true} to turn {@code <br>} tags (any case,
     *        optionally self-closing) into a newline character instead of
     *        dropping them
     * @return The plain text, or {@code null} if {@code text} was {@code null}
     */
    public static String removeHTML(final String text, boolean replaceNewlines) {
        if (text == null) {
            return null;
        }

        // If we have an opener and no closer, then it isn't HTML
        // (e.g. "a < b") and we shouldn't break it.
        if (text.indexOf('<') != -1 && text.indexOf('>') == -1) {
            return text;
        }

        final String plain = decodeEntities(stripTags(text, replaceNewlines));

        // Only compress whitespace if HTML was actually found and removed.
        if (text.equals(plain)) {
            return plain;
        }
        return collapseSpaces(plain.trim());
    }

    /**
     * Removes HTML tags from a string, dropping {@code <br>} tags like any
     * other tag.
     *
     * @param s The string to remove HTML tags from; may be {@code null}
     * @return The plain text, or {@code null} if {@code s} was {@code null}
     * @see #removeHTML(String, boolean)
     */
    public static String removeHTML(String s) {
        return removeHTML(s, false);
    }

    /**
     * Looks in findin for all occurrences of find and replaces them with
     * replacewith. Matching ignores case. Occurrences are found left to right
     * and do not overlap; text inserted by a replacement is never searched
     * again, so the call terminates even when replacewith contains find.
     *
     * @param findin The string to find occurrences in; {@code null} yields
     *        {@code null}
     * @param find The string to find; if {@code null} or empty, findin is
     *        returned unchanged
     * @param replacewith The string to replace found occurrences with;
     *        {@code null} is treated as the empty string
     * @return A string with all occurrences of find replaced.
     */
    public static String replace(String findin, String find, String replacewith) {
        if (findin == null) {
            return null;
        }
        if (find == null || find.isEmpty()) {
            return findin;
        }
        final String replacement = (replacewith == null) ? "" : replacewith;
        final int findLength = find.length();
        final int lastStart = findin.length() - findLength;

        StringBuilder result = null; // allocated lazily on the first match
        int copiedUpTo = 0;
        int i = 0;
        while (i <= lastStart) {
            if (findin.regionMatches(true, i, find, 0, findLength)) {
                if (result == null) {
                    result = new StringBuilder(findin.length());
                }
                result.append(findin, copiedUpTo, i).append(replacement);
                // Jump past the whole match so neither the replacement nor the
                // character following it is skipped or re-examined.
                i += findLength;
                copiedUpTo = i;
            } else {
                i++;
            }
        }

        if (result == null) {
            return findin;
        }
        return result.append(findin, copiedUpTo, findin.length()).toString();
    }

    /**
     * Drops every {@code <...>} span from html in a single left-to-right pass,
     * optionally leaving a newline where a line-break tag stood.
     */
    private static String stripTags(String html, boolean replaceNewlines) {
        int open = html.indexOf('<');
        if (open == -1) {
            return html;
        }

        final int length = html.length();
        final StringBuilder out = new StringBuilder(length);
        int pos = 0;
        while (open != -1) {
            out.append(html, pos, open);

            int close = html.indexOf('>', open);
            if (close == -1) {
                // Unterminated tag: it swallows the rest of the string.
                close = length;
            }
            if (replaceNewlines && isLineBreakTag(html.substring(open + 1, close))) {
                out.append('\n');
            }

            pos = Math.min(close + 1, length);
            open = html.indexOf('<', pos);
        }
        out.append(html, pos, length);
        return out.toString();
    }

    /**
     * Tells whether the text between {@code <} and {@code >} is a line break:
     * {@code br}, {@code BR}, {@code br/} or {@code br /}.
     */
    private static boolean isLineBreakTag(String tag) {
        String name = tag.trim();
        if (name.endsWith("/")) {
            name = name.substring(0, name.length() - 1).trim();
        }
        return name.equalsIgnoreCase("br");
    }

    /** Replaces the entities listed in {@link #ENTITIES} with their characters. */
    private static String decodeEntities(String s) {
        String decoded = s;
        for (String[] entity : ENTITIES) {
            decoded = replace(decoded, entity[0], entity[1]);
        }
        return decoded;
    }

    /** Collapses each run of space characters into a single space. */
    private static String collapseSpaces(String s) {
        final StringBuilder out = new StringBuilder(s.length());
        boolean lastWasSpace = false;
        for (int i = 0; i < s.length(); i++) {
            final char c = s.charAt(i);
            if (c == ' ') {
                if (!lastWasSpace) {
                    out.append(' ');
                }
                lastWasSpace = true;
            } else {
                out.append(c);
                lastWasSpace = false;
            }
        }
        return out.toString();
    }
}