Java tutorial
/**
 * Copyright (c) 2001-2012 "Redbasin Networks, INC" [http://redbasin.org]
 *
 * This file is part of Redbasin OpenDocShare community project.
 *
 * Redbasin OpenDocShare is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package util;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Regex-based helpers for stripping HTML markup from a piece of text.
 *
 * <p>{@link #stripTags(String, String[])} removes every {@code <...>} span
 * except the tags explicitly allowed by the caller, after first removing
 * complete style and script sections.
 *
 * @author Smitha Gudur (smitha@redbasin.com)
 * @version $Revision: 1.1 $
 */
public class StripHTMLTags {

    /** Logger for this class and subclasses */
    protected final Log logger = LogFactory.getLog(getClass());

    /** Allow-list that keeps paragraph and line-break tags. */
    public final String[] LINEBREAK_ELEMENTS = { "p", "br" };

    /** Allow-list that keeps paragraph, line-break and bold tags. */
    public final String[] LINEBREAK_BOLD = { "p", "br", "b" };

    /** Allow-list with a single empty name; no tag is kept. */
    public final String[] NO_HTML = { "" };

    /** Matches any span from a {@code <} up to the nearest following {@code >}. */
    private static final Pattern ANY_TAG = Pattern.compile("<.*?>", Pattern.DOTALL);

    /**
     * Strips all tags from the body except those named in the tag array.
     * Complete style and script sections are removed first (via
     * {@link #stripCSS(StringBuffer)} and {@link #stripJS(StringBuffer)}).
     *
     * <p>Allowed tags are matched case-insensitively, in opening, closing
     * and self-closing form, with or without attributes. Each tag name is
     * inserted into a regular expression as-is, so it is interpreted as a
     * regex fragment. Null or empty names allow nothing.
     *
     * @param body The body or text to strip
     * @param tags The tag array for strip exclusion; may be null
     * @return String Return the body with the stripped tags, or null if
     *         body was null.
     */
    public String stripTags(String body, String[] tags) {
        if (null == body) {
            return body;
        }

        Pattern[] allowedPatterns = compileTagPatterns(tags);

        StringBuffer cleaned = new StringBuffer(body);
        cleaned = stripCSS(cleaned);
        cleaned = stripJS(cleaned);

        // Single pass over an immutable snapshot: the matcher never sees the
        // text change underneath it, and the input is not rescanned from the
        // start after every removal.
        Matcher matcher = ANY_TAG.matcher(cleaned.toString());
        StringBuffer result = new StringBuffer(cleaned.length());
        while (matcher.find()) {
            String candidate = matcher.group();
            if (logger.isDebugEnabled()) {
                logger.debug("Match: " + candidate);
            }
            if (isAllowed(candidate, allowedPatterns)) {
                // quoteReplacement keeps '$' and '\' inside the tag literal.
                matcher.appendReplacement(result, Matcher.quoteReplacement(candidate));
            } else {
                logger.debug("Substituting Match");
                matcher.appendReplacement(result, "");
            }
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Compiles one pattern per usable tag name. Slots for null or empty
     * names are left null so that they never match anything.
     */
    private static Pattern[] compileTagPatterns(String[] tags) {
        if (tags == null) {
            return new Pattern[0];
        }
        Pattern[] compiled = new Pattern[tags.length];
        for (int i = 0; i < tags.length; i++) {
            String tag = tags[i];
            if (tag == null || tag.length() == 0) {
                continue;
            }
            // "<", optional "/", the tag name, then either an optional "/"
            // and ">" (plain or self-closing tag) or whitespace followed by
            // attributes up to the closing ">".
            compiled[i] = Pattern.compile(
                    "<\\s*/?\\s*" + tag + "(?:\\s*/?\\s*>|\\s.*>)",
                    Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
        }
        return compiled;
    }

    /** Tells whether the complete tag text matches any of the allowed patterns. */
    private boolean isAllowed(String tagText, Pattern[] allowedPatterns) {
        for (int i = 0; i < allowedPatterns.length; i++) {
            Pattern allowed = allowedPatterns[i];
            if (allowed == null) {
                continue;
            }
            if (logger.isDebugEnabled()) {
                logger.debug("Pattern: " + allowed.pattern());
            }
            if (allowed.matcher(tagText).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Strips every occurrence of the given pattern from the body. The
     * pattern is compiled with {@link Pattern#DOTALL}. The buffer passed in
     * is modified in place and returned.
     *
     * @param body the body to strip; a null body is returned unchanged
     * @param pattern the regular expression to remove
     * @return StringBuffer return the stripped body
     */
    StringBuffer stripPattern(StringBuffer body, String pattern) {
        if (body == null) {
            return body;
        }
        Pattern compiled = Pattern.compile(pattern, Pattern.DOTALL);
        // Match against a String snapshot: deleting from the buffer while a
        // matcher is still iterating over it leaves the matcher with stale
        // offsets, which skips matches or reads past the shortened buffer.
        String stripped = compiled.matcher(body.toString()).replaceAll("");
        body.replace(0, body.length(), stripped);
        return body;
    }

    /**
     * Strips the complete CSS section, i.e. everything from a
     * {@code <style} opening up to the nearest following {@code style>},
     * matched case-insensitively.
     *
     * @param body The body to strip.
     * @return StringBuffer returns the stripped body.
     */
    StringBuffer stripCSS(StringBuffer body) {
        return stripPattern(body, "<[sS][tT][yY][lL][eE].*?[sS][tT][yY][lL][eE]>");
    }

    /**
     * Strips the complete javascript section, i.e. everything from a
     * {@code <script} opening up to the nearest following {@code script>},
     * matched case-insensitively.
     *
     * @param body The body to strip.
     * @return StringBuffer returns the stripped body.
     */
    StringBuffer stripJS(StringBuffer body) {
        return stripPattern(body, "<[sS][cC][rR][iI][pP][tT].*?[sS][cC][rR][iI][pP][tT]>");
    }
}