Java tutorial
/*
 * ***** BEGIN LICENSE BLOCK *****
 * Zimbra Collaboration Suite Server
 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Synacor, Inc.
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software Foundation,
 * version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with this program.
 * If not, see <https://www.gnu.org/licenses/>.
 * ***** END LICENSE BLOCK *****
 */
package com.zimbra.cs.html;

import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.cyberneko.html.filters.DefaultFilter;
import org.owasp.html.PolicyFactory;
import org.owasp.html.Sanitizers;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.zimbra.common.localconfig.DebugConfig;
import com.zimbra.common.util.StringUtil;
import com.zimbra.common.util.ZimbraLog;
import com.zimbra.cs.servlet.ZThreadLocal;

/**
 * Heavily modified version of the {@code ElementRemover} filter from CyberNeko HTML.
 * Accepted/removed elements are kept in static maps that are populated once, and
 * attribute values are sanitized in addition to being white-listed.
 *
 * <p>Elements fall into three groups:
 * <ul>
 * <li><b>accepted</b> - the tag is passed through with only its white-listed attributes;</li>
 * <li><b>removed</b> - the tag and everything nested inside it is dropped;</li>
 * <li><b>neither</b> - the tag itself is dropped but its content is still passed through.</li>
 * </ul>
 *
 * <p>The static accept/remove tables are plain {@link HashMap}s without synchronization;
 * instances carry per-document parsing state in non-static fields.
 *
 * <p>TODO: more checks:
 * <ul>
 * <li>allow limited use of &lt;meta&gt; tags? like for Content-Type?</li>
 * <li>make sure any clicked links pop up in new window</li>
 * <li>figure out how to block images by default, and how to re-enable them. styles?</li>
 * <li>strict attr value checking?</li>
 * <li>don't allow id attr in tags if we aren't putting html into an iframe (I'm assuming
 *     we are, and id's in iframes don't conflict with iframes elsewhere)</li>
 * </ul>
 */
public class DefangFilter extends DefaultFilter {

    /** Largest char value that {@link #extractAndSanitizeAsciiData(String)} treats as ASCII. */
    private static final int ASCII_DATA_VALUE = 127;

    /** When true the form/input tags are accepted; when false they are removed with their content. */
    private static final boolean ENABLE_INPUT_TAGS = true;

    /** enable table tags */
    private static final boolean ENABLE_TABLE_TAGS = true;

    /** enable phrase tags (EM, STRONG, CITE, DFN, CODE, SAMP, KBD, VAR, ABBR, ACRONYM) */
    private static final boolean ENABLE_PHRASE_TAGS = true;

    /** enable list tags (UL, OL, LI, DL, DT, DD, DIR, MENU) */
    private static final boolean ENABLE_LIST_TAGS = true;

    /** enable font style tags (TT, I, B, BIG, SMALL, STRIKE, S, U) */
    private static final boolean ENABLE_FONT_STYLE_TAGS = true;

    /** The Host header received in the request, or null when there is no request context. */
    private String reqVirtualHost = null;

    /** Whether a form {@code action} pointing at the request's own host gets blocked. */
    private static boolean sameHostFormPostCheck = DebugConfig.defang_block_form_same_host_post_req;

    //
    // Constants
    //

    /** A "null" object, used as the value stored for every removed element. */
    protected static final Object NULL = new Object();

    private static final PolicyFactory sanitizer = Sanitizers.IMAGES.and(Sanitizers.LINKS);

    // regexes inside of attr values to strip out
    private static final Pattern AV_JS_ENTITY = Pattern.compile(DebugConfig.defangAvJsEntity);
    private static final Pattern AV_SCRIPT_TAG = Pattern.compile(DebugConfig.defangAvScriptTag,
            Pattern.CASE_INSENSITIVE);
    private static final Pattern AV_JAVASCRIPT = Pattern.compile(DebugConfig.defangAvJavascript,
            Pattern.CASE_INSENSITIVE);
    private static final Pattern AV_VBSCRIPT = Pattern.compile(DebugConfig.defangAvVbscript,
            Pattern.CASE_INSENSITIVE);
    private static final Pattern AV_TAB = Pattern.compile(DebugConfig.defangAvTab, Pattern.CASE_INSENSITIVE);

    /** Matches the scheme part of a data URI; compiled once instead of on every sanitize() call. */
    private static final Pattern DATA_URI = Pattern.compile("(?i)data\\s*:");

    // regex for URLs href. TODO: beef this up
    private static final Pattern VALID_EXT_URL = Pattern.compile(DebugConfig.defangValidExtUrl,
            Pattern.CASE_INSENSITIVE);
    private static final Pattern VALID_IMG_FILE = Pattern.compile(DebugConfig.defangValidImgFile);
    private static final Pattern VALID_INT_IMG = Pattern.compile(DebugConfig.defangValidIntImg,
            Pattern.CASE_INSENSITIVE);

    /** Attribute names for which sanitize() also runs the javascript/vbscript/data-URI checks. */
    private static final List<String> ATTRIBUTES_CAN_ALLOW_SCRIPTS = Arrays
            .asList(DebugConfig.defangACanAllowScripts.split(","));

    // matches the file format that convertd uses so it doesn't get 'pnsrc'ed
    private static final Pattern VALID_CONVERTD_FILE = Pattern.compile(DebugConfig.defangValidConvertdFile);

    // matches cid:1040f05975d4d4b8fcf8747be3eb9ae3c08e5cd4@
    private static final Pattern IMG_SKIP_OWASPSANITIZE = Pattern.compile(DebugConfig.defangImgSkipOwaspSanitize,
            Pattern.CASE_INSENSITIVE);

    // patterns applied to style text / style attribute values
    private static final Pattern COMMENT = Pattern.compile(DebugConfig.defangComment);
    protected static final Pattern STYLE_UNWANTED_FUNC = Pattern.compile(DebugConfig.defangStyleUnwantedFunc,
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    private static final Pattern STYLE_UNWANTED_IMPORT = Pattern.compile(DebugConfig.defangStyleUnwantedImport,
            Pattern.CASE_INSENSITIVE);

    //
    // Data
    //

    /** Cache of attribute sets, keyed by the raw comma-separated attribute list. */
    private static final HashMap<String, HashSet<String>> mAttrSetCache = new HashMap<String, HashSet<String>>();

    /** Accepted elements: lower-case element name to its set of allowed attribute names. */
    private static final HashMap<String, HashSet<String>> mAcceptedElements =
            new HashMap<String, HashSet<String>>();

    /** Removed elements: lower-case element name to {@link #NULL}. */
    private static final HashMap<String, Object> mRemovedElements = new HashMap<String, Object>();

    // per-document state
    private String mBaseHref = null;
    private URI mBaseHrefURI = null;

    /** Strip images */
    boolean mNeuterImages;

    /** The name of the element in the process of being removed. */
    protected String mRemovalElementName;

    /**
     * Tracks the recursive nesting level of the element being removed.
     * Since we're skipping from the element's open-tag to its close-tag,
     * we need to make sure not to stop skipping if another element of
     * the same type was nested in the first. For instance,
     * <pre>
     * &lt;skipme&gt;&lt;foo&gt;&lt;skipme&gt;XX&lt;/skipme&gt;&lt;/foo&gt;&lt;/skipme&gt;
     * </pre>
     * should not stop skipping at the first <tt>&lt;/skipme&gt;</tt>
     * but rather after the second.
     */
    protected int mRemovalElementCount;

    /** The style element depth; character data is style-sanitized while this is positive. */
    protected int mStyleDepth;

    private static final String CORE = "id,class,title,style,";
    private static final String LANG = "dir,lang,xml:lang,";
    private static final String CORE_LANG = CORE + LANG;
    private static final String KBD = "accesskey,tabindex,";

    static {
        // set which elements to accept
        acceptElement("a", CORE + KBD + ",charset,coords,href,hreflang,name,rel,rev,shape,target,type");
        acceptElement("address", CORE_LANG);
        // "base" is deliberately not accepted; its href is only recorded in handleOpenTag
        acceptElement("bdo", CORE_LANG);
        acceptElement("blockquote", CORE_LANG + "cite");
        acceptElement("body", CORE_LANG + "background"); //+"alink,background,bgcolor,link,text,vlink");
        acceptElement("br", CORE + "clear");
        acceptElement("center", CORE_LANG);
        acceptElement("del", CORE_LANG + "cite,datetime");
        acceptElement("div", CORE_LANG + "align");
        acceptElement("head", LANG); // profile attr removed
        acceptElement("h1", CORE_LANG + "align");
        acceptElement("h2", CORE_LANG + "align");
        acceptElement("h3", CORE_LANG + "align");
        acceptElement("h4", CORE_LANG + "align");
        acceptElement("h5", CORE_LANG + "align");
        acceptElement("h6", CORE_LANG + "align");
        acceptElement("hr", CORE_LANG + "align,noshade,size,width");
        acceptElement("html", LANG + "xmlns");
        acceptElement("img", CORE_LANG
                + "align,alt,border,height,hspace,ismap,longdesc,src,usemap,vspace,width,dfsrc,data-mce-src");
        acceptElement("ins", CORE_LANG + "cite");
        acceptElement("label", CORE_LANG + "for");
        //acceptElement("link", CORE_LANG+"charset,href,hreflang,media,ntarget,rel,rev,type");
        // NOTE: comment out noframes so its text shows up, since we are nuke frame-related tags
        //acceptElement("noframes", CORE_LANG);
        // NOTE: comment out noscript so its text shows up, since we are nuking script tags
        //acceptElement("noscript", CORE_LANG); // maybe convert to always execute if we are stripping script?
        acceptElement("p", CORE_LANG + "align");
        acceptElement("pre", CORE_LANG + "width");
        acceptElement("q", CORE_LANG + "cite");
        acceptElement("span", CORE_LANG);
        acceptElement("style", CORE_LANG);
        acceptElement("sub", CORE_LANG);
        acceptElement("sup", CORE_LANG);
        //acceptElement("title", CORE_LANG);
        acceptElement("title", "");

        if (ENABLE_FONT_STYLE_TAGS) {
            acceptElement("b", CORE_LANG);
            acceptElement("basefont", CORE_LANG + "color,face,size");
            acceptElement("big", CORE_LANG);
            acceptElement("font", CORE_LANG + "color,face,size");
            acceptElement("i", CORE_LANG);
            acceptElement("s", CORE_LANG);
            acceptElement("small", CORE_LANG);
            acceptElement("strike", CORE_LANG);
            acceptElement("tt", CORE_LANG);
            acceptElement("u", CORE_LANG);
        } else {
            // allow the text, just strip the tags
        }

        if (ENABLE_LIST_TAGS) {
            acceptElement("dir", CORE_LANG + "compact");
            acceptElement("dl", CORE_LANG);
            acceptElement("dt", CORE_LANG);
            acceptElement("li", CORE_LANG + "type,value");
            acceptElement("ol", CORE_LANG + "compact,start,type");
            acceptElement("ul", CORE_LANG + "compact,type");
            acceptElement("dd", CORE_LANG);
            acceptElement("menu", CORE_LANG + "compact");
        } else {
            // allow the text, just strip the tags
        }

        if (ENABLE_PHRASE_TAGS) {
            acceptElement("abbr", CORE_LANG);
            acceptElement("acronym", CORE_LANG);
            acceptElement("cite", CORE_LANG);
            acceptElement("code", CORE_LANG);
            acceptElement("dfn", CORE_LANG);
            acceptElement("em", CORE_LANG);
            acceptElement("kbd", CORE_LANG);
            acceptElement("samp", CORE_LANG);
            acceptElement("strong", CORE_LANG);
            acceptElement("var", CORE_LANG);
        } else {
            // allow the text, just strip the tags
        }

        if (ENABLE_TABLE_TAGS) {
            acceptElement("caption", CORE_LANG + "align");
            acceptElement("col", CORE_LANG + "alink,background,char,charoff,span,valign,width");
            acceptElement("colgroup", CORE_LANG + "alink,background,char,charoff,span,valign,width");
            acceptElement("table", CORE_LANG
                    + "align,valign,background,bgcolor,border,cellpadding,cellspacing,frame,rules,summary,width");
            acceptElement("tbody", CORE_LANG + "align,background,char,charoff,valign");
            acceptElement("td", CORE_LANG
                    + "abbr,align,axis,background,bgcolor,char,charoff,colspan,headers,height,nowrap,rowspan,scope,valign,width");
            acceptElement("tfoot", CORE_LANG + "align,background,char,charoff,valign");
            acceptElement("th", CORE_LANG
                    + "abbr,align,axis,background,bgcolor,char,charoff,colspan,headers,height,nowrap,rowspan,scope,valign,width");
            acceptElement("thead", CORE_LANG + "align,background,char,charoff,valign");
            acceptElement("tr", CORE_LANG + "align,background,bgcolor,char,charoff,valign");
        } else {
            // allow the text, just strip the tags
        }

        if (ENABLE_INPUT_TAGS) {
            acceptElement("area", CORE_LANG + KBD + "alt,coords,href,nohref,shape,target");
            acceptElement("button", CORE_LANG + KBD + "disabled,name,type,value");
            acceptElement("fieldset", CORE_LANG);
            acceptElement("form", CORE_LANG + "action,accept,acceptcharset,enctype,method,name,target");
            acceptElement("input", CORE_LANG
                    + "accept,align,alt,checked,disabled,maxlength,name,readonly,size,src,type,value");
            acceptElement("legend", CORE_LANG + "align");
            acceptElement("map", CORE_LANG + "name");
            acceptElement("optgroup", CORE_LANG + "disabled,label");
            acceptElement("option", CORE_LANG + KBD + "disabled,label,selected,value");
            acceptElement("select", CORE_LANG + KBD + "disabled,multiple,name,size");
            acceptElement("textarea", CORE_LANG + "cols,disabled,name,readonly,rows");
        } else {
            removeElement("area");
            removeElement("button");
            removeElement("fieldset");
            removeElement("form");
            removeElement("input");
            removeElement("legend");
            removeElement("map");
            removeElement("optgroup");
            removeElement("option");
            removeElement("select");
            removeElement("textarea");
        }

        // completely remove these elements and all enclosing tags/text
        removeElement("applet");
        removeElement("frame");
        removeElement("frameset");
        removeElement("iframe");
        removeElement("object");
        removeElement("script");

        // don't remove "content" of these tags since they have none.
        //removeElement("meta");
        //removeElement("param");
    }

    /**
     * Creates a filter and captures the virtual host of the current request context, if any.
     *
     * @param neuterImages when true, image-like {@code src} and {@code background}
     *        attributes are renamed so that they are not loaded
     */
    public DefangFilter(boolean neuterImages) {
        mNeuterImages = neuterImages;
        if (ZThreadLocal.getRequestContext() != null) {
            this.reqVirtualHost = ZThreadLocal.getRequestContext().getVirtualHost();
        }
    }

    /** Lower-cases with a fixed locale so that name matching does not depend on the JVM default locale. */
    private static String toLower(String s) {
        return s.toLowerCase(Locale.ROOT);
    }

    /**
     * Specifies that the given element should be accepted and, optionally,
     * which attributes of that element should be kept.
     *
     * @param element The element to accept.
     * @param attributes The comma-separated list of attributes to be kept or null if no
     *        attributes should be kept for this element.
     *
     * @see #removeElement
     */
    public static void acceptElement(String element, String attributes) {
        String key = toLower(element);
        // null is documented as "keep no attributes"; treat it like the empty list
        String attrList = (attributes == null) ? "" : attributes;

        HashSet<String> allowed = mAttrSetCache.get(attrList);
        if (allowed == null) {
            allowed = new HashSet<String>();
            for (String attr : toLower(attrList).split(",")) {
                // deal with consecutive commas
                if (attr.length() > 0) {
                    allowed.add(attr);
                }
            }
            mAttrSetCache.put(attrList, allowed);
        }
        mAcceptedElements.put(key, allowed);
    }

    /**
     * Specifies that the given element should be completely removed. If an
     * element is encountered during processing that is on the remove list,
     * the element's start and end tags as well as all of content contained
     * within the element will be removed from the processing stream.
     *
     * @param element The element to completely remove.
     */
    public static void removeElement(String element) {
        mRemovedElements.put(toLower(element), NULL);
    }

    //
    // XMLDocumentHandler methods
    //

    // since Xerces-J 2.2.0

    /** Start document: resets all per-document state before delegating. */
    @Override
    public void startDocument(XMLLocator locator, String encoding, NamespaceContext nscontext,
            Augmentations augs) throws XNIException {
        // Reset everything, not just the counter: a stale removal name with a zero
        // count would otherwise keep swallowing events and never unlatch.
        mRemovalElementName = null;
        mRemovalElementCount = 0;
        mStyleDepth = 0;
        mBaseHref = null;
        mBaseHrefURI = null;
        super.startDocument(locator, encoding, nscontext, augs);
    }

    // old methods

    /** Start document. */
    @Override
    public void startDocument(XMLLocator locator, String encoding, Augmentations augs) throws XNIException {
        startDocument(locator, encoding, null, augs);
    }

    /** Start prefix mapping; suppressed while inside a removed element. */
    @Override
    public void startPrefixMapping(String prefix, String uri, Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.startPrefixMapping(prefix, uri, augs);
        }
    }

    /** Start element. */
    @Override
    public void startElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException {
        String name = element.localpart;
        if (mRemovalElementName == null) {
            if (handleOpenTag(element, attributes)) {
                super.startElement(element, attributes, augs);
            }
        } else if (name.equalsIgnoreCase(mRemovalElementName)) {
            // nested occurrence of the element currently being removed
            mRemovalElementCount++;
        }
        if (name.equalsIgnoreCase("style")) {
            mStyleDepth++;
        }
    }

    /** Empty element. */
    @Override
    public void emptyElement(QName element, XMLAttributes attributes, Augmentations augs) throws XNIException {
        if (mRemovalElementName == null && handleOpenTag(element, attributes)) {
            super.emptyElement(element, attributes, augs);
        }
    }

    /** Comment. */
    @Override
    public void comment(XMLString text, Augmentations augs) throws XNIException {
        // we can safely ignore comments
        // they can only provide loop holes for hackers to exploit
        // e.g. CDATA sections are reported as comments with our HTML parser configuration
    }

    /** Processing instruction. */
    @Override
    public void processingInstruction(String target, XMLString data, Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.processingInstruction(target, data, augs);
        }
    }

    /** Characters; text inside a style element is sanitized before being passed on. */
    @Override
    public void characters(XMLString text, Augmentations augs) throws XNIException {
        if (mRemovalElementName != null) {
            return;
        }
        if (mStyleDepth <= 0) {
            super.characters(text, augs);
            return;
        }
        String raw = text.toString();
        String cleaned = StringUtil.isAsciiString(raw)
                ? sanitizeStyleValue(raw)
                : extractAndSanitizeAsciiData(raw);
        super.characters(new XMLString(cleaned.toCharArray(), 0, cleaned.length()), augs);
    }

    /**
     * Strips comments, unwanted functions and {@code @import} rules (as defined by the
     * configured patterns) from a piece of style text.
     */
    private static String sanitizeStyleValue(String value) {
        // remove comments
        value = COMMENT.matcher(value).replaceAll("");
        // strip off unwanted functions
        value = STYLE_UNWANTED_FUNC.matcher(value).replaceAll("");
        // strip off any @import
        return STYLE_UNWANTED_IMPORT.matcher(value).replaceAll("");
    }

    /** Ignorable whitespace. */
    @Override
    public void ignorableWhitespace(XMLString text, Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.ignorableWhitespace(text, augs);
        }
    }

    /** Start general entity. */
    @Override
    public void startGeneralEntity(String name, XMLResourceIdentifier id, String encoding, Augmentations augs)
            throws XNIException {
        if (mRemovalElementName == null) {
            super.startGeneralEntity(name, id, encoding, augs);
        }
    }

    /** Text declaration. */
    @Override
    public void textDecl(String version, String encoding, Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.textDecl(version, encoding, augs);
        }
    }

    /** End general entity. */
    @Override
    public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.endGeneralEntity(name, augs);
        }
    }

    /** Start CDATA section. */
    @Override
    public void startCDATA(Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.startCDATA(augs);
        }
    }

    /** End CDATA section. */
    @Override
    public void endCDATA(Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.endCDATA(augs);
        }
    }

    /** End element. */
    @Override
    public void endElement(QName element, Augmentations augs) throws XNIException {
        String name = element.localpart;
        if (mRemovalElementName == null) {
            if (elementAccepted(element.rawname)) {
                super.endElement(element, augs);
            }
        } else if (name.equalsIgnoreCase(mRemovalElementName) && --mRemovalElementCount == 0) {
            mRemovalElementName = null;
        }
        // Never go below zero: an unmatched </style> must not cancel out the next
        // <style>, otherwise that element's text would bypass style sanitizing.
        if (name.equalsIgnoreCase("style") && mStyleDepth > 0) {
            mStyleDepth--;
        }
    }

    /** End prefix mapping. */
    @Override
    public void endPrefixMapping(String prefix, Augmentations augs) throws XNIException {
        if (mRemovalElementName == null) {
            super.endPrefixMapping(prefix, augs);
        }
    }

    //
    // Protected methods
    //

    /** Returns true if the specified element is accepted. */
    protected static boolean elementAccepted(String element) {
        return mAcceptedElements.containsKey(toLower(element));
    }

    /** Returns true if the specified element should be removed. */
    protected static boolean elementRemoved(String element) {
        return mRemovedElements.containsKey(toLower(element));
    }

    /**
     * Handles an open tag: records a {@code <base href>}, filters and sanitizes the
     * attributes of accepted elements, and starts removal mode for removed elements.
     *
     * @return true if the element is accepted and should be passed on, false otherwise
     */
    protected boolean handleOpenTag(QName element, XMLAttributes attributes) {
        String eName = toLower(element.rawname);

        if (eName.equals("base")) {
            rememberBaseHref(attributes);
        }

        if (!elementAccepted(element.rawname)) {
            if (elementRemoved(element.rawname)) {
                mRemovalElementName = element.rawname;
                mRemovalElementCount = 1;
            }
            return false;
        }

        HashSet<String> anames = mAcceptedElements.get(eName);
        if (anames == null) {
            attributes.removeAllAttributes();
        } else {
            int attributeCount = attributes.getLength();
            for (int i = 0; i < attributeCount; i++) {
                String aName = toLower(attributes.getQName(i));
                // remove the attribute if it isn't in the list of accepted names
                // or it has invalid content
                if (!anames.contains(aName) || removeAttrValue(eName, aName, attributes, i)) {
                    attributes.removeAttributeAt(i--);
                    attributeCount--;
                } else {
                    sanitizeAttrValue(eName, aName, attributes, i);
                }
            }
        }

        boolean isImageLike = eName.equals("img") || eName.equals("input");
        boolean isLinkLike = eName.equals("a") || eName.equals("area");

        if (isImageLike) {
            fixUrlBase(attributes, "src");
        } else if (isLinkLike) {
            fixUrlBase(attributes, "href");
        }
        fixUrlBase(attributes, "background");

        if (isLinkLike) {
            fixATag(attributes);
        }

        if (mNeuterImages) {
            String srcValue = Strings.nullToEmpty(attributes.getValue("src"));
            if (isImageLike) {
                boolean external = VALID_EXT_URL.matcher(srcValue).find();
                boolean internal = VALID_INT_IMG.matcher(srcValue).find();
                boolean imgFile = VALID_IMG_FILE.matcher(srcValue).find();
                if (external || (!internal && !imgFile)) {
                    neuterTag(attributes, "src", "df");
                } else if (!internal && imgFile && !VALID_CONVERTD_FILE.matcher(srcValue).find()) {
                    neuterTag(attributes, "src", "pn");
                }
            }
            neuterTag(attributes, "background", "df");
        }
        return true;
    }

    /** Stores the href of a base element, both as a string and (when parseable) as a URI. */
    private void rememberBaseHref(XMLAttributes attributes) {
        int index = attributes.getIndex("href");
        if (index == -1) {
            return;
        }
        mBaseHref = attributes.getValue(index);
        if (mBaseHref == null) {
            return;
        }
        try {
            mBaseHrefURI = new URI(mBaseHref);
        } catch (URISyntaxException e) {
            // not a parseable URI: fall back to plain string concatenation in fixUrlBase
            if (!mBaseHref.endsWith("/")) {
                mBaseHref += "/";
            }
        }
    }

    /**
     * Rewrites a scheme-less URL attribute against the recorded base href. Does nothing when
     * the attribute is absent, has no value, contains a ':' or no base href was seen.
     */
    private void fixUrlBase(XMLAttributes attributes, String attrName) {
        int index = attributes.getIndex(attrName);
        if (index == -1) {
            return;
        }
        String value = attributes.getValue(index);
        // null check must come before any use of value
        if (mBaseHref == null || value == null || value.indexOf(":") != -1) {
            return;
        }
        // NOTE(review): forcing a leading "/" makes every relative URL resolve against the
        // base's root rather than its directory - kept as-is, confirm this is intended.
        if (!value.startsWith("/")) {
            value = "/" + value;
        }
        if (mBaseHrefURI != null) {
            try {
                attributes.setValue(index, mBaseHrefURI.resolve(value).toString());
                return;
            } catch (IllegalArgumentException e) {
                // ignore and do string-logic
            }
        }
        attributes.setValue(index, mBaseHref + value);
    }

    /**
     * Renames attribute {@code aName} to {@code prefix + aName} (e.g. src to dfsrc) and
     * removes every remaining attribute called {@code aName}.
     *
     * @param attributes the attributes of the current element
     * @param aName name of the attribute to neuter
     * @param prefix prefix for the replacement attribute name
     */
    private void neuterTag(XMLAttributes attributes, String aName, String prefix) {
        String neuteredName = prefix + aName;
        int neuteredIndex = attributes.getIndex(neuteredName);
        int index = attributes.getIndex(aName);
        if (index == -1) {
            return;
        }
        String aValue = attributes.getValue(index);
        if (neuteredIndex != -1) {
            attributes.setValue(neuteredIndex, aValue);
        } else {
            attributes.addAttribute(new QName("", neuteredName, neuteredName, null), "CDATA", aValue);
        }
        // remove the original and any dups if there are multiple src attributes
        do {
            attributes.removeAttributeAt(index);
            index = attributes.getIndex(aName);
        } while (index != -1);
    }

    /**
     * make sure all &lt;a&gt; tags have a target="_blank" attribute set.
     *
     * @param attributes the attributes of the current element
     */
    private void fixATag(XMLAttributes attributes) {
        // BEGIN: bug 7927
        int index = attributes.getIndex("href");
        if (index == -1) {
            // links that don't have a href don't need target="_blank"
            return;
        }
        String href = attributes.getValue(index);
        if (href != null && href.indexOf('#') == 0) {
            // LOCAL links don't need target="_blank"
            return;
        }
        // END: bug 7927
        index = attributes.getIndex("target");
        if (index != -1) {
            attributes.setValue(index, "_blank");
        } else {
            attributes.addAttribute(new QName("", "target", "target", null), "CDATA", "_blank");
        }
    }

    /**
     * Checks to see if an attr value should just be removed.
     *
     * @param eName The element name
     * @param aName The attribute name
     * @param attributes The set of the attributes
     * @param i The index of the attribute
     * @return true if the attr should be removed, false if not
     */
    private boolean removeAttrValue(String eName, String aName, XMLAttributes attributes, int i) {
        String value = attributes.getValue(i);
        // get rid of any spaces that might throw off the regex; a missing value is treated as empty
        value = (value == null) ? "" : value.trim();

        if (aName.equalsIgnoreCase("href")) {
            if (VALID_EXT_URL.matcher(value).find()) {
                return false;
            }
            sanitizeAttrValue(eName, aName, attributes, i);
        } else if (aName.equalsIgnoreCase("longdesc") || aName.equalsIgnoreCase("usemap")) {
            if (!VALID_EXT_URL.matcher(value).find()) {
                return true;
            }
        }

        // We'll treat the SRC a little different since deleting it
        // may annoy the front end. Here, we'll check for
        // a valid url as well as just a valid filename in the
        // case that its an inline image
        if (aName.equals("src") || aName.equals("dfsrc") || aName.equals("data-mce-src")) {
            boolean acceptable = VALID_EXT_URL.matcher(value).find()
                    || VALID_INT_IMG.matcher(value).find()
                    || VALID_IMG_FILE.matcher(value).find();
            if (!acceptable) {
                attributes.setValue(i, "#");
            }
        }
        return false;
    }

    /**
     * Sanitizes a single attribute value: normalizes an obfuscated script scheme, runs the
     * OWASP sanitizer (unless the value matches the skip pattern) and blocks JS entities and
     * script tags. When {@code isAllowedScript} is true it additionally blocks
     * javascript:, vbscript: and data: content.
     *
     * @param result the attribute value; null is returned unchanged
     * @param isAllowedScript whether the attribute is one that could carry script
     * @return the sanitized value
     */
    public static String sanitize(String result, boolean isAllowedScript) {
        if (result == null) {
            return null;
        }
        result = removeAnySpacesAndEncodedChars(result);
        if (!IMG_SKIP_OWASPSANITIZE.matcher(result).find()) {
            result = sanitizer.sanitize(result);
        }
        result = AV_JS_ENTITY.matcher(result).replaceAll("JS-ENTITY-BLOCKED");
        result = AV_SCRIPT_TAG.matcher(result).replaceAll("SCRIPT-TAG-BLOCKED");
        if (isAllowedScript) {
            result = AV_TAB.matcher(result).replaceAll("");
            if (AV_JAVASCRIPT.matcher(result).find()) {
                result = AV_JAVASCRIPT.matcher(result).replaceAll("JAVASCRIPT-BLOCKED:");
            } else if (!VALID_INT_IMG.matcher(result).find()) {
                result = DATA_URI.matcher(result).replaceAll("DATAURI-BLOCKED:");
            }
            result = AV_VBSCRIPT.matcher(result).replaceAll("VBSCRIPT-BLOCKED:");
        }
        return result;
    }

    /**
     * If the text before the first ':' spells "javascript" or "vbscript" once whitespace is
     * removed and HTML entities are decoded, returns the value with that normalized prefix;
     * otherwise returns the value unchanged.
     *
     * @param result the attribute value; null is returned unchanged
     * @return the value, possibly with a de-obfuscated scheme prefix
     */
    public static String removeAnySpacesAndEncodedChars(String result) {
        if (result == null) {
            return null;
        }
        int colon = result.indexOf(":");
        if (colon == -1) {
            return result;
        }
        StringBuilder compact = new StringBuilder(colon);
        for (int i = 0; i < colon; i++) {
            char ch = result.charAt(i);
            if (!Character.isWhitespace(ch)) {
                compact.append(ch);
            }
        }
        String scheme = StringEscapeUtils.unescapeHtml(compact.toString());
        String schemeLower = toLower(scheme);
        if (schemeLower.contains("javascript") || schemeLower.contains("vbscript")) {
            return scheme + result.substring(colon);
        }
        return result;
    }

    /**
     * sanitize an attr value. For now, this means stripping out Java Script entity tags
     * &amp;{...}, and &lt;script&gt; tags. Style attributes are passed through the style
     * sanitizer instead, and a form action that targets the request's own host is blocked
     * when that check is enabled.
     */
    private void sanitizeAttrValue(String eName, String aName, XMLAttributes attributes, int i) {
        String value = attributes.getValue(i);
        if (value == null) {
            return;
        }
        boolean canAllowScript = ATTRIBUTES_CAN_ALLOW_SCRIPTS.contains(toLower(aName));
        String result = sanitize(value, canAllowScript);
        if (aName.equalsIgnoreCase("style")) {
            // NOTE(review): style is sanitized from the raw value, not from the result above
            result = sanitizeStyleValue(value);
        }
        if (!result.equals(value)) {
            attributes.setValue(i, result);
        }

        if (aName.equalsIgnoreCase("action") && sameHostFormPostCheck && this.reqVirtualHost != null) {
            try {
                URL url = new URL(value);
                String formActionHost = url.getHost();
                if (formActionHost != null && formActionHost.length() > 0
                        && formActionHost.equalsIgnoreCase(reqVirtualHost)) {
                    // Host names are case-insensitive, so the replacement has to be as well;
                    // work on the sanitized result so the sanitizing above is not undone.
                    String blocked = Pattern.compile(Pattern.quote(formActionHost), Pattern.CASE_INSENSITIVE)
                            .matcher(result).replaceAll("SAMEHOSTFORMPOST-BLOCKED");
                    if (blocked.equals(result)) {
                        // host could not be located in the sanitized value: fail closed
                        blocked = "SAMEHOSTFORMPOST-BLOCKED";
                    }
                    attributes.setValue(i, blocked);
                }
            } catch (MalformedURLException e) {
                ZimbraLog.soap.info("Failure while trying to block mailicious code. Check for URL "
                        + " match between the host and the action URL of a FORM."
                        + "Error parsing URL, possible relative URL." + e.getMessage());
                attributes.setValue(i, "SAMEHOSTFORMPOST-BLOCKED");
            }
        }
    }

    /**
     * Sanitizes style text that contains non-ASCII characters: every maximal run of ASCII
     * characters (including the one at the end of the input) is passed through
     * {@link #sanitizeStyleValue(String)}, non-ASCII characters are copied as they are.
     *
     * @param data the style text
     * @return the sanitized text
     */
    @VisibleForTesting
    String extractAndSanitizeAsciiData(String data) {
        StringBuilder sanitized = new StringBuilder(data.length());
        StringBuilder asciiRun = new StringBuilder();
        for (char ch : data.toCharArray()) {
            if (ch <= ASCII_DATA_VALUE) {
                asciiRun.append(ch);
            } else {
                flushAsciiRun(asciiRun, sanitized);
                sanitized.append(ch);
            }
        }
        // the trailing run must be sanitized too, not appended raw
        flushAsciiRun(asciiRun, sanitized);
        return sanitized.toString();
    }

    /** Appends the sanitized form of a pending ASCII run to {@code out} and empties the run. */
    private static void flushAsciiRun(StringBuilder asciiRun, StringBuilder out) {
        if (asciiRun.length() > 0) {
            out.append(sanitizeStyleValue(asciiRun.toString()));
            asciiRun.setLength(0);
        }
    }
}