// Java tutorial
/*
 * Copyright (c) 2010-2011, Martijn Brinkers, Djigzo.
 *
 * This file is part of Djigzo email encryption.
 *
 * Djigzo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License
 * version 3, 19 November 2007 as published by the Free Software
 * Foundation.
 *
 * Djigzo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public
 * License along with Djigzo. If not, see <http://www.gnu.org/licenses/>
 *
 * Additional permission under GNU AGPL version 3 section 7
 *
 * If you modify this Program, or any covered work, by linking or
 * combining it with aspectjrt.jar, aspectjweaver.jar, tyrex-1.0.3.jar,
 * freemarker.jar, dom4j.jar, mx4j-jmx.jar, mx4j-tools.jar,
 * spice-classman-1.0.jar, spice-loggerstore-0.5.jar, spice-salt-0.8.jar,
 * spice-xmlpolicy-1.0.jar, saaj-api-1.3.jar, saaj-impl-1.3.jar,
 * wsdl4j-1.6.1.jar (or modified versions of these libraries),
 * containing parts covered by the terms of Eclipse Public License,
 * tyrex license, freemarker license, dom4j license, mx4j license,
 * Spice Software License, Common Development and Distribution License
 * (CDDL), Common Public License (CPL) the licensors of this Program grant
 * you additional permission to convey the resulting work.
 */
package mitm.common.dlp.impl;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import mitm.common.dlp.MatchFilter;
import mitm.common.dlp.PolicyChecker;
import mitm.common.dlp.PolicyCheckerContext;
import mitm.common.dlp.PolicyPattern;
import mitm.common.dlp.PolicyViolation;
import mitm.common.dlp.PolicyViolationException;
import mitm.common.util.SizeUtils;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.text.StrBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * PolicyChecker that checks whether the input contains any of the provided regular expressions.
 *
 * The context should contain the following keys and associated types:
 *
 * The regular expression patterns:
 * Key: mitm.common.dlp.patterns @see {@link ContextKeys}
 * Type: Collection&lt;PolicyPattern&gt; (elements that are not a PolicyPattern are logged and skipped)
 *
 * The content:
 * Key: mitm.common.dlp.content @see {@link ContextKeys}
 * Type: String
 *
 * Usage: {@link #update(PolicyCheckerContext)} is called once per piece of content and records the matches
 * in the context; {@link #finish(PolicyCheckerContext)} then reports every rule whose number of matches
 * reached the threshold of its pattern.
 *
 * Note: overlapLength, maxMatchWidth and totalMaxMatchWidth should only be changed before this PolicyChecker is
 * used in a multi-threaded environment because access to these properties is not thread safe.
 *
 * @author Martijn Brinkers
 *
 */
public class RegExpPolicyChecker implements PolicyChecker
{
    private static final Logger logger = LoggerFactory.getLogger(RegExpPolicyChecker.class);

    /*
     * The name of the context key under which the LocalContext (which keeps track of the hits of
     * every matching rule) is stored.
     */
    private static final String CONTEXT_KEY = "mitm.common.dlp.impl.RegExpPolicyChecker";

    /**
     * The name of this policy
     */
    public static final String POLICY_NAME = "RegExp";

    /*
     * The length of the previous content which will be added to the new content to create some overlap.
     * This is done to make sure that even if the string split is done within some keyword (for example a SSN) the
     * complete keyword is still scanned.
     */
    private int overlapLength = 64;

    /*
     * The max length of an individual match
     */
    private int maxMatchWidth = 32;

    /*
     * The total max length of all matches for a rule
     */
    private int totalMaxMatchWidth = SizeUtils.KB * 1;

    /*
     * Stores the number of hits and the matched text for one rule.
     *
     * This is deliberately a (non-static) inner class: it reads maxMatchWidth and totalMaxMatchWidth
     * of the enclosing RegExpPolicyChecker.
     */
    private class RuleMatch
    {
        /*
         * The PolicyPattern
         */
        private final PolicyPattern policyPattern;

        /*
         * Keeps track of the number of matches for this rule
         */
        private int matchCount;

        /*
         * All the matches found for this rule (comma separated, lazily created on the first match)
         */
        private StrBuilder matches;

        public RuleMatch(PolicyPattern policyPattern) {
            this.policyPattern = policyPattern;
        }

        /*
         * Counts the match and, as long as totalMaxMatchWidth has not been reached, adds the
         * (abbreviated and CSV escaped) match to the recorded matches. The match is always counted,
         * even when the text is no longer recorded.
         */
        public void addMatch(String match)
        {
            if (matches == null) {
                matches = new StrBuilder(1024);
            }

            matchCount++;

            if (matches.length() >= totalMaxMatchWidth)
            {
                logger.debug("totalMaxMatchWidth exceeded.");

                return;
            }

            matches.appendSeparator(", ");

            /*
             * Make sure the matched part is not too long and that it's escaped when it contains a comma or quote
             */
            matches.append(StringEscapeUtils.escapeCsv(StringUtils.abbreviate(match, maxMatchWidth)));

            /*
             * If length reaches or exceeds totalMaxMatchWidth cut off and add ...
             */
            if (matches.length() >= totalMaxMatchWidth) {
                matches.setLength(totalMaxMatchWidth).append("...");
            }
        }

        public PolicyPattern getPolicyPattern() {
            return policyPattern;
        }

        /*
         * Returns the recorded matches or an empty string when nothing was recorded.
         */
        public String getMatches() {
            return matches != null ? matches.toString() : "";
        }

        /*
         * The rule is violated when the number of matches reached the threshold of the pattern.
         */
        public boolean isViolated() {
            return matchCount >= policyPattern.getThreshold();
        }
    }

    /*
     * Container class which will be stored in the context. It does not use any state of the
     * enclosing RegExpPolicyChecker and is therefore a static nested class.
     */
    private static class LocalContext
    {
        /*
         * Map that stores found reg exp matches (key is the name of the PolicyPattern)
         */
        private final Map<String, RuleMatch> ruleMatches = new HashMap<String, RuleMatch>();

        /*
         * This PolicyChecker can be called multiple times with different contents when the input is too large
         * to be handled at once. When the input is too large it will be split up into multiple strings. The point
         * at which the content will be split will be 'arbitrarily' chosen. To compensate we will add a part of the
         * previous content to the new content (overlap)
         */
        private String previousContent;

        public String getPreviousContent() {
            return previousContent;
        }

        public void setPreviousContent(String previousContent) {
            this.previousContent = previousContent;
        }

        public Map<String, RuleMatch> getRuleMatches() {
            return ruleMatches;
        }
    }

    @Override
    public String getName() {
        return POLICY_NAME;
    }

    /*
     * Returns the content to scan: the content of the context, prefixed with the stored tail of the
     * previous content when the context is partial. Returns null when the context has no content.
     *
     * Side effect: the tail (at most overlapLength characters) of the new content is stored in the
     * LocalContext for the next call.
     */
    private String getContent(PolicyCheckerContext context)
    {
        /*
         * Get part of the previous content to create some overlap
         */
        LocalContext localContext = getLocalContext(context);

        String previousContent = localContext.getPreviousContent();

        String newContent = context.getContent();

        if (newContent == null) {
            return null;
        }

        /*
         * Always remember the tail of the new content so it can be used as overlap for the next call.
         * This must be done before the overlap is prepended, otherwise the tail would be taken from
         * the combined content.
         */
        localContext.setPreviousContent(StringUtils.right(newContent, overlapLength));

        /*
         * The overlap is only prepended when the content is partial.
         *
         * NOTE(review): a match that lies completely inside the overlap was already scanned with the
         * previous content and seems to be counted again here - confirm whether that is acceptable.
         */
        if (context.isPartial() && StringUtils.isNotEmpty(previousContent)) {
            newContent = previousContent + newContent;
        }

        return newContent;
    }

    /*
     * Returns the LocalContext stored in the context. A new LocalContext is created and stored when
     * the context does not yet contain one.
     */
    private LocalContext getLocalContext(PolicyCheckerContext context)
    {
        LocalContext localContext = context.get(CONTEXT_KEY, LocalContext.class);

        if (localContext == null)
        {
            localContext = new LocalContext();

            context.set(CONTEXT_KEY, localContext);
        }

        return localContext;
    }

    /*
     * Returns the RuleMatch registered under the name of the pattern. A new RuleMatch is created and
     * registered when there is none yet.
     */
    private RuleMatch getRuleMatch(PolicyPattern policyPattern, PolicyCheckerContext context)
    {
        Map<String, RuleMatch> map = getLocalContext(context).getRuleMatches();

        RuleMatch match = map.get(policyPattern.getName());

        if (match == null)
        {
            match = new RuleMatch(policyPattern);

            map.put(policyPattern.getName(), match);
        }

        return match;
    }

    @Override
    public void init(PolicyCheckerContext context)
    {
        /*
         * no initialization required
         */
    }

    /**
     * Scans the content of the context with every pattern of the context and records all matches in
     * the context. Nothing is scanned when the content or the patterns are missing. Elements of the
     * pattern collection that are null, that are not a PolicyPattern, or that have no compiled
     * pattern are logged and skipped.
     *
     * @param context the context providing the content and the patterns and storing the matches
     */
    @Override
    public void update(PolicyCheckerContext context)
    {
        String content = getContent(context);

        Collection<?> patterns = context.getPatterns();

        if (content == null)
        {
            logger.debug("Content is missing.");

            return;
        }

        if (patterns == null)
        {
            logger.debug("Patterns are missing.");

            return;
        }

        for (Object element : patterns)
        {
            if (!(element instanceof PolicyPattern))
            {
                /*
                 * instanceof is also false for null, so the element must not be dereferenced
                 * unconditionally here.
                 */
                logger.warn("Pattern is-not-a PolicyPattern but a {}", element != null ? element.getClass() : null);

                continue;
            }

            PolicyPattern policyPattern = (PolicyPattern) element;

            Pattern pattern = policyPattern.getPattern();

            if (pattern == null)
            {
                logger.warn("Pattern is missing.");

                continue;
            }

            Matcher matcher = pattern.matcher(content);

            MatchFilter matchFilter = policyPattern.getMatchFilter();

            while (matcher.find())
            {
                /*
                 * The RuleMatch is only looked up (and created) when there is a match. A RuleMatch
                 * without any match must not be registered because finish would report it as a
                 * violation when the threshold of the pattern is 0.
                 */
                RuleMatch ruleMatch = getRuleMatch(policyPattern, context);

                String match = matcher.group();

                if (matchFilter != null) {
                    match = matchFilter.filter(match);
                }

                ruleMatch.addMatch(match);

                logger.debug("Match found. Rule: {}. Match: {}", policyPattern.getName(), match);
            }
        }
    }

    /**
     * Checks all matches recorded in the context and reports the rules that are violated.
     *
     * @param context the context in which the matches were recorded
     * @throws PolicyViolationException when at least one rule is violated. The exception contains a
     * PolicyViolation for every violated rule.
     */
    @Override
    public void finish(PolicyCheckerContext context)
    throws PolicyViolationException
    {
        PolicyViolationException policyViolationException = null;

        /*
         * Step through all found matches and see whether a policy is violated.
         */
        for (RuleMatch ruleMatch : getLocalContext(context).getRuleMatches().values())
        {
            if (!ruleMatch.isViolated()) {
                continue;
            }

            PolicyPattern policyPattern = ruleMatch.getPolicyPattern();

            PolicyViolation violation = new PolicyViolationImpl(getName(), policyPattern.getName(),
                    ruleMatch.getMatches(), policyPattern.getPriority());

            logger.debug("Policy violation.{}", violation);

            /*
             * The exception is only created when there is at least one violation and is shared by
             * all violations.
             */
            if (policyViolationException == null) {
                policyViolationException = new PolicyViolationException("Policy violation");
            }

            policyViolationException.addViolation(violation);
        }

        if (policyViolationException != null) {
            throw policyViolationException;
        }
    }

    public int getOverlapLength() {
        return overlapLength;
    }

    public void setOverlapLength(int overlapLength) {
        this.overlapLength = overlapLength;
    }

    public int getMaxMatchWidth() {
        return maxMatchWidth;
    }

    /**
     * Sets the max length of an individual recorded match.
     *
     * @param maxMatchWidth the max length, must be at least 4
     * @throws IllegalArgumentException when maxMatchWidth is smaller than 4
     */
    public void setMaxMatchWidth(int maxMatchWidth)
    {
        /*
         * StringUtils.abbreviate requires at least 4
         */
        if (maxMatchWidth < 4) {
            throw new IllegalArgumentException("Minimal width is 4.");
        }

        this.maxMatchWidth = maxMatchWidth;
    }

    public int getTotalMaxMatchWidth() {
        return totalMaxMatchWidth;
    }

    public void setTotalMaxMatchWidth(int totalMaxMatchWidth) {
        this.totalMaxMatchWidth = totalMaxMatchWidth;
    }
}