mitm.common.dlp.impl.RegExpPolicyChecker.java Source code

Java tutorial

Introduction

Here is the source code for mitm.common.dlp.impl.RegExpPolicyChecker.java

Source

/*
 * Copyright (c) 2010-2011, Martijn Brinkers, Djigzo.
 * 
 * This file is part of Djigzo email encryption.
 *
 * Djigzo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License 
 * version 3, 19 November 2007 as published by the Free Software 
 * Foundation.
 *
 * Djigzo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public 
 * License along with Djigzo. If not, see <http://www.gnu.org/licenses/>
 *
 * Additional permission under GNU AGPL version 3 section 7
 * 
 * If you modify this Program, or any covered work, by linking or 
 * combining it with aspectjrt.jar, aspectjweaver.jar, tyrex-1.0.3.jar, 
 * freemarker.jar, dom4j.jar, mx4j-jmx.jar, mx4j-tools.jar, 
 * spice-classman-1.0.jar, spice-loggerstore-0.5.jar, spice-salt-0.8.jar, 
 * spice-xmlpolicy-1.0.jar, saaj-api-1.3.jar, saaj-impl-1.3.jar, 
 * wsdl4j-1.6.1.jar (or modified versions of these libraries), 
 * containing parts covered by the terms of Eclipse Public License, 
 * tyrex license, freemarker license, dom4j license, mx4j license,
 * Spice Software License, Common Development and Distribution License
 * (CDDL), Common Public License (CPL) the licensors of this Program grant 
 * you additional permission to convey the resulting work.
 */
package mitm.common.dlp.impl;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import mitm.common.dlp.MatchFilter;
import mitm.common.dlp.PolicyChecker;
import mitm.common.dlp.PolicyCheckerContext;
import mitm.common.dlp.PolicyPattern;
import mitm.common.dlp.PolicyViolation;
import mitm.common.dlp.PolicyViolationException;
import mitm.common.util.SizeUtils;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.text.StrBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * PolicyChecker that checks whether the input contains any of the provided regular expressions.
 * 
 * The Context should contain the following keys and associated types:
 * 
 * The regular expressions patterns:
 *   Key: mitm.common.dlp.patterns @see {@link ContextKeys}
 *   Type: Collection&lt;PolicyPattern&gt;
 *   
 * The content:
 *   Key: mitm.common.dlp.content @see {@link ContextKeys}
 *   Type: String
 * 
 * Note: overlapLength, maxMatchWidth and totalMaxMatchWidth should only be changed before this PolicyChecker is
 * used in a multi-threaded environment because access to these properties is not thread safe.
 * 
 * @author Martijn Brinkers
 *
 */
public class RegExpPolicyChecker implements PolicyChecker {
    private static final Logger logger = LoggerFactory.getLogger(RegExpPolicyChecker.class);

    /*
     * The name of the context key under which the LocalContext (rule matches found so far and the
     * overlap taken from the previous content) is stored.
     */
    private static final String CONTEXT_KEY = "mitm.common.dlp.impl.RegExpPolicyChecker";

    /**
     * The name of this policy
     */
    public static final String POLICY_NAME = "RegExp";

    /*
     * The length of the previous content which will be added to the new content to create some overlap.
     * This is done to make sure that even if the string split is done within some keyword (for example a SSN) the 
     * complete keyword is still scanned.
     */
    private int overlapLength = 64;

    /*
     * The max length of an individual match
     */
    private int maxMatchWidth = 32;

    /*
     * The total max length of all matches for a rule
     */
    private int totalMaxMatchWidth = SizeUtils.KB * 1;

    /*
     * Stores the number of hits and the matched text for a single PolicyPattern.
     *
     * This is deliberately a (non-static) inner class because addMatch reads maxMatchWidth and
     * totalMaxMatchWidth from the enclosing RegExpPolicyChecker.
     */
    private class RuleMatch {
        /*
         * The PolicyPattern
         */
        private final PolicyPattern policyPattern;

        /*
         * Keeps track of the number of matches for this rule
         */
        private int matchCount;

        /*
         * All the matches found for this rule (comma separated). Lazily created on the first match.
         */
        private StrBuilder matches;

        public RuleMatch(PolicyPattern policyPattern) {
            this.policyPattern = policyPattern;
        }

        /*
         * Registers a match. The match count is always incremented. The matched text is only recorded as 
         * long as the recorded text has not yet reached totalMaxMatchWidth.
         */
        public void addMatch(String match) {
            if (matches == null) {
                matches = new StrBuilder(1024);
            }

            matchCount++;

            if (matches.length() >= totalMaxMatchWidth) {
                logger.debug("totalMaxMatchWidth exceeded.");

                return;
            }

            matches.appendSeparator(", ");

            /*
             * Make sure the matched part is not too long and that it's escaped when it contains a comma or quote
             */
            matches.append(StringEscapeUtils.escapeCsv(StringUtils.abbreviate(match, maxMatchWidth)));

            /*
             * If length reaches or exceeds totalMaxMatchWidth, cut back to totalMaxMatchWidth and add ...
             * Because of the added dots, all following matches will only be counted and no longer recorded.
             */
            if (matches.length() >= totalMaxMatchWidth) {
                matches.setLength(totalMaxMatchWidth).append("...");
            }
        }

        public PolicyPattern getPolicyPattern() {
            return policyPattern;
        }

        /*
         * Returns the recorded matches, or an empty string if nothing has been recorded.
         */
        public String getMatches() {
            return matches != null ? matches.toString() : "";
        }

        /*
         * The rule is violated when the number of matches has reached the threshold of the pattern.
         */
        public boolean isViolated() {
            return matchCount >= policyPattern.getThreshold();
        }
    }

    /*
     * Container class which will be stored in the context.
     *
     * Static because it does not need access to the enclosing RegExpPolicyChecker. This way an instance 
     * stored in a PolicyCheckerContext does not keep a hidden reference to the checker.
     */
    private static class LocalContext {
        /*
         * Map that stores found reg exp matches. The key is the name of the PolicyPattern.
         */
        private final Map<String, RuleMatch> ruleMatches = new HashMap<String, RuleMatch>();

        /*
         * This PolicyChecker can be called multiple times with different contents when the input is too large
         * to be handled at once. When the input is too large it will be split up into multiple strings. The point 
         * at which the content will be split will be 'arbitrarily' chosen. To compensate we will add a part of the
         * previous content to the new content (overlap)
         */
        private String previousContent;

        public String getPreviousContent() {
            return previousContent;
        }

        public void setPreviousContent(String previousContent) {
            this.previousContent = previousContent;
        }

        public Map<String, RuleMatch> getRuleMatches() {
            return ruleMatches;
        }
    }

    @Override
    public String getName() {
        return POLICY_NAME;
    }

    /*
     * Returns the content that should be scanned: the content from the context, prefixed with the overlap
     * from the previous call if the context is partial. Returns null if the context has no content.
     */
    private String getContent(PolicyCheckerContext context) {
        LocalContext localContext = getLocalContext(context);

        /*
         * Get the overlap stored by the previous call (if any) before it is replaced below
         */
        String previousContent = localContext.getPreviousContent();

        String newContent = context.getContent();

        if (newContent != null) {
            /*
             * Store the last overlapLength characters of the new content as overlap for the next call.
             * 
             * NOTE(review): the overlap is stored for every non-null content, also when the context is 
             * not partial. An earlier version of this comment said "only if it was partial content". 
             * Confirm which one is intended.
             */
            localContext.setPreviousContent(StringUtils.right(newContent, overlapLength));

            /*
             * NOTE(review): a match which lies completely within the overlap may already have been 
             * counted by the previous call and would then be counted twice. Confirm whether this is 
             * acceptable for the thresholds in use.
             */
            if (context.isPartial() && StringUtils.isNotEmpty(previousContent)) {
                newContent = previousContent + newContent;
            }
        }

        return newContent;
    }

    /*
     * Returns the LocalContext stored in the context. A new LocalContext will be created and stored if the
     * context does not yet contain one.
     */
    private LocalContext getLocalContext(PolicyCheckerContext context) {
        LocalContext localContext = context.get(CONTEXT_KEY, LocalContext.class);

        if (localContext == null) {
            localContext = new LocalContext();

            context.set(CONTEXT_KEY, localContext);
        }

        return localContext;
    }

    /*
     * Returns the RuleMatch registered under the name of the policyPattern. A new RuleMatch will be created 
     * and registered if there is none.
     */
    private RuleMatch getRuleMatch(PolicyPattern policyPattern, PolicyCheckerContext context) {
        Map<String, RuleMatch> map = getLocalContext(context).getRuleMatches();

        RuleMatch match = map.get(policyPattern.getName());

        if (match == null) {
            match = new RuleMatch(policyPattern);

            map.put(policyPattern.getName(), match);
        }

        return match;
    }

    @Override
    public void init(PolicyCheckerContext context) {
        /*
         *  no initialization required
         */
    }

    /**
     * Scans the content of the context with all the patterns of the context and records every match. 
     * Nothing is scanned when the content or the patterns are missing. Elements of the patterns collection 
     * which are not a PolicyPattern (including null elements) are skipped with a warning.
     */
    @Override
    public void update(PolicyCheckerContext context) {
        /*
         * The content is retrieved first (also when there are no patterns) because getContent stores the 
         * overlap for the next call.
         */
        String content = getContent(context);

        Collection<?> patterns = context.getPatterns();

        if (content == null) {
            logger.debug("Content is missing.");

            return;
        }

        if (patterns == null) {
            logger.debug("Patterns are missing.");

            return;
        }

        for (Object element : patterns) {
            if (!(element instanceof PolicyPattern)) {
                /*
                 * instanceof is false for null so guard against a null element to prevent a 
                 * NullPointerException on getClass()
                 */
                logger.warn("Pattern is-not-a PolicyPattern but a {}",
                        element != null ? element.getClass() : null);

                continue;
            }

            collectMatches((PolicyPattern) element, content, context);
        }
    }

    /*
     * Finds all matches of a single policyPattern in content and records them in the RuleMatch of the pattern.
     */
    private void collectMatches(PolicyPattern policyPattern, String content, PolicyCheckerContext context) {
        Pattern pattern = policyPattern.getPattern();

        if (pattern == null) {
            logger.warn("Pattern is missing.");

            return;
        }

        Matcher matcher = pattern.matcher(content);

        MatchFilter matchFilter = policyPattern.getMatchFilter();

        /*
         * Only looked up (and created) when there is at least one match, and only once per pattern
         */
        RuleMatch ruleMatch = null;

        while (matcher.find()) {
            if (ruleMatch == null) {
                ruleMatch = getRuleMatch(policyPattern, context);
            }

            String match = matcher.group();

            if (matchFilter != null) {
                match = matchFilter.filter(match);
            }

            ruleMatch.addMatch(match);

            logger.debug("Match found. Rule: {}. Match: {}", policyPattern.getName(), match);
        }
    }

    /**
     * Checks all recorded rule matches. If one or more rules are violated, a PolicyViolationException 
     * containing a PolicyViolation for every violated rule is thrown.
     * 
     * @throws PolicyViolationException if at least one rule is violated
     */
    @Override
    public void finish(PolicyCheckerContext context) throws PolicyViolationException {
        /*
         * Only created when the first violation is found
         */
        PolicyViolationException policyViolationException = null;

        /*
         * Step through all found matches and see whether a policy is violated.
         */
        for (RuleMatch ruleMatch : getLocalContext(context).getRuleMatches().values()) {
            if (!ruleMatch.isViolated()) {
                continue;
            }

            PolicyPattern policyPattern = ruleMatch.getPolicyPattern();

            PolicyViolation violation = new PolicyViolationImpl(getName(), policyPattern.getName(),
                    ruleMatch.getMatches(), policyPattern.getPriority());

            logger.debug("Policy violation.{}", violation);

            if (policyViolationException == null) {
                policyViolationException = new PolicyViolationException("Policy violation");
            }

            policyViolationException.addViolation(violation);
        }

        if (policyViolationException != null) {
            throw policyViolationException;
        }
    }

    public int getOverlapLength() {
        return overlapLength;
    }

    public void setOverlapLength(int overlapLength) {
        this.overlapLength = overlapLength;
    }

    public int getMaxMatchWidth() {
        return maxMatchWidth;
    }

    /**
     * Sets the max length of an individual match.
     * 
     * @throws IllegalArgumentException if maxMatchWidth is smaller than 4
     */
    public void setMaxMatchWidth(int maxMatchWidth) {
        /*
         * StringUtils.abbreviate requires at least 4 
         */
        if (maxMatchWidth < 4) {
            throw new IllegalArgumentException("Minimal width is 4.");
        }

        this.maxMatchWidth = maxMatchWidth;
    }

    public int getTotalMaxMatchWidth() {
        return totalMaxMatchWidth;
    }

    public void setTotalMaxMatchWidth(int totalMaxMatchWidth) {
        this.totalMaxMatchWidth = totalMaxMatchWidth;
    }
}