no.sesat.search.query.analyser.AnalysisRuleFactory.java Source code

Java tutorial

Introduction

Here is the source code for no.sesat.search.query.analyser.AnalysisRuleFactory.java

Source

/* Copyright (2005-2012) Schibsted ASA
 * This file is part of Possom.
 *
 *   Possom is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   Possom is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with Possom.  If not, see <http://www.gnu.org/licenses/>.
*/
package no.sesat.search.query.analyser;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.xml.parsers.DocumentBuilderFactory;
import no.sesat.commons.ioc.BaseContext;
import no.sesat.commons.ioc.ContextWrapper;
import no.sesat.search.site.config.DocumentLoader;
import no.sesat.search.query.token.TokenPredicate;
import no.sesat.search.site.SiteKeyedFactory;
import org.apache.commons.collections.Predicate;
import org.apache.commons.collections.PredicateUtils;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import no.sesat.search.query.QueryStringContext;
import no.sesat.search.query.token.AbstractEvaluatorFactory;
import no.sesat.search.query.token.EvaluatorType;
import no.sesat.search.query.token.TokenPredicateUtility;
import no.sesat.search.site.config.PropertiesLoader;
import no.sesat.search.site.config.ResourceContext;
import no.sesat.search.site.config.UrlResourceLoader;
import no.sesat.search.site.Site;
import no.sesat.search.site.SiteContext;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Responsible for loading and serving all the AnalysisRule instances.
 * These rules, each consisting of a set of scores, come from the configuration file named by
 * {@link #ANALYSIS_RULES_XMLFILE}.
 * Rules are inherited on a per-rule basis. Global predicates are inherited as well. Inherited global predicates can
 * only be overridden by global predicates. Private predicates (those declared inside a rule) are not inherited.
 *
 * @version <tt>$Revision$</tt>
 */
public final class AnalysisRuleFactory implements SiteKeyedFactory {

    /**
     * The context the AnalysisRuleFactory must work against.
     * Combines {@link BaseContext}, {@link ResourceContext} and {@link SiteContext} and adds a unique id,
     * which this factory forwards to the evaluator factory contexts it creates while initialising.
     */
    public interface Context extends BaseContext, ResourceContext, SiteContext {
        /** @return the unique id handed on to the evaluator factory contexts built by this factory. */
        String getUniqueId();
    }

    private static final Logger LOG = Logger.getLogger(AnalysisRuleFactory.class);

    // Log and exception message fragments. Most are prefixes that get the offending id/element appended.
    private static final String ERR_DOC_BUILDER_CREATION = "Failed to DocumentBuilderFactory.newInstance().newDocumentBuilder()";
    // NOTE(review): not referenced anywhere in the visible part of this class.
    private static final String ERR_UNABLE_TO_FIND_PREDICATE = "Unable to find predicate with id ";
    private static final String ERR_UNABLE_TO_FIND_PREDICATE_UTILS_METHOD = "Unable to find method PredicateUtils.";
    private static final String ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD = "Unable to use method PredicateUtils.";
    private static final String ERR_WHILE_READING_ELEMENT = "Error while reading element ";
    private static final String ERR_TOO_MANY_PREDICATES_IN_NOT = "Illegal to have more than one predicate inside a <not> element. Occurred under ";
    private static final String WARN_RULE_NOT_FOUND = "Unable to find rule: ";
    private static final String DEBUG_CREATED_PREDICATE = "Parsed predicate ";
    private static final String DEBUG_STARTING_RULE = "Parsing rule ";
    private static final String DEBUG_FINISHED_RULE = "Parsed rule ";

    /** Stand-in rule (no scores are ever added to it) used by getRule(String) when a rule cannot be found. */
    private static final AnalysisRule DUMB_RULE = new AnalysisRule();

    /**
     * Cache of the factories, one per site. Every access goes through INSTANCES_LOCK.
     */
    private static final Map<Site, AnalysisRuleFactory> INSTANCES = new HashMap<Site, AnalysisRuleFactory>();
    private static final ReentrantReadWriteLock INSTANCES_LOCK = new ReentrantReadWriteLock();

    /** Name of the configuration file. **/
    public static final String ANALYSIS_RULES_XMLFILE = "AnalysisRules.xml";

    /** Predicates declared directly under the document root of this site's file, keyed by id. Filled by init(). */
    private final Map<String, Predicate> globalPredicates = new HashMap<String, Predicate>();

    /** This site's own rules keyed by rule id (parents' rules are not copied in). Guarded by rulesLock. */
    private final Map<String, AnalysisRule> rules = new HashMap<String, AnalysisRule>();
    private final ReentrantReadWriteLock rulesLock = new ReentrantReadWriteLock();

    private final Context context;
    /** Loader for this site's ANALYSIS_RULES_XMLFILE, created in the constructor and consumed by init(). */
    private final DocumentLoader loader;
    /** Set to true once init() has parsed the document; volatile so the flag is visible across threads. */
    private volatile boolean init = false;

    /**
     * Creates the factory for the context's site, starts loading its {@link #ANALYSIS_RULES_XMLFILE}
     * and registers the new instance in INSTANCES. Parsing is deferred until init() is first called.
     *
     * @param cxt the context (site, resources, unique id) this factory works against.
     * @throws ParserConfigurationException if no DocumentBuilder can be created.
     */
    private AnalysisRuleFactory(final Context cxt) throws ParserConfigurationException {

        context = cxt;

        // Acquire the lock BEFORE entering the try block: if lock() itself failed, the finally clause would
        // otherwise call unlock() on a lock this thread does not hold and mask the real failure.
        INSTANCES_LOCK.writeLock().lock();
        try {
            final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setValidating(false);
            final DocumentBuilder builder = factory.newDocumentBuilder();
            loader = context.newDocumentLoader(cxt, ANALYSIS_RULES_XMLFILE, builder);

            // registration is the last step so a factory without a loader never becomes visible
            INSTANCES.put(context.getSite(), this);
        } finally {
            INSTANCES_LOCK.writeLock().unlock();
        }
    }

    /**
     * Lazily parses this site's {@link #ANALYSIS_RULES_XMLFILE}, at most once per successful run.
     * Waits for the loader, instantiates the evaluator factories named on the root element, reads the global
     * predicates and then every {@code <rule>} element into {@code rules}.
     *
     * The whole parse runs under the monitor of {@code globalPredicates} (a private object) so that concurrent
     * first callers cannot parse simultaneously and corrupt the unsynchronised predicate map. The volatile
     * {@code init} flag keeps the already-initialised path lock free.
     */
    private void init() {

        if (init) {
            return;
        }

        synchronized (globalPredicates) {
            if (init) {
                // another thread finished the parse while this one was waiting for the monitor
                return;
            }

            loader.abut();
            LOG.info("Parsing " + ANALYSIS_RULES_XMLFILE + " started for " + context.getSite());

            final Document doc = loader.getDocument();
            assert null != doc : "No document loaded for " + context.getSite().getName();

            final Element root = doc.getDocumentElement();

            final Map<String, Predicate> inheritedPredicates = getInheritedPredicates();

            if (null != root) {

                initEvaluatorFactories(root.getAttribute("evaluators"));

                readPredicates(root, globalPredicates, inheritedPredicates);

                final NodeList ruleList = root.getElementsByTagName("rule");
                for (int i = 0; i < ruleList.getLength(); ++i) {
                    parseRule((Element) ruleList.item(i), inheritedPredicates);
                }
            }
            LOG.info("Parsing " + ANALYSIS_RULES_XMLFILE + " finished");
            init = true;
        }
    }

    /**
     * Requests from AbstractEvaluatorFactory the instance of every factory class named in the comma separated
     * list. The original inline comment described this step as "initialise anonymous predicate"; presumably
     * it makes those factories' predicates known before any rule is parsed -- confirm against
     * AbstractEvaluatorFactory. Names are trimmed and blank entries skipped.
     */
    private void initEvaluatorFactories(final String evaluatorTypes) {

        if (null == evaluatorTypes || 0 == evaluatorTypes.length()) {
            return;
        }

        for (String name : evaluatorTypes.split(",")) {

            final String factoryName = name.trim();
            if (0 == factoryName.length()) {
                continue;
            }

            AbstractEvaluatorFactory.instanceOf(ContextWrapper
                    .wrap(AbstractEvaluatorFactory.Context.class, context, new BaseContext() {
                        public String getEvaluatorFactoryClassName() {
                            return factoryName;
                        }

                        public String getUniqueId() {
                            return context.getUniqueId();
                        }
                    }, new QueryStringContext() {
                        public String getQueryString() {
                            return "*";
                        }
                    }));
        }
    }

    /** Builds the AnalysisRule described by one {@code <rule>} element and stores it in {@code rules} under its id. */
    private void parseRule(final Element rule, final Map<String, Predicate> inheritedPredicates) {

        final String id = rule.getAttribute("id");
        final AnalysisRule analysisRule = new AnalysisRule();
        LOG.info(DEBUG_STARTING_RULE + id + " " + analysisRule);

        // private predicates: start from this site's globals, then add those declared inside the rule
        final Map<String, Predicate> privatePredicates = new HashMap<String, Predicate>(globalPredicates);
        readPredicates(rule, privatePredicates, inheritedPredicates);

        final NodeList scores = rule.getElementsByTagName("score");
        for (int j = 0; j < scores.getLength(); ++j) {
            final Element score = (Element) scores.item(j);
            final Predicate predicate = findPredicate(score.getAttribute("predicate"), privatePredicates,
                    inheritedPredicates);

            analysisRule.addPredicateScore(predicate, parseScoreValue(score));
        }

        // The name map does not change between scores, so build and set it once instead of once per score.
        // Rules without any score keep never receiving a name map, as before.
        if (0 < scores.getLength()) {
            analysisRule.setPredicateNameMap(
                    Collections.unmodifiableMap(buildPredicateNameMap(inheritedPredicates, privatePredicates)));
        }

        rulesLock.writeLock().lock();
        try {
            rules.put(id, analysisRule);
        } finally {
            rulesLock.writeLock().unlock();
        }
        LOG.info(DEBUG_FINISHED_RULE + id + " " + analysisRule);
    }

    /** Reverse lookup predicate to name; names from privatePredicates replace inherited names of the same predicate. */
    private static Map<Predicate, String> buildPredicateNameMap(final Map<String, Predicate> inheritedPredicates,
            final Map<String, Predicate> privatePredicates) {

        final Map<Predicate, String> predicateToNameMap = new HashMap<Predicate, String>();
        for (Map.Entry<String, Predicate> entry : inheritedPredicates.entrySet()) {
            predicateToNameMap.put(entry.getValue(), entry.getKey());
        }
        for (Map.Entry<String, Predicate> entry : privatePredicates.entrySet()) {
            predicateToNameMap.put(entry.getValue(), entry.getKey());
        }
        return predicateToNameMap;
    }

    /**
     * Reads the integer body of a {@code <score>} element, tolerating surrounding whitespace.
     *
     * @throws NumberFormatException if the element has no text or the text is not an integer.
     */
    private static int parseScoreValue(final Element score) {

        final String text = score.getTextContent();
        if (null == text || 0 == text.trim().length()) {
            throw new NumberFormatException(
                    "Empty <score> element for predicate " + score.getAttribute("predicate"));
        }
        return Integer.parseInt(text.trim());
    }

    /**
     * Resolves the factory belonging to the parent of this factory's site.
     * The parent's context is this factory's context with only getSite() redirected to the parent site.
     *
     * @return the parent site's factory, or null when the site has no parent.
     */
    private AnalysisRuleFactory getParentFactory() {

        if (null == context.getSite().getParent()) {
            return null;
        }

        final SiteContext parentSiteContext = new SiteContext() {
            public Site getSite() {
                return context.getSite().getParent();
            }
        };
        final Context parentContext = ContextWrapper.wrap(Context.class, parentSiteContext, context);
        return instanceOf(parentContext);
    }

    /**
     * Reads every {@code <predicate>} element that is a direct child of the given element.
     *
     * @param element the element whose direct children are scanned.
     * @param predicateMap the map the named predicates are registered in; also used for lookups.
     * @param inheritedPredicates predicates inherited from the parent site, used for lookups only.
     * @return the same predicateMap instance that was passed in.
     */
    private Map<String, Predicate> readPredicates(final Element element, final Map<String, Predicate> predicateMap,
            final Map<String, Predicate> inheritedPredicates) {

        final NodeList children = element.getChildNodes();
        final int childCount = children.getLength();

        int index = 0;
        while (index < childCount) {
            final Node child = children.item(index++);
            // text, comments and differently named elements are ignored
            if (!(child instanceof Element)) {
                continue;
            }
            final Element candidate = (Element) child;
            if (!"predicate".equals(candidate.getTagName())) {
                continue;
            }
            readPredicate(candidate, predicateMap, inheritedPredicates);
        }
        return predicateMap;
    }

    /**
     * Turns one {@code <predicate>} element into a Predicate.
     * An element with an id but no children is a reference and is looked up; any other element is built from
     * its first child element (the operator) and, when it carries an id, registered in predicateMap.
     *
     * @param element the {@code <predicate>} element.
     * @param predicateMap map of predicates known so far; new named predicates are added to it.
     * @param inheritedPredicates predicates inherited from the parent site.
     * @return the predicate, or null when the element has no operator child or the operator could not be built.
     */
    private Predicate readPredicate(final Element element, final Map<String, Predicate> predicateMap,
            final Map<String, Predicate> inheritedPredicates) {

        final boolean identified = element.hasAttribute("id");
        final boolean empty = !element.hasChildNodes();

        if (identified && empty) {
            // a bare reference to a predicate that is defined elsewhere
            return findPredicate(element.getAttribute("id"), predicateMap, inheritedPredicates);
        }

        // a definition: only the first child element is taken as the operator
        Predicate created = null;
        final NodeList children = element.getChildNodes();
        for (int i = 0; null == created && i < children.getLength(); ++i) {
            final Node child = children.item(i);
            if (child instanceof Element) {
                created = createPredicate((Element) child, predicateMap, inheritedPredicates);
                break;
            }
        }

        if (identified) {
            // named definitions are remembered so later elements can refer to them
            final String id = element.getAttribute("id");
            predicateMap.put(id, created);
            LOG.debug(DEBUG_CREATED_PREDICATE + id + " " + created);
        }

        return created;
    }

    /**
     * Resolves a predicate by name. Lookup order: predicateMap, then parentPredicateMap, then
     * TokenPredicateUtility. A name starting with TokenPredicate.EXACT_PREFIX resolves to the exact peer of
     * the token predicate named by the remainder.
     *
     * @param name the predicate name.
     * @param predicateMap the predicates of the current scope; consulted first.
     * @param parentPredicateMap the inherited predicates; consulted second.
     * @return the predicate; whatever TokenPredicateUtility yields when neither map contains the name.
     */
    private Predicate findPredicate(final String name, final Map<String, Predicate> predicateMap,
            final Map<String, Predicate> parentPredicateMap) {

        // containsKey rather than a null check on get(): a name explicitly mapped to null must still
        // stop the search here, exactly as before.
        if (predicateMap.containsKey(name)) {
            return predicateMap.get(name);
        }
        if (parentPredicateMap.containsKey(name)) {
            return parentPredicateMap.get(name);
        }

        // second check TokenPredicate's Categories, anonymous predicates, and exact peers.
        if (name.startsWith(TokenPredicate.EXACT_PREFIX)) {
            // Strip the prefix literally. replaceFirst would interpret the prefix as a regular expression,
            // which silently misbehaves should it ever contain a regex metacharacter.
            final String peerName = name.substring(TokenPredicate.EXACT_PREFIX.length());
            return TokenPredicateUtility.getTokenPredicate(peerName).exactPeer();
        }
        return TokenPredicateUtility.getTokenPredicate(name);
    }

    /**
     * Builds the composite predicate described by an operator element ({@code <and>}, {@code <or>},
     * {@code <either>}, {@code <neither>} or {@code <not>}) from its direct {@code <predicate>} children.
     * All operators except {@code <not>} are applied through the PredicateUtils method of matching name that
     * takes a Collection, found by reflection.
     *
     * @param element the operator element.
     * @param predicateMap predicates known so far; nested named predicates are added to it.
     * @param inheritedPredicates predicates inherited from the parent site.
     * @return the composite predicate, or null when the PredicateUtils method could not be found or invoked
     *         (the failure is logged).
     * @throws IllegalStateException if a {@code <not>} element does not contain exactly one predicate.
     */
    private Predicate createPredicate(final Element element, final Map<String, Predicate> predicateMap,
            final Map<String, Predicate> inheritedPredicates) {

        Predicate result = null;
        final String tagName = element.getTagName();
        // The operator to use from PredicateUtils.
        final String methodName = predicateUtilsMethodName(tagName);
        // because we can't use the above operator methods with only one child predicate
        //  the not operator must be a special case.
        final boolean notPredicate = "not".equals(tagName);

        try {
            // Find PredicateUtils static method through reflection
            final Method method = notPredicate ? null
                    : PredicateUtils.class.getMethod(methodName, new Class[] { Collection.class });

            // load all the predicates it will apply to
            final List<Predicate> childPredicates = new LinkedList<Predicate>();
            final NodeList predicates = element.getChildNodes();
            for (int i = 0; i < predicates.getLength(); ++i) {
                final Node node = predicates.item(i);
                if (node instanceof Element) {
                    final Element e = (Element) node;
                    if ("predicate".equals(e.getTagName())) {
                        childPredicates.add(readPredicate(e, predicateMap, inheritedPredicates));
                    }
                }
            }
            if (notPredicate) {
                // there must be exactly one in the list
                if (childPredicates.isEmpty()) {
                    // fail with a message naming the location instead of an IndexOutOfBoundsException
                    throw new IllegalStateException(
                            "Missing predicate inside a <not> element. Occurred under " + element.getParentNode());
                }
                if (childPredicates.size() > 1) {
                    throw new IllegalStateException(ERR_TOO_MANY_PREDICATES_IN_NOT + element.getParentNode());
                }
                result = PredicateUtils.notPredicate(childPredicates.get(0));
            } else {
                // use the operator through reflection
                result = (Predicate) method.invoke(null, new Object[] { childPredicates });
            }

        } catch (SecurityException ex) {
            LOG.error(ERR_WHILE_READING_ELEMENT + element);
            LOG.error(ERR_UNABLE_TO_FIND_PREDICATE_UTILS_METHOD + methodName, ex);
        } catch (NoSuchMethodException ex) {
            LOG.error(ERR_WHILE_READING_ELEMENT + element);
            LOG.error(ERR_UNABLE_TO_FIND_PREDICATE_UTILS_METHOD + methodName, ex);
        } catch (IllegalAccessException ex) {
            LOG.error(ERR_WHILE_READING_ELEMENT + element);
            LOG.error(ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD + methodName, ex);
        } catch (InvocationTargetException ex) {
            LOG.error(ERR_WHILE_READING_ELEMENT + element);
            LOG.error(ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD + methodName, ex);
        } catch (IllegalArgumentException ex) {
            LOG.error(ERR_WHILE_READING_ELEMENT + element);
            LOG.error(ERR_UNABLE_TO_USE_PREDICATE_UTILS_METHOD + methodName, ex);
        }

        return result;
    }

    /**
     * Maps an operator tag name onto the name of the PredicateUtils method implementing it.
     * Whole tag names are compared; the previous chain of substring replacements produced "none" for
     * "neither" only because "either" was rewritten inside it. Any other tag name is passed through
     * unchanged with "Predicate" appended.
     */
    private static String predicateUtilsMethodName(final String tagName) {

        final String operator;
        if ("and".equals(tagName)) {
            operator = "all";
        } else if ("or".equals(tagName)) {
            operator = "any";
        } else if ("either".equals(tagName)) {
            operator = "one";
        } else if ("neither".equals(tagName)) {
            operator = "none";
        } else {
            operator = tagName;
        }
        return operator + "Predicate";
    }

    /**
     * Returns a fresh, mutable map of every rule visible to this site: the rules inherited from the parent
     * sites overlaid with this site's own rules. A rule defined locally takes precedence over an inherited
     * rule with the same id, matching the lookup order of {@link #getRule(String)}.
     *
     * @return rule id to rule; never null.
     */
    public Map<String, AnalysisRule> getRulesMap() {

        LOG.trace("getRulesMap()");

        init();

        final Map<String, AnalysisRule> result = new HashMap<String, AnalysisRule>();

        // inherited rules go in first so that the local rules added afterwards override them
        final AnalysisRuleFactory parentFactory = getParentFactory();
        if (null != parentFactory) {
            result.putAll(parentFactory.getRulesMap());
        }

        rulesLock.readLock().lock();
        try {
            result.putAll(rules);
        } finally {
            rulesLock.readLock().unlock();
        }

        return result;
    }

    /**
     * Returns the rule with the name <code>ruleName</code>.
     * This site's own rules are consulted first, then the parent sites'. When no site in the chain defines
     * the rule a warning is logged and a rule that never scores is returned, also for a site without parent.
     *
     * @param   ruleName    the name of the rule
     * @return  the rule; never null.
     */
    public AnalysisRule getRule(final String ruleName) {

        LOG.trace("getRule(" + ruleName + ')');

        init();

        AnalysisRule rule;
        rulesLock.readLock().lock();
        try {
            rule = rules.get(ruleName);
        } finally {
            rulesLock.readLock().unlock();
        }

        if (null == rule) {
            final AnalysisRuleFactory parentFactory = getParentFactory();
            if (null != parentFactory) {
                rule = parentFactory.getRule(ruleName);
            }
        }

        if (null == rule) {
            // if we cannot find an rule, then use the dumb guy that never scores.
            //  Rather than encourage a NullPointerException
            LOG.warn(WARN_RULE_NOT_FOUND + ruleName);
            rule = DUMB_RULE;
        }

        return rule;
    }

    /**
     * Main method to retrieve the correct AnalysisRuleFactory to further obtain
     * AnalysisRule.
     * The factory is looked up under the read lock first. On a miss the lookup is repeated under the write
     * lock before constructing, so concurrent callers for the same site end up sharing a single instance.
     *
     * @param cxt the contextual needs this factory must use to operate.
     * @return AnalysisRuleFactory for this site, or null if the factory could not be created
     *         (the ParserConfigurationException is logged).
     */
    public static AnalysisRuleFactory instanceOf(final Context cxt) {

        final Site site = cxt.getSite();
        assert null != site : "instanceOf(cxt) got null site";

        AnalysisRuleFactory instance;

        INSTANCES_LOCK.readLock().lock();
        try {
            instance = INSTANCES.get(site);
        } finally {
            INSTANCES_LOCK.readLock().unlock();
        }

        if (null == instance) {
            // The write lock is reentrant, so the constructor taking it again to register itself is fine.
            INSTANCES_LOCK.writeLock().lock();
            try {
                // re-check: another thread may have created the factory between the two locks
                instance = INSTANCES.get(site);
                if (null == instance) {
                    instance = new AnalysisRuleFactory(cxt);
                }
            } catch (ParserConfigurationException ex) {
                LOG.error(ERR_DOC_BUILDER_CREATION, ex);
            } finally {
                INSTANCES_LOCK.writeLock().unlock();
            }
        }
        return instance;
    }

    /**
     * Drops the factory cached for the given site.
     *
     * @param site the site whose factory is to be removed.
     * @return true if a factory was cached for the site.
     */
    public boolean remove(final Site site) {

        // lock before try: finally must only unlock a lock that was actually acquired
        INSTANCES_LOCK.writeLock().lock();
        try {
            return null != INSTANCES.remove(site);
        } finally {
            INSTANCES_LOCK.writeLock().unlock();
        }
    }

    /**
     * Collects the global predicates this site inherits.
     *
     * @return the global predicates of the parent factory, or an empty map when there is no parent factory.
     **/
    private Map<String, Predicate> getInheritedPredicates() {

        final AnalysisRuleFactory parentFactory = getParentFactory();

        if (null == parentFactory) {
            return Collections.<String, Predicate>emptyMap();
        }
        return parentFactory.getGlobalPredicates();
    }

    /**
     * Returns this site's and all parent site's global predicates in one map.
     * Where a site redefines a predicate id of one of its parents the site's own definition wins, so that
     * child sites inherit the override rather than the ancestor's original.
     *
     * @return an unmodifiable snapshot; never null.
     **/
    private Map<String, Predicate> getGlobalPredicates() {

        init();

        // inherited first, own definitions last: putAll replaces existing keys, so the later map wins
        final Map<String, Predicate> result = new HashMap<String, Predicate>(getInheritedPredicates());
        result.putAll(globalPredicates);
        return Collections.unmodifiableMap(result);
    }
}