org.apache.jackrabbit.core.query.lucene.JahiaIndexingConfigurationImpl.java Source code

Introduction

Here is the source code for org.apache.jackrabbit.core.query.lucene.JahiaIndexingConfigurationImpl.java
Source

/**
 * ==========================================================================================
 * =                   JAHIA'S DUAL LICENSING - IMPORTANT INFORMATION                       =
 * ==========================================================================================
 *
 *                                 http://www.jahia.com
 *
 *     Copyright (C) 2002-2017 Jahia Solutions Group SA. All rights reserved.
 *
 *     THIS FILE IS AVAILABLE UNDER TWO DIFFERENT LICENSES:
 *     1/GPL OR 2/JSEL
 *
 *     1/ GPL
 *     ==================================================================================
 *
 *     IF YOU DECIDE TO CHOOSE THE GPL LICENSE, YOU MUST COMPLY WITH THE FOLLOWING TERMS:
 *
 *     This program is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 *     2/ JSEL - Commercial and Supported Versions of the program
 *     ===================================================================================
 *
 *     IF YOU DECIDE TO CHOOSE THE JSEL LICENSE, YOU MUST COMPLY WITH THE FOLLOWING TERMS:
 *
 *     Alternatively, commercial and supported versions of the program - also known as
 *     Enterprise Distributions - must be used in accordance with the terms and conditions
 *     contained in a separate written agreement between you and Jahia Solutions Group SA.
 *
 *     If you are unsure which license is appropriate for your use,
 *     please contact the sales department at sales@jahia.com.
 */
package org.apache.jackrabbit.core.query.lucene;

import org.apache.commons.lang.StringUtils;
import org.apache.jackrabbit.core.NamespaceRegistryImpl;
import org.apache.jackrabbit.core.nodetype.xml.AdditionalNamespaceResolver;
import org.apache.jackrabbit.core.query.QueryHandlerContext;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.spi.Name;
import org.apache.jackrabbit.spi.NameFactory;
import org.apache.jackrabbit.spi.commons.conversion.NameResolver;
import org.apache.jackrabbit.spi.commons.conversion.ParsingNameResolver;
import org.apache.jackrabbit.spi.commons.name.NameFactoryImpl;
import org.apache.jackrabbit.spi.commons.namespace.NamespaceResolver;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.util.Version;
import org.jahia.api.Constants;
import org.jahia.utils.LuceneUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.*;
import org.w3c.dom.CharacterData;

import javax.jcr.NamespaceException;
import javax.jcr.NamespaceRegistry;
import javax.jcr.RepositoryException;
import java.lang.reflect.Constructor;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Jahia specific {@link IndexingConfiguration} implementation.
 * <p>
 * In addition to what {@link IndexingConfigurationImpl} handles,
 * {@link #init(Element, QueryHandlerContext, NamespaceMappings)} reads the following direct children of the
 * configuration root element:
 * <ul>
 * <li>{@code i18ncopy}: {@code exclude-property} names, exposed by {@link #getExcludesFromI18NCopy()};</li>
 * <li>{@code hierarchical}: {@code nodetype} names, exposed by {@link #getHierarchicalNodetypes()};</li>
 * <li>{@code spellchecker}: {@code include-property} names, used by
 * {@link #shouldPropertyBeSpellchecked(String)};</li>
 * <li>{@code analyzer-registry}: per-language analyzers, exposed by {@link #getAnalyzerRegistry()};</li>
 * <li>{@code exclude}: node type / path exclusions, exposed by {@link #getExcludesTypesByPath()}.</li>
 * </ul>
 * The per-node rule lookups ({@link #isIndexed}, {@link #getPropertyBoost}, {@link #getNodeBoost},
 * {@link #isIncludedInNodeScopeIndex}, {@link #useInExcerpt}) are evaluated against the parent node whenever the
 * given node is of the Jahia translation node type.
 */
public class JahiaIndexingConfigurationImpl extends IndexingConfigurationImpl {
    private static final String FACET_EXPRESSION = ":" + JahiaNodeIndexer.FACET_PREFIX;

    private static final String SPELLCHECK_EXPRESSION = FieldNames.FULLTEXT + LuceneUtils.DASH;

    private static final Name TRANSLATION_TYPE = NameFactoryImpl.getInstance().create(Constants.JAHIANT_NS,
            "translation");

    /**
     * The logger instance for this class.
     */
    private static final Logger logger = LoggerFactory.getLogger(JahiaIndexingConfigurationImpl.class);

    /**
     * Should match value used in indexing_configuration.xml
     */
    public static final String FULL_SPELLCHECK_FIELD_NAME = "0:FULL:SPELLCHECK";

    /**
     * The item state manager to retrieve additional item states. Set by
     * {@link #init(Element, QueryHandlerContext, NamespaceMappings)}.
     */
    private ItemStateManager ism;

    /**
     * Set of property names which are included in spellchecking; {@code null} as long as no
     * {@code spellchecker} element has been read.
     */
    private Set<String> includedInSpellchecking = null;

    /**
     * The configured {@code AnalyzerRegistry} if any.
     */
    private final LanguageCustomizingAnalyzerRegistry analyzerRegistry = new LanguageCustomizingAnalyzerRegistry(
            this);

    /**
     * Concatenates the data of the direct text children of a DOM node.
     *
     * @param node a node.
     * @return the text content of the <code>node</code>; only direct children of type
     *         {@link Node#TEXT_NODE} contribute.
     */
    private static String getTextContent(Node node) {
        final StringBuilder content = new StringBuilder();
        final NodeList nodes = node.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            final Node n = nodes.item(i);
            if (n.getNodeType() == Node.TEXT_NODE) {
                content.append(((CharacterData) n).getData());
            }
        }
        return content.toString();
    }

    /**
     * Reads an attribute of a DOM node without relying on exceptions for the "absent" case.
     *
     * @param node          a DOM node.
     * @param attributeName the name of the attribute to read.
     * @return the trimmed attribute value, or {@code null} if the node has no attributes or no attribute with
     *         that name.
     */
    private static String getAttributeValue(Node node, String attributeName) {
        final NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return null;
        }
        final Node attribute = attributes.getNamedItem(attributeName);
        if (attribute == null) {
            return null;
        }
        final String value = attribute.getNodeValue();
        return value != null ? value.trim() : null;
    }

    private Set<Name> excludesFromI18NCopy = Collections.emptySet();

    private Set<Name> hierarchicalNodetypes = Collections.emptySet();

    private Set<ExcludedType> excludesTypesByPath = Collections.emptySet();

    private final Analyzer keywordAnalyzer = new KeywordAnalyzer();

    public JahiaIndexingConfigurationImpl() {
        super();
    }

    /**
     * @return the names read from the {@code exclude-property} children of the {@code i18ncopy} element; empty
     *         if none was configured.
     */
    public Set<Name> getExcludesFromI18NCopy() {
        return excludesFromI18NCopy;
    }

    /**
     * @return the exclusions read from the {@code exclude} elements; empty if none was configured.
     */
    public Set<ExcludedType> getExcludesTypesByPath() {
        return excludesTypesByPath;
    }

    /**
     * @return the names read from the {@code nodetype} children of the {@code hierarchical} element; empty if
     *         none was configured.
     */
    public Set<Name> getHierarchicalNodetypes() {
        return hierarchicalNodetypes;
    }

    /**
     * Returns the namespaces declared on the <code>node</code>.
     *
     * @param node a DOM node.
     * @return a prefix-to-URI mapping built from the node's {@code xmlns:*} attributes
     */
    private Properties getNamespaces(Node node) {
        final String declarationPrefix = "xmlns:";
        final Properties namespaces = new Properties();
        final NamedNodeMap attributes = node.getAttributes();
        for (int i = 0; i < attributes.getLength(); i++) {
            final Attr attribute = (Attr) attributes.item(i);
            final String attributeName = attribute.getName();
            if (attributeName.startsWith(declarationPrefix)) {
                namespaces.setProperty(attributeName.substring(declarationPrefix.length()), attribute.getValue());
            }
        }
        return namespaces;
    }

    /**
     * Returns the analyzer configured for the property with this fieldName
     * (the string representation, JCR-style name, of the given <code>Name</code>
     * prefixed with <code>FieldNames.FULLTEXT_PREFIX</code>),
     * and <code>null</code> if none is configured, or the configured analyzer
     * cannot be found. If <code>null</code> is returned, the default Analyzer
     * is used.
     * <p>
     * Field names containing the facet expression always get a {@link KeywordAnalyzer}; field names starting
     * with the spellcheck expression are looked up as {@link #FULL_SPELLCHECK_FIELD_NAME}.
     *
     * @param fieldName the string representation, JCR-style name, of the given <code>Name</code>
     *                  prefixed with <code>FieldNames.FULLTEXT_PREFIX</code>
     * @return the <code>analyzer</code> to use for indexing this property
     */
    @Override
    public Analyzer getPropertyAnalyzer(String fieldName) {
        if (StringUtils.contains(fieldName, FACET_EXPRESSION)) {
            return keywordAnalyzer;
        }
        final String lookupName = StringUtils.startsWith(fieldName, SPELLCHECK_EXPRESSION)
                ? FULL_SPELLCHECK_FIELD_NAME
                : fieldName;
        return super.getPropertyAnalyzer(lookupName);
    }

    /**
     * Registers the namespaces declared on the configuration root, delegates to the parent implementation and
     * then reads the Jahia specific child elements (see the class description).
     *
     * @param config     the root element of the indexing configuration.
     * @param context    the query handler context; provides the item state manager and namespace registry.
     * @param nsMappings the namespace mappings, passed through to the parent implementation.
     * @throws Exception if the parent initialization or the processing of an {@code exclude} element fails.
     */
    @Override
    public void init(Element config, QueryHandlerContext context, NamespaceMappings nsMappings) throws Exception {
        ism = context.getItemStateManager();
        final Properties customNamespaces = getNamespaces(config);
        // custom namespaces must be known to the registry before the parent resolves the configured names
        registerCustomNamespaces(context.getNamespaceRegistry(), customNamespaces);
        super.init(config, context, nsMappings);
        final NamespaceResolver nsResolver = new AdditionalNamespaceResolver(customNamespaces);
        final NameResolver resolver = new ParsingNameResolver(NameFactoryImpl.getInstance(), nsResolver);

        final NodeList indexingConfigs = config.getChildNodes();
        for (int i = 0; i < indexingConfigs.getLength(); i++) {
            final Node configNode = indexingConfigs.item(i);
            final String nodeName = configNode.getNodeName();
            if (nodeName.equals("i18ncopy")) {
                excludesFromI18NCopy = initNameCollectionFrom(configNode, "exclude-property", resolver);
            } else if (nodeName.equals("hierarchical")) {
                hierarchicalNodetypes = initNameCollectionFrom(configNode, "nodetype", resolver);
            } else if (nodeName.equals("spellchecker")) {
                includedInSpellchecking = initTextCollectionFrom(configNode, "include-property");
            } else if (nodeName.equals("analyzer-registry")) {
                processAnalyzerRegistryConfiguration(configNode);
            } else if (nodeName.equals("exclude") && configNode.getNodeType() == Node.ELEMENT_NODE) {
                processExcludeConfiguration((Element) configNode, context.getNamespaceRegistry());
            }
        }
    }

    /**
     * Reads the {@code analyzer-registry} element. Each child element is named after a language code and
     * declares, through its {@code class} attribute, the analyzer to register for that language.
     * <p>
     * Languages are processed independently: a failure for one language is logged and does not prevent the
     * following ones from being registered.
     *
     * @param configNode the {@code analyzer-registry} node.
     */
    private void processAnalyzerRegistryConfiguration(Node configNode) {
        final NodeList languageNodes = configNode.getChildNodes();
        final int length = languageNodes.getLength();
        for (int i = 0; i < length; i++) {
            final Node languageNode = languageNodes.item(i);

            // only process elements
            if (Node.ELEMENT_NODE != languageNode.getNodeType()) {
                continue;
            }
            try {
                registerAnalyzerFor(languageNode);
            } catch (Exception e) {
                logger.warn("Couldn't process AnalyzerRegistry configuration for language "
                        + languageNode.getNodeName(), e);
            }
        }
    }

    /**
     * Instantiates, optionally customizes and registers the analyzer declared by one language element.
     * Nothing is registered if the element has no {@code class} attribute or the analyzer cannot be
     * instantiated.
     *
     * @param languageNode an element whose name is the language code.
     * @throws ClassNotFoundException if the declared analyzer class cannot be loaded.
     */
    private void registerAnalyzerFor(Node languageNode) throws ClassNotFoundException {
        final String className = getClassAttribute(languageNode);
        if (className == null) {
            return;
        }
        final Analyzer analyzer = instantiateAnalyzer(className);
        if (analyzer == null) {
            return;
        }

        // stays null when the attribute is absent so that the registry can tell "unset" from "false"
        final String foldingValue = getAttributeValue(languageNode, "useASCIIFoldingFilter");
        final Boolean useASCIIFoldingFilter = foldingValue != null ? Boolean.valueOf(foldingValue) : null;

        analyzerRegistry.addAnalyzer(languageNode.getNodeName(), applyCustomizer(languageNode, analyzer),
                useASCIIFoldingFilter);
    }

    /**
     * Instantiates an analyzer, preferring a constructor taking a {@link Version} and falling back to a
     * no-arg constructor.
     *
     * @param className the fully qualified analyzer class name.
     * @return the analyzer, or {@code null} if neither constructor could be used (a warning is logged).
     * @throws ClassNotFoundException if the class cannot be loaded.
     */
    private Analyzer instantiateAnalyzer(String className) throws ClassNotFoundException {
        final Class<?> analyzerClass = Class.forName(className);
        try {
            return (Analyzer) analyzerClass.getConstructor(Version.class).newInstance(Version.LUCENE_30);
        } catch (Exception ignored) {
            // expected for analyzers without a Version constructor: attempt to use a no-arg constructor
            try {
                return (Analyzer) analyzerClass.getDeclaredConstructor().newInstance();
            } catch (Exception e) {
                logger.warn("Couldn't instantiate Analyzer class: " + className
                        + ". It must provide a constructor with a org.apache.lucene.util.Version "
                        + "argument or a no-arg constructor.", e);
                return null;
            }
        }
    }

    /**
     * Applies the first usable {@code AnalyzerCustomizer} declared as a child element (with a {@code class}
     * attribute) of the language element. Only one customizer is applied; a customizer that fails is logged
     * and the next candidate, if any, is tried.
     *
     * @param languageNode the language element.
     * @param analyzer     the analyzer to customize.
     * @return the customized analyzer, or <code>analyzer</code> itself if no customizer could be applied.
     */
    private Analyzer applyCustomizer(Node languageNode, Analyzer analyzer) {
        final NodeList candidates = languageNode.getChildNodes();
        final int length = candidates.getLength();
        for (int i = 0; i < length; i++) {
            final Node customizerNode = candidates.item(i);
            if (Node.ELEMENT_NODE != customizerNode.getNodeType()) {
                continue;
            }
            final String customizerClassName = getClassAttribute(customizerNode);
            if (customizerClassName == null) {
                continue;
            }
            try {
                // todo: how should these be packaged? Class-loading this way might not
                // work in an OSGi environment
                final AnalyzerCustomizer customizer = (AnalyzerCustomizer) Class.forName(customizerClassName)
                        .getDeclaredConstructor().newInstance();
                customizer.initFrom(readCustomizerProperties(customizerNode));

                // we only allow one customizer so stop here
                return customizer.customize(analyzer);
            } catch (Exception e) {
                // keep looping in an attempt to check if maybe another customizer has been configured
                logger.warn("Couldn't instantiate AnalyzerCustomizer class: " + customizerClassName, e);
            }
        }
        return analyzer;
    }

    /**
     * Collects the initialization properties of a customizer: each child element contributes its trimmed text
     * under its element name, repeated element names accumulating values in document order.
     *
     * @param customizerNode the customizer element.
     * @return the properties keyed by child element name.
     */
    private Map<String, List<String>> readCustomizerProperties(Node customizerNode) {
        final NodeList keys = customizerNode.getChildNodes();
        final int keyNb = keys.getLength();
        final Map<String, List<String>> props = new HashMap<String, List<String>>(keyNb);
        for (int keyIndex = 0; keyIndex < keyNb; keyIndex++) {
            final Node item = keys.item(keyIndex);

            // only process element nodes
            if (Node.ELEMENT_NODE != item.getNodeType()) {
                continue;
            }
            final String key = item.getNodeName();
            List<String> values = props.get(key);
            if (values == null) {
                values = new ArrayList<String>();
                props.put(key, values);
            }
            values.add(getTextContent(item).trim());
        }
        return props;
    }

    /**
     * @param node a DOM node.
     * @return the trimmed value of the node's {@code class} attribute, or {@code null} if it has none.
     */
    private String getClassAttribute(Node node) {
        return getAttributeValue(node, "class");
    }

    /**
     * Reads one {@code exclude} element and records the corresponding {@link ExcludedType}. Elements without
     * a {@code nodetype} attribute, or whose node type name cannot be resolved, are skipped (the latter with
     * an error log).
     *
     * @param element           the {@code exclude} element; attributes {@code nodetype}, {@code path} and
     *                          {@code isRegexp} are read.
     * @param namespaceRegistry the registry used to resolve the prefix of the node type name.
     * @throws RepositoryException if the namespace registry fails for a reason other than an unknown prefix.
     */
    private void processExcludeConfiguration(Element element, NamespaceRegistry namespaceRegistry)
            throws RepositoryException {
        final String nodeType = element.getAttribute("nodetype");
        if (StringUtils.isEmpty(nodeType)) {
            return;
        }
        final Name nodeTypeName = resolveNodeTypeName(nodeType, namespaceRegistry);
        if (nodeTypeName == null) {
            // already logged; an exclusion without a type name could never match anything
            return;
        }
        final String path = element.getAttribute("path");
        final boolean isRegexp = Boolean.parseBoolean(element.getAttribute("isRegexp"));
        final ExcludedType excludedType = new ExcludedType(nodeTypeName, path, isRegexp);

        if (excludesTypesByPath.isEmpty()) {
            // the initial value is the immutable empty set
            excludesTypesByPath = new HashSet<>();
        }
        excludesTypesByPath.add(excludedType);
    }

    /**
     * Converts a configured node type name into a {@link Name}. Names starting with <code>{</code> are parsed
     * as expanded names; otherwise the part before the first colon is resolved as a namespace prefix. A name
     * without colon is resolved with the empty prefix.
     *
     * @param nodeType          the non-empty configured node type name.
     * @param namespaceRegistry the registry used to resolve the prefix.
     * @return the resolved name, or {@code null} if the prefix is unknown or the name is illegal (an error is
     *         logged in both cases).
     * @throws RepositoryException if the namespace registry fails for a reason other than an unknown prefix.
     */
    private Name resolveNodeTypeName(String nodeType, NamespaceRegistry namespaceRegistry)
            throws RepositoryException {
        final NameFactory nf = NameFactoryImpl.getInstance();
        try {
            if (nodeType.startsWith("{")) {
                return nf.create(nodeType);
            }
            final int colon = nodeType.indexOf(':');
            final String prefix = colon < 0 ? "" : nodeType.substring(0, colon);
            final String localName = nodeType.substring(colon + 1);
            return nf.create(namespaceRegistry.getURI(prefix), localName);
        } catch (NamespaceException e) {
            logger.error("Cannot parse namespace for " + nodeType, e);
        } catch (IllegalArgumentException iae) {
            logger.error("Illegal node type name: " + nodeType, iae);
        }
        return null;
    }

    /**
     * Collects the trimmed text of every direct child of <code>configNode</code> named
     * <code>childName</code>, in document order.
     *
     * @param configNode the parent node.
     * @param childName  the node name of the children to read.
     * @return the collected texts.
     */
    private List<String> getChildTexts(Node configNode, String childName) {
        final NodeList childNodes = configNode.getChildNodes();
        final int length = childNodes.getLength();
        final List<String> texts = new ArrayList<String>(length);
        for (int j = 0; j < length; j++) {
            final Node childNode = childNodes.item(j);
            if (childNode.getNodeName().equals(childName)) {
                texts.add(getTextContent(childNode).trim());
            }
        }
        return texts;
    }

    /**
     * Builds a set of names from the children of <code>configNode</code> named <code>childNameToAdd</code>.
     * Entries which cannot be resolved are logged and skipped.
     *
     * @param configNode     the parent node.
     * @param childNameToAdd the node name of the children to read.
     * @param resolver       the resolver converting the JCR-style names.
     * @return the resolved names.
     */
    private Set<Name> initNameCollectionFrom(Node configNode, String childNameToAdd, NameResolver resolver) {
        final List<String> propertyNames = getChildTexts(configNode, childNameToAdd);
        final Set<Name> names = new HashSet<Name>(propertyNames.size());
        for (String propertyName : propertyNames) {
            try {
                names.add(resolver.getQName(propertyName));
            } catch (RepositoryException | RuntimeException e) {
                logger.warn("Cannot resolve configured property name : " + propertyName, e);
            }
        }
        return names;
    }

    /**
     * Builds a set with the text, taken as-is apart from trimming, of the children of <code>configNode</code>
     * named <code>childNameToAdd</code>.
     *
     * @param configNode     the parent node.
     * @param childNameToAdd the node name of the children to read.
     * @return the collected property names.
     */
    private Set<String> initTextCollectionFrom(Node configNode, String childNameToAdd) {
        return new HashSet<String>(getChildTexts(configNode, childNameToAdd));
    }

    /**
     * Registers, in the repository namespace registry, every custom namespace whose prefix is not known yet.
     * Prefixes which are already registered are left untouched; registration failures are logged.
     *
     * @param namespaceRegistry the repository namespace registry.
     * @param customNamespaces  the prefix-to-URI mapping to register.
     */
    private void registerCustomNamespaces(NamespaceRegistryImpl namespaceRegistry, Properties customNamespaces) {
        for (String prefix : customNamespaces.stringPropertyNames()) {
            try {
                // only used as an existence check: throws if the prefix is unknown
                namespaceRegistry.getURI(prefix);
            } catch (NamespaceException e) {
                final String uri = customNamespaces.getProperty(prefix);
                try {
                    namespaceRegistry.registerNamespace(prefix, uri);
                    logger.info("Registered custom namespace with prefix '{}' and URI '{}'", prefix, uri);
                } catch (RepositoryException e1) {
                    logger.warn("Unable to register custom namespace with prefix '" + prefix + "' and URI '" + uri
                            + "'", e1);
                }
            }
        }
    }

    @Override
    public boolean isIndexed(NodeState state, Name propertyName) {
        return super.isIndexed(getUntranslatedNode(state), propertyName);
    }

    @Override
    public float getPropertyBoost(NodeState state, Name propertyName) {
        return super.getPropertyBoost(getUntranslatedNode(state), propertyName);
    }

    @Override
    public float getNodeBoost(NodeState state) {
        return super.getNodeBoost(getUntranslatedNode(state));
    }

    @Override
    public boolean isIncludedInNodeScopeIndex(NodeState state, Name propertyName) {
        return super.isIncludedInNodeScopeIndex(getUntranslatedNode(state), propertyName);
    }

    @Override
    public boolean useInExcerpt(NodeState state, Name propertyName) {
        return super.useInExcerpt(getUntranslatedNode(state), propertyName);
    }

    /**
     * Returns the node against which the index rules must be evaluated.
     *
     * @param state a node state.
     * @return the parent state if <code>state</code> is a translation node and its parent can be loaded,
     *         <code>state</code> itself otherwise.
     */
    private NodeState getUntranslatedNode(NodeState state) {
        if (!TRANSLATION_TYPE.equals(state.getNodeTypeName())) {
            return state;
        }
        try {
            return (NodeState) ism.getItemState(state.getParentId());
        } catch (ItemStateException e) {
            // best effort: fall back to the translation node itself, but leave a trace for diagnosis
            if (logger.isDebugEnabled()) {
                logger.debug("Unable to load the parent of translation node " + state.getNodeId()
                        + ", evaluating the index rules on the translation node itself", e);
            }
            return state;
        }
    }

    /**
     * @param propertyName the property name, in the form used in the {@code include-property} elements.
     * @return {@code true} if the property is part of the spellchecking configuration, or if no
     *         spellchecking configuration has been set up at all.
     */
    public boolean shouldPropertyBeSpellchecked(String propertyName) {
        // if includedInSpellchecking is null, we consider that no setup of spellchecking has been done and all
        // properties should therefore be included
        return includedInSpellchecking == null || includedInSpellchecking.contains(propertyName);
    }

    public LanguageCustomizingAnalyzerRegistry getAnalyzerRegistry() {
        return analyzerRegistry;
    }

    /**
     * inner object to handle excluded types in index configuration: a node type name combined with an
     * optional path constraint, which is either a path prefix or a regular expression.
     */
    class ExcludedType {
        private final Name typeName;
        private final String path;
        /** The compiled path expression; {@code null} when the path is used as a plain prefix. */
        private final Pattern regexpPathPattern;

        /**
         * @param typeName the node type name to match.
         * @param path     the path constraint; {@code null} matches every path.
         * @param regexp   whether <code>path</code> is a regular expression rather than a prefix.
         * @throws java.util.regex.PatternSyntaxException if <code>regexp</code> is set and <code>path</code>
         *                                                 is not a valid expression.
         */
        ExcludedType(Name typeName, String path, boolean regexp) {
            this.typeName = typeName;
            this.path = path;
            // a null path means "any path" (see matchPath), so there is nothing to compile
            this.regexpPathPattern = regexp && path != null ? Pattern.compile(path) : null;
        }

        /**
         * @param nodeState the node to test.
         * @return {@code true} if the primary type or one of the mixin types of the node is the excluded type.
         * @throws RepositoryException never thrown by this implementation; kept for callers relying on the
         *                             declared signature.
         */
        boolean matchesNodeType(NodeState nodeState) throws RepositoryException {
            return nodeState.getNodeTypeName().equals(typeName)
                    || nodeState.getMixinTypeNames().contains(typeName);
        }

        /**
         * @param pathToCheck the path to test.
         * @return {@code true} if no path constraint is set, if the whole path matches the regular expression,
         *         or if the path starts with the configured prefix.
         */
        boolean matchPath(String pathToCheck) {
            if (path == null) {
                return true;
            }
            if (regexpPathPattern != null) {
                return regexpPathPattern.matcher(pathToCheck).matches();
            }
            return StringUtils.startsWith(pathToCheck, path);
        }
    }
}