org.alfresco.solr.query.AbstractQParser.java Source code

Java tutorial

Introduction

Here is the source code for org.alfresco.solr.query.AbstractQParser.java

Source

/*
 * #%L
 * Alfresco Solr 4
 * %%
 * Copyright (C) 2005 - 2016 Alfresco Software Limited
 * %%
 * This file is part of the Alfresco software. 
 * If the software was purchased under a paid Alfresco license, the terms of 
 * the paid license agreement will prevail.  Otherwise, the software is 
 * provided under the following open source license terms:
 * 
 * Alfresco is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * Alfresco is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with Alfresco. If not, see <http://www.gnu.org/licenses/>.
 * #L%
 */
package org.alfresco.solr.query;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import javax.swing.text.StyledEditorKit.BoldAction;

import org.alfresco.error.AlfrescoRuntimeException;
import org.alfresco.opencmis.dictionary.CMISStrictDictionaryService;
import org.alfresco.opencmis.search.CMISQueryOptions.CMISQueryMode;
import org.alfresco.repo.search.MLAnalysisMode;
import org.alfresco.repo.search.adaptor.lucene.QueryConstants;
import org.alfresco.repo.search.impl.QueryParserUtils;
import org.alfresco.repo.search.impl.parsers.AlfrescoFunctionEvaluationContext;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.repository.datatype.DefaultTypeConverter;
import org.alfresco.service.cmr.search.QueryConsistency;
import org.alfresco.service.cmr.search.SearchParameters;
import org.alfresco.service.cmr.search.SearchParameters.Operator;
import org.alfresco.service.cmr.security.AuthorityType;
import org.alfresco.service.namespace.NamespaceService;
import org.alfresco.solr.AlfrescoSolrDataModel;
import org.alfresco.solr.AlfrescoSolrDataModel.ContentFieldType;
import org.alfresco.solr.AlfrescoSolrDataModel.FieldUse;
import org.alfresco.solr.AlfrescoSolrDataModel.IndexedField;
import org.alfresco.util.Pair;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.util.ArrayUtil;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.update.processor.DetectedLanguage;
import org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessor;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.extensions.surf.util.I18NUtil;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import com.cybozu.labs.langdetect.Language;

/**
 * @author Andy
 */
public abstract class AbstractQParser extends QParser implements QueryConstants {
    private static char[] SEPARATORS = new char[] { ':', ',', '-', '!', '+', '=', ';', '~', '/' };

    protected final static Logger log = LoggerFactory.getLogger(AbstractQParser.class);

    protected boolean authset;

    public static final String ALFRESCO_JSON = "ALFRESCO_JSON";

    private static final String AUTHORITY_FILTER_FROM_JSON = "AUTHORITY_FILTER_FROM_JSON";

    private static final String TENANT_FILTER_FROM_JSON = "TENANT_FILTER_FROM_JSON";

    private static final String RERANK_QUERY_FROM_CONTEXT = "RERANK_QUERY_FROM_CONTEXT";

    static final String languages[] = { "af", "ar", "bg", "bn", "cs", "da", "de", "el", "en", "es", "et", "fa",
            "fi", "fr", "gu", "he", "hi", "hr", "hu", "id", "it", "ja", "kn", "ko", "lt", "lv", "mk", "ml", "mr",
            "ne", "nl", "no", "pa", "pl", "pt", "ro", "ru", "sk", "sl", "so", "sq", "sv", "sw", "ta", "te", "th",
            "tl", "tr", "uk", "ur", "vi", "zh-cn", "zh-tw" };

    /*
     * Loads one langdetect profile per entry of "languages" from the classpath (resolved relative to
     * LangDetectLanguageIdentifierUpdateProcessor) and registers them with the DetectorFactory.
     * Any failure aborts class initialisation.
     */
    static {
        try {
            List<String> profileData = new ArrayList<>(languages.length);
            for (String language : languages) {
                String resource = "langdetect-profiles/" + language;
                InputStream stream = LangDetectLanguageIdentifierUpdateProcessor.class
                        .getResourceAsStream(resource);
                if (stream == null) {
                    // Fail with a message naming the resource instead of an anonymous NullPointerException.
                    throw new IOException("Language profile resource not found: " + resource);
                }
                // try-with-resources closes the reader (and the underlying stream) even when reading fails.
                try (Reader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
                    profileData.add(new String(IOUtils.toCharArray(reader)));
                }
            }
            DetectorFactory.loadProfile(profileData);
            // Fixed seed so that repeated detections of the same text give the same answer.
            DetectorFactory.setSeed(0);
        } catch (Exception e) {
            throw new RuntimeException("Couldn't load language detection profile data", e);
        }
    }

    /** When {@code true}, the language of the "spellcheck.q" search term is detected and added as a locale. */
    private boolean autoDetectQueryLocale;

    /** Language codes that detection results are restricted to; an empty set accepts every language. */
    private HashSet<String> autoDetectQueryLocales = new HashSet<>();

    /** Language codes that are always added to the search parameters as locales. */
    private HashSet<String> fixedQueryLocales = new HashSet<>();

    /**
     * Creates the parser and reads the optional locale configuration from the supplied arguments.
     *
     * @param qstr String
     * @param localParams SolrParams
     * @param params SolrParams
     * @param req SolrQueryRequest
     * @param args optional settings, may be {@code null}. Recognised keys:
     *            {@code autoDetectQueryLocale} (boolean), {@code autoDetectQueryLocales} and
     *            {@code fixedQueryLocales} (comma separated language codes; entries not present in
     *            {@link #languages} are ignored)
     */
    public AbstractQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req,
            NamedList args) {
        super(qstr, localParams, params, req);
        if (args == null) {
            return;
        }

        Object autoDetect = args.get("autoDetectQueryLocale");
        if (autoDetect != null) {
            this.autoDetectQueryLocale = Boolean.parseBoolean(autoDetect.toString());
        }

        addKnownLocales(args.get("autoDetectQueryLocales"), autoDetectQueryLocales);
        addKnownLocales(args.get("fixedQueryLocales"), fixedQueryLocales);
    }

    /**
     * Splits a comma separated locale list and adds every recognised language code to the target set.
     * Entries are trimmed first so that values such as {@code "en, fr"} are fully honoured.
     *
     * @param arg the raw configuration value, may be {@code null}
     * @param target the set receiving the recognised language codes
     */
    private void addKnownLocales(Object arg, HashSet<String> target) {
        if (arg == null) {
            return;
        }
        for (String locale : arg.toString().split(",")) {
            String mappedLanguage = isKnownLocale(locale.trim());
            if (mappedLanguage != null) {
                target.add(mappedLanguage);
            }
        }
    }

    /**
     * Looks a locale name up in {@link #languages}, ignoring case.
     *
     * @param locale the name to look up
     * @return the matching entry of {@code languages}, or {@code null} when there is none
     */
    private String isKnownLocale(String locale) {
        String match = null;
        for (int i = 0; match == null && i < languages.length; i++) {
            if (locale.equalsIgnoreCase(languages[i])) {
                match = languages[i];
            }
        }
        return match;
    }

    /**
     * Builds the {@link SearchParameters} for the current request.
     * <p>
     * The JSON body of the request (cached in the request context under {@link #ALFRESCO_JSON}) supplies
     * the query, locales, templates, attributes and operators. When the query string is one of the
     * special markers, an authority filter, a tenant filter or the "spellcheck.q" parameter is used as
     * the query instead. Missing JSON elements are tolerated: processing of the JSON stops at the first
     * missing element and the values read so far are kept.
     *
     * @return the search parameters paired with a flag that is {@code TRUE} when the query is an
     *         authority or tenant filter
     */
    protected Pair<SearchParameters, Boolean> getSearchParameters() {
        SearchParameters searchParameters = new SearchParameters();
        Boolean isFilter = Boolean.FALSE;

        JSONObject json = loadRequestJson();

        if (json != null) {
            try {
                String queryString = getString();
                if (queryString != null) {
                    if (queryString.equals(AUTHORITY_FILTER_FROM_JSON)) {
                        // Flag first: it must stay set even if the JSON turns out to be incomplete.
                        isFilter = Boolean.TRUE;
                        applyAuthorityFilter(json, searchParameters);
                    } else if (queryString.equals(TENANT_FILTER_FROM_JSON)) {
                        isFilter = Boolean.TRUE;
                        applyTenantFilter(json, searchParameters);
                    } else if (queryString.equals(RERANK_QUERY_FROM_CONTEXT)) {
                        searchParameters.setQuery(getParam("spellcheck.q"));
                    }
                } else {
                    String query = json.getString("query");
                    if (query != null) {
                        searchParameters.setQuery(query);
                    }
                }

                applyJsonSettings(json, searchParameters);
            } catch (JSONException e) {
                // This is expected when there is no json element to the request
            }

            if (log.isDebugEnabled()) {
                log.debug(json.toString());
            }
        }

        if (searchParameters.getQuery() == null) {
            searchParameters.setQuery(getString());
        }

        if (searchParameters.getLocales().size() == 0) {
            searchParameters.addLocale(I18NUtil.getLocale());
        }

        String defaultField = getParam(CommonParams.DF);
        if (defaultField != null) {
            searchParameters.setDefaultFieldName(defaultField);
        }

        if (autoDetectQueryLocale) {
            String searchTerm = getParam("spellcheck.q");
            if (searchTerm != null) {
                searchParameters.setSearchTerm(searchTerm);
                List<DetectedLanguage> detected = detectLanguage(searchTerm);
                if ((detected != null) && (detected.size() > 0)) {
                    // Only the first detected language is considered.
                    Locale detectedLocale = Locale.forLanguageTag(detected.get(0).getLangCode());
                    if (localeIsNotIncluded(searchParameters, detectedLocale)) {
                        searchParameters.addLocale(Locale.forLanguageTag(detectedLocale.getLanguage()));
                    }
                }
            }
        }

        for (String locale : fixedQueryLocales) {
            searchParameters.addLocale(Locale.forLanguageTag(locale));
        }

        // searchParameters.setMlAnalaysisMode(getMLAnalysisMode());
        // NOTE(review): this unconditionally replaces any "defaultNamespace" read from the JSON - confirm intended.
        searchParameters.setNamespace(NamespaceService.CONTENT_MODEL_1_0_URI);

        return new Pair<SearchParameters, Boolean>(searchParameters, isFilter);
    }

    /**
     * Returns the JSON body of the request, parsing it from the last content stream on first use and
     * caching it in the request context.
     *
     * @return the parsed JSON, or {@code null} when there is no content stream or it is not valid JSON
     */
    private JSONObject loadRequestJson() {
        JSONObject json = (JSONObject) req.getContext().get(ALFRESCO_JSON);
        if (json != null) {
            return json;
        }

        Iterable<ContentStream> streams = req.getContentStreams();
        if (streams == null) {
            return null;
        }

        // Only the last stream is parsed, so only that one is opened.
        ContentStream last = null;
        for (ContentStream stream : streams) {
            last = stream;
        }
        if (last == null) {
            return null;
        }

        // TODO - replace with streaming-based solution e.g. SimpleJSON ContentHandler
        try (Reader reader = new BufferedReader(new InputStreamReader(last.getStream(), StandardCharsets.UTF_8))) {
            json = new JSONObject(new JSONTokener(reader));
            req.getContext().put(ALFRESCO_JSON, json);
            return json;
        } catch (JSONException e) {
            // This is expected when there is no json element to the request
            return null;
        } catch (IOException e) {
            throw new AlfrescoRuntimeException("IO Error parsing query parameters", e);
        }
    }

    /**
     * Copies the strings of a JSON array into a list.
     */
    private static ArrayList<String> toStringList(JSONArray array) throws JSONException {
        ArrayList<String> values = new ArrayList<String>(array.length());
        for (int i = 0; i < array.length(); i++) {
            values.add(array.getString(i));
        }
        return values;
    }

    /**
     * Returns the authority as it is written into the filter: group-like authorities (GROUP, EVERYONE,
     * GUEST) of a non-default tenant get an {@code @tenant} suffix, everything else is used as is.
     */
    private static String qualifyAuthority(String authority, String tenant) {
        switch (AuthorityType.getAuthorityType(authority)) {
        case GROUP:
        case EVERYONE:
        case GUEST:
            // Default tenant matches 4.0
            return tenant.length() == 0 ? authority : authority + "@" + tenant;
        default:
            return authority;
        }
    }

    /**
     * Builds the authority (and, unless disabled, deny) filter from the "tenants" and "authorities"
     * JSON arrays and sets it as the query.
     */
    private void applyAuthorityFilter(JSONObject json, SearchParameters searchParameters) throws JSONException {
        ArrayList<String> tenantList = toStringList(json.getJSONArray("tenants"));
        ArrayList<String> authorityList = toStringList(json.getJSONArray("authorities"));

        // 0 means every candidate separator occurs in some authority: fall back to one clause per authority.
        char separator = getSeparator(authorityList);

        StringBuilder authQuery = new StringBuilder();
        StringBuilder denyQuery = new StringBuilder();

        for (String tenant : tenantList) {
            for (String authority : authorityList) {
                if (separator == 0) {
                    if (authQuery.length() > 0) {
                        authQuery.append(" ");
                        denyQuery.append(" ");
                    }
                    String qualified = qualifyAuthority(authority, tenant);
                    authQuery.append("|AUTHORITY:\"").append(qualified).append("\"");
                    denyQuery.append("|DENIED:\"").append(qualified).append("\"");
                } else {
                    if (authQuery.length() == 0) {
                        authset = true;
                        authQuery.append("|AUTHSET:\"");
                        denyQuery.append("|DENYSET:\"");
                    }
                    String qualified = qualifyAuthority(authority, tenant);
                    authQuery.append(separator).append(qualified);
                    denyQuery.append(separator).append(qualified);
                }
            }
        }
        // NOTE(review): with an empty tenant or authority list this leaves a lone quote as the query - confirm
        // callers always send at least one of each.
        if (separator != 0) {
            authQuery.append("\"");
            denyQuery.append("\"");
        }

        if (authQuery.length() > 0) {
            // Default to true for safety reasons.
            final boolean anyDenyDenies = json.optBoolean("anyDenyDenies", true);

            if (anyDenyDenies) {
                authQuery.insert(0, "(").append(") AND NOT (").append(denyQuery).append(")");
                // Record that the clause has been added.
                // We only ever set this to true for solr4+
                req.getContext().put("processedDenies", Boolean.TRUE);
            }
            searchParameters.setQuery(authQuery.toString());
        }
    }

    /**
     * Builds the tenant filter from the "tenants" JSON array and sets it as the query.
     */
    private void applyTenantFilter(JSONObject json, SearchParameters searchParameters) throws JSONException {
        ArrayList<String> tenantList = toStringList(json.getJSONArray("tenants"));

        StringBuilder tenantQuery = new StringBuilder();
        for (String tenant : tenantList) {
            if (tenantQuery.length() > 0) {
                tenantQuery.append(" ");
            }
            // TODO: Need to check for the default tenant or no tenant (4.0) or we force a reindex
            // requirement later ...
            // Better to add default tenant to the 4.0 index
            String tenantName = tenant.length() > 0 ? tenant : "_DEFAULT_";
            tenantQuery.append("|TENANT:\"").append(tenantName).append("\"");
        }
        searchParameters.setQuery(tenantQuery.toString());
    }

    /**
     * Copies locales, templates, attributes, operators, namespace and consistency from the JSON onto
     * the search parameters. Stops with a {@link JSONException} at the first missing mandatory element.
     */
    private void applyJsonSettings(JSONObject json, SearchParameters searchParameters) throws JSONException {
        JSONArray locales = json.getJSONArray("locales");
        for (int i = 0; i < locales.length(); i++) {
            String localeString = locales.getString(i);
            Locale locale = DefaultTypeConverter.INSTANCE.convert(Locale.class, localeString);
            searchParameters.addLocale(locale);
        }

        JSONArray templates = json.getJSONArray("templates");
        for (int i = 0; i < templates.length(); i++) {
            JSONObject template = templates.getJSONObject(i);
            String name = template.getString("name");
            String queryTemplate = template.getString("template");
            searchParameters.addQueryTemplate(name, queryTemplate);
        }

        JSONArray allAttributes = json.getJSONArray("allAttributes");
        for (int i = 0; i < allAttributes.length(); i++) {
            searchParameters.addAllAttribute(allAttributes.getString(i));
        }

        searchParameters.setDefaultFTSOperator(Operator.valueOf(json.getString("defaultFTSOperator")));
        searchParameters
                .setDefaultFTSFieldConnective(Operator.valueOf(json.getString("defaultFTSFieldOperator")));
        if (json.has("mlAnalaysisMode")) {
            searchParameters.setMlAnalaysisMode(MLAnalysisMode.valueOf(json.getString("mlAnalaysisMode")));
        }
        searchParameters.setNamespace(json.getString("defaultNamespace"));

        JSONArray textAttributes = json.getJSONArray("textAttributes");
        for (int i = 0; i < textAttributes.length(); i++) {
            // Text attributes were previously (wrongly) registered as "all" attributes.
            searchParameters.addTextAttribute(textAttributes.getString(i));
        }

        searchParameters.setQueryConsistency(QueryConsistency.valueOf(json.getString("queryConsistency")));
    }

    /**
     * Tells whether the language of the detected locale is missing from the locales already held by
     * the search parameters. Only the language part of each locale is compared.
     *
     * @param searchParameters SearchParameters whose locales are inspected
     * @param detectedLocale Locale to look for
     * @return {@code true} when no existing locale has the same language
     */
    private boolean localeIsNotIncluded(SearchParameters searchParameters, Locale detectedLocale) {
        boolean missing = true;
        Iterator<Locale> existing = searchParameters.getLocales().iterator();
        while (missing && existing.hasNext()) {
            missing = !existing.next().getLanguage().equals(detectedLocale.getLanguage());
        }
        return missing;
    }

    /**
     * Picks a delimiter for joining the authorities: the first entry of {@code SEPARATORS} that does
     * not occur anywhere in the given authorities.
     *
     * @param authorityList ArrayList<String> of authorities to be joined
     * @return the chosen separator, or {@code 0} when every candidate occurs in some authority
     */
    private char getSeparator(ArrayList<String> authorityList) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < authorityList.size(); i++) {
            joined.append(authorityList.get(i));
        }
        String allAuthorities = joined.toString();

        for (char candidate : SEPARATORS) {
            if (allAuthorities.indexOf(candidate) < 0) {
                return candidate;
            }
        }
        return 0;
    }

    /**
     * Builds the sort specification, taking sort, start and rows from the local params when any of
     * them is present there and otherwise (if allowed) from the global params. Every field name in
     * the sort string is mapped through {@link AlfrescoSolrDataModel#mapProperty} and a bare
     * {@code ID} field is rewritten to {@code id}.
     *
     * @param useGlobalParams whether the global request params may be consulted
     * @return the sort spec with offset (default 0) and count (default 10, capped at 1000000) applied
     * @throws SyntaxError if the query cannot be parsed
     * @see org.apache.solr.search.QParser#getSort(boolean)
     */
    @Override
    public SortSpec getSort(boolean useGlobalParams) throws SyntaxError {

        getQuery(); // ensure query is parsed first

        String sortStr = null;
        String startS = null;
        String rowsS = null;

        if (localParams != null) {
            sortStr = localParams.get(CommonParams.SORT);
            startS = localParams.get(CommonParams.START);
            rowsS = localParams.get(CommonParams.ROWS);

            // if any of these parameters are present, don't go back to the global params
            if (sortStr != null || startS != null || rowsS != null) {
                useGlobalParams = false;
            }
        }

        if (useGlobalParams) {
            if (sortStr == null) {
                sortStr = params.get(CommonParams.SORT);
            }
            if (startS == null) {
                startS = params.get(CommonParams.START);
            }
            if (rowsS == null) {
                rowsS = params.get(CommonParams.ROWS);
            }
        }

        final int maxRows = 1000000;
        int start = startS != null ? Integer.parseInt(startS) : 0;
        int rows = rowsS != null ? Integer.parseInt(rowsS) : 10;
        // Avoid +1 in SOLR code which produces null:java.lang.NegativeArraySizeException at
        // org.apache.lucene.util.PriorityQueue.<init>(PriorityQueue.java:56)
        if (rows > maxRows) {
            rows = maxRows;
        }

        if (sortStr != null) {
            // Fix sort fields here
            sortStr = mapSortFields(sortStr);
            // Lower-case a stand-alone ID field: at the start, or after whitespace or a comma
            // (so "x asc,ID desc" is handled as well), and followed by whitespace.
            sortStr = sortStr.replaceAll("(?<![^\\s,])ID(?=\\s)", "id");
        }
        SortSpec sort = QueryParsing.parseSortSpec(sortStr, req);

        sort.setOffset(start);
        sort.setCount(rows);
        return sort;
    }

    /**
     * Maps every token of a sort string (tokens are delimited by whitespace or commas) through the
     * data model, keeping all delimiters exactly as they were.
     */
    private String mapSortFields(String sortStr) {
        StringBuilder mapped = new StringBuilder(sortStr.length());
        int tokenStart = -1;
        for (int i = 0; i < sortStr.length(); i++) {
            char c = sortStr.charAt(i);
            if (!Character.isWhitespace(c) && (c != ',')) {
                if (tokenStart < 0) {
                    tokenStart = i;
                }
                continue;
            }
            if (tokenStart >= 0) {
                mapped.append(mapSortField(sortStr.substring(tokenStart, i)));
                tokenStart = -1;
            }
            mapped.append(c);
        }
        if (tokenStart >= 0) {
            mapped.append(mapSortField(sortStr.substring(tokenStart)));
        }
        return mapped.toString();
    }

    /**
     * Maps a single sort token to the field name used for sorting.
     */
    private String mapSortField(String token) {
        return AlfrescoSolrDataModel.getInstance().mapProperty(token, FieldUse.SORT, getReq());
    }

    /**
     * Runs language detection over the given text.
     *
     * @param content the text to analyse
     * @return the detected languages with their probabilities, limited to {@code autoDetectQueryLocales}
     *         when that set is not empty; an empty list for blank input or when detection fails
     */
    private List<DetectedLanguage> detectLanguage(String content) {
        if (content.trim().isEmpty()) { // to be consistent with the tika impl?
            log.debug("No input text to detect language from, returning empty list");
            return Collections.emptyList();
        }

        final boolean acceptAll = autoDetectQueryLocales.size() == 0;
        final ArrayList<DetectedLanguage> accepted = new ArrayList<>();
        try {
            Detector detector = DetectorFactory.create();
            detector.append(content);
            for (Language candidate : detector.getProbabilities()) {
                if (acceptAll || autoDetectQueryLocales.contains(candidate.lang)) {
                    accepted.add(new DetectedLanguage(candidate.lang, candidate.prob));
                }
            }
        } catch (LangDetectException e) {
            log.debug("Could not determine language, returning empty list: ", e);
            return Collections.emptyList();
        }
        return accepted;
    }

    /**
     * Immutable pairing of a detected language code with the certainty reported for it.
     */
    public class DetectedLanguage {
        private final Double certainty;
        private final String langCode;

        /**
         * @param lang the detected language code
         * @param certainty the certainty reported for that language
         */
        DetectedLanguage(final String lang, final Double certainty) {
            this.certainty = certainty;
            this.langCode = lang;
        }

        /**
         * Returns the detected certainty for this language
         * @return certainty as a value between 0.0 and 1.0 where 1.0 is 100% certain
         */
        public Double getCertainty() {
            return this.certainty;
        }

        /**
         * Returns the detected language code
         * @return language code as a string
         */
        public String getLangCode() {
            return this.langCode;
        }
    }
}