Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.search; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.StopFilterFactory; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.index.Term; import org.apache.lucene.queries.function.BoostedQuery; import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.ProductFloatFunction; import org.apache.lucene.queries.function.valuesource.QueryValueSource; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; 
import org.apache.solr.analysis.TokenizerChain; import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.parser.QueryParser; import org.apache.solr.parser.SolrQueryParserBase.MagicFieldName; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.FieldType; import org.apache.solr.util.SolrPluginUtils; import com.google.common.collect.Multimap; import com.google.common.collect.Multimaps; /** * Query parser that generates DisjunctionMaxQueries based on user configuration. * See Wiki page http://wiki.apache.org/solr/ExtendedDisMax */ public class ExtendedDismaxQParser extends QParser { /** * A field we can't ever find in any schema, so we can safely tell * DisjunctionMaxQueryParser to use it as our defaultField, and * map aliases from it to any field in our schema. */ private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC"; /** shorten the class references for utilities */ private static class U extends SolrPluginUtils { /* :NOOP */ } /** shorten the class references for utilities */ private static interface DMP extends DisMaxParams { /** * User fields. The fields that can be used by the end user to create field-specific queries. */ public static String UF = "uf"; /** * Lowercase Operators. If set to true, 'or' and 'and' will be considered OR and AND, otherwise * lowercase operators will be considered terms to search for. */ public static String LOWERCASE_OPS = "lowercaseOperators"; /** * Multiplicative boost. Boost functions which scores are going to be multiplied to the score * of the main query (instead of just added, like with bf) */ public static String MULT_BOOST = "boost"; /** * If set to true, stopwords are removed from the query. 
*/ public static String STOPWORDS = "stopwords"; } private ExtendedDismaxConfiguration config; private Query parsedUserQuery; private Query altUserQuery; private List<Query> boostQueries; private boolean parsed = false; public ExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); config = this.createConfiguration(qstr, localParams, params, req); } @Override public Query parse() throws SyntaxError { parsed = true; /* the main query we will execute. we disable the coord because * this query is an artificial construct */ BooleanQuery.Builder query = new BooleanQuery.Builder(); /* * * Main User Query * * */ parsedUserQuery = null; String userQuery = getString(); altUserQuery = null; if (userQuery == null || userQuery.trim().length() == 0) { // If no query is specified, we may have an alternate if (config.altQ != null) { QParser altQParser = subQuery(config.altQ, null); altUserQuery = altQParser.getQuery(); query.add(altUserQuery, BooleanClause.Occur.MUST); } else { return null; // throw new SyntaxError("missing query string" ); } } else { // There is a valid query string ExtendedSolrQueryParser up = createEdismaxQueryParser(this, IMPOSSIBLE_FIELD_NAME); up.addAlias(IMPOSSIBLE_FIELD_NAME, config.tiebreaker, config.queryFields); addAliasesFromRequest(up, config.tiebreaker); up.setPhraseSlop(config.qslop); // slop for explicit user phrase queries up.setAllowLeadingWildcard(true); // defer escaping and only do if lucene parsing fails, or we need phrases // parsing fails. Need to sloppy phrase queries anyway though. List<Clause> clauses = splitIntoClauses(userQuery, false); // Always rebuild mainUserQuery from clauses to catch modifications from splitIntoClauses // This was necessary for userFields modifications to get propagated into the query. // Convert lower or mixed case operators to uppercase if we saw them. 
// only do this for the lucene query part and not for phrase query boosting // since some fields might not be case insensitive. // We don't use a regex for this because it might change and AND or OR in // a phrase query in a case sensitive field. String mainUserQuery = rebuildUserQuery(clauses, config.lowercaseOperators); // but always for unstructured implicit bqs created by getFieldQuery up.minShouldMatch = config.minShouldMatch; parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config); if (parsedUserQuery == null) { parsedUserQuery = parseEscapedQuery(up, escapeUserQuery(clauses), config); } query.add(parsedUserQuery, BooleanClause.Occur.MUST); addPhraseFieldQueries(query, clauses, config); } /* * * Boosting Query * * */ boostQueries = getBoostQueries(); for (Query f : boostQueries) { query.add(f, BooleanClause.Occur.SHOULD); } /* * * Boosting Functions * * */ List<Query> boostFunctions = getBoostFunctions(); for (Query f : boostFunctions) { query.add(f, BooleanClause.Occur.SHOULD); } // // create a boosted query (scores multiplied by boosts) // Query topQuery = query.build(); List<ValueSource> boosts = getMultiplicativeBoosts(); if (boosts.size() > 1) { ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()])); topQuery = new BoostedQuery(topQuery, prod); } else if (boosts.size() == 1) { topQuery = new BoostedQuery(topQuery, boosts.get(0)); } return topQuery; } /** * Adds shingled phrase queries to all the fields specified in the pf, pf2 anf pf3 parameters * */ protected void addPhraseFieldQueries(BooleanQuery.Builder query, List<Clause> clauses, ExtendedDismaxConfiguration config) throws SyntaxError { // sloppy phrase queries for proximity List<FieldParams> allPhraseFields = config.getAllPhraseFields(); if (allPhraseFields.size() > 0) { // find non-field clauses List<Clause> normalClauses = new ArrayList<>(clauses.size()); for (Clause clause : clauses) { if (clause.field != null || clause.isPhrase) continue; // 
check for keywords "AND,OR,TO" if (clause.isBareWord()) { String s = clause.val; // avoid putting explicit operators in the phrase query if ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s) || "TO".equals(s)) continue; } normalClauses.add(clause); } // create a map of {wordGram, [phraseField]} Multimap<Integer, FieldParams> phraseFieldsByWordGram = Multimaps.index(allPhraseFields, FieldParams::getWordGrams); // for each {wordGram, [phraseField]} entry, create and add shingled field queries to the main user query for (Map.Entry<Integer, Collection<FieldParams>> phraseFieldsByWordGramEntry : phraseFieldsByWordGram .asMap().entrySet()) { // group the fields within this wordGram collection by their associated slop (it's possible that the same // field appears multiple times for the same wordGram count but with different slop values. In this case, we // should take the *sum* of those phrase queries, rather than the max across them). Multimap<Integer, FieldParams> phraseFieldsBySlop = Multimaps .index(phraseFieldsByWordGramEntry.getValue(), FieldParams::getSlop); for (Map.Entry<Integer, Collection<FieldParams>> phraseFieldsBySlopEntry : phraseFieldsBySlop .asMap().entrySet()) { addShingledPhraseQueries(query, normalClauses, phraseFieldsBySlopEntry.getValue(), phraseFieldsByWordGramEntry.getKey(), config.tiebreaker, phraseFieldsBySlopEntry.getKey()); } } } } /** * Creates an instance of ExtendedDismaxConfiguration. It will contain all * the necessary parameters to parse the query */ protected ExtendedDismaxConfiguration createConfiguration(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { return new ExtendedDismaxConfiguration(localParams, params, req); } /** * Creates an instance of ExtendedSolrQueryParser, the query parser that's going to be used * to parse the query. 
*/ protected ExtendedSolrQueryParser createEdismaxQueryParser(QParser qParser, String field) { return new ExtendedSolrQueryParser(qParser, field); } /** * Parses an escaped version of the user's query. This method is called * in the event that the original query encounters exceptions during parsing. * * @param up parser used * @param escapedUserQuery query that is parsed, should already be escaped so that no trivial parse errors are encountered * @param config Configuration options for this parse request * @return the resulting query (flattened if needed) with "min should match" rules applied as specified in the config. * @see #parseOriginalQuery * @see SolrPluginUtils#flattenBooleanQuery */ protected Query parseEscapedQuery(ExtendedSolrQueryParser up, String escapedUserQuery, ExtendedDismaxConfiguration config) throws SyntaxError { Query query = up.parse(escapedUserQuery); if (query instanceof BooleanQuery) { BooleanQuery.Builder t = new BooleanQuery.Builder(); SolrPluginUtils.flattenBooleanQuery(t, (BooleanQuery) query); SolrPluginUtils.setMinShouldMatch(t, config.minShouldMatch, config.mmAutoRelax); query = t.build(); } return query; } /** * Parses the user's original query. This method attempts to cleanly parse the specified query string using the specified parser, any Exceptions are ignored resulting in null being returned. * * @param up parser used * @param mainUserQuery query string that is parsed * @param clauses used to dictate "min should match" logic * @param config Configuration options for this parse request * @return the resulting query with "min should match" rules applied as specified in the config. 
* @see #parseEscapedQuery */ protected Query parseOriginalQuery(ExtendedSolrQueryParser up, String mainUserQuery, List<Clause> clauses, ExtendedDismaxConfiguration config) { Query query = null; try { up.setRemoveStopFilter(!config.stopwords); up.exceptions = true; query = up.parse(mainUserQuery); if (shouldRemoveStopFilter(config, query)) { // if the query was all stop words, remove none of them up.setRemoveStopFilter(true); query = up.parse(mainUserQuery); } } catch (Exception e) { // ignore failure and reparse later after escaping reserved chars up.exceptions = false; } if (query == null) { return null; } // For correct lucene queries, turn off mm processing if no explicit mm spec was provided // and there were explicit operators (except for AND). if (query instanceof BooleanQuery) { // config.minShouldMatch holds the value of mm which MIGHT have come from the user, // but could also have been derived from q.op. String mmSpec = config.minShouldMatch; if (foundOperators(clauses, config.lowercaseOperators)) { mmSpec = params.get(DisMaxParams.MM, "0%"); // Use provided mm spec if present, otherwise turn off mm processing } query = SolrPluginUtils.setMinShouldMatch((BooleanQuery) query, mmSpec, config.mmAutoRelax); } return query; } /** * Determines if query should be re-parsed removing the stop filter. * @return true if there are stopwords configured and the parsed query was empty * false in any other case. 
*/ protected boolean shouldRemoveStopFilter(ExtendedDismaxConfiguration config, Query query) { return config.stopwords && isEmpty(query); } private String escapeUserQuery(List<Clause> clauses) { StringBuilder sb = new StringBuilder(); for (Clause clause : clauses) { boolean doQuote = clause.isPhrase; String s = clause.val; if (!clause.isPhrase && ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s))) { doQuote = true; } if (clause.must != 0) { sb.append(clause.must); } if (clause.field != null) { sb.append(clause.field); sb.append(':'); } if (doQuote) { sb.append('"'); } sb.append(clause.val); if (doQuote) { sb.append('"'); } if (clause.field != null) { // Add the default user field boost, if any Float boost = config.userFields.getBoost(clause.field); if (boost != null) sb.append("^").append(boost); } sb.append(' '); } return sb.toString(); } /** * Returns true if at least one of the clauses is/has an explicit operator (except for AND) */ private boolean foundOperators(List<Clause> clauses, boolean lowercaseOperators) { for (Clause clause : clauses) { if (clause.must == '+') return true; if (clause.must == '-') return true; if (clause.isBareWord()) { String s = clause.val; if ("OR".equals(s)) { return true; } else if ("NOT".equals(s)) { return true; } else if (lowercaseOperators && "or".equals(s)) { return true; } } } return false; } /** * Generates a query string from the raw clauses, uppercasing * 'and' and 'or' as needed. * @param clauses the clauses of the query string to be rebuilt * @param lowercaseOperators if true, lowercase 'and' and 'or' clauses will * be recognized as operators and uppercased in the final query string. * @return the generated query string. 
*/ protected String rebuildUserQuery(List<Clause> clauses, boolean lowercaseOperators) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < clauses.size(); i++) { Clause clause = clauses.get(i); String s = clause.raw; // and and or won't be operators at the start or end if (lowercaseOperators && i > 0 && i + 1 < clauses.size()) { if ("AND".equalsIgnoreCase(s)) { s = "AND"; } else if ("OR".equalsIgnoreCase(s)) { s = "OR"; } } sb.append(s); sb.append(' '); } return sb.toString(); } /** * Parses all multiplicative boosts */ protected List<ValueSource> getMultiplicativeBoosts() throws SyntaxError { List<ValueSource> boosts = new ArrayList<>(); if (config.hasMultiplicativeBoosts()) { for (String boostStr : config.multBoosts) { if (boostStr == null || boostStr.length() == 0) continue; Query boost = subQuery(boostStr, FunctionQParserPlugin.NAME).getQuery(); ValueSource vs; if (boost instanceof FunctionQuery) { vs = ((FunctionQuery) boost).getValueSource(); } else { vs = new QueryValueSource(boost, 1.0f); } boosts.add(vs); } } return boosts; } /** * Parses all function queries */ protected List<Query> getBoostFunctions() throws SyntaxError { List<Query> boostFunctions = new LinkedList<>(); if (config.hasBoostFunctions()) { for (String boostFunc : config.boostFuncs) { if (null == boostFunc || "".equals(boostFunc)) continue; Map<String, Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc); for (String f : ff.keySet()) { Query fq = subQuery(f, FunctionQParserPlugin.NAME).getQuery(); Float b = ff.get(f); if (null != b && b.floatValue() != 1f) { fq = new BoostQuery(fq, b); } boostFunctions.add(fq); } } } return boostFunctions; } /** * Parses all boost queries */ protected List<Query> getBoostQueries() throws SyntaxError { List<Query> boostQueries = new LinkedList<>(); if (config.hasBoostParams()) { for (String qs : config.boostParams) { if (qs.trim().length() == 0) continue; Query q = subQuery(qs, null).getQuery(); boostQueries.add(q); } } return boostQueries; } /** 
* Extracts all the aliased fields from the requests and adds them to up */ private void addAliasesFromRequest(ExtendedSolrQueryParser up, float tiebreaker) { Iterator<String> it = config.solrParams.getParameterNamesIterator(); while (it.hasNext()) { String param = it.next(); if (param.startsWith("f.") && param.endsWith(".qf")) { // Add the alias String fname = param.substring(2, param.length() - 3); String qfReplacement = config.solrParams.get(param); Map<String, Float> parsedQf = SolrPluginUtils.parseFieldBoosts(qfReplacement); if (parsedQf.size() == 0) return; up.addAlias(fname, tiebreaker, parsedQf); } } } /** * Modifies the main query by adding a new optional Query consisting * of shingled phrase queries across the specified clauses using the * specified field => boost mappings. * * @param mainQuery Where the phrase boosting queries will be added * @param clauses Clauses that will be used to construct the phrases * @param fields Field => boost mappings for the phrase queries * @param shingleSize how big the phrases should be, 0 means a single phrase * @param tiebreaker tie breaker value for the DisjunctionMaxQueries */ protected void addShingledPhraseQueries(final BooleanQuery.Builder mainQuery, final List<Clause> clauses, final Collection<FieldParams> fields, int shingleSize, final float tiebreaker, final int slop) throws SyntaxError { if (null == fields || fields.isEmpty() || null == clauses || clauses.size() < shingleSize) return; if (0 == shingleSize) shingleSize = clauses.size(); final int lastClauseIndex = shingleSize - 1; StringBuilder userPhraseQuery = new StringBuilder(); for (int i = 0; i < clauses.size() - lastClauseIndex; i++) { userPhraseQuery.append('"'); for (int j = 0; j <= lastClauseIndex; j++) { userPhraseQuery.append(clauses.get(i + j).val); userPhraseQuery.append(' '); } userPhraseQuery.append('"'); userPhraseQuery.append(' '); } /* for parsing sloppy phrases using DisjunctionMaxQueries */ ExtendedSolrQueryParser pp = 
createEdismaxQueryParser(this, IMPOSSIBLE_FIELD_NAME); pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields)); pp.setPhraseSlop(slop); pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords /* :TODO: reevaluate using makeDismax=true vs false... * * The DismaxQueryParser always used DisjunctionMaxQueries for the * pf boost, for the same reasons it used them for the qf fields. * When Yonik first wrote the ExtendedDismaxQParserPlugin, he added * the "makeDismax=false" property to use BooleanQueries instead, but * when asked why his response was "I honestly don't recall" ... * * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813 * * so for now, we continue to use dismax style queries because it * seems the most logical and is back compatible, but we should * try to figure out what Yonik was thinking at the time (because he * rarely does things for no reason) */ pp.makeDismax = true; // minClauseSize is independent of the shingleSize because of stop words // (if they are removed from the middle, so be it, but we need at least // two or there shouldn't be a boost) pp.minClauseSize = 2; // TODO: perhaps we shouldn't use synonyms either... Query phrase = pp.parse(userPhraseQuery.toString()); if (phrase != null) { mainQuery.add(phrase, BooleanClause.Occur.SHOULD); } } /** * @return a {fieldName, fieldBoost} map for the given fields. */ private Map<String, Float> getFieldBoosts(Collection<FieldParams> fields) { Map<String, Float> fieldBoostMap = new LinkedHashMap<>(fields.size()); for (FieldParams field : fields) { fieldBoostMap.put(field.getField(), field.getBoost()); } return fieldBoostMap; } @Override public String[] getDefaultHighlightFields() { return config.queryFields.keySet().toArray(new String[0]); } @Override public Query getHighlightQuery() throws SyntaxError { if (!parsed) parse(); return parsedUserQuery == null ? 
altUserQuery : parsedUserQuery; } @Override public void addDebugInfo(NamedList<Object> debugInfo) { super.addDebugInfo(debugInfo); debugInfo.add("altquerystring", altUserQuery); if (null != boostQueries) { debugInfo.add("boost_queries", config.boostParams); debugInfo.add("parsed_boost_queries", QueryParsing.toString(boostQueries, getReq().getSchema())); } debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF)); } // FIXME: Not in use // public static CharSequence partialEscape(CharSequence s) { // StringBuilder sb = new StringBuilder(); // // int len = s.length(); // for (int i = 0; i < len; i++) { // char c = s.charAt(i); // if (c == ':') { // // look forward to make sure it's something that won't // // cause a parse exception (something that won't be escaped... like // // +,-,:, whitespace // if (i+1<len && i>0) { // char ch = s.charAt(i+1); // if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) { // // OK, at this point the chars after the ':' will be fine. // // now look back and try to determine if this is a fieldname // // [+,-]? [letter,_] [letter digit,_,-,.]* // // This won't cover *all* possible lucene fieldnames, but we should // // only pick nice names to begin with // int start, pos; // for (start=i-1; start>=0; start--) { // ch = s.charAt(start); // if (Character.isWhitespace(ch)) break; // } // // // skip whitespace // pos = start+1; // // // skip leading + or - // ch = s.charAt(pos); // if (ch=='+' || ch=='-') { // pos++; // } // // // we don't need to explicitly check for end of string // // since ':' will act as our sentinal // // // first char can't be '-' or '.' 
//              ch = s.charAt(pos++);
  //              if (Character.isJavaIdentifierPart(ch)) {
  //
  //                for(;;) {
  //                  ch = s.charAt(pos++);
  //                  if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
  //                    break;
  //                  }
  //                }
  //
  //                if (pos<=i) {
  //                  // OK, we got to the ':' and everything looked like a valid fieldname, so
  //                  // don't escape the ':'
  //                  sb.append(':');
  //                  continue;  // jump back to start of outer-most loop
  //                }
  //
  //              }
  //
  //
  //          }
  //        }
  //
  //        // we fell through to here, so we should escape this like other reserved chars.
  //        sb.append('\\');
  //      }
  //      else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
  //          c == '^' || c == '[' || c == ']' ||
  //          c == '{'  || c == '}' || c == '~' || c == '*' || c == '?'
  //          )
  //      {
  //        sb.append('\\');
  //      }
  //      sb.append(c);
  //    }
  //    return sb;
  //  }

  /**
   * One whitespace-delimited piece of the user's query string, as produced by
   * {@link #splitIntoClauses}.
   */
  protected static class Clause {

    /** @return true when the clause has no +/- prefix and is not a quoted phrase */
    boolean isBareWord() {
      return must == 0 && !isPhrase;
    }

    protected String field;
    protected String rawField; // if the clause is +(foo:bar) then rawField=(foo
    protected boolean isPhrase;
    protected boolean hasWhitespace;
    protected boolean hasSpecialSyntax;
    protected boolean syntaxError;
    protected char must; // + or -
    protected String val; // the field value (minus the field name, +/-, quotes)
    protected String raw; // the raw clause w/o leading/trailing whitespace
  }

  /**
   * Splits the query string into clauses.
   * <p>
   * Clauses are separated by whitespace that is not inside a double-quoted string.
   * For each clause a leading {@code +} or {@code -} is recorded in {@code must}, and a
   * field prefix is recorded when {@link #getFieldName} recognises it and the user
   * fields configuration allows it. {@code val} receives the clause text with special
   * characters backslash-escaped; {@code raw} receives the untouched substring, with
   * colons escaped when no user field was accepted (except for {@code *:*}) or with
   * the default user field boost appended when the clause has a field and no {@code ^}.
   * When a quoted phrase is never closed, the whole string is split again with
   * quotes treated like any other character.
   *
   * @param s           the query string to split
   * @param ignoreQuote if true, double quotes do not start a phrase
   * @return the clauses in the order in which they appear in {@code s}
   */
  public List<Clause> splitIntoClauses(String s, boolean ignoreQuote) {
    ArrayList<Clause> lst = new ArrayList<>(4);
    Clause clause;

    int pos = 0;
    int end = s.length();
    char ch = 0;
    int start;
    boolean disallowUserField;
    while (pos < end) {
      clause = new Clause();
      disallowUserField = true;

      ch = s.charAt(pos);

      // skip leading whitespace
      while (Character.isWhitespace(ch)) {
        if (++pos >= end) break;
        ch = s.charAt(pos);
      }

      start = pos;

      // a +/- only counts as a prefix when something follows it
      if ((ch == '+' || ch == '-') && (pos + 1) < end) {
        clause.must = ch;
        pos++;
      }

      clause.field = getFieldName(s, pos, end);
      if (clause.field != null && !config.userFields.isAllowed(clause.field)) {
        clause.field = null;
      }
      if (clause.field != null) {
        disallowUserField = false;
        int colon = s.indexOf(':', pos);
        clause.rawField = s.substring(pos, colon);
        pos += colon - pos; // skip the field name
        pos++; // skip the ':'
      }

      if (pos >= end) break;

      char inString = 0;

      ch = s.charAt(pos);
      if (!ignoreQuote && ch == '"') {
        clause.isPhrase = true;
        inString = '"';
        pos++;
      }

      StringBuilder sb = new StringBuilder();
      while (pos < end) {
        ch = s.charAt(pos++);
        if (ch == '\\') { // skip escaped chars, but leave escaped
          sb.append(ch);
          if (pos >= end) {
            sb.append(ch); // double backslash if we are at the end of the string
            break;
          }
          ch = s.charAt(pos++);
          sb.append(ch);
          continue;
        } else if (inString != 0 && ch == inString) {
          // closing quote of the phrase
          inString = 0;
          break;
        } else if (Character.isWhitespace(ch)) {
          clause.hasWhitespace = true;
          if (inString == 0) {
            // end of the token if we aren't in a string, backing
            // up the position.
            pos--;
            break;
          }
        }

        if (inString == 0) {
          // every case falls through to the shared escape statement below
          switch (ch) {
            case '!':
            case '(':
            case ')':
            case ':':
            case '^':
            case '[':
            case ']':
            case '{':
            case '}':
            case '~':
            case '*':
            case '?':
            case '"':
            case '+':
            case '-':
            case '\\':
            case '|':
            case '&':
            case '/':
              clause.hasSpecialSyntax = true;
              sb.append('\\');
          }
        } else if (ch == '"') {
          // only char we need to escape in a string is double quote
          sb.append('\\');
        }
        sb.append(ch);
      }
      clause.val = sb.toString();

      if (clause.isPhrase) {
        if (inString != 0) {
          // detected bad quote balancing... retry
          // parsing with quotes like any other char
          return splitIntoClauses(s, true);
        }

        // special syntax in a string isn't special
        clause.hasSpecialSyntax = false;
      } else {
        // an empty clause... must be just a + or - on its own
        if (clause.val.length() == 0) {
          clause.syntaxError = true;
          if (clause.must != 0) {
            clause.val = "\\" + clause.must;
            clause.must = 0;
            clause.hasSpecialSyntax = true;
          } else {
            // uh.. this shouldn't happen.
            clause = null;
          }
        }
      }

      if (clause != null) {
        if (disallowUserField) {
          clause.raw = s.substring(start, pos);
          // escape colons, except for "match all" query
          if (!"*:*".equals(clause.raw)) {
            clause.raw = clause.raw.replaceAll("([^\\\\]):", "$1\\\\:");
          }
        } else {
          clause.raw = s.substring(start, pos);
          // Add default userField boost if no explicit boost exists
          if (config.userFields.isAllowed(clause.field) && !clause.raw.contains("^")) {
            Float boost = config.userFields.getBoost(clause.field);
            if (boost != null)
              clause.raw += "^" + boost;
          }
        }
        lst.add(clause);
      }
    }

    return lst;
  }

  /**
   * returns a field name or legal field alias from the current
   * position of the string
   * <p>
   * A name is only returned when it is followed by a colon that is itself followed by a
   * non-whitespace character, and when it is a schema field, has an
   * {@code f.<name>.qf} request parameter, or is a magic field name. Leading
   * {@code (}, {@code +} and {@code -} characters are skipped before the name.
   *
   * @param s   the query string
   * @param pos position at which the candidate field name starts
   * @param end end of the region to inspect (exclusive)
   * @return the field name, or null when no acceptable name starts at {@code pos}
   */
  public String getFieldName(String s, int pos, int end) {
    if (pos >= end) return null;
    int p = pos;
    int colon = s.indexOf(':', pos);
    // make sure there is space after the colon, but not whitespace
    if (colon <= pos || colon + 1 >= end || Character.isWhitespace(s.charAt(colon + 1))) return null;
    char ch = s.charAt(p++);
    while ((ch == '(' || ch == '+' || ch == '-') && (pos < end)) {
      ch = s.charAt(p++);
      pos++;
    }
    if (!Character.isJavaIdentifierPart(ch)) return null;
    while (p < colon) {
      ch = s.charAt(p++);
      if (!(Character.isJavaIdentifierPart(ch) || ch == '-' || ch == '.')) return null;
    }
    String fname = s.substring(pos, p);
    boolean isInSchema = getReq().getSchema().getFieldTypeNoEx(fname) != null;
    boolean isAlias = config.solrParams.get("f." + fname + ".qf") != null;
    boolean isMagic = (null != MagicFieldName.get(fname));

    return (isInSchema || isAlias || isMagic) ? fname : null;
  }

  /**
   * Splits a string on whitespace that is not inside a double-quoted section.
   * A backslash skips the character that follows it. When the quotes turn out to be
   * unbalanced, the string is split again with quotes ignored.
   *
   * @param s           the string to split
   * @param ignoreQuote if true, double quotes have no special meaning
   * @return the pieces of {@code s}, quotes and backslashes included
   */
  public static List<String> split(String s, boolean ignoreQuote) {
    ArrayList<String> lst = new ArrayList<>(4);
    int pos = 0, start = 0, end = s.length();
    char inString = 0;
    char ch = 0;
    while (pos < end) {
      char prevChar = ch;
      ch = s.charAt(pos++);
      if (ch == '\\') { // skip escaped chars
        pos++;
      } else if (inString != 0 && ch == inString) {
        inString = 0;
      } else if (!ignoreQuote && ch == '"') {
        // If char is directly preceded by a number or letter
        // then don't treat it as the start of a string.
        if (!Character.isLetterOrDigit(prevChar)) {
          inString = ch;
        }
      } else if (Character.isWhitespace(ch) && inString == 0) {
        lst.add(s.substring(start, pos - 1));
        start = pos;
      }
    }
    if (start < end) {
      lst.add(s.substring(start, end));
    }

    if (inString != 0) {
      // unbalanced quote... ignore them
      return split(s, true);
    }

    return lst;
  }

  /** Kind of query most recently requested from the {@link ExtendedSolrQueryParser}. */
  enum QType {
    FIELD,
    PHRASE,
    PREFIX,
    WILDCARD,
    FUZZY,
    RANGE
  }

  /** Single shared exception instance, created once when the class is initialized. */
  static final RuntimeException unknownField = new RuntimeException("UnknownField");

  static {
    unknownField.fillInStackTrace();
  }

  /**
   * A subclass of SolrQueryParser that supports aliasing fields for
   * constructing DisjunctionMaxQueries.
   */
  public static class ExtendedSolrQueryParser extends SolrQueryParser {

    /** A simple container for storing alias info */
    protected class Alias {
      public float tie;
      public Map<String, Float> fields;
    }

    boolean makeDismax = true;
    boolean allowWildcard = true;
    int minClauseSize = 0; // minimum number of clauses per phrase query...
                           // used when constructing boosting part of query via sloppy phrases
    boolean exceptions; // allow exceptions to be thrown (for example on a missing field)

    private Map<String, Analyzer> nonStopFilterAnalyzerPerField;
    private boolean removeStopFilter;
    String minShouldMatch; // for inner boolean queries produced from a single fieldQuery

    /**
     * Where we store a map from field name we expect to see in our query
     * string, to Alias object containing the fields to use in our
     * DisjunctionMaxQuery and the tiebreaker to use.
*/
    protected Map<String, Alias> aliases = new HashMap<>(3);

    // State describing the clause currently being built. The getXxxQuery()
    // overrides below fill these in and then call getAliasedQuery(), which
    // eventually reads them back in getQuery().
    private QType type;
    private String field;
    private String val;
    private String val2;
    private boolean bool;
    private boolean bool2;
    private float flt;
    private int slop;

    /**
     * Creates the parser and sets its default operator from the {@code q.op}
     * parameter (local params take precedence over request params).
     *
     * @param parser the owning query parser, used to reach the request and its params
     * @param defaultField the default field passed on to the super class
     */
    public ExtendedSolrQueryParser(QParser parser, String defaultField) {
      super(parser, defaultField);
      // Respect the q.op parameter before mm will be applied later
      SolrParams defaultParams = SolrParams.wrapDefaults(parser.getLocalParams(), parser.getParams());
      QueryParser.Operator defaultOp = QueryParsing.getQueryParserDefaultOperator(
          parser.getReq().getSchema(), defaultParams.get(QueryParsing.OP));
      setDefaultOperator(defaultOp);
    }

    /**
     * Controls whether {@link #newFieldQuery} analyzes text with a variant of the
     * query analyzer that has its stop filter removed.
     *
     * @param remove true to use the stop-filter-free analyzer
     */
    public void setRemoveStopFilter(boolean remove) {
      removeStopFilter = remove;
    }

    /**
     * Builds the boolean query via the super class and, when it is non-null,
     * passes it through {@code QueryUtils.makeQueryable}.
     */
    @Override
    protected Query getBooleanQuery(List<BooleanClause> clauses) throws SyntaxError {
      Query q = super.getBooleanQuery(clauses);
      if (q != null) {
        q = QueryUtils.makeQueryable(q);
      }
      return q;
    }

    /**
     * Add an alias to this query parser.
     *
     * @param field the field name that should trigger alias mapping
     * @param tiebreaker the tiebreaker to be used in the
     *                   DisjunctionMaxQuery
     * @param fieldBoosts the mapping from fieldname to boost value that
     *                    should be used to build up the clauses of the
     *                    DisjunctionMaxQuery.
     * @see SolrPluginUtils#parseFieldBoosts
     */
    public void addAlias(String field, float tiebreaker, Map<String, Float> fieldBoosts) {
      Alias a = new Alias();
      a.tie = tiebreaker;
      a.fields = fieldBoosts;
      aliases.put(field, a);
    }

    /**
     * Returns the aliases found for a field.
     * Returns null if there are no aliases for the field
     * @return Alias
     */
    protected Alias getAlias(String field) {
      return aliases.get(field);
    }

    /**
     * Records a FIELD (or, when {@code quoted}, PHRASE) clause using the parser's
     * current phrase slop and resolves it through {@link #getAliasedQuery()}.
     */
    @Override
    protected Query getFieldQuery(String field, String val, boolean quoted, boolean raw) throws SyntaxError {
      this.type = quoted ? QType.PHRASE : QType.FIELD;
      this.field = field;
      this.val = val;
      this.slop = getPhraseSlop(); // unspecified
      return getAliasedQuery();
    }

    /**
     * Records a PHRASE clause with an explicit slop and resolves it through
     * {@link #getAliasedQuery()}.
     */
    @Override
    protected Query getFieldQuery(String field, String val, int slop) throws SyntaxError {
      this.type = QType.PHRASE;
      this.field = field;
      this.val = val;
      this.slop = slop;
      return getAliasedQuery();
    }

    /**
     * Records a PREFIX clause and resolves it through {@link #getAliasedQuery()}.
     * An empty prefix on field {@code *} matches all documents.
     */
    @Override
    protected Query getPrefixQuery(String field, String val) throws SyntaxError {
      if (val.equals("") && field.equals("*")) {
        return new MatchAllDocsQuery();
      }
      this.type = QType.PREFIX;
      this.field = field;
      this.val = val;
      return getAliasedQuery();
    }

    /**
     * Delegates to the super class with an analyzer chosen here; the
     * {@code analyzer} argument itself is not used. When stop filter removal is
     * enabled, the per-field stop-filter-free analyzer is looked up in (and, on a
     * miss, stored into) {@code nonStopFilterAnalyzerPerField}; otherwise the
     * field type's query analyzer is used.
     */
    @Override
    protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted)
        throws SyntaxError {
      Analyzer actualAnalyzer;
      if (removeStopFilter) {
        if (nonStopFilterAnalyzerPerField == null) {
          nonStopFilterAnalyzerPerField = new HashMap<>();
        }
        actualAnalyzer = nonStopFilterAnalyzerPerField.get(field);
        if (actualAnalyzer == null) {
          actualAnalyzer = noStopwordFilterAnalyzer(field);
          // Remember the result so the analyzer chain is not rebuilt for every
          // clause on the same field (previously the map was never populated).
          nonStopFilterAnalyzerPerField.put(field, actualAnalyzer);
        }
      } else {
        actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
      }
      return super.newFieldQuery(actualAnalyzer, field, queryText, quoted);
    }

    /**
     * Records a RANGE clause (bounds and their inclusiveness) and resolves it
     * through {@link #getAliasedQuery()}.
     */
    @Override
    protected Query getRangeQuery(String field, String a, String b, boolean startInclusive,
        boolean endInclusive) throws SyntaxError {
      this.type = QType.RANGE;
      this.field = field;
      this.val = a;
      this.val2 = b;
      this.bool = startInclusive;
      this.bool2 = endInclusive;
      return getAliasedQuery();
    }

    /**
     * Records a WILDCARD clause and resolves it through {@link #getAliasedQuery()}.
     * A bare {@code *} value becomes a match-all query when the field is {@code *}
     * or no explicit field was given, and an empty prefix query otherwise.
     */
    @Override
    protected Query getWildcardQuery(String field, String val) throws SyntaxError {
      if (val.equals("*")) {
        if (field.equals("*") || getExplicitField() == null) {
          return new MatchAllDocsQuery();
        } else {
          return getPrefixQuery(field, "");
        }
      }
      this.type = QType.WILDCARD;
      this.field = field;
      this.val = val;
      return getAliasedQuery();
    }

    /**
     * Records a FUZZY clause and resolves it through {@link #getAliasedQuery()}.
     */
    @Override
    protected Query getFuzzyQuery(String field, String val, float minSimilarity) throws SyntaxError {
      this.type = QType.FUZZY;
      this.field = field;
      this.val = val;
      this.flt = minSimilarity;
      return getAliasedQuery();
    }

    /**
     * Delegates to the super class unless the field has been specified
     * as an alias -- in which case we recurse on each of
     * the aliased fields, and the results are composed into a
     * DisjunctionMaxQuery.  (so yes: aliases which point at other
     * aliases should work)
     *
     * @throws SyntaxError if the aliases reachable from the current field form a
     *         cycle, or (when {@code exceptions} is set) the field is unknown
     */
    protected Query getAliasedQuery() throws SyntaxError {
      Alias a = aliases.get(field);
      this.validateCyclicAliasing(field);
      if (a != null) {
        List<Query> lst = getQueries(a);
        if (lst == null || lst.isEmpty()) {
          return getQuery();
        }
        // make a DisjunctionMaxQuery in this case too... it will stop
        // the "mm" processing from making everything required in the case
        // that the query expanded to multiple clauses.
        // DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
        // if (lst.size()==1) return lst.get(0);
        if (makeDismax) {
          return new DisjunctionMaxQuery(lst, a.tie);
        } else {
          BooleanQuery.Builder q = new BooleanQuery.Builder();
          for (Query sub : lst) {
            q.add(sub, BooleanClause.Occur.SHOULD);
          }
          return q.build();
        }
      } else {
        // verify that a fielded query is actually on a field that exists... if not,
        // then throw an exception to get us out of here, and we'll treat it like a
        // literal when we try the escape+re-parse.
        if (exceptions) {
          FieldType ft = schema.getFieldTypeNoEx(field);
          if (ft == null && null == MagicFieldName.get(field)) {
            throw unknownField;
          }
        }
        return getQuery();
      }
    }

    /**
     * Validate there is no cyclic referencing in the aliasing
     *
     * @throws SyntaxError if following the aliases from {@code field} revisits a field
     */
    private void validateCyclicAliasing(String field) throws SyntaxError {
      Set<String> set = new HashSet<>();
      set.add(field);
      if (validateField(field, set)) {
        throw new SyntaxError("Field aliases lead to a cycle");
      }
    }

    /**
     * Depth-first walk over the alias graph starting at {@code field}.
     *
     * @param field the field whose alias targets are examined
     * @param set the fields on the current path; entries added here are removed
     *            again before returning
     * @return true if some alias target is already on the current path
     */
    private boolean validateField(String field, Set<String> set) {
      Alias alias = this.getAlias(field);
      if (alias == null) {
        return false;
      }
      boolean hascycle = false;
      for (String referencedField : alias.fields.keySet()) {
        if (!set.add(referencedField)) {
          hascycle = true;
        } else {
          if (validateField(referencedField, set)) {
            hascycle = true;
          }
          set.remove(referencedField);
        }
      }
      return hascycle;
    }

    /**
     * Builds one sub-query per aliased field by pointing {@code this.field} at it
     * and recursing through {@link #getAliasedQuery()}; non-null results are
     * wrapped in a BoostQuery when the alias carries a boost other than 1.
     * Note that {@code this.field} is left pointing at whatever field was
     * processed last.
     *
     * @return the sub-queries, or null if the alias is null or has no fields
     */
    protected List<Query> getQueries(Alias a) throws SyntaxError {
      if (a == null) {
        return null;
      }
      if (a.fields.isEmpty()) {
        return null;
      }
      List<Query> lst = new ArrayList<>(4);
      for (String f : a.fields.keySet()) {
        this.field = f;
        Query sub = getAliasedQuery();
        if (sub != null) {
          Float boost = a.fields.get(f);
          if (boost != null && boost.floatValue() != 1f) {
            sub = new BoostQuery(sub, boost);
          }
          lst.add(sub);
        }
      }
      return lst;
    }

    /**
     * Builds the query for the currently recorded clause state by dispatching on
     * {@code type} to the matching super class method.
     *
     * @return the query, or null if the super class threw, or if a field/phrase
     *         clause does not satisfy {@code minClauseSize}
     */
    private Query getQuery() {
      try {
        switch (type) {
          case FIELD: // fallthrough
          case PHRASE:
            Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
            // Boolean query on a whitespace-separated string
            // If these were synonyms we would have a SynonymQuery
            if (query instanceof BooleanQuery) {
              BooleanQuery bq = (BooleanQuery) query;
              query = SolrPluginUtils.setMinShouldMatch(bq, minShouldMatch, false);
            }
            if (query instanceof PhraseQuery) {
              PhraseQuery pq = (PhraseQuery) query;
              if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) {
                return null;
              }
              // Rebuild the phrase with the same terms and positions but our slop.
              PhraseQuery.Builder builder = new PhraseQuery.Builder();
              Term[] terms = pq.getTerms();
              int[] positions = pq.getPositions();
              for (int i = 0; i < terms.length; ++i) {
                builder.add(terms[i], positions[i]);
              }
              builder.setSlop(slop);
              query = builder.build();
            } else if (query instanceof MultiPhraseQuery) {
              MultiPhraseQuery mpq = (MultiPhraseQuery) query;
              if (minClauseSize > 1 && mpq.getTermArrays().length < minClauseSize) {
                return null;
              }
              if (slop != mpq.getSlop()) {
                query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
              }
            } else if (minClauseSize > 1) {
              // if it's not a type of phrase query, it doesn't meet the minClauseSize requirements
              return null;
            }
            return query;
          case PREFIX:
            return super.getPrefixQuery(field, val);
          case WILDCARD:
            return super.getWildcardQuery(field, val);
          case FUZZY:
            return super.getFuzzyQuery(field, val, flt);
          case RANGE:
            return super.getRangeQuery(field, val, val2, bool, bool2);
        }
        return null;
      } catch (Exception e) {
        // an exception here is due to the field query not being compatible with the input text
        // for example, passing a string to a numeric field.
        return null;
      }
    }

    /**
     * Returns the query analyzer of {@code fieldName} with its first
     * StopFilterFactory removed. The unmodified query analyzer is returned when
     * either analyzer is not a TokenizerChain, when index and query analyzer are
     * the same object, when the index analyzer itself has a stop filter, or when
     * the query analyzer has none.
     */
    private Analyzer noStopwordFilterAnalyzer(String fieldName) {
      FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
      Analyzer qa = ft.getQueryAnalyzer();
      if (!(qa instanceof TokenizerChain)) {
        return qa;
      }
      TokenizerChain tcq = (TokenizerChain) qa;
      Analyzer ia = ft.getIndexAnalyzer();
      if (ia == qa || !(ia instanceof TokenizerChain)) {
        return qa;
      }
      TokenizerChain tci = (TokenizerChain) ia;

      // make sure that there isn't a stop filter in the indexer
      for (TokenFilterFactory tf : tci.getTokenFilterFactories()) {
        if (tf instanceof StopFilterFactory) {
          return qa;
        }
      }

      // now if there is a stop filter in the query analyzer, remove it
      int stopIdx = -1;
      TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
      for (int i = 0; i < facs.length; i++) {
        TokenFilterFactory tf = facs[i];
        if (tf instanceof StopFilterFactory) {
          stopIdx = i;
          break;
        }
      }

      if (stopIdx == -1) {
        // no stop filter exists
        return qa;
      }

      // Copy every factory except the stop filter into a new chain.
      TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length - 1];
      for (int i = 0, j = 0; i < facs.length; i++) {
        if (i == stopIdx) {
          continue;
        }
        newtf[j++] = facs[i];
      }

      TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf);
      newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName));
      return newa;
    }
  }

  /**
   * @return true if the query is null or is a BooleanQuery without clauses
   */
  static boolean isEmpty(Query q) {
    return q == null
        || (q instanceof BooleanQuery && ((BooleanQuery) q).clauses().isEmpty());
  }

  /**
   * Class that encapsulates the input from userFields parameter and can answer whether
   * a field allowed or disallowed as fielded query in the query string
   */
  static class UserFields {
    private Map<String, Float> userFieldsMap;
    private DynamicField[] dynamicUserFields;
    private DynamicField[] negativeDynamicUserFields;

    /**
     * @param ufm field name (or wildcard pattern, optionally prefixed with
     *            {@code -}) to boost. The map is kept by reference, and when it
     *            is empty a {@code "*"} entry with a null boost is added to it.
     */
    UserFields(Map<String, Float> ufm) {
      userFieldsMap = ufm;
      if (0 == userFieldsMap.size()) {
        userFieldsMap.put("*", null);
      }

      // Process dynamic patterns in userFields
      List<DynamicField> dynUserFields = new ArrayList<>();
      List<DynamicField> negDynUserFields = new ArrayList<>();
      for (String f : userFieldsMap.keySet()) {
        if (f.contains("*")) {
          if (f.startsWith("-")) {
            negDynUserFields.add(new DynamicField(f.substring(1)));
          } else {
            dynUserFields.add(new DynamicField(f));
          }
        }
      }
      Collections.sort(dynUserFields);
      dynamicUserFields = dynUserFields.toArray(new DynamicField[dynUserFields.size()]);
      Collections.sort(negDynUserFields);
      negativeDynamicUserFields = negDynUserFields.toArray(new DynamicField[negDynUserFields.size()]);
    }

    /**
     * Is the given field name allowed according to UserFields spec given in the uf parameter?
     * @param fname the field name to examine
     * @return true if the fielded queries are allowed on this field
     */
    public boolean isAllowed(String fname) {
      return (userFieldsMap.containsKey(fname) || isDynField(fname, false))
          && !userFieldsMap.containsKey("-" + fname)
          && !isDynField(fname, true);
    }

    /**
     * @return true if {@code field} matches one of the positive (or, when
     *         {@code neg} is set, negative) wildcard patterns
     */
    private boolean isDynField(String field, boolean neg) {
      return getDynFieldForName(field, neg) != null;
    }

    /**
     * @return the wildcard string of the first matching pattern in the positive
     *         (or, when {@code neg} is set, negative) list, or null if none matches
     */
    private String getDynFieldForName(String f, boolean neg) {
      for (DynamicField df : neg ? negativeDynamicUserFields : dynamicUserFields) {
        if (df.matches(f)) {
          return df.wildcard;
        }
      }
      return null;
    }

    /**
     * Finds the default user field boost associated with the given field.
     * This is parsed from the uf parameter, and may be specified as wildcards, e.g. *name^2.0 or *^3.0
     * @param field the field to find boost for
     * @return the float boost value associated with the given field or a wildcard matching the field
     */
    public Float getBoost(String field) {
      return (userFieldsMap.containsKey(field))
          ? userFieldsMap.get(field) // Exact field
          : userFieldsMap.get(getDynFieldForName(field, false)); // Dynamic field
    }
  }

  /* Represents a dynamic field, for easier matching, inspired by same class in IndexSchema */
  static class DynamicField implements Comparable<DynamicField> {
    final static int STARTS_WITH = 1;
    final static int ENDS_WITH = 2;
    final static int CATCHALL = 3;

    final String wildcard;
    final int type;
    final String str;

    /**
     * @param wildcard either {@code *}, a pattern starting with {@code *}, or a
     *                 pattern ending with {@code *}
     * @throws RuntimeException if the pattern neither starts nor ends with {@code *}
     */
    protected DynamicField(String wildcard) {
      this.wildcard = wildcard;
      if (wildcard.equals("*")) {
        type = CATCHALL;
        str = null;
      } else if (wildcard.startsWith("*")) {
        type = ENDS_WITH;
        str = wildcard.substring(1);
      } else if (wildcard.endsWith("*")) {
        type = STARTS_WITH;
        str = wildcard.substring(0, wildcard.length() - 1);
      } else {
        throw new RuntimeException("dynamic field name must start or end with *");
      }
    }

    /*
     * Returns true if the regex wildcard for this DynamicField would match the input field name
     */
    public boolean matches(String name) {
      if (type == CATCHALL) {
        return true;
      } else if (type == STARTS_WITH && name.startsWith(str)) {
        return true;
      } else if (type == ENDS_WITH && name.endsWith(str)) {
        return true;
      } else {
        return false;
      }
    }

    /**
     * Sort order is based on length of regex.  Longest comes first.
     * @param other The object to compare to.
     * @return a negative integer, zero, or a positive integer
     * as this object is less than, equal to, or greater than
     * the specified object.
     */
    @Override
    public int compareTo(DynamicField other) {
      return Integer.compare(other.wildcard.length(), wildcard.length());
    }

    @Override
    public String toString() {
      return this.wildcard;
    }
  }

  /**
   * Simple container for configuration information used when parsing queries
   */
  public class ExtendedDismaxConfiguration {

    /**
     * The field names specified by 'qf' that (most) clauses will
     * be queried against
     */
    protected Map<String, Float> queryFields;

    /**
     * The field names specified by 'uf' that users are
     * allowed to include literally in their query string.  The Float
     * boost values will be applied automatically to any clause using that
     * field name. '*' will be treated as an alias for any
     * field that exists in the schema. Wildcards are allowed to
     * express dynamicFields.
     */
    protected UserFields userFields;

    protected String[] boostParams;
    protected String[] multBoosts;
    protected SolrParams solrParams;
    protected String minShouldMatch;

    protected List<FieldParams> allPhraseFields;

    protected float tiebreaker;

    protected int qslop;

    protected boolean stopwords;

    protected boolean mmAutoRelax;

    protected String altQ;

    protected boolean lowercaseOperators;

    protected String[] boostFuncs;

    /**
     * Reads all configuration values from the request parameters, with local
     * params taking precedence over {@code params}.
     *
     * @throws RuntimeException wrapping the SyntaxError if the query fields
     *         cannot be parsed
     */
    public ExtendedDismaxConfiguration(SolrParams localParams, SolrParams params, SolrQueryRequest req) {
      solrParams = SolrParams.wrapDefaults(localParams, params);
      minShouldMatch = DisMaxQParser.parseMinShouldMatch(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance
      userFields = new UserFields(U.parseFieldBoosts(solrParams.getParams(DMP.UF)));
      try {
        queryFields = DisMaxQParser.parseQueryFields(req.getSchema(), solrParams); // req.getSearcher() here causes searcher refcount imbalance
      } catch (SyntaxError e) {
        throw new RuntimeException(e);
      }
      // Phrase slop array; ps2 and ps3 default to the value of ps. Index 1 is unused.
      int[] pslop = new int[4];
      pslop[0] = solrParams.getInt(DisMaxParams.PS, 0);
      pslop[2] = solrParams.getInt(DisMaxParams.PS2, pslop[0]);
      pslop[3] = solrParams.getInt(DisMaxParams.PS3, pslop[0]);

      List<FieldParams> phraseFields = U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF), 0, pslop[0]);
      List<FieldParams> phraseFields2 = U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF2), 2, pslop[2]);
      List<FieldParams> phraseFields3 = U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF3), 3, pslop[3]);

      allPhraseFields = new ArrayList<>(phraseFields.size() + phraseFields2.size() + phraseFields3.size());
      allPhraseFields.addAll(phraseFields);
      allPhraseFields.addAll(phraseFields2);
      allPhraseFields.addAll(phraseFields3);

      tiebreaker = solrParams.getFloat(DisMaxParams.TIE, 0.0f);

      qslop = solrParams.getInt(DisMaxParams.QS, 0);

      stopwords = solrParams.getBool(DMP.STOPWORDS, true);

      mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false);

      altQ = solrParams.get(DisMaxParams.ALTQ);

      lowercaseOperators = solrParams.getBool(DMP.LOWERCASE_OPS, true);

      /* * * Boosting Query * * */
      boostParams = solrParams.getParams(DisMaxParams.BQ);

      boostFuncs = solrParams.getParams(DisMaxParams.BF);

      multBoosts = solrParams.getParams(DMP.MULT_BOOST);
    }

    /**
     * @return true if there are valid multiplicative boost queries
     */
    public boolean hasMultiplicativeBoosts() {
      return multBoosts != null && multBoosts.length > 0;
    }

    /**
     * @return true if there are valid boost functions
     */
    public boolean hasBoostFunctions() {
      return null != boostFuncs && 0 != boostFuncs.length;
    }

    /**
     * @return true if there are valid boost params
     */
    public boolean hasBoostParams() {
      return boostParams != null && boostParams.length > 0;
    }

    /**
     * @return the combined pf, pf2 and pf3 phrase field parameters, in that order
     */
    public List<FieldParams> getAllPhraseFields() {
      return allPhraseFields;
    }
  }
}