Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.function.Predicate; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.BooleanClause.Occur; /** A Query that matches documents matching boolean combinations of other * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other * BooleanQuerys. */ public class BooleanQuery extends Query implements Iterable<BooleanClause> { private static int maxClauseCount = 1024; /** Thrown when an attempt is made to add more than {@link * #getMaxClauseCount()} clauses. This typically happens if * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery * is expanded to many terms during search. */ public static class TooManyClauses extends RuntimeException { public TooManyClauses() { super("maxClauseCount is set to " + maxClauseCount); } } /** Return the maximum number of clauses permitted, 1024 by default. * Attempts to add more than the permitted number of clauses cause {@link * TooManyClauses} to be thrown. * @see #setMaxClauseCount(int) */ public static int getMaxClauseCount() { return maxClauseCount; } /** * Set the maximum number of clauses permitted per BooleanQuery. * Default value is 1024. */ public static void setMaxClauseCount(int maxClauseCount) { if (maxClauseCount < 1) { throw new IllegalArgumentException("maxClauseCount must be >= 1"); } BooleanQuery.maxClauseCount = maxClauseCount; } /** A builder for boolean queries. */ public static class Builder { private int minimumNumberShouldMatch; private final List<BooleanClause> clauses = new ArrayList<>(); /** Sole constructor. */ public Builder() { } /** * Specifies a minimum number of the optional BooleanClauses * which must be satisfied. * * <p> * By default no optional clauses are necessary for a match * (unless there are no required clauses). If this method is used, * then the specified number of clauses is required. * </p> * <p> * Use of this method is totally independent of specifying that * any specific clauses are required (or prohibited). This number will * only be compared against the number of matching optional clauses. * </p> * * @param min the number of optional clauses that must match */ public Builder setMinimumNumberShouldMatch(int min) { this.minimumNumberShouldMatch = min; return this; } /** * Add a new clause to this {@link Builder}. Note that the order in which * clauses are added does not have any impact on matching documents or query * performance. * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number */ public Builder add(BooleanClause clause) { if (clauses.size() >= maxClauseCount) { throw new TooManyClauses(); } clauses.add(clause); return this; } /** * Add a new clause to this {@link Builder}. Note that the order in which * clauses are added does not have any impact on matching documents or query * performance. * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number */ public Builder add(Query query, Occur occur) { return add(new BooleanClause(query, occur)); } /** Create a new {@link BooleanQuery} based on the parameters that have * been set on this builder. */ public BooleanQuery build() { return new BooleanQuery(minimumNumberShouldMatch, clauses.toArray(new BooleanClause[0])); } } private final int minimumNumberShouldMatch; private final List<BooleanClause> clauses; // used for toString() and getClauses() private final Map<Occur, Collection<Query>> clauseSets; // used for equals/hashcode private BooleanQuery(int minimumNumberShouldMatch, BooleanClause[] clauses) { this.minimumNumberShouldMatch = minimumNumberShouldMatch; this.clauses = Collections.unmodifiableList(Arrays.asList(clauses)); clauseSets = new EnumMap<>(Occur.class); // duplicates matter for SHOULD and MUST clauseSets.put(Occur.SHOULD, new Multiset<>()); clauseSets.put(Occur.MUST, new Multiset<>()); // but not for FILTER and MUST_NOT clauseSets.put(Occur.FILTER, new HashSet<>()); clauseSets.put(Occur.MUST_NOT, new HashSet<>()); for (BooleanClause clause : clauses) { clauseSets.get(clause.getOccur()).add(clause.getQuery()); } } /** * Gets the minimum number of the optional BooleanClauses * which must be satisfied. */ public int getMinimumNumberShouldMatch() { return minimumNumberShouldMatch; } /** Return a list of the clauses of this {@link BooleanQuery}. */ public List<BooleanClause> clauses() { return clauses; } /** Return the collection of queries for the given {@link Occur}. */ Collection<Query> getClauses(Occur occur) { return clauseSets.get(occur); } /** * Whether this query is a pure disjunction, ie. it only has SHOULD clauses * and it is enough for a single clause to match for this boolean query to match. */ boolean isPureDisjunction() { return clauses.size() == getClauses(Occur.SHOULD).size() && minimumNumberShouldMatch <= 1; } /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to * make it possible to do: * <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre> */ @Override public final Iterator<BooleanClause> iterator() { return clauses.iterator(); } private BooleanQuery rewriteNoScoring() { boolean keepShould = getMinimumNumberShouldMatch() > 0 || (clauseSets.get(Occur.MUST).size() + clauseSets.get(Occur.FILTER).size() == 0); if (clauseSets.get(Occur.MUST).size() == 0 && keepShould) { return this; } BooleanQuery.Builder newQuery = new BooleanQuery.Builder(); newQuery.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); for (BooleanClause clause : clauses) { switch (clause.getOccur()) { case MUST: { newQuery.add(clause.getQuery(), Occur.FILTER); break; } case SHOULD: { if (keepShould) { newQuery.add(clause); } break; } default: { newQuery.add(clause); } } } return newQuery.build(); } @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { BooleanQuery query = this; if (scoreMode.needsScores() == false) { query = rewriteNoScoring(); } return new BooleanWeight(query, searcher, scoreMode, boost); } @Override public Query rewrite(IndexReader reader) throws IOException { if (clauses.size() == 0) { return new MatchNoDocsQuery("empty BooleanQuery"); } // optimize 1-clause queries if (clauses.size() == 1) { BooleanClause c = clauses.get(0); Query query = c.getQuery(); if (minimumNumberShouldMatch == 1 && c.getOccur() == Occur.SHOULD) { return query; } else if (minimumNumberShouldMatch == 0) { switch (c.getOccur()) { case SHOULD: case MUST: return query; case FILTER: // no scoring clauses, so return a score of 0 return new BoostQuery(new ConstantScoreQuery(query), 0); case MUST_NOT: // no positive clauses return new MatchNoDocsQuery("pure negative BooleanQuery"); default: throw new AssertionError(); } } } // recursively rewrite { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); boolean actuallyRewritten = false; for (BooleanClause clause : this) { Query query = clause.getQuery(); Query rewritten = query.rewrite(reader); if (rewritten != query) { // rewrite clause actuallyRewritten = true; builder.add(rewritten, clause.getOccur()); } else { // leave as-is builder.add(clause); } } if (actuallyRewritten) { return builder.build(); } } // remove duplicate FILTER and MUST_NOT clauses { int clauseCount = 0; for (Collection<Query> queries : clauseSets.values()) { clauseCount += queries.size(); } if (clauseCount != clauses.size()) { // since clauseSets implicitly deduplicates FILTER and MUST_NOT // clauses, this means there were duplicates BooleanQuery.Builder rewritten = new BooleanQuery.Builder(); rewritten.setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry<Occur, Collection<Query>> entry : clauseSets.entrySet()) { final Occur occur = entry.getKey(); for (Query query : entry.getValue()) { rewritten.add(query, occur); } } return rewritten.build(); } } // Check whether some clauses are both required and excluded final Collection<Query> mustNotClauses = clauseSets.get(Occur.MUST_NOT); if (!mustNotClauses.isEmpty()) { final Predicate<Query> p = clauseSets.get(Occur.MUST)::contains; if (mustNotClauses.stream().anyMatch(p.or(clauseSets.get(Occur.FILTER)::contains))) { return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT"); } if (mustNotClauses.contains(new MatchAllDocsQuery())) { return new MatchNoDocsQuery("MUST_NOT clause is MatchAllDocsQuery"); } } // remove FILTER clauses that are also MUST clauses // or that match all documents if (clauseSets.get(Occur.MUST).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { final Set<Query> filters = new HashSet<Query>(clauseSets.get(Occur.FILTER)); boolean modified = filters.remove(new MatchAllDocsQuery()); modified |= filters.removeAll(clauseSets.get(Occur.MUST)); if (modified) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.FILTER) { builder.add(clause); } } for (Query filter : filters) { builder.add(filter, Occur.FILTER); } return builder.build(); } } // convert FILTER clauses that are also SHOULD clauses to MUST clauses if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { final Collection<Query> filters = clauseSets.get(Occur.FILTER); final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD); Set<Query> intersection = new HashSet<>(filters); intersection.retainAll(shoulds); if (intersection.isEmpty() == false) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); int minShouldMatch = getMinimumNumberShouldMatch(); for (BooleanClause clause : clauses) { if (intersection.contains(clause.getQuery())) { if (clause.getOccur() == Occur.SHOULD) { builder.add(new BooleanClause(clause.getQuery(), Occur.MUST)); minShouldMatch--; } } else { builder.add(clause); } } builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch)); return builder.build(); } } // Deduplicate SHOULD clauses by summing up their boosts if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) { Map<Query, Double> shouldClauses = new HashMap<>(); for (Query query : clauseSets.get(Occur.SHOULD)) { double boost = 1; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; boost *= bq.getBoost(); query = bq.getQuery(); } shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost); } if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) { BooleanQuery.Builder builder = new BooleanQuery.Builder() .setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry<Query, Double> entry : shouldClauses.entrySet()) { Query query = entry.getKey(); float boost = entry.getValue().floatValue(); if (boost != 1f) { query = new BoostQuery(query, boost); } builder.add(query, Occur.SHOULD); } for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.SHOULD) { builder.add(clause); } } return builder.build(); } } // Deduplicate MUST clauses by summing up their boosts if (clauseSets.get(Occur.MUST).size() > 0) { Map<Query, Double> mustClauses = new HashMap<>(); for (Query query : clauseSets.get(Occur.MUST)) { double boost = 1; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; boost *= bq.getBoost(); query = bq.getQuery(); } mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost); } if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) { BooleanQuery.Builder builder = new BooleanQuery.Builder() .setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry<Query, Double> entry : mustClauses.entrySet()) { Query query = entry.getKey(); float boost = entry.getValue().floatValue(); if (boost != 1f) { query = new BoostQuery(query, boost); } builder.add(query, Occur.MUST); } for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.MUST) { builder.add(clause); } } return builder.build(); } } // Rewrite queries whose single scoring clause is a MUST clause on a // MatchAllDocsQuery to a ConstantScoreQuery { final Collection<Query> musts = clauseSets.get(Occur.MUST); final Collection<Query> filters = clauseSets.get(Occur.FILTER); if (musts.size() == 1 && filters.size() > 0) { Query must = musts.iterator().next(); float boost = 1f; if (must instanceof BoostQuery) { BoostQuery boostQuery = (BoostQuery) must; must = boostQuery.getQuery(); boost = boostQuery.getBoost(); } if (must.getClass() == MatchAllDocsQuery.class) { // our single scoring clause matches everything: rewrite to a CSQ on the filter // ignore SHOULD clause for now BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (BooleanClause clause : clauses) { switch (clause.getOccur()) { case FILTER: case MUST_NOT: builder.add(clause); break; default: // ignore break; } } Query rewritten = builder.build(); rewritten = new ConstantScoreQuery(rewritten); if (boost != 1f) { rewritten = new BoostQuery(rewritten, boost); } // now add back the SHOULD clauses builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()) .add(rewritten, Occur.MUST); for (Query query : clauseSets.get(Occur.SHOULD)) { builder.add(query, Occur.SHOULD); } rewritten = builder.build(); return rewritten; } } } // Flatten nested disjunctions, this is important for block-max WAND to perform well if (minimumNumberShouldMatch <= 1) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch); boolean actuallyRewritten = false; try { for (BooleanClause clause : clauses) { if (clause.getOccur() == Occur.SHOULD && clause.getQuery() instanceof BooleanQuery) { BooleanQuery innerQuery = (BooleanQuery) clause.getQuery(); if (innerQuery.isPureDisjunction()) { actuallyRewritten = true; for (BooleanClause innerClause : innerQuery.clauses()) { builder.add(innerClause); } } else { builder.add(clause); } } else { builder.add(clause); } } if (actuallyRewritten) { return builder.build(); } } catch (TooManyClauses exception) { // No-op : Do not flatten when the new query will violate max clause count } } return super.rewrite(reader); } @Override public void visit(QueryVisitor visitor) { QueryVisitor sub = visitor.getSubVisitor(Occur.MUST, this); for (BooleanClause.Occur occur : clauseSets.keySet()) { if (clauseSets.get(occur).size() > 0) { if (occur == Occur.MUST) { for (Query q : clauseSets.get(occur)) { q.visit(sub); } } else { QueryVisitor v = sub.getSubVisitor(occur, this); for (Query q : clauseSets.get(occur)) { q.visit(v); } } } } } /** Prints a user-readable version of this query. */ @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); boolean needParens = getMinimumNumberShouldMatch() > 0; if (needParens) { buffer.append("("); } int i = 0; for (BooleanClause c : this) { buffer.append(c.getOccur().toString()); Query subQuery = c.getQuery(); if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(subQuery.toString(field)); buffer.append(")"); } else { buffer.append(subQuery.toString(field)); } if (i != clauses.size() - 1) { buffer.append(" "); } i += 1; } if (needParens) { buffer.append(")"); } if (getMinimumNumberShouldMatch() > 0) { buffer.append('~'); buffer.append(getMinimumNumberShouldMatch()); } return buffer.toString(); } /** * Compares the specified object with this boolean query for equality. * Returns true if and only if the provided object<ul> * <li>is also a {@link BooleanQuery},</li> * <li>has the same value of {@link #getMinimumNumberShouldMatch()}</li> * <li>has the same {@link Occur#SHOULD} clauses, regardless of the order</li> * <li>has the same {@link Occur#MUST} clauses, regardless of the order</li> * <li>has the same set of {@link Occur#FILTER} clauses, regardless of the * order and regardless of duplicates</li> * <li>has the same set of {@link Occur#MUST_NOT} clauses, regardless of * the order and regardless of duplicates</li></ul> */ @Override public boolean equals(Object o) { return sameClassAs(o) && equalsTo(getClass().cast(o)); } private boolean equalsTo(BooleanQuery other) { return getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() && clauseSets.equals(other.clauseSets); } private int computeHashCode() { int hashCode = Objects.hash(minimumNumberShouldMatch, clauseSets); if (hashCode == 0) { hashCode = 1; } return hashCode; } // cached hash code is ok since boolean queries are immutable private int hashCode; @Override public int hashCode() { // no need for synchronization, in the worst case we would just compute the hash several times. if (hashCode == 0) { hashCode = computeHashCode(); assert hashCode != 0; } assert hashCode == computeHashCode(); return hashCode; } }