Java tutorial: inside Lucene's AnalyzingInfixSuggester
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search.suggest.analyzing;

import java.io.Closeable;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
// TODO:
//   - a PostingsFormat that stores super-high-freq terms as a bitset
//     should be a win for the prefix terms?  (LUCENE-5052)
//   - we could offer a better integration with DocumentDictionary and
//     NRT?  so that your suggester "automatically" keeps in sync w/ your index

/** Analyzes the input text and then suggests matches based
 *  on prefix matches to any tokens in the indexed text.
 *  This also highlights the tokens that match.
 *
 *  <p>This suggester supports payloads.  Matches are sorted only
 *  by the suggest weight; it would be nice to support
 *  blended score + weight sort in the future.  This means
 *  this suggester best applies when there is a strong
 *  a-priori ranking of all the suggestions.
 *
 *  <p>This suggester supports contexts, including arbitrary binary
 *  terms.
 *
 *  @lucene.experimental */
public class AnalyzingInfixSuggester extends Lookup implements Closeable {

  /** Edge-ngrams field, used for searching short prefixes without a
   *  PrefixQuery; controlled by {@linkplain #minPrefixChars}. */
  protected final static String TEXTGRAMS_FIELD_NAME = "textgrams";

  /** Field name used for the indexed text. */
  protected final static String TEXT_FIELD_NAME = "text";

  /** Field name used for the indexed text, as a
   *  StringField, for exact lookup. */
  protected final static String EXACT_TEXT_FIELD_NAME = "exacttext";

  /** Field name used for the indexed context, as a
   *  StringField and a SortedSetDVField, for filtering. */
  protected final static String CONTEXTS_FIELD_NAME = "contexts";

  /** Analyzer used at search time */
  protected final Analyzer queryAnalyzer;

  /** Analyzer used at index time */
  protected final Analyzer indexAnalyzer;

  private final Directory dir;
  final int minPrefixChars;
  private final boolean allTermsRequired;
  private final boolean highlight;
  private final boolean commitOnBuild;
  private final boolean closeIndexWriterOnBuild;

  /** Used for ongoing NRT additions/updates. */
  protected IndexWriter writer;

  /** {@link IndexSearcher} used for lookups. */
  protected SearcherManager searcherMgr;

  /** Used to manage concurrent access to searcherMgr */
  protected final Object searcherMgrLock = new Object();

  /** Default minimum number of leading characters before
   *  PrefixQuery is used (4). */
  public static final int DEFAULT_MIN_PREFIX_CHARS = 4;

  /** Default boolean clause option for multiple terms matching (all terms required). */
  public static final boolean DEFAULT_ALL_TERMS_REQUIRED = true;

  /** Default highlighting option. */
  public static final boolean DEFAULT_HIGHLIGHT = true;

  /** Default option to close the IndexWriter once the index has been built. */
  protected final static boolean DEFAULT_CLOSE_INDEXWRITER_ON_BUILD = true;

  /** How we sort the postings and search results. */
  private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));

  /** Create a new instance, loading from a previously built
   *  AnalyzingInfixSuggester directory, if it exists.  This directory must be
   *  private to the infix suggester (i.e., not an external
   *  Lucene index).  Note that {@link #close}
   *  will also close the provided directory. */
  public AnalyzingInfixSuggester(Directory dir, Analyzer analyzer) throws IOException {
    this(dir, analyzer, analyzer, DEFAULT_MIN_PREFIX_CHARS, false,
         DEFAULT_ALL_TERMS_REQUIRED, DEFAULT_HIGHLIGHT);
  }
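  // Illustrative usage sketch (not part of this class): open a directory that
  // is private to the suggester and share one analyzer for index and query
  // time.  The path "suggest-home" is hypothetical.
  //
  //   Directory suggestDir = FSDirectory.open(Paths.get("suggest-home"));
  //   AnalyzingInfixSuggester suggester =
  //       new AnalyzingInfixSuggester(suggestDir, new StandardAnalyzer());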
  /** Create a new instance, loading from a previously built
   *  AnalyzingInfixSuggester directory, if it exists.  This directory must be
   *  private to the infix suggester (i.e., not an external
   *  Lucene index).  Note that {@link #close}
   *  will also close the provided directory.
   *
   *  @param minPrefixChars Minimum number of leading characters
   *     before PrefixQuery is used (default 4).
   *     Prefixes shorter than this are indexed as character
   *     ngrams (increasing index size but making lookups
   *     faster).
   *
   *  @param commitOnBuild Call commit after the index has finished building.  This would persist the
   *                       suggester index to disk and future instances of this suggester can use this pre-built dictionary.
   */
  public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                                 int minPrefixChars, boolean commitOnBuild) throws IOException {
    this(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
         DEFAULT_ALL_TERMS_REQUIRED, DEFAULT_HIGHLIGHT);
  }

  /** Create a new instance, loading from a previously built
   *  AnalyzingInfixSuggester directory, if it exists.  This directory must be
   *  private to the infix suggester (i.e., not an external
   *  Lucene index).  Note that {@link #close}
   *  will also close the provided directory.
   *
   *  @param minPrefixChars Minimum number of leading characters
   *     before PrefixQuery is used (default 4).
   *     Prefixes shorter than this are indexed as character
   *     ngrams (increasing index size but making lookups
   *     faster).
   *
   *  @param commitOnBuild Call commit after the index has finished building.  This would persist the
   *                       suggester index to disk and future instances of this suggester can use this pre-built dictionary.
   *
   *  @param allTermsRequired All terms in the suggest query must be matched.
   *  @param highlight Highlight suggest query in suggestions.
   */
  public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                                 int minPrefixChars, boolean commitOnBuild,
                                 boolean allTermsRequired, boolean highlight) throws IOException {
    this(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
         allTermsRequired, highlight, DEFAULT_CLOSE_INDEXWRITER_ON_BUILD);
  }
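  // Illustrative sketch: the minPrefixChars/commitOnBuild variant.  The
  // variables and values here are hypothetical.
  //
  //   AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
  //       suggestDir, indexAnalyzer, queryAnalyzer,
  //       2,      // 1-char prefixes are served from indexed edge ngrams; longer ones use PrefixQuery
  //       true);  // commit after build() so a later instance can reopen the pre-built index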
  /** Create a new instance, loading from a previously built
   *  AnalyzingInfixSuggester directory, if it exists.  This directory must be
   *  private to the infix suggester (i.e., not an external
   *  Lucene index).  Note that {@link #close}
   *  will also close the provided directory.
   *
   *  @param minPrefixChars Minimum number of leading characters
   *     before PrefixQuery is used (default 4).
   *     Prefixes shorter than this are indexed as character
   *     ngrams (increasing index size but making lookups
   *     faster).
   *
   *  @param commitOnBuild Call commit after the index has finished building.  This would persist the
   *                       suggester index to disk and future instances of this suggester can use this pre-built dictionary.
   *
   *  @param allTermsRequired All terms in the suggest query must be matched.
   *  @param highlight Highlight suggest query in suggestions.
   *  @param closeIndexWriterOnBuild If true, the IndexWriter will be closed after the index has finished building.
   */
  public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
                                 int minPrefixChars, boolean commitOnBuild,
                                 boolean allTermsRequired, boolean highlight,
                                 boolean closeIndexWriterOnBuild) throws IOException {
    if (minPrefixChars < 0) {
      throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
    }

    this.queryAnalyzer = queryAnalyzer;
    this.indexAnalyzer = indexAnalyzer;
    this.dir = dir;
    this.minPrefixChars = minPrefixChars;
    this.commitOnBuild = commitOnBuild;
    this.allTermsRequired = allTermsRequired;
    this.highlight = highlight;
    this.closeIndexWriterOnBuild = closeIndexWriterOnBuild;

    if (DirectoryReader.indexExists(dir)) {
      // Already built; open it:
      searcherMgr = new SearcherManager(dir, null);
    }
  }

  /** Override this to customize index settings, e.g. which
   *  codec to use. */
  protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer,
                                                   IndexWriterConfig.OpenMode openMode) {
    IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer);
    iwc.setOpenMode(openMode);

    // This way all merged segments will be sorted at
    // merge time, allowing for per-segment early termination
    // when those segments are searched:
    iwc.setIndexSort(SORT);

    return iwc;
  }

  /** Subclass can override to choose a specific {@link
   *  Directory} implementation. */
  protected Directory getDirectory(Path path) throws IOException {
    return FSDirectory.open(path);
  }

  @Override
  public void build(InputIterator iter) throws IOException {
    synchronized (searcherMgrLock) {
      if (searcherMgr != null) {
        searcherMgr.close();
        searcherMgr = null;
      }

      if (writer != null) {
        writer.close();
        writer = null;
      }

      boolean success = false;
      try {
        // First pass: build a temporary normal Lucene index,
        // just indexing the suggestions as they iterate:
        writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
        //long t0 = System.nanoTime();

        // TODO: use threads?
        BytesRef text;
        while ((text = iter.next()) != null) {
          BytesRef payload;
          if (iter.hasPayloads()) {
            payload = iter.payload();
          } else {
            payload = null;
          }

          add(text, iter.contexts(), iter.weight(), payload);
        }

        //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
        if (commitOnBuild || closeIndexWriterOnBuild) {
          commit();
        }
        searcherMgr = new SearcherManager(writer, null);
        success = true;
      } finally {
        if (success) {
          if (closeIndexWriterOnBuild) {
            writer.close();
            writer = null;
          }
        } else {  // failure
          if (writer != null) {
            writer.rollback();
            writer = null;
          }
        }
      }
    }
  }
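  // Illustrative sketch: build() wipes and rebuilds the index from an
  // InputIterator (note OpenMode.CREATE above).  Lookup's convenience
  // build(Dictionary) overload feeds a Dictionary's entry iterator through
  // this method; "dict" here is a hypothetical Dictionary instance:
  //
  //   suggester.build(dict.getEntryIterator());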
  /** Commits all pending changes made to this suggester to disk.
   *
   *  @see IndexWriter#commit */
  public void commit() throws IOException {
    if (writer == null) {
      if (searcherMgr == null || closeIndexWriterOnBuild == false) {
        throw new IllegalStateException("Cannot commit on a closed writer. Add documents first");
      }
      // else no-op: writer was committed and closed after the index was built,
      // so commit is unnecessary
    } else {
      writer.commit();
    }
  }

  private Analyzer getGramAnalyzer() {
    return new AnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected Analyzer getWrappedAnalyzer(String fieldName) {
        return indexAnalyzer;
      }

      @Override
      protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        assert !(fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars == 0)
            : "no need \"textgrams\" when minPrefixChars=" + minPrefixChars;
        if (fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars > 0) {
          // TODO: should use an EdgeNGramTokenFilterFactory here
          TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars, false);
          return new TokenStreamComponents(components.getSource(), filter);
        } else {
          return components;
        }
      }
    };
  }

  private synchronized void ensureOpen() throws IOException {
    if (writer == null) {
      if (DirectoryReader.indexExists(dir)) {
        // Already built; open it:
        writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
      } else {
        writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
      }
      synchronized (searcherMgrLock) {
        SearcherManager oldSearcherMgr = searcherMgr;
        searcherMgr = new SearcherManager(writer, null);
        if (oldSearcherMgr != null) {
          oldSearcherMgr.close();
        }
      }
    }
  }

  /** Adds a new suggestion.  Be sure to use {@link #update}
   *  instead if you want to replace a previous suggestion.
   *  After adding or updating a batch of new suggestions,
   *  you must call {@link #refresh} in the end in order to
   *  see the suggestions in {@link #lookup} */
  public void add(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
    ensureOpen();
    writer.addDocument(buildDocument(text, contexts, weight, payload));
  }
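  // Illustrative sketch: near-real-time additions.  Batch many add()/update()
  // calls, then call refresh() once so lookups can see them (the entries and
  // weights here are hypothetical):
  //
  //   suggester.add(new BytesRef("lucene in action"), null, 10L, null);
  //   suggester.add(new BytesRef("lucene for dummies"), null, 5L, null);
  //   suggester.refresh();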
  /** Updates a previous suggestion, matching the exact same
   *  text as before.  Use this to change the weight or
   *  payload of an already added suggestion.  If you know
   *  this text is not already present you can use {@link
   *  #add} instead.  After adding or updating a batch of
   *  new suggestions, you must call {@link #refresh} in the
   *  end in order to see the suggestions in {@link #lookup} */
  public void update(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
    ensureOpen();
    writer.updateDocument(new Term(EXACT_TEXT_FIELD_NAME, text.utf8ToString()),
                          buildDocument(text, contexts, weight, payload));
  }

  private Document buildDocument(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
    String textString = text.utf8ToString();
    Document doc = new Document();
    FieldType ft = getTextFieldType();
    doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
    if (minPrefixChars > 0) {
      doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
    }
    doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
    doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
    doc.add(new NumericDocValuesField("weight", weight));
    if (payload != null) {
      doc.add(new BinaryDocValuesField("payloads", payload));
    }
    if (contexts != null) {
      for (BytesRef context : contexts) {
        doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
        doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
      }
    }
    return doc;
  }

  /** Reopens the underlying searcher; it's best to "batch
   *  up" many additions/updates, and then call refresh
   *  once in the end. */
  public void refresh() throws IOException {
    if (searcherMgr == null) {
      throw new IllegalStateException("suggester was not built");
    }
    if (writer != null) {
      searcherMgr.maybeRefreshBlocking();
    }
    // else no-op: writer was committed and closed after the index was built
    // and before searcherMgr was constructed, so refresh is unnecessary
  }

  /** Subclass can override this method to change the field type of the text field,
   *  e.g. to change the index options. */
  protected FieldType getTextFieldType() {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS);
    ft.setOmitNorms(true);
    return ft;
  }

  @Override
  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) throws IOException {
    return lookup(key, contexts, num, allTermsRequired, highlight);
  }

  /** Lookup, without any context. */
  public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    return lookup(key, (BooleanQuery) null, num, allTermsRequired, doHighlight);
  }

  /** Lookup, with context but without booleans.  Context booleans default to SHOULD,
   *  so each suggestion must have at least one of the contexts. */
  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    return lookup(key, toQuery(contexts), num, allTermsRequired, doHighlight);
  }

  /** This is called if the last token isn't ended
   *  (e.g. user did not type a space after it).  Return an
   *  appropriate Query clause to add to the BooleanQuery. */
  protected Query getLastTokenQuery(String token) throws IOException {
    if (token.length() < minPrefixChars) {
      // The leading ngram was directly indexed:
      return new TermQuery(new Term(TEXTGRAMS_FIELD_NAME, token));
    }

    return new PrefixQuery(new Term(TEXT_FIELD_NAME, token));
  }
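  // Illustrative sketch: a lookup restricted to suggestions tagged with at
  // least one of the given contexts ("books" is a hypothetical context),
  // requiring all query terms and highlighting the hits:
  //
  //   Set<BytesRef> contexts = Collections.singleton(new BytesRef("books"));
  //   List<LookupResult> hits = suggester.lookup("luc", contexts, 5, true, true);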
  /** Retrieve suggestions, specifying whether all terms
   *  must match ({@code allTermsRequired}) and whether the hits
   *  should be highlighted ({@code doHighlight}). */
  public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
    return lookup(key, toQuery(contextInfo), num, allTermsRequired, doHighlight);
  }

  private BooleanQuery toQuery(Map<BytesRef, BooleanClause.Occur> contextInfo) {
    if (contextInfo == null || contextInfo.isEmpty()) {
      return null;
    }

    BooleanQuery.Builder contextFilter = new BooleanQuery.Builder();
    for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
      addContextToQuery(contextFilter, entry.getKey(), entry.getValue());
    }

    return contextFilter.build();
  }

  private BooleanQuery toQuery(Set<BytesRef> contextInfo) {
    if (contextInfo == null || contextInfo.isEmpty()) {
      return null;
    }

    BooleanQuery.Builder contextFilter = new BooleanQuery.Builder();
    for (BytesRef context : contextInfo) {
      addContextToQuery(contextFilter, context, BooleanClause.Occur.SHOULD);
    }
    return contextFilter.build();
  }

  /**
   * This method is handy as we do not need access to internal fields such as
   * CONTEXTS_FIELD_NAME in order to build queries.  However, this may not be
   * its best location.
   *
   * @param query an instance of {@link BooleanQuery.Builder}
   * @param context the context
   * @param clause one of {@link Occur}
   */
  public void addContextToQuery(BooleanQuery.Builder query, BytesRef context, BooleanClause.Occur clause) {
    // NOTE: we "should" wrap this in ConstantScoreQuery, or maybe send this as
    // a Filter instead to search.

    // TODO: if we had a BinaryTermField we could fix this "must be valid utf8"
    // limitation:
    query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context)), clause);
  }
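  // Illustrative sketch: composing a mixed context filter with
  // addContextToQuery and handing it to the BooleanQuery-based lookup below
  // (the context values are hypothetical):
  //
  //   BooleanQuery.Builder contextFilter = new BooleanQuery.Builder();
  //   suggester.addContextToQuery(contextFilter, new BytesRef("books"), BooleanClause.Occur.SHOULD);
  //   suggester.addContextToQuery(contextFilter, new BytesRef("retired"), BooleanClause.Occur.MUST_NOT);
  //   List<LookupResult> hits = suggester.lookup("luc", contextFilter.build(), 5, true, true);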
  /**
   * This is an advanced method providing the capability to send down to the
   * suggester any arbitrary Lucene query to be used to filter the result of
   * the suggester.
   *
   * @param key the keyword being looked for
   * @param contextQuery an arbitrary Lucene query to be used to filter the
   *        result of the suggester.  {@link #addContextToQuery} could be used
   *        to build this contextQuery.
   * @param num number of items to return
   * @param allTermsRequired all searched terms must match or not
   * @param doHighlight if true, the matching term will be highlighted in the search result
   * @return the result of the suggester
   * @throws IOException if there is an IO exception while reading data from the index
   */
  public List<LookupResult> lookup(CharSequence key, BooleanQuery contextQuery, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {

    if (searcherMgr == null) {
      throw new IllegalStateException("suggester was not built");
    }

    final BooleanClause.Occur occur;
    if (allTermsRequired) {
      occur = BooleanClause.Occur.MUST;
    } else {
      occur = BooleanClause.Occur.SHOULD;
    }

    BooleanQuery.Builder query;
    Set<String> matchedTokens;
    String prefixToken = null;

    try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
      //long t0 = System.currentTimeMillis();
      ts.reset();
      final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      String lastToken = null;
      query = new BooleanQuery.Builder();
      int maxEndOffset = -1;
      matchedTokens = new HashSet<>();
      while (ts.incrementToken()) {
        if (lastToken != null) {
          matchedTokens.add(lastToken);
          query.add(new TermQuery(new Term(TEXT_FIELD_NAME, lastToken)), occur);
        }
        lastToken = termAtt.toString();
        if (lastToken != null) {
          maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset());
        }
      }
      ts.end();

      if (lastToken != null) {
        Query lastQuery;
        if (maxEndOffset == offsetAtt.endOffset()) {
          // Use PrefixQuery (or the ngram equivalent) when
          // there was no trailing discarded chars in the
          // string (e.g. whitespace), so that if query does
          // not end with a space we show prefix matches for
          // that token:
          lastQuery = getLastTokenQuery(lastToken);
          prefixToken = lastToken;
        } else {
          // Use TermQuery for an exact match if there were
          // trailing discarded chars (e.g. whitespace), so
          // that if query ends with a space we only show
          // exact matches for that term:
          matchedTokens.add(lastToken);
          lastQuery = new TermQuery(new Term(TEXT_FIELD_NAME, lastToken));
        }

        if (lastQuery != null) {
          query.add(lastQuery, occur);
        }
      }

      if (contextQuery != null) {
        boolean allMustNot = true;
        for (BooleanClause clause : contextQuery.clauses()) {
          if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) {
            allMustNot = false;
            break;
          }
        }

        if (allMustNot) {
          // All are MUST_NOT: add the contextQuery to the main query instead (not as sub-query)
          for (BooleanClause clause : contextQuery.clauses()) {
            query.add(clause);
          }
        } else if (allTermsRequired == false) {
          // We must carefully upgrade the query clauses to MUST:
          BooleanQuery.Builder newQuery = new BooleanQuery.Builder();
          newQuery.add(query.build(), BooleanClause.Occur.MUST);
          newQuery.add(contextQuery, BooleanClause.Occur.MUST);
          query = newQuery;
        } else {
          // Add contextQuery as sub-query
          query.add(contextQuery, BooleanClause.Occur.MUST);
        }
      }
    }
    // TODO: we could allow blended sort here, combining
    // weight w/ score.  Now we ignore score and sort only
    // by weight:

    Query finalQuery = finishQuery(query, allTermsRequired);

    //System.out.println("finalQuery=" + finalQuery);

    // Sort by weight, descending:
    TopFieldCollector c = TopFieldCollector.create(SORT, num, 1);
    List<LookupResult> results = null;
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
      mgr = searcherMgr;  // acquire & release on same SearcherManager, via local reference
      searcher = mgr.acquire();
    }
    try {
      //System.out.println("got searcher=" + searcher);
      searcher.search(finalQuery, c);

      TopFieldDocs hits = c.topDocs();

      // Slower way if postings are not pre-sorted by weight:
      // hits = searcher.search(query, null, num, SORT);
      results = createResults(searcher, hits, num, key, doHighlight, matchedTokens, prefixToken);
    } finally {
      mgr.release(searcher);
    }

    //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
    //System.out.println(results);

    return results;
  }

  /**
   * Create the results based on the search hits.
   * Can be overridden by subclass to add particular behavior (e.g. weight transformation).
   * Note that there is no prefix token (the {@code prefixToken} argument will
   * be null) whenever the final token in the incoming request was in fact finished
   * (had trailing characters, such as white-space).
   *
   * @throws IOException If there are problems reading fields from the underlying Lucene index.
   */
  protected List<LookupResult> createResults(IndexSearcher searcher, TopFieldDocs hits, int num,
                                             CharSequence charSequence,
                                             boolean doHighlight, Set<String> matchedTokens, String prefixToken)
      throws IOException {

    List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    List<LookupResult> results = new ArrayList<>();
    for (int i = 0; i < hits.scoreDocs.length; i++) {
      FieldDoc fd = (FieldDoc) hits.scoreDocs[i];
      BinaryDocValues textDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), TEXT_FIELD_NAME);
      textDV.advance(fd.doc);
      BytesRef term = textDV.binaryValue();
      String text = term.utf8ToString();
      long score = (Long) fd.fields[0];

      // This will just be null if app didn't pass payloads to build():
      // TODO: maybe just stored fields?  they compress...
      BinaryDocValues payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");

      BytesRef payload;
      if (payloadsDV != null) {
        if (payloadsDV.advance(fd.doc) == fd.doc) {
          payload = BytesRef.deepCopyOf(payloadsDV.binaryValue());
        } else {
          payload = new BytesRef(BytesRef.EMPTY_BYTES);
        }
      } else {
        payload = null;
      }

      // Must look up sorted-set by segment:
      int segment = ReaderUtil.subIndex(fd.doc, leaves);
      SortedSetDocValues contextsDV = leaves.get(segment).reader().getSortedSetDocValues(CONTEXTS_FIELD_NAME);
      Set<BytesRef> contexts;
      if (contextsDV != null) {
        contexts = new HashSet<BytesRef>();
        int targetDocID = fd.doc - leaves.get(segment).docBase;
        if (contextsDV.advance(targetDocID) == targetDocID) {
          long ord;
          while ((ord = contextsDV.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            BytesRef context = BytesRef.deepCopyOf(contextsDV.lookupOrd(ord));
            contexts.add(context);
          }
        }
      } else {
        contexts = null;
      }

      LookupResult result;
      if (doHighlight) {
        result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload, contexts);
      } else {
        result = new LookupResult(text, score, payload, contexts);
      }

      results.add(result);
    }

    return results;
  }
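  // Illustrative sketch: a subclass could override createResults to transform
  // weights, e.g. applying a custom rescoring pass (entirely hypothetical):
  //
  //   @Override
  //   protected List<LookupResult> createResults(IndexSearcher searcher, TopFieldDocs hits,
  //                                              int num, CharSequence key, boolean doHighlight,
  //                                              Set<String> matchedTokens, String prefixToken) throws IOException {
  //     List<LookupResult> results = super.createResults(searcher, hits, num, key,
  //                                                      doHighlight, matchedTokens, prefixToken);
  //     // ... re-order or re-weight results here ...
  //     return results;
  //   }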
  /** Subclass can override this to tweak the Query before
   *  searching. */
  protected Query finishQuery(BooleanQuery.Builder in, boolean allTermsRequired) {
    return in.build();
  }

  /** Override this method to customize the Object
   *  representing a single highlighted suggestion; the
   *  result is set on each {@link
   *  org.apache.lucene.search.suggest.Lookup.LookupResult#highlightKey} member. */
  protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
    try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
      ts.reset();
      StringBuilder sb = new StringBuilder();
      int upto = 0;
      while (ts.incrementToken()) {
        String token = termAtt.toString();
        int startOffset = offsetAtt.startOffset();
        int endOffset = offsetAtt.endOffset();
        if (upto < startOffset) {
          addNonMatch(sb, text.substring(upto, startOffset));
          upto = startOffset;
        } else if (upto > startOffset) {
          continue;
        }

        if (matchedTokens.contains(token)) {
          // Token matches.
          addWholeMatch(sb, text.substring(startOffset, endOffset), token);
          upto = endOffset;
        } else if (prefixToken != null && token.startsWith(prefixToken)) {
          addPrefixMatch(sb, text.substring(startOffset, endOffset), token, prefixToken);
          upto = endOffset;
        }
      }
      ts.end();
      int endOffset = offsetAtt.endOffset();
      if (upto < endOffset) {
        addNonMatch(sb, text.substring(upto));
      }
      return sb.toString();
    }
  }

  /** Called while highlighting a single result, to append a
   *  non-matching chunk of text from the suggestion to the
   *  provided fragments list.
   *  @param sb The {@code StringBuilder} to append to
   *  @param text The text chunk to add */
  protected void addNonMatch(StringBuilder sb, String text) {
    sb.append(text);
  }

  /** Called while highlighting a single result, to append
   *  the whole matched token to the provided fragments list.
   *  @param sb The {@code StringBuilder} to append to
   *  @param surface The surface form (original) text
   *  @param analyzed The analyzed token corresponding to the surface form text */
  protected void addWholeMatch(StringBuilder sb, String surface, String analyzed) {
    sb.append("<b>");
    sb.append(surface);
    sb.append("</b>");
  }
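  // Illustrative sketch: override the highlight hooks to emit markup other
  // than <b>, e.g.:
  //
  //   @Override
  //   protected void addWholeMatch(StringBuilder sb, String surface, String analyzed) {
  //     sb.append("<strong>").append(surface).append("</strong>");
  //   }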
  /** Called while highlighting a single result, to append a
   *  matched prefix token, to the provided fragments list.
   *  @param sb The {@code StringBuilder} to append to
   *  @param surface The fragment of the surface form
   *        (indexed during {@link #build}, corresponding to
   *        this match)
   *  @param analyzed The analyzed token that matched
   *  @param prefixToken The prefix of the token that matched */
  protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
    // TODO: apps can try to invert their analysis logic
    // here, e.g. downcase the two before checking prefix:
    if (prefixToken.length() >= surface.length()) {
      addWholeMatch(sb, surface, analyzed);
      return;
    }
    sb.append("<b>");
    sb.append(surface.substring(0, prefixToken.length()));
    sb.append("</b>");
    sb.append(surface.substring(prefixToken.length()));
  }

  @Override
  public boolean store(DataOutput output) throws IOException {
    return false;
  }

  @Override
  public boolean load(DataInput input) throws IOException {
    return false;
  }

  @Override
  public void close() throws IOException {
    if (searcherMgr != null) {
      searcherMgr.close();
      searcherMgr = null;
    }
    if (writer != null) {
      writer.close();
      writer = null;
    }
    if (dir != null) {
      dir.close();
    }
  }

  @Override
  public long ramBytesUsed() {
    long mem = RamUsageEstimator.shallowSizeOf(this);
    try {
      if (searcherMgr != null) {
        SearcherManager mgr;
        IndexSearcher searcher;
        synchronized (searcherMgrLock) {
          mgr = searcherMgr;  // acquire & release on same SearcherManager, via local reference
          searcher = mgr.acquire();
        }
        try {
          for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
            LeafReader reader = FilterLeafReader.unwrap(context.reader());
            if (reader instanceof SegmentReader) {
              // Cast the unwrapped reader; context.reader() may be a FilterLeafReader:
              mem += ((SegmentReader) reader).ramBytesUsed();
            }
          }
        } finally {
          mgr.release(searcher);
        }
      }
      return mem;
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  }

  @Override
  public Collection<Accountable> getChildResources() {
    List<Accountable> resources = new ArrayList<>();
    try {
      if (searcherMgr != null) {
        SearcherManager mgr;
        IndexSearcher searcher;
        synchronized (searcherMgrLock) {
          mgr = searcherMgr;  // acquire & release on same SearcherManager, via local reference
          searcher = mgr.acquire();
        }
        try {
          for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
            LeafReader reader = FilterLeafReader.unwrap(context.reader());
            if (reader instanceof SegmentReader) {
              resources.add(Accountables.namedAccountable("segment", (SegmentReader) reader));
            }
          }
        } finally {
          mgr.release(searcher);
        }
      }
      return Collections.unmodifiableList(resources);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  }

  @Override
  public long getCount() throws IOException {
    if (searcherMgr == null) {
      return 0;
    }
    SearcherManager mgr;
    IndexSearcher searcher;
    synchronized (searcherMgrLock) {
      mgr = searcherMgr;  // acquire & release on same SearcherManager, via local reference
      searcher = mgr.acquire();
    }
    try {
      return searcher.getIndexReader().numDocs();
    } finally {
      mgr.release(searcher);
    }
  }
}
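Putting it together: a minimal, self-contained usage sketch (illustrative only; the index path, entries, and weights are hypothetical, and error handling is omitted). It indexes two suggestions via the NRT add()/refresh() path, then runs a highlighted infix lookup:

import java.nio.file.Paths;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class SuggestDemo {
  public static void main(String[] args) throws Exception {
    // Open a directory private to the suggester ("suggest-home" is hypothetical):
    Directory dir = FSDirectory.open(Paths.get("suggest-home"));
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(dir, new StandardAnalyzer());

    // Index two weighted suggestions; refresh() makes them visible to lookups:
    suggester.add(new BytesRef("lucene in action"), null, 10, null);
    suggester.add(new BytesRef("lucene for dummies"), null, 5, null);
    suggester.refresh();

    // Infix lookup on "luc": both suggestions match, sorted by descending weight,
    // with the matching prefix highlighted via highlightKey:
    List<LookupResult> results = suggester.lookup("luc", 2, true, true);
    for (LookupResult result : results) {
      System.out.println(result.key + " (weight=" + result.value + ")");
    }

    suggester.close();  // also closes the directory
  }
}

Note the design choice this exercises: results are ordered by the indexed "weight" NumericDocValuesField (the SORT constant above), not by query score, which is why the class javadoc says the suggester fits best when you have a strong a-priori ranking of the suggestions.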