org.apache.lucene.search.vectorhighlight.FastVectorHighlighter.java Source code

Introduction

Here is the source code for org.apache.lucene.search.vectorhighlight.FastVectorHighlighter.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.vectorhighlight;

import java.io.IOException;
import java.util.Iterator;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Encoder;

/**
 * Highlighter that turns the matches of a {@link FieldQuery} in a document field into
 * fragment (snippet) strings.
 *
 * <p>For each request a {@link FieldTermStack} and a {@link FieldPhraseList} are built for the
 * requested field(s); a {@link FragListBuilder} converts the phrase list into a
 * {@link FieldFragList}, and a {@link FragmentsBuilder} renders that list into the final
 * fragment string(s).
 */
public class FastVectorHighlighter {
    /** Default value of the {@code phraseHighlight} flag used by the no-arg constructor. */
    public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;
    /** Default value of the {@code fieldMatch} flag used by the no-arg constructor. */
    public static final boolean DEFAULT_FIELD_MATCH = true;
    /** Flag handed to every {@link FieldQuery} created by this highlighter. */
    protected final boolean phraseHighlight;
    /** Flag handed to every {@link FieldQuery} created by this highlighter. */
    protected final boolean fieldMatch;
    // Builders used by the overloads that do not take explicit builders.
    private final FragListBuilder fragListBuilder;
    private final FragmentsBuilder fragmentsBuilder;
    // Passed to every FieldPhraseList; Integer.MAX_VALUE means "unlimited".
    private int phraseLimit = Integer.MAX_VALUE;

    /**
     * the default constructor. Uses {@link #DEFAULT_PHRASE_HIGHLIGHT} and
     * {@link #DEFAULT_FIELD_MATCH}.
     */
    public FastVectorHighlighter() {
        this(DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH);
    }

    /**
     * a constructor. Using {@link SimpleFragListBuilder} and {@link ScoreOrderFragmentsBuilder}.
     * 
     * @param phraseHighlight true or false for phrase highlighting
     * @param fieldMatch true or false for field matching
     */
    public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch) {
        this(phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder());
    }

    /**
     * a constructor. A {@link FragListBuilder} and a {@link FragmentsBuilder} can be specified (plugins).
     * The builders are stored as given; no null check is performed here.
     * 
     * @param phraseHighlight true or false for phrase highlighting
     * @param fieldMatch true or false for field matching
     * @param fragListBuilder an instance of {@link FragListBuilder}
     * @param fragmentsBuilder an instance of {@link FragmentsBuilder}
     */
    public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch, FragListBuilder fragListBuilder,
            FragmentsBuilder fragmentsBuilder) {
        this.phraseHighlight = phraseHighlight;
        this.fieldMatch = fieldMatch;
        this.fragListBuilder = fragListBuilder;
        this.fragmentsBuilder = fragmentsBuilder;
    }

    /**
     * create a {@link FieldQuery} object without an {@link IndexReader}.
     * Equivalent to {@code getFieldQuery(query, null)}, with any {@link IOException}
     * rethrown wrapped in a {@link RuntimeException}.
     * 
     * @param query a query
     * @return the created {@link FieldQuery} object
     */
    public FieldQuery getFieldQuery(Query query) {
        // TODO: should we deprecate this? 
        // because if there is no reader, then we cannot rewrite MTQ.
        try {
            return getFieldQuery(query, null);
        } catch (IOException e) {
            // should never be thrown when reader is null
            throw new RuntimeException(e);
        }
    }

    /**
     * create a {@link FieldQuery} object using this highlighter's {@code phraseHighlight}
     * and {@code fieldMatch} settings.
     * 
     * @param query a query
     * @param reader {@link IndexReader} handed to the {@link FieldQuery}; the single-argument
     *        overload passes {@code null} here, in which case (per the note on that overload)
     *        multi-term queries cannot be rewritten
     * @return the created {@link FieldQuery} object
     * @throws IOException propagated from the {@link FieldQuery} constructor
     */
    public FieldQuery getFieldQuery(Query query, IndexReader reader) throws IOException {
        return new FieldQuery(query, reader, phraseHighlight, fieldMatch);
    }

    /**
     * return the best fragment, using the builders this highlighter was constructed with.
     * 
     * @param fieldQuery {@link FieldQuery} object
     * @param reader {@link IndexReader} of the index
     * @param docId document id to be highlighted
     * @param fieldName field of the document to be highlighted
     * @param fragCharSize the length (number of chars) of a fragment
     * @return the best fragment (snippet) string
     * @throws IOException If there is a low-level I/O error
     */
    public final String getBestFragment(final FieldQuery fieldQuery, IndexReader reader, int docId,
            String fieldName, int fragCharSize) throws IOException {
        FieldFragList fieldFragList = getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName,
                fragCharSize);
        return fragmentsBuilder.createFragment(reader, docId, fieldName, fieldFragList);
    }

    /**
     * return the best fragments, using the builders this highlighter was constructed with.
     * 
     * @param fieldQuery {@link FieldQuery} object
     * @param reader {@link IndexReader} of the index
     * @param docId document id to be highlighted
     * @param fieldName field of the document to be highlighted
     * @param fragCharSize the length (number of chars) of a fragment
     * @param maxNumFragments maximum number of fragments
     * @return created fragments or null when no fragments created.
     *         size of the array can be less than maxNumFragments
     * @throws IOException If there is a low-level I/O error
     */
    public final String[] getBestFragments(final FieldQuery fieldQuery, IndexReader reader, int docId,
            String fieldName, int fragCharSize, int maxNumFragments) throws IOException {
        FieldFragList fieldFragList = getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName,
                fragCharSize);
        return fragmentsBuilder.createFragments(reader, docId, fieldName, fieldFragList, maxNumFragments);
    }

    /**
     * return the best fragment. The supplied builders are used instead of the ones this
     * highlighter was constructed with.
     * 
     * @param fieldQuery {@link FieldQuery} object
     * @param reader {@link IndexReader} of the index
     * @param docId document id to be highlighted
     * @param fieldName field of the document to be highlighted
     * @param fragCharSize the length (number of chars) of a fragment
     * @param fragListBuilder {@link FragListBuilder} object
     * @param fragmentsBuilder {@link FragmentsBuilder} object
     * @param preTags pre-tags to be used to highlight terms
     * @param postTags post-tags to be used to highlight terms
     * @param encoder an encoder that generates encoded text
     * @return the best fragment (snippet) string
     * @throws IOException If there is a low-level I/O error
     */
    public final String getBestFragment(final FieldQuery fieldQuery, IndexReader reader, int docId,
            String fieldName, int fragCharSize, FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
            String[] preTags, String[] postTags, Encoder encoder) throws IOException {
        // fragListBuilder / fragmentsBuilder here are the parameters, which shadow the instance fields.
        FieldFragList fieldFragList = getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName,
                fragCharSize);
        return fragmentsBuilder.createFragment(reader, docId, fieldName, fieldFragList, preTags, postTags, encoder);
    }

    /**
     * return the best fragments. The supplied builders are used instead of the ones this
     * highlighter was constructed with.
     * 
     * @param fieldQuery {@link FieldQuery} object
     * @param reader {@link IndexReader} of the index
     * @param docId document id to be highlighted
     * @param fieldName field of the document to be highlighted
     * @param fragCharSize the length (number of chars) of a fragment
     * @param maxNumFragments maximum number of fragments
     * @param fragListBuilder {@link FragListBuilder} object
     * @param fragmentsBuilder {@link FragmentsBuilder} object
     * @param preTags pre-tags to be used to highlight terms
     * @param postTags post-tags to be used to highlight terms
     * @param encoder an encoder that generates encoded text
     * @return created fragments or null when no fragments created.
     *         size of the array can be less than maxNumFragments
     * @throws IOException If there is a low-level I/O error
     */
    public final String[] getBestFragments(final FieldQuery fieldQuery, IndexReader reader, int docId,
            String fieldName, int fragCharSize, int maxNumFragments, FragListBuilder fragListBuilder,
            FragmentsBuilder fragmentsBuilder, String[] preTags, String[] postTags, Encoder encoder)
            throws IOException {
        // fragListBuilder / fragmentsBuilder here are the parameters, which shadow the instance fields.
        FieldFragList fieldFragList = getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName,
                fragCharSize);
        return fragmentsBuilder.createFragments(reader, docId, fieldName, fieldFragList, maxNumFragments, preTags,
                postTags, encoder);
    }

    /**
     * Return the best fragments.  Matches are scanned from matchedFields and turned into fragments against
     * storedField.  The highlighting may not make sense if matchedFields has matches with offsets that don't
     * correspond to features in storedField.  It will outright throw a {@code StringIndexOutOfBoundsException}
     * if matchedFields produces offsets outside of storedField.  As such it is advisable that all
     * matchedFields share the same source as storedField or are at least a prefix of it.
     * 
     * @param fieldQuery {@link FieldQuery} object
     * @param reader {@link IndexReader} of the index
     * @param docId document id to be highlighted
     * @param storedField field of the document that stores the text
     * @param matchedFields fields of the document to scan for matches; must not be empty
     * @param fragCharSize the length (number of chars) of a fragment
     * @param maxNumFragments maximum number of fragments
     * @param fragListBuilder {@link FragListBuilder} object
     * @param fragmentsBuilder {@link FragmentsBuilder} object
     * @param preTags pre-tags to be used to highlight terms
     * @param postTags post-tags to be used to highlight terms
     * @param encoder an encoder that generates encoded text
     * @return created fragments or null when no fragments created.
     *         size of the array can be less than maxNumFragments
     * @throws IOException If there is a low-level I/O error
     * @throws IllegalArgumentException if matchedFields is empty
     */
    public final String[] getBestFragments(final FieldQuery fieldQuery, IndexReader reader, int docId,
            String storedField, Set<String> matchedFields, int fragCharSize, int maxNumFragments,
            FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder, String[] preTags, String[] postTags,
            Encoder encoder) throws IOException {
        // Matches come from matchedFields, but the fragment text is rendered from storedField.
        FieldFragList fieldFragList = getFieldFragList(fragListBuilder, fieldQuery, reader, docId, matchedFields,
                fragCharSize);
        return fragmentsBuilder.createFragments(reader, docId, storedField, fieldFragList, maxNumFragments, preTags,
                postTags, encoder);
    }

    /**
     * Build a FieldFragList for one field: term stack, then phrase list (bounded by
     * {@code phraseLimit}), then the frag list produced by the given builder.
     */
    private FieldFragList getFieldFragList(FragListBuilder fragListBuilder, final FieldQuery fieldQuery,
            IndexReader reader, int docId, String matchedField, int fragCharSize) throws IOException {
        FieldTermStack fieldTermStack = new FieldTermStack(reader, docId, matchedField, fieldQuery);
        FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery, phraseLimit);
        return fragListBuilder.createFieldFragList(fieldPhraseList, fragCharSize);
    }

    /**
     * Build a FieldFragList for more than one field. A phrase list is built per matched field
     * (each bounded by {@code phraseLimit}); the lists are then merged into a single
     * {@link FieldPhraseList} that is handed to the given builder.
     *
     * @throws IllegalArgumentException if matchedFields is empty
     */
    private FieldFragList getFieldFragList(FragListBuilder fragListBuilder, final FieldQuery fieldQuery,
            IndexReader reader, int docId, Set<String> matchedFields, int fragCharSize) throws IOException {
        Iterator<String> matchedFieldsItr = matchedFields.iterator();
        if (!matchedFieldsItr.hasNext()) {
            throw new IllegalArgumentException("matchedFields must contain at least one field name.");
        }
        // One phrase list per matched field, in the set's iteration order.
        FieldPhraseList[] toMerge = new FieldPhraseList[matchedFields.size()];
        int i = 0;
        while (matchedFieldsItr.hasNext()) {
            FieldTermStack stack = new FieldTermStack(reader, docId, matchedFieldsItr.next(), fieldQuery);
            toMerge[i++] = new FieldPhraseList(stack, fieldQuery, phraseLimit);
        }
        return fragListBuilder.createFieldFragList(new FieldPhraseList(toMerge), fragCharSize);
    }

    /**
     * return whether phraseHighlight or not.
     * 
     * @return whether phraseHighlight or not
     */
    public boolean isPhraseHighlight() {
        return phraseHighlight;
    }

    /**
     * return whether fieldMatch or not.
     * 
     * @return whether fieldMatch or not
     */
    public boolean isFieldMatch() {
        return fieldMatch;
    }

    /**
     * @return the maximum number of phrases to analyze when searching for the highest-scoring phrase.
     */
    public int getPhraseLimit() {
        return phraseLimit;
    }

    /**
     * set the maximum number of phrases to analyze when searching for the highest-scoring phrase.
     * The default is unlimited (Integer.MAX_VALUE). The value is stored without validation.
     *
     * @param phraseLimit the new limit, passed to every {@link FieldPhraseList} built afterwards
     */
    public void setPhraseLimit(int phraseLimit) {
        this.phraseLimit = phraseLimit;
    }
}