Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.vectorhighlight;

import java.io.IOException;
import java.util.Iterator;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Encoder;

/**
 * Another highlighter implementation.
 *
 * <p>For every request the highlighter builds a {@link FieldTermStack} for the requested
 * field(s), groups it into a {@link FieldPhraseList}, asks a {@link FragListBuilder} to turn
 * that into a {@link FieldFragList}, and finally lets a {@link FragmentsBuilder} render the
 * fragment strings.
 */
public class FastVectorHighlighter {

  /** Default value of the {@code phraseHighlight} option. */
  public static final boolean DEFAULT_PHRASE_HIGHLIGHT = true;

  /** Default value of the {@code fieldMatch} option. */
  public static final boolean DEFAULT_FIELD_MATCH = true;

  protected final boolean phraseHighlight;
  protected final boolean fieldMatch;

  // Builders used by the overloads that do not take explicit builders.
  private final FragListBuilder fragListBuilder;
  private final FragmentsBuilder fragmentsBuilder;

  // Passed to every FieldPhraseList this highlighter creates; unlimited by default.
  private int phraseLimit = Integer.MAX_VALUE;

  /**
   * The default constructor. Uses {@link #DEFAULT_PHRASE_HIGHLIGHT} and
   * {@link #DEFAULT_FIELD_MATCH}.
   */
  public FastVectorHighlighter() {
    this(DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH);
  }

  /**
   * A constructor. Using {@link SimpleFragListBuilder} and {@link ScoreOrderFragmentsBuilder}.
   *
   * @param phraseHighlight true or false for phrase highlighting
   * @param fieldMatch true or false for field matching
   */
  public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch) {
    this(phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder());
  }

  /**
   * A constructor. A {@link FragListBuilder} and a {@link FragmentsBuilder} can be specified
   * (plugins).
   *
   * @param phraseHighlight true or false for phrase highlighting
   * @param fieldMatch true or false for field matching
   * @param fragListBuilder an instance of {@link FragListBuilder}
   * @param fragmentsBuilder an instance of {@link FragmentsBuilder}
   */
  public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch,
      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder) {
    this.phraseHighlight = phraseHighlight;
    this.fieldMatch = fieldMatch;
    this.fragListBuilder = fragListBuilder;
    this.fragmentsBuilder = fragmentsBuilder;
  }

  /**
   * Create a {@link FieldQuery} object without an {@link IndexReader}.
   *
   * <p>Delegates to {@link #getFieldQuery(Query, IndexReader)} with a {@code null} reader. An
   * {@link IOException} from that call is not expected and is rethrown wrapped in a
   * {@link RuntimeException}.
   *
   * @param query a query
   * @return the created {@link FieldQuery} object
   */
  public FieldQuery getFieldQuery(Query query) {
    // TODO: should we deprecate this?
    // because if there is no reader, then we cannot rewrite MTQ.
    try {
      return getFieldQuery(query, null);
    } catch (IOException e) {
      // should never be thrown when reader is null
      throw new RuntimeException(e);
    }
  }

  /**
   * Create a {@link FieldQuery} object, configured with this highlighter's
   * {@code phraseHighlight} and {@code fieldMatch} settings.
   *
   * @param query a query
   * @param reader {@link IndexReader} handed to the {@link FieldQuery}; the single-argument
   *        overload passes {@code null} here
   * @return the created {@link FieldQuery} object
   * @throws IOException If there is a low-level I/O error
   */
  public FieldQuery getFieldQuery(Query query, IndexReader reader) throws IOException {
    return new FieldQuery(query, reader, phraseHighlight, fieldMatch);
  }

  /**
   * Return the best fragment, using the builders given at construction time.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @return the best fragment (snippet) string
   * @throws IOException If there is a low-level I/O error
   */
  public final String getBestFragment(final FieldQuery fieldQuery, IndexReader reader, int docId,
      String fieldName, int fragCharSize) throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragment(reader, docId, fieldName, fieldFragList);
  }

  /**
   * Return the best fragments, using the builders given at construction time.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @return created fragments or null when no fragments created.
   *         size of the array can be less than maxNumFragments
   * @throws IOException If there is a low-level I/O error
   */
  public final String[] getBestFragments(final FieldQuery fieldQuery, IndexReader reader,
      int docId, String fieldName, int fragCharSize, int maxNumFragments) throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragments(reader, docId, fieldName, fieldFragList,
        maxNumFragments);
  }

  /**
   * Return the best fragment, using the builders, tags and encoder passed to this call instead
   * of the builders given at construction time.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return the best fragment (snippet) string
   * @throws IOException If there is a low-level I/O error
   */
  public final String getBestFragment(final FieldQuery fieldQuery, IndexReader reader, int docId,
      String fieldName, int fragCharSize,
      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
      String[] preTags, String[] postTags, Encoder encoder) throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragment(reader, docId, fieldName, fieldFragList,
        preTags, postTags, encoder);
  }

  /**
   * Return the best fragments, using the builders, tags and encoder passed to this call instead
   * of the builders given at construction time.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return created fragments or null when no fragments created.
   *         size of the array can be less than maxNumFragments
   * @throws IOException If there is a low-level I/O error
   */
  public final String[] getBestFragments(final FieldQuery fieldQuery, IndexReader reader,
      int docId, String fieldName, int fragCharSize, int maxNumFragments,
      FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
      String[] preTags, String[] postTags, Encoder encoder) throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
    return fragmentsBuilder.createFragments(reader, docId, fieldName, fieldFragList,
        maxNumFragments, preTags, postTags, encoder);
  }

  /**
   * Return the best fragments. Matches are scanned from matchedFields and turned into fragments
   * against storedField. The highlighting may not make sense if matchedFields has matches with
   * offsets that don't correspond to features in storedField. It will outright throw a
   * {@code StringIndexOutOfBoundsException} if matchedFields produces offsets outside of
   * storedField. As such it is advisable that all matchedFields share the same source as
   * storedField or are at least a prefix of it.
   *
   * @param fieldQuery {@link FieldQuery} object
   * @param reader {@link IndexReader} of the index
   * @param docId document id to be highlighted
   * @param storedField field of the document that stores the text
   * @param matchedFields fields of the document to scan for matches; must not be empty
   * @param fragCharSize the length (number of chars) of a fragment
   * @param maxNumFragments maximum number of fragments
   * @param fragListBuilder {@link FragListBuilder} object
   * @param fragmentsBuilder {@link FragmentsBuilder} object
   * @param preTags pre-tags to be used to highlight terms
   * @param postTags post-tags to be used to highlight terms
   * @param encoder an encoder that generates encoded text
   * @return created fragments or null when no fragments created.
   *         size of the array can be less than maxNumFragments
   * @throws IOException If there is a low-level I/O error
   * @throws IllegalArgumentException if matchedFields is empty
   */
  public final String[] getBestFragments(final FieldQuery fieldQuery, IndexReader reader,
      int docId, String storedField, Set<String> matchedFields, int fragCharSize,
      int maxNumFragments, FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
      String[] preTags, String[] postTags, Encoder encoder) throws IOException {
    FieldFragList fieldFragList =
        getFieldFragList(fragListBuilder, fieldQuery, reader, docId, matchedFields, fragCharSize);
    return fragmentsBuilder.createFragments(reader, docId, storedField, fieldFragList,
        maxNumFragments, preTags, postTags, encoder);
  }

  /**
   * Build a FieldFragList for one field.
   *
   * <p>Creates the {@link FieldTermStack} for the field, wraps it in a {@link FieldPhraseList}
   * bounded by the current phrase limit, and hands that to the given builder.
   */
  private FieldFragList getFieldFragList(FragListBuilder fragListBuilder,
      final FieldQuery fieldQuery, IndexReader reader, int docId,
      String matchedField, int fragCharSize) throws IOException {
    FieldTermStack fieldTermStack = new FieldTermStack(reader, docId, matchedField, fieldQuery);
    FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery, phraseLimit);
    return fragListBuilder.createFieldFragList(fieldPhraseList, fragCharSize);
  }

  /**
   * Build a FieldFragList for more than one field.
   *
   * <p>One {@link FieldPhraseList} is created per matched field (each bounded by the current
   * phrase limit); they are then combined through the merging {@link FieldPhraseList}
   * constructor before being handed to the given builder.
   *
   * @throws IllegalArgumentException if matchedFields is empty
   */
  private FieldFragList getFieldFragList(FragListBuilder fragListBuilder,
      final FieldQuery fieldQuery, IndexReader reader, int docId,
      Set<String> matchedFields, int fragCharSize) throws IOException {
    Iterator<String> matchedFieldsItr = matchedFields.iterator();
    if (!matchedFieldsItr.hasNext()) {
      throw new IllegalArgumentException("matchedFields must contain at least one field name.");
    }
    FieldPhraseList[] toMerge = new FieldPhraseList[matchedFields.size()];
    int i = 0;
    while (matchedFieldsItr.hasNext()) {
      FieldTermStack stack =
          new FieldTermStack(reader, docId, matchedFieldsItr.next(), fieldQuery);
      toMerge[i++] = new FieldPhraseList(stack, fieldQuery, phraseLimit);
    }
    return fragListBuilder.createFieldFragList(new FieldPhraseList(toMerge), fragCharSize);
  }

  /**
   * Return whether phraseHighlight or not.
   *
   * @return whether phraseHighlight or not
   */
  public boolean isPhraseHighlight() {
    return phraseHighlight;
  }

  /**
   * Return whether fieldMatch or not.
   *
   * @return whether fieldMatch or not
   */
  public boolean isFieldMatch() {
    return fieldMatch;
  }

  /**
   * Return the phrase limit.
   *
   * @return the maximum number of phrases to analyze when searching for the highest-scoring
   *         phrase.
   */
  public int getPhraseLimit() {
    return phraseLimit;
  }

  /**
   * Set the maximum number of phrases to analyze when searching for the highest-scoring phrase.
   * The default is unlimited (Integer.MAX_VALUE).
   *
   * @param phraseLimit the new limit; stored as given, without validation
   */
  public void setPhraseLimit(int phraseLimit) {
    this.phraseLimit = phraseLimit;
  }
}