org.apache.lucene.search.highlight.QueryTermScorer.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.search.highlight.QueryTermScorer.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.highlight;

import java.util.HashMap;
import java.util.HashSet;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;

/**
 * {@link Scorer} implementation which scores text fragments by the number of
 * unique query terms found. This class uses the {@link QueryTermExtractor}
 * class to process determine the query terms and their boosts to be used.
 */
// TODO: provide option to boost score of fragments near beginning of document
// based on fragment.getFragNum()
public class QueryTermScorer implements Scorer {

    TextFragment currentTextFragment = null;
    HashSet<String> uniqueTermsInFragment;

    float totalScore = 0;
    float maxTermWeight = 0;
    private HashMap<String, WeightedTerm> termsToFind;

    private CharTermAttribute termAtt;

    /**
     * 
     * @param query a Lucene query (ideally rewritten using query.rewrite before
     *        being passed to this class and the searcher)
     */
    public QueryTermScorer(Query query) {
        this(QueryTermExtractor.getTerms(query));
    }

    /**
     * 
     * @param query a Lucene query (ideally rewritten using query.rewrite before
     *        being passed to this class and the searcher)
     * @param fieldName the Field name which is used to match Query terms
     */
    public QueryTermScorer(Query query, String fieldName) {
        this(QueryTermExtractor.getTerms(query, false, fieldName));
    }

    /**
     * 
     * @param query a Lucene query (ideally rewritten using query.rewrite before
     *        being passed to this class and the searcher)
     * @param reader used to compute IDF which can be used to a) score selected
     *        fragments better b) use graded highlights eg set font color
     *        intensity
     * @param fieldName the field on which Inverse Document Frequency (IDF)
     *        calculations are based
     */
    public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
        this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
    }

    public QueryTermScorer(WeightedTerm[] weightedTerms) {
        termsToFind = new HashMap<>();
        for (int i = 0; i < weightedTerms.length; i++) {
            WeightedTerm existingTerm = termsToFind.get(weightedTerms[i].term);
            if ((existingTerm == null) || (existingTerm.weight < weightedTerms[i].weight)) {
                // if a term is defined more than once, always use the highest scoring
                // weight
                termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
                maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
            }
        }
    }

    /* (non-Javadoc)
     * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
     */
    @Override
    public TokenStream init(TokenStream tokenStream) {
        termAtt = tokenStream.addAttribute(CharTermAttribute.class);
        return null;
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
     * .lucene.search.highlight.TextFragment)
     */
    @Override
    public void startFragment(TextFragment newFragment) {
        uniqueTermsInFragment = new HashSet<>();
        currentTextFragment = newFragment;
        totalScore = 0;

    }

    /* (non-Javadoc)
     * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
     */
    @Override
    public float getTokenScore() {
        String termText = termAtt.toString();

        WeightedTerm queryTerm = termsToFind.get(termText);
        if (queryTerm == null) {
            // not a query term - return
            return 0;
        }
        // found a query term - is it unique in this doc?
        if (!uniqueTermsInFragment.contains(termText)) {
            totalScore += queryTerm.getWeight();
            uniqueTermsInFragment.add(termText);
        }
        return queryTerm.getWeight();
    }

    /* (non-Javadoc)
     * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
     */
    @Override
    public float getFragmentScore() {
        return totalScore;
    }

    /*
     * (non-Javadoc)
     * 
     * @see
     * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
     */
    public void allFragmentsProcessed() {
        // this class has no special operations to perform at end of processing
    }

    /**
     * 
     * @return The highest weighted term (useful for passing to GradientFormatter
     *         to set top end of coloring scale.
     */
    public float getMaxTermWeight() {
        return maxTermWeight;
    }
}