org.apache.blur.utils.HighlightHelper.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.blur.utils.HighlightHelper.java

Source

package org.apache.blur.utils;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;

import org.apache.blur.analysis.FieldManager;
import org.apache.blur.lucene.search.SuperQuery;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.util.BytesRef;

/**
 * Static helpers that build a highlighted copy of a Lucene {@link Document}
 * for a given {@link Query}.
 *
 * <p>
 * Text fields are highlighted with the Lucene {@link Highlighter}; numeric
 * fields are highlighted when their value falls inside a
 * {@link NumericRangeQuery} on the same field.
 */
public class HighlightHelper {

    /** Maximum number of fragments requested from the highlighter per field value. */
    private static final int MAX_FRAGMENTS = 10;

    /** Names of fields that are copied into the result as-is, without highlighting. */
    private static final Collection<String> FIELDS_NOT_TO_HIGHLIGHT = createFieldsNotToHighlight();

    /** Builds the collection of field names that are never highlighted. */
    private static Collection<String> createFieldsNotToHighlight() {
        Collection<String> fields = new HashSet<String>();
        fields.add(BlurConstants.ROW_ID);
        fields.add(BlurConstants.RECORD_ID);
        fields.add(BlurConstants.PRIME_DOC);
        fields.add(BlurConstants.FAMILY);
        return fields;
    }

    /**
     * Builds a new document holding the highlighted fields of {@code document}.
     *
     * <p>
     * The field-less field and the fields in {@code FIELDS_NOT_TO_HIGHLIGHT}
     * are copied unchanged. A numeric field is added, wrapped in
     * {@code preTag}/{@code postTag}, only when
     * {@link #shouldNumberBeHighlighted(String, Number, Query)} accepts it. For
     * any other field one entry is added per highlighter fragment with a score
     * greater than zero. Fields that produce no highlight are left out of the
     * result.
     *
     * <p>
     * NOTE: This method will not preserve the correct field types; every
     * highlighted value is added as a stored {@link StringField}.
     *
     * @param docId
     *            id of the document in {@code reader}, used to obtain the token
     *            stream of each field
     * @param document
     *            the document whose fields are highlighted
     * @param query
     *            the query to highlight against
     * @param fieldManager
     *            supplies the field-less field name, the query analyzer and
     *            whether a field is field-less indexed
     * @param reader
     *            index reader used to obtain token streams
     * @param preTag
     *            text placed before each highlighted term or number
     * @param postTag
     *            text placed after each highlighted term or number
     * @return a new document with the highlighted fields
     * @throws IOException
     *             if reading from the index fails
     * @throws InvalidTokenOffsetsException
     *             if the highlighter reports token offsets outside the text
     */
    public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
            IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {

        String fieldLessFieldName = fieldManager.getFieldLessFieldName();

        // Query with SuperQuery wrappers removed but no field renaming; used
        // for every field that is not field-less indexed.
        Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);

        Analyzer analyzer = fieldManager.getAnalyzerForQuery();

        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
        Document result = new Document();
        for (IndexableField f : document) {
            String name = f.name();
            if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
                result.add(f);
                continue;
            }
            String text = f.stringValue();
            Number numericValue = f.numericValue();

            // For field-less indexed fields, clauses on the field-less field
            // are redirected to this field so they can highlight it.
            Query fieldFixedQuery;
            if (fieldManager.isFieldLessIndexed(name)) {
                fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
            } else {
                fieldFixedQuery = fixedQuery;
            }

            if (numericValue != null) {
                if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
                    String numberHighlight = preTag + text + postTag;
                    result.add(new StringField(name, numberHighlight, Store.YES));
                }
            } else if (text != null) {
                // NOTE(review): the token stream is looked up by docId and
                // field name rather than built from this value; confirm it
                // lines up with `text` for multi-valued fields.
                Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
                TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
                TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false,
                        MAX_FRAGMENTS);
                for (TextFragment fragment : fragments) {
                    if (fragment != null && fragment.getScore() > 0) {
                        result.add(new StringField(name, fragment.toString(), Store.YES));
                    }
                }
            }
            // A field with neither a numeric nor a string value has nothing to
            // highlight and is left out like any other unmatched field.
        }
        return result;
    }

    /**
     * Returns a copy of {@code query} with every {@link SuperQuery} replaced by
     * the query it wraps and, when {@code name} is not {@code null}, with
     * clauses on the field-less field redirected to {@code name}.
     *
     * <p>
     * {@link BooleanQuery} instances are rebuilt clause by clause. A
     * {@link SuperQuery} is unwrapped recursively so that a boolean query
     * nested inside it is processed as well.
     */
    private static Query fixSuperQuery(Query query, String name, String fieldLessFieldName) {
        if (query instanceof BooleanQuery) {
            BooleanQuery bq = (BooleanQuery) query;
            BooleanQuery newBq = new BooleanQuery();
            for (BooleanClause booleanClause : bq) {
                newBq.add(fixSuperQuery(booleanClause.getQuery(), name, fieldLessFieldName),
                        booleanClause.getOccur());
            }
            return newBq;
        }
        if (query instanceof SuperQuery) {
            SuperQuery sq = (SuperQuery) query;
            return fixSuperQuery(sq.getQuery(), name, fieldLessFieldName);
        }
        return setFieldIfNeeded(query, name, fieldLessFieldName);
    }

    /**
     * Returns a query equivalent to {@code query} but targeting field
     * {@code name} when {@code query} is a term, wildcard, multi-phrase,
     * phrase, prefix or term-range query on {@code fieldLessFieldName}.
     * In every other case, including {@code name == null}, {@code query} is
     * returned unchanged.
     */
    private static Query setFieldIfNeeded(Query query, String name, String fieldLessFieldName) {
        if (name == null) {
            return query;
        }
        if (query instanceof TermQuery) {
            Term term = ((TermQuery) query).getTerm();
            if (term.field().equals(fieldLessFieldName)) {
                return new TermQuery(new Term(name, term.bytes()));
            }
        } else if (query instanceof WildcardQuery) {
            Term term = ((WildcardQuery) query).getTerm();
            if (term.field().equals(fieldLessFieldName)) {
                return new WildcardQuery(new Term(name, term.bytes()));
            }
        } else if (query instanceof MultiPhraseQuery) {
            MultiPhraseQuery mpq = (MultiPhraseQuery) query;
            List<Term[]> termArrays = mpq.getTermArrays();
            if (isTermField(termArrays, fieldLessFieldName)) {
                int[] positions = mpq.getPositions();
                MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery();
                multiPhraseQuery.setSlop(mpq.getSlop());
                for (int i = 0; i < termArrays.size(); i++) {
                    multiPhraseQuery.add(changeFields(termArrays.get(i), name), positions[i]);
                }
                return multiPhraseQuery;
            }
        } else if (query instanceof PhraseQuery) {
            PhraseQuery pq = (PhraseQuery) query;
            Term[] terms = pq.getTerms();
            // A phrase query without terms has no field to inspect.
            if (terms.length > 0 && terms[0].field().equals(fieldLessFieldName)) {
                int[] positions = pq.getPositions();
                PhraseQuery phraseQuery = new PhraseQuery();
                for (int i = 0; i < terms.length; i++) {
                    phraseQuery.add(new Term(name, terms[i].bytes()), positions[i]);
                }
                phraseQuery.setSlop(pq.getSlop());
                return phraseQuery;
            }
        } else if (query instanceof PrefixQuery) {
            Term prefix = ((PrefixQuery) query).getPrefix();
            if (prefix.field().equals(fieldLessFieldName)) {
                return new PrefixQuery(new Term(name, prefix.bytes()));
            }
        } else if (query instanceof TermRangeQuery) {
            TermRangeQuery trq = (TermRangeQuery) query;
            if (trq.getField().equals(fieldLessFieldName)) {
                BytesRef lowerTerm = trq.getLowerTerm();
                BytesRef upperTerm = trq.getUpperTerm();
                return new TermRangeQuery(name, lowerTerm, upperTerm, trq.includesLower(), trq.includesUpper());
            }
        }
        return query;
    }

    /** Returns copies of {@code terms} that keep their bytes but use field {@code name}. */
    private static Term[] changeFields(Term[] terms, String name) {
        Term[] newTerms = new Term[terms.length];
        for (int i = 0; i < terms.length; i++) {
            newTerms[i] = new Term(name, terms[i].bytes());
        }
        return newTerms;
    }

    /**
     * Returns {@code true} when the first term of the first term array is on
     * field {@code fieldName}; {@code false} when there is no such term.
     */
    private static boolean isTermField(List<Term[]> termArrays, String fieldName) {
        if (termArrays.isEmpty()) {
            return false;
        }
        Term[] terms = termArrays.get(0);
        return terms.length > 0 && terms[0].field().equals(fieldName);
    }

    /**
     * Decides whether a numeric field value matches {@code query} and should
     * therefore be highlighted.
     *
     * <p>
     * For a {@link BooleanQuery} the non-prohibited clauses are checked
     * recursively and the first match wins. For a {@link NumericRangeQuery}
     * the value is tested against the range using the check that corresponds
     * to the runtime type of {@code numericValue} (Integer, Double, Float or
     * Long). Any other query, or any other number type, yields {@code false}.
     *
     * @param name
     *            name of the field holding the value
     * @param numericValue
     *            the field's numeric value
     * @param query
     *            the query to test against
     * @return {@code true} if the value should be highlighted
     */
    public static boolean shouldNumberBeHighlighted(String name, Number numericValue, Query query) {
        if (query instanceof BooleanQuery) {
            for (BooleanClause booleanClause : (BooleanQuery) query) {
                if (booleanClause.isProhibited()) {
                    continue;
                }
                if (shouldNumberBeHighlighted(name, numericValue, booleanClause.getQuery())) {
                    return true;
                }
            }
            return false;
        }
        if (query instanceof NumericRangeQuery) {
            if (numericValue instanceof Integer) {
                return checkInteger(name, numericValue, query);
            } else if (numericValue instanceof Double) {
                return checkDouble(name, numericValue, query);
            } else if (numericValue instanceof Float) {
                return checkFloat(name, numericValue, query);
            } else if (numericValue instanceof Long) {
                return checkLong(name, numericValue, query);
            }
        }
        return false;
    }

    /**
     * Tests whether a {@code Long} value lies inside a numeric range query on
     * field {@code name}. A {@code null} range bound is treated as unbounded.
     *
     * @param name
     *            field name the query must target
     * @param numericValue
     *            the value; must be a {@link Long}
     * @param query
     *            the query; must be a {@link NumericRangeQuery}
     * @return {@code true} if the query is on {@code name} and contains the value
     */
    public static boolean checkLong(String name, Number numericValue, Query query) {
        long value = (Long) numericValue;
        NumericRangeQuery<?> nrq = (NumericRangeQuery<?>) query;
        return name.equals(nrq.getField()) && withinLongBounds(value, nrq);
    }

    /**
     * Tests whether a {@code Float} value lies inside a numeric range query on
     * field {@code name}. A {@code null} range bound is treated as unbounded.
     *
     * @param name
     *            field name the query must target
     * @param numericValue
     *            the value; must be a {@link Float}
     * @param query
     *            the query; must be a {@link NumericRangeQuery}
     * @return {@code true} if the query is on {@code name} and contains the value
     */
    public static boolean checkFloat(String name, Number numericValue, Query query) {
        float value = (Float) numericValue;
        NumericRangeQuery<?> nrq = (NumericRangeQuery<?>) query;
        return name.equals(nrq.getField()) && withinDoubleBounds(value, nrq);
    }

    /**
     * Tests whether a {@code Double} value lies inside a numeric range query on
     * field {@code name}. A {@code null} range bound is treated as unbounded.
     *
     * @param name
     *            field name the query must target
     * @param numericValue
     *            the value; must be a {@link Double}
     * @param query
     *            the query; must be a {@link NumericRangeQuery}
     * @return {@code true} if the query is on {@code name} and contains the value
     */
    public static boolean checkDouble(String name, Number numericValue, Query query) {
        double value = (Double) numericValue;
        NumericRangeQuery<?> nrq = (NumericRangeQuery<?>) query;
        return name.equals(nrq.getField()) && withinDoubleBounds(value, nrq);
    }

    /**
     * Tests whether an {@code Integer} value lies inside a numeric range query
     * on field {@code name}. A {@code null} range bound is treated as unbounded.
     *
     * @param name
     *            field name the query must target
     * @param numericValue
     *            the value; must be an {@link Integer}
     * @param query
     *            the query; must be a {@link NumericRangeQuery}
     * @return {@code true} if the query is on {@code name} and contains the value
     */
    public static boolean checkInteger(String name, Number numericValue, Query query) {
        int value = (Integer) numericValue;
        NumericRangeQuery<?> nrq = (NumericRangeQuery<?>) query;
        return name.equals(nrq.getField()) && withinLongBounds(value, nrq);
    }

    /**
     * Integral range test honouring the inclusive/exclusive flags of
     * {@code nrq}; a {@code null} bound places no limit on that side.
     */
    private static boolean withinLongBounds(long value, NumericRangeQuery<?> nrq) {
        Number min = nrq.getMin();
        Number max = nrq.getMax();
        boolean aboveMin = min == null
                || (nrq.includesMin() ? value >= min.longValue() : value > min.longValue());
        boolean belowMax = max == null
                || (nrq.includesMax() ? value <= max.longValue() : value < max.longValue());
        return aboveMin && belowMax;
    }

    /**
     * Floating-point range test honouring the inclusive/exclusive flags of
     * {@code nrq}; a {@code null} bound places no limit on that side. The
     * conditions are written positively so that a NaN value fails every
     * comparison against a present bound.
     */
    private static boolean withinDoubleBounds(double value, NumericRangeQuery<?> nrq) {
        Number min = nrq.getMin();
        Number max = nrq.getMax();
        boolean aboveMin = min == null
                || (nrq.includesMin() ? value >= min.doubleValue() : value > min.doubleValue());
        boolean belowMax = max == null
                || (nrq.includesMax() ? value <= max.doubleValue() : value < max.doubleValue());
        return aboveMin && belowMax;
    }
}