org.apache.lucene.search.vectorhighlight.FieldTermStack.java Source code

Introduction

Here is the source code for org.apache.lucene.search.vectorhighlight.FieldTermStack.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.vectorhighlight;

import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;

/**
 * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
 * of the document to be highlighted.
 * <p>
 * The stack is filled once, by the constructor, from the term vector of the document:
 * every occurrence of a query term becomes a {@link TermInfo}, the occurrences are
 * ordered by position, and occurrences that share a position are chained together
 * through {@link TermInfo#getNext()} so that only one of them stays on the stack.
 */
public class FieldTermStack {

    private final String fieldName;

    // Entries ordered by position; the head of the list is the top of the stack.
    LinkedList<TermInfo> termList = new LinkedList<>();

    /**
     * Builds the stack of query-term occurrences for one field of one document.
     * <p>
     * The stack is left empty (which results in a null snippet) when the query has no
     * terms for the field, when the document has no term vectors, when the field has no
     * term vector or its term vector has no positions, or when the term vector carries
     * no offsets.
     *
     * @param reader IndexReader of the index
     * @param docId document id to be highlighted
     * @param fieldName field of the document to be highlighted
     * @param fieldQuery FieldQuery object
     * @throws IOException If there is a low-level I/O error
     */
    public FieldTermStack(IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery)
            throws IOException {
        this.fieldName = fieldName;

        final Set<String> termSet = fieldQuery.getTermSet(fieldName);
        // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
        if (termSet == null) {
            return;
        }

        final Fields vectors = reader.getTermVectors(docId);
        if (vectors == null) {
            // null snippet
            return;
        }

        final Terms vector = vectors.terms(fieldName);
        if (vector == null || !vector.hasPositions()) {
            // null snippet
            return;
        }

        final CharsRefBuilder spare = new CharsRefBuilder();
        final TermsEnum termsEnum = vector.iterator();
        PostingsEnum dpEnum = null;
        BytesRef text;

        // Loop-invariant numerator of the weight formula below.
        final int maxDoc = reader.maxDoc();

        while ((text = termsEnum.next()) != null) {
            spare.copyUTF8Bytes(text);
            final String term = spare.toString();
            if (!termSet.contains(term)) {
                continue;
            }
            // Reuse the previous enum instance where possible.
            dpEnum = termsEnum.postings(dpEnum, PostingsEnum.POSITIONS);
            // Advance onto the first document of the enum before reading freq/positions.
            dpEnum.nextDoc();

            // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
            final int docFreq = reader.docFreq(new Term(fieldName, text));
            final float weight = (float) (Math.log(maxDoc / (double) (docFreq + 1)) + 1.0);

            final int freq = dpEnum.freq();

            for (int i = 0; i < freq; i++) {
                final int pos = dpEnum.nextPosition();
                final int startOffset = dpEnum.startOffset();
                if (startOffset < 0) {
                    // no offsets, null snippet: drop anything collected so far so that
                    // callers never see a partially filled, unsorted stack
                    termList.clear();
                    return;
                }
                termList.add(new TermInfo(term, startOffset, dpEnum.endOffset(), pos, weight));
            }
        }

        // sort by position
        Collections.sort(termList);

        linkTermsAtSamePosition();
    }

    /**
     * Collapses runs of entries that share a position: the first entry of each run stays
     * in {@link #termList}, the others are removed from the list and chained behind it
     * as a circular list reachable through {@link TermInfo#getNext()}.
     * Expects {@link #termList} to be sorted by position already.
     */
    private void linkTermsAtSamePosition() {
        TermInfo runHead = null; // entry that stays in the list for the current position
        TermInfo runTail = null; // last entry linked into the current run
        final Iterator<TermInfo> iterator = termList.iterator();
        while (iterator.hasNext()) {
            final TermInfo current = iterator.next();
            // The null check makes the very first entry always start a run, whatever
            // its position value is.
            if (runTail != null && current.position == runHead.position) {
                runTail.setNext(current);
                runTail = current;
                iterator.remove();
            } else {
                if (runTail != null) {
                    // close the circle of the run that just ended
                    runTail.setNext(runHead);
                }
                runHead = current;
                runTail = current;
            }
        }
        if (runTail != null) {
            // close the circle of the last run
            runTail.setNext(runHead);
        }
    }

    /**
     * @return field name
     */
    public String getFieldName() {
        return fieldName;
    }

    /**
     * Removes and returns the entry on top of the stack.
     *
     * @return the top TermInfo object of the stack, or {@code null} if the stack is empty
     */
    public TermInfo pop() {
        return termList.poll();
    }

    /**
     * @param termInfo the TermInfo object to be put on the top of the stack
     */
    public void push(TermInfo termInfo) {
        termList.push(termInfo);
    }

    /**
     * to know whether the stack is empty
     * 
     * @return true if the stack is empty, false if not
     */
    public boolean isEmpty() {
        // termList is a non-final package-private field, so keep the null guard.
        return termList == null || termList.isEmpty();
    }

    /**
     * Single term with its position/offsets in the document and IDF weight.
     * It is Comparable but considers only position; {@link #equals(Object)} and
     * {@link #hashCode()} likewise look at the position alone.
     */
    public static class TermInfo implements Comparable<TermInfo> {

        private final String text;
        private final int startOffset;
        private final int endOffset;
        private final int position;

        // IDF-weight of this term
        private final float weight;

        // pointer to other TermInfo's at the same position.
        // this is a circular list, so with no syns, just points to itself
        private TermInfo next;

        /**
         * Creates an entry that initially forms a one-element circular list.
         *
         * @param text the term text
         * @param startOffset start offset of the occurrence
         * @param endOffset end offset of the occurrence
         * @param position position of the occurrence
         * @param weight IDF-weight of the term
         */
        TermInfo(String text, int startOffset, int endOffset, int position, float weight) {
            this.text = text;
            this.startOffset = startOffset;
            this.endOffset = endOffset;
            this.position = position;
            this.weight = weight;
            this.next = this;
        }

        /** Sets the successor of this entry in the circular same-position list. */
        void setNext(TermInfo next) {
            this.next = next;
        }

        /** 
         * Returns the next TermInfo at this same position.
         * This is a circular list!
         */
        public TermInfo getNext() {
            return next;
        }

        /** @return the term text */
        public String getText() {
            return text;
        }

        /** @return start offset of this occurrence */
        public int getStartOffset() {
            return startOffset;
        }

        /** @return end offset of this occurrence */
        public int getEndOffset() {
            return endOffset;
        }

        /** @return position of this occurrence */
        public int getPosition() {
            return position;
        }

        /** @return IDF-weight of the term */
        public float getWeight() {
            return weight;
        }

        /** Renders the entry as {@code text(startOffset,endOffset,position)}. */
        @Override
        public String toString() {
            return text + '(' + startOffset + ',' + endOffset + ',' + position + ')';
        }

        /** Orders entries by position only. */
        @Override
        public int compareTo(TermInfo o) {
            // Integer.compare cannot overflow, unlike subtracting the two positions.
            return Integer.compare(this.position, o.position);
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + position;
            return result;
        }

        /** Two entries are equal when they are of the same class and share a position. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            TermInfo other = (TermInfo) obj;
            return position == other.position;
        }
    }
}