aos.lucene.search.advanced.FunctionQueryTest.java Source code

Introduction

Here is the source code for aos.lucene.search.advanced.FunctionQueryTest.java
Source

/****************************************************************
 * Licensed to the AOS Community (AOS) under one or more        *
 * contributor license agreements.  See the NOTICE file         *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The AOS licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/
package aos.lucene.search.advanced;

import junit.framework.TestCase;

import java.util.Date;
import java.io.IOException;

import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.function.FieldScoreQuery;
import org.apache.lucene.search.function.CustomScoreQuery;
import org.apache.lucene.search.function.CustomScoreProvider;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.Version;

import io.aos.lucene.util.TestUtil;

// From chapter 5
public class FunctionQueryTest extends TestCase {

    // Searcher over the in-memory index; assigned at the end of setUp() and closed in tearDown().
    IndexSearcher s;
    // Writer that addDoc() adds documents to; created and closed inside setUp().
    IndexWriter w;

    /**
     * Adds one document with two unstored fields to the shared writer {@code w}.
     *
     * @param score   value written, as decimal text, to the un-analyzed, norm-free "score" field
     * @param content text written to the analyzed "content" field
     * @throws Exception if the writer rejects the document
     */
    private void addDoc(int score, String content) throws Exception {
        Field scoreField = new Field("score", String.valueOf(score), Field.Store.NO,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        Field contentField = new Field("content", content, Field.Store.NO, Field.Index.ANALYZED);

        Document entry = new Document();
        entry.add(scoreField);
        entry.add(contentField);
        w.addDocument(entry);
    }

    /**
     * Builds a two-document in-memory index (scores 7 and 42) and opens the
     * read-only searcher {@code s} over it.
     *
     * @throws Exception if the index cannot be written or opened
     */
    @Override
    public void setUp() throws Exception {
        super.setUp();
        Directory dir = new RAMDirectory();
        w = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_46), IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            addDoc(7, "this hat is green");
            addDoc(42, "this hat is blue");
        } finally {
            // Close the writer even when indexing fails so it is not leaked.
            w.close();
        }

        // The writer must be closed before the searcher is opened on the same directory.
        s = new IndexSearcher(dir, true);
    }

    /**
     * Closes the searcher opened by {@code setUp()} and then runs the
     * superclass teardown.
     *
     * @throws Exception if closing the searcher or the superclass teardown fails
     */
    @Override
    public void tearDown() throws Exception {
        try {
            // s stays null if setUp() failed before the searcher was assigned.
            if (s != null) {
                s.close();
            }
        } finally {
            // Release our own resource first; the superclass teardown runs last, even if close() throws.
            super.tearDown();
        }
    }

    /**
     * Checks that a {@code FieldScoreQuery} on the "score" field matches both
     * documents and ranks them by the stored value, highest first.
     */
    public void testFieldScoreQuery() throws Throwable {
        final Query byStaticScore = new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);
        final TopDocs ranked = s.search(byStaticScore, 10);

        assertEquals(2, ranked.scoreDocs.length); // #1

        // Top hit: the document indexed with score 42.
        assertEquals(1, ranked.scoreDocs[0].doc); // #2
        assertEquals(42, (int) ranked.scoreDocs[0].score);

        // Runner-up: the document indexed with score 7.
        assertEquals(0, ranked.scoreDocs[1].doc);
        assertEquals(7, (int) ranked.scoreDocs[1].score);
    }

    /*
      #1 All documents match
      #2 Doc 1 is first because its static score (42) is
         higher than doc 0's (7)
    */

    /**
     * Combines a parsed text query with the per-document "score" field using a
     * custom formula, {@code sqrt(subQueryScore) * valSrcScore}, and checks the
     * resulting order of the two indexed documents.
     */
    public void testCustomScoreQuery() throws Throwable {
        Query q = new QueryParser(Version.LUCENE_46, "content", new StandardAnalyzer(Version.LUCENE_46))
                .parse("the green hat");
        FieldScoreQuery qf = new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);
        CustomScoreQuery customQ = new CustomScoreQuery(q, qf) {
            // @Override makes the compiler reject a signature that does not match the
            // base class, instead of silently leaving the custom scoring unused.
            @Override
            public CustomScoreProvider getCustomScoreProvider(IndexReader r) {
                return new CustomScoreProvider(r) {
                    @Override
                    public float customScore(int doc, float subQueryScore, float valSrcScore) {
                        return (float) (Math.sqrt(subQueryScore) * valSrcScore);
                    }
                };
            }
        };

        TopDocs hits = s.search(customQ, 10);
        assertEquals(2, hits.scoreDocs.length);

        assertEquals(1, hits.scoreDocs[0].doc); // #1
        assertEquals(0, hits.scoreDocs[1].doc);
    }

    /*
      #1 Even though document 0 is a better match to the
         original query, document 1 gets a better score after
         multiplying in its score field
     */

    static class RecencyBoostingQuery extends CustomScoreQuery {

        double multiplier;
        int today;
        int maxDaysAgo;
        String dayField;
        static int MSEC_PER_DAY = 1000 * 3600 * 24;

        public RecencyBoostingQuery(Query q, double multiplier, int maxDaysAgo, String dayField) {
            super(q);
            today = (int) (new Date().getTime() / MSEC_PER_DAY);
            this.multiplier = multiplier;
            this.maxDaysAgo = maxDaysAgo;
            this.dayField = dayField;
        }

        private class RecencyBooster extends CustomScoreProvider {
            final int[] publishDay;

            public RecencyBooster(IndexReader r) throws IOException {
                super(r);
                publishDay = FieldCache.DEFAULT // #A
                        .getInts(r, dayField); // #A
            }

            public float customScore(int doc, float subQueryScore, float valSrcScore) {
                int daysAgo = today - publishDay[doc]; // #B
                if (daysAgo < maxDaysAgo) { // #C            
                    float boost = (float) (multiplier * // #D
                            (maxDaysAgo - daysAgo) // #D
                            / maxDaysAgo); // #D
                    return (float) (subQueryScore * (1.0 + boost));
                } else {
                    return subQueryScore; // #E
                }
            }
        }

        public CustomScoreProvider getCustomScoreProvider(IndexReader r) throws IOException {
            return new RecencyBooster(r);
        }
    }

    /*
      #A Retrieve days from field cache
      #B Compute elapsed days
      #C Skip old books
      #D Compute simple linear boost
      #E Return un-boosted score
    */

    /**
     * Searches the book index for "java in action" through a
     * {@link RecencyBoostingQuery} (multiplier 2.0, two-year window on the
     * "pubmonthAsDay" field), sorts by score and then by "title2", and prints
     * the top five hits. The test makes no assertions; it only exercises the
     * query and shows the resulting order.
     */
    public void testRecency() throws Throwable {
        Directory dir = TestUtil.getBookIndexDirectory();
        try {
            // IndexReader is already imported by this file; DirectoryReader is not.
            IndexReader reader = IndexReader.open(dir);
            try {
                // Named "searcher" so it does not shadow the fixture field "s".
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Ask for scores to be tracked even though a Sort is supplied.
                    searcher.setDefaultFieldSortScoring(true, true);

                    QueryParser parser = new QueryParser(Version.LUCENE_46, "contents",
                            new StandardAnalyzer(Version.LUCENE_46));
                    Query q = parser.parse("java in action"); // #A
                    Query q2 = new RecencyBoostingQuery(q, // #B
                            2.0, 2 * 365, "pubmonthAsDay");
                    Sort sort = new Sort(
                            new SortField[] { SortField.FIELD_SCORE, new SortField("title2", SortField.STRING) });
                    TopDocs hits = searcher.search(q2, null, 5, sort);

                    for (int i = 0; i < hits.scoreDocs.length; i++) {
                        Document doc = reader.document(hits.scoreDocs[i].doc);
                        // No logger is declared in this class, so print the ranking directly.
                        System.out.println((1 + i) + ": " + doc.get("title") + ": pubmonth=" + doc.get("pubmonth")
                                + " score=" + hits.scoreDocs[i].score);
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            // Release resources in reverse order of acquisition, even when the search fails.
            dir.close();
        }
    }

    /*
      #A Parse query
      #B Create recency boosting query
    */
}