searching.SearchFiles.java Source code

Java tutorial

Introduction

Here is the source code for searching.SearchFiles.java
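SearchFiles wraps a single shared Lucene IndexSearcher and offers two kinds of queries: a keyword search against one index field (search), and a similarity search (searchSimilarDocs) that builds a BooleanQuery from a WikiDocument's categories, links and frequent words, with an optional boost per group. Both paths delegate to doPagingSearch, which fetches just enough top documents to serve the requested page of results.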

Source

package searching;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import general.Config;

import java.io.IOException;
import java.nio.file.Paths;

import javax.management.InvalidAttributeValueException;

import model.Fieldname;
import model.SearchResult;
import model.WikiDocument;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

/** Simple command-line based search demo. */
public class SearchFiles {
    /*
     * The IndexSearcher is held in a static field so that the index is opened only once
     * and reused by every search, instead of being recreated for each query.
     */

    private static IndexSearcher searcher = null;

    static {
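        // Raise Lucene's clause limit (default 1024) so that similarity queries built from many
        // categories, links and frequent words do not fail with a TooManyClauses exception.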
        BooleanQuery.setMaxClauseCount(2048);

        try {
            searcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(Config.indexDir))));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches a single index field for the given query string.
     *
     * @param queryString     the raw user query, parsed with the analyzer from Config
     * @param startResult     1-based rank of the first hit to return
     * @param numberOfResults maximum number of hits to return
     * @param fieldname       the index field to search
     * @return the matching page of results, or null if the query is empty or the search fails
     */
    public SearchResult search(String queryString, int startResult, int numberOfResults, Fieldname fieldname) {

        String[] fieldnames = new String[1];
        fieldnames[0] = fieldname.toString();

        return executeSearch(queryString, startResult, numberOfResults, fieldnames);
    }

    /** Finds documents similar to the given document, returning at most 10 hits. */
    public SearchResult searchSimilarDocs(WikiDocument doc) {
        return searchSimilarDocs(doc, 10);
    }

    /** Finds similar documents with categories, links and frequent words weighted equally. */
    public SearchResult searchSimilarDocs(WikiDocument doc, int numResults) {
        return searchSimilarDocs(doc, numResults, 1, 1, 1);
    }

    /**
     * Finds documents similar to the given document by combining its categories, links and
     * frequent words into a single BooleanQuery, each group weighted by its boost factor.
     */
    public SearchResult searchSimilarDocs(WikiDocument doc, int numResults, float boostCat, float boostLinks,
            float boostFreqWords) {
        SearchResult searchResult = null;

        BooleanQuery bq = new BooleanQuery();

        // Add the categories to the query
        if (!doc.getCategories().isEmpty()) {
            BooleanQuery categories = new BooleanQuery();

            for (String cat : doc.getCategories()) {
                TermQuery tq = new TermQuery(new Term(Fieldname.CATEGORIES.toString(), cat));
                categories.add(tq, Occur.SHOULD);
            }
            categories.setBoost(boostCat);
            bq.add(categories, Occur.SHOULD);
        }

        // Add the links to the query
        if (!doc.getLinks().isEmpty()) {
            BooleanQuery links = new BooleanQuery();

            for (String link : doc.getLinks()) {
                TermQuery tq = new TermQuery(new Term(Fieldname.LINKS.toString(), link));
                links.add(tq, Occur.SHOULD);
            }
            links.setBoost(boostLinks);
            bq.add(links, Occur.SHOULD);
        }

        // Add the frequent words to the query
        if (!doc.getFreqWords().isEmpty()) {
            BooleanQuery freqWords = new BooleanQuery();

            for (String word : doc.getFreqWords()) {
                TermQuery tq = new TermQuery(new Term(Fieldname.FREQWORDS.toString(), word));
                freqWords.add(tq, Occur.SHOULD);
            }
            freqWords.setBoost(boostFreqWords);
            bq.add(freqWords, Occur.SHOULD);
        }

        //      System.out.println("Query: " + bq.toString());

        try {
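            // Paging starts at hit 2: the best-scoring hit is skipped because it is
            // usually the queried document itself.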
            searchResult = doPagingSearch(searcher, bq, 2, numResults);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidAttributeValueException e) {
            e.printStackTrace();
        }

        return searchResult;
    }

    /** Parses the query string against the given fields and runs the paging search. */
    private SearchResult executeSearch(String queryString, int startResult, int numberOfResults,
            String[] fieldnames) {
        SearchResult searchResult = null;

        try {
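            // Parse the raw query string over the requested fields, using the analyzer from Config.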
            QueryParser parser = new MultiFieldQueryParser(fieldnames, Config.analyzer);

            queryString = queryString.trim();

            if (queryString.length() == 0) {
                return null;
            }

            Query query = parser.parse(queryString);
            //         System.out.println("Query: " + query.toString());

            searchResult = doPagingSearch(searcher, query, startResult, numberOfResults);

        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (InvalidAttributeValueException e) {
            e.printStackTrace();
        }

        return searchResult;
    }

    /**
     * Runs the query and returns a single page of results.
     *
     * Only as many top documents as the requested page needs (offset + numberOfResults - 1)
     * are collected from the index. The offset is 1-based: the first hit copied into the
     * SearchResult is hit number offset, and at most numberOfResults hits are returned.
     *
     * @throws IOException if the index cannot be read
     * @throws InvalidAttributeValueException if a field value cannot be set on a WikiDocument
     */
    private SearchResult doPagingSearch(IndexSearcher searcher, Query query, int offset, int numberOfResults)
            throws IOException, InvalidAttributeValueException {

        int requestedResults = offset + numberOfResults - 1;

        TopDocs results = searcher.search(query, requestedResults);

        int numTotalHits = results.totalHits;

        ScoreDoc[] hits = results.scoreDocs;

        SearchResult searchResult = new SearchResult();

        searchResult.setTotalHits(numTotalHits);

        // Clamp the end of the page to the number of hits actually found.
        int end = Math.min(numTotalHits, requestedResults);

        // offset is 1-based, so the first hit of this page sits at index offset - 1.
        for (int i = offset - 1; i < end; i++) {
            WikiDocument wikiDoc = new WikiDocument();

            Document doc = searcher.doc(hits[i].doc);

            // Copy every stored field that is present in the hit into the WikiDocument.
            for (Fieldname f : Fieldname.values()) {
                if (doc.get(f.toString()) != null)
                    wikiDoc.setField(f, doc.get(f.toString()));
            }

            searchResult.addWikiDocument(wikiDoc);
        }
        return searchResult;
    }

}
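
Example

The following is a minimal usage sketch, not part of the original listing. It assumes that Config.indexDir points at an existing Lucene index built with the same Fieldname constants and Config.analyzer, and that SearchResult exposes its hit count through a getter such as getTotalHits() (the listing above only shows the corresponding setter).

package searching;

import model.Fieldname;
import model.SearchResult;

public class SearchFilesDemo {

    public static void main(String[] args) {
        SearchFiles searchFiles = new SearchFiles();

        // Ask for the first 10 documents whose CATEGORIES field matches "physics".
        SearchResult result = searchFiles.search("physics", 1, 10, Fieldname.CATEGORIES);

        if (result != null) {
            // getTotalHits() is assumed here; the listing above only defines setTotalHits().
            System.out.println("Total hits: " + result.getTotalHits());
        } else {
            System.out.println("Empty query or the search failed.");
        }
    }
}

Note that the listing uses the older mutable BooleanQuery API (add(), setBoost(), setMaxClauseCount()), so it targets Lucene 4.x/5.x; more recent Lucene releases build Boolean queries with BooleanQuery.Builder and apply weights with BoostQuery.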