edu.virginia.cs.searcher.PostSearcher.java Source code

Java tutorial

Introduction

Here is the source code for edu.virginia.cs.searcher.PostSearcher.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package edu.virginia.cs.searcher;

import edu.virginia.cs.descriptors.Posts;
import edu.virginia.cs.utility.SpecialAnalyzer;
import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Searches a Lucene index of posts and returns the matching {@link Posts}
 * together with a highlighted snippet for each hit.
 *
 * <p>
 * An instance opens the index once in its constructor. If the index cannot be
 * opened the failure is printed and remembered; the instance is then unusable
 * and {@link #search(String, String)} / {@link #setSimilarity(Similarity)}
 * throw an {@link IllegalStateException} carrying the original cause. Call
 * {@link #close()} when the searcher is no longer needed.
 *
 * @author Wasi
 */
public class PostSearcher {

    /** Wraps every highlighted term in "****" markers. */
    private static final SimpleHTMLFormatter FORMATTER = new SimpleHTMLFormatter("****", "****");
    /** Maximum number of fragments requested from the highlighter per hit. */
    private static final int NUM_FRAGMENTS = 4;
    /** Separator placed before, between and after the highlighted fragments. */
    private static final String SNIPPET_SEPARATOR = " ... ";

    /** Reader backing {@link #indexSearcher}; {@code null} if opening the index failed. */
    private final IndexReader reader;
    /** {@code null} if the index (or the analyzer) could not be set up. */
    private final IndexSearcher indexSearcher;
    private final SpecialAnalyzer analyzer;
    /** The exception raised while opening the index, or {@code null} on success. */
    private final IOException openFailure;
    private int numOfResults = 10;

    /**
     * Sets up the Lucene index searcher with the specified index.
     *
     * <p>
     * An {@link IOException} raised while opening the index is printed and
     * recorded rather than thrown, so construction itself never fails with it;
     * the failure surfaces on first use instead.
     *
     * @param indexPath The path to the desired Lucene index.
     */
    public PostSearcher(String indexPath) {
        IndexReader openedReader = null;
        SpecialAnalyzer createdAnalyzer = null;
        IOException failure = null;
        try {
            openedReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
            createdAnalyzer = new SpecialAnalyzer();
        } catch (IOException exception) {
            exception.printStackTrace();
            failure = exception;
        }
        reader = openedReader;
        analyzer = createdAnalyzer;
        openFailure = failure;
        indexSearcher = (failure == null) ? new IndexSearcher(openedReader) : null;
    }

    /**
     * Sets the similarity (ranking function) used by subsequent searches.
     *
     * @param sim the similarity to install on the underlying searcher
     * @throws IllegalStateException if the index could not be opened
     */
    public void setSimilarity(Similarity sim) {
        requireSearcher().setSimilarity(sim);
    }

    /**
     * The main search function. Parses the query text once per requested field
     * and requires every per-field query to match.
     *
     * @param searchQuery the query to run; its result limit is overwritten
     * with this searcher's current number of results
     * @return the SearchResult
     */
    private SearchResult search(SearchQuery searchQuery) {
        // Fail with a descriptive error before touching the (possibly null) analyzer.
        requireSearcher();
        searchQuery.numResults(numOfResults);
        BooleanQuery combinedQuery = new BooleanQuery();
        // The query text is escaped, so it is always treated as literal terms.
        String escapedText = QueryParser.escape(searchQuery.queryText());
        for (String field : searchQuery.fields()) {
            QueryParser parser = new QueryParser(Version.LUCENE_46, field, analyzer);
            try {
                combinedQuery.add(parser.parse(escapedText), BooleanClause.Occur.MUST);
            } catch (ParseException exception) {
                // A field whose query cannot be parsed is left out of the combined query.
                exception.printStackTrace();
            }
        }
        return runSearch(combinedQuery, searchQuery);
    }

    /**
     * Sets how many results subsequent searches request from the index.
     *
     * @param num the maximum number of hits to retrieve per search
     */
    public void setNumOfResults(int num) {
        numOfResults = num;
    }

    /**
     * The simplest search function. Searches the given field and returns up to
     * the currently configured number of results (10 unless changed through
     * {@link #setNumOfResults(int)}).
     *
     * @param queryText The text to search
     * @param field The name of the indexed field to search in
     * @return the SearchResult
     * @throws IllegalStateException if the index could not be opened
     */
    public SearchResult search(String queryText, String field) {
        return search(new SearchQuery(queryText, field));
    }

    /**
     * Closes the index reader opened by the constructor. Does nothing if the
     * index was never opened. The searcher must not be used afterwards.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        if (reader != null) {
            reader.close();
        }
    }

    /**
     * Performs the actual Lucene search and converts every hit into a
     * {@link Posts} object with a highlighted snippet taken from the first
     * field of the query.
     *
     * @param luceneQuery the query to execute
     * @param searchQuery supplies the result limit and the field to highlight
     * @return the SearchResult; a result without hits if an
     * {@link IOException} occurs while searching
     */
    private SearchResult runSearch(Query luceneQuery, SearchQuery searchQuery) {
        IndexSearcher searcher = requireSearcher();
        try {
            TopDocs docs = searcher.search(luceneQuery, searchQuery.numResults());
            String field = searchQuery.fields().get(0);
            // The highlighter depends only on the query, so one instance serves all hits.
            Highlighter highlighter = new Highlighter(FORMATTER, new QueryScorer(luceneQuery));

            SearchResult searchResult = new SearchResult(searchQuery, docs.totalHits);
            for (ScoreDoc hit : docs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                Posts post = toPost(doc);
                String snippet = highlightSnippet(highlighter, doc, field);
                searchResult.addResult(post);
                searchResult.setSnippet(post, snippet);
            }
            return searchResult;
        } catch (IOException exception) {
            exception.printStackTrace();
            return new SearchResult(searchQuery);
        }
    }

    /**
     * Returns the searcher, or fails with a descriptive error if the index
     * could not be opened at construction time.
     */
    private IndexSearcher requireSearcher() {
        if (indexSearcher == null) {
            throw new IllegalStateException(
                    "The Lucene index could not be opened, so this searcher is unusable", openFailure);
        }
        return indexSearcher;
    }

    /**
     * Builds the highlighted snippet for one hit from the given field.
     *
     * @return the joined fragments, or "(no snippets yet)" if the highlighter
     * rejects the token offsets
     */
    private String highlightSnippet(Highlighter highlighter, Document doc, String field) throws IOException {
        String contents = stringField(doc, field);
        try {
            return createOneSnippet(highlighter.getBestFragments(analyzer, field, contents, NUM_FRAGMENTS));
        } catch (InvalidTokenOffsetsException exception) {
            exception.printStackTrace();
            return "(no snippets yet)";
        }
    }

    /**
     * Copies the fields of an index document into a new {@link Posts} object.
     *
     * NOTE(review): a document lacking any of these fields causes a
     * NullPointerException, and a non-numeric value in a numeric field causes
     * a NumberFormatException - confirm the indexer always writes them.
     */
    private static Posts toPost(Document doc) {
        Posts post = new Posts();
        post.setId(intField(doc, "id"));
        post.setBody(stringField(doc, "body"));
        post.setTitle(stringField(doc, "title"));
        post.setCode(stringField(doc, "code"));
        post.setTags(stringField(doc, "tags"));
        post.setScore(intField(doc, "score"));
        post.setAcceptedAnswerId(intField(doc, "acceptedAnswerId"));
        post.setViewCount(intField(doc, "viewCount"));
        post.setAnswerCount(intField(doc, "answerCount"));
        post.setCommentCount(intField(doc, "commentCount"));
        post.setFavoriteCount(intField(doc, "favoriteCount"));
        return post;
    }

    /** Reads the string value of the named field of a document. */
    private static String stringField(Document doc, String name) {
        return doc.getField(name).stringValue();
    }

    /** Reads the named field of a document and parses it as a decimal integer. */
    private static int intField(Document doc, String name) {
        return Integer.parseInt(stringField(doc, name));
    }

    /**
     * Create one string of all the extracted snippets from the highlighter.
     * The result starts with " ... " and every snippet is followed by " ... ",
     * so an empty array yields just " ... ".
     *
     * @param snippets the fragments returned by the highlighter
     * @return the fragments joined into a single string
     */
    private static String createOneSnippet(String[] snippets) {
        StringBuilder result = new StringBuilder(SNIPPET_SEPARATOR);
        for (String snippet : snippets) {
            result.append(snippet).append(SNIPPET_SEPARATOR);
        }
        return result.toString();
    }
}