Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package edu.virginia.cs.searcher; import edu.virginia.cs.descriptors.Posts; import edu.virginia.cs.utility.SpecialAnalyzer; import java.io.File; import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * * @author Wasi */ public class PostSearcher { private IndexSearcher indexSearcher; private SpecialAnalyzer analyzer; private static SimpleHTMLFormatter formatter; private static final int numFragments = 4; private int numOfResults = 10; /** * Sets up the Lucene index Searcher with the specified index. * * @param indexPath The path to the desired Lucene index. */ public PostSearcher(String indexPath) { try { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); indexSearcher = new IndexSearcher(reader); analyzer = new SpecialAnalyzer(); formatter = new SimpleHTMLFormatter("****", "****"); } catch (IOException exception) { exception.printStackTrace(); } } public void setSimilarity(Similarity sim) { indexSearcher.setSimilarity(sim); } /** * The main search function. * * @param searchQuery Set this object's attributes as needed. * @return */ private SearchResult search(SearchQuery searchQuery) { searchQuery.numResults(numOfResults); BooleanQuery combinedQuery = new BooleanQuery(); for (String field : searchQuery.fields()) { QueryParser parser = new QueryParser(Version.LUCENE_46, field, analyzer); try { Query textQuery = parser.parse(parser.escape(searchQuery.queryText())); combinedQuery.add(textQuery, BooleanClause.Occur.MUST); } catch (ParseException exception) { exception.printStackTrace(); } } return runSearch(combinedQuery, searchQuery); } public void setNumOfResults(int num) { numOfResults = num; } /** * The simplest search function. Searches the abstract field and returns a * the default number of results. * * @param queryText The text to search * @return the SearchResult */ public SearchResult search(String queryText, String field) { return search(new SearchQuery(queryText, field)); } /** * Performs the actual Lucene search. * * @param luceneQuery * @param numResults * @return the SearchResult */ private SearchResult runSearch(Query luceneQuery, SearchQuery searchQuery) { try { TopDocs docs = indexSearcher.search(luceneQuery, searchQuery.numResults()); ScoreDoc[] hits = docs.scoreDocs; String field = searchQuery.fields().get(0); SearchResult searchResult = new SearchResult(searchQuery, docs.totalHits); for (ScoreDoc hit : hits) { Document doc = indexSearcher.doc(hit.doc); Posts pdoc = new Posts(); String highlighted = null; try { Highlighter highlighter = new Highlighter(formatter, new QueryScorer(luceneQuery)); String contents = doc.getField(field).stringValue(); pdoc.setId(Integer.parseInt(doc.getField("id").stringValue())); pdoc.setBody(doc.getField("body").stringValue()); pdoc.setTitle(doc.getField("title").stringValue()); pdoc.setCode(doc.getField("code").stringValue()); pdoc.setTags(doc.getField("tags").stringValue()); pdoc.setScore(Integer.parseInt(doc.getField("score").stringValue())); pdoc.setAcceptedAnswerId(Integer.parseInt(doc.getField("acceptedAnswerId").stringValue())); pdoc.setViewCount(Integer.parseInt(doc.getField("viewCount").stringValue())); pdoc.setAnswerCount(Integer.parseInt(doc.getField("answerCount").stringValue())); pdoc.setCommentCount(Integer.parseInt(doc.getField("commentCount").stringValue())); pdoc.setFavoriteCount(Integer.parseInt(doc.getField("favoriteCount").stringValue())); String[] snippets = highlighter.getBestFragments(analyzer, field, contents, numFragments); highlighted = createOneSnippet(snippets); } catch (InvalidTokenOffsetsException exception) { exception.printStackTrace(); highlighted = "(no snippets yet)"; } searchResult.addResult(pdoc); searchResult.setSnippet(pdoc, highlighted); } return searchResult; } catch (IOException exception) { exception.printStackTrace(); } return new SearchResult(searchQuery); } /** * Create one string of all the extracted snippets from the highlighter * * @param snippets * @return */ private String createOneSnippet(String[] snippets) { String result = " ... "; for (String s : snippets) { result += s + " ... "; } return result; } }