Java tutorial
/**
 * Copyright (C) 2014 Pengfei Liu <pfliu@se.cuhk.edu.hk>
 * The Chinese University of Hong Kong.
 *
 * This file is part of smart-search-web.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.cuhk.hccl;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;

import edu.cuhk.hccl.expander.QueryExpander;

/**
 * Web controller that serves the search form, runs expanded queries against
 * the Lucene index and returns the raw text of indexed documents.
 *
 * <p>The searcher and the currently selected query expander are kept in static
 * fields shared by all requests: the expander chosen by the last
 * {@code /search} submission is also the one used by later
 * {@code /query/{key}} requests.
 */
@Controller
public class SearchController {

    /** Maximum number of hits collected per search. */
    private static final int TOP_N = 100;

    /** Second argument passed to {@link QueryExpander#expandQuery}. */
    private static final int EXPAND_K = 5;

    /** Lazily created searcher over {@code Indexer.index}; see {@link #getSearcher()}. */
    private static volatile IndexSearcher searcher = null;

    /** Expander selected by the most recent {@code /search} submission, or {@code null} if none yet. */
    private static volatile QueryExpander expander = null;

    /**
     * Shows the home page with an empty query form.
     *
     * @param model view model; receives a new {@code QueryForm} under the key {@code "query"}
     * @return the view name {@code "home"}
     */
    @RequestMapping(value = "/home")
    public String homePage(Model model) {
        model.addAttribute("query", new QueryForm());
        return "home";
    }

    /**
     * Returns the content of the file {@code data/{dataset}/{topic}/{fileName}}
     * as the response body.
     *
     * <p>All three path variables come from the request URL, so each one is
     * validated as a single, plain path segment before the file is read. This
     * prevents values such as {@code ".."} from escaping the {@code data}
     * directory.
     *
     * @param dataset  first directory level below {@code data}
     * @param topic    second directory level below {@code data}
     * @param fileName name of the file to read
     * @return the file content as a string
     * @throws IOException if a segment is not a plain file name or the file cannot be read
     */
    @RequestMapping(value = { "/data/{dataset}/{topic}/{fileName}",
            "query/data/{dataset}/{topic}/{fileName}" }, method = RequestMethod.GET)
    @ResponseBody
    public String openDocument(@PathVariable("dataset") String dataset,
            @PathVariable("topic") String topic,
            @PathVariable("fileName") String fileName) throws IOException {
        requireSafeSegment(dataset);
        requireSafeSegment(topic);
        requireSafeSegment(fileName);

        String filePath = "data/" + dataset + "/" + topic + "/" + fileName;
        return FileUtils.readFileToString(new File(filePath));
    }

    /**
     * Runs a search for the key given in the URL.
     *
     * @param key   query string taken from the path
     * @param model view model populated by the search
     * @return the view name {@code "result"}
     * @throws IOException if the index cannot be opened or searched
     */
    @RequestMapping(value = "/query/{key}", method = RequestMethod.GET)
    public String searchQuery(@PathVariable("key") String key, Model model)
            throws IOException {
        return runSearch(key, model);
    }

    /**
     * Handles the submitted search form: selects the query expander named by
     * the form and runs the search.
     *
     * @param queryForm submitted form carrying the expansion method and the query string
     * @param model     view model populated by the search
     * @return the view name {@code "result"}
     * @throws IOException if the index cannot be opened or searched
     */
    @RequestMapping(value = "/search", method = RequestMethod.POST)
    public String searchSubmit(@ModelAttribute QueryForm queryForm, Model model)
            throws IOException {
        // Constant-first comparison so a form without a method falls through to
        // the default expander instead of throwing a NullPointerException.
        if ("WORDNET".equalsIgnoreCase(queryForm.getMethod())) {
            expander = Application.wordNetExpander;
        } else {
            expander = Application.wordVectorExpander;
        }

        return runSearch(queryForm.getQueryString(), model);
    }

    /**
     * Expands the query string, searches the index with the expanded words and
     * stores the expansion words and the hits in the model.
     *
     * @param queryStr the user's query string
     * @param model    receives {@code "wordList"} (expansion words with
     *                 underscores replaced by spaces) and {@code "resultList"}
     *                 (one {@code SearchResult} per hit, in rank order)
     * @return the view name {@code "result"}
     * @throws IOException if the index cannot be opened or searched
     */
    private String runSearch(String queryStr, Model model) throws IOException {
        // Read the shared field once so a concurrent /search request cannot
        // swap the expander between the null check and its use.
        QueryExpander activeExpander = expander;
        if (activeExpander == null) {
            // No /search submission has chosen an expander yet (e.g. a direct
            // GET on /query/{key}); use the same default as searchSubmit.
            activeExpander = Application.wordVectorExpander;
        }
        IndexSearcher activeSearcher = getSearcher();

        // Find synonymous words of queryStr
        List<String> synWords = activeExpander.expandQuery(queryStr, EXPAND_K);

        List<String> wordList = new ArrayList<String>(synWords.size());
        for (String word : synWords) {
            wordList.add(word.replace('_', ' '));
        }
        model.addAttribute("wordList", wordList);

        List<SearchResult> resultList = new ArrayList<SearchResult>();

        // buildQuery yields null when there is nothing to search for; in that
        // case the result list simply stays empty.
        Query query = buildQuery(synWords);
        if (query != null) {
            TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_N, true);
            activeSearcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            for (int i = 0; i < hits.length; i++) {
                Document doc = activeSearcher.doc(hits[i].doc);
                String url = doc.get(Indexer.PATH_FIELD);

                SearchResult result = new SearchResult();
                result.setId(i + 1); // 1-based rank
                result.setTitle(titleFor(url));
                result.setUrl(url);
                result.setScore(hits[i].score);
                resultList.add(result);
            }
        }

        // Return search results for page "result.html"
        model.addAttribute("resultList", resultList);
        return "result";
    }

    /**
     * Returns the shared searcher, opening a reader on {@code Indexer.index}
     * on first use. Synchronized so concurrent first requests open only one
     * reader.
     *
     * @return the shared index searcher
     * @throws IOException if the index cannot be opened
     */
    private static synchronized IndexSearcher getSearcher() throws IOException {
        if (searcher == null) {
            searcher = new IndexSearcher(DirectoryReader.open(Indexer.index));
        }
        return searcher;
    }

    /**
     * Rejects values that are not a single plain path segment.
     *
     * @param segment request-supplied path segment
     * @throws IOException if the segment is null, empty, {@code "."},
     *                     {@code ".."}, or contains a path separator or NUL character
     */
    private static void requireSafeSegment(String segment) throws IOException {
        boolean unsafe = segment == null
                || segment.length() == 0
                || ".".equals(segment)
                || "..".equals(segment)
                || segment.indexOf('/') >= 0
                || segment.indexOf('\\') >= 0
                || segment.indexOf('\0') >= 0;
        if (unsafe) {
            throw new IOException("Illegal path segment in document request");
        }
    }

    /**
     * Derives a display title from a document path by joining its last two
     * {@code '/'}-separated segments with {@code '-'}.
     *
     * @param url stored document path, possibly {@code null}
     * @return {@code "<parent>-<name>"}; the path itself when it has fewer
     *         than two segments; an empty string when the path is {@code null}
     */
    private static String titleFor(String url) {
        if (url == null) {
            return "";
        }
        String[] parts = url.split("/");
        if (parts.length < 2) {
            return url;
        }
        return parts[parts.length - 2] + "-" + parts[parts.length - 1];
    }

    /**
     * Builds a query over {@code Indexer.CONTENT_FIELD} from the expansion words.
     *
     * <p>The words are joined with single spaces and handed to
     * {@code createBooleanQuery}, which analyzes the text rather than parsing
     * query syntax. No literal {@code "OR"} is inserted between the words
     * because it would be treated as one more search term.
     *
     * @param synWords expansion words of the user's query
     * @return the query, or {@code null} if {@code createBooleanQuery} returns
     *         {@code null} (Lucene does so when analysis yields no terms)
     */
    private static Query buildQuery(List<String> synWords) {
        StringBuilder queryText = new StringBuilder();
        for (String word : synWords) {
            if (word == null || word.length() == 0) {
                continue;
            }
            if (queryText.length() > 0) {
                queryText.append(' ');
            }
            queryText.append(word);
        }

        QueryParser parser = new QueryParser(Indexer.CONTENT_FIELD, Indexer.analyzer);
        return parser.createBooleanQuery(Indexer.CONTENT_FIELD, queryText.toString());
    }
}