edu.cuhk.hccl.SearchController.java Source code

Java tutorial

Introduction

Here is the source code for edu.cuhk.hccl.SearchController.java

Source

/**
 * Copyright (C) 2014 Pengfei Liu <pfliu@se.cuhk.edu.hk>
 * The Chinese University of Hong Kong.
 *
 * This file is part of smart-search-web.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.cuhk.hccl;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;

import edu.cuhk.hccl.expander.QueryExpander;

@Controller
public class SearchController {

    private static final int TOP_N = 100;
    private static final int EXPAND_K = 5;

    private static IndexSearcher searcher = null;
    private static QueryExpander expander = null;

    @RequestMapping(value = "/home")
    public String homePage(Model model) {

        model.addAttribute("query", new QueryForm());
        return "home";
    }

    @RequestMapping(value = { "/data/{dataset}/{topic}/{fileName}",
            "query/data/{dataset}/{topic}/{fileName}" }, method = RequestMethod.GET)
    @ResponseBody
    public String openDocument(@PathVariable("dataset") String dataset, @PathVariable("topic") String topic,
            @PathVariable("fileName") String fileName) throws IOException {

        String filePath = "data/" + dataset + "/" + topic + "/" + fileName;
        String content = FileUtils.readFileToString(new File(filePath));

        return content;
    }

    @RequestMapping(value = "/query/{key}", method = RequestMethod.GET)
    public String searchQuery(@PathVariable("key") String key, Model model) throws IOException {

        return runSearch(key, model);
    }

    @RequestMapping(value = "/search", method = RequestMethod.POST)
    public String searchSubmit(@ModelAttribute QueryForm queryForm, Model model) throws IOException {

        if (searcher == null) {
            searcher = new IndexSearcher(DirectoryReader.open(Indexer.index));
        }

        // Initialize expander
        String method = queryForm.getMethod();
        if (method.equalsIgnoreCase("WORDNET")) {
            expander = Application.wordNetExpander;
        } else {
            expander = Application.wordVectorExpander;
        }

        return runSearch(queryForm.getQueryString(), model);
    }

    /**
     * Run search engine against keyWords
     * @param queryStr
     * @param model
     * @return
     * @throws IOException
     */
    private String runSearch(String queryStr, Model model) throws IOException {

        // Find synonymous words of queryStr
        List<String> synWords = expander.expandQuery(queryStr, EXPAND_K);
        List<String> wordList = new ArrayList<String>();
        for (int i = 0; i < synWords.size(); i++) {
            wordList.add(synWords.get(i).replace('_', ' '));
        }
        model.addAttribute("wordList", wordList);

        // Build query
        Query query = buildQuery(synWords);
        TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_N, true);

        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // Get search results
        List<String> hitLines = new ArrayList<String>();
        List<SearchResult> resultList = new ArrayList<SearchResult>();

        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);

            String line = (i + 1) + "\t" + d.get(Indexer.PATH_FIELD) + "\t" + hits[i].score;
            hitLines.add(line);

            SearchResult result = new SearchResult();
            result.setId(i + 1);

            String url = d.get(Indexer.PATH_FIELD);
            String[] urlSplit = url.split("/");
            String title = urlSplit[urlSplit.length - 2] + "-" + urlSplit[urlSplit.length - 1];

            result.setTitle(title);
            result.setUrl(url);
            result.setScore(hits[i].score);

            resultList.add(result);
        }

        // Return search results for page "result.html"
        model.addAttribute("resultList", resultList);

        return "result";
    }

    /**
     * Build a Query with expansion words of expandK
     * 
     * @param analyzer
     * @param queryStr
     * @return
     */
    private static Query buildQuery(List<String> synWords) {

        StringBuffer newQueryBuffer = new StringBuffer();
        for (String word : synWords) {
            newQueryBuffer.append(word);
            newQueryBuffer.append(" ");
            newQueryBuffer.append("OR");
            newQueryBuffer.append(" ");
        }

        QueryParser parser = new QueryParser(Indexer.CONTENT_FIELD, Indexer.analyzer);
        Query query = parser.createBooleanQuery(Indexer.CONTENT_FIELD, newQueryBuffer.toString());

        return query;
    }
}