com.paladin.action.SearchAction.java Source code

Introduction

Here is the source code for com.paladin.action.SearchAction.java
Source

/**
 * Copyright (C) 2011 Erhu Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.paladin.action;

import com.google.common.base.Strings;
import com.paladin.common.Constants;
import com.paladin.common.LuceneHelper;
import com.paladin.common.Tools;
import com.paladin.mvc.RequestContext;
import com.paladin.sys.db.DBManager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.json.simple.JSONObject;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKSimilarity;

import javax.servlet.ServletContext;
import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Search action backed by Lucene.
 *
 * <p>Queries the per-table Lucene index under {@code Constants.LUCENE_INDEX_ROOT},
 * pages the hits, highlights the matching text and writes the result to the
 * HTTP response as JSON. It also exposes the hooks used to (re)build the index.
 *
 * <p>The class keeps no per-request state in instance fields.
 *
 * @author Erhu
 * @modify_date Sep 1th, 2011
 * @since Mar 12th, 2011
 */
public class SearchAction {

    /**
     * Name of the Lucene field that is searched and highlighted. Its stored value is
     * split on {@code Constants.LUCENE_FIELD_SEP} into title, content and (optional) tag.
     */
    private static final String INDEX_FIELDS = "title_content_tag";

    /** Upper bound on the number of hits the collector keeps for paging. */
    private static final int MAX_COLLECTED_HITS = 10000;

    private static final Log log = LogFactory.getLog(SearchAction.class.getName());

    /**
     * Searches the "blog" index for the request parameter {@code q} and writes the
     * result to the response as a JSON object.
     *
     * <p>Nothing is written when {@code q} is missing or blank.
     *
     * @param _reqCtxt request context giving access to the parameters, request and response
     * @throws IOException                  if the index cannot be read or the response cannot be written
     * @throws ParseException               if the query string cannot be parsed by Lucene
     * @throws InvalidTokenOffsetsException declared for callers; kept for interface compatibility
     */
    public void doSearch(final RequestContext _reqCtxt)
            throws IOException, ParseException, InvalidTokenOffsetsException {
        String key = Tools.compressBlank(_reqCtxt.param("q"));
        if (Strings.isNullOrEmpty(key)) {
            return;
        }

        key = Tools.ISO885912UTF8(key).trim();
        // The trim above can leave nothing behind; an empty string must not reach the parser.
        if (key.length() == 0) {
            return;
        }
        log.info("Let's search: key = " + key);

        JSONObject json_obj = new JSONObject();
        // Only the "blog" index is queried here, although updateIndex() also builds
        // the "code" and "motto" indexes.
        _search(json_obj, _reqCtxt.request(), key, "blog");

        _reqCtxt.response().getWriter().write(json_obj.toJSONString());
    }

    /**
     * Runs {@code _query} against the Lucene index of {@code _table} and stores one page
     * of highlighted hits plus the paging information in {@code jsonObject}.
     *
     * <p>Keys written (each suffixed/prefixed with the table name): {@code <table>_list},
     * {@code p_start_<table>}, {@code p_end_<table>}, {@code curr_page_<table>},
     * {@code total_page_<table>} and {@code total_count_<table>}.
     *
     * <p>The whole query string is parsed once, so several whitespace-separated words are
     * combined into a single query (the parser's default operator applies) instead of
     * being searched one after the other with a shared collector.
     *
     * @param jsonObject receives the result entries
     * @param request    current request; its {@code p} parameter selects the page
     * @param _query     non-empty query string
     * @param _table     name of the index directory below {@code Constants.LUCENE_INDEX_ROOT}
     * @throws IOException                  if the index cannot be opened or read
     * @throws ParseException               if {@code _query} is not a valid Lucene query
     * @throws InvalidTokenOffsetsException declared for callers; kept for interface compatibility
     */
    @SuppressWarnings("unchecked") // JSONObject.put is used through its raw Map interface
    private void _search(JSONObject jsonObject, HttpServletRequest request, String _query, String _table)
            throws IOException, ParseException, InvalidTokenOffsetsException {

        final String index_dir = Constants.LUCENE_INDEX_ROOT + _table;
        final int per_page = Constants.NUM_PER_PAGE_SEARCH;

        final FSDirectory directory = FSDirectory.open(new File(index_dir));
        try {
            final IndexSearcher searcher = new IndexSearcher(directory);
            try {
                // The similarity must be installed before searching, otherwise it has
                // no effect on the scores of that search.
                searcher.setSimilarity(new IKSimilarity());

                final QueryParser parser =
                        new QueryParser(Version.LUCENE_33, INDEX_FIELDS, new IKAnalyzer(false));
                final Query query = parser.parse(_query);

                // A collector can be drained by topDocs() only once, so exactly one
                // collector is used for exactly one search.
                final TopScoreDocCollector collector =
                        TopScoreDocCollector.create(MAX_COLLECTED_HITS, true);
                searcher.search(query, collector);

                // Paging: total number of pages, rounded up.
                final int size = collector.getTotalHits();
                final int total_pages = (size + per_page - 1) / per_page;
                final int curr_page_number = getCurrentPage(request, 1, total_pages);

                // Window of page links shown around the current page.
                final int first_page = curr_page_number - 5 > 0 ? curr_page_number - 5 : 1;
                final int last_page = first_page + 10 >= total_pages ? total_pages : first_page + 10;

                // Fetch only the hits of the current page.
                final int begin = (curr_page_number - 1) * per_page;
                final ScoreDoc[] score_docs = collector.topDocs(begin, per_page).scoreDocs;

                final List<Document> doc_list = new ArrayList<Document>(score_docs.length);
                for (ScoreDoc score_doc : score_docs) {
                    doc_list.add(searcher.doc(score_doc.doc));
                }

                final List<Map<String, String>> blog_list = getBlogListFromDocList(query, doc_list);

                jsonObject.put(_table + "_list", blog_list);
                jsonObject.put("p_start_" + _table, first_page);
                jsonObject.put("p_end_" + _table, last_page);
                jsonObject.put("curr_page_" + _table, curr_page_number);
                jsonObject.put("total_page_" + _table, total_pages);
                jsonObject.put("total_count_" + _table, size);
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Converts Lucene documents into display maps with the keys {@code id}, {@code title},
     * {@code tag} and {@code content}, highlighting the parts that match {@code _query}.
     *
     * <p>A document whose stored field is missing or has fewer parts than expected yields
     * empty strings for the missing parts instead of failing the whole search.
     *
     * @param _query    query used for highlighting
     * @param _doc_list documents of the current result page
     * @return one map per document, in the same order as {@code _doc_list}
     */
    private List<Map<String, String>> getBlogListFromDocList(Query _query, List<Document> _doc_list) {
        final List<Map<String, String>> blog_list =
                new ArrayList<Map<String, String>>(_doc_list.size());

        for (Document doc : _doc_list) {
            final String stored = doc.get(INDEX_FIELDS);
            // String.split drops trailing empty parts, so the array may hold fewer than
            // three (even zero) elements; every index access below is guarded.
            final String[] data_arr =
                    stored == null ? new String[0] : stored.split(Constants.LUCENE_FIELD_SEP);

            final Map<String, String> t_map = new java.util.HashMap<String, String>();
            t_map.put("id", doc.get("id"));
            t_map.put("title", highlightOrOriginal(_query, data_arr.length > 0 ? data_arr[0] : ""));

            // As before, a tag is only reported when the field has exactly three parts.
            t_map.put("tag", data_arr.length == 3 ? highlightOrOriginal(_query, data_arr[2]) : "");

            // Strip HTML tags from the content before highlighting it.
            final String content = (data_arr.length > 1 ? data_arr[1] : "").replaceAll("<[^>]*>", "");

            // TODO: the cut below counts highlight markup towards
            // Constants.LENGTH_OF_SEARCH_CONTENT.
            String f_content = highlightOrOriginal(_query, content);
            if (f_content.length() > Constants.LENGTH_OF_SEARCH_CONTENT) {
                f_content = f_content.substring(0, Constants.LENGTH_OF_SEARCH_CONTENT);
            }

            // If the cut landed inside an HTML tag, drop the incomplete tag.
            final int last_open = f_content.lastIndexOf('<');
            if (last_open > f_content.lastIndexOf('>')) {
                f_content = f_content.substring(0, last_open);
            }

            t_map.put("content", f_content);
            blog_list.add(t_map);
        }
        return blog_list;
    }

    /**
     * Highlights {@code _text} for {@code _query}; returns {@code _text} unchanged when it
     * is empty or when the highlighter returns {@code null}.
     */
    private static String highlightOrOriginal(Query _query, String _text) {
        if (_text.length() == 0) {
            return _text;
        }
        final String highlighted = Tools.highlight(_query, INDEX_FIELDS, _text);
        return highlighted == null ? _text : highlighted;
    }

    /**
     * Reads the requested page number from the request parameter {@code p} and clamps it
     * to the given range.
     *
     * @param request current request
     * @param _begin  first valid page; also returned when {@code p} is missing, empty
     *                or not a number
     * @param _end    last valid page
     * @return the page number, raised to {@code _begin} and then capped at {@code _end}
     */
    protected int getCurrentPage(final HttpServletRequest request, int _begin, int _end) {
        final String t_current_page = request.getParameter("p");
        if (Strings.isNullOrEmpty(t_current_page)) {
            return _begin;
        }

        int current_page;
        try {
            current_page = Integer.parseInt(t_current_page.trim());
        } catch (NumberFormatException e) {
            // "p" comes straight from the client; a malformed value must not fail the request.
            return _begin;
        }

        current_page = current_page < _begin ? _begin : current_page;
        current_page = current_page >= _end ? _end : current_page;
        return current_page;
    }

    /**
     * Rebuilds the Lucene indexes of the "blog", "code" and "motto" tables.
     */
    public void updateIndex() {
        // Operation and tables are fixed here rather than taken from request parameters.
        final String operation = "rebuild";
        final String[] tables = { "blog", "code", "motto" };
        log.info(operation + " lucene index...");
        LuceneHelper.index(tables, "rebuild".equals(operation));
    }

    /**
     * Initialises the action: opens and closes a database connection once, then rebuilds
     * the Lucene indexes.
     *
     * @param _ctxt servlet context (not used by this method)
     */
    public void init(ServletContext _ctxt) {
        // NOTE(review): presumably a start-up check / warm-up of the connection - confirm.
        DBManager.getConnection();
        DBManager.closeConnection();
        log.info(this.getClass().getName() + " rebuild lucene index...");
        updateIndex();
    }
}