Java tutorial
/** * Copyright (C) 2011 Erhu Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.paladin.action; import com.google.common.base.Strings; import com.paladin.common.Constants; import com.paladin.common.LuceneHelper; import com.paladin.common.Tools; import com.paladin.mvc.RequestContext; import com.paladin.sys.db.DBManager; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.json.simple.JSONObject; import org.wltea.analyzer.lucene.IKAnalyzer; import org.wltea.analyzer.lucene.IKSimilarity; import javax.servlet.ServletContext; import javax.servlet.http.HttpServletRequest; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; /** * Search Action use lucene * * @author Erhu * @modify_date Sep 1th, 2011 * @since Mar 12th, 2011 */ public class SearchAction { // lucene ?? private static final String INDEX_FIELDS = "title_content_tag"; private static Log log = LogFactory.getLog(SearchAction.class.getName()); private int total_pages;// private int curr_page_number;// ? private int last_page;// ?( ?) private int first_page;// ?( ?) /** * Search blog ,code and motto */ public void doSearch(final RequestContext _reqCtxt) throws IOException, ParseException, InvalidTokenOffsetsException { HttpServletRequest request = _reqCtxt.request(); String key = Tools.compressBlank(_reqCtxt.param("q")); if (!Strings.isNullOrEmpty(key)) { key = Tools.ISO885912UTF8(key).trim(); log.info("Let's search: key = " + key); //request.setAttribute("q", key); JSONObject json_obj = new JSONObject(); _search(json_obj, request, key, "blog"); //_search(json_obj, request, t_key, "code"); //_search(json_obj, request, t_key, "motto"); // JSON ?(python) _reqCtxt.response().getWriter().write(json_obj.toJSONString()); } } /** * search using lucene * * @param jsonObject * @param request * @param _query * @throws IOException * @throws ParseException */ private void _search(JSONObject jsonObject, HttpServletRequest request, String _query, String _table) throws IOException, ParseException, InvalidTokenOffsetsException { // Bean ??? final String index_dir = Constants.LUCENE_INDEX_ROOT + _table; IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index_dir))); QueryParser parser = new QueryParser(Version.LUCENE_33, INDEX_FIELDS, new IKAnalyzer(false)); TopScoreDocCollector collector = TopScoreDocCollector.create(10000, true); for (String key : _query.split(" ")) { Query query = parser.parse(key); searcher.search(query, collector); // IKSimilarity searcher.setSimilarity(new IKSimilarity()); // int size = collector.getTotalHits(); total_pages = (size + Constants.NUM_PER_PAGE_SEARCH - 1) / Constants.NUM_PER_PAGE_SEARCH; curr_page_number = getCurrentPage(request, 1, total_pages); // ? first_page = curr_page_number - 5 > 0 ? curr_page_number - 5 : 1; last_page = first_page + 10 >= total_pages ? total_pages : first_page + 10; // ? int begin = (curr_page_number - 1) * Constants.NUM_PER_PAGE_SEARCH; ScoreDoc[] score_docs = collector.topDocs(begin, Constants.NUM_PER_PAGE_SEARCH).scoreDocs; List<Document> doc_list = new ArrayList<Document>(); for (ScoreDoc score_doc : score_docs) doc_list.add(searcher.doc(score_doc.doc)); List<Map<String, String>> blog_list = getBlogListFromDocList(query, doc_list); jsonObject.put(_table + "_list", blog_list); jsonObject.put("p_start_" + _table, first_page); jsonObject.put("p_end_" + _table, last_page); jsonObject.put("curr_page_" + _table, curr_page_number); jsonObject.put("total_page_" + _table, total_pages); jsonObject.put("total_count_" + _table, size); } } /** * ? ? ? * * @param _query * @param _doc_list * @return */ private List<Map<String, String>> getBlogListFromDocList(Query _query, List<Document> _doc_list) { List<Map<String, String>> blog_list = new ArrayList<Map<String, String>>(); for (Document doc : _doc_list) { Map<String, String> t_map = new java.util.HashMap<String, String>(); t_map.put("id", doc.get("id")); String[] data_arr = doc.get("title_content_tag").toString().split(Constants.LUCENE_FIELD_SEP); final String title = Tools.highlight(_query, "title_content_tag", data_arr[0]);// t_map.put("title", title == null ? data_arr[0] : title); if (data_arr.length == 3) { String tag = Tools.highlight(_query, "title_content_tag", data_arr[2]); t_map.put("tag", tag == null ? data_arr[2] : tag); } else t_map.put("tag", ""); String content = data_arr[1]; content = content.replaceAll("<[^>]*>", "");// HTML // TODO:?? Constants.LENGTH_OF_SEARCH_CONTENT String f_content = Tools.highlight(_query, "title_content_tag", content); if (f_content == null) f_content = content; if (f_content.length() > Constants.LENGTH_OF_SEARCH_CONTENT) f_content = f_content.substring(0, Constants.LENGTH_OF_SEARCH_CONTENT); // ???HTML?HTML if (f_content.lastIndexOf('<') > f_content.lastIndexOf('>')) f_content = f_content.substring(0, f_content.lastIndexOf('<')); t_map.put("content", f_content); blog_list.add(t_map); } return blog_list; } /** * ??? * * @param request * @param _begin ???? * @param _end ??? * @return */ protected int getCurrentPage(final HttpServletRequest request, int _begin, int _end) { String t_current_page = request.getParameter("p"); if (Strings.isNullOrEmpty(t_current_page)) return _begin; else { int current_page = Integer.parseInt(t_current_page); current_page = current_page < _begin ? _begin : current_page; current_page = current_page >= _end ? _end : current_page; return current_page; } } /** * */ public void updateIndex() { String operation = "rebuild";//_reqCtxt.param("operation"); String[] tables = { "blog", "code", "motto" };//_reqCtxt.param("table").toString().split(","); log.info(operation + " lucene index..."); LuceneHelper.index(tables, operation.equals("rebuild")); } /** * init */ public void init(ServletContext _ctxt) { DBManager.getConnection(); DBManager.closeConnection(); log.info(this.getClass().getName() + " rebuild lucene index..."); updateIndex(); } }