com.adanac.module.blog.search.LuceneHelper.java Source code

Introduction

Here is the source code for com.adanac.module.blog.search.LuceneHelper.java
Source

package com.adanac.module.blog.search;

/*
 * Copyright 2002-2015 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.adanac.module.blog.config.Configuration;
import com.adanac.module.blog.dao.ArticleDao;
import com.adanac.module.blog.dao.QuestionDao;
import com.adanac.module.blog.model.Status;
import com.adanac.module.blog.model.ViewMode;
import com.adanac.module.blog.orm.DaoFactory;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * @author adanac
 * @since 2015531 ?11:20:02
 */
public abstract class LuceneHelper {

    private static final Logger logger = Logger.getLogger(LuceneHelper.class);

    private static final String INDEX_PATH = Configuration.getContextPath("index");

    private static final String SEPARATOR = "##########!!!!!!!!!!&&&&&&&&&&";

    public static void generateIndex() {
        generateArticleIndex();
        generateQuestionIndex();
    }

    private static void generateQuestionIndex() {
        generateIndex("/question", "id", "title", "content",
                DaoFactory.getDao(QuestionDao.class).getAll(ViewMode.DYNAMIC));
    }

    public static List<Map<String, String>> searchQuestion(String searchText) {
        return search(searchText, "/question", "title",
                id -> DaoFactory.getDao(QuestionDao.class).getQuestion(id, ViewMode.DYNAMIC));
    }

    private static void generateArticleIndex() {
        generateIndex("/article", "id", "subject", "content",
                DaoFactory.getDao(ArticleDao.class).getArticles("create_date", Status.published, ViewMode.DYNAMIC));
    }

    public static List<Map<String, String>> searchArticle(String searchText) {
        return search(searchText, "article", "subject",
                id -> DaoFactory.getDao(ArticleDao.class).getArticle(id, ViewMode.DYNAMIC));
    }

    private static void generateIndex(String path, String id, String title, String content,
            List<Map<String, String>> dataList) {
        try {
            Directory dir = FSDirectory.open(Paths.get(INDEX_PATH + path));
            Analyzer analyzer = new SmartChineseAnalyzer();
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            indexWriterConfig.setOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
            for (Map<String, String> data : dataList) {
                Document document = new Document();
                Field idField = new IntField("id", Integer.valueOf(data.get(id)), Field.Store.YES);
                Field indexedContentField = new TextField("indexedContent",
                        data.get(title) + SEPARATOR + data.get(content), Field.Store.YES);
                document.add(idField);
                document.add(indexedContentField);
                writer.addDocument(document);
                if (logger.isInfoEnabled()) {
                    logger.info("add index for : [" + data.get(title) + "]");
                }
            }
            writer.close();
        } catch (Exception e) {
            logger.error("add index failed ...", e);
        }
    }

    private static List<Map<String, String>> search(String searchText, String path, String title,
            LoadQuery loadQuery) {
        try {
            IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path)));
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new SmartChineseAnalyzer();
            QueryParser parser = new QueryParser("indexedContent", analyzer);
            Query query = parser.parse(searchText);
            TopDocs resultDocs = searcher.search(query, 100);
            ScoreDoc[] scoreDocs = resultDocs.scoreDocs;
            //
            SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">",
                    "</span>");
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(150));
            List<Map<String, String>> result = new ArrayList<>();
            List<Integer> idList = new ArrayList<>();
            for (int i = 0; i < scoreDocs.length; i++) {
                Document doc = searcher.doc(scoreDocs[i].doc);
                Integer id = Integer.valueOf(doc.get("id"));
                if (!idList.contains(id)) {
                    String indexedContent = doc.get("indexedContent");
                    TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent);
                    Map<String, String> data = loadQuery.getById(id);
                    String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent);
                    if (highlighterString.contains(SEPARATOR)) {
                        String[] array = highlighterString.split(SEPARATOR);
                        data.put(title, array[0]);
                        if (array.length > 1) {
                            data.put("summary", array[1]);
                        }
                    } else {
                        data.put("summary", highlighterString);
                    }
                    result.add(data);
                    idList.add(id);
                }
            }
            return result;
        } catch (Exception e) {
            logger.error("search failed ...", e);
        }
        return new ArrayList<>();
    }

    interface LoadQuery {

        Map<String, String> getById(Integer id);

    }

}