io.jpress.module.article.searcher.LuceneSearcher.java Source code

Java tutorial

Introduction

Here is the source code for io.jpress.module.article.searcher.LuceneSearcher.java

Source

/**
 * Copyright (c) 2016-2019, Michael Yang 杨福海 (fuhai999@gmail.com).
 * <p>
 * Licensed under the GNU Lesser General Public License (LGPL) ,Version 3.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.gnu.org/licenses/lgpl-3.0.txt
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.jpress.module.article.searcher;

import com.jfinal.kit.PathKit;
import com.jfinal.log.Log;
import com.jfinal.plugin.activerecord.Page;
import io.jpress.commons.utils.CommonsUtils;
import io.jpress.module.article.model.Article;
import io.jpress.module.article.service.search.ArticleSearcher;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.lionsoul.jcseg.analyzer.JcsegAnalyzer;
import org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * {@link ArticleSearcher} implementation backed by a Lucene index stored on the local file system.
 * <p>
 * Articles are indexed with the fields {@code aid} (exact id), {@code title}, {@code text} and
 * {@code content} (analyzed with Jcseg, all stored) and {@code created} (not stored). Searches
 * match the keyword against {@code title} and {@code text} and return highlighted fragments.
 * <p>
 * Every write opens its own {@link IndexWriter} and closes it afterwards; closing commits the
 * change, and failures (including commit failures) are logged rather than propagated.
 */
public class LuceneSearcher implements ArticleSearcher {

    private static final Log logger = Log.getLog(LuceneSearcher.class);

    /**
     * Index location, resolved relative to {@code PathKit.getRootClassPath()} in the constructor.
     * NOTE(review): {@code java.io.File} does not expand "~", so this creates a directory literally
     * named "~" under the class path root. Left unchanged so existing indexes keep being found.
     */
    public static String INDEX_PATH = "~/indexes/";

    /** Shared index directory; (re)assigned every time an instance is constructed. */
    private static Directory directory;

    /**
     * Creates the index directory if necessary and opens it. Failures are logged, not thrown,
     * in which case {@code directory} may remain unset.
     */
    public LuceneSearcher() {
        File indexDir = new File(PathKit.getRootClassPath(), INDEX_PATH);
        if (!indexDir.exists() && !indexDir.mkdirs()) {
            logger.warn("Could not create Lucene index directory: " + indexDir.getAbsolutePath());
        }
        try {
            directory = NIOFSDirectory.open(indexDir.toPath());
        } catch (IOException e) {
            logger.error("Failed to open Lucene index directory: " + indexDir.getAbsolutePath(), e);
        }
    }

    /**
     * Adds the article to the index.
     *
     * @param article the article to index
     */
    @Override
    public void addArticle(Article article) {
        // try-with-resources: closing the writer commits, and a failed commit is reported
        // through the catch block instead of being silently dropped.
        try (IndexWriter writer = createIndexWriter()) {
            writer.addDocument(createDocument(article));
        } catch (IOException e) {
            logger.error("Failed to add article to the search index: " + e.getMessage(), e);
        }
    }

    /**
     * Removes every indexed document whose {@code aid} equals {@code id.toString()}.
     *
     * @param id the article id; a {@code null} id is ignored
     */
    @Override
    public void deleteArticle(Object id) {
        if (id == null) {
            return;
        }
        try (IndexWriter writer = createIndexWriter()) {
            writer.deleteDocuments(new Term("aid", id.toString()));
        } catch (IOException e) {
            logger.error("Failed to delete article " + id + " from the search index: " + e.getMessage(), e);
        }
    }

    /**
     * Replaces the indexed document of the article with a freshly built one.
     *
     * @param article the article whose index entry should be refreshed
     */
    @Override
    public void updateArticle(Article article) {
        // A single updateDocument call deletes by term and adds the new document atomically,
        // using one writer and one commit instead of a separate delete and add.
        try (IndexWriter writer = createIndexWriter()) {
            Term idTerm = new Term("aid", article.getId().toString());
            writer.updateDocument(idTerm, createDocument(article));
        } catch (IOException e) {
            logger.error("Failed to update article in the search index: " + e.getMessage(), e);
        }
    }

    /**
     * Searches the {@code title} and {@code text} fields for the keyword.
     *
     * @param keyword  raw user input; query-syntax characters are escaped before parsing
     * @param pageNum  1-based page number; values below 1 are treated as 1
     * @param pageSize number of articles per page, must be at least 1
     * @return the requested page (empty when the keyword is null/blank, cannot be parsed, or the
     *         page lies beyond the last hit), or {@code null} when reading the index fails
     * @throws IllegalArgumentException if {@code pageSize} is less than 1
     */
    @Override
    public Page<Article> search(String keyword, int pageNum, int pageSize) {
        if (pageSize < 1) {
            throw new IllegalArgumentException("pageSize must be >= 1, but was " + pageSize);
        }
        int page = Math.max(pageNum, 1);

        if (keyword == null || keyword.trim().isEmpty()) {
            return newPage(page, pageSize, 0, new ArrayList<Article>());
        }

        // Escape query-syntax characters so input such as "I/O" does not break the parser.
        Query query = buildQuery(QueryParser.escape(keyword));
        if (query == null) {
            // buildQuery already logged the parse failure; nothing can match.
            return newPage(page, pageSize, 0, new ArrayList<Article>());
        }

        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            ScoreDoc lastScoreDoc = getLastScoreDoc(page, pageSize, query, indexSearcher);
            TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);

            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font class=\"" + HIGH_LIGHT_CLASS + "\">",
                    "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(100));

            List<Article> articles = toArticleList(indexSearcher, topDocs, highlighter);
            int totalRow = getTotalRow(indexSearcher, query);
            return newPage(page, pageSize, totalRow, articles);
        } catch (IOException e) {
            logger.error("Failed to search the index for keyword '" + keyword + "': " + e.getMessage(), e);
            return null;
        }
    }

    /**
     * Finds the last hit of the page preceding {@code pageIndex}, to be used as the
     * {@code searchAfter} cursor.
     *
     * @return {@code null} for the first page or when the query has no hits; otherwise the last
     *         hit of the previous page, or the very last hit when the requested page lies beyond
     *         the result set (so the follow-up {@code searchAfter} yields nothing)
     */
    private static ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher indexSearcher)
            throws IOException {
        if (pageIndex <= 1) {
            return null; // first page: no cursor needed
        }
        // Number of hits that precede the requested page; computed in long to avoid int overflow.
        long preceding = (long) pageSize * (pageIndex - 1);
        int num = (int) Math.min(preceding, Integer.MAX_VALUE);
        ScoreDoc[] hits = indexSearcher.search(query, num).scoreDocs;
        if (hits.length == 0) {
            return null;
        }
        // Fewer hits than requested means the page is past the end; clamp instead of overrunning.
        return hits[Math.min(num, hits.length) - 1];
    }

    /**
     * Counts all documents matching the query.
     *
     * @param searcher the searcher to count with
     * @param query    the query to count matches for
     * @return the total number of matching documents
     * @throws IOException if the index cannot be read
     */
    public static int getTotalRow(IndexSearcher searcher, Query query) throws IOException {
        // count() is not limited to a fixed number of collected hits, unlike a top-N search.
        return searcher.count(query);
    }

    /**
     * Builds a {@link Page} from the articles and derives the total page count from
     * {@code totalRow} and {@code pageSize} (rounded up).
     */
    private static Page<Article> newPage(int pageNum, int pageSize, int totalRow, List<Article> articles) {
        int totalPages = totalRow / pageSize;
        if (totalRow % pageSize != 0) {
            totalPages++;
        }
        return new Page<>(articles, pageNum, pageSize, totalPages, totalRow);
    }

    /**
     * Converts the article into a Lucene document. Null title/content/text values are indexed as
     * empty strings because Lucene fields reject null values; a missing creation date falls back
     * to the current time.
     */
    private static Document createDocument(Article article) {
        Date created = article.getCreated() == null ? new Date() : article.getCreated();

        Document doc = new Document();
        doc.add(new StringField("aid", article.getId().toString(), Field.Store.YES));
        doc.add(new TextField("content", nullToEmpty(article.getContent()), Field.Store.YES));
        doc.add(new TextField("text", nullToEmpty(article.getText()), Field.Store.YES));
        doc.add(new TextField("title", nullToEmpty(article.getTitle()), Field.Store.YES));
        doc.add(new StringField("created", DateTools.dateToString(created, DateTools.Resolution.YEAR),
                Field.Store.NO));
        return doc;
    }

    /** Returns the value itself, or an empty string when it is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /** Opens a new writer on the shared directory, configured with the Jcseg analyzer. */
    private static IndexWriter createIndexWriter() throws IOException {
        Analyzer analyzer = new JcsegAnalyzer(JcsegTaskConfig.COMPLEX_MODE);
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, iwc);
    }

    /**
     * Builds a query that matches the keyword in the {@code text} field or the {@code title} field.
     *
     * @param keyword the already escaped keyword
     * @return the combined query, or {@code null} when the keyword cannot be parsed
     */
    private static Query buildQuery(String keyword) {
        try (Analyzer analyzer = new JcsegAnalyzer(JcsegTaskConfig.COMPLEX_MODE)) {
            // Searches the "text" field rather than "content".
            Query textQuery = new QueryParser("text", analyzer).parse(keyword);
            Query titleQuery = new QueryParser("title", analyzer).parse(keyword);

            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            builder.add(new BooleanClause(textQuery, BooleanClause.Occur.SHOULD));
            builder.add(new BooleanClause(titleQuery, BooleanClause.Occur.SHOULD));
            return builder.build();
        } catch (ParseException e) {
            logger.error("Failed to parse search keyword '" + keyword + "': " + e.getMessage(), e);
        }
        return null;
    }

    /**
     * Loads the stored fields of every hit into an {@link Article} and attaches highlighted
     * fragments of its title and text. A highlighting failure is logged and the article is still
     * returned without (complete) highlights.
     */
    private List<Article> toArticleList(IndexSearcher searcher, TopDocs topDocs, Highlighter highlighter)
            throws IOException {
        List<Article> articles = new ArrayList<>();
        try (Analyzer analyzer = new JcsegAnalyzer(JcsegTaskConfig.COMPLEX_MODE)) {
            for (ScoreDoc item : topDocs.scoreDocs) {
                Document doc = searcher.doc(item.doc);
                String title = doc.get("title");

                Article article = new Article();
                article.setId(Long.valueOf(doc.get("aid")));
                article.setTitle(title);
                article.setContent(doc.get("content"));

                try {
                    article.setHighlightTitle(highlight(highlighter, analyzer, "title", title));
                    article.setHighlightContent(highlight(highlighter, analyzer, "text", article.getText()));
                } catch (InvalidTokenOffsetsException e) {
                    logger.error(e.getMessage(), e);
                }
                articles.add(article);
            }
        }
        return articles;
    }

    /**
     * Returns the best highlighted fragment of {@code value}, or {@code null} when the value is
     * {@code null} or the highlighter finds no fragment.
     */
    private static String highlight(Highlighter highlighter, Analyzer analyzer, String field, String value)
            throws IOException, InvalidTokenOffsetsException {
        if (value == null) {
            return null;
        }
        return highlighter.getBestFragment(analyzer.tokenStream(field, new StringReader(value)), value);
    }

}