de.blizzy.documentr.search.PageFinder.java Source code

Java tutorial

Introduction

Here is the source code for de.blizzy.documentr.search.PageFinder.java

Source

/*
documentr - Edit, maintain, and present software documentation on the web.
Copyright (C) 2012-2013 Maik Schreiber
    
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
    
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
    
You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package de.blizzy.documentr.search;

import java.io.IOException;
import java.io.StringReader;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.util.Bits;
import org.springframework.security.core.Authentication;
import org.springframework.util.Assert;

import com.google.common.collect.Lists;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;

import de.blizzy.documentr.DocumentrConstants;
import de.blizzy.documentr.access.DocumentrPermissionEvaluator;
import de.blizzy.documentr.access.UserStore;
import de.blizzy.documentr.util.Util;

/**
 * Runs full-text page searches against the index exposed by a {@link SearcherManager} and,
 * alongside each search, tries to compute a spelling-corrected alternative for the search text.
 *
 * <p>Query parsing, visibility computation and hit loading are submitted to the supplied
 * {@link ListeningExecutorService}; each wait on those tasks is bounded by
 * {@link DocumentrConstants#INTERACTIVE_TIMEOUT} seconds.</p>
 */
class PageFinder {
    /** Number of hits that make up one result page. */
    private static final int HITS_PER_PAGE = 20;

    private final SearcherManager searcherManager;
    private final Analyzer analyzer;
    private final ListeningExecutorService taskExecutor;
    private final UserStore userStore;
    private final DocumentrPermissionEvaluator permissionEvaluator;

    /**
     * Creates a finder that uses the given collaborators.
     *
     * @param searcherManager source of {@link IndexSearcher} instances (acquired/released per search)
     * @param analyzer analyzer used for tokenizing the search text and passed on to the search tasks
     * @param taskExecutor executor that runs the search sub-tasks
     * @param userStore passed on to the visibility task
     * @param permissionEvaluator passed on to the visibility task
     */
    PageFinder(SearcherManager searcherManager, Analyzer analyzer, ListeningExecutorService taskExecutor,
            UserStore userStore, DocumentrPermissionEvaluator permissionEvaluator) {

        this.searcherManager = searcherManager;
        this.analyzer = analyzer;
        this.taskExecutor = taskExecutor;
        this.userStore = userStore;
        this.permissionEvaluator = permissionEvaluator;
    }

    /**
     * Searches for pages matching {@code searchText} and attaches a spelling suggestion
     * (possibly {@code null}) to the result.
     *
     * <p>The actual search is submitted to the task executor while the suggestion is computed
     * on the calling thread; both use the same acquired {@link IndexSearcher}, which is released
     * again before this method returns.</p>
     *
     * @param searchText the text to search for; must not be empty
     * @param page 1-based number of the result page to return
     * @param authentication the authentication the search is performed for; must not be {@code null}
     * @return the search result for the requested page
     * @throws ParseException if the search task failed with a {@link ParseException}
     * @throws IOException if acquiring/releasing the searcher or the search itself failed with an
     *         {@link IOException}
     * @throws TimeoutException if the search did not finish within the interactive timeout
     * @throws RuntimeException if the calling thread was interrupted while waiting (the interrupt
     *         status is restored first) or if the search failed for any other reason
     */
    public SearchResult findPages(final String searchText, final int page, final Authentication authentication)
            throws ParseException, IOException, TimeoutException {

        Assert.hasLength(searchText);
        Assert.isTrue(page >= 1);
        Assert.notNull(authentication);

        IndexSearcher searcher = null;
        Future<SearchResult> findFuture = null;
        try {
            searcher = searcherManager.acquire();
            final IndexSearcher indexSearcher = searcher;

            Callable<SearchResult> findCallable = new Callable<SearchResult>() {
                @Override
                public SearchResult call() throws ParseException, IOException, TimeoutException {
                    return findPages(searchText, page, authentication, indexSearcher);
                }
            };
            findFuture = taskExecutor.submit(findCallable);

            // computed on this thread while the search task is running on the executor
            SearchTextSuggestion suggestion = getSearchTextSuggestion(searchText, authentication, indexSearcher);
            SearchResult result = findFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
            result.setSuggestion(suggestion);
            return result;
        } catch (InterruptedException e) {
            // restore the interrupt status so that code further up the stack can still observe it
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            // unwrap the checked exceptions this method declares, wrap everything else
            Throwable cause = e.getCause();
            if (cause instanceof ParseException) {
                throw (ParseException) cause;
            } else if (cause instanceof IOException) {
                throw (IOException) cause;
            } else if (cause instanceof TimeoutException) {
                throw (TimeoutException) cause;
            } else {
                throw Util.toRuntimeException(cause);
            }
        } finally {
            if (findFuture != null) {
                // has no effect if the task already completed
                findFuture.cancel(false);
            }
            if (searcher != null) {
                searcherManager.release(searcher);
            }
        }
    }

    /**
     * Performs the search on an already acquired searcher.
     *
     * <p>Parses the query and determines the visible document IDs as two executor tasks, runs the
     * filtered search for the first {@code HITS_PER_PAGE * page} hits, and then loads the hits of
     * the requested page as one executor task per hit.</p>
     *
     * @param searchText the text to search for
     * @param page 1-based number of the result page to return
     * @param authentication the authentication passed on to the visibility task
     * @param searcher the searcher to use; not released by this method
     * @return the hits of the requested page together with the total hit count
     * @throws ParseException if parsing the query failed with a {@link ParseException}
     * @throws IOException if searching or loading a hit failed with an {@link IOException}
     * @throws TimeoutException if a sub-task did not finish within the interactive timeout
     */
    private SearchResult findPages(String searchText, int page, Authentication authentication,
            IndexSearcher searcher) throws ParseException, IOException, TimeoutException {

        Future<Query> queryFuture = taskExecutor.submit(new ParseQueryTask(searchText, analyzer));
        ListenableFuture<Bits> visibleDocIdsFuture = taskExecutor.submit(
                new GetVisibleDocIdsTask(searcher, authentication, userStore, permissionEvaluator, taskExecutor));

        Query query;
        TopDocs docs;
        try {
            query = queryFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
            Bits visibleDocIds = visibleDocIdsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
            // request all hits up to and including the requested page; the page is cut out below
            docs = searcher.search(query, new PagePermissionFilter(visibleDocIds), HITS_PER_PAGE * page);
        } catch (InterruptedException e) {
            // restore the interrupt status so that code further up the stack can still observe it
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof ParseException) {
                throw (ParseException) cause;
            } else {
                throw Util.toRuntimeException(cause);
            }
        } finally {
            queryFuture.cancel(false);
            visibleDocIdsFuture.cancel(false);
        }

        // if the requested page lies beyond the available hits, start >= end and no hit is loaded
        int start = HITS_PER_PAGE * (page - 1);
        int end = Math.min(HITS_PER_PAGE * page, docs.scoreDocs.length);
        IndexReader reader = searcher.getIndexReader();
        List<ListenableFuture<SearchHit>> hitFutures = Lists.newArrayList();
        for (int i = start; i < end; i++) {
            ListenableFuture<SearchHit> hitFuture = taskExecutor
                    .submit(new GetSearchHitTask(query, reader, docs.scoreDocs[i].doc, analyzer));
            hitFutures.add(hitFuture);
        }

        try {
            ListenableFuture<List<SearchHit>> allHitsFuture = Futures.allAsList(hitFutures);
            List<SearchHit> hits = allHitsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS);
            return new SearchResult(hits, docs.totalHits, HITS_PER_PAGE);
        } catch (InterruptedException e) {
            // restore the interrupt status so that code further up the stack can still observe it
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof IOException) {
                throw (IOException) cause;
            } else {
                throw Util.toRuntimeException(cause);
            }
        } finally {
            for (ListenableFuture<SearchHit> hitFuture : hitFutures) {
                hitFuture.cancel(false);
            }
        }
    }

    /**
     * Computes a spelling-corrected variant of {@code searchText}.
     *
     * <p>The search text is tokenized with the analyzer; for every token the spell checker is asked
     * for one more popular similar term from the {@code PageIndex.ALL_TEXT_SUGGESTIONS} field. If at
     * least one token was replaced, the corrected text is searched for (first page), and a suggestion
     * is returned only if that search has hits.</p>
     *
     * @param searchText the original search text
     * @param authentication the authentication used for the verification search
     * @param searcher the searcher to use for spell checking and the verification search
     * @return the suggestion (plain text, HTML with replaced words emphasized, and hit count), or
     *         {@code null} if no word was replaced or the corrected text yields no hits
     * @throws IOException if tokenizing, spell checking or the verification search fails
     * @throws ParseException if the corrected text cannot be parsed
     * @throws TimeoutException if the verification search times out
     */
    private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication,
            IndexSearcher searcher) throws IOException, ParseException, TimeoutException {

        List<WordPosition> words = Lists.newArrayList();

        TokenStream tokenStream = null;
        try {
            tokenStream = analyzer.tokenStream(PageIndex.ALL_TEXT_SUGGESTIONS, new StringReader(searchText));
            tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.addAttribute(OffsetAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
                String text = charTerm.toString();
                if (StringUtils.isNotBlank(text)) {
                    OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class);
                    WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset());
                    words.add(word);
                }
            }
            tokenStream.end();
        } finally {
            Util.closeQuietly(tokenStream);
        }

        // Replace from the last word to the first so that the offsets of words not yet processed
        // stay valid while the buffers change length.
        // NOTE(review): this relies on the analyzer emitting tokens in increasing offset order.
        Collections.reverse(words);

        StringBuilder suggestedSearchText = new StringBuilder(searchText);
        StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText);
        boolean foundSuggestions = false;
        // Timestamped markers delimit replaced words in the HTML buffer. They consist only of
        // characters that are neither touched by HTML escaping nor special in a regular expression.
        String now = String.valueOf(System.currentTimeMillis());
        String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
        String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$
        DirectSpellChecker spellChecker = new DirectSpellChecker();
        IndexReader reader = searcher.getIndexReader();
        for (WordPosition word : words) {
            Term term = new Term(PageIndex.ALL_TEXT_SUGGESTIONS, word.getWord());
            SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader,
                    SuggestMode.SUGGEST_MORE_POPULAR);
            if (suggestions.length > 0) {
                String suggestedWord = suggestions[0].string;
                int start = word.getStart();
                int end = word.getEnd();
                suggestedSearchText.replace(start, end, suggestedWord);
                // Insert the word unescaped: the whole buffer is HTML-escaped exactly once below.
                // Escaping here as well would double-escape the word (e.g. "&" -> "&amp;amp;").
                suggestedSearchTextHtml.replace(start, end, startMarker + suggestedWord + endMarker);

                foundSuggestions = true;
            }
        }

        if (foundSuggestions) {
            String suggestion = suggestedSearchText.toString();
            // only offer the corrected text if searching for it actually finds something
            SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher);
            int suggestionTotalHits = suggestionResult.getTotalHits();
            if (suggestionTotalHits > 0) {
                // escape everything once, then turn the marker pairs into emphasis markup
                String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString())
                        .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$
                return new SearchTextSuggestion(suggestion, html, suggestionTotalHits);
            }
        }

        return null;
    }
}