Java tutorial
/* documentr - Edit, maintain, and present software documentation on the web. Copyright (C) 2012-2013 Maik Schreiber This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.blizzy.documentr.search; import java.io.IOException; import java.io.StringReader; import java.util.Collections; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.spell.DirectSpellChecker; import org.apache.lucene.search.spell.SuggestMode; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.util.Bits; import org.springframework.security.core.Authentication; import org.springframework.util.Assert; import com.google.common.collect.Lists; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import de.blizzy.documentr.DocumentrConstants; import de.blizzy.documentr.access.DocumentrPermissionEvaluator; import de.blizzy.documentr.access.UserStore; import de.blizzy.documentr.util.Util; class PageFinder { private static final int HITS_PER_PAGE = 20; private SearcherManager searcherManager; private Analyzer analyzer; private ListeningExecutorService taskExecutor; private UserStore userStore; private DocumentrPermissionEvaluator permissionEvaluator; PageFinder(SearcherManager searcherManager, Analyzer analyzer, ListeningExecutorService taskExecutor, UserStore userStore, DocumentrPermissionEvaluator permissionEvaluator) { this.searcherManager = searcherManager; this.analyzer = analyzer; this.taskExecutor = taskExecutor; this.userStore = userStore; this.permissionEvaluator = permissionEvaluator; } public SearchResult findPages(final String searchText, final int page, final Authentication authentication) throws ParseException, IOException, TimeoutException { Assert.hasLength(searchText); Assert.isTrue(page >= 1); Assert.notNull(authentication); IndexSearcher searcher = null; Future<SearchResult> findFuture = null; try { searcher = searcherManager.acquire(); final IndexSearcher indexSearcher = searcher; Callable<SearchResult> findCallable = new Callable<SearchResult>() { @Override public SearchResult call() throws ParseException, IOException, TimeoutException { return findPages(searchText, page, authentication, indexSearcher); } }; findFuture = taskExecutor.submit(findCallable); SearchTextSuggestion suggestion = getSearchTextSuggestion(searchText, authentication, indexSearcher); SearchResult result = findFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS); result.setSuggestion(suggestion); return result; } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof ParseException) { throw (ParseException) cause; } else if (cause instanceof IOException) { throw (IOException) cause; } else if (cause instanceof TimeoutException) { throw (TimeoutException) cause; } else { throw Util.toRuntimeException(cause); } } finally { if (findFuture != null) { findFuture.cancel(false); } if (searcher != null) { searcherManager.release(searcher); } } } private SearchResult findPages(String searchText, int page, Authentication authentication, IndexSearcher searcher) throws ParseException, IOException, TimeoutException { Future<Query> queryFuture = taskExecutor.submit(new ParseQueryTask(searchText, analyzer)); ListenableFuture<Bits> visibleDocIdsFuture = taskExecutor.submit( new GetVisibleDocIdsTask(searcher, authentication, userStore, permissionEvaluator, taskExecutor)); Query query; TopDocs docs; try { query = queryFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS); Bits visibleDocIds = visibleDocIdsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS); docs = searcher.search(query, new PagePermissionFilter(visibleDocIds), HITS_PER_PAGE * page); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof ParseException) { throw (ParseException) cause; } else { throw Util.toRuntimeException(cause); } } finally { queryFuture.cancel(false); visibleDocIdsFuture.cancel(false); } int start = HITS_PER_PAGE * (page - 1); int end = Math.min(HITS_PER_PAGE * page, docs.scoreDocs.length); IndexReader reader = searcher.getIndexReader(); List<ListenableFuture<SearchHit>> hitFutures = Lists.newArrayList(); for (int i = start; i < end; i++) { ListenableFuture<SearchHit> hitFuture = taskExecutor .submit(new GetSearchHitTask(query, reader, docs.scoreDocs[i].doc, analyzer)); hitFutures.add(hitFuture); } try { ListenableFuture<List<SearchHit>> allHitsFuture = Futures.allAsList(hitFutures); List<SearchHit> hits = allHitsFuture.get(DocumentrConstants.INTERACTIVE_TIMEOUT, TimeUnit.SECONDS); return new SearchResult(hits, docs.totalHits, HITS_PER_PAGE); } catch (InterruptedException e) { throw new RuntimeException(e); } catch (ExecutionException e) { Throwable cause = e.getCause(); if (cause instanceof IOException) { throw (IOException) cause; } else { throw Util.toRuntimeException(cause); } } finally { for (ListenableFuture<SearchHit> hitFuture : hitFutures) { hitFuture.cancel(false); } } } private SearchTextSuggestion getSearchTextSuggestion(String searchText, Authentication authentication, IndexSearcher searcher) throws IOException, ParseException, TimeoutException { List<WordPosition> words = Lists.newArrayList(); TokenStream tokenStream = null; try { tokenStream = analyzer.tokenStream(PageIndex.ALL_TEXT_SUGGESTIONS, new StringReader(searchText)); tokenStream.addAttribute(CharTermAttribute.class); tokenStream.addAttribute(OffsetAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class); String text = charTerm.toString(); if (StringUtils.isNotBlank(text)) { OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class); WordPosition word = new WordPosition(text, offset.startOffset(), offset.endOffset()); words.add(word); } } tokenStream.end(); } finally { Util.closeQuietly(tokenStream); } Collections.reverse(words); StringBuilder suggestedSearchText = new StringBuilder(searchText); StringBuilder suggestedSearchTextHtml = new StringBuilder(searchText); boolean foundSuggestions = false; String now = String.valueOf(System.currentTimeMillis()); String startMarker = "__SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ String endMarker = "__/SUGGESTION-" + now + "__"; //$NON-NLS-1$ //$NON-NLS-2$ DirectSpellChecker spellChecker = new DirectSpellChecker(); IndexReader reader = searcher.getIndexReader(); for (WordPosition word : words) { Term term = new Term(PageIndex.ALL_TEXT_SUGGESTIONS, word.getWord()); SuggestWord[] suggestions = spellChecker.suggestSimilar(term, 1, reader, SuggestMode.SUGGEST_MORE_POPULAR); if (suggestions.length > 0) { String suggestedWord = suggestions[0].string; int start = word.getStart(); int end = word.getEnd(); suggestedSearchText.replace(start, end, suggestedWord); suggestedSearchTextHtml.replace(start, end, startMarker + StringEscapeUtils.escapeHtml4(suggestedWord) + endMarker); foundSuggestions = true; } } if (foundSuggestions) { String suggestion = suggestedSearchText.toString(); SearchResult suggestionResult = findPages(suggestion, 1, authentication, searcher); int suggestionTotalHits = suggestionResult.getTotalHits(); if (suggestionTotalHits > 0) { String html = StringEscapeUtils.escapeHtml4(suggestedSearchTextHtml.toString()) .replaceAll(startMarker + "(.*?)" + endMarker, "<strong><em>$1</em></strong>"); //$NON-NLS-1$ //$NON-NLS-2$ return new SearchTextSuggestion(suggestedSearchText.toString(), html, suggestionTotalHits); } } return null; } }