List of usage examples for the org.apache.lucene.search.highlight.Highlighter#getBestFragment method
public final String getBestFragment(TokenStream tokenStream, String text) throws IOException, InvalidTokenOffsetsException
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
/**
 * Highlights the term "fox" in a fixed sentence and checks that the best fragment is the
 * whole sentence with the match wrapped in {@code <B>} tags.
 */
public void testHighlighting() throws Exception {
    final String sentence = "The quick brown fox jumps over the lazy dog";
    final String expected = "The quick brown <B>fox</B> jumps over the lazy dog";

    // The scorer is shared by the highlighter and its span-aware fragmenter.
    TermQuery foxQuery = new TermQuery(new Term("field", "fox"));
    QueryScorer queryScorer = new QueryScorer(foxQuery, "field");

    Highlighter highlighter = new Highlighter(queryScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));

    TokenStream tokens = new SimpleAnalyzer().tokenStream("field", new StringReader(sentence));
    String bestFragment = highlighter.getBestFragment(tokens, sentence);

    assertEquals(expected, bestFragment);
}
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
public void testHits() throws Exception { IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory()); TermQuery query = new TermQuery(new Term("title", "action")); TopDocs hits = searcher.search(query, 10); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); Analyzer analyzer = new SimpleAnalyzer(); for (ScoreDoc sd : hits.scoreDocs) { Document doc = searcher.doc(sd.doc); String title = doc.get("title"); TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);//from w w w. ja v a 2s . c o m String fragment = highlighter.getBestFragment(stream, title); LOGGER.info(fragment); } }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title, LoadQuery loadQuery) {/*from w ww . ja v a2 s . com*/ try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("indexedContent", analyzer); Query query = parser.parse(searchText); TopDocs resultDocs = searcher.search(query, 100); ScoreDoc[] scoreDocs = resultDocs.scoreDocs; // SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); List<Map<String, String>> result = new ArrayList<>(); List<Integer> idList = new ArrayList<>(); for (int i = 0; i < scoreDocs.length; i++) { Document doc = searcher.doc(scoreDocs[i].doc); Integer id = Integer.valueOf(doc.get("id")); if (!idList.contains(id)) { String indexedContent = doc.get("indexedContent"); TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent); Map<String, String> data = loadQuery.getById(id); String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent); if (highlighterString.contains(SEPARATOR)) { String[] array = highlighterString.split(SEPARATOR); data.put(title, array[0]); if (array.length > 1) { data.put("summary", array[1]); } } else { data.put("summary", highlighterString); } result.add(data); idList.add(id); } } return result; } catch (Exception e) { logger.error("search failed ...", e); } return new ArrayList<>(); }
From source file:com.aurel.track.lucene.search.LuceneSearcher.java
License:Open Source License
private static int[] getQueryResults(Query query, String userQueryString, String preprocessedQueryString, Map<Integer, String> highlightedTextMap) { int[] hitIDs = new int[0]; IndexSearcher indexSearcher = null;/*from w w w .j a v a2 s . co m*/ try { long start = 0; if (LOGGER.isDebugEnabled()) { start = new Date().getTime(); } indexSearcher = getIndexSearcher(LuceneUtil.INDEXES.WORKITEM_INDEX); if (indexSearcher == null) { return hitIDs; } ScoreDoc[] scoreDocs; try { TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(MAXIMAL_HITS); indexSearcher.search(query, collector); scoreDocs = collector.topDocs().scoreDocs; } catch (IOException e) { LOGGER.warn("Getting the workitem search results failed with failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return hitIDs; } if (LOGGER.isDebugEnabled()) { long end = new Date().getTime(); LOGGER.debug("Found " + scoreDocs.length + " document(s) (in " + (end - start) + " milliseconds) that matched the user query '" + userQueryString + "' the preprocessed query '" + preprocessedQueryString + "' and the query.toString() '" + query.toString() + "'"); } QueryScorer queryScorer = new QueryScorer(query/*, LuceneUtil.HIGHLIGHTER_FIELD*/); Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer); Highlighter highlighter = new Highlighter(queryScorer); // Set the best scorer fragments highlighter.setTextFragmenter(fragmenter); // Set fragment to highlight hitIDs = new int[scoreDocs.length]; for (int i = 0; i < scoreDocs.length; i++) { int docID = scoreDocs[i].doc; Document doc = null; try { doc = indexSearcher.doc(docID); } catch (IOException e) { LOGGER.error("Getting the workitem documents failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } if (doc != null) { Integer itemID = Integer.valueOf(doc.get(LuceneUtil.getFieldName(SystemFields.ISSUENO))); if (itemID != null) { hitIDs[i] = itemID.intValue(); if (highlightedTextMap != null) { String 
highligherFieldValue = doc.get(LuceneUtil.HIGHLIGHTER_FIELD); TokenStream tokenStream = null; try { tokenStream = TokenSources.getTokenStream(LuceneUtil.HIGHLIGHTER_FIELD, null, highligherFieldValue, LuceneUtil.getAnalyzer(), -1); } catch (Exception ex) { LOGGER.debug(ex.getMessage()); } if (tokenStream != null) { String fragment = highlighter.getBestFragment(tokenStream, highligherFieldValue); if (fragment != null) { highlightedTextMap.put(itemID, fragment); } } } } } } return hitIDs; } catch (BooleanQuery.TooManyClauses e) { LOGGER.error("Searching the query resulted in too many clauses. Try to narrow the query results. " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); throw e; } catch (Exception e) { LOGGER.error("Searching the workitems failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return hitIDs; } finally { closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, "workItem"); } }
From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java
License:Open Source License
public String highlight(String strToHighlight, String fieldName, Query luceneQuery) { String highlightedText;// ww w . j a v a 2s. c o m Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH); try { Directory directory = FSDirectory.open(indexDir); IndexReader indexReader = DirectoryReader.open(directory); Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader); QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery); SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>"); Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer); Fragmenter luceneFragmenter; // Si la chaine highlighter est sup 250 carac if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) { // Cration de best fragments de 100 carac chaque luceneFragmenter = new SimpleFragmenter(TAILLE_FRAGMENT); } else { // Toute la chaine est highlight luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE); } luceneHighlighter.setTextFragmenter(luceneFragmenter); TokenStream luceneTokenStream = analyzer.tokenStream(fieldName, new StringReader(strToHighlight)); String fragment = null; if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) { fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight, NB_BEST_FRAGMENT, FRAGMENT_SEP); } else { fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight); } if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) { fragment = strToHighlight; } indexReader.close(); directory.close(); highlightedText = fragment; } catch (IOException e) { throw new RuntimeException(e); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } return highlightedText; }
From source file:com.duroty.application.bookmark.manager.BookmarkManager.java
License:Open Source License
/** * DOCUMENT ME!/*from w ww .j av a 2 s .c o m*/ * * @param repositoryName DOCUMENT ME! * @param token DOCUMENT ME! * @param page DOCUMENT ME! * @param messagesByPage DOCUMENT ME! * @param order DOCUMENT ME! * @param orderType DOCUMENT ME! * * @return DOCUMENT ME! * * @throws BookmarkException DOCUMENT ME! * @throws SearchException DOCUMENT ME! */ public SearchObj search(String repositoryName, String token, int page, int messagesByPage, int order, String orderType, boolean isNotebook) throws BookmarkException { String lucenePath = ""; if (!defaultLucenePath.endsWith(File.separator)) { lucenePath = defaultLucenePath + File.separator + repositoryName + File.separator + Constants.BOOKMARK_LUCENE_BOOKMARK; } else { lucenePath = defaultLucenePath + repositoryName + File.separator + Constants.BOOKMARK_LUCENE_BOOKMARK; } Searcher searcher = null; SearchObj searchObj = new SearchObj(); Highlighter highlighter = null; try { searcher = BookmarkIndexer.getSearcher(lucenePath); Query query = null; Hits hits = null; if (StringUtils.isBlank(token)) { if (isNotebook) { query = SimpleQueryParser.parse("notebook:true", new KeywordAnalyzer()); } else { query = new MatchAllDocsQuery(); } hits = searcher.search(query, new Sort(new SortField[] { SortField.FIELD_SCORE, new SortField(Field_insert_date, SortField.STRING, true) })); } else { query = SimpleQueryParser.parse(token, analyzer); StringBuffer buffer = new StringBuffer(); if (isNotebook) { buffer.append("(" + query.toString() + ") AND "); QueryParser parser = new QueryParser(Field_notebook, new KeywordAnalyzer()); parser.setDefaultOperator(Operator.AND); Query aux = parser.parse(String.valueOf(true)); buffer.append("(" + aux.toString() + ") "); } if (buffer.length() > 0) { QueryParser parser = new QueryParser("", new WhitespaceAnalyzer()); query = parser.parse(buffer.toString()); } hits = searcher.search(query); } Date searchStart = new Date(); Date searchEnd = new Date(); //time in seconds double time = ((double) 
(searchEnd.getTime() - searchStart.getTime())) / (double) 1000; int hitsLength = hits.length(); if (hitsLength <= 0) { return null; } int start = page * messagesByPage; int end = start + messagesByPage; if (end > 0) { end = Math.min(hitsLength, end); } else { end = hitsLength; } if (start > end) { throw new SearchException("Search index of bound. start > end"); } Vector bookmarks = new Vector(); for (int j = start; j < end; j++) { Document doc = hits.doc(j); if (doc != null) { LuceneBookmark luceneBookmark = new LuceneBookmark(doc); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); BookmarkObj bookmarkObj = new BookmarkObj(); bookmarkObj.setCacheDate(luceneBookmark.getCacheDate()); bookmarkObj.setComments(luceneBookmark.getComments()); bookmarkObj.setContents(luceneBookmark.getCotents()); bookmarkObj.setDepth(luceneBookmark.getDepth()); bookmarkObj.setFlagged(luceneBookmark.isFlagged()); bookmarkObj.setIdint(luceneBookmark.getIdint()); bookmarkObj.setInsertDate(luceneBookmark.getInsertDate()); bookmarkObj.setKeywords(luceneBookmark.getKeywords()); bookmarkObj.setNotebook(luceneBookmark.isNotebook()); bookmarkObj.setParent(Long.parseLong(luceneBookmark.getParent())); bookmarkObj.setTitle(luceneBookmark.getTitle()); bookmarkObj.setTitleHighlight(luceneBookmark.getTitle()); bookmarkObj.setUrl(luceneBookmark.getUrl()); String contents = luceneBookmark.getCotents(); String hcontents = null; if ((contents != null) && (!contents.trim().equals(""))) { contents = contents.replaceAll("\\s+", " "); TokenStream tokenStream = analyzer.tokenStream(Field_contents, new StringReader(contents)); hcontents = highlighter.getBestFragment(tokenStream, contents); if (hcontents != null) { contents = hcontents; } else { contents = null; } } bookmarkObj.setContentsHighlight(contents); String title = luceneBookmark.getTitle(); String htitle = null; 
if ((title != null) && (!title.trim().equals(""))) { title = title.replaceAll("\\s+", " "); TokenStream tokenStream = analyzer.tokenStream(Field_title, new StringReader(title)); htitle = highlighter.getBestFragment(tokenStream, title); if (htitle != null) { title = htitle; } } bookmarkObj.setTitleHighlight(title); bookmarks.addElement(bookmarkObj); } } searchObj.setHits(hitsLength); searchObj.setTime(time); searchObj.setBookmarks(bookmarks); } catch (Exception ex) { throw new SearchException(ex); } finally { } return searchObj; }
From source file:com.github.wxiaoqi.search.lucene.util.DocumentUtil.java
License:Open Source License
private static String stringFormatHighlighterOut(Analyzer analyzer, Highlighter highlighter, Document document, String field) throws Exception { String fieldValue = document.get(field); if (fieldValue != null) { TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(fieldValue)); return highlighter.getBestFragment(tokenStream, fieldValue); }//from w w w . ja v a 2s . co m return null; }
From source file:com.leavesfly.lia.tool.HighlightTest.java
License:Apache License
public void testHits() throws Exception { IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory()); TermQuery query = new TermQuery(new Term("title", "action")); TopDocs hits = searcher.search(query, 10); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); Analyzer analyzer = new SimpleAnalyzer(); for (ScoreDoc sd : hits.scoreDocs) { Document doc = searcher.doc(sd.doc); String title = doc.get("title"); TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);//from www .ja va 2 s . c o m String fragment = highlighter.getBestFragment(stream, title); System.out.println(fragment); } }
From source file:com.mathworks.xzheng.tools.HighlightTest.java
License:Apache License
/**
 * Verifies that highlighting the term "fox" in a fixed sentence yields the full sentence with
 * the match enclosed in {@code <B>} tags.
 */
public void testHighlighting() throws Exception {
    String input = "The quick brown fox jumps over the lazy dog";

    // Analyze the raw text with the Lucene 4.6 flavour of SimpleAnalyzer.
    SimpleAnalyzer simpleAnalyzer = new SimpleAnalyzer(Version.LUCENE_46);
    TokenStream analyzed = simpleAnalyzer.tokenStream("field", new StringReader(input));

    // Score against a single-term query on the same field.
    Term foxTerm = new Term("field", "fox");
    QueryScorer termScorer = new QueryScorer(new TermQuery(foxTerm), "field");

    Highlighter underTest = new Highlighter(termScorer);
    Fragmenter spanFragmenter = new SimpleSpanFragmenter(termScorer);
    underTest.setTextFragmenter(spanFragmenter);

    assertEquals("The quick brown <B>fox</B> jumps over the lazy dog",
            underTest.getBestFragment(analyzed, input));
}
From source file:com.mathworks.xzheng.tools.HighlightTest.java
License:Apache License
/**
 * Searches the book index for titles containing the term "action" and prints the best
 * highlighted fragment of each hit's stored title to standard output.
 *
 * <p>The index reader opened for the search is closed when the test finishes, whether or not
 * highlighting succeeds.
 */
public void testHits() throws Exception {
    DirectoryReader reader = DirectoryReader.open(TestUtil.getBookIndexDirectory());
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TermQuery query = new TermQuery(new Term("title", "action"));
        TopDocs hits = searcher.search(query, 10);

        // The same scorer is used for ranking fragments and for span-aware fragmenting.
        QueryScorer scorer = new QueryScorer(query, "title");
        Highlighter highlighter = new Highlighter(scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

        Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);
        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            String title = doc.get("title");

            TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title",
                    doc, analyzer);
            String fragment = highlighter.getBestFragment(stream, title);

            System.out.println(fragment);
        }
    } finally {
        // IndexSearcher does not own the reader; it must be closed explicitly.
        reader.close();
    }
}