List of usage examples for org.apache.lucene.search.highlight Highlighter setTextFragmenter
public void setTextFragmenter(Fragmenter fragmenter)
From source file:aos.lucene.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);//w w w. jav a2 s . co m } String filename = args[0]; String searchText = "term"; // QueryParser parser = new QueryParser(Version.LUCENE_46, // "f", // new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // SimpleHTMLFormatter formatter = // new SimpleHTMLFormatter("<span class=\"highlight\">", // "</span>"); // TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // .tokenStream("f", new StringReader(text)); // QueryScorer scorer = new QueryScorer(query, "f"); // Highlighter highlighter = new Highlighter(formatter, scorer); // highlighter.setTextFragmenter( // new SimpleSpanFragmenter(scorer)); // String result = // highlighter.getBestFragments(tokens, text, 3, "..."); // FileWriter writer = new FileWriter(filename); // writer.write("<html>"); // writer.write("<style>\n" + // ".highlight {\n" + // " background: yellow;\n" + // "}\n" + // "</style>"); // writer.write("<body>"); // writer.write(result); // writer.write("</body></html>"); // writer.close(); // }
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
/**
 * Verifies that a single-term query is highlighted with the default
 * {@code <B>} formatter inside a known sentence.
 */
public void testHighlighting() throws Exception {
    String text = "The quick brown fox jumps over the lazy dog";
    TermQuery query = new TermQuery(new Term("field", "fox"));

    TokenStream tokenStream =
            new SimpleAnalyzer().tokenStream("field", new StringReader(text));

    // Score and fragment relative to the same query and field.
    QueryScorer scorer = new QueryScorer(query, "field");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);

    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);

    assertEquals("The quick brown <B>fox</B> jumps over the lazy dog",
            highlighter.getBestFragment(tokenStream, text));
}
From source file:aos.lucene.tools.HighlightTest.java
License:Apache License
public void testHits() throws Exception { IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory()); TermQuery query = new TermQuery(new Term("title", "action")); TopDocs hits = searcher.search(query, 10); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); Analyzer analyzer = new SimpleAnalyzer(); for (ScoreDoc sd : hits.scoreDocs) { Document doc = searcher.doc(sd.doc); String title = doc.get("title"); TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);// w w w . j a v a 2 s .com String fragment = highlighter.getBestFragment(stream, title); LOGGER.info(fragment); } }
From source file:apm.common.core.DaoImpl.java
License:Open Source License
/** * /*w ww . j a va 2s . c o m*/ * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:blackbelt.lucene.testHighlight.MainHighlight.java
License:Open Source License
public static void main(String[] args) throws ParseException, IOException { String keyWord = "hibernate"; String language = "en"; String text = "Hibernate is an object-relational mapping (ORM) library for the Java language," + "providing a framework for mapping an object-oriented domain model to a traditional relational" + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct " + "persistence-related database accesses with high-level object handling functions. " + "Hibernate is free software that is distributed under the GNU Lesser General Public License. " + "Hibernate's primary feature is mapping from Java classes to database tables " + "(and from Java data types to SQL data types). Hibernate also provides data query" + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve" + " the developer from manual result set handling and object conversion and keep the application" + " portable to all supported SQL databases with little performance overhead."; String result;// w ww . j av a2 s . 
c om QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(keyWord); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85)); try { result = highlighter.getBestFragments(tokens, text, 4, "<BR/>..."); System.out.println(result); System.out.println("\n" + result.length()); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } result = "<html><body>" + result + "</body></html>"; File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html"); try { PrintWriter pw = new PrintWriter(file); pw.print(result); pw.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title, LoadQuery loadQuery) {//ww w . ja v a 2 s .c o m try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("indexedContent", analyzer); Query query = parser.parse(searchText); TopDocs resultDocs = searcher.search(query, 100); ScoreDoc[] scoreDocs = resultDocs.scoreDocs; // SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); List<Map<String, String>> result = new ArrayList<>(); List<Integer> idList = new ArrayList<>(); for (int i = 0; i < scoreDocs.length; i++) { Document doc = searcher.doc(scoreDocs[i].doc); Integer id = Integer.valueOf(doc.get("id")); if (!idList.contains(id)) { String indexedContent = doc.get("indexedContent"); TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent); Map<String, String> data = loadQuery.getById(id); String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent); if (highlighterString.contains(SEPARATOR)) { String[] array = highlighterString.split(SEPARATOR); data.put(title, array[0]); if (array.length > 1) { data.put("summary", array[1]); } } else { data.put("summary", highlighterString); } result.add(data); idList.add(id); } } return result; } catch (Exception e) { logger.error("search failed ...", e); } return new ArrayList<>(); }
From source file:com.aistor.common.persistence.BaseDaoImpl.java
License:Open Source License
/** * /* www . jav a 2s .c om*/ * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String desciption = highlighter.getBestFragment(analyzer, field, text); if (desciption != null) { Reflections.invokeSetter(entity, fields[0], desciption); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:com.aurel.track.lucene.search.LuceneSearcher.java
License:Open Source License
private static int[] getQueryResults(Query query, String userQueryString, String preprocessedQueryString, Map<Integer, String> highlightedTextMap) { int[] hitIDs = new int[0]; IndexSearcher indexSearcher = null;/*w w w. ja va 2s . c om*/ try { long start = 0; if (LOGGER.isDebugEnabled()) { start = new Date().getTime(); } indexSearcher = getIndexSearcher(LuceneUtil.INDEXES.WORKITEM_INDEX); if (indexSearcher == null) { return hitIDs; } ScoreDoc[] scoreDocs; try { TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(MAXIMAL_HITS); indexSearcher.search(query, collector); scoreDocs = collector.topDocs().scoreDocs; } catch (IOException e) { LOGGER.warn("Getting the workitem search results failed with failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return hitIDs; } if (LOGGER.isDebugEnabled()) { long end = new Date().getTime(); LOGGER.debug("Found " + scoreDocs.length + " document(s) (in " + (end - start) + " milliseconds) that matched the user query '" + userQueryString + "' the preprocessed query '" + preprocessedQueryString + "' and the query.toString() '" + query.toString() + "'"); } QueryScorer queryScorer = new QueryScorer(query/*, LuceneUtil.HIGHLIGHTER_FIELD*/); Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer); Highlighter highlighter = new Highlighter(queryScorer); // Set the best scorer fragments highlighter.setTextFragmenter(fragmenter); // Set fragment to highlight hitIDs = new int[scoreDocs.length]; for (int i = 0; i < scoreDocs.length; i++) { int docID = scoreDocs[i].doc; Document doc = null; try { doc = indexSearcher.doc(docID); } catch (IOException e) { LOGGER.error("Getting the workitem documents failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); } if (doc != null) { Integer itemID = Integer.valueOf(doc.get(LuceneUtil.getFieldName(SystemFields.ISSUENO))); if (itemID != null) { hitIDs[i] = itemID.intValue(); if (highlightedTextMap != null) { String highligherFieldValue 
= doc.get(LuceneUtil.HIGHLIGHTER_FIELD); TokenStream tokenStream = null; try { tokenStream = TokenSources.getTokenStream(LuceneUtil.HIGHLIGHTER_FIELD, null, highligherFieldValue, LuceneUtil.getAnalyzer(), -1); } catch (Exception ex) { LOGGER.debug(ex.getMessage()); } if (tokenStream != null) { String fragment = highlighter.getBestFragment(tokenStream, highligherFieldValue); if (fragment != null) { highlightedTextMap.put(itemID, fragment); } } } } } } return hitIDs; } catch (BooleanQuery.TooManyClauses e) { LOGGER.error("Searching the query resulted in too many clauses. Try to narrow the query results. " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); throw e; } catch (Exception e) { LOGGER.error("Searching the workitems failed with " + e.getMessage()); LOGGER.debug(ExceptionUtils.getStackTrace(e)); return hitIDs; } finally { closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, "workItem"); } }
From source file:com.bewsia.script.safe.lucene.SEntity.java
License:Open Source License
/**
 * Returns the best highlighted fragments of {@code text} for the given query,
 * or the original text unchanged when nothing matched.
 *
 * @param query           the query whose terms are highlighted
 * @param text            the raw text to fragment and highlight
 * @param field           field name used when tokenizing the text
 * @param fragmentSize    target size of each fragment, in characters
 * @param maxNumFragments maximum number of fragments to return
 * @param separator       string inserted between concatenated fragments
 */
public String highlight(Query query, String text, String field, int fragmentSize,
        int maxNumFragments, String separator) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    CachingTokenFilter cachedTokens =
            new CachingTokenFilter(analyzer.tokenStream(field, new StringReader(text)));

    // Default formatter wraps matches in <B> tags.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);

    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

    cachedTokens.reset();
    String best = highlighter.getBestFragments(cachedTokens, text, maxNumFragments, separator);
    return best.length() == 0 ? text : best;
}
From source file:com.bluedragon.search.search.QueryRun.java
License:Open Source License
private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount, int recordsSearched) throws CorruptIndexException, Exception { DocumentWrap document = new DocumentWrap(searcher.doc(docid)); queryResultData.addRow(1);/*from w w w . ja va2 s . co m*/ queryResultData.setCurrentRow(queryResultData.getSize()); // Add in the standard columns that we know we have for every search queryResultData.setCell(1, new cfStringData(document.getId())); queryResultData.setCell(2, new cfStringData(document.getName())); queryResultData.setCell(3, new cfNumberData(score)); queryResultData.setCell(4, new cfNumberData(searchCount)); queryResultData.setCell(5, new cfNumberData(recordsSearched)); queryResultData.setCell(6, new cfNumberData(rank + 1)); String uC = queryAttributes.getUniqueColumn(); // Now we do the custom ones List<IndexableField> fields = document.getDocument().getFields(); Iterator<IndexableField> it = fields.iterator(); while (it.hasNext()) { IndexableField fieldable = it.next(); String fieldName = fieldable.name().toLowerCase(); // Check for the unique if (uniqueSet != null && fieldName.equals(uC)) { if (uniqueSet.contains(fieldable.stringValue())) { queryResultData.deleteRow(queryResultData.getSize()); return; } else uniqueSet.add(fieldable.stringValue()); } // Check to see if we have this column if (fieldName.equals("contents") && !queryAttributes.getContentFlag()) continue; if (!activeColumns.containsKey(fieldName)) { int newcolumn = queryResultData.addColumnData(fieldable.name().toUpperCase(), cfArrayData.createArray(1), null); activeColumns.put(fieldName, newcolumn); } int column = activeColumns.get(fieldName); if (column <= 6) continue; queryResultData.setCell(column, new cfStringData(fieldable.stringValue())); } // Do the context stuff if enable if (queryAttributes.getContextPassages() > 0) { Scorer scorer = new QueryScorer(queryAttributes.getQuery()); SimpleHTMLFormatter formatter = new 
SimpleHTMLFormatter(queryAttributes.getContextHighlightStart(), queryAttributes.getContextHighlightEnd()); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(queryAttributes.getContextBytes()); highlighter.setTextFragmenter(fragmenter); String nextContext = ""; String contents = document.getAttribute(DocumentWrap.CONTENTS); if (contents != null) { TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream(DocumentWrap.CONTENTS, new StringReader(contents)); String[] fragments = null; try { fragments = highlighter.getBestFragments(tokenStream, contents, queryAttributes.getContextPassages()); if (fragments.length == 1) { nextContext = fragments[0] + "..."; } else { StringBuilder context = new StringBuilder(); for (int f = 0; f < fragments.length; f++) { context.append("..."); context.append(fragments[f]); } context.append("..."); nextContext = context.toString(); } } catch (Exception e) { } // Add in the context if (!activeColumns.containsKey("context")) { int newcolumn = queryResultData.addColumnData("CONTEXT", cfArrayData.createArray(1), null); activeColumns.put("context", newcolumn); } queryResultData.setCell(activeColumns.get("context"), new cfStringData(nextContext)); } } }