Usage examples for the org.apache.lucene.search.highlight.Highlighter constructor
public Highlighter(Formatter formatter, Scorer fragmentScorer)
From source file:aos.lucene.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);/*from w ww. ja va 2 s.c om*/ } String filename = args[0]; String searchText = "term"; // QueryParser parser = new QueryParser(Version.LUCENE_46, // "f", // new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // SimpleHTMLFormatter formatter = // new SimpleHTMLFormatter("<span class=\"highlight\">", // "</span>"); // TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // .tokenStream("f", new StringReader(text)); // QueryScorer scorer = new QueryScorer(query, "f"); // Highlighter highlighter = new Highlighter(formatter, scorer); // highlighter.setTextFragmenter( // new SimpleSpanFragmenter(scorer)); // String result = // highlighter.getBestFragments(tokens, text, 3, "..."); // FileWriter writer = new FileWriter(filename); // writer.write("<html>"); // writer.write("<style>\n" + // ".highlight {\n" + // " background: yellow;\n" + // "}\n" + // "</style>"); // writer.write("<body>"); // writer.write(result); // writer.write("</body></html>"); // writer.close(); // }
From source file:apm.common.core.DaoImpl.java
License:Open Source License
/** * /*www. j av a 2s. c o m*/ * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:blackbelt.lucene.testHighlight.MainHighlight.java
License:Open Source License
public static void main(String[] args) throws ParseException, IOException { String keyWord = "hibernate"; String language = "en"; String text = "Hibernate is an object-relational mapping (ORM) library for the Java language," + "providing a framework for mapping an object-oriented domain model to a traditional relational" + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct " + "persistence-related database accesses with high-level object handling functions. " + "Hibernate is free software that is distributed under the GNU Lesser General Public License. " + "Hibernate's primary feature is mapping from Java classes to database tables " + "(and from Java data types to SQL data types). Hibernate also provides data query" + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve" + " the developer from manual result set handling and object conversion and keep the application" + " portable to all supported SQL databases with little performance overhead."; String result;//ww w.j av a2 s. 
c om QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(keyWord); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85)); try { result = highlighter.getBestFragments(tokens, text, 4, "<BR/>..."); System.out.println(result); System.out.println("\n" + result.length()); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } result = "<html><body>" + result + "</body></html>"; File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html"); try { PrintWriter pw = new PrintWriter(file); pw.print(result); pw.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:ca.uhn.fhir.jpa.dao.FhirSearchDao.java
License:Apache License
@Override public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) { Validate.notBlank(theContext, "theContext must be provided"); Validate.notBlank(theSearchParam, "theSearchParam must be provided"); Validate.notBlank(theText, "theSearchParam must be provided"); long start = System.currentTimeMillis(); String[] contextParts = StringUtils.split(theContext, '/'); if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) { throw new InvalidRequestException("Invalid context: " + theContext); }// w ww. j a v a 2 s .c om IdDt contextId = new IdDt(contextParts[0], contextParts[1]); Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextId, myEntityManager); FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); //@formatter:off Query textQuery = qb.phrase().withSlop(2).onField("myContentText").boostedTo(4.0f) .andField("myContentTextEdgeNGram").boostedTo(2.0f).andField("myContentTextNGram").boostedTo(1.0f) .andField("myContentTextPhonetic").boostedTo(0.5f).sentence(theText.toLowerCase()).createQuery(); Query query = qb.bool() .must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery()) .must(textQuery).createQuery(); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class); ftq.setProjection("myContentText"); ftq.setMaxResults(20); List<?> resultList = ftq.getResultList(); List<Suggestion> suggestions = Lists.newArrayList(); for (Object next : resultList) { Object[] nextAsArray = (Object[]) next; String nextValue = (String) nextAsArray[0]; try { MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = 
em.getSearchFactory().getAnalyzer(ResourceTable.class); formatter.setAnalyzer("myContentTextPhonetic"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); formatter.setAnalyzer("myContentTextNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); formatter.setFindPhrasesWith(); formatter.setAnalyzer("myContentTextEdgeNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentText"); // highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextEdgeNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextPhonetic"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); } catch (Exception e) { throw new InternalErrorException(e); } } Collections.sort(suggestions); Set<String> terms = Sets.newHashSet(); for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext();) { String nextTerm = iter.next().getTerm().toLowerCase(); if (!terms.add(nextTerm)) { iter.remove(); } } long delay = System.currentTimeMillis() - start; ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[] { terms.size(), theText, delay }); return suggestions; }
From source file:ca.uhn.fhir.jpa.dao.FulltextSearchSvcImpl.java
License:Apache License
@Override public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) { Validate.notBlank(theContext, "theContext must be provided"); Validate.notBlank(theSearchParam, "theSearchParam must be provided"); Validate.notBlank(theText, "theSearchParam must be provided"); long start = System.currentTimeMillis(); String[] contextParts = StringUtils.split(theContext, '/'); if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) { throw new InvalidRequestException("Invalid context: " + theContext); }/* w w w . ja v a 2s.c o m*/ Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextParts[0], contextParts[1], myForcedIdDao); FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); //@formatter:off Query textQuery = qb.phrase().withSlop(2).onField("myContentText").boostedTo(4.0f) .andField("myContentTextEdgeNGram").boostedTo(2.0f).andField("myContentTextNGram").boostedTo(1.0f) .andField("myContentTextPhonetic").boostedTo(0.5f).sentence(theText.toLowerCase()).createQuery(); Query query = qb.bool() .must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery()) .must(textQuery).createQuery(); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class); ftq.setProjection("myContentText"); ftq.setMaxResults(20); List<?> resultList = ftq.getResultList(); List<Suggestion> suggestions = Lists.newArrayList(); for (Object next : resultList) { Object[] nextAsArray = (Object[]) next; String nextValue = (String) nextAsArray[0]; try { MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = 
em.getSearchFactory().getAnalyzer(ResourceTable.class); formatter.setAnalyzer("myContentTextPhonetic"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); formatter.setAnalyzer("myContentTextNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); formatter.setFindPhrasesWith(); formatter.setAnalyzer("myContentTextEdgeNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentText"); // highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextEdgeNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextPhonetic"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); } catch (Exception e) { throw new InternalErrorException(e); } } Collections.sort(suggestions); Set<String> terms = Sets.newHashSet(); for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext();) { String nextTerm = iter.next().getTerm().toLowerCase(); if (!terms.add(nextTerm)) { iter.remove(); } } long delay = System.currentTimeMillis() - start; ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[] { terms.size(), theText, delay }); return suggestions; }
From source file:ch.admin.isb.hermes5.business.search.HighlighterRepository.java
License:Apache License
/**
 * Creates a {@link HighlighterWrapper} for the given query.
 *
 * <p>The wrapped {@link Highlighter} uses a default {@link SimpleHTMLFormatter}
 * and scores fragments with a {@link QueryScorer} built from {@code query}. The
 * wrapper additionally receives the configured number of fragments, the
 * searcher, the analyzer and the list returned by {@code trimstringsList()}.
 *
 * @param analyzer  the analyzer passed through to the wrapper
 * @param isearcher the index searcher passed through to the wrapper
 * @param query     the query whose terms are to be highlighted
 * @return a new wrapper around a freshly created highlighter
 */
public HighlighterWrapper getHighlighter(Analyzer analyzer, IndexSearcher isearcher, Query query) {
    Highlighter queryHighlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
    return new HighlighterWrapper(
            queryHighlighter,
            numberOfFragments.getIntegerValue(),
            isearcher,
            analyzer,
            trimstringsList());
}
From source file:ci6226.eval_index_reader.java
public static void Searchit(IndexReader reader, IndexSearcher searcher, Analyzer _analyzer, String field, String[] _searchList, int _topn, PrintWriter writer) throws org.apache.lucene.queryparser.classic.ParseException, IOException, InvalidTokenOffsetsException { Analyzer analyzer = _analyzer;/* w w w. j a v a 2 s. com*/ QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); String[] testString = _searchList;//{"to","123","impressed","Geezer","geezer","semi-busy","\"eggs vegetable\"","gs veget","\"gs veget\""};//,"good","I","but","coffee"}; for (int j = 0; j < testString.length; j++) { String lstr = String.valueOf(j) + "," + testString[j]; Query query = parser.parse(testString[j]); System.out.println("Searching for: " + query.toString(field)); TopDocs topdocs = searcher.search(query, _topn); lstr += "," + topdocs.totalHits; ScoreDoc[] scoreDocs = topdocs.scoreDocs; SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query.rewrite(reader))); for (int i = 0; i < scoreDocs.length; i++) { int doc = scoreDocs[i].doc; Document document = searcher.doc(doc); // System.out.println("Snippet=" + document.get(field)); System.out.println(i); String text = document.get(field); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), doc, field, analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); String line = ""; for (int m = 0; m < frag.length; m++) { if ((frag[m] != null) && (frag[m].getScore() > 0)) { System.out.println((frag[m].toString())); line = frag[m].toString(); line = line.replaceAll("\n", ""); line = line.replaceAll("\r", ""); line = line.replaceAll("\"", ""); line = line.replaceAll(",", " "); } } lstr += "," + line; lstr += "," + String.valueOf(scoreDocs[i].score); } writer.write(lstr + "\n"); System.out.println("Search for:" + 
testString[j] + " Total hits=" + scoreDocs.length); System.out.println("////////////////////////////////////////////////////"); } }
From source file:ci6226.loadIndex.java
/** * This demonstrates a typical paging search scenario, where the search * engine presents pages of size n to the user. The user can then go to the * next page if interested in the next hits. * * When the query is executed for the first time, then only enough results * are collected to fill 5 result pages. If the user wants to page beyond * this limit, then the query is executed another time and all hits are * collected.// w ww . j a va2 s. c o m * */ public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("review_id"); if (path != null) { System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score); String title = doc.get("business_id"); if (title != null) { String text = doc.get("text"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), hits[i].doc, "text", doc, analyzer);//TokenSources.getAnyTokenStream(searcher.getIndexReader() ,"text", analyzer); 
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(ANSI_RED, ANSI_RESET); // SimpleFragmenter fragmenter = new SimpleFragmenter(80); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4); System.out.print("Snippet=\t"); for (int j = 0; j < frag.length; j++) { if ((frag[j] != null) && (frag[j].getScore() > 0)) { System.out.println((frag[j].toString())); } } //System.out.print("\n"); System.out.println("Full Review=\t" + doc.get("text") + "\nBusinessID=\t" + title); } } else { System.out.println((i + 1) + ". " + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); int cpage = start / hitsPerPage; System.out.println(String.format("Current page=%d,max page=%d", cpage + 1, 1 + numTotalHits / hitsPerPage)); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) { break; } end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java
License:Apache License
@Override public SearchResult search(SearchRequest sResquest) throws EsearchException { try {// w ww. j ava 2 s . c o m esearchSystem.flushEvents(2000); } catch (EsearchException e) { LOGGER.error("Esearch flush events error. \n{}", e); } String queryString = sResquest.getQuery(); String queryField = sResquest.getField(); LOGGER.info("The search request coming: queryField:{},queryString:{}", queryField, queryString); Analyzer analyzer = esearchSystem.getAnalyzer(); QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryField, analyzer); SearchResult result = new SearchResult(); List<EsearchMultiReader<R>> readers = null; MultiReader multiReader = null; IndexSearcher searcher = null; try { Query query = null; if (Strings.isNullOrEmpty(queryString)) { query = new MatchAllDocsQuery(); } else { query = queryParser.parse(queryString); } readers = esearchSystem.getIndexReaders(); multiReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false); searcher = new IndexSearcher(multiReader); long start = System.currentTimeMillis(); TopDocs docs = searcher.search(query, null, sResquest.getSize()); long end = System.currentTimeMillis(); result.setTime(end - start); result.setTotalDocs(multiReader.numDocs()); result.setTotalHits(docs.totalHits); LOGGER.info("Got {} hits. 
Cost:{} ms", docs.totalHits, end - start); if (sResquest.getSearchType() == SearchRequest.SearchType.COUNT) { return result; } ScoreDoc[] scoreDocs = docs.scoreDocs; ArrayList<SearchHit> hitList = new ArrayList<SearchHit>(scoreDocs.length); for (ScoreDoc scoreDoc : scoreDocs) { SearchHit hit = new SearchHit(); hit.setScore(scoreDoc.score); int docID = scoreDoc.doc; Document doc = multiReader.document(docID); String content = doc.get(queryField); Scorer qs = new QueryScorer(query); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hl\">", "</span>"); Highlighter hl = new Highlighter(formatter, qs); String[] fragments = hl.getBestFragments(analyzer, queryField, content, 1); Map<String, String[]> fields = convert(doc, sResquest.getSearchType()); fields.put("fragment", fragments); hit.setFields(fields); hitList.add(hit); } result.setHits(hitList.toArray(new SearchHit[hitList.size()])); return result; } catch (Exception e) { LOGGER.error(e.getMessage(), e); throw new EsearchException(e.getMessage(), e); } finally { if (multiReader != null) { try { multiReader.close(); } catch (IOException e) { LOGGER.error(e.getMessage(), e); } } esearchSystem.returnIndexReaders(readers); } }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
private static List<Map<String, String>> search(String searchText, String path, String title, LoadQuery loadQuery) {//from w w w.j av a2 s . c om try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(INDEX_PATH + path))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); QueryParser parser = new QueryParser("indexedContent", analyzer); Query query = parser.parse(searchText); TopDocs resultDocs = searcher.search(query, 100); ScoreDoc[] scoreDocs = resultDocs.scoreDocs; // SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>"); Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(150)); List<Map<String, String>> result = new ArrayList<>(); List<Integer> idList = new ArrayList<>(); for (int i = 0; i < scoreDocs.length; i++) { Document doc = searcher.doc(scoreDocs[i].doc); Integer id = Integer.valueOf(doc.get("id")); if (!idList.contains(id)) { String indexedContent = doc.get("indexedContent"); TokenStream tokenStream = analyzer.tokenStream("indexedContent", indexedContent); Map<String, String> data = loadQuery.getById(id); String highlighterString = highlighter.getBestFragment(tokenStream, indexedContent); if (highlighterString.contains(SEPARATOR)) { String[] array = highlighterString.split(SEPARATOR); data.put(title, array[0]); if (array.length > 1) { data.put("summary", array[1]); } } else { data.put("summary", highlighterString); } result.add(data); idList.add(id); } } return result; } catch (Exception e) { logger.error("search failed ...", e); } return new ArrayList<>(); }