List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragments
public final String getBestFragments(TokenStream tokenStream, String text, int maxNumFragments, String separator) throws IOException, InvalidTokenOffsetsException
From source file: com.gitblit.LuceneExecutor.java
License:Apache License
/** * /*www . j av a 2 s. co m*/ * @param analyzer * @param query * @param content * @param result * @return * @throws IOException * @throws InvalidTokenOffsetsException */ private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result) throws IOException, InvalidTokenOffsetsException { if (content == null) { content = ""; } int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150; QueryScorer scorer = new QueryScorer(query, "content"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); // use an artificial delimiter for the token String termTag = "!!--["; String termTagEnd = "]--!!"; SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3); if (ArrayUtils.isEmpty(fragments)) { if (SearchObjectType.blob == result.type) { return ""; } // clip commit message String fragment = content; if (fragment.length() > fragmentLength) { fragment = fragment.substring(0, fragmentLength) + "..."; } return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>"; } // make sure we have unique fragments Set<String> uniqueFragments = new LinkedHashSet<String>(); for (String fragment : fragments) { uniqueFragments.add(fragment); } fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]); StringBuilder sb = new StringBuilder(); for (int i = 0, len = fragments.length; i < len; i++) { String fragment = fragments[i]; String tag = "<pre class=\"text\">"; // resurrect the raw fragment from removing the artificial delimiters String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); // determine position of the raw fragment in the content int pos = content.indexOf(raw); // restore complete first line of fragment int c = pos; while (c > 0) { c--; if 
(content.charAt(c) == '\n') { break; } } if (c > 0) { // inject leading chunk of first fragment line fragment = content.substring(c + 1, pos) + fragment; } if (SearchObjectType.blob == result.type) { // count lines as offset into the content for this fragment int line = Math.max(1, StringUtils.countLines(content.substring(0, pos))); // create fragment tag with line number and language String lang = ""; String ext = StringUtils.getFileExtension(result.path).toLowerCase(); if (!StringUtils.isEmpty(ext)) { // maintain leading space! lang = " lang-" + ext; } tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang); } sb.append(tag); // replace the artificial delimiter with html tags String html = StringUtils.escapeForHtml(fragment, false); html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"); sb.append(html); sb.append("</pre>"); if (i < len - 1) { sb.append("<span class=\"ellipses\">...</span><br/>"); } } return sb.toString(); }
From source file: com.gitblit.service.LuceneService.java
License:Apache License
/** * * @param analyzer//from www . ja v a 2s.c o m * @param query * @param content * @param result * @return * @throws IOException * @throws InvalidTokenOffsetsException */ private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result) throws IOException, InvalidTokenOffsetsException { if (content == null) { content = ""; } int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4); int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150; QueryScorer scorer = new QueryScorer(query, "content"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); // use an artificial delimiter for the token String termTag = "!!--["; String termTagEnd = "]--!!"; SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3); if (ArrayUtils.isEmpty(fragments)) { if (SearchObjectType.blob == result.type) { return ""; } // clip commit message String fragment = content; if (fragment.length() > fragmentLength) { fragment = fragment.substring(0, fragmentLength) + "..."; } return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>"; } // make sure we have unique fragments Set<String> uniqueFragments = new LinkedHashSet<String>(); for (String fragment : fragments) { uniqueFragments.add(fragment); } fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]); StringBuilder sb = new StringBuilder(); for (int i = 0, len = fragments.length; i < len; i++) { String fragment = fragments[i]; String tag = "<pre class=\"text\">"; // resurrect the raw fragment from removing the artificial delimiters String raw = fragment.replace(termTag, "").replace(termTagEnd, ""); // determine position of the raw fragment in the content int pos = content.indexOf(raw); // restore 
complete first line of fragment int c = pos; while (c > 0) { c--; if (content.charAt(c) == '\n') { break; } } if (c > 0) { // inject leading chunk of first fragment line fragment = content.substring(c + 1, pos) + fragment; } if (SearchObjectType.blob == result.type) { // count lines as offset into the content for this fragment int line = Math.max(1, StringUtils.countLines(content.substring(0, pos))); // create fragment tag with line number and language String lang = ""; String ext = StringUtils.getFileExtension(result.path).toLowerCase(); if (!StringUtils.isEmpty(ext)) { // maintain leading space! lang = " lang-" + ext; } tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang); } sb.append(tag); // replace the artificial delimiter with html tags String html = StringUtils.escapeForHtml(fragment, false); html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>"); sb.append(html); sb.append("</pre>"); if (i < len - 1) { sb.append("<span class=\"ellipses\">...</span><br/>"); } } return sb.toString(); }
From source file: com.knowledgetree.indexer.IndexerManager.java
/** * Returns a set of hits from lucene./*from ww w.j ava2 s .c o m*/ * @param queryString * @param maxHits * @return * @throws Exception */ public QueryHit[] query(String queryString, int maxHits, boolean getText) throws Exception { synchronized (this) { this.queryCount++; } String tmp = queryString.toLowerCase(); boolean queryContent = tmp.indexOf("content") != -1; boolean queryDiscussion = tmp.indexOf("discussion") != -1; QueryParser parser = new QueryParser("Content", this.analyzer); Query query = parser.parse(queryString); // rewriting is important for complex queries. this is a must-do according to sources! query = query.rewrite(this.queryReader); // run the search! Hits hits = this.querySearcher.search(query); // now we can apply the maximum hits to the results we return! int max = (maxHits == -1) ? hits.length() : maxHits; if (hits.length() < max) { max = hits.length(); } QueryHit[] results = new QueryHit[max]; Highlighter highlighter = new Highlighter(this, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(this.resultFragmentSize)); for (int i = 0; i < max; i++) { Document doc = hits.doc(i); QueryHit hit = new QueryHit(); hit.DocumentID = IndexerManager.stringToLong(doc.get("DocumentID")); hit.Rank = hits.score(i); hit.Title = doc.get("Title"); if (getText) { String text = ""; if (queryContent) { text += doc.get("Content"); } if (queryDiscussion) { text += doc.get("Discussion"); } // TODO: we can create a field.getReader(). the fragmenting needs to // be updated to deal with the reader only. would prefer not having to // load the document into a string! TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text)); hit.Content = highlighter.getBestFragments(tokenStream, text, this.resultFragments, this.resultSeperator); } else { hit.Content = ""; } hit.Version = doc.get("Version"); results[i] = hit; } return results; }
From source file: com.leavesfly.lia.tool.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);/*from ww w . jav a 2 s .c om*/ } String filename = args[0]; String searchText = "term"; // #1 QueryParser parser = new QueryParser(Version.LUCENE_30, // #1 "f", // #1 new StandardAnalyzer(Version.LUCENE_30));// #1 Query query = parser.parse(searchText); // #1 SimpleHTMLFormatter formatter = // #2 new SimpleHTMLFormatter("<span class=\"highlight\">", // #2 "</span>"); // #2 TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30) // #3 .tokenStream("f", new StringReader(text)); // #3 QueryScorer scorer = new QueryScorer(query, "f"); // #4 Highlighter highlighter = new Highlighter(formatter, scorer); // #5 highlighter.setTextFragmenter( // #6 new SimpleSpanFragmenter(scorer)); // #6 String result = // #7 highlighter.getBestFragments(tokens, text, 3, "..."); // #7 FileWriter writer = new FileWriter(filename); // #8 writer.write("<html>"); // #8 writer.write("<style>\n" + // #8 ".highlight {\n" + // #8 " background: yellow;\n" + // #8 "}\n" + // #8 "</style>"); // #8 writer.write("<body>"); // #8 writer.write(result); // #8 writer.write("</body></html>"); // #8 writer.close(); // #8 }
From source file: com.liferay.portal.search.lucene.LuceneHelperImpl.java
License:Open Source License
public String getSnippet(Query query, String field, String s, int maxNumFragments, int fragmentLength, String fragmentSuffix, String preTag, String postTag) throws IOException { SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(preTag, postTag); QueryScorer queryScorer = new QueryScorer(query, field); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer); highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength)); TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s)); try {//from w w w . j av a2 s. c o m String snippet = highlighter.getBestFragments(tokenStream, s, maxNumFragments, fragmentSuffix); if (Validator.isNotNull(snippet) && !StringUtil.endsWith(snippet, fragmentSuffix)) { snippet = snippet.concat(fragmentSuffix); } return snippet; } catch (InvalidTokenOffsetsException itoe) { throw new IOException(itoe.getMessage()); } }
From source file: com.liferay.portal.search.lucene31.LuceneHelperImpl.java
License:Open Source License
public String getSnippet(Query query, String field, String s, int maxNumFragments, int fragmentLength, String fragmentSuffix, String preTag, String postTag) throws IOException { SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(preTag, postTag); QueryScorer queryScorer = new QueryScorer(query, field); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer); highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength)); TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s)); try {//from w w w .ja va 2 s .c om String snippet = highlighter.getBestFragments(tokenStream, s, maxNumFragments, fragmentSuffix); if (Validator.isNotNull(snippet) && !StringUtil.endsWith(snippet, fragmentSuffix)) { snippet = snippet + fragmentSuffix; } return snippet; } catch (InvalidTokenOffsetsException itoe) { throw new IOException(itoe.getMessage()); } }
From source file: com.mathworks.xzheng.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);/*from w w w .j a va 2 s. c om*/ } String filename = args[0]; String searchText = "term"; // #1 QueryParser parser = new QueryParser(Version.LUCENE_46, // #1 "f", // #1 new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // #1 SimpleHTMLFormatter formatter = // #2 new SimpleHTMLFormatter("<span class=\"highlight\">", // #2 "</span>"); // #2 TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // #3 .tokenStream("f", new StringReader(text)); // #3 QueryScorer scorer = new QueryScorer(query, "f"); // #4 Highlighter highlighter = new Highlighter(formatter, scorer); // #5 highlighter.setTextFragmenter( // #6 new SimpleSpanFragmenter(scorer)); // #6 String result = // #7 highlighter.getBestFragments(tokens, text, 3, "..."); // #7 FileWriter writer = new FileWriter(filename); // #8 writer.write("<html>"); // #8 writer.write("<style>\n" + // #8 ".highlight {\n" + // #8 " background: yellow;\n" + // #8 "}\n" + // #8 "</style>"); // #8 writer.write("<body>"); // #8 writer.write(result); // #8 writer.write("</body></html>"); // #8 writer.close(); // #8 }
From source file: com.meltmedia.cadmium.search.SearchService.java
License:Apache License
/**
 * Runs a search for {@code query}, optionally restricted to documents whose
 * "path" field starts with {@code path}, and returns a result map with keys
 * "number-hits" (total hit count) and "results" (a list of per-hit maps with
 * "excerpt", "score", "title" and "path").
 *
 * @param query the raw user query; special characters are escaped below
 * @param path optional path prefix filter; ignored when blank
 * @return the populated result map (never null)
 * @throws Exception propagated from the search template
 */
private Map<String, Object> buildSearchResults(final String query, final String path) throws Exception {
    logger.info("Running search for [{}]", query);
    final Map<String, Object> resultMap = new LinkedHashMap<String, Object>();
    new SearchTemplate(provider) {
        public void doSearch(IndexSearcher index) throws IOException, ParseException {
            QueryParser parser = createParser(getAnalyzer());
            // seed defaults so callers always see both keys, even on no-op
            resultMap.put("number-hits", 0);
            List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();
            resultMap.put("results", resultList);
            if (index != null && parser != null) {
                // escape query metacharacters so user input is treated literally
                String literalQuery = query.replaceAll(ALLOWED_CHARS_PATTERN, "\\\\$1");
                Query query1 = parser.parse(literalQuery);
                if (StringUtils.isNotBlank(path)) {
                    // AND the parsed query with a path-prefix restriction
                    Query pathPrefix = new PrefixQuery(new Term("path", path));
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(pathPrefix, Occur.MUST);
                    boolQuery.add(query1, Occur.MUST);
                    query1 = boolQuery;
                }
                TopDocs results = index.search(query1, null, 100000);
                QueryScorer scorer = new QueryScorer(query1);
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                        scorer);
                logger.info("Search returned {} hits.", results.totalHits);
                resultMap.put("number-hits", results.totalHits);
                for (ScoreDoc doc : results.scoreDocs) {
                    Document document = index.doc(doc.doc);
                    String content = document.get("content");
                    String title = document.get("title");
                    Map<String, Object> result = new LinkedHashMap<String, Object>();
                    String excerpt = "";
                    // best-effort excerpt: try the content first, fall back to
                    // the title, and finally to an empty excerpt — a failed
                    // highlight must not fail the whole search
                    try {
                        excerpt = highlighter.getBestFragments(
                                parser.getAnalyzer().tokenStream(null, new StringReader(content)), content, 3,
                                "...");
                        excerpt = fixExcerpt(excerpt);
                        result.put("excerpt", excerpt);
                    } catch (Exception e) {
                        logger.debug("Failed to get search excerpt from content.", e);
                        try {
                            excerpt = highlighter.getBestFragments(
                                    parser.getAnalyzer().tokenStream(null, new StringReader(title)), title, 1,
                                    "...");
                            excerpt = fixExcerpt(excerpt);
                            result.put("excerpt", excerpt);
                        } catch (Exception e1) {
                            logger.debug("Failed to get search excerpt from title.", e1);
                            result.put("excerpt", "");
                        }
                    }
                    result.put("score", doc.score);
                    result.put("title", title);
                    result.put("path", document.get("path"));
                    resultList.add(result);
                }
            }
        }
    }.search();
    return resultMap;
}
From source file: com.recomdata.search.Finder.java
License:Open Source License
private void display(Document doc, int id, float score, Query query, Analyzer analyzer) { System.out.println("repository = " + doc.get("repository")); System.out.println("path = " + doc.get("path")); System.out.println("extension = " + doc.get("extension")); System.out.println("title = " + doc.get("title")); Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), new QueryScorer(query, "contents")); highlighter.setTextFragmenter(new SimpleFragmenter(50)); String summary = doc.get("contents"); TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(summary)); try {//from w w w .j a v a 2s. c o m System.out.println("contents = " + highlighter.getBestFragments(tokenStream, summary, 5, "...")); } catch (IOException e) { System.out.println("exception: " + e.getMessage()); } System.out.println(); }
From source file: com.taobao.common.tedis.support.lucene.analysis.xanalyzer.TestHighLight.java
License:Open Source License
/**
 * Demo: indexes CONTENT into an in-memory index with the XFactory analyzer,
 * searches for QUERY, and prints highlighted (bold) fragments of each hit
 * using the stored term-position vectors.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Directory ramDir = new RAMDirectory();
    try {
        // index a single document containing CONTENT, storing term vectors
        // with positions and offsets so the highlighter can reuse them
        IndexWriter writer = new IndexWriter(ramDir, XFactory.getWriterAnalyzer());
        Document doc = new Document();
        Field fd = new Field(FIELD_NAME, CONTENT, Field.Store.YES, Field.Index.TOKENIZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS);
        doc.add(fd);
        writer.addDocument(doc);
        writer.optimize();
        writer.close();

        IndexReader reader = IndexReader.open(ramDir);
        String queryString = QUERY;
        QueryParser parser = new QueryParser(FIELD_NAME, XFactory.getWriterAnalyzer());
        Query query = parser.parse(queryString);
        System.out.println(query);
        Searcher searcher = new IndexSearcher(ramDir);
        // rewrite expands multi-term queries before scoring/highlighting
        query = query.rewrite(reader);
        System.out.println(query);
        System.out.println("Searching for: " + query.toString(FIELD_NAME));
        Hits hits = searcher.search(query);

        // wrap matched terms in bold tags, 50-char fragments
        BoldFormatter formatter = new BoldFormatter();
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(50));
        for (int i = 0; i < hits.length(); i++) {
            String text = hits.doc(i).get(FIELD_NAME);
            int maxNumFragmentsRequired = 5;
            String fragmentSeparator = "...";
            // rebuild the token stream from the stored term vector instead of
            // re-analyzing the text
            TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(hits.id(i), FIELD_NAME);
            TokenStream tokenStream = TokenSources.getTokenStream(tpv);
            /*
             * Alternative (re-analyze instead of using the term vector):
             * TokenStream tokenStream2 = (new StandardAnalyzer())
             *         .tokenStream(FIELD_NAME, new StringReader(text));
             * do {
             *     Token t = tokenStream2.next();
             *     if (t == null) break;
             *     System.out.println("\t" + t.startOffset() + "," + t.endOffset() + "\t" + t.termText());
             * } while (true);
             */
            String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
                    fragmentSeparator);
            System.out.println("\n" + result);
        }
        reader.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}