List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragments
public final String getBestFragments(TokenStream tokenStream, String text, int maxNumFragments, String separator) throws IOException, InvalidTokenOffsetsException
From source file:aos.lucene.tools.HighlightIt.java
License:Apache License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.err.println("Usage: HighlightIt <filename-out>"); System.exit(-1);//from w w w .j ava2 s. c om } String filename = args[0]; String searchText = "term"; // QueryParser parser = new QueryParser(Version.LUCENE_46, // "f", // new StandardAnalyzer(Version.LUCENE_46));// #1 Query query = parser.parse(searchText); // SimpleHTMLFormatter formatter = // new SimpleHTMLFormatter("<span class=\"highlight\">", // "</span>"); // TokenStream tokens = new StandardAnalyzer(Version.LUCENE_46) // .tokenStream("f", new StringReader(text)); // QueryScorer scorer = new QueryScorer(query, "f"); // Highlighter highlighter = new Highlighter(formatter, scorer); // highlighter.setTextFragmenter( // new SimpleSpanFragmenter(scorer)); // String result = // highlighter.getBestFragments(tokens, text, 3, "..."); // FileWriter writer = new FileWriter(filename); // writer.write("<html>"); // writer.write("<style>\n" + // ".highlight {\n" + // " background: yellow;\n" + // "}\n" + // "</style>"); // writer.write("<body>"); // writer.write(result); // writer.write("</body></html>"); // writer.close(); // }
From source file:blackbelt.lucene.testHighlight.MainHighlight.java
License:Open Source License
public static void main(String[] args) throws ParseException, IOException { String keyWord = "hibernate"; String language = "en"; String text = "Hibernate is an object-relational mapping (ORM) library for the Java language," + "providing a framework for mapping an object-oriented domain model to a traditional relational" + "database. Hibernate solves object-relational impedance mismatch problems by replacing direct " + "persistence-related database accesses with high-level object handling functions. " + "Hibernate is free software that is distributed under the GNU Lesser General Public License. " + "Hibernate's primary feature is mapping from Java classes to database tables " + "(and from Java data types to SQL data types). Hibernate also provides data query" + " and retrieval facilities. Hibernate generates the SQL calls and attempts to relieve" + " the developer from manual result set handling and object conversion and keep the application" + " portable to all supported SQL databases with little performance overhead."; String result;// w ww . j a v a 2 s . 
c om QueryParser parser = new QueryParser(Version.LUCENE_30, "title", new StandardAnalyzer(Version.LUCENE_30)); Query query = parser.parse(keyWord); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>"); TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("title", new StringReader(text)); QueryScorer scorer = new QueryScorer(query, "title"); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 85)); try { result = highlighter.getBestFragments(tokens, text, 4, "<BR/>..."); System.out.println(result); System.out.println("\n" + result.length()); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } result = "<html><body>" + result + "</body></html>"; File file = new File("C:\\Users\\forma702\\Desktop\\testHighlight.html"); try { PrintWriter pw = new PrintWriter(file); pw.print(result); pw.close(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:ca.dracode.ais.indexer.FileSearcher.java
License:Open Source License
/** * Takes a list of Documents and highlights information relevant to a given Query * @param docs The documents to highlight * @param qry The query used to highlight the documents * @param type The type of the search, one of QUERY_BOOLEAN, * which just notes the page on which the term exists or QUERY_STANDARD, * which gives highlighted fragments and the page on which they exist. * @param term The term that created the query * @param maxResults The maximum number of results that will be returned * @return A SearchResult containing the results sorted by relevance and page *///ww w . j a v a2s. com private SearchResult getHighlightedResults(List<Document> docs, Query qry, int type, String term, int maxResults) { try { int numResults = 0; LinkedHashMap<String, LinkedHashMap<Integer, List<String>>> results = new LinkedHashMap<String, LinkedHashMap<Integer, List<String>>>(); for (int i = 0; i < docs.size() && numResults < maxResults; i++) { Document d = docs.get(i); int docPage = Integer.parseInt(d.get("page")); String name = d.get("path"); LinkedHashMap<Integer, List<String>> docResult = results.get(name); if (docResult == null) { docResult = new LinkedHashMap<Integer, List<String>>(); results.put(name, docResult); } if (type != FileSearcher.QUERY_BOOLEAN) { String contents = d.get("text"); Highlighter highlighter = new Highlighter(new QueryScorer(qry)); String[] frag = null; try { frag = highlighter.getBestFragments(new SimpleAnalyzer(Version.LUCENE_47), "text", contents, maxResults - numResults); numResults += frag.length; } catch (IOException e) { Log.e(TAG, "Error while reading index", e); } catch (InvalidTokenOffsetsException e) { Log.e(TAG, "Error while highlighting", e); } if (frag != null) { Log.i(TAG, "Frags: " + frag.length + " " + frag + " " + frag[0]); } ArrayList<String> tmpList = new ArrayList<String>( Arrays.asList(frag != null ? 
frag : new String[0])); Log.i(TAG, "list " + tmpList.getClass().getName()); docResult.put(docPage, tmpList); } else { ArrayList<String> tmp = new ArrayList<String>(); tmp.add(term); docResult.put(docPage, tmp); } } Log.i(TAG, "" + results.size()); return new SearchResult(results); } catch (Exception e) { Log.e("TAG", "Error while Highlighting", e); return null; } }
From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java
License:Apache License
@Override public SearchResult search(SearchRequest sResquest) throws EsearchException { try {/*from w w w . j a va2 s .co m*/ esearchSystem.flushEvents(2000); } catch (EsearchException e) { LOGGER.error("Esearch flush events error. \n{}", e); } String queryString = sResquest.getQuery(); String queryField = sResquest.getField(); LOGGER.info("The search request coming: queryField:{},queryString:{}", queryField, queryString); Analyzer analyzer = esearchSystem.getAnalyzer(); QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryField, analyzer); SearchResult result = new SearchResult(); List<EsearchMultiReader<R>> readers = null; MultiReader multiReader = null; IndexSearcher searcher = null; try { Query query = null; if (Strings.isNullOrEmpty(queryString)) { query = new MatchAllDocsQuery(); } else { query = queryParser.parse(queryString); } readers = esearchSystem.getIndexReaders(); multiReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false); searcher = new IndexSearcher(multiReader); long start = System.currentTimeMillis(); TopDocs docs = searcher.search(query, null, sResquest.getSize()); long end = System.currentTimeMillis(); result.setTime(end - start); result.setTotalDocs(multiReader.numDocs()); result.setTotalHits(docs.totalHits); LOGGER.info("Got {} hits. 
Cost:{} ms", docs.totalHits, end - start); if (sResquest.getSearchType() == SearchRequest.SearchType.COUNT) { return result; } ScoreDoc[] scoreDocs = docs.scoreDocs; ArrayList<SearchHit> hitList = new ArrayList<SearchHit>(scoreDocs.length); for (ScoreDoc scoreDoc : scoreDocs) { SearchHit hit = new SearchHit(); hit.setScore(scoreDoc.score); int docID = scoreDoc.doc; Document doc = multiReader.document(docID); String content = doc.get(queryField); Scorer qs = new QueryScorer(query); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"hl\">", "</span>"); Highlighter hl = new Highlighter(formatter, qs); String[] fragments = hl.getBestFragments(analyzer, queryField, content, 1); Map<String, String[]> fields = convert(doc, sResquest.getSearchType()); fields.put("fragment", fragments); hit.setFields(fields); hitList.add(hit); } result.setHits(hitList.toArray(new SearchHit[hitList.size()])); return result; } catch (Exception e) { LOGGER.error(e.getMessage(), e); throw new EsearchException(e.getMessage(), e); } finally { if (multiReader != null) { try { multiReader.close(); } catch (IOException e) { LOGGER.error(e.getMessage(), e); } } esearchSystem.returnIndexReaders(readers); } }
From source file:com.appeligo.alerts.KeywordAlertThread.java
License:Apache License
/** * @param searchExecutor callback to get the set of hits for the given query. This can be * executed in different ways./*from w w w . j ava 2 s.c o m*/ * @return true if we hit too many consecutive exceptions so we broke out of the loop */ private boolean executeKeywordSearch(SearchExecutor searchExecutor, String messagePrefix, boolean groupQueries) { ChunkedResults<KeywordAlert> results = KeywordAlert.getAllInNormalizedQueryOrder(); Hits hits = null; String lastNormalizedQuery = null; Query lastLuceneQuery = null; int consecutiveExceptions = 0; results.beforeFirst(); while (results.next() && isActive()) { KeywordAlert keywordAlert = results.get(); try { if (keywordAlert.isDeleted() || keywordAlert.isDisabled()) { if (log.isDebugEnabled()) log.debug("keyword alert is deleted or disabled"); continue; } User user = keywordAlert.getUser(); if (user == null) { if (log.isDebugEnabled()) log.debug("keyword alert is implicitly deleted (user is null)"); keywordAlert.setDeleted(true); keywordAlert.save(); continue; } if (helper.maxAlertsExceeded(keywordAlert)) { continue; } if (groupQueries) { if ((hits == null) || (!keywordAlert.getNormalizedQuery().equals(lastNormalizedQuery))) { hits = searchExecutor.search(null, keywordAlert.getNormalizedQuery()); lastLuceneQuery = searchExecutor.getLuceneQuery(); } else if (log.isDebugEnabled()) log.debug("Not searching on " + keywordAlert.getNormalizedQuery() + " again"); } else { hits = searchExecutor.search(keywordAlert.getUser().getLineupId(), keywordAlert.getNormalizedQuery()); // Note that I'm searching with the lineup from the user, which will // only ensure that the liveIndex doesn't return shows that don't ever // play for this lineup. However, it does not guarantee that the show // on this user's lineup is playing at the same time (meaning alerts // might tell the user of a show that is only in the future). 
lastLuceneQuery = searchExecutor.getLuceneQuery(); } lastNormalizedQuery = keywordAlert.getNormalizedQuery(); Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(lastLuceneQuery)); PorterStemAnalyzer analyzer = new PorterStemAnalyzer(LuceneIndexer.STOP_WORDS); for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); if (!isActive()) { break; } // if (groupQueries && (!"true".equals(doc.get("lineup-"+keywordAlert.getUser().getLineupId())))) { if (groupQueries && (doc.get("lineup-" + keywordAlert.getUser().getLineupId() + "-startTime") == null)) { // This "if" statement checks to make sure the program is or did play on the user's // lineup, which might be on a different station, a different time, past or future. if (log.isDebugEnabled()) log.debug(doc.get("programTitle") + " matched on " + keywordAlert.getNormalizedQuery() + " but it isn't airing on this user's lineup anytime soon."); continue; } Transaction transaction = HibernateUtil.currentSession().beginTransaction(); try { if ((!helper.maxAlertsExceeded(keywordAlert)) && helper.isNewMatch(keywordAlert, doc)) { if (log.isDebugEnabled()) log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for " + keywordAlert.getNormalizedQuery() + "... sending messages"); String text = doc.get("text"); String fragments = null; if (text != null) { TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(text)); fragments = highlighter.getBestFragments(tokenStream, text, 3, "..."); } helper.incrementTodaysAlertCount(keywordAlert); helper.sendMessages(keywordAlert, fragments, doc, messagePrefix); } else if (log.isDebugEnabled()) log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for " + keywordAlert.getNormalizedQuery() + " but max exceeded or we already matched this one"); } catch (Throwable t) { log.error( "Error processing keyword alerts when searching live lucene index. 
Rolling back transaction.", t); transaction.rollback(); } finally { if (!transaction.wasRolledBack()) { transaction.commit(); } } } consecutiveExceptions = 0; } catch (Throwable t) { User user = keywordAlert.getUser(); log.error("Caught throwable on keyword " + keywordAlert.getId() + ", " + keywordAlert.getUserQuery() + ", user " + ((user == null) ? null : user.getUsername()), t); consecutiveExceptions++; if (consecutiveExceptions >= maxConsecutiveExceptions) { return true; } } } return false; }
From source file:com.appeligo.search.actions.SearchResults.java
License:Apache License
/**
 * Wraps a matching Lucene Document in a SearchResult — with up to three
 * highlighted, "..."-joined fragments of its "text" field — and registers it
 * in both the ordered result list and the programID lookup map.
 */
private void addDocument(Document doc, float score, EPGProvider epgProvider, Highlighter highlighter,
        Analyzer analyzer, ScheduledProgram next, ScheduledProgram last, Program programInfo)
        throws IOException {
    // Snippet stays null when the document carries no "text" field.
    String snippet = null;
    String body = doc.get("text");
    if (body != null) {
        TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(body));
        snippet = highlighter.getBestFragments(tokenStream, body, 3, "...");
    }
    SearchResult searchResult =
            new SearchResult(lineup, new DocumentWrapper(doc, score, snippet), programInfo, last, next);
    results.add(searchResult);
    programToSearchResult.put(doc.get("programID"), searchResult);
}
From source file:com.bewsia.script.safe.lucene.SEntity.java
License:Open Source License
public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments, String separator) throws Exception { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); CachingTokenFilter tokenStream = new CachingTokenFilter( analyzer.tokenStream(field, new StringReader(text))); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize)); tokenStream.reset();//from www.ja v a2s . c o m String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator); return rv.length() == 0 ? text : rv; }
From source file:com.bugull.mongo.lucene.BuguHighlighter.java
License:Apache License
/**
 * Highlights occurrences of this highlighter's stored keywords within the
 * given field value, returning at most {@code maxFragments} best fragments
 * joined by "...".
 */
public String getResult(String fieldName, String fieldValue) throws Exception {
    BuguIndex index = BuguIndex.getInstance();
    // Parse the stored keywords against the target field using the shared analyzer.
    QueryParser parser = new QueryParser(index.getVersion(), fieldName, index.getAnalyzer());
    Query parsedQuery = parser.parse(keywords);
    QueryScorer fragmentScorer = new QueryScorer(parsedQuery, fieldName);
    Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(fragmentScorer));
    TokenStream tokenStream = index.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));
    return highlighter.getBestFragments(tokenStream, fieldValue, maxFragments, "...");
}
From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java
License:Open Source License
public String highlight(String strToHighlight, String fieldName, Query luceneQuery) { String highlightedText;/* w w w . ja v a2s. c o m*/ Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH); try { Directory directory = FSDirectory.open(indexDir); IndexReader indexReader = DirectoryReader.open(directory); Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader); QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery); SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>"); Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer); Fragmenter luceneFragmenter; // Si la chaine highlighter est sup 250 carac if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) { // Cration de best fragments de 100 carac chaque luceneFragmenter = new SimpleFragmenter(TAILLE_FRAGMENT); } else { // Toute la chaine est highlight luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE); } luceneHighlighter.setTextFragmenter(luceneFragmenter); TokenStream luceneTokenStream = analyzer.tokenStream(fieldName, new StringReader(strToHighlight)); String fragment = null; if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) { fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight, NB_BEST_FRAGMENT, FRAGMENT_SEP); } else { fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight); } if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) { fragment = strToHighlight; } indexReader.close(); directory.close(); highlightedText = fragment; } catch (IOException e) { throw new RuntimeException(e); } catch (InvalidTokenOffsetsException e) { throw new RuntimeException(e); } return highlightedText; }
From source file:com.edgenius.wiki.search.service.AbstractSearchService.java
License:Open Source License
/**
 * Builds one combined highlight fragment for the given content: the three
 * best matching pieces joined by "...". Returns "" for null content, the raw
 * content when no highlighter is supplied, and an abbreviated copy of the
 * content (FRAGMENT_LEN chars) when the highlighter reports invalid token
 * offsets.
 */
private String createFragment(Highlighter hl, String content) throws IOException {
    if (content == null) {
        return "";
    }
    if (hl == null) {
        return content;
    }
    TokenStream tokenStream =
            searcherFactory.getAnalyzer().tokenStream(FieldName.CONTENT, new StringReader(content));
    try {
        return hl.getBestFragments(tokenStream, content, 3, "...");
    } catch (InvalidTokenOffsetsException e) {
        // Degrade gracefully: log and fall back to a plain truncated excerpt.
        log.error("Highlight fragment error", e);
        return StringUtils.abbreviate(content, FRAGMENT_LEN);
    }
}