Usage examples for the org.apache.lucene.search.highlight.Highlighter constructor:
public Highlighter(Formatter formatter, Encoder encoder, Scorer fragmentScorer)
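Before the project examples below, here is a minimal sketch of the three-argument constructor in use. It assumes a Lucene 3.x-era API to match the examples that follow; the field name "contents" and the wrapper class are hypothetical, for illustration only.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

// Minimal sketch (hypothetical helper, not from any project below):
// wrap matched terms in <em> tags and HTML-escape the surrounding text.
public class HighlighterSketch {
    public static String highlight(Query query, Analyzer analyzer, String text) throws Exception {
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<em>", "</em>"), // Formatter: decorates each matched term
                new SimpleHTMLEncoder(),                  // Encoder: escapes HTML in non-matched text
                new QueryScorer(query));                  // Scorer: ranks fragments against the query
        TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text));
        return highlighter.getBestFragment(tokenStream, text); // null if nothing matched
    }
}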
From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java
License:Apache License
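In this JSPWiki search provider (written against the pre-3.0 Hits/Searcher API), the highlighter is constructed only when the FLAG_CONTEXTS flag is set; matches are wrapped in a <span class="searchmatch"> element and the surrounding page text is HTML-encoded.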
/**
 * Searches pages using a particular combination of flags.
 *
 * @param query The query to perform in Lucene query language
 * @param flags A set of flags
 * @return A Collection of SearchResult instances
 * @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    Searcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(queryfields, getLuceneAnalyzer());
        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            searcher = new IndexSearcher(m_luceneDirectory);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        Hits hits = searcher.search(luceneQuery);

        list = new ArrayList<SearchResult>(hits.length());
        for (int curr = 0; curr < hits.length(); curr++) {
            Document doc = hits.doc(curr);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                int score = (int) (hits.score(curr) * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (InstantiationException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (IllegalAccessException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (ClassNotFoundException e) {
        log.error("Specified Lucene analyzer does not exist", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse", e);
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
            }
        }
    }

    return list;
}
From source file:com.meltmedia.cadmium.search.SearchService.java
License:Apache License
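Here the highlighter is built with the default SimpleHTMLFormatter tags, and excerpt extraction falls back from the document content to its title if highlighting the content fails.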
private Map<String, Object> buildSearchResults(final String query, final String path) throws Exception {
    logger.info("Running search for [{}]", query);
    final Map<String, Object> resultMap = new LinkedHashMap<String, Object>();
    new SearchTemplate(provider) {
        public void doSearch(IndexSearcher index) throws IOException, ParseException {
            QueryParser parser = createParser(getAnalyzer());
            resultMap.put("number-hits", 0);
            List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();
            resultMap.put("results", resultList);
            if (index != null && parser != null) {
                String literalQuery = query.replaceAll(ALLOWED_CHARS_PATTERN, "\\\\$1");
                Query query1 = parser.parse(literalQuery);
                if (StringUtils.isNotBlank(path)) {
                    Query pathPrefix = new PrefixQuery(new Term("path", path));
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(pathPrefix, Occur.MUST);
                    boolQuery.add(query1, Occur.MUST);
                    query1 = boolQuery;
                }
                TopDocs results = index.search(query1, null, 100000);
                QueryScorer scorer = new QueryScorer(query1);
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                        scorer);
                logger.info("Search returned {} hits.", results.totalHits);
                resultMap.put("number-hits", results.totalHits);
                for (ScoreDoc doc : results.scoreDocs) {
                    Document document = index.doc(doc.doc);
                    String content = document.get("content");
                    String title = document.get("title");
                    Map<String, Object> result = new LinkedHashMap<String, Object>();
                    String excerpt = "";
                    try {
                        excerpt = highlighter.getBestFragments(
                                parser.getAnalyzer().tokenStream(null, new StringReader(content)), content, 3,
                                "...");
                        excerpt = fixExcerpt(excerpt);
                        result.put("excerpt", excerpt);
                    } catch (Exception e) {
                        logger.debug("Failed to get search excerpt from content.", e);
                        try {
                            excerpt = highlighter.getBestFragments(
                                    parser.getAnalyzer().tokenStream(null, new StringReader(title)), title, 1,
                                    "...");
                            excerpt = fixExcerpt(excerpt);
                            result.put("excerpt", excerpt);
                        } catch (Exception e1) {
                            logger.debug("Failed to get search excerpt from title.", e1);
                            result.put("excerpt", "");
                        }
                    }
                    result.put("score", doc.score);
                    result.put("title", title);
                    result.put("path", document.get("path"));
                    resultList.add(result);
                }
            }
        }
    }.search();
    return resultMap;
}
From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java
License:Apache License
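This ontology search service limits highlighting work with setMaxDocCharsToAnalyze, enables multi-term query expansion on the scorer, and regenerates token streams for highlighting via TokenSources.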
@Override
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms) throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;

    // default QueryParser.escape(pattern) method does not support phrase queries
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);

    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        return Collections.emptyList();
    }

    logger.log(Level.FINE, "Escaped search pattern: " + pattern);

    Lock lock = rwlock.readLock();
    lock.lock();

    if (exception != null) {
        lock.unlock();
        throw new RuntimeException("Failed to refresh index reader after last commit", exception);
    }

    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);

        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);

        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);
        highlighter.setMaxDocCharsToAnalyze(MAX_CHARS);
        scorer.setExpandMultiTermQuery(true);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;

            if (!isSynonym || includeSynonyms) {
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM, highlighterAnalyzer);
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                    results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                            frag[0].getScore(), isSynonym));
                }
                highlighterAnalyzer.close();
            }
        }

        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        String msg = "Failed to perform Lucene search with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {
        close(analyzer);
        lock.unlock();
    }
}
From source file:com.pongasoft.kiwidoc.index.impl.keyword.impl.KeywordIndexImpl.java
License:Apache License
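This example highlights documents that are built on the fly: each model's body text is wrapped in a transient Document, tokenized with TokenSources, and reduced to its two best fragments.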
/**
 * Highlights the provided results obtained using the provided query.
 *
 * @param query the query from which the results were computed
 * @param models the models to highlight
 * @return a map representing for each entry in the model its associated resource and highlight
 * @throws MalformedQueryException if the query cannot be parsed
 * @throws InternalException if there is an internal problem
 */
public <R extends Resource> Map<R, String[]> highlightResults(KeywordQuery query, Collection<Model<R>> models)
        throws InternalException, MalformedQueryException {
    Map<R, String[]> res = new LinkedHashMap<R, String[]>();

    Query parsedQuery = parseQuery(query);

    if (parsedQuery != null) {
        Highlighter highlighter = new Highlighter(_highlighterFormatter, HTML_ENCODER,
                new QueryScorer(parsedQuery));

        for (Model<R> model : models) {
            Document document = new Document();
            String bodyText = buildBody(model);
            document.add(new Field(DocumentFactory.BODY_FIELD, bodyText, Field.Store.NO, Field.Index.ANALYZED));

            TokenStream tokenStream = TokenSources.getTokenStream(document, DocumentFactory.BODY_FIELD, _analyzer);

            try {
                res.put(model.getResource(), highlighter.getBestFragments(tokenStream, bodyText, 2));
            } catch (IOException e) {
                log.warn("exception while computing highlight... [ignored]", e);
            }
        }
    }

    return res;
}
From source file:com.tripod.lucene.service.AbstractLuceneService.java
License:Apache License
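A small factory method: a Highlighter is created only when the query requests highlight fields, using the query's own pre/post tags for the formatter.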
/**
 * @param query the tripod query being performed
 * @param luceneQuery the Lucene query being performed
 * @return the highlighter to use if the tripod query has one or more highlight fields, or null
 */
private Highlighter getHighlighter(final Q query, final Query luceneQuery) {
    Highlighter highlighter = null;
    if (query.getHighlightFields() != null && query.getHighlightFields().size() > 0) {
        SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(query.getHighlightPreTag(),
                query.getHighlightPostTag());
        highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(luceneQuery));
    }
    return highlighter;
}
From source file:natural.language.qa.LuceneSearch.java
License:Apache License
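This question-answering searcher combines the three-argument constructor with a SimpleSpanFragmenter of size 50 and raises setMaxDocCharsToAnalyze so whole documents are scanned for the best fragment.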
public List<LuceneSearchResult> search(String queryString, int maxRes) throws Exception {
    IndexSearcher searcher = null;
    List<LuceneSearchResult> results = new ArrayList<LuceneSearchResult>();
    try {
        Properties indexConf = new Properties();
        FileInputStream fis = new FileInputStream("index.properties");
        indexConf.load(fis);

        String index = indexConf.getProperty("index");
        String field = "contents";

        Directory indexDir = FSDirectory.open(new File(index));
        searcher = new IndexSearcher(indexDir);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);

        queryString = queryString.trim();
        if (queryString.length() == 0) {
            return results;
        }

        Query query = parser.parse(queryString);
        System.out.println("Searching for: " + query.toString(field));

        // ================================================
        Formatter f = new SimpleHTMLFormatter("", "");
        Encoder e = new DefaultEncoder();
        QueryScorer fs = new QueryScorer(query);
        Fragmenter fragmenter = new SimpleSpanFragmenter(fs, 50); // new SentenceFragmenter();
        Highlighter h = new Highlighter(f, e, fs);
        h.setTextFragmenter(fragmenter);
        h.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        // ================================================

        // Collect docs
        TopDocs res = searcher.search(query, maxRes);
        int numTotalHits = res.totalHits;
        ScoreDoc[] scoreDocs = res.scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String path = doc.get("path");
            String content = readDocument(path);
            String bestFragment = h.getBestFragment(analyzer, field, content);
            String frag = bestFragment;
            //System.out.println(frag);
            LuceneSearchResult hit = new LuceneSearchResult(scoreDoc.doc, path, frag);
            results.add(hit);
        }
        System.out.println(numTotalHits + " total matching documents");
    } finally {
        if (searcher != null) {
            searcher.close();
        }
    }
    return results;
}
From source file:net.hillsdon.reviki.search.impl.LuceneSearcher.java
License:Apache License
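Reviki makes highlighting optional: extracts stay null when they are not requested or the field text is not stored, and the three best fragments are joined with "...".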
private LinkedHashSet<SearchMatch> doQuery(final IndexReader reader, final Analyzer analyzer,
        final Searcher searcher, final String field, final boolean provideExtracts, final Query query)
        throws IOException, CorruptIndexException {
    Highlighter highlighter = null;
    if (provideExtracts) {
        highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new SimpleHTMLEncoder(),
                new QueryScorer(query));
    }
    Hits hits = searcher.search(query);
    LinkedHashSet<SearchMatch> results = new LinkedHashSet<SearchMatch>();
    @SuppressWarnings("unchecked")
    Iterator<Hit> iter = hits.iterator();
    while (iter.hasNext()) {
        Hit hit = iter.next();
        String text = hit.get(field);
        String extract = null;
        // The text is not stored for all fields, just provide a null extract.
        if (highlighter != null && text != null) {
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            // Get 3 best fragments and separate with a "..."
            extract = highlighter.getBestFragments(tokenStream, text, 3, "...");
        }
        results.add(new SearchMatch(_wikiName.equals(hit.get(FIELD_WIKI)), hit.get(FIELD_WIKI),
                hit.get(FIELD_PATH), extract));
    }
    return results;
}
From source file:org.apache.maven.index.DefaultIteratorResultSet.java
License:Apache License
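The Maven indexer supplies a custom CleaningEncoder and a OneLineFragmenter, then keeps only fragments with a positive score from getBestTextFragments.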
protected final List<String> getBestFragments(Query query, Formatter formatter, TokenStream tokenStream,
        String text, int maxNumFragments) throws IOException {
    Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query));

    highlighter.setTextFragmenter(new OneLineFragmenter());

    maxNumFragments = Math.max(1, maxNumFragments); // sanity check

    TextFragment[] frag;

    // Get text
    ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments);

    try {
        frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments);

        for (int i = 0; i < frag.length; i++) {
            if ((frag[i] != null) && (frag[i].getScore() > 0)) {
                fragTexts.add(frag[i].toString());
            }
        }
    } catch (InvalidTokenOffsetsException e) {
        // empty?
    }

    return fragTexts;
}
From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java
License:Open Source License
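Alfresco's Solr highlighter first maps the requested field to a schema field, then assembles the Highlighter from per-field formatter, encoder, and query-scorer helpers.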
/**
 * Return a {@link org.apache.lucene.search.highlight.Highlighter}
 * appropriate for this field.
 *
 * @param query
 *            The current Query
 * @param requestFieldname
 *            The name of the field
 * @param request
 *            The current SolrQueryRequest
 */
@Override
protected Highlighter getHighlighter(Query query, String requestFieldname, SolrQueryRequest request) {
    String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldname,
            FieldUse.HIGHLIGHT, request);
    SolrParams params = request.getParams();
    Highlighter highlighter = new Highlighter(getFormatter(requestFieldname, params),
            getEncoder(requestFieldname, params), getQueryScorer(query, schemaFieldName, request));
    highlighter.setTextFragmenter(getFragmenter(requestFieldname, params));
    return highlighter;
}
From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java
License:Apache License
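Solr's default highlighter builds a phrase-aware variant by passing a span-based query scorer (over a caching token filter) to the same three-argument constructor.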
/**
 * Return a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 * @param tokenStream document text CachingTokenStream
 * @throws IOException If there is a low-level I/O error.
 */
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request,
        CachingTokenFilter tokenStream) throws IOException {
    SolrParams params = request.getParams();
    Highlighter highlighter = new Highlighter(getFormatter(fieldName, params), getEncoder(fieldName, params),
            getSpanQueryScorer(query, fieldName, tokenStream, request));
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
}