List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragments
public final String[] getBestFragments(TokenStream tokenStream, String text, int maxNumFragments) throws IOException, InvalidTokenOffsetsException
From source file:ca.uhn.fhir.jpa.dao.FhirSearchDao.java
License:Apache License
@Override public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) { Validate.notBlank(theContext, "theContext must be provided"); Validate.notBlank(theSearchParam, "theSearchParam must be provided"); Validate.notBlank(theText, "theSearchParam must be provided"); long start = System.currentTimeMillis(); String[] contextParts = StringUtils.split(theContext, '/'); if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) { throw new InvalidRequestException("Invalid context: " + theContext); }// w w w. j a va 2s . com IdDt contextId = new IdDt(contextParts[0], contextParts[1]); Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextId, myEntityManager); FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); //@formatter:off Query textQuery = qb.phrase().withSlop(2).onField("myContentText").boostedTo(4.0f) .andField("myContentTextEdgeNGram").boostedTo(2.0f).andField("myContentTextNGram").boostedTo(1.0f) .andField("myContentTextPhonetic").boostedTo(0.5f).sentence(theText.toLowerCase()).createQuery(); Query query = qb.bool() .must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery()) .must(textQuery).createQuery(); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class); ftq.setProjection("myContentText"); ftq.setMaxResults(20); List<?> resultList = ftq.getResultList(); List<Suggestion> suggestions = Lists.newArrayList(); for (Object next : resultList) { Object[] nextAsArray = (Object[]) next; String nextValue = (String) nextAsArray[0]; try { MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = 
em.getSearchFactory().getAnalyzer(ResourceTable.class); formatter.setAnalyzer("myContentTextPhonetic"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); formatter.setAnalyzer("myContentTextNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); formatter.setFindPhrasesWith(); formatter.setAnalyzer("myContentTextEdgeNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentText"); // highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextEdgeNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextPhonetic"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); } catch (Exception e) { throw new InternalErrorException(e); } } Collections.sort(suggestions); Set<String> terms = Sets.newHashSet(); for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext();) { String nextTerm = iter.next().getTerm().toLowerCase(); if (!terms.add(nextTerm)) { iter.remove(); } } long delay = System.currentTimeMillis() - start; ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[] { terms.size(), theText, delay }); return suggestions; }
From source file:ca.uhn.fhir.jpa.dao.FulltextSearchSvcImpl.java
License:Apache License
@Override public List<Suggestion> suggestKeywords(String theContext, String theSearchParam, String theText) { Validate.notBlank(theContext, "theContext must be provided"); Validate.notBlank(theSearchParam, "theSearchParam must be provided"); Validate.notBlank(theText, "theSearchParam must be provided"); long start = System.currentTimeMillis(); String[] contextParts = StringUtils.split(theContext, '/'); if (contextParts.length != 3 || "Patient".equals(contextParts[0]) == false || "$everything".equals(contextParts[2]) == false) { throw new InvalidRequestException("Invalid context: " + theContext); }// w w w .j a v a2s . c om Long pid = BaseHapiFhirDao.translateForcedIdToPid(contextParts[0], contextParts[1], myForcedIdDao); FullTextEntityManager em = org.hibernate.search.jpa.Search.getFullTextEntityManager(myEntityManager); QueryBuilder qb = em.getSearchFactory().buildQueryBuilder().forEntity(ResourceTable.class).get(); //@formatter:off Query textQuery = qb.phrase().withSlop(2).onField("myContentText").boostedTo(4.0f) .andField("myContentTextEdgeNGram").boostedTo(2.0f).andField("myContentTextNGram").boostedTo(1.0f) .andField("myContentTextPhonetic").boostedTo(0.5f).sentence(theText.toLowerCase()).createQuery(); Query query = qb.bool() .must(qb.keyword().onField("myResourceLinks.myTargetResourcePid").matching(pid).createQuery()) .must(textQuery).createQuery(); //@formatter:on FullTextQuery ftq = em.createFullTextQuery(query, ResourceTable.class); ftq.setProjection("myContentText"); ftq.setMaxResults(20); List<?> resultList = ftq.getResultList(); List<Suggestion> suggestions = Lists.newArrayList(); for (Object next : resultList) { Object[] nextAsArray = (Object[]) next; String nextValue = (String) nextAsArray[0]; try { MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions); Scorer scorer = new QueryScorer(textQuery); Highlighter highlighter = new Highlighter(formatter, scorer); Analyzer analyzer = 
em.getSearchFactory().getAnalyzer(ResourceTable.class); formatter.setAnalyzer("myContentTextPhonetic"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); formatter.setAnalyzer("myContentTextNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); formatter.setFindPhrasesWith(); formatter.setAnalyzer("myContentTextEdgeNGram"); highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentText"); // highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextEdgeNGram"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10); // formatter.setAnalyzer("myContentTextPhonetic"); // highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10); } catch (Exception e) { throw new InternalErrorException(e); } } Collections.sort(suggestions); Set<String> terms = Sets.newHashSet(); for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext();) { String nextTerm = iter.next().getTerm().toLowerCase(); if (!terms.add(nextTerm)) { iter.remove(); } } long delay = System.currentTimeMillis() - start; ourLog.info("Provided {} suggestions for term {} in {} ms", new Object[] { terms.size(), theText, delay }); return suggestions; }
From source file:com.bluedragon.search.search.QueryRun.java
License:Open Source License
private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount, int recordsSearched) throws CorruptIndexException, Exception { DocumentWrap document = new DocumentWrap(searcher.doc(docid)); queryResultData.addRow(1);//from w w w . j av a 2s . com queryResultData.setCurrentRow(queryResultData.getSize()); // Add in the standard columns that we know we have for every search queryResultData.setCell(1, new cfStringData(document.getId())); queryResultData.setCell(2, new cfStringData(document.getName())); queryResultData.setCell(3, new cfNumberData(score)); queryResultData.setCell(4, new cfNumberData(searchCount)); queryResultData.setCell(5, new cfNumberData(recordsSearched)); queryResultData.setCell(6, new cfNumberData(rank + 1)); String uC = queryAttributes.getUniqueColumn(); // Now we do the custom ones List<IndexableField> fields = document.getDocument().getFields(); Iterator<IndexableField> it = fields.iterator(); while (it.hasNext()) { IndexableField fieldable = it.next(); String fieldName = fieldable.name().toLowerCase(); // Check for the unique if (uniqueSet != null && fieldName.equals(uC)) { if (uniqueSet.contains(fieldable.stringValue())) { queryResultData.deleteRow(queryResultData.getSize()); return; } else uniqueSet.add(fieldable.stringValue()); } // Check to see if we have this column if (fieldName.equals("contents") && !queryAttributes.getContentFlag()) continue; if (!activeColumns.containsKey(fieldName)) { int newcolumn = queryResultData.addColumnData(fieldable.name().toUpperCase(), cfArrayData.createArray(1), null); activeColumns.put(fieldName, newcolumn); } int column = activeColumns.get(fieldName); if (column <= 6) continue; queryResultData.setCell(column, new cfStringData(fieldable.stringValue())); } // Do the context stuff if enable if (queryAttributes.getContextPassages() > 0) { Scorer scorer = new QueryScorer(queryAttributes.getQuery()); SimpleHTMLFormatter formatter = new 
SimpleHTMLFormatter(queryAttributes.getContextHighlightStart(), queryAttributes.getContextHighlightEnd()); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(queryAttributes.getContextBytes()); highlighter.setTextFragmenter(fragmenter); String nextContext = ""; String contents = document.getAttribute(DocumentWrap.CONTENTS); if (contents != null) { TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream(DocumentWrap.CONTENTS, new StringReader(contents)); String[] fragments = null; try { fragments = highlighter.getBestFragments(tokenStream, contents, queryAttributes.getContextPassages()); if (fragments.length == 1) { nextContext = fragments[0] + "..."; } else { StringBuilder context = new StringBuilder(); for (int f = 0; f < fragments.length; f++) { context.append("..."); context.append(fragments[f]); } context.append("..."); nextContext = context.toString(); } } catch (Exception e) { } // Add in the context if (!activeColumns.containsKey("context")) { int newcolumn = queryResultData.addColumnData("CONTEXT", cfArrayData.createArray(1), null); activeColumns.put("context", newcolumn); } queryResultData.setCell(activeColumns.get("context"), new cfStringData(nextContext)); } } }
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
/**
 * Searches one collection, grouping hits by URL (one document per group) and
 * boosting newer documents via a reciprocal-of-age function query. Each result
 * carries a highlighted, HTML-sanitized snippet.
 *
 * @param collection   collection name used as a filter clause
 * @param query        user query string, parsed by the configured parser
 * @param offset       zero-based index of the first group to return
 * @param size         maximum number of groups to return
 * @param includeDebug when true, attaches per-hit explanations and the final query string
 * @throws IndexException wrapping any parse/IO/highlighting failure
 */
@Override
public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug)
        throws IndexException {
    try {
        //TODO: make age be a component in the ranking?
        // User query MUST match; collection is a non-scoring FILTER clause.
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(parser.parse(query), Occur.MUST);
        queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)),
                Occur.FILTER);
        Query baseQuery = queryBuilder.build();

        // Recency boost: reciprocal of document age in seconds (now - timestamp).
        FunctionQuery boostQuery = new FunctionQuery(
                new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000,
                        new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F));
        Query q = new CustomScoreQuery(baseQuery, boostQuery);

        QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setTextFragmenter(fragmenter);

        // Collapse duplicate URLs: one top-scoring doc per URL group.
        GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1)
                .setAllGroups(true).setIncludeMaxScore(true);
        TopGroups<?> groups = gsearch.search(searcher, q, offset, size);

        ArrayList<SearchResult> results = new ArrayList<>(size);
        for (int i = offset; i < offset + size && i < groups.groups.length; i++) {
            ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0];
            Document luceneDoc = searcher.doc(scoreDoc.doc);
            IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc);

            // Token stream sourced from term vectors when available, capped
            // just below the highlighter's analysis limit.
            TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH,
                    reader.getTermVectors(scoreDoc.doc), luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH),
                    analyzer, highlighter.getMaxDocCharsToAnalyze() - 1);

            // Up to 3 fragments, joined and stripped down to simple text HTML.
            String[] snippets = highlighter.getBestFragments(tokenStream,
                    luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), 3);
            String snippet = Arrays.asList(snippets).stream().collect(Collectors.joining("\n"));
            snippet = Jsoup.clean(snippet, Whitelist.simpleText());

            String debugInfo = null;
            if (includeDebug) {
                Explanation explanation = searcher.explain(q, scoreDoc.doc);
                debugInfo = explanation.toString();
            }

            results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(),
                    doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score));
        }

        SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset)
                .setMaxResultsRequested(size)
                .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0)
                .setResults(results);

        if (includeDebug) {
            wrapper.setDebugInfo(q.toString());
        }
        return wrapper;
    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
        // NOTE(review): logs only the message, losing the stack trace; the cause
        // is preserved in the wrapped IndexException, so callers can still see it.
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java
License:Apache License
/**
 * Searches pages using a particular combination of flags.
 *
 * @param query The query to perform in Lucene query language
 * @param flags A set of flags; FLAG_CONTEXTS enables highlighted context fragments
 * @return A Collection of SearchResult instances, or {@code null} if the Lucene
 *         index is not yet ready (NOTE(review): callers must handle the null)
 * @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    Searcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        // Only build a highlighter when the caller asked for search contexts.
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            searcher = new IndexSearcher(m_luceneDirectory);
        } catch (Exception ex) {
            // Index not built yet; signalled to callers via null return.
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        Hits hits = searcher.search(luceneQuery);

        list = new ArrayList<SearchResult>(hits.length());
        for (int curr = 0; curr < hits.length(); curr++) {
            Document doc = hits.doc(curr);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                // Score scaled to an int percentage-like value.
                int score = (int) (hits.score(curr) * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                // Stale index entry: page no longer loadable, purge it from Lucene.
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        // NOTE(review): swallowed; method falls through and returns whatever
        // was collected so far (possibly null).
        log.error("Failed during lucene search", e);
    } catch (InstantiationException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (IllegalAccessException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (ClassNotFoundException e) {
        log.error("Specified Lucene analyzer does not exist", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse", e);
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                // intentionally ignored: nothing useful to do on close failure
            }
        }
    }

    return list;
}
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle, boolean searchInTags, boolean searchInText, boolean superFuzzy) { ArrayList<CardSnapshot> cardSnaps = new ArrayList(); try {//from ww w.ja v a 2 s . c om ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>(); for (String indexName : indexNames) { IndexReader reader = IndexReader .open(FSDirectory.open(new File(indexDir + File.separator + indexName), new SimpleFSLockFactory(indexDir + File.separator + indexName))); IndexSearcher searcher = new IndexSearcher(reader); searchers.add(searcher); } BooleanQuery query = new BooleanQuery(); if (searchInTitle) { IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy); } if (searchInTags) { IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy); } if (searchInText) { IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy); IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy); } for (IndexSearcher searcher : searchers) { TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"), new StandardAnalyzer(Version.LUCENE_20.LUCENE_35)); QueryScorer scorer = new QueryScorer(query, "analyzedText"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5); String highlights = ""; for (String fragment : fragments) { highlights += fragment + "..."; } if (highlights.equals("")) { String text = doc.get("text"); if (text.length() > 100) { highlights += doc.get("text").substring(0, 100); } else { highlights += doc.get("text"); } } 
cardSnaps.add(new CardSnapshot(highlights, doc)); } searcher.getIndexReader().close(); searcher.close(); searcher = null; } } catch (Exception ex) { ex.printStackTrace(); } return cardSnaps; }
From source file:com.main.Searcher.java
public List<Bean> searching(String s1, String s2, String radioBtn) throws IOException, ParseException, InvalidTokenOffsetsException { //getting reference of directory Directory dir = FSDirectory.open(Paths.get(Index_Dir)); //Index reader - an interface for accessing a point-in-time view of a lucene index IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); //analyzer with the default stop words, takes out the stop words Analyzer analyzer = new StandardAnalyzer(); String contents = "contents"; QueryParser parser = new QueryParser(contents, analyzer); int numOfDoc = reader.numDocs(); for (int i = 0; i < numOfDoc; i++) { Document d = reader.document(i); }/* ww w. j av a 2 s . c om*/ Query q1 = parser.parse(s1); Query q2 = parser.parse(s2); //conjuction, disjunction and negation BooleanQuery.Builder bq = new BooleanQuery.Builder(); //occur.must : both queries required in a doc if (radioBtn.equals("conjunction")) { bq.add(q1, BooleanClause.Occur.MUST); bq.add(q2, BooleanClause.Occur.MUST); bq.build(); } //occur.should: one of the q1 should be presen t in doc else if (radioBtn.equals("disjunction")) { bq.add(q1, BooleanClause.Occur.SHOULD); bq.add(q2, BooleanClause.Occur.SHOULD); bq.build(); } //negation: first should present , second should not else { bq.add(q1, BooleanClause.Occur.MUST); bq.add(q2, BooleanClause.Occur.MUST_NOT); bq.build(); } TopDocs hits = searcher.search(bq.build(), 10); Formatter formatter = new SimpleHTMLFormatter(); QueryScorer scorer = new QueryScorer(bq.build()); //used to markup highlighted terms found in the best sections of a cont Highlighter highlighter = new Highlighter(formatter, scorer); //It breaks cont up into same-size texts but does not split up spans Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10); //breaks cont up into same-size fragments with no concerns over spotting sentence boundaries. 
//set fragmenter to highlighter highlighter.setTextFragmenter(fragmenter); for (int i = 0; i < hits.scoreDocs.length; i++) { Bean bean = new Bean(); int outResult = hits.scoreDocs.length; bean.setNumFile(outResult); int docid = hits.scoreDocs[i].doc; double rank = hits.scoreDocs[i].score; bean.setRankSc(rank); Document doc = searcher.doc(docid); String name = doc.get("name"); String title = doc.get("title"); bean.setTitle(name); String path = doc.get("path"); bean.setPath(path); String cont = doc.get("contents"); //Create token stream TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer); //Get highlighted cont fragments String[] frags = highlighter.getBestFragments(stream, cont, 10); ArrayList<String> dummy = new ArrayList<>(); for (String frag : frags) { dummy.add(frag); } bean.setContent(dummy); beanList.add(bean); } dir.close(); // } return beanList; }
From source file:com.main.Searcher.java
public List<Bean> searching(String s1) throws IOException, ParseException, InvalidTokenOffsetsException { //Get directory reference Directory dir = FSDirectory.open(Paths.get(Index_Dir)); //Index reader - an interface for accessing a point-in-time view of a lucene index IndexReader reader = DirectoryReader.open(dir); //CreateIndexReader reader = DirectoryReader.open(dir); lucene searcher. It search over a single IndexReader. IndexSearcher searcher = new IndexSearcher(reader); //analyzer with the default stop words Analyzer analyzer = new StandardAnalyzer(); //Query parser to be used for creating TermQuery String queries = null;/*from ww w . ja va 2 s. c o m*/ String queryString = null; //regular search String contents = "contents"; BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(contents, analyzer); int numOfDoc = reader.numDocs(); for (int i = 0; i < numOfDoc; i++) { Document d = reader.document(i); } Query q1 = parser.parse(s1); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(q1, BooleanClause.Occur.MUST); //Search the lucene documents TopDocs hits = searcher.search(bq.build(), 10); // TopScoreDocCollector collector = TopScoreDocCollector.create(5); /** * Highlighter Code Start *** */ //Uses HTML <B></B> tag to highlight the searched terms Formatter formatter = new SimpleHTMLFormatter(); //It scores cont fragments by the number of unique q1 terms found //Basically the matching score in layman terms QueryScorer scorer = new QueryScorer(bq.build()); //used to markup highlighted terms found in the best sections of a cont Highlighter highlighter = new Highlighter(formatter, scorer); //It breaks cont up into same-size texts but does not split up spans Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10); //breaks cont up into same-size fragments 
with no concerns over spotting sentence boundaries. //set fragmenter to highlighter highlighter.setTextFragmenter(fragmenter); //Iterate over found results for (int i = 0; i < hits.scoreDocs.length; i++) { Bean bean = new Bean(); //int rank = hits.scoreDocs.length; int outResult = hits.scoreDocs.length; bean.setNumFile(outResult); int docid = hits.scoreDocs[i].doc; double rank = hits.scoreDocs[i].score; bean.setRankSc(rank); Document doc = searcher.doc(docid); // String title = doc.get("title"); String name = doc.get("name"); String title = doc.get("title"); bean.setTitle(name); String path = doc.get("path"); bean.setPath(path); String cont = doc.get("contents"); //Create token stream TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer); //Get highlighted cont fragments String[] frags = highlighter.getBestFragments(stream, cont, 10); ArrayList<String> dummy = new ArrayList<>(); for (String frag : frags) { dummy.add(frag); } bean.setContent(dummy); beanList.add(bean); } dir.close(); // } return beanList; }
From source file:com.oneis.app.SearchResultExcerptHighlighter.java
License:Mozilla Public License
static public String[] bestHighlightedExcerpts(String escapedText, String searchTerms, int maxExcerptLength) { try {/*from w ww .j a va 2s . c o m*/ // Scorer selects the terms which need highlighting. Created from a 'query' based on the extracted search terms. Scorer scorer; Fragmenter fragmenter; if (searchTerms != null && searchTerms.length() > 0) { QueryParser queryParser = new QueryParser("FIELD", new StandardAnalyzer()); Query query = queryParser.parse(searchTerms); scorer = new QueryScorer(query); fragmenter = new SimpleSpanFragmenter((QueryScorer) scorer, maxExcerptLength); } else { scorer = new NoHighlightingScorer(); fragmenter = new SimpleFragmenter(maxExcerptLength); } // Parse the escaped text into tokens, which retain the positions in the text StandardAnalyzer analyser = new StandardAnalyzer(); TokenStream tokenStream = analyser.tokenStream("FIELD", new StringReader(escapedText)); // Finally, do the highlighting! Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer); highlighter.setTextFragmenter(fragmenter); return highlighter.getBestFragments(tokenStream, escapedText, NUMBER_OF_FRAGMENTS); } catch (Exception e) { Logger.getLogger("com.oneis.app").info("Exception in SearchResultExcerptHighlighter: ", e); return null; } }
From source file:com.pongasoft.kiwidoc.index.impl.keyword.impl.KeywordIndexImpl.java
License:Apache License
/**
 * Highlights the provided results obtained using the provided query.
 *
 * @param query  the query from which the results were computed
 * @param models the models to highlight
 * @return a map associating each model's resource with its highlight fragments;
 *         entries whose highlighting failed with an IOException are omitted
 * @throws MalformedQueryException if the query cannot be parsed
 * @throws InternalException if there is an internal problem
 */
public <R extends Resource> Map<R, String[]> highlightResults(KeywordQuery query, Collection<Model<R>> models)
        throws InternalException, MalformedQueryException {
    Map<R, String[]> res = new LinkedHashMap<R, String[]>();

    Query parsedQuery = parseQuery(query);

    // A null parsed query yields an empty result map.
    if (parsedQuery != null) {
        Highlighter highlighter = new Highlighter(_highlighterFormatter, HTML_ENCODER,
                new QueryScorer(parsedQuery));

        for (Model<R> model : models) {
            // Build a transient, non-stored document around the body text so
            // the highlighter can tokenize it with the configured analyzer.
            Document document = new Document();
            String bodyText = buildBody(model);
            document.add(new Field(DocumentFactory.BODY_FIELD, bodyText, Field.Store.NO, Field.Index.ANALYZED));
            TokenStream tokenStream = TokenSources.getTokenStream(document, DocumentFactory.BODY_FIELD,
                    _analyzer);
            try {
                // Keep at most 2 best fragments per model.
                res.put(model.getResource(), highlighter.getBestFragments(tokenStream, bodyText, 2));
            } catch (IOException e) {
                // Best-effort: skip highlighting for this entry only.
                log.warn("exception while computing highlight... [ignored]", e);
            }
        }
    }

    return res;
}