public final String getBestFragments(TokenStream tokenStream, String text, int maxNumFragments,
        String separator) throws IOException, InvalidTokenOffsetsException 

Highlights terms in the text, extracting the most relevant sections and concatenating the chosen fragments with a separator (typically "...").


From source file: it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License: Mozilla Public License

/**
 * Runs a fuzzy search for {@code queryString} over the given fields and stores the hits, plus a
 * highlighted summary per hit, in the returned map.
 * <p>
 * NOTE(review): this excerpt is visibly incomplete -- several closing braces and some statement
 * lines are missing -- so the comments below describe only what the visible lines do.
 *
 * @param searcher searcher used to rewrite the fuzzy queries, run the search and load hit documents
 * @param queryString text wrapped in a {@code FuzzyQuery} for every entry of {@code fields}
 * @param index not referenced by any visible line
 * @param fields names of the fields the fuzzy queries are built for
 * @param metaDataToSearch when non-null, adds a mandatory term clause on
 *        {@code IndexingConstants.METADATA}
 * @return map holding the {@code ScoreDoc[]} under the key {@code "hits"} and one summary string
 *         per hit, keyed by the hit document's {@code IndexingConstants.BIOBJ_ID} value
 * @throws IOException declared by the signature
 * @throws ParseException declared by the signature; no visible line parses a query string
 */
public static HashMap<String, Object> searchIndexFuzzy(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();
    // NOTE(review): 'analyzer' is not used by any visible line; the highlighting call below
    // creates its own StandardAnalyzer.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery orQuery = new BooleanQuery();
    BooleanQuery andQuery = new BooleanQuery();
    // One rewritten fuzzy query per field, OR-ed together.
    for (int i = 0; i < fields.length; i++) {
        Query query = new FuzzyQuery(new Term(fields[i], queryString));
        query = query.rewrite(searcher.getIndexReader());
        orQuery.add(query, BooleanClause.Occur.SHOULD);
    // NOTE(review): the closing brace of the loop above appears to be missing from this excerpt.
    // The OR group as a whole is mandatory in the final query.
    andQuery.add(orQuery, BooleanClause.Occur.MUST);
    if (metaDataToSearch != null) {
        //search for query string on metadata name field and content
        //where metadata name = metaDataToSearch
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);

    // Restrict results to the value returned by getTenant().
    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);

    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    objectsToReturn.put("hits", hits);

    //orQuery = orQuery.rewrite(searcher.getIndexReader());
    //andQuery = andQuery.rewrite(searcher.getIndexReader());
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));

    if (hits != null) {
        for (int i = 0; i < hits.length; i++) {
            ScoreDoc hit = hits[i];
            Document doc = searcher.doc(hit.doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);
            // Default summary, kept when no fragment text is produced below.
            String summary = " ";
            if (highlighter != null) {
                String[] summaries;
                try {
                    Integer idobj = (Integer.valueOf(biobjId));

                    // The text to highlight comes from fillSummaryText, not from the hit document.
                    String contentToSearchOn = fillSummaryText(idobj);
                    // Ask for at most 3 fragments.
                    summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT),
                            IndexingConstants.CONTENTS, contentToSearchOn, 3);

                    StringBuffer summaryBuffer = new StringBuffer();
                    // NOTE(review): only the " ... " separator append is visible here; the lines
                    // that append the fragments themselves are presumably missing from this excerpt.
                    if (summaries.length > 0) {
                    for (int j = 1; j < summaries.length; j++) {
                        summaryBuffer.append(" ... ");
                    summary = summaryBuffer.toString();
                    //get only a portion of summary
                    // Summaries longer than 101 characters are cut to their first 100 plus "...".
                    if (summary.length() > 101) {
                        summary = summary.substring(0, 100);
                        summary += "...";
                    objectsToReturn.put(biobjId, summary);
                } catch (InvalidTokenOffsetsException e) {
                    logger.error(e.getMessage(), e);
                } catch (Exception e) {
                    // Any other failure (for example a non-numeric BIOBJ_ID) is only logged.
                    logger.error(e.getMessage(), e);


    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    return objectsToReturn;


From source file: lius.search.LiusHitList.java

License: Apache License

/**
 * Builds the {@code LiusHit} for the Lucene hit at position {@code index}, highlighting the
 * values of the configured display fields when highlighting is enabled in {@code liusConfig}.
 * <p>
 * NOTE(review): this excerpt is visibly incomplete -- closing braces and some statements are
 * missing (for example the end of the {@code getBestFragment} call) -- so the comments below
 * describe only what the visible lines do.
 *
 * @param index position of the hit in {@code luceneHits}
 * @return the {@code LiusHit} created at the top of the method
 * @throws IOException declared by the signature
 */
private LiusHit buildLiusHit(int index) throws IOException {

    LiusHit liusHit = new LiusHit();

    Document luceneDocument = luceneHits.doc(index);

    Map liusHitFieldsMap = new HashMap();
    // NOTE(review): no visible line adds to or reads from liusFieldsList.
    List liusFieldsList = new ArrayList();
    // Stays null when highlighting is disabled; later code checks for that.
    Highlighter luceneHighlighter = null;

    if (liusConfig.getHighlighter() == true) {
        // NOTE(review): no visible line closes this reader.
        IndexReader luceneIndexReader = IndexReader.open(indexDirectory);

        // The scorer is built from the rewritten query.
        Query rewrittenLuceneQuery = luceneQuery.rewrite(luceneIndexReader);
        QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);

        SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"liusHit\">", "</span>");
        luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);
    }

    // One pass per configured display field.
    for (int j = 0; j < liusConfig.getDisplayFields().size(); j++) {
        LiusField configLiusField = (LiusField) liusConfig.getDisplayFields().get(j);
        LiusField hitLiusField = new LiusField();
        String fieldName = configLiusField.getName();


        if (luceneHighlighter != null) {
            // Fragment size comes from the field configuration; without one the fragmenter is
            // created with Integer.MAX_VALUE.
            // NOTE(review): no visible line passes luceneFragmenter to the highlighter.
            Fragmenter luceneFragmenter;
            if (configLiusField.getFragmenter() != null) {
                luceneFragmenter = new SimpleFragmenter(Integer.parseInt(configLiusField.getFragmenter()));
            } else {
                luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE);
        String[] luceneDocumentValues = luceneDocument.getValues(configLiusField.getName());
        if (luceneDocumentValues != null) {
            if (luceneHighlighter != null) {
                for (int k = 0; k < luceneDocumentValues.length; k++) {
                    Analyzer luceneAnalyzer = AnalyzerFactory.getAnalyzer(liusConfig);
                    TokenStream luceneTokenStream = luceneAnalyzer.tokenStream(configLiusField.getName(),
                            new StringReader(luceneDocumentValues[k]));
                    String fragment = null;
                    // With a configured fragmenter: up to 5 fragments joined by "...";
                    // otherwise a single best fragment.
                    if (configLiusField.getFragmenter() != null)
                        fragment = luceneHighlighter.getBestFragments(luceneTokenStream,
                                luceneDocumentValues[k], 5, "...");
                    else {
                        // NOTE(review): the remaining arguments of this call are missing from the excerpt.
                        fragment = luceneHighlighter.getBestFragment(luceneTokenStream,

                    // A null fragment leaves the stored value untouched; otherwise the value
                    // is replaced by its highlighted form.
                    if (fragment == null) {
                    } else {
                        luceneDocumentValues[k] = fragment;


            liusHitFieldsMap.put(configLiusField.getName(), hitLiusField);

    return liusHit;

From source file: net.hillsdon.reviki.search.impl.LuceneSearcher.java

License: Apache License

private LinkedHashSet<SearchMatch> doQuery(final IndexReader reader, final Analyzer analyzer,
        final Searcher searcher, final String field, final boolean provideExtracts, final Query query)
        throws IOException, CorruptIndexException {
    Highlighter highlighter = null;
    if (provideExtracts) {
        highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new SimpleHTMLEncoder(),
                new QueryScorer(query));
    }//from  w  ww .j  a va  2 s  . c  om
    Hits hits = searcher.search(query);
    LinkedHashSet<SearchMatch> results = new LinkedHashSet<SearchMatch>();
    Iterator<Hit> iter = hits.iterator();
    while (iter.hasNext()) {
        Hit hit = iter.next();
        String text = hit.get(field);
        String extract = null;
        // The text is not stored for all fields, just provide a null extract.
        if (highlighter != null && text != null) {
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            // Get 3 best fragments and separate with a "..."
            extract = highlighter.getBestFragments(tokenStream, text, 3, "...");
        results.add(new SearchMatch(_wikiName.equals(hit.get(FIELD_WIKI)), hit.get(FIELD_WIKI),
                hit.get(FIELD_PATH), extract));
    return results;

From source file: net.sf.zekr.engine.search.lucene.QuranTextSearcher.java

/**
 * Main search method, for internal use.
 *
 * @param q query string
 * @return a list of highlighted string objects.
 * @throws SearchException if parsing, searching or highlighting fails
 */
private List<SearchResultItem> internalSearch(String q) throws SearchException {
    // Parses and rewrites q, runs it (optionally limited to the search scope) and highlights the
    // contents field of each returned document.
    // NOTE(review): this excerpt is visibly incomplete -- closing braces and some statements are
    // missing -- so the comments below describe only what the visible lines do.
    IndexSearcher is = null;
    try {
        is = new IndexSearcher(zekrIndexReader.indexReader);

        // analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        // resultTokenStream = new StandardTokenizer(Version.LUCENE_CURRENT, reader);

        // NOTE(review): the remaining arguments of this call are missing from the excerpt.
        QueryParser parser = QueryParserFactory.create(Version.LUCENE_CURRENT, QuranTextIndexer.CONTENTS_FIELD,

        // allow search terms like "*foo" with leading star
        // parser.setFuzzyPrefixLength(10);

        // if this line is not set, highlighter doesn't work in in wildcard queries while query.rewrite() is done.
        // and sorting also doesn't work correctly for wildcard queries.
        // NOTE(review): the statement the two comment lines above refer to is not visible here.

        logger.debug("Parse query.");
        // 'query' has no visible local declaration; presumably a field -- confirm.
        query = parser.parse(q);

        logger.debug("Rewrite query.");
        query = query.rewrite(zekrIndexReader.indexReader); // required to expand search terms

        logger.debug("Searching for: " + query.toString());
        // Hits hits;
        TopFieldDocs tops = null;
        is.setDefaultFieldSortScoring(true, true);
        // A non-empty search scope is applied as a QuranRangeFilter; otherwise the query itself
        // is wrapped as the filter.
        if (searchScope != null && searchScope.getScopeItems().size() > 0) {
            String scopeQuery = makeSearchScope();
            logger.debug("Scope is: " + scopeQuery);
            // hits = is.search(query, new QuranRangeFilter(searchScope), sortResultOrder);
            tops = is.search(query, new QuranRangeFilter(searchScope), maxSearchResult, sortResultOrder);

        } else {
            // hits = is.search(query, new QueryWrapperFilter(query), 20, sortResultOrder);
            tops = is.search(query, new QueryWrapperFilter(query), maxSearchResult, sortResultOrder);

        logger.debug("Highlight search result.");
        Highlighter highlighter = new Highlighter(highlightFormatter, new QueryScorer(query));
        // highlighter.setFragmentScorer(new QueryTermScorer(query));

        // Never iterate past the configured maximum or the number of hits reported.
        int total = Math.min(maxSearchResult, tops.totalHits);
        List<SearchResultItem> res = new ArrayList<SearchResultItem>(total);
        for (int i = 0; i < total; i++) {
            ScoreDoc[] sd = tops.scoreDocs;
            Document doc = is.doc(sd[i].doc);
            final String contents = doc.get(QuranTextIndexer.CONTENTS_FIELD);
            final IQuranLocation location = new QuranLocation(doc.get(QuranTextIndexer.LOCATION_FIELD));
            TokenStream tokenStream = analyzer.tokenStream(QuranTextIndexer.CONTENTS_FIELD,
                    new StringReader(contents));

            // String resultStr = highlighter.getBestFragment(tokenStream, contents);
            // Up to 100 fragments, joined by "...".
            String resultStr = highlighter.getBestFragments(tokenStream, contents, 100, "...");
            // NOTE(review): no visible line adds 'sri' to 'res'; that statement is presumably
            // missing from this excerpt.
            SearchResultItem sri = new SearchResultItem(resultStr, location);
        matchedItemCount = highlightFormatter.getHighlightCount();
        // highlightedTermList = highlightFormatter.getHighlightedTermList();
        return res;
    } catch (Exception e) {
        // Every failure is wrapped, with the original exception passed to the constructor.
        throw new SearchException(e);
    } finally {
        if (is != null) {
            // NOTE(review): the body of this try is empty in the excerpt; the call that can throw
            // the IOException caught below is presumably missing.
            try {
            } catch (IOException e) {

From source file: org.archive.tnh.servlet.OpenSearchServlet.java

License: Apache License

/**
 * Handles a search request: translates the query parameters into a {@code BooleanQuery}, runs
 * the search, builds an XML document with one {@code item} element per hit on the requested
 * page, writes it as {@code application/rss+xml} and logs the time spent in each phase.
 * <p>
 * NOTE(review): this excerpt is visibly incomplete -- closing braces and some statements are
 * missing (for example the body of the snippet loop) -- so the comments below describe only
 * what the visible lines do.
 *
 * @param request source of the query parameters, either pre-parsed in the
 *        {@code OpenSearchHelper.PARAMS_KEY} attribute or obtained via {@code getQueryParameters}
 * @param response receives the serialized XML document
 * @throws ServletException wrapping any exception raised while handling the request
 * @throws IOException declared by the signature
 */
public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    try {
        // Start of the timed phases; all timestamps below are System.nanoTime() values.
        long responseTime = System.nanoTime();

        // Use parameters already attached to the request, otherwise parse them now.
        QueryParameters p = (QueryParameters) request.getAttribute(OpenSearchHelper.PARAMS_KEY);
        if (p == null) {
            p = getQueryParameters(request);

        BooleanQuery q = this.translator.translate(p.query, this.foldAccents);

        this.translator.addFilterGroup(q, "site", p.sites);
        this.translator.addFilterGroup(q, "type", p.types);
        this.translator.addFilterGroup(q, "collection", p.collections);
        this.translator.addFilterGroup(q, "date", p.dates);

        long parseQueryTime = System.nanoTime();

        if (Arrays.equals(p.indexNames, QueryParameters.ALL_INDEXES)) {
            if (p.excludes.length > 0) {
                // If there are indexes to exclude, exclude them.
                p.indexNames = removeExcludes(p.excludes);
        } else {
            // There are explicitly named indexes.  Weed out any unknown names.
            p.indexNames = removeUnknownIndexNames(p.indexNames);

        Search.Result result;
        if (p.indexNames.length == 0) {
            // No index left to search: use an empty result instead of calling the searcher.
            result = new Search.Result();
            result.hits = new Hit[0];
        } else {
            // Requests hits up to three pages past the start offset.
            result = this.searcher.search(p.indexNames, q, p.start + (p.hitsPerPage * 3), p.hitsPerSite);

        long executeQueryTime = System.nanoTime();

        // The 'end' is usually just the end of the current page
        // (start+hitsPerPage); but if we are on the last page
        // of de-duped results, then the end is hits.getLength().
        int end = Math.min(result.hits.length, p.start + p.hitsPerPage);

        // The length is usually just (end-start), unless the start
        // position is past the end of the results -- which is common when
        // de-duping.  The user could easily jump past the true end of the
        // de-dup'd results.  If the start is past the end, we use a
        // length of '0' to produce an empty results page.
        // NOTE(review): 'length' is not read by any visible line.
        int length = Math.max(end - p.start, 0);

        // Usually, the total results is the total number of non-de-duped
        // results.  However, if we are on last page of de-duped results,
        // then we know our de-dup'd total is result.hits.length.
        long totalResults = result.hits.length < (p.start + p.hitsPerPage) ? result.hits.length
                : result.numRawHits;

        // XML document for the response; the Lucene document type is fully qualified below to
        // avoid the name clash.
        Document doc = new Document();

        Element channel = OpenSearchHelper.startResponse(doc, p, request, totalResults);

        // Add hits to XML Document
        for (int i = p.start; i < end; i++) {
            org.apache.lucene.document.Document hit = result.searcher.doc(result.hits[i].id);

            Element item = JDOMHelper.add(channel, "item");

            // Replace & and < with their XML entity counterparts to
            // ensure that any HTML markup in the snippet is escaped
            // before we do the highlighting.
            String title = hit.get("title");
            if (title != null) {
                title = title.replaceAll("[&]", "&amp;");
                title = title.replaceAll("[<]", "&lt;");
            JDOMHelper.add(item, "title", title);

            JDOMHelper.add(item, "link", hit.get("url"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "docId", String.valueOf(result.hits[i].id));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "score", String.valueOf(result.hits[i].score));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "site", result.hits[i].site);
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "length", hit.get("length"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "type", hit.get("type"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "boost", hit.get("boost"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "collection", hit.get("collection"));

            String indexName = this.searcher.resolveIndexName(result.searcher, result.hits[i].id);
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "index", indexName);

            // A hit may carry several "date" values; each gets its own element.
            for (String date : hit.getValues("date")) {
                JDOMHelper.add(item, "date", date);

            String raw = getContent(hit);

            StringBuilder buf = new StringBuilder(100);

            // A new highlighter and analyzer are created for every hit; the scorer is limited to
            // the "content" field.
            Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new NonBrokenHTMLEncoder(),
                    new QueryScorer(q, "content"));

            CustomAnalyzer analyzer = new CustomAnalyzer();

            // NOTE(review): the loop body is missing from this excerpt; presumably it appends
            // each snippet to 'buf', whose content becomes the "description" below.
            for (String snippet : highlighter.getBestFragments(analyzer, "content", raw,
                    this.contextSnippetsPerResult)) {

            JDOMHelper.add(item, "description", buf.toString());

            // Last, but not least, add a hit explanation, if enabled
            // 'explain' has no visible local declaration; presumably a field -- confirm.
            if (explain) {
                JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "explain",
                        result.searcher.explain(q, result.hits[i].id).toHtml());

        OpenSearchHelper.addResponseTime(channel, System.nanoTime() - responseTime);

        long buildResultsTime = System.nanoTime();

        OpenSearchHelper.writeResponse(doc, response, "application/rss+xml");

        long writeResponseTime = System.nanoTime();

        // Phase durations in milliseconds (nanoseconds / 1000 / 1000): parse, execute, build,
        // write, followed by the query text.
        LOG.info("S: " + ((parseQueryTime - responseTime) / 1000 / 1000) + " "
                + ((executeQueryTime - parseQueryTime) / 1000 / 1000) + " "
                + ((buildResultsTime - executeQueryTime) / 1000 / 1000) + " "
                + ((writeResponseTime - buildResultsTime) / 1000 / 1000) + " " + p.query);
    } catch (Exception e) {
        // Any failure is reported to the container with the original exception as the cause.
        throw new ServletException(e);

From source file: org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License: Apache License

public String fragmentsWithSeparator(Resource resource, String propertyName, String text)
        throws SearchEngineException {
    Highlighter highlighter = createHighlighter(propertyName);
    TokenStream tokenStream = createTokenStream(resource, propertyName, text);
    try {//  w ww  .jav  a2  s  .c o m
        String actualSeparator = getActualSeparator();
        return highlighter.getBestFragments(tokenStream, text, getMaxNumFragments(), actualSeparator);
    } catch (IOException e) {
        throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias()
                + "] and property [" + propertyName + "]");

From source file: org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License: Open Source License

public synchronized String getSnippet(String fieldName, String text, Highlighter highlighter) {
    String snippet;//from ww  w .  j a  v  a  2 s  .c o m
    try {
        TokenStream tokenStream = getAnalyzer().tokenStream(fieldName, new StringReader(text));
        snippet = highlighter.getBestFragments(tokenStream, text, 2, "...");
    } catch (Exception e) {
        logger.error("Exception while getting snippet for field " + fieldName, e);
        snippet = null;
    return snippet;

From source file: org.eclipse.skalli.core.search.LuceneIndex.java

License: Open Source License

/**
 * Returns the highlighted form of {@code fieldContents} when {@code fieldName} is one of
 * {@code fields}, and the unchanged contents otherwise.
 * <p>
 * NOTE(review): this excerpt is incomplete -- the last argument of the
 * {@code getBestFragments} call and the closing braces are missing.
 *
 * @param highlighter highlighter that extracts the fragments
 * @param fields names of the fields that should be highlighted
 * @param fieldName name of the field the contents belong to
 * @param fieldContents text to highlight; {@code null} is returned unchanged
 * @return the highlighted text, or {@code fieldContents} itself if the field is not selected,
 *         no fragments were found, or highlighting failed
 * @throws IOException declared by the signature; failures of the visible highlighting call are
 *         caught and logged instead
 */
private String doHighlight(final Highlighter highlighter, final List<String> fields, final String fieldName,
        String fieldContents) throws IOException {
    // Fall back to the raw contents unless highlighting succeeds below.
    String highlighted = fieldContents;
    if (fieldContents != null && fields.contains(fieldName)) {
        try {
            String[] fragments = highlighter.getBestFragments(analyzer, fieldName, fieldContents,
            if (fragments != null && fragments.length > 0) {
                highlighted = LuceneUtil.withEllipsis(fragments, fieldContents);
        } catch (Exception e) {
            // Highlighting is best effort: log and keep the unhighlighted contents.
            LOG.error(MessageFormat.format("Failed to highlight search result ''{0}''", fieldContents), e);
    return highlighted;

From source file: org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighterTests.java

License: Apache License

public void testHighlightPhrase() throws Exception {
    Query query = new PhraseQuery.Builder().add(new Term("field", "foo")).add(new Term("field", "bar")).build();
    QueryScorer queryScorer = new CustomQueryScorer(query);
    org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(
            queryScorer);//  w w w. jav  a 2  s .com
    String[] frags = highlighter.getBestFragments(new MockAnalyzer(random()), "field", "bar foo bar foo", 10);
    assertArrayEquals(new String[] { "bar <B>foo</B> <B>bar</B> foo" }, frags);

From source file: org.jamwiki.search.LuceneSearchEngine.java


 *//*from   w w  w.ja  va 2s.c  o  m*/
private String retrieveResultSummary(Document document, Highlighter highlighter, StandardAnalyzer analyzer)
        throws Exception {
    String content = document.get(ITYPE_CONTENT_PLAIN);
    TokenStream tokenStream = analyzer.tokenStream(ITYPE_CONTENT_PLAIN, new StringReader(content));
    String summary = highlighter.getBestFragments(tokenStream, content, 3, "...");
    if (StringUtils.isBlank(summary) && !StringUtils.isBlank(content)) {
        summary = StringEscapeUtils.escapeHtml(content.substring(0, Math.min(200, content.length())));
        if (Math.min(200, content.length()) == 200) {
            summary += "...";
    return summary;