This page shows example usages of the constructor of org.apache.lucene.search.highlight.Highlighter.


public Highlighter(Formatter formatter, Encoder encoder, Scorer fragmentScorer) 

From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    // NOTE(review): as listed, this method has unbalanced braces, a `result` that is never
    // added to `list`, and an empty try body inside the finally block. Lines appear to have
    // been lost from this listing -- confirm against the original source before reuse.
    Searcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        QueryParser qp = new MultiFieldQueryParser(queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        // A highlighter is only created when the caller requested search contexts;
        // otherwise it stays null and no fragments are computed further down.
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));

        // Failing to open the index is logged at info level and reported to the caller as null.
        try {
            searcher = new IndexSearcher(m_luceneDirectory);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;

        Hits hits = searcher.search(luceneQuery);

        list = new ArrayList<SearchResult>(hits.length());
        for (int curr = 0; curr < hits.length(); curr++) {
            Document doc = hits.doc(curr);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.

                // The hit score is multiplied by 100 and truncated to an int.
                int score = (int) (hits.score(curr) * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                // Stays empty when the page text is not available or no highlighter was created.
                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);


                SearchResult result = new SearchResultImpl(page, score, fragments);
            } else {
                // The index returned a page name that the engine can no longer load:
                // a WikiPage built from that name is passed to pageRemoved(...).
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (InstantiationException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (IllegalAccessException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (ClassNotFoundException e) {
        log.error("Specified Lucene analyzer does not exist", e);
    } catch (ParseException e) {
        // An unparsable query is the only failure rethrown to the caller; the handlers above
        // only log, after which the method continues to `return list` (which may still be null).
        log.info("Broken query; cannot parse", e);

        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } finally {
        if (searcher != null) {
            try {
            } catch (IOException e) {

    return list;

From source file:com.meltmedia.cadmium.search.SearchService.java

License:Apache License

/**
 * Runs a search through an anonymous {@code SearchTemplate} and collects the outcome in a map.
 *
 * <p>The returned map always contains {@code "number-hits"} (initially 0, later the total hit
 * count) and {@code "results"} (a list). For every hit a map with the keys {@code "excerpt"},
 * {@code "score"}, {@code "title"} and {@code "path"} is filled in.
 *
 * <p>NOTE(review): this listing has unbalanced braces and three truncated calls (the
 * {@code Highlighter} constructor and both {@code getBestFragments} calls); the per-hit map is
 * also never added to the result list in the visible code. Lines appear to have been lost --
 * confirm against the original source.
 *
 * @param query the user query; matches of {@code ALLOWED_CHARS_PATTERN} are rewritten so that
 *        group 1 is prefixed with a backslash before the query is parsed
 * @param path optional path prefix; when not blank, hits must also match a prefix query on the
 *        {@code "path"} field
 * @return the result map described above
 * @throws Exception declared by the signature; the visible body does not show which failures
 *         reach the caller
 */
private Map<String, Object> buildSearchResults(final String query, final String path) throws Exception {
    logger.info("Running search for [{}]", query);
    final Map<String, Object> resultMap = new LinkedHashMap<String, Object>();

    new SearchTemplate(provider) {
        public void doSearch(IndexSearcher index) throws IOException, ParseException {
            QueryParser parser = createParser(getAnalyzer());

            // Defaults are stored first so the map is populated even when no search is run.
            resultMap.put("number-hits", 0);

            List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();

            resultMap.put("results", resultList);

            if (index != null && parser != null) {
                String literalQuery = query.replaceAll(ALLOWED_CHARS_PATTERN, "\\\\$1");
                Query query1 = parser.parse(literalQuery);
                if (StringUtils.isNotBlank(path)) {
                    // Both the path prefix and the parsed query are required (Occur.MUST).
                    Query pathPrefix = new PrefixQuery(new Term("path", path));
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(pathPrefix, Occur.MUST);
                    boolQuery.add(query1, Occur.MUST);
                    query1 = boolQuery;
                TopDocs results = index.search(query1, null, 100000);
                QueryScorer scorer = new QueryScorer(query1);
                // NOTE(review): the last constructor argument is missing from this listing;
                // presumably `scorer` -- confirm.
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),

                logger.info("Search returned {} hits.", results.totalHits);
                resultMap.put("number-hits", results.totalHits);

                for (ScoreDoc doc : results.scoreDocs) {
                    Document document = index.doc(doc.doc);
                    String content = document.get("content");
                    String title = document.get("title");

                    Map<String, Object> result = new LinkedHashMap<String, Object>();
                    String excerpt = "";

                    // First try to build the excerpt from the document content.
                    try {
                        excerpt = highlighter.getBestFragments(
                                parser.getAnalyzer().tokenStream(null, new StringReader(content)), content, 3,
                        excerpt = fixExcerpt(excerpt);

                        result.put("excerpt", excerpt);
                    } catch (Exception e) {
                        logger.debug("Failed to get search excerpt from content.", e);

                        // Fall back to the title; if that fails too, store an empty excerpt.
                        try {
                            excerpt = highlighter.getBestFragments(
                                    parser.getAnalyzer().tokenStream(null, new StringReader(title)), title, 1,
                            excerpt = fixExcerpt(excerpt);

                            result.put("excerpt", excerpt);
                        } catch (Exception e1) {
                            logger.debug("Failed to get search excerpt from title.", e1);

                            result.put("excerpt", "");

                    result.put("score", doc.score);
                    result.put("title", title);
                    result.put("path", document.get("path"));



    return resultMap;

From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java

License:Apache License

/**
 * Searches the term index for {@code pattern} and returns highlighted results.
 *
 * <p>The pattern is escaped with {@code QuerySyntaxUtil.escapeQueryPattern} and parsed against
 * {@code FIELD_TERM}. A hit is reported only when its best highlighted fragment has a positive
 * score.
 *
 * <p>NOTE(review): this listing has unbalanced braces, a truncated
 * {@code TokenSources.getTokenStream(...)} call and an empty {@code finally} block; no
 * {@code lock()}/{@code unlock()} call on the read lock is visible either. Lines appear to
 * have been lost -- confirm against the original source.
 *
 * @param pattern the raw search pattern
 * @param includeSynonyms when {@code false}, documents flagged as synonyms are skipped
 * @return the matching results; an empty list when the escaped pattern is shorter than
 *         {@code EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE}
 * @throws InvalidQuerySyntaxException if parsing the pattern raises {@code ParseException} or
 *         {@code TokenMgrError}
 */
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms)
        throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;

    // default QueryParser.escape(pattern) method does not support phrase queries
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);
    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        return Collections.emptyList();

    logger.log(Level.FINE, "Escaped search pattern: " + pattern);

    Lock lock = rwlock.readLock();
    // A non-null `exception` field makes the search fail immediately with a RuntimeException.
    if (exception != null) {
        throw new RuntimeException("Failed to refesh index reader after last commit", exception);

    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);

        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);

        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            // A document counts as a synonym only when the field holds exactly one byte equal to 1.
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;

            if (!isSynonym || includeSynonyms) {
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                // NOTE(review): the last argument of getTokenStream is missing from this listing;
                // presumably `highlighterAnalyzer` -- confirm.
                TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM,
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                // Keep the hit only when the single requested fragment exists and scored above zero.
                if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                    results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                            frag[0].getScore(), isSynonym));

        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        // Any other failure is logged as a warning and wrapped in a RuntimeException.
        String msg = "Failed to perform Lucene seach with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {

From source file:com.pongasoft.kiwidoc.index.impl.keyword.impl.KeywordIndexImpl.java

License:Apache License

/**
 * Highlights the provided results obtained using the provided query.
 *
 * @param query  the query from which the results were computed
 * @param models the models to highlight
 * @return a map representing for each entry in the model its associated resource and highlight
 * @throws MalformedQueryException if the query cannot be parsed
 * @throws InternalException if there is an internal problem
 */
public <R extends Resource> Map<R, String[]> highlightResults(KeywordQuery query, Collection<Model<R>> models)
        throws InternalException, MalformedQueryException {
    // NOTE(review): this listing has unbalanced braces and a truncated
    // TokenSources.getTokenStream(...) call; lines appear to have been lost -- confirm
    // against the original source.
    Map<R, String[]> res = new LinkedHashMap<R, String[]>();

    Query parsedQuery = parseQuery(query);

    // When parseQuery yields null nothing is highlighted and the empty map is returned.
    if (parsedQuery != null) {
        Highlighter highlighter = new Highlighter(_highlighterFormatter, HTML_ENCODER,
                new QueryScorer(parsedQuery));

        for (Model<R> model : models) {
            // A throwaway document holding only the body text is built for each model
            // so that a token stream can be derived from it.
            Document document = new Document();
            String bodyText = buildBody(model);
            document.add(new Field(DocumentFactory.BODY_FIELD, bodyText, Field.Store.NO, Field.Index.ANALYZED));
            TokenStream tokenStream = TokenSources.getTokenStream(document, DocumentFactory.BODY_FIELD,
            try {
                // At most 2 fragments are requested per model.
                res.put(model.getResource(), highlighter.getBestFragments(tokenStream, bodyText, 2));
            } catch (IOException e) {
                // Deliberately ignored: the model simply gets no entry in the result map.
                log.warn("exception while computing highlight... [ignored]", e);

    return res;

From source file:com.tripod.lucene.service.AbstractLuceneService.java

License:Apache License

/**
 * Creates the highlighter for the given query.
 *
 * @param query the tripod query being performed
 * @param luceneQuery the Lucene query being performed
 * @return the highlighter to use if the tripod query has one or more highlight fields, or null
 */
private Highlighter getHighlighter(final Q query, final Query luceneQuery) {
    // Stays null unless the query names at least one highlight field.
    Highlighter highlighter = null;
    if (query.getHighlightFields() != null && query.getHighlightFields().size() > 0) {
        SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
        // NOTE(review): the remaining SimpleHTMLFormatter argument(s) and the closing braces
        // are missing from this listing -- confirm against the original source.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(query.getHighlightPreTag(),
        highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(luceneQuery));
    return highlighter;

From source file:natural.language.qa.LuceneSearch.java

License:Apache License

/**
 * Searches the {@code "contents"} field of the index named by the {@code "index"} property
 * and builds one {@code LuceneSearchResult} per hit from the document id, its {@code "path"}
 * field and the best highlighted fragment of the document text.
 *
 * <p>NOTE(review): this listing has unbalanced braces and an empty {@code finally} body; the
 * properties stream is opened but never loaded or closed, and each {@code hit} is created
 * but never added to {@code results}. Lines appear to have been lost -- confirm against the
 * original source.
 *
 * @param queryString the query text; it is trimmed, and an empty query returns the empty list
 * @param maxRes maximum number of hits requested from the searcher
 * @return the list of results
 * @throws Exception declared broadly by the signature
 */
public List<LuceneSearchResult> search(String queryString, int maxRes) throws Exception {
    IndexSearcher searcher = null;
    List<LuceneSearchResult> results = new ArrayList<LuceneSearchResult>();
    try {
        Properties indexConf = new Properties();
        // NOTE(review): no load of `fis` into `indexConf` is visible here, so as listed
        // getProperty("index") below would return null -- presumably a lost line; confirm.
        FileInputStream fis = new FileInputStream("index.properties");

        String index = indexConf.getProperty("index");
        String field = "contents";

        Directory indexDir = FSDirectory.open(new File(index));

        searcher = new IndexSearcher(indexDir);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

        QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);

        queryString = queryString.trim();
        if (queryString.length() == 0) {
            return results;

        Query query = parser.parse(queryString);
        System.out.println("Searching for: " + query.toString(field));

        // ================================================
        // Both formatter tags are empty strings, so matches are not wrapped in markup.
        Formatter f = new SimpleHTMLFormatter("", "");
        Encoder e = new DefaultEncoder();
        QueryScorer fs = new QueryScorer(query);
        // NOTE(review): `fragmenter` is created but not handed to the highlighter in this listing.
        Fragmenter fragmenter = new SimpleSpanFragmenter(fs, 50);// new SentenceFragmenter();
        Highlighter h = new Highlighter(f, e, fs);

        // ================================================

        // Collect docs
        TopDocs res = searcher.search(query, maxRes);
        int numTotalHits = res.totalHits;
        ScoreDoc[] scoreDocs = res.scoreDocs;

        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            // The document text is re-read via readDocument(path) and used for highlighting.
            String path = doc.get("path");
            String content = readDocument(path);
            String bestFragment = h.getBestFragment(analyzer, field, content);
            String frag = bestFragment;
            LuceneSearchResult hit = new LuceneSearchResult(scoreDoc.doc, path, frag);
        System.out.println(numTotalHits + " total matching documents");
    } finally {
        if (searcher != null) {
    return results;

From source file:net.hillsdon.reviki.search.impl.LuceneSearcher.java

License:Apache License

private LinkedHashSet<SearchMatch> doQuery(final IndexReader reader, final Analyzer analyzer,
        final Searcher searcher, final String field, final boolean provideExtracts, final Query query)
        throws IOException, CorruptIndexException {
    Highlighter highlighter = null;
    if (provideExtracts) {
        highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new SimpleHTMLEncoder(),
                new QueryScorer(query));
    }/*w  ww  . ja v a2s . co  m*/
    Hits hits = searcher.search(query);
    LinkedHashSet<SearchMatch> results = new LinkedHashSet<SearchMatch>();
    Iterator<Hit> iter = hits.iterator();
    while (iter.hasNext()) {
        Hit hit = iter.next();
        String text = hit.get(field);
        String extract = null;
        // The text is not stored for all fields, just provide a null extract.
        if (highlighter != null && text != null) {
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            // Get 3 best fragments and separate with a "..."
            extract = highlighter.getBestFragments(tokenStream, text, 3, "...");
        results.add(new SearchMatch(_wikiName.equals(hit.get(FIELD_WIKI)), hit.get(FIELD_WIKI),
                hit.get(FIELD_PATH), extract));
    return results;

From source file:org.apache.maven.index.DefaultIteratorResultSet.java

License:Apache License

protected final List<String> getBestFragments(Query query, Formatter formatter, TokenStream tokenStream,
        String text, int maxNumFragments) throws IOException {
    Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query));

    highlighter.setTextFragmenter(new OneLineFragmenter());

    maxNumFragments = Math.max(1, maxNumFragments); // sanity check

    TextFragment[] frag;/*from w w w .  j  av a  2  s .  c  om*/
    // Get text
    ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments);

    try {
        frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments);

        for (int i = 0; i < frag.length; i++) {
            if ((frag[i] != null) && (frag[i].getScore() > 0)) {
    } catch (InvalidTokenOffsetsException e) {
        // empty?

    return fragTexts;

From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java

License:Open Source License

 * Return a {@link org.apache.lucene.search.highlight.Highlighter}
 * appropriate for this field./*w  ww  .jav  a2s .  co m*/
 * @param query
 *            The current Query
 * @param requestFieldname
 *            The name of the field
 * @param request
 *            The current SolrQueryRequest
protected Highlighter getHighlighter(Query query, String requestFieldname, SolrQueryRequest request) {
    String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldname,
            FieldUse.HIGHLIGHT, request);
    SolrParams params = request.getParams();
    Highlighter highlighter = new Highlighter(getFormatter(requestFieldname, params),
            getEncoder(requestFieldname, params), getQueryScorer(query, schemaFieldName, request));
    highlighter.setTextFragmenter(getFragmenter(requestFieldname, params));
    return highlighter;

From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java

License:Apache License

/**
 * Return a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 *
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 * @param tokenStream document text CachingTokenStream
 * @throws IOException If there is a low-level I/O error.
 */
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request,
        CachingTokenFilter tokenStream) throws IOException {
    // Formatter, encoder and fragmenter are all looked up by field name from the request parameters.
    SolrParams params = request.getParams();
    Highlighter highlighter = null;

    // The scorer comes from getSpanQueryScorer(...), which also receives the caching token stream.
    highlighter = new Highlighter(getFormatter(fieldName, params), getEncoder(fieldName, params),
            getSpanQueryScorer(query, fieldName, tokenStream, request));

    highlighter.setTextFragmenter(getFragmenter(fieldName, params));

    return highlighter;