public Highlighter(Formatter formatter, Encoder encoder, Scorer fragmentScorer) 

Source Link


From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java

License:Apache License

/**
 * Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 */
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
    // All per-field settings are looked up from the request's parameters.
    SolrParams params = request.getParams();
    // Formatter, encoder and scorer are each resolved for this field through the
    // corresponding get* helper and handed to the three-argument constructor.
    Highlighter highlighter = new Highlighter(getFormatter(fieldName, params), getEncoder(fieldName, params),
            getQueryScorer(query, fieldName, request));
    // The fragmenter is not a constructor argument, so it is set separately.
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:org.apache.wiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Searches pages using a particular combination of flags.
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    // NOTE(review): this excerpt is truncated - several closing braces and some
    // statements (e.g. the body of the finally block) are missing.
    IndexSearcher searcher = null;
    // Stays null until the search has actually run; see the return at the end.
    ArrayList<SearchResult> list = null;
    // Only created when FLAG_CONTEXTS is requested; null means "no fragments".
    Highlighter highlighter = null;

    try {
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        // Build a highlighter only if the caller asked for search contexts.
        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));

        // Open the on-disk index; any failure here is logged at info level and
        // the method returns null instead of a collection.
        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;

        // At most MAX_SEARCH_HITS hits are requested from the searcher.
        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            // The stored LUCENE_ID field is used as the wiki page name.
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.

                // Raw hit score multiplied by 100 and truncated to an int.
                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                // Defaults to an empty array when there is no stored text or no highlighter.
                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);


                // NOTE(review): the statement that adds 'result' to 'list' is not
                // visible in this excerpt.
                SearchResult result = new SearchResultImpl(page, score, fragments);
            } else {
                // The index references a page the engine can no longer load:
                // log it and notify pageRemoved for that page name.
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
    } catch (IOException e) {
        // Logged only; execution continues to the return below.
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);

        // An unparsable query is the only failure reported to the caller as an exception.
        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        // NOTE(review): the cleanup call inside this try block is missing from the
        // excerpt; presumably it closes the searcher - confirm against the full source.
        if (searcher != null) {
            try {
            } catch (IOException e) {

    return list;

From source file:org.archive.tnh.servlet.OpenSearchServlet.java

License:Apache License

public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    // Handles a search request: translates the query, runs it, builds an XML
    // document of hits with highlighted snippets and writes it to the response.
    // NOTE(review): this excerpt is truncated - many closing braces and some
    // statements (e.g. the snippet loop body) are missing.
    try {
        // Start timestamp (nanoseconds) used for the per-phase timings logged at the end.
        long responseTime = System.nanoTime();

        // Reuse parameters already attached to the request; otherwise parse them.
        QueryParameters p = (QueryParameters) request.getAttribute(OpenSearchHelper.PARAMS_KEY);
        if (p == null) {
            p = getQueryParameters(request);

        BooleanQuery q = this.translator.translate(p.query, this.foldAccents);

        // Add one filter group per filter kind carried in the query parameters.
        this.translator.addFilterGroup(q, "site", p.sites);
        this.translator.addFilterGroup(q, "type", p.types);
        this.translator.addFilterGroup(q, "collection", p.collections);
        this.translator.addFilterGroup(q, "date", p.dates);

        long parseQueryTime = System.nanoTime();

        if (Arrays.equals(p.indexNames, QueryParameters.ALL_INDEXES)) {
            if (p.excludes.length > 0) {
                // If there are indexes to exclude, exclude them.
                p.indexNames = removeExcludes(p.excludes);
        } else {
            // There are explicitly named indexes.  Weed out any unknown names.
            p.indexNames = removeUnknownIndexNames(p.indexNames);

        Search.Result result;
        if (p.indexNames.length == 0) {
            // No index left to search: use an empty result instead of querying.
            result = new Search.Result();
            result.hits = new Hit[0];
        } else {
            // Requests up to start + three pages' worth of hits.
            result = this.searcher.search(p.indexNames, q, p.start + (p.hitsPerPage * 3), p.hitsPerSite);

        long executeQueryTime = System.nanoTime();

        // The 'end' is usually just the end of the current page
        // (start+hitsPerPage); but if we are on the last page
        // of de-duped results, then the end is hits.getLength().
        int end = Math.min(result.hits.length, p.start + p.hitsPerPage);

        // The length is usually just (end-start), unless the start
        // position is past the end of the results -- which is common when
        // de-duping.  The user could easily jump past the true end of the
        // de-dup'd results.  If the start is past the end, we use a
        // length of '0' to produce an empty results page.
        int length = Math.max(end - p.start, 0);

        // Usually, the total results is the total number of non-de-duped
        // results.  However, if we are on last page of de-duped results,
        // then we know our de-dup'd total is result.hits.length.
        long totalResults = result.hits.length < (p.start + p.hitsPerPage) ? result.hits.length
                : result.numRawHits;

        // This Document is the XML response being built, not a Lucene document
        // (the Lucene type is referenced by its fully-qualified name below).
        Document doc = new Document();

        Element channel = OpenSearchHelper.startResponse(doc, p, request, totalResults);

        // Add hits to XML Document
        for (int i = p.start; i < end; i++) {
            org.apache.lucene.document.Document hit = result.searcher.doc(result.hits[i].id);

            Element item = JDOMHelper.add(channel, "item");

            // Replace & and < with their XML entity counterparts to
            // ensure that any HTML markup in the snippet is escaped
            // before we do the highlighting.
            String title = hit.get("title");
            if (title != null) {
                // '&' must be replaced first so the '&' in "&lt;" is not escaped again.
                title = title.replaceAll("[&]", "&amp;");
                title = title.replaceAll("[<]", "&lt;");
            JDOMHelper.add(item, "title", title);

            JDOMHelper.add(item, "link", hit.get("url"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "docId", String.valueOf(result.hits[i].id));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "score", String.valueOf(result.hits[i].score));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "site", result.hits[i].site);
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "length", hit.get("length"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "type", hit.get("type"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "boost", hit.get("boost"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "collection", hit.get("collection"));

            String indexName = this.searcher.resolveIndexName(result.searcher, result.hits[i].id);
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "index", indexName);

            // A hit may carry several "date" values; one element is added per value.
            for (String date : hit.getValues("date")) {
                JDOMHelper.add(item, "date", date);

            String raw = getContent(hit);

            // Accumulates the snippet text that becomes the item's "description".
            StringBuilder buf = new StringBuilder(100);

            // A new highlighter, scoring against the "content" field, is built for each hit.
            Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new NonBrokenHTMLEncoder(),
                    new QueryScorer(q, "content"));

            CustomAnalyzer analyzer = new CustomAnalyzer();

            // NOTE(review): the loop body that appends each snippet to 'buf' is
            // missing from this excerpt.
            for (String snippet : highlighter.getBestFragments(analyzer, "content", raw,
                    this.contextSnippetsPerResult)) {

            JDOMHelper.add(item, "description", buf.toString());

            // Last, but not least, add a hit explanation, if enabled
            // NOTE(review): 'explain' is not declared in the visible part of this
            // method - presumably a field or an elided local; confirm.
            if (explain) {
                JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "explain",
                        result.searcher.explain(q, result.hits[i].id).toHtml());

        OpenSearchHelper.addResponseTime(channel, System.nanoTime() - responseTime);

        long buildResultsTime = System.nanoTime();

        OpenSearchHelper.writeResponse(doc, response, "application/rss+xml");

        long writeResponseTime = System.nanoTime();

        // Logs the duration of each phase (parse, execute, build, write); the
        // nanosecond differences are divided down to milliseconds.
        LOG.info("S: " + ((parseQueryTime - responseTime) / 1000 / 1000) + " "
                + ((executeQueryTime - parseQueryTime) / 1000 / 1000) + " "
                + ((buildResultsTime - executeQueryTime) / 1000 / 1000) + " "
                + ((writeResponseTime - buildResultsTime) / 1000 / 1000) + " " + p.query);
    } catch (Exception e) {
        // Any failure is rethrown wrapped in a ServletException, keeping the cause.
        throw new ServletException(e);

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

protected Highlighter createHighlighter(String propertyName) throws SearchEngineException {
    // Formatter and encoder come from highlighterSettings; the scorer is built
    // for the given property by createScorer.
    Highlighter highlighter = new Highlighter(highlighterSettings.getFormatter(),
            highlighterSettings.getEncoder(), createScorer(propertyName));
    Fragmenter f = highlighterSettings.getFragmenter();
    highlighter.setTextFragmenter(f);
    // NOTE(review): both branches below are empty in this excerpt; the statements
    // that depend on maxBytesToAnalyze (and the closing braces) appear to have
    // been lost - confirm against the full source.
    if (maxBytesToAnalyze == -1) {
    } else {
    return highlighter;

From source file:org.jamwiki.search.LuceneSearchEngine.java


/**
 * Find all documents that contain a specific search term, ordered by relevance.
 * This method supports all Lucene search query syntax.
 * @param virtualWiki The virtual wiki for the topic.
 * @param text The search term being searched for.
 * @return A collection of SearchResultEntry objects for all documents that
 *  contain the search term.
 */
public Collection findResults(String virtualWiki, String text) {
    // NOTE(review): this excerpt is truncated - the statements that populate
    // 'result', add it to 'results' and clean up the searcher are missing.
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Collection results = new Vector();
    logger.fine("search text: " + text);
    IndexSearcher searcher = null;
    try {
        // The same text is parsed twice, once per indexed field (topic and
        // content), and each parsed query is added as a SHOULD clause.
        BooleanQuery query = new BooleanQuery();
        QueryParser qp;
        qp = new QueryParser(ITYPE_TOPIC, analyzer);
        query.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(ITYPE_CONTENT, analyzer);
        query.add(qp.parse(text), Occur.SHOULD);
        searcher = new IndexSearcher(FSDirectory.getDirectory(getSearchIndexPath(virtualWiki)));
        // rewrite the query to expand it - required for wildcards to work with highlighter
        Query rewrittenQuery = searcher.rewrite(query);
        // actually perform the search
        Hits hits = searcher.search(rewrittenQuery);
        // The highlighter is scored against the rewritten query, the same one
        // that was executed.
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new SimpleHTMLEncoder(),
                new QueryScorer(rewrittenQuery));
        for (int i = 0; i < hits.length(); i++) {
            String summary = retrieveResultSummary(hits.doc(i), highlighter, analyzer);
            SearchResultEntry result = new SearchResultEntry();
    } catch (Exception e) {
        // Failures are logged; the results collected so far are still returned.
        logger.severe("Exception while searching for " + text, e);
    } finally {
        // NOTE(review): the cleanup call inside this try block is missing from
        // the excerpt.
        if (searcher != null) {
            try {
            } catch (Exception e) {
    return results;

From source file:org.jamwiki.search.RankingAlgorithmSearchEngine.java


/**
 * Find all documents that contain a specific search term, ordered by relevance.
 * This method supports all Lucene search query syntax.
 * @param virtualWiki The virtual wiki for the topic.
 * @param text The search term being searched for.
 * @return A list of SearchResultEntry objects for all documents that
 *  contain the search term.
 */
public List<SearchResultEntry> findResults(String virtualWiki, String text, List<Integer> namespaces) {
    // NOTE(review): this excerpt is truncated - the statement that adds each
    // 'result' to 'results' and several closing braces are missing.
    StandardAnalyzer analyzer = new StandardAnalyzer(USE_LUCENE_VERSION);
    List<SearchResultEntry> results = new ArrayList<SearchResultEntry>();
    logger.trace("search text: " + text);
    try {
        IndexSearcher searcher = this.retrieveIndexSearcher(virtualWiki);
        Query query = this.createSearchQuery(searcher, analyzer, text, namespaces);
        // actually perform the search
        // NOTE(review): 'collector' is not used anywhere in the visible code.
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAXIMUM_RESULTS_PER_SEARCH, true);
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new SimpleHTMLEncoder(),
                new QueryScorer(query));
        try {
            // The RankingAlgorithm classes are looked up by name and driven
            // entirely through reflection.
            Class classRQ = Class.forName("com.transaxtions.search.rankingalgorithm.RankingQuery");
            Class classQuery = Class.forName("org.apache.lucene.search.Query");
            Object rq = classRQ.newInstance();
            // Parameter types for RankingQuery.search(Query, <searcher class>).
            Class classArray[] = new Class[2];
            classArray[0] = classQuery;
            classArray[1] = searcher.getClass();
            Object args[] = new Object[2];
            args[0] = query;
            args[1] = searcher;
            Method methodRQ_search = classRQ.getMethod("search", classArray);
            Object hitsobject = methodRQ_search.invoke(rq, args);
            // The hits object is accessed through its runtime class: length(),
            // docid(int) and score(int).
            Class classRH = hitsobject.getClass();
            classArray = new Class[1];
            classArray[0] = int.class;
            Method methodRH_length = classRH.getMethod("length", null);
            Method methodRH_docid = classRH.getMethod("docid", classArray);
            Method methodRH_score = classRH.getMethod("score", classArray);
            Object lenobject = methodRH_length.invoke(hitsobject);
            int length = ((Integer) lenobject).intValue();
            for (int i = 0; i < length; i++) {
                // The same one-element argument array (the hit index) is passed
                // to both docid and score.
                args = new Object[1];
                args[0] = new Integer(i);
                Object docobject = methodRH_docid.invoke(hitsobject, args);
                int docId = ((Integer) docobject).intValue();
                Document doc = searcher.doc(docId);
                String summary = retrieveResultSummary(doc, highlighter, analyzer);
                Object scoreobject = methodRH_score.invoke(hitsobject, args);
                float score = ((Float) scoreobject).floatValue();
                SearchResultEntry result = new SearchResultEntry(doc.get(FIELD_TOPIC_NAME), score, summary);
        } catch (Throwable t) {
            // Any reflective or search failure is logged, not propagated.
            logger.error("Failure while executing RankingAlgorithm search", t);
    } catch (Exception e) {
        logger.error("Exception while searching for " + text, e);
    return results;

From source file:org.lukhnos.lucenestudy.HighlightingHelper.java

License:MIT License

HighlightingHelper(Query query, Analyzer analyzer) {
    // Sets up a highlighter for the given query using the default HTML
    // formatter and a minimal HTML encoder.
    this.analyzer = analyzer;

    Formatter formatter = new SimpleHTMLFormatter();
    Encoder encoder = new MinimalHTMLEncoder();
    // The scorer is kept (assigned without a local declaration) and reused
    // below when constructing the fragmenter.
    scorer = new QueryScorer(query);
    highlighter = new Highlighter(formatter, encoder, scorer);

    fragmentLength = DEFAULT_FRAGMENT_LENGTH;
    // NOTE(review): the statement that installs 'fragmenter' on the highlighter
    // and the closing brace are not visible in this excerpt.
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);

From source file:org.riotfamily.search.ResultHighlighter.java

License:Apache License

public HighlightingContext createContext(IndexSearcher indexSearcher, Query query) throws IOException {

    // The query is rewritten by the searcher before being handed to the scorer.
    Scorer scorer = new QueryScorer(indexSearcher.rewrite(query));
    // Fall back to a default formatter when none has been set; the opening tag
    // uses highlightPreTag and the closing tag uses highlightPostTag.
    if (formatter == null) {
        formatter = new SimpleHTMLFormatter("<" + highlightPreTag + ">", "</" + highlightPostTag + ">");
    }
    // Likewise fall back to a default fragmenter of size fragmentSize.
    // NOTE(review): the closing brace of this if and any statement installing
    // 'fragmenter' on the highlighter are not visible in this excerpt.
    if (fragmenter == null) {
        fragmenter = new SimpleFragmenter(fragmentSize);
    Highlighter highlighter = new Highlighter(formatter, encoder, scorer);
    return new HighlightingContext(highlighter);

From source file:org.sakaiproject.search.component.service.impl.SearchResultImpl.java

License:Educational Community License

public String getSearchResult() {
    // Builds the text for this result and returns its best highlighted
    // fragments as a single string.
    // NOTE(review): this excerpt is heavily truncated - several statements are
    // cut off mid-expression and most closing braces are missing.
    try {
        Scorer scorer = new QueryScorer(query);
        Highlighter hightlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), scorer);
        // Accumulates the text that is tokenized and highlighted below.
        StringBuilder sb = new StringBuilder();
        // contents no longer contains the digested contents, so we need to
        // fetch it from the EntityContentProducer

        byte[][] references = doc.getBinaryValues(SearchService.FIELD_REFERENCE);
        DigestStorageUtil digestStorageUtil = new DigestStorageUtil(searchService);
        if (references != null && references.length > 0) {

            for (int i = 0; i < references.length; i++) {
                // NOTE(review): the right-hand side of this assignment is cut off
                // in the excerpt.
                EntityContentProducer sep = searchIndexBuilder
                if (sep != null) {
                    //does this ecp store on the FS?
                    if (sep instanceof StoredDigestContentProducer) {
                        // A missing digest count defaults to "1".
                        String digestCount = doc.get(SearchService.FIELD_DIGEST_COUNT);
                        if (digestCount == null) {
                            digestCount = "1";
                        log.debug("This file possibly has FS digests with index of " + digestCount);
                        // Look up the stored digest by the decompressed reference value.
                        StringBuilder sb1 = digestStorageUtil.getFileContents(CompressionTools.decompressString(
                                doc.getBinaryValue(SearchService.FIELD_REFERENCE)), digestCount);
                        if (sb1.length() > 0) {

                        } else {
                            // No stored digest found: fetch the content from the producer.
                            String digest = sep.getContent(CompressionTools.decompressString(references[i]));
                            //we need to save this
                            // NOTE(review): the next line is the tail of a call whose
                            // beginning is missing from the excerpt.
                                    sb.toString(), 1);


                    } else {

        String text = sb.toString();
        TokenStream tokenStream = analyzer.tokenStream(SearchService.FIELD_CONTENTS, new StringReader(text));
        // Up to 5 fragments, joined with " ... ".
        return hightlighter.getBestFragments(tokenStream, text, 5, " ... "); //$NON-NLS-1$
    } catch (IOException e) {
        // On failure the method returns a message string (looked up through
        // Messages.getString) with the exception message appended, not a throw.
        return Messages.getString("SearchResultImpl.2") + e.getMessage(); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage();
    } catch (DataFormatException e) {
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage();

From source file:org.sakaiproject.search.component.service.impl.SearchResultResponseImpl.java

License:Educational Community License

public String getSearchResult() {
    try {// www.j  a va  2 s. co  m
        Scorer scorer = new QueryScorer(query);
        Highlighter hightlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), scorer);
        StringBuilder sb = new StringBuilder();
        // contents no longer contains the digested contents, so we need to
        // fetch it from the EntityContentProducer

        EntityContentProducer sep = searchIndexBuilder.newEntityContentProducer(getReference());
        if (sep != null) {
        String text = sb.toString();
        TokenStream tokenStream = analyzer.tokenStream(SearchService.FIELD_CONTENTS, new StringReader(text));
        return hightlighter.getBestFragments(tokenStream, text, 5, " ... "); //$NON-NLS-1$
    } catch (IOException e) {
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage(); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage();