Example usage for the org.apache.lucene.search.highlight.Highlighter constructor Highlighter(Formatter, Encoder, Scorer)

Introduction

This page collects example usages of the org.apache.lucene.search.highlight.Highlighter constructor shown in the prototype below.

Prototype

public Highlighter(Formatter formatter, Encoder encoder, Scorer fragmentScorer) 
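
The constructor takes a Formatter (decorates each match), an Encoder (escapes the non-matching text), and a Scorer (ranks candidate fragments against the query). Before the real-world examples below, here is a minimal, self-contained sketch of how the pieces fit together. It assumes a Lucene 3.x-era API to match the snippets on this page, and the field name, tags, and fragment count are arbitrary placeholders:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class HighlighterSketch {

    // Returns up to three fragments of the given text, with each query match
    // wrapped in <b> tags and the fragments joined by " ... " separators.
    static String highlight(Query query, Analyzer analyzer, String fieldName, String text)
            throws IOException, InvalidTokenOffsetsException {
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"),
                new SimpleHTMLEncoder(), new QueryScorer(query));
        // Re-analyze the stored text so the highlighter can locate term offsets.
        TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
        return highlighter.getBestFragments(tokenStream, text, 3, " ... ");
    }
}

Note that for wildcard and prefix queries the query should be rewritten against an IndexSearcher or IndexReader first, as the LuceneSearchEngine and ResultHighlighter examples below do; otherwise the scorer never sees the expanded terms.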

Usage

From source file: org.apache.solr.highlight.DefaultSolrHighlighter.java

License: Apache License

/**
 * Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 */
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
    SolrParams params = request.getParams();
    Highlighter highlighter = new Highlighter(getFormatter(fieldName, params), getEncoder(fieldName, params),
            getQueryScorer(query, fieldName, request));
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
}

From source file: org.apache.wiki.search.LuceneSearchProvider.java

License: Apache License

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);

                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);

        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }

    return list;
}

From source file: org.archive.tnh.servlet.OpenSearchServlet.java

License: Apache License

public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    try {
        long responseTime = System.nanoTime();

        QueryParameters p = (QueryParameters) request.getAttribute(OpenSearchHelper.PARAMS_KEY);
        if (p == null) {
            p = getQueryParameters(request);
        }

        BooleanQuery q = this.translator.translate(p.query, this.foldAccents);

        this.translator.addFilterGroup(q, "site", p.sites);
        this.translator.addFilterGroup(q, "type", p.types);
        this.translator.addFilterGroup(q, "collection", p.collections);
        this.translator.addFilterGroup(q, "date", p.dates);

        long parseQueryTime = System.nanoTime();

        if (Arrays.equals(p.indexNames, QueryParameters.ALL_INDEXES)) {
            if (p.excludes.length > 0) {
                // If there are indexes to exclude, exclude them.
                p.indexNames = removeExcludes(p.excludes);
            }
        } else {
            // There are explicitly named indexes.  Weed out any unknown names.
            p.indexNames = removeUnknownIndexNames(p.indexNames);
        }

        Search.Result result;
        if (p.indexNames.length == 0) {
            result = new Search.Result();
            result.hits = new Hit[0];
        } else {
            result = this.searcher.search(p.indexNames, q, p.start + (p.hitsPerPage * 3), p.hitsPerSite);
        }

        long executeQueryTime = System.nanoTime();

        // The 'end' is usually just the end of the current page
        // (start+hitsPerPage); but if we are on the last page
        // of de-duped results, then the end is hits.getLength().
        int end = Math.min(result.hits.length, p.start + p.hitsPerPage);

        // The length is usually just (end-start), unless the start
        // position is past the end of the results -- which is common when
        // de-duping.  The user could easily jump past the true end of the
        // de-dup'd results.  If the start is past the end, we use a
        // length of '0' to produce an empty results page.
        int length = Math.max(end - p.start, 0);

        // Usually, the total results is the total number of non-de-duped
        // results.  However, if we are on the last page of de-duped results,
        // then we know our de-dup'd total is result.hits.length.
        long totalResults = result.hits.length < (p.start + p.hitsPerPage) ? result.hits.length
                : result.numRawHits;

        Document doc = new Document();

        Element channel = OpenSearchHelper.startResponse(doc, p, request, totalResults);

        // Add hits to XML Document
        for (int i = p.start; i < end; i++) {
            org.apache.lucene.document.Document hit = result.searcher.doc(result.hits[i].id);

            Element item = JDOMHelper.add(channel, "item");

            // Replace & and < with their XML entity counterparts to
            // ensure that any HTML markup in the snippet is escaped
            // before we do the highlighting.
            String title = hit.get("title");
            if (title != null) {
                title = title.replaceAll("[&]", "&amp;");
                title = title.replaceAll("[<]", "&lt;");
            }
            JDOMHelper.add(item, "title", title);

            JDOMHelper.add(item, "link", hit.get("url"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "docId", String.valueOf(result.hits[i].id));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "score", String.valueOf(result.hits[i].score));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "site", result.hits[i].site);
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "length", hit.get("length"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "type", hit.get("type"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "boost", hit.get("boost"));
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "collection", hit.get("collection"));

            String indexName = this.searcher.resolveIndexName(result.searcher, result.hits[i].id);
            JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "index", indexName);

            for (String date : hit.getValues("date")) {
                JDOMHelper.add(item, "date", date);
            }

            String raw = getContent(hit);

            StringBuilder buf = new StringBuilder(100);

            Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new NonBrokenHTMLEncoder(),
                    new QueryScorer(q, "content"));

            CustomAnalyzer analyzer = new CustomAnalyzer();
            analyzer.setFoldAccents(this.foldAccents);

            for (String snippet : highlighter.getBestFragments(analyzer, "content", raw,
                    this.contextSnippetsPerResult)) {
                buf.append(snippet);
                buf.append("...");
            }

            JDOMHelper.add(item, "description", buf.toString());

            // Last, but not least, add a hit explanation, if enabled
            if (explain) {
                JDOMHelper.add(item, OpenSearchHelper.NS_ARCHIVE, "explain",
                        result.searcher.explain(q, result.hits[i].id).toHtml());
            }
        }

        OpenSearchHelper.addResponseTime(channel, System.nanoTime() - responseTime);

        long buildResultsTime = System.nanoTime();

        OpenSearchHelper.writeResponse(doc, response, "application/rss+xml");

        long writeResponseTime = System.nanoTime();

        LOG.info("S: " + ((parseQueryTime - responseTime) / 1000 / 1000) + " "
                + ((executeQueryTime - parseQueryTime) / 1000 / 1000) + " "
                + ((buildResultsTime - executeQueryTime) / 1000 / 1000) + " "
                + ((writeResponseTime - buildResultsTime) / 1000 / 1000) + " " + p.query);
    } catch (Exception e) {
        throw new ServletException(e);
    }
}

From source file: org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License: Apache License

protected Highlighter createHighlighter(String propertyName) throws SearchEngineException {
    Highlighter highlighter = new Highlighter(highlighterSettings.getFormatter(),
            highlighterSettings.getEncoder(), createScorer(propertyName));
    Fragmenter f = highlighterSettings.getFragmenter();
    highlighter.setTextFragmenter(f);
    if (maxBytesToAnalyze == -1) {
        highlighter.setMaxDocBytesToAnalyze(highlighterSettings.getMaxBytesToAnalyze());
    } else {
        highlighter.setMaxDocBytesToAnalyze(maxBytesToAnalyze);
    }
    return highlighter;
}

From source file: org.jamwiki.search.LuceneSearchEngine.java

License: LGPL

/**
 * Find all documents that contain a specific search term, ordered by relevance.
 * This method supports all Lucene search query syntax.
 *
 * @param virtualWiki The virtual wiki for the topic.
 * @param text The search term being searched for.
 * @return A collection of SearchResultEntry objects for all documents that
 *  contain the search term.
 */
public Collection findResults(String virtualWiki, String text) {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Collection results = new Vector();
    logger.fine("search text: " + text);
    IndexSearcher searcher = null;
    try {
        BooleanQuery query = new BooleanQuery();
        QueryParser qp;
        qp = new QueryParser(ITYPE_TOPIC, analyzer);
        query.add(qp.parse(text), Occur.SHOULD);
        qp = new QueryParser(ITYPE_CONTENT, analyzer);
        query.add(qp.parse(text), Occur.SHOULD);
        searcher = new IndexSearcher(FSDirectory.getDirectory(getSearchIndexPath(virtualWiki)));
        // rewrite the query to expand it - required for wildcards to work with highlighter
        Query rewrittenQuery = searcher.rewrite(query);
        // actually perform the search
        Hits hits = searcher.search(rewrittenQuery);
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new SimpleHTMLEncoder(),
                new QueryScorer(rewrittenQuery));
        for (int i = 0; i < hits.length(); i++) {
            String summary = retrieveResultSummary(hits.doc(i), highlighter, analyzer);
            SearchResultEntry result = new SearchResultEntry();
            result.setRanking(hits.score(i));
            result.setTopic(hits.doc(i).get(ITYPE_TOPIC_PLAIN));
            result.setSummary(summary);
            results.add(result);
        }
    } catch (Exception e) {
        logger.severe("Exception while searching for " + text, e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (Exception e) {
            }
        }
    }
    return results;
}

From source file: org.jamwiki.search.RankingAlgorithmSearchEngine.java

License: LGPL

/**
 * Find all documents that contain a specific search term, ordered by relevance.
 * This method supports all Lucene search query syntax.
 *
 * @param virtualWiki The virtual wiki for the topic.
 * @param text The search term being searched for.
 * @return A list of SearchResultEntry objects for all documents that
 *  contain the search term.
 */
public List<SearchResultEntry> findResults(String virtualWiki, String text, List<Integer> namespaces) {
    StandardAnalyzer analyzer = new StandardAnalyzer(USE_LUCENE_VERSION);
    List<SearchResultEntry> results = new ArrayList<SearchResultEntry>();
    logger.trace("search text: " + text);
    try {
        IndexSearcher searcher = this.retrieveIndexSearcher(virtualWiki);
        Query query = this.createSearchQuery(searcher, analyzer, text, namespaces);
        // actually perform the search
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAXIMUM_RESULTS_PER_SEARCH, true);
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"), new SimpleHTMLEncoder(),
                new QueryScorer(query));
        try {
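            // The RankingAlgorithm engine is invoked via reflection, with its
            // classes and methods resolved at runtime rather than compile time.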
            Class classRQ = Class.forName("com.transaxtions.search.rankingalgorithm.RankingQuery");
            Class classQuery = Class.forName("org.apache.lucene.search.Query");
            Object rq = classRQ.newInstance();
            Class classArray[] = new Class[2];
            classArray[0] = classQuery;
            classArray[1] = searcher.getClass();
            Object args[] = new Object[2];
            args[0] = query;
            args[1] = searcher;
            Method methodRQ_search = classRQ.getMethod("search", classArray);
            Object hitsobject = methodRQ_search.invoke(rq, args);
            Class classRH = hitsobject.getClass();
            classArray = new Class[1];
            classArray[0] = int.class;
            Method methodRH_length = classRH.getMethod("length", null);
            Method methodRH_docid = classRH.getMethod("docid", classArray);
            Method methodRH_score = classRH.getMethod("score", classArray);
            Object lenobject = methodRH_length.invoke(hitsobject);
            int length = ((Integer) lenobject).intValue();
            for (int i = 0; i < length; i++) {
                args = new Object[1];
                args[0] = new Integer(i);
                Object docobject = methodRH_docid.invoke(hitsobject, args);
                int docId = ((Integer) docobject).intValue();
                Document doc = searcher.doc(docId);
                String summary = retrieveResultSummary(doc, highlighter, analyzer);
                Object scoreobject = methodRH_score.invoke(hitsobject, args);
                float score = ((Float) scoreobject).floatValue();
                SearchResultEntry result = new SearchResultEntry(doc.get(FIELD_TOPIC_NAME), score, summary);
                results.add(result);
            }
        } catch (Throwable t) {
            logger.error("Failure while executing RankingAlgorithm search", t);
        }
    } catch (Exception e) {
        logger.error("Exception while searching for " + text, e);
    }
    return results;
}

From source file: org.lukhnos.lucenestudy.HighlightingHelper.java

License: MIT License

HighlightingHelper(Query query, Analyzer analyzer) {
    this.analyzer = analyzer;

    Formatter formatter = new SimpleHTMLFormatter();
    Encoder encoder = new MinimalHTMLEncoder();
    scorer = new QueryScorer(query);
    highlighter = new Highlighter(formatter, encoder, scorer);

    fragmentLength = DEFAULT_FRAGMENT_LENGTH;
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
    highlighter.setTextFragmenter(fragmenter);
}

From source file: org.riotfamily.search.ResultHighlighter.java

License: Apache License

public HighlightingContext createContext(IndexSearcher indexSearcher, Query query) throws IOException {

    Scorer scorer = new QueryScorer(indexSearcher.rewrite(query));
    if (formatter == null) {
        formatter = new SimpleHTMLFormatter("<" + highlightPreTag + ">", "</" + highlightPostTag + ">");
    }
    if (fragmenter == null) {
        fragmenter = new SimpleFragmenter(fragmentSize);
    }
    Highlighter highlighter = new Highlighter(formatter, encoder, scorer);
    highlighter.setTextFragmenter(fragmenter);
    return new HighlightingContext(highlighter);
}

From source file: org.sakaiproject.search.component.service.impl.SearchResultImpl.java

License: Educational Community License

public String getSearchResult() {
    try {
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), scorer);
        StringBuilder sb = new StringBuilder();
        // contents no longer contains the digested contents, so we need to
        // fetch it from the EntityContentProducer

        byte[][] references = doc.getBinaryValues(SearchService.FIELD_REFERENCE);
        DigestStorageUtil digestStorageUtil = new DigestStorageUtil(searchService);
        if (references != null && references.length > 0) {

            for (int i = 0; i < references.length; i++) {
                EntityContentProducer sep = searchIndexBuilder
                        .newEntityContentProducer(CompressionTools.decompressString(references[i]));
                if (sep != null) {
                    //does this ecp store on the FS?
                    if (sep instanceof StoredDigestContentProducer) {
                        String digestCount = doc.get(SearchService.FIELD_DIGEST_COUNT);
                        if (digestCount == null) {
                            digestCount = "1";
                        }
                        log.debug("This file possibly has FS digests with index of " + digestCount);
                        StringBuilder sb1 = digestStorageUtil.getFileContents(CompressionTools.decompressString(
                                doc.getBinaryValue(SearchService.FIELD_REFERENCE)), digestCount);
                        if (sb1.length() > 0) {
                            sb.append(sb1);

                        } else {
                            String digest = sep.getContent(CompressionTools.decompressString(references[i]));
                            sb.append(digest);
                            //we need to save this
                            digestStorageUtil.saveContentToStore(
                                    CompressionTools.decompressString(
                                            doc.getBinaryValue(SearchService.FIELD_REFERENCE)),
                                    sb.toString(), 1);

                        }

                    } else {
                        sb.append(CompressionTools.decompressString(references[i]));

                    }
                }
            }
        }
        String text = sb.toString();
        TokenStream tokenStream = analyzer.tokenStream(SearchService.FIELD_CONTENTS, new StringReader(text));
        return highlighter.getBestFragments(tokenStream, text, 5, " ... "); //$NON-NLS-1$
    } catch (IOException e) {
        return Messages.getString("SearchResultImpl.2") + e.getMessage(); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage();
    } catch (DataFormatException e) {
        e.printStackTrace();
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage();
    }
}

From source file: org.sakaiproject.search.component.service.impl.SearchResultResponseImpl.java

License: Educational Community License

public String getSearchResult() {
    try {
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), scorer);
        StringBuilder sb = new StringBuilder();
        // contents no longer contains the digested contents, so we need to
        // fetch it from the EntityContentProducer

        EntityContentProducer sep = searchIndexBuilder.newEntityContentProducer(getReference());
        if (sep != null) {
            sb.append(sep.getContent(getReference()));
        }
        String text = sb.toString();
        TokenStream tokenStream = analyzer.tokenStream(SearchService.FIELD_CONTENTS, new StringReader(text));
        return highlighter.getBestFragments(tokenStream, text, 5, " ... "); //$NON-NLS-1$
    } catch (IOException e) {
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage(); //$NON-NLS-1$
    } catch (InvalidTokenOffsetsException e) {
        return Messages.getString("SearchResultResponseImpl.11") + e.getMessage();
    }
}