Example usage for org.apache.lucene.search.highlight Highlighter Highlighter

List of usage examples for org.apache.lucene.search.highlight Highlighter Highlighter

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight Highlighter Highlighter.

Prototype

public Highlighter(Formatter formatter, Scorer fragmentScorer) 

Source Link

Usage

From source file:org.intermine.api.search.SearchResults.java

License:GNU General Public License

/**
 * Actually filter the web searchable items we have to get a reduced list of matches.
 * @param origQueryString A query to filter the items against. Assumes the query
 *                        string is neither null nor empty.
 * @param target Information about the scope and type of items to receive.
 * @param profileRepo The repository of the user who wants to find something.
 * @return A set of search results.
 * @throws ParseException If the query string cannot be parsed.
 * @throws IOException If there is an issue opening the indices.
 */
private static SearchResults doFilteredSearch(String origQueryString, SearchTarget target,
        SearchRepository profileRepo) throws ParseException, IOException {

    Map<WebSearchable, String> highlightedDescMap = new HashMap<WebSearchable, String>();

    String queryString = prepareQueryString(origQueryString);

    LOG.info("Searching " + target + " for " + " was:" + origQueryString + " now:" + queryString);
    long time = System.currentTimeMillis();

    org.apache.lucene.search.Query query;

    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // The default search field is the content buffer.
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
    query = queryParser.parse(queryString);

    // Get directories.
    String type = target.getType();
    Map<String, WebSearchable> globalWebSearchables = new HashMap<String, WebSearchable>();
    Set<SearchRepository> globals = SearchRepository.getGlobalSearchRepositories();
    List<Directory> globalDirs = new ArrayList<Directory>();
    for (SearchRepository sr : globals) {
        globalWebSearchables.putAll(sr.getWebSearchableMap(type));
        globalDirs.add(sr.getSearchIndex(type));
    }
    Map<String, WebSearchable> userWebSearchables = profileRepo.getWebSearchableMap(type);
    Directory userDirectory = profileRepo.getSearchIndex(type);

    MultiSearcher searcher = prepareSearcher(target, userDirectory, globalDirs);

    // Rewriting is required to expand search terms (e.g. wildcards) so the
    // highlighter can match them.
    query = searcher.rewrite(query);
    TopDocs topDocs = searcher.search(query, 1000); //FIXME: hardcoded limit

    time = System.currentTimeMillis() - time;
    LOG.info("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "' in " + time
            + " milliseconds:");

    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(FORMATTER, scorer);
    // The fragmenter never changes, so configure it once instead of per hit.
    highlighter.setTextFragmenter(new NullFragmenter());

    Map<WebSearchable, Float> hitMap = new HashMap<WebSearchable, Float>();
    Map<WebSearchable, Set<String>> tags = new HashMap<WebSearchable, Set<String>>();

    for (int i = 0; i < topDocs.totalHits; i++) {
        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
        //String docScope = doc.get("scope");
        String name = doc.get("name");

        // Prefer the user's own items, then fall back to the global repositories.
        WebSearchable webSearchable = userWebSearchables.get(name);
        if (webSearchable == null) {
            webSearchable = globalWebSearchables.get(name);
        }
        if (webSearchable == null) {
            throw new RuntimeException("unknown WebSearchable: " + name);
        }

        // Autoboxing replaces the deprecated new Float(...) constructor.
        hitMap.put(webSearchable, topDocs.scoreDocs[i].score);

        tags.put(webSearchable, new HashSet<String>(asList(split(doc.get("tags")))));

        // Highlight the description; the dead "highlightedDescMap != null"
        // guard was removed (the local is always non-null).
        try {
            String highlightString = webSearchable.getDescription();
            if (highlightString == null) {
                highlightString = "";
            }
            TokenStream tokenStream = analyzer.tokenStream("", new StringReader(highlightString));
            highlightedDescMap.put(webSearchable,
                    highlighter.getBestFragment(tokenStream, highlightString));
        } catch (InvalidTokenOffsetsException e) {
            LOG.warn("Highlighter exception", e);
        }
    }

    Map<String, WebSearchable> wsMap = new HashMap<String, WebSearchable>();
    for (WebSearchable ws : hitMap.keySet()) {
        wsMap.put(ws.getName(), ws);
    }

    return new SearchResults(hitMap, wsMap, highlightedDescMap, tags);
}

From source file:org.jboss.seam.wiki.core.search.metamodel.SearchSupport.java

License:LGPL

/**
 * Returns the hits of the given query as fragments, highlighted, concatenated, and separated.
 * <p>/*from w  w w .j  a va2 s . c om*/
 * Pass in a <tt>NullFragmenter</tt> if you don't want any fragmentation by terms but
 * simply the hits highlighted. Otherwise, you will most likely use <tt>SimpleFragmenter</tt>.
 * The text you supply must be the same that was indexed, it will go through the same
 * analysis procedure to find the hits. Do not pass a different String than the one indexed
 * by Hibernate Search! If you use transparent string bridge with Hibernate Search, run the
 * bridge before passing the string into this method.
 * <p>
 * This method escapes any dangerous HTML characters in the indexed text and fragments by
 * replacing it with HTML entities. You can use the returned string directly to build a
 * <tt>SearchHit</tt>.
 *
 * @param query the query that produced hits
 * @param fragmenter a fragmenter that can split the indexed text
 * @param indexedText the original text that was analyzed and indexed by Hibernate Search (after any bridges!)
 * @param numOfFragments the number of fragments to include in the returned result
 * @param alternativeLength if there are no hits to highlight, how many characters of the original text to return
 * @return the fragmented, highglighted, and then concatenated substring of the indexed text
 */
protected String escapeBestFragments(Query query, Fragmenter fragmenter, String indexedText, int numOfFragments,
        int alternativeLength) {

    // The HTML escaping forces us to first fragment with internal placeholders...
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(INTERNAL_BEGIN_HIT, INTERNAL_END_HIT),
            new QueryScorer(query));
    highlighter.setTextFragmenter(fragmenter);
    try {
        // Use the same analyzer as the indexer!
        TokenStream tokenStream = new StandardAnalyzer().tokenStream(null, new StringReader(indexedText));

        String unescapedFragements = highlighter.getBestFragments(tokenStream, indexedText, numOfFragments,
                getFragmentSeparator());

        String escapedFragments = WikiUtil.escapeHtml(WikiUtil.removeMacros(unescapedFragements), false, false);

        // .. and then replace the internal placeholders with real tags after HTML has been escaped
        escapedFragments = escapedFragments.replaceAll(INTERNAL_BEGIN_HIT, getBeginHitTag());
        escapedFragments = escapedFragments.replaceAll(INTERNAL_END_HIT, getEndHitTag());

        // Strip out macros

        // If no fragments were produced (no hits), return the original text as an alternative
        if (escapedFragments.length() == 0 && alternativeLength != 0) {
            return WikiUtil.escapeHtml(WikiUtil.removeMacros(indexedText.substring(0,
                    indexedText.length() > alternativeLength ? alternativeLength : indexedText.length())),
                    false, false);
        } else if (escapedFragments.length() == 0 && alternativeLength == 0) {
            return WikiUtil.escapeHtml(WikiUtil.removeMacros(indexedText), false, false);
        }

        return escapedFragments;

    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}

From source file:org.mskcc.pathdb.lucene.LuceneResults.java

License:Open Source License

/**
 * Builds a Highlighter for the given search term: parses the term against the
 * default field, rewrites the query against the index so wildcard/prefix terms
 * expand, and wraps matches in HTML &lt;B&gt; tags in 100-character fragments.
 *
 * @param term user-supplied search term
 * @return a configured Highlighter for the rewritten query
 * @throws IOException    if the index cannot be opened or read
 * @throws ParseException if the term cannot be parsed
 */
private Highlighter createHighlighter(String term) throws IOException, ParseException {

    //  Standard Analyzer to extract words using a list of English stop words.
    StandardAnalyzer analyzer = new StandardAnalyzer();

    //  Standard Query Parser
    QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);

    // Rewriting against the index is necessary so wildcard terms can be
    // highlighted. Close the reader afterwards — the original leaked it.
    Query luceneQuery;
    IndexReader reader = IndexReader.open(new File(LuceneConfig.getLuceneDirectory()));
    try {
        luceneQuery = queryParser.parse(term);
        luceneQuery = luceneQuery.rewrite(reader);
    } finally {
        reader.close();
    }

    //  Scorer implementation which scores text fragments by the number of
    //  unique query terms found.
    QueryScorer queryScorer = new QueryScorer(luceneQuery);

    //  HTML Formatter surrounds matching text with <B></B> tags.
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();

    //  Highlighter Class
    Highlighter highLighter = new Highlighter(htmlFormatter, queryScorer);

    //  100 Characters Max in Each Fragment
    Fragmenter fragmenter = new SimpleFragmenter(100);
    highLighter.setTextFragmenter(fragmenter);
    return highLighter;
}

From source file:org.mskcc.pathdb.lucene.PsiInteractorExtractor.java

License:Open Source License

/**
 * Constructor. Opens the Lucene index, and — when a query string is supplied —
 * parses and rewrites it, builds an HTML highlighter for it, and scans all
 * entries. The index reader is always closed before returning.
 *
 * @param entrySet PSI-MI Entry Set Object.
 * @param queryStr Query String.
 * @param xdebug   XDebug Object.
 * @throws IOException         Input Output Exception.
 * @throws ParseException      Parsing Exception.
 * @throws ValidationException Validation Exception.
 * @throws MarshalException    Marshaling Exception.
 */
public PsiInteractorExtractor(EntrySet entrySet, String queryStr, XDebug xdebug)
        throws IOException, ParseException, ValidationException, MarshalException {
    try {
        this.xdebug = xdebug;
        this.entrySet = entrySet;
        interactors = new HashSet();
        analyzer = new StandardAnalyzer();
        reader = IndexReader.open(LuceneConfig.getLuceneDirectory());
        if (queryStr != null) {
            QueryParser parser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);
            // Rewrite expands wildcard terms so the highlighter can match them.
            query = parser.parse(queryStr).rewrite(reader);
            highLighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));
            checkAllEntries();
        }
    } finally {
        // Always release the index reader, even on failure.
        if (reader != null) {
            reader.close();
        }
    }
}

From source file:org.mskcc.pathdb.tool.QueryFullText.java

License:Open Source License

/**
 * Executes Full Text Query.
 *
 * @param term Search Term
 * @throws QueryException Lucene Query Error
 * @throws IOException    I/O Error
 * @throws ParseException Lucene Parsing Error
 */
public static void queryFullText(String term) throws QueryException, IOException, ParseException {
    System.out.println("Using search term:  " + term);
    LuceneReader luceneReader = new LuceneReader();
    Hits hits = luceneReader.executeQuery(term);
    int num = Math.min(10, hits.length());
    System.out.println("Total Number of Hits:  " + hits.length());
    if (hits.length() > 0) {

        //  Standard Analyzer to extract words using a list of English stop words.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        //  Standard Query Parser
        QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);

        // Rewriting against the index expands wildcard terms so they can be
        // highlighted. Close the reader afterwards — the original leaked it.
        Query luceneQuery;
        IndexReader reader = IndexReader.open(new File(LuceneConfig.getLuceneDirectory()));
        try {
            luceneQuery = queryParser.parse(term);
            luceneQuery = luceneQuery.rewrite(reader);
        } finally {
            reader.close();
        }

        //  Scorer implementation which scores text fragments by the number of
        //  unique query terms found.
        QueryScorer queryScorer = new QueryScorer(luceneQuery);

        //  HTML Formatter surrounds matching text with <B></B> tags.
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();

        //  Highlighter Class
        Highlighter highLighter = new Highlighter(htmlFormatter, queryScorer);

        //  100 Characters Max in Each Fragment
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highLighter.setTextFragmenter(fragmenter);

        System.out.println("Showing hits:  0-" + (num - 1));
        for (int i = 0; i < num; i++) {
            System.out.print("Hit " + i + ":  ");

            //  Get the Matching Hit
            Document doc = hits.doc(i);

            //  Get the Field of Interest
            Field field = doc.getField(LuceneConfig.FIELD_ALL);

            //  Reuse the analyzer instead of constructing one per hit.
            TokenStream tokenStream = analyzer.tokenStream(LuceneConfig.FIELD_ALL,
                    new StringReader(field.stringValue()));

            //  Get the Best Fragments, "..."-separated
            String formattedText = highLighter.getBestFragments(tokenStream, field.stringValue(), 5, "...");
            System.out.println(formattedText);
        }
    }
}

From source file:org.olat.search.service.searcher.SearchResultsImpl.java

License:Apache License

/**
 * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
 * /*from  ww w  . ja  va  2  s . c  o m*/
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc,
        final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get 3 best fragments of content and seperate with a "..."
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME,
                new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME,
                    new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}

From source file:org.openrdf.sail.lucene.LuceneQuery.java

License:BSD License

/**
 * Enables snippet highlighting for the given property by installing a
 * highlighter built from the current query, using the configured pre/post tags.
 */
@Override
public void highlight(URI property) {
    highlighter = new Highlighter(
            new SimpleHTMLFormatter(SearchFields.HIGHLIGHTER_PRE_TAG, SearchFields.HIGHLIGHTER_POST_TAG),
            new QueryScorer(query));
}

From source file:org.openrdf.sail.lucene.LuceneQueryIterator.java

License:BSD License

/**
 * Evaluates one Lucene Query. It distinguishes between two cases,
 * the one where no subject is given and the one were it is given.
 * @param query the lucene query to evaluate
 * @return the lucene hits/*from   ww w  .  ja  v  a  2s  .c o m*/
 */
private TopDocs evaluate(QuerySpec query) {
    // get the subject of the query
    Resource subject = query.getSubject();

    try {
        // parse the query string to a lucene query
        Query lucenequery = this.index.parseQuery(query.getQueryString(), query.getPropertyURI());

        // if the query requests for the snippet, create a highlighter using this query
        if (query.getSnippetVariableName() != null) {
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(lucenequery));
            this.highlighters.put(query, highlighter);
        }

        // distinguish the two cases of subject == null
        if (subject == null) {
            return this.index.search(lucenequery);
        } else {
            return this.index.search(subject, lucenequery);
        }
    } catch (Exception e) {
        log.error("There was a problem evaluating query '" + query.getQueryString() + "' for property '"
                + query.getPropertyURI() + "!", e);
    }

    return null;
}

From source file:org.openrdf.sail.lucene.LuceneSailConnection.java

License:BSD License

/**
 * Evaluates one Lucene Query. It distinguishes between two cases, the one
 * where no subject is given and the one were it is given.
 * /*from   ww  w  .  j a va  2 s  .  co  m*/
 * @param query
 *        the Lucene query to evaluate
 * @return QueryResult consisting of hits and highlighter
 */
private QueryResult evaluate(QuerySpec query) {
    TopDocs hits = null;
    Highlighter highlighter = null;

    // get the subject of the query
    Resource subject = query.getSubject();

    try {
        // parse the query string to a lucene query

        String sQuery = query.getQueryString();

        if (!sQuery.isEmpty()) {
            Query lucenequery = this.luceneIndex.parseQuery(query.getQueryString(), query.getPropertyURI());

            // if the query requests for the snippet, create a highlighter using
            // this query
            if (query.getSnippetVariableName() != null) {
                Formatter formatter = new SimpleHTMLFormatter();
                highlighter = new Highlighter(formatter, new QueryScorer(lucenequery));
            }

            // distinguish the two cases of subject == null
            if (subject == null) {
                hits = this.luceneIndex.search(lucenequery);
            } else {
                hits = this.luceneIndex.search(subject, lucenequery);
            }
        } else {
            hits = new TopDocs(0, new ScoreDoc[0], 0.0f);
        }
    } catch (Exception e) {
        logger.error("There was a problem evaluating query '" + query.getQueryString() + "' for property '"
                + query.getPropertyURI() + "!", e);
    }

    return new QueryResult(hits, highlighter);
}

From source file:org.schors.evlampia.search.LogEntryAggregator.java

License:Open Source License

/**
 * Attempts to highlight query hits in the given text, trying each field in
 * turn. Returns the first highlighted fragment found, the original text when
 * no field produces a hit, or null for null input.
 *
 * @param text   the stored text to highlight (may be null)
 * @param fields candidate field names to highlight against
 * @return highlighted fragment, the original text, or null
 * @throws IOException                   if token-stream reading fails
 * @throws InvalidTokenOffsetsException  if token offsets are inconsistent
 */
private final String tryHighlight(String text, String[] fields)
        throws IOException, InvalidTokenOffsetsException {

    if (text == null) {
        return null;
    }

    // Lazily build the shared highlighter the first time it is needed,
    // rewriting the query against the reader so wildcard terms match.
    if (highlighter == null) {
        QueryScorer scorer = new QueryScorer(query.rewrite(indexSearcher.getIndexReader()));
        highlighter = new Highlighter(new SimpleHTMLFormatter("<span class='highlighted'>", "</span>"), scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 330));
    }

    for (String field : fields) {
        String fragment = highlighter.getBestFragment(Constants.analyzer, field, text);
        if (fragment != null) {
            return fragment;
        }
    }

    return text;
}