Example usage for org.apache.lucene.search.highlight Highlighter Highlighter

Introduction

On this page you can find example usage for the org.apache.lucene.search.highlight.Highlighter constructor that takes a Formatter, an Encoder, and a Scorer.

Prototype

public Highlighter(Formatter formatter, Encoder encoder, Scorer fragmentScorer) 
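Before the full examples below, here is a minimal, self-contained sketch of this constructor in use. It is only a sketch: it assumes a Lucene 3.x-era classpath (StandardAnalyzer, the org.apache.lucene.queryParser package, Version.LUCENE_31), matching the era of most examples on this page, and the field name and sample text are made up for illustration.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

public class HighlighterConstructorExample {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        Query query = new QueryParser(Version.LUCENE_31, "contents", analyzer).parse("lucene");

        // The three constructor arguments divide the work:
        // - the Formatter wraps each matched term (here in <b> tags),
        // - the Encoder escapes the surrounding text for HTML output,
        // - the Scorer ranks candidate fragments against the query.
        Highlighter highlighter = new Highlighter(
                new SimpleHTMLFormatter("<b>", "</b>"),
                new SimpleHTMLEncoder(),
                new QueryScorer(query));

        String text = "Apache Lucene is a full-text search library written in Java.";
        TokenStream tokens = analyzer.tokenStream("contents", new StringReader(text));
        System.out.println(highlighter.getBestFragments(tokens, text, 3, "..."));
    }
}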

Usage

From source file:com.ecyrd.jspwiki.search.LuceneSearchProvider.java

License:Apache License

/**
 *  Searches pages using a particular combination of flags.
 *
 *  @param query The query to perform in Lucene query language
 *  @param flags A set of flags
 *  @return A Collection of SearchResult instances
 *  @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    Searcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
        String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
        QueryParser qp = new MultiFieldQueryParser(queryfields, getLuceneAnalyzer());

        //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                    new SimpleHTMLEncoder(), new QueryScorer(luceneQuery));
        }

        try {
            searcher = new IndexSearcher(m_luceneDirectory);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        Hits hits = searcher.search(luceneQuery);

        list = new ArrayList<SearchResult>(hits.length());
        for (int curr = 0; curr < hits.length(); curr++) {
            Document doc = hits.doc(curr);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page
                    // When the search-results are cleaned up this can be enabled again.
                }

                int score = (int) (hits.score(curr) * 100);

                // Get highlighted search contexts
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS,
                            new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);

                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                log.error("Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (InstantiationException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (IllegalAccessException e) {
        log.error("Unable to get a Lucene analyzer", e);
    } catch (ClassNotFoundException e) {
        log.error("Specified Lucene analyzer does not exist", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse", e);

        throw new ProviderException("You have entered a query Lucene cannot process: " + e.getMessage());
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
            }
        }
    }

    return list;
}

From source file:com.meltmedia.cadmium.search.SearchService.java

License:Apache License

private Map<String, Object> buildSearchResults(final String query, final String path) throws Exception {
    logger.info("Running search for [{}]", query);
    final Map<String, Object> resultMap = new LinkedHashMap<String, Object>();

    new SearchTemplate(provider) {
        public void doSearch(IndexSearcher index) throws IOException, ParseException {
            QueryParser parser = createParser(getAnalyzer());

            resultMap.put("number-hits", 0);

            List<Map<String, Object>> resultList = new ArrayList<Map<String, Object>>();

            resultMap.put("results", resultList);

            if (index != null && parser != null) {
                String literalQuery = query.replaceAll(ALLOWED_CHARS_PATTERN, "\\\\$1");
                Query query1 = parser.parse(literalQuery);
                if (StringUtils.isNotBlank(path)) {
                    Query pathPrefix = new PrefixQuery(new Term("path", path));
                    BooleanQuery boolQuery = new BooleanQuery();
                    boolQuery.add(pathPrefix, Occur.MUST);
                    boolQuery.add(query1, Occur.MUST);
                    query1 = boolQuery;
                }
                TopDocs results = index.search(query1, null, 100000);
                QueryScorer scorer = new QueryScorer(query1);
                Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
                        scorer);

                logger.info("Search returned {} hits.", results.totalHits);
                resultMap.put("number-hits", results.totalHits);

                for (ScoreDoc doc : results.scoreDocs) {
                    Document document = index.doc(doc.doc);
                    String content = document.get("content");
                    String title = document.get("title");

                    Map<String, Object> result = new LinkedHashMap<String, Object>();
                    String excerpt = "";

                    try {
                        excerpt = highlighter.getBestFragments(
                                parser.getAnalyzer().tokenStream(null, new StringReader(content)), content, 3,
                                "...");
                        excerpt = fixExcerpt(excerpt);

                        result.put("excerpt", excerpt);
                    } catch (Exception e) {
                        logger.debug("Failed to get search excerpt from content.", e);

                        try {
                            excerpt = highlighter.getBestFragments(
                                    parser.getAnalyzer().tokenStream(null, new StringReader(title)), title, 1,
                                    "...");
                            excerpt = fixExcerpt(excerpt);

                            result.put("excerpt", excerpt);
                        } catch (Exception e1) {
                            logger.debug("Failed to get search excerpt from title.", e1);

                            result.put("excerpt", "");
                        }
                    }

                    result.put("score", doc.score);
                    result.put("title", title);
                    result.put("path", document.get("path"));

                    resultList.add(result);
                }
            }

        }
    }.search();

    return resultMap;
}

From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java

License:Apache License

@Override
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms)
        throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;

    // The default QueryParser.escape(pattern) method does not support phrase queries.
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);
    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        return Collections.emptyList();
    }

    logger.log(Level.FINE, "Escaped search pattern: " + pattern);

    Lock lock = rwlock.readLock();
    lock.lock();
    if (exception != null) {
        lock.unlock();
        throw new RuntimeException("Failed to refesh index reader after last commit", exception);
    }

    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);

        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);

        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);
        highlighter.setMaxDocCharsToAnalyze(MAX_CHARS);
        scorer.setExpandMultiTermQuery(true);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;

            if (!isSynonym || includeSynonyms) {
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM,
                        highlighterAnalyzer);
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                    results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                            frag[0].getScore(), isSynonym));
                }
                highlighterAnalyzer.close();
            }
        }

        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        String msg = "Failed to perform Lucene seach with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {
        close(analyzer);
        lock.unlock();
    }
}

From source file:com.pongasoft.kiwidoc.index.impl.keyword.impl.KeywordIndexImpl.java

License:Apache License

/**
 * Highlights the provided results obtained using the provided query.
 *
 * @param query  the query from which the results were computed
 * @param models the models to highlight
 * @return a map representing for each entry in the model its associated resource and highlight
 * @throws MalformedQueryException if the query cannot be parsed
 * @throws InternalException if there is an internal problem
 */
public <R extends Resource> Map<R, String[]> highlightResults(KeywordQuery query, Collection<Model<R>> models)
        throws InternalException, MalformedQueryException {
    Map<R, String[]> res = new LinkedHashMap<R, String[]>();

    Query parsedQuery = parseQuery(query);

    if (parsedQuery != null) {
        Highlighter highlighter = new Highlighter(_highlighterFormatter, HTML_ENCODER,
                new QueryScorer(parsedQuery));

        for (Model<R> model : models) {
            Document document = new Document();
            String bodyText = buildBody(model);
            document.add(new Field(DocumentFactory.BODY_FIELD, bodyText, Field.Store.NO, Field.Index.ANALYZED));
            TokenStream tokenStream = TokenSources.getTokenStream(document, DocumentFactory.BODY_FIELD,
                    _analyzer);
            try {
                res.put(model.getResource(), highlighter.getBestFragments(tokenStream, bodyText, 2));
            } catch (IOException e) {
                log.warn("exception while computing highlight... [ignored]", e);
            }
        }
    }

    return res;
}

From source file:com.tripod.lucene.service.AbstractLuceneService.java

License:Apache License

/**
 * @param query the tripod query being performed
 * @param luceneQuery the Lucene query being performed
 * @return the highlighter to use if the tripod query has one or more highlight fields, or null
 */
private Highlighter getHighlighter(final Q query, final Query luceneQuery) {
    Highlighter highlighter = null;
    if (query.getHighlightFields() != null && query.getHighlightFields().size() > 0) {
        SimpleHTMLEncoder simpleHTMLEncoder = new SimpleHTMLEncoder();
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(query.getHighlightPreTag(),
                query.getHighlightPostTag());
        highlighter = new Highlighter(simpleHTMLFormatter, simpleHTMLEncoder, new QueryScorer(luceneQuery));
    }
    return highlighter;
}
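Because this helper returns null when the tripod query requests no highlight fields, callers must guard against both a null Highlighter and a possibly null fragment. The following caller is a hypothetical sketch, not part of the original source: the analyzer, fieldName, and storedText parameters are assumptions added for illustration.

// Hypothetical caller: highlight one stored field value, tolerating a null
// Highlighter (no highlight fields requested) and a null fragment (no match).
private String highlightOrNull(final Q query, final Query luceneQuery, final Analyzer analyzer,
        final String fieldName, final String storedText)
        throws IOException, InvalidTokenOffsetsException {
    Highlighter highlighter = getHighlighter(query, luceneQuery);
    if (highlighter == null || storedText == null) {
        return null;
    }
    TokenStream tokens = analyzer.tokenStream(fieldName, new StringReader(storedText));
    return highlighter.getBestFragment(tokens, storedText);
}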

From source file:natural.language.qa.LuceneSearch.java

License:Apache License

public List<LuceneSearchResult> search(String queryString, int maxRes) throws Exception {
    IndexSearcher searcher = null;
    List<LuceneSearchResult> results = new ArrayList<LuceneSearchResult>();
    try {
        Properties indexConf = new Properties();
        FileInputStream fis = new FileInputStream("index.properties");
        indexConf.load(fis);

        String index = indexConf.getProperty("index");
        String field = "contents";

        Directory indexDir = FSDirectory.open(new File(index));

        searcher = new IndexSearcher(indexDir);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

        QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);

        queryString = queryString.trim();
        if (queryString.length() == 0) {
            return results;
        }

        Query query = parser.parse(queryString);
        System.out.println("Searching for: " + query.toString(field));

        // ================================================
        Formatter f = new SimpleHTMLFormatter("", "");
        Encoder e = new DefaultEncoder();
        QueryScorer fs = new QueryScorer(query);
        Fragmenter fragmenter = new SimpleSpanFragmenter(fs, 50);// new SentenceFragmenter();
        Highlighter h = new Highlighter(f, e, fs);
        h.setTextFragmenter(fragmenter);
        h.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

        // ================================================

        // Collect docs
        TopDocs res = searcher.search(query, maxRes);
        int numTotalHits = res.totalHits;
        ScoreDoc[] scoreDocs = res.scoreDocs;

        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String path = doc.get("path");
            String content = readDocument(path);
            String bestFragment = h.getBestFragment(analyzer, field, content);
            String frag = bestFragment;
            //System.out.println(frag);
            LuceneSearchResult hit = new LuceneSearchResult(scoreDoc.doc, path, frag);
            results.add(hit);
        }
        System.out.println(numTotalHits + " total matching documents");
    } finally {
        if (searcher != null) {
            searcher.close();
        }
    }
    return results;
}

From source file:net.hillsdon.reviki.search.impl.LuceneSearcher.java

License:Apache License

private LinkedHashSet<SearchMatch> doQuery(final IndexReader reader, final Analyzer analyzer,
        final Searcher searcher, final String field, final boolean provideExtracts, final Query query)
        throws IOException, CorruptIndexException {
    Highlighter highlighter = null;
    if (provideExtracts) {
        highlighter = new Highlighter(new SimpleHTMLFormatter("<strong>", "</strong>"), new SimpleHTMLEncoder(),
                new QueryScorer(query));
    }
    Hits hits = searcher.search(query);
    LinkedHashSet<SearchMatch> results = new LinkedHashSet<SearchMatch>();
    @SuppressWarnings("unchecked")
    Iterator<Hit> iter = hits.iterator();
    while (iter.hasNext()) {
        Hit hit = iter.next();
        String text = hit.get(field);
        String extract = null;
        // The text is not stored for all fields, just provide a null extract.
        if (highlighter != null && text != null) {
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            // Get 3 best fragments and separate with a "..."
            extract = highlighter.getBestFragments(tokenStream, text, 3, "...");
        }
        results.add(new SearchMatch(_wikiName.equals(hit.get(FIELD_WIKI)), hit.get(FIELD_WIKI),
                hit.get(FIELD_PATH), extract));
    }
    return results;
}

From source file:org.apache.maven.index.DefaultIteratorResultSet.java

License:Apache License

protected final List<String> getBestFragments(Query query, Formatter formatter, TokenStream tokenStream,
        String text, int maxNumFragments) throws IOException {
    Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query));

    highlighter.setTextFragmenter(new OneLineFragmenter());

    maxNumFragments = Math.max(1, maxNumFragments); // sanity check

    TextFragment[] frag;
    // Get text
    ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments);

    try {
        frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments);

        for (int i = 0; i < frag.length; i++) {
            if ((frag[i] != null) && (frag[i].getScore() > 0)) {
                fragTexts.add(frag[i].toString());
            }
        }
    } catch (InvalidTokenOffsetsException e) {
        // empty?
    }

    return fragTexts;
}

From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java

License:Open Source License

/**
 * Return a {@link org.apache.lucene.search.highlight.Highlighter}
 * appropriate for this field.
 * 
 * @param query
 *            The current Query
 * @param requestFieldname
 *            The name of the field
 * @param request
 *            The current SolrQueryRequest
 */
@Override
protected Highlighter getHighlighter(Query query, String requestFieldname, SolrQueryRequest request) {
    String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldname,
            FieldUse.HIGHLIGHT, request);
    SolrParams params = request.getParams();
    Highlighter highlighter = new Highlighter(getFormatter(requestFieldname, params),
            getEncoder(requestFieldname, params), getQueryScorer(query, schemaFieldName, request));
    highlighter.setTextFragmenter(getFragmenter(requestFieldname, params));
    return highlighter;
}

From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java

License:Apache License

/**
 * Return a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 * @param tokenStream document text CachingTokenStream
 * @throws IOException If there is a low-level I/O error.
 */
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request,
        CachingTokenFilter tokenStream) throws IOException {
    SolrParams params = request.getParams();
    Highlighter highlighter = null;

    highlighter = new Highlighter(getFormatter(fieldName, params), getEncoder(fieldName, params),
            getSpanQueryScorer(query, fieldName, tokenStream, request));

    highlighter.setTextFragmenter(getFragmenter(fieldName, params));

    return highlighter;
}