This page collects example usages of org.apache.lucene.search.highlight.Highlighter.setTextFragmenter.


public void setTextFragmenter(Fragmenter fragmenter) 

From source file:org.apache.solr.handler.component.AlfrescoSolrHighlighter.java

License:Open Source License

 * Return a {@link org.apache.lucene.search.highlight.Highlighter}
 * appropriate for this field.
 * @param query
 *            The current Query
 * @param requestFieldname
 *            The name of the field
 * @param request
 *            The current SolrQueryRequest
protected Highlighter getHighlighter(Query query, String requestFieldname, SolrQueryRequest request) {
    // Map the requested field name to the schema field name for the HIGHLIGHT field use.
    String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(requestFieldname,
            FieldUse.HIGHLIGHT, request);
    SolrParams params = request.getParams();
    // Formatter, encoder and fragmenter are looked up with the request field name;
    // only the query scorer is given the mapped schema field name.
    Highlighter highlighter = new Highlighter(getFormatter(requestFieldname, params),
            getEncoder(requestFieldname, params), getQueryScorer(query, schemaFieldName, request));
    highlighter.setTextFragmenter(getFragmenter(requestFieldname, params));
    return highlighter;
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java

License:Apache License

 * Return a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 * @param tokenStream document text CachingTokenFilter
 * @throws IOException If there is a low-level I/O error.
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request,
        CachingTokenFilter tokenStream) throws IOException {
    SolrParams params = request.getParams();
    // Declared as null and assigned immediately below; no code runs in between.
    Highlighter highlighter = null;

    // The scorer comes from getSpanQueryScorer, which also receives the caching token stream.
    highlighter = new Highlighter(getFormatter(fieldName, params), getEncoder(fieldName, params),
            getSpanQueryScorer(query, fieldName, tokenStream, request));

    // Fragment size/strategy is resolved per field from the request parameters.
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));

    return highlighter;
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:org.apache.solr.highlight.DefaultSolrHighlighter.java

License:Apache License

 * Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
    SolrParams params = request.getParams();
    // Formatter, encoder, scorer and fragmenter are all resolved for the same field name.
    Highlighter highlighter = new Highlighter(getFormatter(fieldName, params), getEncoder(fieldName, params),
            getQueryScorer(query, fieldName, request));
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
    // NOTE(review): the method's closing brace is missing from this excerpt.

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

// Builds a Highlighter for the given property from the formatter and encoder held in
// highlighterSettings and the scorer returned by createScorer(propertyName).
protected Highlighter createHighlighter(String propertyName) throws SearchEngineException {
    Highlighter highlighter = new Highlighter(highlighterSettings.getFormatter(),
            highlighterSettings.getEncoder(), createScorer(propertyName));
    Fragmenter f = highlighterSettings.getFragmenter();
    // NOTE(review): both branches below are empty in this excerpt, and f is never applied to
    // the highlighter in the visible code. Presumably the setTextFragmenter call (and any
    // handling of maxBytesToAnalyze) was lost in extraction - confirm against the original file.
    if (maxBytesToAnalyze == -1) {
    } else {
    return highlighter;
    // NOTE(review): closing braces for the else branch and the method are missing here.

From source file:org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter.java

License:Apache License

/**
 * Highlights one field of one hit using the Lucene plain {@code Highlighter}.
 * <p>
 * A highlighter is looked up per field mapper in a map stored in the hit context's cache under
 * {@code CACHE_KEY}, and created on a miss. If no fragments result and {@code noMatchSize} is
 * positive, a leading excerpt of the first field value is returned instead.
 * <p>
 * NOTE(review): this excerpt has lost a number of lines and closing braces (several statements
 * end mid-expression), so it does not compile as shown; the notes below mark the visible gaps.
 *
 * @param highlighterContext supplies the field options, search context, hit context, mapper and query
 * @return the highlighted fragments, a leading excerpt when nothing matched, or {@code null}
 * @throws FetchPhaseExecutionException if highlighting the field fails, except for the
 *         max-bytes-length case, which returns {@code null}
 */
public HighlightField highlight(HighlighterContext highlighterContext) {
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper mapper = highlighterContext.mapper;

    // Only the literal "html" selects the HTML encoder; any other value uses the default one.
    Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML
            : HighlightUtils.Encoders.DEFAULT;

    // Lazily create the per-hit map of mapper -> highlighter.
    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> mappers = new HashMap<>();
        hitContext.cache().put(CACHE_KEY, mappers);
    }
    // NOTE(review): the right-hand side of this assignment is cut off in the excerpt.
    Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter>) hitContext

    org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
    if (entry == null) {
        // The scorer is restricted to this field only when requireFieldMatch is set.
        QueryScorer queryScorer = new CustomQueryScorer(highlighterContext.query,
                field.fieldOptions().requireFieldMatch() ? mapper.fieldType().name() : null);
        // Fragmenter selection: 0 fragments -> no fragmenting; unset or "span" -> span fragmenter;
        // "simple" -> fixed-size fragmenter; anything else is rejected.
        Fragmenter fragmenter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            fragmenter = new NullFragmenter();
        } else if (field.fieldOptions().fragmenter() == null) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else if ("simple".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
        } else if ("span".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else {
            throw new IllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter()
                    + "] for the field [" + highlighterContext.fieldName + "]");
        // NOTE(review): the closing brace of the else branch and the rest of the formatter
        // construction are missing from the excerpt.
        Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0],

        entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
        // NOTE(review): fragmenter is never applied to entry in the visible code; presumably the
        // entry.setTextFragmenter(fragmenter) call was dropped here - confirm against the original.
        // always highlight across all data
        // NOTE(review): the statement the comment above refers to is missing from the excerpt.

        cache.put(mapper, entry);

    // a HACK to make highlighter do highlighting, even though its using the single frag list builder
    int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1
            : field.fieldOptions().numberOfFragments();
    ArrayList<TextFragment> fragsList = new ArrayList<>();
    List<Object> textsToHighlight;
    // NOTE(review): the analyzer lookup below is cut off mid-expression.
    Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers()

    try {
        textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);

        for (Object textToHighlight : textsToHighlight) {
            String text = textToHighlight.toString();

            // The token stream is closed automatically by try-with-resources.
            try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
                if (!tokenStream.hasAttribute(CharTermAttribute.class)
                        || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                    // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
                // NOTE(review): the body of the check above and the tail of the call below are missing.
                TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false,
                for (TextFragment bestTextFragment : bestTextFragments) {
                    // Only non-null fragments with a positive score pass this check; presumably they
                    // are added to fragsList (the statement is missing from the excerpt).
                    if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
    } catch (Exception e) {
        if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) {
            // this can happen if for example a field is not_analyzed and ignore_above option is set.
            // the field will be ignored when indexing but the huge term is still in the source and
            // the plain highlighter will parse the source and try to analyze it.
            return null;
        } else {
            throw new FetchPhaseExecutionException(context,
                    "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    if (field.fieldOptions().scoreOrdered()) {
        CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
            public int compare(TextFragment o1, TextFragment o2) {
                // Descending by score. NOTE(review): Math.round maps score differences smaller
                // than 0.5 to 0, so such fragments compare as equal - confirm this is intended.
                return Math.round(o2.getScore() - o1.getScore());
    String[] fragments;
    // number_of_fragments is set to 0 but we have a multivalued field
    if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
        fragments = new String[fragsList.size()];
        for (int i = 0; i < fragsList.size(); i++) {
            fragments[i] = fragsList.get(i).toString();
    } else {
        // refine numberOfFragments if needed
        numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
        fragments = new String[numberOfFragments];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = fragsList.get(i).toString();

    if (fragments.length > 0) {
        return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));

    // Nothing was highlighted: fall back to a leading excerpt when noMatchSize allows it.
    int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
    if (noMatchSize > 0 && textsToHighlight.size() > 0) {
        // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
        String fieldContents = textsToHighlight.get(0).toString();
        int end;
        try {
            // NOTE(review): the argument list of this call is cut off in the excerpt.
            end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().name(),
        } catch (Exception e) {
            throw new FetchPhaseExecutionException(context,
                    "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
        if (end > 0) {
            return new HighlightField(highlighterContext.fieldName,
                    new Text[] { new Text(fieldContents.substring(0, end)) });
    return null;

From source file:org.haplo.app.SearchResultExcerptHighlighter.java
    License:Mozilla Public License
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper<?> mapper = highlighterContext.mapper;

    Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML
            : HighlightUtils.Encoders.DEFAULT;

    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> mappers = Maps.newHashMap();
        hitContext.cache().put(CACHE_KEY, mappers);
    }/*  w  w w .  j a  v a2 s . com*/
    Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper<?>, org.apache.lucene.search.highlight.Highlighter>) hitContext

    org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
    if (entry == null) {
        Query query = highlighterContext.query.originalQuery();
        QueryScorer queryScorer = new CustomQueryScorer(query,
                field.fieldOptions().requireFieldMatch() ? mapper.names().indexName() : null);
        Fragmenter fragmenter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            fragmenter = new NullFragmenter();
        } else if (field.fieldOptions().fragmenter() == null) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else if ("simple".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
        } else if ("span".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else {
            throw new ElasticsearchIllegalArgumentException(
                    "unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field ["
                            + highlighterContext.fieldName + "]");
        Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0],

        entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
        // always highlight across all data

        cache.put(mapper, entry);

    // a HACK to make highlighter do highlighting, even though its using the single frag list builder
    int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1
            : field.fieldOptions().numberOfFragments();
    ArrayList<TextFragment> fragsList = new ArrayList<TextFragment>();
    List<Object> textsToHighlight;

    try {
        textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);

        for (Object textToHighlight : textsToHighlight) {
            String text = textToHighlight.toString();
            Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers()
            TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), text);
            if (!tokenStream.hasAttribute(CharTermAttribute.class)
                    || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
            TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false,
            for (TextFragment bestTextFragment : bestTextFragments) {
                if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
    } catch (Exception e) {
        throw new FetchPhaseExecutionException(context,
                "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    if (field.fieldOptions().scoreOrdered()) {
        CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {
            public int compare(TextFragment o1, TextFragment o2) {
                return Math.round(o2.getScore() - o1.getScore());
    String[] fragments;
    // number_of_fragments is set to 0 but we have a multivalued field
    if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
        fragments = new String[fragsList.size()];
        for (int i = 0; i < fragsList.size(); i++) {
            fragments[i] = fragsList.get(i).toString();
    } else {
        // refine numberOfFragments if needed
        numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
        fragments = new String[numberOfFragments];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = fragsList.get(i).toString();

    if (fragments.length > 0) {
        return new HighlightField(highlighterContext.fieldName, StringText.convertFromStringArray(fragments));

    int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
    if (noMatchSize > 0 && textsToHighlight.size() > 0) {
        // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
        String fieldContents = textsToHighlight.get(0).toString();
        Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers()
        int end;
        try {
            end = findGoodEndForNoHighlightExcerpt(noMatchSize,
                    analyzer.tokenStream(mapper.names().indexName(), fieldContents));
        } catch (Exception e) {
            throw new FetchPhaseExecutionException(context,
                    "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
        if (end > 0) {
            return new HighlightField(highlighterContext.fieldName,
                    new Text[] { new StringText(fieldContents.substring(0, end)) });
    return null;

/**
 * Returns the best fragments of {@code escapedText}, requesting {@code NUMBER_OF_FRAGMENTS}
 * from the highlighter, with a formatter that wraps highlighted terms in {@code <b>...</b>}.
 * <p>
 * When {@code searchTerms} is null or empty a {@code NoHighlightingScorer} is used instead of a
 * query-based scorer.
 *
 * @param escapedText the text to excerpt; it is both tokenised and passed to the highlighter
 * @param searchTerms query string parsed with a {@code QueryParser} on field "FIELD"; may be null or empty
 * @param maxExcerptLength size passed to the fragmenter constructors
 * @return the fragments, or {@code null} if any exception occurred (the exception is logged at info level)
 */
static public String[] bestHighlightedExcerpts(String escapedText, String searchTerms, int maxExcerptLength) {
    try {
        // Scorer selects the terms which need highlighting. Created from a 'query' based on the extracted search terms.
        Scorer scorer;
        Fragmenter fragmenter;
        if (searchTerms != null && searchTerms.length() > 0) {
            QueryParser queryParser = new QueryParser("FIELD", new StandardAnalyzer());
            Query query = queryParser.parse(searchTerms);
            scorer = new QueryScorer(query);
            fragmenter = new SimpleSpanFragmenter((QueryScorer) scorer, maxExcerptLength);
        } else {
            scorer = new NoHighlightingScorer();
            fragmenter = new SimpleFragmenter(maxExcerptLength);
        // NOTE(review): the closing brace of the else branch is missing from this excerpt.

        // Parse the escaped text into tokens, which retain the positions in the text
        StandardAnalyzer analyser = new StandardAnalyzer();
        TokenStream tokenStream = analyser.tokenStream("FIELD", new StringReader(escapedText));

        // Finally, do the highlighting!
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
        // NOTE(review): fragmenter is built above but never applied in the visible code; presumably a
        // highlighter.setTextFragmenter(fragmenter) call was lost here - confirm against the original.
        return highlighter.getBestFragments(tokenStream, escapedText, NUMBER_OF_FRAGMENTS);
    } catch (Exception e) {
        // Best effort: any failure is logged and reported to the caller as null.
        Logger.getLogger("org.haplo.app").info("Exception in SearchResultExcerptHighlighter: ", e);
        return null;

 From source file:org.intermine.api.search.SearchResults.java
 License:GNU General Public License
 *                        string is neither null nor empty.
 * @param target Information about the scope and type of items to receive.
 * @param profileRepo The repository of the user who wants to find something.
 * @return A set of search results.
 * @throws ParseException If the query string cannot be parsed.
 * @throws IOException If there is an issue opening the indices.
// Runs the prepared query string against the user's index and the global indices, then builds a
// SearchResults from the score, tags and highlighted description of each hit.
// NOTE(review): this excerpt has lost several lines and closing braces; gaps are marked below.
private static SearchResults doFilteredSearch(String origQueryString, SearchTarget target,
        SearchRepository profileRepo) throws ParseException, IOException {

    Map<WebSearchable, String> highlightedDescMap = new HashMap<WebSearchable, String>();

    String queryString = prepareQueryString(origQueryString);

    LOG.info("Searching " + target + " for " + " was:" + origQueryString + " now:" + queryString);
    // Start of the timing window that is reported after the search below.
    long time = System.currentTimeMillis();

    org.apache.lucene.search.Query query;

    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // The default search field is the content buffer.
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
    query = queryParser.parse(queryString);

    // Get directories.
    String type = target.getType();
    Map<String, WebSearchable> globalWebSearchables = new HashMap<String, WebSearchable>();
    Set<SearchRepository> globals = SearchRepository.getGlobalSearchRepositories();
    List<Directory> globalDirs = new ArrayList<Directory>();
    // NOTE(review): the loop body is missing from the excerpt; presumably it fills
    // globalWebSearchables and globalDirs from each global repository - confirm.
    for (SearchRepository sr : globals) {
    Map<String, WebSearchable> userWebSearchables = profileRepo.getWebSearchableMap(type);
    Directory userDirectory = profileRepo.getSearchIndex(type);

    MultiSearcher searcher = prepareSearcher(target, userDirectory, globalDirs);

    // required to expand search terms
    query = searcher.rewrite(query);
    TopDocs topDocs = searcher.search(query, 1000); //FIXME: hardcoded limit

    time = System.currentTimeMillis() - time;
    LOG.info("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "' in " + time
            + " milliseconds:");

    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(FORMATTER, scorer);

    Map<WebSearchable, Float> hitMap = new HashMap<WebSearchable, Float>();
    Map<WebSearchable, Set<String>> tags = new HashMap<WebSearchable, Set<String>>();

    // NOTE(review): the loop is bounded by totalHits but indexes scoreDocs, and the search above
    // asks for at most 1000 hits. If totalHits can exceed scoreDocs.length this would run past
    // the end of the array - confirm against the Lucene TopDocs contract.
    for (int i = 0; i < topDocs.totalHits; i++) {
        WebSearchable webSearchable = null;
        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
        //String docScope = doc.get("scope");
        String name = doc.get("name");

        // Resolve the hit by name: the user's own items first, then the global ones.
        webSearchable = userWebSearchables.get(name);
        if (webSearchable == null) {
            webSearchable = globalWebSearchables.get(name);
        if (webSearchable == null) {
            throw new RuntimeException("unknown WebSearchable: " + name);

        Float luceneScore = new Float(topDocs.scoreDocs[i].score);
        hitMap.put(webSearchable, luceneScore);

        tags.put(webSearchable, new HashSet<String>(asList(split(doc.get("tags")))));

        try {
            // highlightedDescMap is assigned a new HashMap at the top of this method and is not
            // reassigned in the visible code, so this check is always true here.
            if (highlightedDescMap != null) {
                String highlightString = webSearchable.getDescription();
                if (highlightString == null) {
                    highlightString = "";
                TokenStream tokenStream = analyzer.tokenStream("", new StringReader(highlightString));
                // NullFragmenter: highlight the description as a single piece rather than in fragments.
                highlighter.setTextFragmenter(new NullFragmenter());
                // NOTE(review): the start of the statement below is missing; presumably the result is
                // stored in highlightedDescMap for this webSearchable - confirm.
                        highlighter.getBestFragment(tokenStream, highlightString));
        } catch (InvalidTokenOffsetsException e) {
            LOG.warn("Highlighter exception", e);

    // Index the hits by name for the result object.
    Map<String, WebSearchable> wsMap = new HashMap<String, WebSearchable>();
    for (WebSearchable ws : hitMap.keySet()) {
        wsMap.put(ws.getName(), ws);

    return new SearchResults(hitMap, wsMap, highlightedDescMap, tags);

From source file:org.jboss.seam.wiki.core.search.metamodel.SearchSupport.java


 * Returns the hits of the given query as fragments, highlighted, concatenated, and separated.
 * <p>
 * Pass in a <tt>NullFragmenter</tt> if you don't want any fragmentation by terms but
 * simply the hits highlighted. Otherwise, you will most likely use <tt>SimpleFragmenter</tt>.
 * The text you supply must be the same that was indexed, it will go through the same
 * analysis procedure to find the hits. Do not pass a different String than the one indexed
 * by Hibernate Search! If you use transparent string bridge with Hibernate Search, run the
 * bridge before passing the string into this method.
 * <p>
 * This method escapes any dangerous HTML characters in the indexed text and fragments by
 * replacing it with HTML entities. You can use the returned string directly to build a
 * <tt>SearchHit</tt>.
 * @param query the query that produced hits
 * @param fragmenter a fragmenter that can split the indexed text
 * @param indexedText the original text that was analyzed and indexed by Hibernate Search (after any bridges!)
 * @param numOfFragments the number of fragments to include in the returned result
 * @param alternativeLength if there are no hits to highlight, how many characters of the original text to return
 * @return the fragmented, highlighted, and then concatenated substring of the indexed text
protected String escapeBestFragments(Query query, Fragmenter fragmenter, String indexedText, int numOfFragments,
        int alternativeLength) {

    // The HTML escaping forces us to first fragment with internal placeholders...
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(INTERNAL_BEGIN_HIT, INTERNAL_END_HIT),
            new QueryScorer(query));
    // NOTE(review): the fragmenter parameter is never applied to the highlighter in the visible
    // code; presumably a highlighter.setTextFragmenter(fragmenter) call was lost here - confirm.
    try {
        // Use the same analyzer as the indexer!
        TokenStream tokenStream = new StandardAnalyzer().tokenStream(null, new StringReader(indexedText));

        // NOTE(review): the argument list of this call is cut off in the excerpt.
        String unescapedFragements = highlighter.getBestFragments(tokenStream, indexedText, numOfFragments,

        // Macros are removed and HTML is escaped while the hits are still marked by placeholders.
        String escapedFragments = WikiUtil.escapeHtml(WikiUtil.removeMacros(unescapedFragements), false, false);

        // .. and then replace the internal placeholders with real tags after HTML has been escaped
        // NOTE(review): String.replaceAll treats its first argument as a regex and interprets
        // '$' and '\' in the replacement - verify the placeholder and tag values are safe for that.
        escapedFragments = escapedFragments.replaceAll(INTERNAL_BEGIN_HIT, getBeginHitTag());
        escapedFragments = escapedFragments.replaceAll(INTERNAL_END_HIT, getEndHitTag());

        // Strip out macros
        // NOTE(review): no statement follows the comment above in this excerpt.

        // If no fragments were produced (no hits), return the original text as an alternative
        if (escapedFragments.length() == 0 && alternativeLength != 0) {
            // Truncate to alternativeLength characters, or keep the whole text if it is shorter.
            return WikiUtil.escapeHtml(WikiUtil.removeMacros(indexedText.substring(0,
                    indexedText.length() > alternativeLength ? alternativeLength : indexedText.length())),
                    false, false);
        } else if (escapedFragments.length() == 0 && alternativeLength == 0) {
            // alternativeLength of 0: return the full text, with macros removed and HTML escaped.
            return WikiUtil.escapeHtml(WikiUtil.removeMacros(indexedText), false, false);

        return escapedFragments;

    } catch (Exception ex) {
        // Any failure is rethrown unchecked with the original exception as its cause.
        throw new RuntimeException(ex);

From source file:org.mskcc.pathdb.lucene.LuceneResults.java

    License:Open Source License
    StandardAnalyzer analyzer = new StandardAnalyzer();

    //  Standard Query Parser
    QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);

    // for the usage of highlighting with wildcards
    // Necessary to expand search terms
    IndexReader reader = IndexReader.open(new File(LuceneConfig.getLuceneDirectory()));
    Query luceneQuery = queryParser.parse(term);
    luceneQuery = luceneQuery.rewrite(reader);

    //  Scorer implementation which scores text fragments by the number of
    //  unique query terms found.
    QueryScorer queryScorer = new QueryScorer(luceneQuery);

    //  HTML Formatted surrounds matching text with <B></B> tags.
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();

    //  Highligher Class
    Highlighter highLighter = new Highlighter(htmlFormatter, queryScorer);

    //  XXX Characters Max in Each Fragment
    Fragmenter fragmenter = new SimpleFragmenter(100);
    return highLighter;