On this page you can find example usages of org.apache.lucene.search.highlight.Highlighter.getBestFragment.


public final String getBestFragment(TokenStream tokenStream, String text)
        throws IOException, InvalidTokenOffsetsException 

Source Link


Highlights chosen terms in a text, extracting the most relevant section.


From source file:net.skyatlas.icd.test.AnsegTest.java

private String toHighlighter(Analyzer analyzer, Query query, Document doc) throws InvalidTokenOffsetsException {
    String field = "text";
    try {//w w  w .j a v  a2s .com
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        TokenStream tokenStream1 = analyzer.tokenStream("text", new StringReader(doc.get(field)));
        String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field));
        return highlighterStr == null ? doc.get(field) : highlighterStr;
    } catch (IOException e) {
        // TODO Auto-generated catch block
    } catch (InvalidTokenOffsetsException e) {
        // TODO Auto-generated catch block
    return null;

From source file:org.apache.nutch.searcher.Summarizer.java

License:Apache License

public static String getsummary(String queryString, String content, Analyzer analyzer) {
    if (queryString == null && content != null) {
        if (content.length() > SUM_LENGTH)
            return content.substring(0, (SUM_LENGTH) - 1);
        else//  w w w .j  a va2 s . co m
            return content;
    } else if (queryString != null && content == null)
        return "";
    else if (queryString == null && content == null)
        return "";
    SimpleHTMLFormatter sHtmlF = new SimpleHTMLFormatter(cssfront, cssend);

    org.apache.lucene.search.Query summarizerQuery = null;
    QueryParser queryParse = new QueryParser("content", analyzer);
    try {
        summarizerQuery = queryParse.parse(queryString);
    } catch (ParseException ex) {
        if (content.length() > SUM_LENGTH)
            return content.substring(0, (SUM_LENGTH) - 1);
            return content;
    QueryScorer qs = new QueryScorer(summarizerQuery);
    Highlighter highlighter = new Highlighter(sHtmlF, qs);
    highlighter.setTextFragmenter(new SimpleFragmenter(SUM_LENGTH));
    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));
    String str;
    try {
        str = highlighter.getBestFragment(tokenStream, content);
    } catch (IOException e) {
        str = null;
    if (str == null) {
        if (content.length() > SUM_LENGTH)
            str = content.substring(0, (SUM_LENGTH) - 1);
            str = content;
    return str;

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

public String fragment(Resource resource, String propertyName, String text) throws SearchEngineException {

    Highlighter highlighter = createHighlighter(propertyName);
    TokenStream tokenStream = createTokenStream(resource, propertyName, text);

    try {//w w  w  . j a  va2s  . c  o  m
        return highlighter.getBestFragment(tokenStream, text);
    } catch (IOException e) {
        throw new SearchEngineException("Failed to highlight fragments for alias [" + resource.getAlias()
                + "] and property [" + propertyName + "]");

From source file:org.compass.core.lucene.engine.LuceneSearchEngineHighlighter.java

License:Apache License

public String[] multiValueFragment(Resource resource, String propertyName, String[] texts)
        throws SearchEngineException {
    List fragmentList = new ArrayList();
    Highlighter highlighter = createHighlighter(propertyName);
    for (int i = 0; i < texts.length; i++) {
        String text = texts[i];/*from  w w  w  .j  a v a  2 s. c  om*/
        if (text != null && text.length() > 0) {
            //TokenStream tokenStream = createTokenStream(resource, propertyName, text);
            // We have to re-analyze one field value at a time
            TokenStream tokenStream = createTokenStreamFromAnalyzer(propertyName, text);
            try {
                String fragment = highlighter.getBestFragment(tokenStream, text);
                if (fragment != null && fragment.length() > 0) {
            } catch (IOException e) {
                throw new SearchEngineException("Failed to highlight fragments for alias ["
                        + resource.getAlias() + "] and property [" + propertyName + "]");
    return (String[]) fragmentList.toArray(new String[fragmentList.size()]);

From source file:org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighterTests.java

License:Apache License

public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    Map analysers = new HashMap<String, Analyzer>();
    analysers.put("text", new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);
    Query termQuery = new TermQuery(new Term("text", "failure"));
    Query boolQuery = new BooleanQuery.Builder().add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD))
            .add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD)).build();
    org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(
            new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(
            fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"),
            "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));
    // TODO: This test will fail if we pass in an instance of GeoPointInBBoxQueryImpl too. Should we also find a way to work around that
    // or can the query not be rewritten before it is passed into the highlighter?

From source file:org.elasticsearch.search.highlight.PlainHighlighterTests.java

License:Apache License

public void checkGeoQueryHighlighting(Query geoQuery) throws IOException, InvalidTokenOffsetsException {
    Map analysers = new HashMap<String, Analyzer>();
    analysers.put("text", new StandardAnalyzer());
    FieldNameAnalyzer fieldNameAnalyzer = new FieldNameAnalyzer(analysers);
    Query termQuery = new TermQuery(new Term("text", "failure"));
    Query boolQuery = new BooleanQuery.Builder().add(new BooleanClause(geoQuery, BooleanClause.Occur.SHOULD))
            .add(new BooleanClause(termQuery, BooleanClause.Occur.SHOULD)).build();
    org.apache.lucene.search.highlight.Highlighter highlighter = new org.apache.lucene.search.highlight.Highlighter(
            new CustomQueryScorer(boolQuery));
    String fragment = highlighter.getBestFragment(
            fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"),
            "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));
    Query rewritten = boolQuery.rewrite(null);
    highlighter = new org.apache.lucene.search.highlight.Highlighter(new CustomQueryScorer(rewritten));
    fragment = highlighter.getBestFragment(
            fieldNameAnalyzer.tokenStream("text", "Arbitrary text field which should not cause " + "a failure"),
            "Arbitrary text field which should not cause a failure");
    assertThat(fragment, equalTo("Arbitrary text field which should not cause a <B>failure</B>"));

From source file:org.intermine.api.search.SearchResults.java

License:GNU General Public License

 * Actually filter the web searchable items we have to get a reduced list of matches.
 * @param origQueryString A query to filter the items against. Assumes the query
 *                        string is neither null not empty.
 * @param target Information about the scope and type of items to receive.
 * @param profileRepo The repository of the user who wants to find something.
 * @return A set of search results./*w ww .ja  va2s  . c om*/
 * @throws ParseException If the query string cannot be parsed.
 * @throws IOException If there is an issue opening the indices.
private static SearchResults doFilteredSearch(String origQueryString, SearchTarget target,
        SearchRepository profileRepo) throws ParseException, IOException {

    Map<WebSearchable, String> highlightedDescMap = new HashMap<WebSearchable, String>();

    String queryString = prepareQueryString(origQueryString);

    LOG.info("Searching " + target + " for " + " was:" + origQueryString + " now:" + queryString);
    long time = System.currentTimeMillis();

    org.apache.lucene.search.Query query;

    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // The default search field is the content buffer.
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
    query = queryParser.parse(queryString);

    // Get directories.
    String type = target.getType();
    Map<String, WebSearchable> globalWebSearchables = new HashMap<String, WebSearchable>();
    Set<SearchRepository> globals = SearchRepository.getGlobalSearchRepositories();
    List<Directory> globalDirs = new ArrayList<Directory>();
    for (SearchRepository sr : globals) {
    Map<String, WebSearchable> userWebSearchables = profileRepo.getWebSearchableMap(type);
    Directory userDirectory = profileRepo.getSearchIndex(type);

    MultiSearcher searcher = prepareSearcher(target, userDirectory, globalDirs);

    // required to expand search terms
    query = searcher.rewrite(query);
    TopDocs topDocs = searcher.search(query, 1000); //FIXME: hardcoded limit

    time = System.currentTimeMillis() - time;
    LOG.info("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "' in " + time
            + " milliseconds:");

    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(FORMATTER, scorer);

    Map<WebSearchable, Float> hitMap = new HashMap<WebSearchable, Float>();
    Map<WebSearchable, Set<String>> tags = new HashMap<WebSearchable, Set<String>>();

    for (int i = 0; i < topDocs.totalHits; i++) {
        WebSearchable webSearchable = null;
        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
        //String docScope = doc.get("scope");
        String name = doc.get("name");

        webSearchable = userWebSearchables.get(name);
        if (webSearchable == null) {
            webSearchable = globalWebSearchables.get(name);
        if (webSearchable == null) {
            throw new RuntimeException("unknown WebSearchable: " + name);

        Float luceneScore = new Float(topDocs.scoreDocs[i].score);
        hitMap.put(webSearchable, luceneScore);

        tags.put(webSearchable, new HashSet<String>(asList(split(doc.get("tags")))));

        try {
            if (highlightedDescMap != null) {
                String highlightString = webSearchable.getDescription();
                if (highlightString == null) {
                    highlightString = "";
                TokenStream tokenStream = analyzer.tokenStream("", new StringReader(highlightString));
                highlighter.setTextFragmenter(new NullFragmenter());
                        highlighter.getBestFragment(tokenStream, highlightString));
        } catch (InvalidTokenOffsetsException e) {
            LOG.warn("Highlighter exception", e);

    Map<String, WebSearchable> wsMap = new HashMap<String, WebSearchable>();
    for (WebSearchable ws : hitMap.keySet()) {
        wsMap.put(ws.getName(), ws);

    return new SearchResults(hitMap, wsMap, highlightedDescMap, tags);

From source file:org.zilverline.service.SearchServiceImpl.java

License:Open Source License

/**
 * Helper that creates a Result from a Document.
 *
 * @param doc the Document
 * @param score the score of the Document in the hit
 * @param hl the Highlighter used
 * @param an the Analyzer used
 * @return Result the resulting object that is used in the model.
 */
// Reads the stored fields of doc, highlights the title and summary with hl, and returns a Result.
// NOTE(review): in this listing several blocks are never closed and thisResult is returned
// without any visible call that populates it — the listing looks truncated; confirm against
// the original file before relying on it.
private Result doc2ResultHelper(final Document doc, final float score, final Highlighter hl,
        final Analyzer an) {
    String docTitle = doc.get("title");
    String docName = doc.get("name");
    String docPath = doc.get("path");
    String zipName = doc.get("zipName");
    // Only build the debug message when debug logging is enabled.
    if (log.isDebugEnabled()) {
        log.debug("Preparing result " + docName + ":" + zipName);
    String zipPath = doc.get("zipPath");
    // URL and cache URL stay empty unless the document's collection is known.
    String docURL = "";
    String docCache = "";
    String docCollection = doc.get("collection");

    // get the collection
    DocumentCollection thisCollection = collectionManager.getCollectionByName(docCollection);

    if (thisCollection != null) {
        docURL = thisCollection.getUrlDefault();

        // The cache URL is only taken when the collection reports that it keeps a cache.
        if (thisCollection.isKeepCacheWithManagerDefaults()) {
            docCache = thisCollection.getCacheUrlWithManagerDefaults();
    } else {
        log.error("Unknown collection '" + docCollection + "' found, can not find its URL.");

    // get the modification date, and convert it to a readable form date is stored as (yyyyMMdd)
    DateFormat df1 = new SimpleDateFormat("yyyyMMdd");
    Date docDate = null;

    try {
        docDate = df1.parse(doc.get("modified"));
    } catch (ParseException e) {
        log.debug("Invalid date retrieved, trying backward compatibility with v 1.0-rc3-patch1");

        // backward compatibility with v 1.0-rc3-patch1 storage of date:
        // Keyword<modified:0cee68g00>
        docDate = DateField.stringToDate(doc.get("modified"));
        // Last resort: use the epoch when the legacy conversion yields no date.
        if (docDate == null) {
            log.warn("Invalid date retrieved, returning epoch (1970) for " + docName);
            docDate = new Date(0);

    String docSize = doc.get("size");
    String docType = doc.get("type");
    String docISBN = doc.get("isbn");

    // use the name if it has no title
    // TODO this logic could go into Result
    if ((docTitle == null) || docTitle.equals("")) {
        // Title fallback order: zipName when present, otherwise the document name.
        if ((zipName == null) || zipName.equals("")) {
            docTitle = docName;
        } else {
            docTitle = zipName;
    // then make a Result
    Result thisResult = new Result();

    // highlight the title with search terms
    String highlightedText;
    TokenStream tokenStream = an.tokenStream("title", new StringReader(docTitle));

    try {
        highlightedText = hl.getBestFragment(tokenStream, docTitle);

        // Keep the plain title when the highlighter returns nothing usable.
        if ((highlightedText != null) && (highlightedText.length() > 0)) {
            docTitle = highlightedText;
    } catch (IOException e1) {
        log.warn("Can't highlight " + docTitle, e1);


    // highlight the name with search terms
    // NOTE(review): the next four lines look like the body of a commented-out block whose
    // opening and closing comment delimiters are missing from this listing — confirm.
     * tokenStream = an.tokenStream("title", new StringReader(docName)); try { highlightedText = hl.getBestFragment(tokenStream,
     * docName); log.debug("name after highlighting: " + highlightedText); if (highlightedText != null &&
     * highlightedText.length() > 0) { docName = highlightedText; } } catch (IOException e1) { log.warn("Can't highlight " +
     * docName, e1); }

    // A missing summary is treated as empty text so it can still be tokenized.
    String text = doc.get("summary");
    if (text == null) {
        text = "";

    // highlight the summary with search terms
    tokenStream = an.tokenStream("summary", new StringReader(text));

    try {
        highlightedText = hl.getBestFragment(tokenStream, text);

        // Keep the plain summary when the highlighter returns nothing usable.
        if ((highlightedText != null) && (highlightedText.length() > 0)) {
            text = highlightedText;
    } catch (IOException e1) {
        log.warn("Can't highlight " + text, e1);



    return thisResult;