Example usage for org.apache.lucene.search.highlight Highlighter getBestFragment

List of usage examples for org.apache.lucene.search.highlight.Highlighter#getBestFragment

Introduction

This page collects usage examples for org.apache.lucene.search.highlight.Highlighter#getBestFragment.

Prototype

public final String getBestFragment(TokenStream tokenStream, String text)
        throws IOException, InvalidTokenOffsetsException 

Source Link

Document

Highlights chosen terms in a text, extracting the most relevant section.

Usage

From source file:com.paladin.common.Tools.java

License:Apache License

/**
 *   ? /*  w ww .  j  av a  2 s. c o  m*/
 *
 * @param _query
 * @param _field
 * @param _content
 * @return
 */
public static String highlight(final Query _query, final String _field, final String _content) {
    // 
    Scorer scorer = new QueryScorer(_query);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(Constants.HIGHLIGHT_STYLE, "</span>");
    Highlighter hl = new Highlighter(formatter, scorer);
    TokenStream tokens = new IKAnalyzer().tokenStream(_field, new StringReader(_content));
    try {
        return hl.getBestFragment(tokens, _content);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    }
    return null;
}

From source file:com.searchlocal.lucene.ContentSearcher.java

License:Open Source License

/**
 * Runs the keyword search described by {@code param} and returns the requested slice of hits
 * with the query terms highlighted in each document's "content" field.
 * <p>
 * The index directory and the searcher are created on first use and kept in {@code fsd} and
 * {@code is}. Failures while parsing, searching or highlighting are printed and swallowed;
 * the beans collected up to that point are returned.
 *
 * @param param search parameters: index path, keyword, start row and end row
 * @return result beans for rows {@code [startRow, endRow)} of at most 100 collected hits;
 *         empty when nothing matched
 * @throws IOException if the index directory cannot be opened
 * @throws LogicException declared by the signature; presumably raised by
 *         {@code BeanUtil.getBean} - TODO confirm
 */
public static List<ResultBean> query(SearchParam param) throws IOException, LogicException {
    String indexPath = param.getIndexPath();
    // Open the directory lazily; later calls reuse it.
    if (null == fsd) {
        fsd = SimpleFSDirectory.open(new File(indexPath));
    }
    List<ResultBean> beanList = new ArrayList<ResultBean>();
    try {
        // Parse the keyword against the "content" field.
        Analyzer analyzer = new PaodingAnalyzer();
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
        Query query = parser.parse(param.getKeyWord());

        TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
        // Create the searcher lazily; later calls reuse it.
        if (null == is) {
            is = new IndexSearcher(fsd, true);
        }
        is.search(query, collector);

        ScoreDoc[] scoreDoc = collector.topDocs().scoreDocs;

        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");

        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(CONTENTS_SHOW_LENGTH));

        if (scoreDoc.length == 0) {
            return beanList;
        }
        // Clamp the requested window to the hits that actually exist so that a negative
        // start row or an oversized end row cannot index outside the array.
        int startRow = Math.max(0, param.getStartRow());
        int endRow = Math.min(param.getEndRow(), scoreDoc.length);
        for (int i = startRow; i < endRow; i++) {
            Document doc = is.doc(scoreDoc[i].doc);
            String content = doc.get("content");
            if (content != null) {
                // Only highlight when the field is present; StringReader rejects null input.
                TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));
                content = highlighter.getBestFragment(tokenStream, content);
            }
            ResultBean bean = BeanUtil.getBean(doc, content);
            beanList.add(bean);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    }
    return beanList;
}

From source file:de.innovationgate.wga.server.api.Lucene.java

License:Open Source License

/**
 * Highlights the terms of the Lucene query stored in the current session inside the given text.
 * <p>
 * Falls back to the unmodified text when Lucene is disabled, when the session holds no query,
 * when no fragment is produced, or when highlighting fails.
 *
 * @param field        field name passed to the Lucene manager when creating the highlighter
 * @param originalText text to highlight
 * @param prefix       text inserted in front of every highlighted term
 * @param suffix       text inserted behind every highlighted term
 * @return a singleton list holding either the highlighted text or {@code originalText}
 * @throws WGException declared by the signature
 */
@CodeCompletion
public List<String> highlightLuceneField(String field, String originalText, String prefix, String suffix)
        throws WGException {
    WGACore wgaCore = _wga.getCore();
    if (!wgaCore.isLuceneEnabled()) {
        _wga.getLog().warn("Unable to highlight text bc. lucene is not enabled.");
        return Collections.singletonList(originalText);
    }

    // The last simplified Lucene query is kept in the session; without it nothing can be highlighted.
    org.apache.lucene.search.Query sessionQuery = (org.apache.lucene.search.Query) _wga.getRequest()
            .getSession().getAttribute(Query.SESSION_ATTRIBUTE_SIMPLIFIED_LUCENEQUERY);
    if (sessionQuery == null) {
        return Collections.singletonList(originalText);
    }

    // Terms are first wrapped in neutral markers and the markers are swapped for the caller's
    // prefix/suffix afterwards; the extra step is needed to encode the fragment text properly.
    final String startMarker = "$HIGHLIGHT_PREFIX$";
    final String endMarker = "$HIGHLIGHT_SUFFIX$";
    SimpleHTMLFormatter markerFormatter = new SimpleHTMLFormatter(startMarker, endMarker);

    Highlighter markerHighlighter = wgaCore.getLuceneManager().createHighlighter(field, sessionQuery,
            markerFormatter);
    TokenStream textTokens = wgaCore.getLuceneManager().createTokenStream(originalText, _cx.content());

    // Fragment size of length + 1 so that the whole text comes back as a single fragment.
    markerHighlighter.setTextFragmenter(new SimpleFragmenter(originalText.length() + 1));

    try {
        String marked = markerHighlighter.getBestFragment(textTokens, originalText);
        if (marked == null) {
            return Collections.singletonList(originalText);
        }
        String withPrefix = WGUtils.strReplace(marked, startMarker, prefix, true);
        String withBoth = WGUtils.strReplace(withPrefix, endMarker, suffix, true);
        return Collections.singletonList(withBoth);
    } catch (IOException e) {
        _wga.getLog().warn("Unable to highlight text bc. of exception '" + e.getMessage() + "'.");
        return Collections.singletonList(originalText);
    } catch (InvalidTokenOffsetsException e) {
        _wga.getLog().warn("Unable to highlight meta text bc. of exception '" + e.getMessage() + "'.");
        return Collections.singletonList(originalText);
    }
}

From source file:de.innovationgate.wgpublisher.webtml.utils.TMLContext.java

License:Open Source License

/**
 * Returns the text value of an item with the terms of the session's last Lucene query highlighted.
 * <p>
 * The unmodified item text is returned, with a warning where the code adds one, when Lucene
 * is disabled, when the session holds no query, when no fragment is produced, or when
 * highlighting fails.
 *
 * @param name   item name; it is lower-cased before use
 * @param prefix text inserted in front of every highlighted term
 * @param suffix text inserted behind every highlighted term
 * @param encode encoding argument forwarded to {@code itemTextValue}
 * @return the highlighted item text, the plain item text as fallback, or {@code null} when
 *         {@code name} is {@code null} or the item has no text value
 * @throws WGAPIException declared by the signature
 */
@CodeCompletion
public String highlightitem(String name, String prefix, String suffix, String encode) throws WGAPIException {
    if (name == null) {
        return null;
    }

    final String itemName = name.toLowerCase();

    final String itemText = itemTextValue(itemName, encode);
    if (itemText == null) {
        return null;
    }

    if (!getwgacore().isLuceneEnabled()) {
        addwarning("Unable to highlight item '" + itemName + "' bc. lucene is not enabled.");
        return itemText;
    }

    // The last simplified Lucene query is kept in the session; without it nothing can be highlighted.
    org.apache.lucene.search.Query sessionQuery = (org.apache.lucene.search.Query) getrequest().getSession()
            .getAttribute(Query.SESSION_ATTRIBUTE_SIMPLIFIED_LUCENEQUERY);
    if (sessionQuery == null) {
        addwarning(
                "Lucene highlighting not possible because there is no query with enabled highlighting support");
        return itemText;
    }

    // Terms are first wrapped in neutral markers and the markers are swapped for the caller's
    // prefix/suffix afterwards; the extra step is needed to encode the fragment text properly
    // (see F00004C66).
    final String startMarker = "$HIGHLIGHT_PREFIX$";
    final String endMarker = "$HIGHLIGHT_SUFFIX$";
    SimpleHTMLFormatter markerFormatter = new SimpleHTMLFormatter(startMarker, endMarker);

    Highlighter itemHighlighter = getwgacore().getLuceneManager().createHighlighter(itemName, sessionQuery,
            markerFormatter);

    // The text that gets tokenized is the item text run through the matching index rule.
    LuceneIndexItemRule itemRule = getwgacore().getLuceneManager()
            .retrieveLuceneConfig(content().getDatabase().getDbReference())
            .getMatchingItemRule(itemName);
    String textToAnalyze = itemRule.parseItemValue(itemText);

    TokenStream itemTokens = getwgacore().getLuceneManager().createTokenStream(textToAnalyze, content());

    // Fragment size of length + 1 so that the whole item text comes back as a single fragment;
    // without the + 1 two fragments may be returned when both texts have equal length
    // (possible Lucene bug).
    itemHighlighter.setTextFragmenter(new SimpleFragmenter(itemText.length() + 1));

    try {
        String marked = itemHighlighter.getBestFragment(itemTokens, itemText);
        if (marked == null) {
            return itemText;
        }
        String withPrefix = WGUtils.strReplace(marked, startMarker, prefix, true);
        return WGUtils.strReplace(withPrefix, endMarker, suffix, true);
    } catch (IOException e) {
        addwarning("Unable to highlight item '" + itemName + "' bc. of exception '" + e.getMessage() + "'.");
    } catch (InvalidTokenOffsetsException e) {
        addwarning("Unable to highlight item '" + itemName + "' bc. of exception '" + e.getMessage() + "'.");
    }

    return itemText;
}

From source file:de.innovationgate.wgpublisher.webtml.utils.TMLContext.java

License:Open Source License

/**
 * returns a singleton list with metavalues highlighted (surrounded with given <prefix> and <suffix>) based uppon the last lucene query with highlight attribute set to true
 * if highlighting is not possible this method returns metalist(<name>);
 * @param name/*  w  w w  .j av a 2  s .  c  o m*/
 * @param prefix
 * @param suffix
 * @param encode
 * @return list 
 * @throws WGAPIException
 */
@CodeCompletion
public List highlightMeta(String name, String prefix, String suffix, String encode) throws WGAPIException {
    if (name == null) {
        return null;
    }

    String originalText = metaTextValue(name, encode);
    List<String> originalTextAsList = Collections.singletonList(originalText);

    if (!getwgacore().isLuceneEnabled()) {
        addwarning("Unable to highlight meta '" + name + "' bc. lucene is not enabled.");
        return originalTextAsList;
    }
    // try to retrieve last lucene query for highlighting
    org.apache.lucene.search.Query query = (org.apache.lucene.search.Query) getrequest().getSession()
            .getAttribute(Query.SESSION_ATTRIBUTE_SIMPLIFIED_LUCENEQUERY);
    if (query == null) {
        // no query in session - highlighting not possible
        return originalTextAsList;
    }

    // create htmlformatter to highlight fragments with "$HIGHLIGHT_PREFIX$", "$HIGHLIGHT_SUFFIX$"
    // these placeholders are later on replaced by the given prefix and suffix
    // this additional step is necessary to encode the fragment text properly
    String prefixPlaceholder = "$HIGHLIGHT_PREFIX$";
    String suffixPlaceholder = "$HIGHLIGHT_SUFFIX$";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(prefixPlaceholder, suffixPlaceholder);

    // create highlighter
    Highlighter highlighter = getwgacore().getLuceneManager().createHighlighter(name.toUpperCase(), query,
            formatter);

    // create tokenstream
    TokenStream tokenStream = getwgacore().getLuceneManager().createTokenStream(originalText, content());

    // create fragmenter and set fragmentsize to metaText.length to ensure only one fragments with the whole metaText is returned        
    Fragmenter fragmenter = new SimpleFragmenter(originalText.length() + 1); // +1 is necessary here 
    highlighter.setTextFragmenter(fragmenter);

    try {
        String highlighted = highlighter.getBestFragment(tokenStream, originalText);
        if (highlighted != null) {

            // replace highlight placeholders with correct prefix and suffix
            highlighted = WGUtils.strReplace(highlighted, prefixPlaceholder, prefix, true);
            highlighted = WGUtils.strReplace(highlighted, suffixPlaceholder, suffix, true);

            return Collections.singletonList(highlighted);
        } else {
            return originalTextAsList;
        }
    } catch (IOException e) {
        addwarning("Unable to highlight meta '" + name + "' bc. of exception '" + e.getMessage() + "'.");
        return originalTextAsList;
    } catch (InvalidTokenOffsetsException e) {
        addwarning("Unable to highlight meta '" + name + "' bc. of exception '" + e.getMessage() + "'.");
        return originalTextAsList;
    }

}

From source file:de.spartusch.nasfvi.server.NSearcher.java

License:Apache License

/**
 * Extracts a field's values from a document. This method is aware of
 * <i>collapsed</i> or <i>merged</i> fields and handles them properly. 
 * @param nquery NQuery used for searching
 * @param doc Document to extract the field's values from
 * @param field Name of the field to extract values for
 * @return Set of extracted values/*from w  w  w . j av  a 2  s.com*/
 */
private Set<String> extractValues(final NQuery nquery, final Document doc, final String field) {
    Set<String> values = new HashSet<String>();

    if (NQuery.isFieldToCollapse(field)) {
        // process merged field
        String mfield = NQuery.getMergedField();
        QueryScorer scorer = new QueryScorer(nquery.getQuery(), mfield);
        Highlighter highlighter = new Highlighter(scorer);
        highlighter.setTextFragmenter(new NullFragmenter());

        try {
            Set<String> buffer = new HashSet<String>();

            for (Fieldable f : doc.getFieldables(mfield)) {
                String content = f.stringValue();
                String value = normalizeValue(NQuery.extractValue(field, content));

                // Test if the field was matched by the query
                TokenStream ts = TokenSources.getTokenStream(mfield, content, nquery.getAnalyzer());
                if (highlighter.getBestFragment(ts, content) != null) {
                    values.add(value);
                } else {
                    // Buffer the value - in case no field matches
                    buffer.add(value);
                }
            }

            if (values.isEmpty()) {
                // No field was matched by the query
                values.addAll(buffer);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (InvalidTokenOffsetsException e) {
            throw new RuntimeException(e);
        }
    } else {
        for (String v : doc.getValues(field)) {
            values.add(normalizeValue(v));
        }
    }

    return values;
}

From source file:io.jpress.module.article.searcher.LuceneSearcher.java

License:LGPL

/**
 * Converts search hits into {@code Article} objects and attaches highlighted versions of
 * title and text to each of them.
 * <p>
 * If highlighting of a hit fails with an {@code InvalidTokenOffsetsException}, the error is
 * logged and the article is still added, without the highlight that failed.
 *
 * @param searcher    searcher used to load the stored document of every hit
 * @param topDocs     hits to convert
 * @param highlighter highlighter used for title and text
 * @param keyword     passed to the analyzer as the field name when tokenizing
 * @return one article per hit, in hit order
 * @throws IOException if loading a document or tokenizing fails
 */
private List<Article> toArticleList(IndexSearcher searcher, TopDocs topDocs, Highlighter highlighter,
        String keyword) throws IOException {
    List<Article> articles = new ArrayList<>();
    // The analyzer is created for this call only, so release it once all hits are processed.
    try (Analyzer analyzer = new JcsegAnalyzer(JcsegTaskConfig.COMPLEX_MODE)) {
        for (ScoreDoc item : topDocs.scoreDocs) {
            Document doc = searcher.doc(item.doc);
            Article article = new Article();
            String title = doc.get("title");
            String content = doc.get("content");
            article.setId(Long.valueOf(doc.get("aid")));
            article.setTitle(title);
            article.setContent(content);
            try {
                // StringReader rejects null input, so only highlight values that are present.
                if (title != null) {
                    String highlightTitle = highlighter
                            .getBestFragment(analyzer.tokenStream(keyword, new StringReader(title)), title);
                    article.setHighlightTitle(highlightTitle);
                }
                String text = article.getText();
                if (text != null) {
                    String highlightContent = highlighter
                            .getBestFragment(analyzer.tokenStream(keyword, new StringReader(text)), text);
                    article.setHighlightContent(highlightContent);
                }
            } catch (InvalidTokenOffsetsException e) {
                logger.error(e.getMessage(), e);
            }
            articles.add(article);
        }
    }
    return articles;
}

From source file:lius.search.LiusHitList.java

License:Apache License

/**
 * Builds the {@code LiusHit} for the hit at the given position of {@code luceneHits}.
 * <p>
 * Score and document id are copied from the hit. For every configured display field that has
 * values in the document, a {@code LiusField} is created; when highlighting is enabled in the
 * configuration, each value is replaced by its highlighted fragment(s) if the highlighter
 * returns any.
 *
 * @param index position of the hit inside {@code luceneHits}
 * @return the populated hit
 * @throws IOException if reading the index, rewriting the query or tokenizing fails
 */
private LiusHit buildLiusHit(int index) throws IOException {

    LiusHit liusHit = new LiusHit();
    liusHit.setScore(luceneHits.score(index));
    liusHit.setDocId(luceneHits.id(index));

    Document luceneDocument = luceneHits.doc(index);

    Map liusHitFieldsMap = new HashMap();
    List liusFieldsList = new ArrayList();
    Highlighter luceneHighlighter = null;

    if (liusConfig.getHighlighter()) {
        // The reader is only needed to rewrite the query; close it afterwards so that
        // building a hit does not leak an open index reader.
        IndexReader luceneIndexReader = IndexReader.open(indexDirectory);
        try {
            Query rewrittenLuceneQuery = luceneQuery.rewrite(luceneIndexReader);
            QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);

            SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"liusHit\">",
                    "</span>");
            luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);
        } finally {
            luceneIndexReader.close();
        }
    }

    for (int j = 0; j < liusConfig.getDisplayFields().size(); j++) {
        LiusField configLiusField = (LiusField) liusConfig.getDisplayFields().get(j);
        LiusField hitLiusField = new LiusField();
        String fieldName = configLiusField.getName();

        hitLiusField.setName(fieldName);
        hitLiusField.setLabel(configLiusField.getLabel());

        if (luceneHighlighter != null) {
            // A configured fragmenter value is the fragment size; without one the whole
            // value is treated as a single fragment.
            Fragmenter luceneFragmenter;
            if (configLiusField.getFragmenter() != null) {
                luceneFragmenter = new SimpleFragmenter(Integer.parseInt(configLiusField.getFragmenter()));
            } else {
                luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE);
            }
            luceneHighlighter.setTextFragmenter(luceneFragmenter);
        }
        String[] luceneDocumentValues = luceneDocument.getValues(fieldName);
        // Skip fields without values; an empty array would otherwise fail on element 0 below.
        if (luceneDocumentValues != null && luceneDocumentValues.length > 0) {
            if (luceneHighlighter != null) {
                for (int k = 0; k < luceneDocumentValues.length; k++) {
                    Analyzer luceneAnalyzer = AnalyzerFactory.getAnalyzer(liusConfig);
                    TokenStream luceneTokenStream = luceneAnalyzer.tokenStream(fieldName,
                            new StringReader(luceneDocumentValues[k]));
                    String fragment;
                    if (configLiusField.getFragmenter() != null) {
                        fragment = luceneHighlighter.getBestFragments(luceneTokenStream,
                                luceneDocumentValues[k], 5, "...");
                    } else {
                        fragment = luceneHighlighter.getBestFragment(luceneTokenStream,
                                luceneDocumentValues[k]);
                    }

                    // Keep the original value when the highlighter produced nothing.
                    if (fragment != null) {
                        luceneDocumentValues[k] = fragment;
                    }
                }
            }

            hitLiusField.setValue(luceneDocumentValues[0]);
            hitLiusField.setValues(luceneDocumentValues);

            liusHitFieldsMap.put(fieldName, hitLiusField);
            liusFieldsList.add(hitLiusField);
        }

    }
    liusHit.setLiusFieldsMap(liusHitFieldsMap);
    liusHit.setLiusFields(liusFieldsList);
    return liusHit;
}

From source file:lucandra.LucandraTests.java

License:Apache License

/**
 * Checks that a phrase query hit can be highlighted from the token stream that
 * {@code TokenSources} builds for the indexed document; this exercises the
 * TermPositionVector classes.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testHighlight() throws Exception {

    IndexReader indexReader = new IndexReader(indexName, client);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);

    // check exact
    Query q = qp.parse("+key:\"foobar foobar\"");
    TopDocs docs = searcher.search(q, 10);
    assertEquals(1, docs.totalHits);

    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    QueryScorer scorer = new QueryScorer(q, "key", text);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    // one fragment spanning the whole text
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    TokenStream tvStream = TokenSources.getTokenStream(indexReader, docs.scoreDocs[0].doc, "key");

    String rv = highlighter.getBestFragment(tvStream, text);

    assertNotNull(rv);
    // expected value first, so that a failure report labels the two strings correctly
    assertEquals(highlightedText, rv);
}

From source file:net.paoding.analysis.TestPaodingAnalyzer.java

License:Apache License

/**
 * Checks that a lower-case query term is highlighted inside mixed-case text when both are
 * analyzed with {@code PaodingAnalyzer}.
 *
 * @throws Exception if parsing or highlighting fails
 */
@Test
public void testHighlighting() throws Exception {

    Analyzer a = new PaodingAnalyzer();
    QueryParser parser = new QueryParser(Version.LUCENE_46, "f", a);

    Query q = parser.parse("domnick");
    String txt = "Domnick Hunter 0.01m , ?OIL-X Plus";

    Highlighter highlighter = new Highlighter(new QueryScorer(q));
    String resp = highlighter.getBestFragment(a.tokenStream("f", txt), txt);

    // A null result must fail with the assertion message instead of a NullPointerException.
    assertTrue(resp + " is not correctly highlighted", resp != null && resp.contains("<B>Domnick</B>"));

}