Example usage for org.apache.lucene.search.highlight Highlighter Highlighter

Introduction

On this page you can find example usage for the org.apache.lucene.search.highlight Highlighter constructor Highlighter(Formatter, Scorer).

Prototype

public Highlighter(Formatter formatter, Scorer fragmentScorer) 
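
A minimal sketch of how this constructor is typically wired up. The field name, analyzer, marker tags, and sample text are illustrative assumptions, not taken from the examples below; checked exceptions and, on some Lucene releases, the Version argument to StandardAnalyzer are omitted for brevity.

// Minimal, illustrative sketch: wrap matching query terms in <b> tags.
Query query = new TermQuery(new Term("content", "lucene"));
QueryScorer scorer = new QueryScorer(query);
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(new SimpleFragmenter(100));

// getBestFragment tokenizes the text with the analyzer and returns the
// best-scoring fragment, or null when no query term occurs in the text.
String text = "Apache Lucene is a full-text search library.";
String fragment = highlighter.getBestFragment(new StandardAnalyzer(), "content", text);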

Usage

From source file:fr.mael.microrss.dao.impl.UserArticleDaoImpl.java

License:Open Source License

public List<UserArticle> search(String queryStr, User user, int start, int nb)
        throws ParseException, IOException, InvalidTokenOffsetsException {
    FullTextSession searchSession = Search.getFullTextSession(getSessionFactory().getCurrentSession());
    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_31,
            new String[] { "article.content", "article.title" }, new StandardAnalyzer(Version.LUCENE_31));
    org.apache.lucene.search.Query query = parser.parse(queryStr);
    FullTextQuery hibQuery = searchSession.createFullTextQuery(query, UserArticle.class);
    Criteria fetchingStrategy = searchSession.createCriteria(UserArticle.class);
    fetchingStrategy.setFetchMode("article.feed", FetchMode.JOIN);
    fetchingStrategy.setFetchMode("userLabels", FetchMode.JOIN);
    fetchingStrategy.add(Property.forName("user").eq(user));
    hibQuery.setCriteriaQuery(fetchingStrategy);
    hibQuery.setFirstResult(start);
    hibQuery.setMaxResults(nb);

    QueryScorer scorer = new QueryScorer(query);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("[highlight]", "[/highlight]");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 200));

    List<UserArticle> userArticles = (List<UserArticle>) hibQuery.list();

    for (UserArticle userArticle : userArticles) {
        // use the same analyzer version as the query parser above (LUCENE_31) for consistent tokenization
        String highlight = highlighter.getBestFragment(new StandardAnalyzer(Version.LUCENE_31), "content",
                userArticle.getArticle().getContent());
        if (highlight != null) {
            highlight = highlight.replaceAll("\\<.*?>", "").replace("\n", " ");
            userArticle.getArticle().setHighlight(highlight);
        }
    }

    return userArticles;
}

From source file:framework.retrieval.engine.query.formatter.impl.HighlighterMaker.java

License:Apache License

public String getHighlighter(Query query, String fieldName, String keyWord, int resumeLength) {

    QueryScorer scorer = new QueryScorer(query);

    Highlighter highlighter = new Highlighter(getFormatter(), scorer);

    Fragmenter fragmenter = new SimpleFragmenter(resumeLength);
    highlighter.setTextFragmenter(fragmenter);

    String result = "";

    try {
        result = highlighter.getBestFragment(analyzer, fieldName, keyWord);
    } catch (Exception e) {
        throw new RetrievalQueryException(e);
    }

    return result;
}

From source file:index.IndexUtils.java

public static List highlight(IndexSearcher indexSearcher, String key) throws ClassNotFoundException {
    try {
        QueryParser queryParser = new QueryParser("name", new StandardAnalyzer());
        Query query = queryParser.parse(key);
        TopDocCollector collector = new TopDocCollector(800);
        indexSearcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        Highlighter highlighter = null;
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(200));
        List list = new ArrayList();
        Document doc;
        for (int i = 0; i < hits.length; i++) {
            doc = indexSearcher.doc(hits[i].doc);
            TokenStream tokenStream = new StandardAnalyzer().tokenStream("name",
                    new StringReader(doc.get("name")));
            // add the highlighted "name" value to the result list
            // (the original wrapped it in an application-specific result object; on newer
            //  Lucene releases getBestFragment may also throw InvalidTokenOffsetsException)
            list.add(highlighter.getBestFragment(tokenStream, doc.get("name")));
        }
        return list;
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;

}

From source file:io.bdrc.lucene.bo.TibetanAnalyzerTest.java

License:Apache License

@Test
public void ewtsOffsetBug2() throws IOException, ParseException, InvalidTokenOffsetsException {
    String input = "(cha) bka' bkan gnyis kyi lung";
    String queryLucene = "test:\"bka'\"";
    Analyzer indexingAnalyzer = new TibetanAnalyzer(false, true, false, "ewts", "");
    Analyzer queryAnalyzer = new TibetanAnalyzer(false, true, false, "ewts", "");
    TokenStream indexTk = indexingAnalyzer.tokenStream("", input);
    QueryParser queryParser = new QueryParser("test", queryAnalyzer);
    Query query = queryParser.parse(queryLucene);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("->", "<-");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(10));
    TextFragment[] frags = highlighter.getBestTextFragments(indexTk, input, true, 128);
    final String firstFrag = frags[0].toString();
    System.out.println(firstFrag);
    assert (firstFrag.equals("(cha) ->bka'<- bkan gnyis kyi lung"));
    indexingAnalyzer.close();
    queryAnalyzer.close();
}

From source file:io.jpress.module.article.searcher.LuceneSearcher.java

License:LGPL

@Override
public Page<Article> search(String keyword, int pageNum, int pageSize) {
    IndexReader indexReader = null;
    try {
        // Bug fix: escape the keyword with QueryParser.escape(), otherwise keywords such as "I/O" break buildQuery
        keyword = QueryParser.escape(keyword);
        indexReader = DirectoryReader.open(directory);
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Query query = buildQuery(keyword);

        ScoreDoc lastScoreDoc = getLastScoreDoc(pageNum, pageSize, query, indexSearcher);
        TopDocs topDocs = indexSearcher.searchAfter(lastScoreDoc, query, pageSize);

        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font class=\"" + HIGH_LIGHT_CLASS + "\">",
                "</font>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(100));

        List<Article> articles = toArticleList(indexSearcher, topDocs, highlighter, keyword);
        int totalRow = getTotalRow(indexSearcher, query);
        return newPage(pageNum, pageSize, totalRow, articles);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        CommonsUtils.quietlyClose(indexReader);
    }
    return null;
}

From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License:Mozilla Public License

public static HashMap<String, Object> searchIndex(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    logger.debug("IN");
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery andQuery = new BooleanQuery();
    if (metaDataToSearch != null) {
        //search for query string on metadata name field and content
        //where metadata name = metaDataToSearch
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);
    }
    Query query = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer).parse(queryString);
    andQuery.add(query, BooleanClause.Occur.MUST);
    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);
    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);

    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    //pass the hits back to the action
    objectsToReturn.put("hits", hits);

    //highlighter
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));
    if (hits != null) {
        logger.debug("hits size: " + hits.length);
        for (int i = 0; i < hits.length; i++) {
            ScoreDoc hit = hits[i];
            Document doc = searcher.doc(hit.doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);

            String[] subobjNames = doc.getValues(IndexingConstants.SUBOBJ_NAME);
            if (subobjNames != null && subobjNames.length != 0) {
                String views = "";
                for (int k = 0; k < subobjNames.length; k++) {
                    views += subobjNames[k] + " ";
                }
                objectsToReturn.put(biobjId + "-views", views);
            }
            String summary = "";
            if (highlighter != null) {
                String[] summaries;
                try {
                    Integer idobj = (Integer.valueOf(biobjId));

                    String contentToSearchOn = fillSummaryText(idobj);

                    summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT),
                            IndexingConstants.CONTENTS, contentToSearchOn, 3);
                    StringBuffer summaryBuffer = new StringBuffer();
                    if (summaries.length > 0) {
                        summaryBuffer.append(summaries[0]);
                    }
                    for (int j = 1; j < summaries.length; j++) {
                        summaryBuffer.append(" ... ");
                        summaryBuffer.append(summaries[j]);
                    }
                    summary = summaryBuffer.toString();
                    //get only a portion of summary
                    if (summary.length() > 101) {
                        summary = summary.substring(0, 100);
                        summary += "...";
                    }
                    objectsToReturn.put(biobjId, summary);
                } catch (InvalidTokenOffsetsException e) {
                    logger.error(e.getMessage(), e);
                } catch (NumberFormatException e) {
                    logger.error(e.getMessage(), e);
                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                }
            }
        }
    }
    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    logger.debug("OUT");
    return objectsToReturn;

}

From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java

License:Mozilla Public License

public static HashMap<String, Object> searchIndexFuzzy(IndexSearcher searcher, String queryString, String index,
        String[] fields, String metaDataToSearch) throws IOException, ParseException {
    logger.debug("IN");
    HashMap<String, Object> objectsToReturn = new HashMap<String, Object>();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    BooleanQuery orQuery = new BooleanQuery();
    BooleanQuery andQuery = new BooleanQuery();
    for (int i = 0; i < fields.length; i++) {
        Query query = new FuzzyQuery(new Term(fields[i], queryString));
        query = query.rewrite(searcher.getIndexReader());
        orQuery.add(query, BooleanClause.Occur.SHOULD);
    }
    andQuery.add(orQuery, BooleanClause.Occur.MUST);
    if (metaDataToSearch != null) {
        //search for query string on metadata name field and content
        //where metadata name = metaDataToSearch
        Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch));
        andQuery.add(queryMetadata, BooleanClause.Occur.MUST);
    }

    Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant()));
    andQuery.add(tenantQuery, BooleanClause.Occur.MUST);

    logger.debug("Searching for: " + andQuery.toString());
    int hitsPerPage = 50;

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(andQuery, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    objectsToReturn.put("hits", hits);

    //highlighter
    //orQuery = orQuery.rewrite(searcher.getIndexReader());
    //andQuery = andQuery.rewrite(searcher.getIndexReader());
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery));

    if (hits != null) {
        for (int i = 0; i < hits.length; i++) {
            ScoreDoc hit = hits[i];
            Document doc = searcher.doc(hit.doc);
            String biobjId = doc.get(IndexingConstants.BIOBJ_ID);
            String summary = " ";
            if (highlighter != null) {
                String[] summaries;
                try {
                    Integer idobj = (Integer.valueOf(biobjId));

                    String contentToSearchOn = fillSummaryText(idobj);
                    summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT),
                            IndexingConstants.CONTENTS, contentToSearchOn, 3);

                    StringBuffer summaryBuffer = new StringBuffer();
                    if (summaries.length > 0) {
                        summaryBuffer.append(summaries[0]);
                    }
                    for (int j = 1; j < summaries.length; j++) {
                        summaryBuffer.append(" ... ");
                        summaryBuffer.append(summaries[j]);
                    }
                    summary = summaryBuffer.toString();
                    //get only a portion of summary
                    if (summary.length() > 101) {
                        summary = summary.substring(0, 100);
                        summary += "...";
                    }
                    objectsToReturn.put(biobjId, summary);
                } catch (InvalidTokenOffsetsException e) {
                    logger.error(e.getMessage(), e);
                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                }

            }
        }
    }

    int numTotalHits = collector.getTotalHits();
    logger.info(numTotalHits + " total matching documents");

    logger.debug("OUT");
    return objectsToReturn;

}

From source file:lius.search.LiusHitList.java

License:Apache License

private LiusHit buildLiusHit(int index) throws IOException {

    LiusHit liusHit = new LiusHit();
    liusHit.setScore(luceneHits.score(index));
    liusHit.setDocId(luceneHits.id(index));

    Document luceneDocument = luceneHits.doc(index);

    Map liusHitFieldsMap = new HashMap();
    List liusFieldsList = new ArrayList();
    Highlighter luceneHighlighter = null;

    if (liusConfig.getHighlighter()) {
        IndexReader luceneIndexReader = IndexReader.open(indexDirectory);

        Query rewrittenLuceneQuery = luceneQuery.rewrite(luceneIndexReader);
        QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);

        SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"liusHit\">", "</span>");
        luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);
    }

    for (int j = 0; j < liusConfig.getDisplayFields().size(); j++) {
        LiusField configLiusField = (LiusField) liusConfig.getDisplayFields().get(j);
        LiusField hitLiusField = new LiusField();
        String fieldName = configLiusField.getName();

        hitLiusField.setName(fieldName);
        hitLiusField.setLabel(configLiusField.getLabel());

        if (luceneHighlighter != null) {
            Fragmenter luceneFragmenter;
            if (configLiusField.getFragmenter() != null) {
                luceneFragmenter = new SimpleFragmenter(Integer.parseInt(configLiusField.getFragmenter()));
            } else {
                luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE);
            }
            luceneHighlighter.setTextFragmenter(luceneFragmenter);
        }
        String[] luceneDocumentValues = luceneDocument.getValues(configLiusField.getName());
        if (luceneDocumentValues != null) {
            if (luceneHighlighter != null) {
                for (int k = 0; k < luceneDocumentValues.length; k++) {
                    Analyzer luceneAnalyzer = AnalyzerFactory.getAnalyzer(liusConfig);
                    TokenStream luceneTokenStream = luceneAnalyzer.tokenStream(configLiusField.getName(),
                            new StringReader(luceneDocumentValues[k]));
                    String fragment = null;
                    if (configLiusField.getFragmenter() != null)
                        fragment = luceneHighlighter.getBestFragments(luceneTokenStream,
                                luceneDocumentValues[k], 5, "...");
                    else {
                        fragment = luceneHighlighter.getBestFragment(luceneTokenStream,
                                luceneDocumentValues[k]);
                    }

                    if (fragment != null) {
                        luceneDocumentValues[k] = fragment;
                    }
                }
            }

            hitLiusField.setValue(luceneDocumentValues[0]);
            hitLiusField.setValues(luceneDocumentValues);

            liusHitFieldsMap.put(configLiusField.getName(), hitLiusField);
            liusFieldsList.add(hitLiusField);
        }

    }
    liusHit.setLiusFieldsMap(liusHitFieldsMap);
    liusHit.setLiusFields(liusFieldsList);
    return liusHit;
}

From source file:lucandra.LucandraTests.java

License:Apache License

public void testHighlight() throws Exception {

    // This tests the TermPositionVector classes

    IndexReader indexReader = new IndexReader(indexName, client);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);

    // check exact
    Query q = qp.parse("+key:\"foobar foobar\"");
    TopDocs docs = searcher.search(q, 10);
    assertEquals(1, docs.totalHits);

    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    QueryScorer scorer = new QueryScorer(q, "key", text);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    TokenStream tvStream = TokenSources.getTokenStream(indexReader, docs.scoreDocs[0].doc, "key");

    String rv = highlighter.getBestFragment(tvStream, text);

    assertNotNull(rv);
    assertEquals(highlightedText, rv);
}

From source file:lucee.runtime.search.lucene2.highlight._Highlight.java

License:Open Source License

public static Object createHighlighter(Query query, String highlightBegin, String highlightEnd) {

    return new Highlighter(
            //new SimpleHTMLFormatter("<span class=\"matching-term\">","</span>"),
            new SimpleHTMLFormatter(highlightBegin, highlightEnd), new QueryScorer(query));

}