Example usage for org.apache.lucene.search.highlight Highlighter setTextFragmenter

List of usage examples for org.apache.lucene.search.highlight Highlighter setTextFragmenter

Introduction

This page lists example usages of org.apache.lucene.search.highlight.Highlighter.setTextFragmenter.

Prototype

public void setTextFragmenter(Fragmenter fragmenter) 
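
Before the repository examples below, here is a minimal, self-contained sketch of where setTextFragmenter fits in the classic highlighting workflow. It is only a sketch: it assumes a recent Lucene classpath (roughly 5.x or later, with the lucene-highlighter and lucene-queryparser modules present), and the class name SetTextFragmenterSketch, the field name "contents", and the sample text are placeholders.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

public class SetTextFragmenterSketch {

    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        Query query = new QueryParser("contents", analyzer).parse("search engine");

        //  Score fragments by the query terms they contain and wrap matches in <B></B> tags.
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(query));

        //  setTextFragmenter controls how the source text is split into candidate fragments;
        //  SimpleFragmenter breaks it into fixed-size chunks (here roughly 50 characters).
        Fragmenter fragmenter = new SimpleFragmenter(50);
        highlighter.setTextFragmenter(fragmenter);

        String text = "Apache Lucene is a high-performance, full-featured text search engine library written in Java.";
        TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text));
        System.out.println(highlighter.getBestFragments(tokenStream, text, 3, "..."));
    }
}

Swapping SimpleFragmenter for NullFragmenter, as several of the examples below do, disables fragmenting entirely so the whole field value is returned with highlights in place.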


Usage

From source file:org.mskcc.pathdb.tool.QueryFullText.java

License:Open Source License

/**
 * Executes Full Text Query.
 *
 * @param term Search Term
 * @throws QueryException Lucene Query Error
 * @throws IOException    I/O Error
 * @throws ParseException Lucene Parsing Error
 */
public static void queryFullText(String term) throws QueryException, IOException, ParseException {
    System.out.println("Using search term:  " + term);
    LuceneReader luceneReader = new LuceneReader();
    Hits hits = luceneReader.executeQuery(term);
    int num = Math.min(10, hits.length());
    System.out.println("Total Number of Hits:  " + hits.length());
    if (hits.length() > 0) {

        //  Standard Analyzer to extract words using a list of English stop words.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        //  Standard Query Parser
        QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);

        //  To highlight wildcard queries, the query must be rewritten
        //  against the index so that search terms are expanded
        IndexReader reader = IndexReader.open(new File(LuceneConfig.getLuceneDirectory()));
        Query luceneQuery = queryParser.parse(term);
        luceneQuery = luceneQuery.rewrite(reader);

        //  Scorer implementation which scores text fragments by the number of
        //  unique query terms found.
        QueryScorer queryScorer = new QueryScorer(luceneQuery);

        //  SimpleHTMLFormatter surrounds matching text with <B></B> tags.
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();

        //  Highlighter class
        Highlighter highLighter = new Highlighter(htmlFormatter, queryScorer);

        //  100 characters max in each fragment
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highLighter.setTextFragmenter(fragmenter);

        System.out.println("Showing hits:  0-" + (num - 1));
        for (int i = 0; i < num; i++) {
            System.out.print("Hit " + i + ":  ");

            //  Get the Matching Hit
            Document doc = hits.doc(i);

            //  Get the Field of Interest
            Field field = doc.getField(LuceneConfig.FIELD_ALL);

            //  Create the Token Stream
            TokenStream tokenStream = new StandardAnalyzer().tokenStream(LuceneConfig.FIELD_ALL,
                    new StringReader(field.stringValue()));

            //  Get up to 5 best fragments, separated by "..."
            String formattedText = highLighter.getBestFragments(tokenStream, field.stringValue(), 5, "...");
            System.out.println(formattedText);
        }
    }
}

From source file:org.riotfamily.search.ResultHighlighter.java

License:Apache License

public HighlightingContext createContext(IndexSearcher indexSearcher, Query query) throws IOException {

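    // Rewrite the query first so multi-term queries (wildcards, prefixes, fuzzy) expand into concrete terms the scorer can match.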
    Scorer scorer = new QueryScorer(indexSearcher.rewrite(query));
    if (formatter == null) {
        formatter = new SimpleHTMLFormatter("<" + highlightPreTag + ">", "</" + highlightPostTag + ">");
    }
    if (fragmenter == null) {
        fragmenter = new SimpleFragmenter(fragmentSize);
    }
    Highlighter highlighter = new Highlighter(formatter, encoder, scorer);
    highlighter.setTextFragmenter(fragmenter);
    return new HighlightingContext(highlighter);
}

From source file:org.shredzone.cilla.service.search.strategy.LuceneSearchStrategy.java

License:Open Source License

/**
 * Creates a list of highlights for a search result.
 *
 * @param pq
 *            {@link Query} that was used
 * @param result
 *            List of {@link Page} results
 * @return matching list of text extracts with highlights
 */
private List<String> createHighlights(Query pq, List<Page> result) {
    QueryScorer scorer = new QueryScorer(pq, "text");
    Fragmenter fragmenter = searchResultRenderer.createFragmenter(scorer);
    Formatter formatter = searchResultRenderer.createFormatter();

    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(fragmenter);

    PageBridge bridge = new PageBridge();

    return result.stream().parallel().map(bridge::objectToString).map(plain -> highlight(plain, highlighter))
            .collect(Collectors.toList());
}

From source file:searchEngine.SearchFiles.java

License:Apache License

public static QueryResult doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
        int hitsPerPage, int pageNumber, boolean raw, boolean interactive)
        throws IOException, InvalidTokenOffsetsException {

    QueryResult queryResults = new QueryResult();
    TopDocs results = searcher.search(query, pageNumber * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

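    // Wrap matched terms in red <font> tags and limit each fragment to roughly 20 characters.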
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=\"red\">", "</font>"),
            new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(20));

    int start = 0;
    int end = Math.min(numTotalHits, pageNumber * hitsPerPage);
    for (int i = start; i < end; i++) {
        if (raw) {
            System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
            continue;
        }

        Document doc = searcher.doc(hits[i].doc);
        Analyzer analyzer = new SmartChineseAnalyzer();
        String text = doc.get("contents");
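        // Re-analyze the stored contents so the highlighter can locate term offsets in the text.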
        TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text));
        String highlighterResult = highlighter.getBestFragments(tokenStream, text, 2, "");
        System.out.println("########### " + highlighterResult);

        String path = doc.get("path");
        if (path != null) {
            System.out.println((i + 1) + ". " + path + " Score: " + hits[i].score);
            //System.out.println(results[i].)
            queryResults.addUrl(path);
            String title = doc.get("title");
            queryResults.addTitle(title);
            queryResults.addContent(highlighterResult);
        } else {
            System.out.println((i + 1) + ". " + "No path for this document");
        }
    }
    return queryResults;
}

From source file:top.sj.lucene.LuceneSearchUtil.java

License:Open Source License

/**
 * Searches a single field with a single query condition and returns the
 * highlighted results.
 *
 * @param primaryKeyByHibernateEntity
 *            name of the Hibernate entity's primary key field
 * @param analysisTarget
 *            name of the field to search and highlight
 * @param analysisCondition
 *            the query string
 * @return list of search results with highlighted fragments
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException
 */
public static List<LuceneSearchDTO> searchOfSingleAreaAndSingleCondition(String primaryKeyByHibernateEntity,
        String analysisTarget, String analysisCondition)
        throws IOException, ParseException, InvalidTokenOffsetsException {

    String configPath = PropertiesTool.getPropertiesFileAsObject("lucene_config.properties")
            .getProperty("index_location");
    Directory dir = null;
    try {
        dir = FSDirectory.open(new File(configPath));
    } catch (Exception e) {
        e.printStackTrace();
    }

    // Directory dir = FSDirectory.open(new File("D:\\lucene"));

    IndexSearcher searcher = new IndexSearcher(dir);

    QueryParser parser = new QueryParser(Version.LUCENE_30, analysisTarget,
            new StandardAnalyzer(Version.LUCENE_30));

    // Parse the query string
    Query query = parser.parse(analysisCondition);

    TopDocs topDocs = searcher.search(query, MAX_SEARCH_RESULT);

    //  Highlight matches in the target field: score fragments by query terms,
    //  break them on span boundaries, and wrap hits in <b> tags.
    QueryScorer queryScorer = new QueryScorer(query);
    Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
    Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    highlighter.setTextFragmenter(fragmenter);

    List<LuceneSearchDTO> analysisResults = new ArrayList<LuceneSearchDTO>();

    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        int docId = topDocs.scoreDocs[i].doc;
        Document doc = searcher.doc(docId);
        String attr = highlighter.getBestFragment(new StandardAnalyzer(Version.LUCENE_30), analysisTarget,
                doc.get(analysisTarget));
        analysisResults.add(new LuceneSearchDTO(Integer.valueOf(doc.get(primaryKeyByHibernateEntity)), attr));
    }
    return analysisResults;
}

From source file:top.sj.lucene.LuceneSearchUtil.java

License:Open Source License

/**
 * Searches a single field with multiple query conditions (the first condition
 * is required, the rest are optional) and returns the highlighted results.
 *
 * @param primaryKeyByHibernateEntity
 *            name of the Hibernate entity's primary key field
 * @param analysisTarget
 *            name of the field to search and highlight
 * @param analysisConditions
 *            one or more query strings
 * @return list of search results with highlighted fragments
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException
 */
public static List<LuceneSearchDTO> searchOfSingleAreaAndMultiCondition(String primaryKeyByHibernateEntity,
        String analysisTarget, String... analysisConditions)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    String configPath = PropertiesTool.getPropertiesFileAsObject("lucene_config.properties")
            .getProperty("index_location");
    Directory dir = FSDirectory.open(new File(configPath));

    // Directory dir = FSDirectory.open(new File("D://lucene"));
    IndexSearcher searcher = new IndexSearcher(dir);

    QueryParser parser = new QueryParser(Version.LUCENE_30, analysisTarget,
            new StandardAnalyzer(Version.LUCENE_30));

    BooleanQuery query = new BooleanQuery();

    for (int i = 0; i < analysisConditions.length; i++) {
        Query query1 = parser.parse(analysisConditions[i]);
        query.add(query1, i == 0 ? Occur.MUST : Occur.SHOULD);
    }
    TopDocs topDocs = searcher.search(query, MAX_SEARCH_RESULT);

    //  Highlight matches in the target field: score fragments by query terms,
    //  break them on span boundaries, and wrap hits in <b> tags.
    QueryScorer queryScorer = new QueryScorer(query);
    Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
    Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
    Highlighter highlighter = new Highlighter(formatter, queryScorer);
    highlighter.setTextFragmenter(fragmenter);

    List<LuceneSearchDTO> analysisResults = new ArrayList<LuceneSearchDTO>();

    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        int docId = topDocs.scoreDocs[i].doc;
        Document doc = searcher.doc(docId);
        String attr = highlighter.getBestFragment(new StandardAnalyzer(Version.LUCENE_30), analysisTarget,
                doc.get(analysisTarget));
        analysisResults.add(new LuceneSearchDTO(Integer.valueOf(doc.get(primaryKeyByHibernateEntity)), attr));
    }
    return analysisResults;
}

From source file:uk.ac.ebi.arrayexpress.utils.saxon.search.QueryHighlighter.java

License:Apache License

public String highlightQuery(QueryInfo queryInfo, String fieldName, String text) {
    try {
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(HIT_OPEN_MARK, HIT_CLOSE_MARK);
        Highlighter highlighter = new Highlighter(htmlFormatter,
                new QueryScorer(queryInfo.getQuery(), fieldName, this.env.defaultField));
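        // NullFragmenter returns the whole text as a single fragment, so the entire value is highlighted in place.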
        highlighter.setTextFragmenter(new NullFragmenter());

        String str = highlighter.getBestFragment(this.env.indexAnalyzer,
                "".equals(fieldName) ? this.env.defaultField : fieldName, text);

        return null != str ? str : text;
    } catch (Exception x) {
        logger.error("Caught an exception:", x);
    }
    return text;
}

From source file:uk.ac.ebi.arrayexpress.utils.search.EFOExpandedHighlighter.java

License:Apache License

private String doHighlightQuery(Query query, String fieldName, String text, String openMark, String closeMark) {
    try {
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(openMark, closeMark);
        Highlighter highlighter = new Highlighter(htmlFormatter,
                new QueryScorer(query, fieldName, this.env.defaultField));
        highlighter.setTextFragmenter(new NullFragmenter());

        String str = highlighter.getBestFragment(this.env.indexAnalyzer,
                "".equals(fieldName) ? this.env.defaultField : fieldName, text);

        return null != str ? str : text;
    } catch (Exception x) {
        logger.error("Caught an exception:", x);
    }
    return text;

}

From source file:uk.ac.ebi.biostudies.efo.EFOExpandedHighlighter.java

License:Apache License

private String doHighlightQuery(Query query, String fieldName, String text, String openMark, String closeMark,
        boolean fragmentOnly) {
    try {
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(openMark, closeMark);
        QueryScorer scorer = new QueryScorer(query, fieldName, indexConfig.getDefaultField());
        Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
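        // Either produce snippet-sized fragments around matched spans, or keep the whole text as one fragment.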
        highlighter.setTextFragmenter(
                fragmentOnly ? new SimpleSpanFragmenter(scorer, indexConfig.getSearchSnippetFragmentSize())
                        : new NullFragmenter());
        String str = highlighter.getBestFragment(new ExperimentTextAnalyzer(),
                "".equals(fieldName) ? indexConfig.getDefaultField() : fieldName, text);
        return str;
    } catch (Exception x) {
        logger.error("Caught an exception:", x);
    }
    return text;

}