From source file:org.mskcc.pathdb.tool.QueryFullText.java

License:Open Source License

/**
 * Executes Full Text Query.
 * @param term Search Term
 * @throws QueryException Lucene Query Error
 * @throws IOException    I/O Error
 * @throws ParseException Lucene Parsing Error
 */
public static void queryFullText(String term) throws QueryException, IOException, ParseException {
    //  Runs the search through LuceneReader, prints the total hit count, and then
    //  builds highlighted fragments for at most the first ten hits.
    //  NOTE(review): this listing ends inside the for loop; the remainder of the
    //  method is not visible here.
    System.out.println("Using search term:  " + term);
    LuceneReader luceneReader = new LuceneReader();
    Hits hits = luceneReader.executeQuery(term);
    //  Never display more than 10 hits.
    int num = Math.min(10, hits.length());
    System.out.println("Total Number of Hits:  " + hits.length());
    if (hits.length() > 0) {

        //  Standard Analyzer to extract words using a list of English stop words.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        //  Standard Query Parser, defaulting to the catch-all field.
        QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);

        //  Rewrite the parsed query against the index so that wildcard terms are
        //  expanded into concrete terms the highlighter can match.
        //  NOTE(review): the reader opened here is not closed in the visible code.
        IndexReader reader = IndexReader.open(new File(LuceneConfig.getLuceneDirectory()));
        Query luceneQuery = queryParser.parse(term);
        luceneQuery = luceneQuery.rewrite(reader);

        //  Scorer implementation which scores text fragments by the number of
        //  unique query terms found.
        QueryScorer queryScorer = new QueryScorer(luceneQuery);

        //  HTML formatter; surrounds matching text with <B></B> tags.
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();

        //  Highlighter class
        Highlighter highLighter = new Highlighter(htmlFormatter, queryScorer);

        //  Fragmenter sized at 100 characters per fragment.
        //  NOTE(review): this fragmenter is not passed to the highlighter anywhere
        //  in the visible code -- confirm whether setTextFragmenter was intended.
        Fragmenter fragmenter = new SimpleFragmenter(100);

        System.out.println("Showing hits:  0-" + (num - 1));
        for (int i = 0; i < num; i++) {
            System.out.print("Hit " + i + ":  ");

            //  Get the Matching Hit
            Document doc = hits.doc(i);

            //  Get the Field of Interest
            Field field = doc.getField(LuceneConfig.FIELD_ALL);

            //  Create the Token Stream by re-analyzing the stored field text.
            //  A new StandardAnalyzer is constructed for every hit.
            TokenStream tokenStream = new StandardAnalyzer().tokenStream(LuceneConfig.FIELD_ALL,
                    new StringReader(field.stringValue()));

            //  Get up to 5 of the best fragments, joined with "..."
            String formattedText = highLighter.getBestFragments(tokenStream, field.stringValue(), 5, "...");

From source file:org.riotfamily.search.ResultHighlighter.java

License:Apache License

public HighlightingContext createContext(IndexSearcher indexSearcher, Query query) throws IOException {

    Scorer scorer = new QueryScorer(indexSearcher.rewrite(query));
    if (formatter == null) {
        formatter = new SimpleHTMLFormatter("<" + highlightPreTag + ">", "</" + highlightPostTag + ">");
    }//from  www  .j a va2  s. c om
    if (fragmenter == null) {
        fragmenter = new SimpleFragmenter(fragmentSize);
    Highlighter highlighter = new Highlighter(formatter, encoder, scorer);
    return new HighlightingContext(highlighter);

From source file:org.shredzone.cilla.service.search.strategy.LuceneSearchStrategy.java

License:Open Source License

/**
 * Creates a list of highlights for a search result.
 *
 * @param pq
 *            {@link Query} that was used
 * @param result
 *            List of {@link Page} results
 * @return matching list of text extracts with highlights
 */
private List<String> createHighlights(Query pq, List<Page> result) {
    // Score fragments against the query, using the "text" field.
    QueryScorer scorer = new QueryScorer(pq, "text");
    // Fragmenter and formatter are supplied by the search result renderer.
    // NOTE(review): the fragmenter is not passed to the highlighter in the
    // visible code -- confirm whether setTextFragmenter was intended.
    Fragmenter fragmenter = searchResultRenderer.createFragmenter(scorer);
    Formatter formatter = searchResultRenderer.createFormatter();

    Highlighter hilighter = new Highlighter(formatter, scorer);

    // Converts each Page to its plain-text form before highlighting.
    PageBridge bridge = new PageBridge();

    // Each page is converted to plain text and then highlighted.
    // NOTE(review): a single Highlighter instance is shared across a parallel
    // stream -- confirm it is safe for concurrent use.
    // NOTE(review): this statement is cut off in this listing; the terminal
    // stream operation is not visible.
    return result.stream().parallel().map(bridge::objectToString).map(plain -> highlight(plain, hilighter))

From source file:searchEngine.SearchFiles.java

License:Apache License

/**
 * Runs the query and, for each returned hit, prints a highlighted excerpt of the
 * "contents" field and the stored "path".
 *
 * <p>NOTE(review): several closing braces appear to be missing from this listing.
 * In the visible code {@code in} and {@code interactive} are never read, the
 * {@code title} local is never used, and {@code queryResults} is returned without
 * anything being added to it -- lines may have been lost.
 *
 * @param in          not used in the visible code
 * @param searcher    searcher the query is run against
 * @param query       query to execute and to highlight with
 * @param hitsPerPage page size; multiplied by {@code pageNumber} to get the hit limit
 * @param pageNumber  page number; multiplied by {@code hitsPerPage} to get the hit limit
 * @param raw         when true, also prints the raw doc id and score of each hit
 * @param interactive not used in the visible code
 * @return the {@code QueryResult} created at the start of the method
 * @throws IOException                  declared for index access
 * @throws InvalidTokenOffsetsException declared for the highlighter call
 */
public static QueryResult doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
        int hitsPerPage, int pageNumber, boolean raw, boolean interactive)
        throws IOException, InvalidTokenOffsetsException {

    QueryResult queryResults = new QueryResult();
    // Fetch every hit up to and including the requested page.
    TopDocs results = searcher.search(query, pageNumber * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    // Matches are wrapped in a red <font> element; fragments are sized at 20.
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=\"red\">", "</font>"),
            new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(20));

    // The loop starts at the first hit, not at the first hit of the requested page.
    int start = 0;
    int end = Math.min(numTotalHits, pageNumber * hitsPerPage);
    for (int i = start; i < end; i++) {
        if (raw) {
            System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
        }

        Document doc = searcher.doc(hits[i].doc);
        // A new analyzer is constructed for every hit.
        Analyzer analyzer = new SmartChineseAnalyzer();
        // NOTE(review): text is passed straight to StringReader; a document with
        // no "contents" value would make it null -- confirm this cannot happen.
        String text = doc.get("contents");
        TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text));
        // Up to 2 best fragments, concatenated with an empty separator.
        String highlighterResult = highlighter.getBestFragments(tokenStream, text, 2, "");
        System.out.println("########### " + highlighterResult);

        String path = doc.get("path");
        if (path != null) {
            // Hits are numbered from 1 in the output.
            System.out.println((i + 1) + ". " + path + " Score: " + hits[i].score);
            String title = doc.get("title");
        } else {
            System.out.println((i + 1) + ". " + "No path for this document");
    return queryResults;

From source file:top.sj.lucene.LuceneSearchUtil.java

License:Open Source License

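/**
 * Searches a single field using a single query condition.
 *
 * @param primaryKeyByHibernateEntity
 *            name of the document field that stores the Hibernate entity's primary key
 * @param analysisTarget
 *            name of the field to search and highlight
 * @param analysisCondition
 *            query string to parse and search for
 * @return list of search results
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException
 */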
public static List<LuceneSearchDTO> searchOfSingleAreaAndSingleCondition(String primaryKeyByHibernateEntity,
        String analysisTarget, String analysisCondition)
        throws IOException, ParseException, InvalidTokenOffsetsException {

    // Parses analysisCondition against the analysisTarget field, searches the index,
    // and collects one LuceneSearchDTO (primary key + highlighted text) per hit.
    // NOTE(review): this listing has lost lines -- closing braces and at least two
    // statement continuations are missing (flagged below).

    // NOTE(review): statement is cut off here; the call that extracts the index
    // path from the properties object is not visible.
    String configPath = PropertiesTool.getPropertiesFileAsObject("lucene_config.properties")
    Directory dir = null;
    try {
        dir = FSDirectory.open(new File(configPath));
    } catch (Exception e) {
    // NOTE(review): no handling is visible for this exception; if opening fails,
    // dir stays null when it is passed to IndexSearcher below.

    // Directory dir = FSDirectory.open(new File("D:\\lucene"));

    // NOTE(review): the searcher is not closed anywhere in the visible code.
    IndexSearcher searcher = new IndexSearcher(dir);

    // analysisTarget is the default field for the parser.
    QueryParser parser = new QueryParser(Version.LUCENE_30, analysisTarget,
            new StandardAnalyzer(Version.LUCENE_30));

    // Parse the condition into a query.
    Query query = parser.parse(analysisCondition);

    TopDocs topDocs = searcher.search(query, MAX_SEARCH_RESULT);

    QueryScorer queryScorer = new QueryScorer(query);
    // NOTE(review): this fragmenter is not passed to the highlighter in the visible code.
    Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
    // NOTE(review): the closing marker is "<b/>" rather than "</b>" -- probably a
    // typo that leaves the bold element unclosed in the output; confirm.
    Formatter formatter = new SimpleHTMLFormatter("<b>", "<b/>");
    Highlighter highlighter = new Highlighter(formatter, queryScorer);

    List<LuceneSearchDTO> analysisResults = new ArrayList<LuceneSearchDTO>();

    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        int docId = topDocs.scoreDocs[i].doc;
        Document doc = searcher.doc(docId);
        // NOTE(review): call is cut off here; the text argument is not visible.
        String attr = highlighter.getBestFragment(new StandardAnalyzer(Version.LUCENE_30), analysisTarget,
        // The stored primary-key field is converted to an Integer for the DTO.
        analysisResults.add(new LuceneSearchDTO(Integer.valueOf(doc.get(primaryKeyByHibernateEntity)), attr));
    return analysisResults;

From source file:top.sj.lucene.LuceneSearchUtil.java

License:Open Source License

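/**
 * Searches a single field using multiple query conditions.
 *
 * @param primaryKeyByHibernateEntity
 *            name of the document field that stores the Hibernate entity's primary key
 * @param analysisTarget
 *            name of the field to search and highlight
 * @param analysisConditions
 *            query strings to combine; the first is required, the others are optional
 * @return list of search results
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException
 */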
public static List<LuceneSearchDTO> searchOfSingleAreaAndMultiCondition(String primaryKeyByHibernateEntity,
        String analysisTarget, String... analysisConditions)
        throws IOException, ParseException, InvalidTokenOffsetsException {
    // Combines the parsed conditions into one boolean query on the analysisTarget
    // field, searches the index, and collects one LuceneSearchDTO per hit.
    // NOTE(review): this listing has lost lines -- closing braces and at least two
    // statement continuations are missing (flagged below).

    // NOTE(review): statement is cut off here; the call that extracts the index
    // path from the properties object is not visible.
    String configPath = PropertiesTool.getPropertiesFileAsObject("lucene_config.properties")
    Directory dir = FSDirectory.open(new File(configPath));

    // Directory dir = FSDirectory.open(new File("D://lucene"));
    // NOTE(review): the searcher is not closed anywhere in the visible code.
    IndexSearcher searcher = new IndexSearcher(dir);

    // analysisTarget is the default field for the parser.
    QueryParser parser = new QueryParser(Version.LUCENE_30, analysisTarget,
            new StandardAnalyzer(Version.LUCENE_30));

    BooleanQuery query = new BooleanQuery();

    // The first condition is added as MUST; every later one as SHOULD.
    for (int i = 0; i < analysisConditions.length; i++) {
        Query query1 = parser.parse(analysisConditions[i]);
        query.add(query1, i == 0 ? Occur.MUST : Occur.SHOULD);
    TopDocs topDocs = searcher.search(query, MAX_SEARCH_RESULT);

    QueryScorer queryScorer = new QueryScorer(query);
    // NOTE(review): this fragmenter is not passed to the highlighter in the visible code.
    Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
    // NOTE(review): the closing marker is "<b/>" rather than "</b>" -- probably a
    // typo that leaves the bold element unclosed in the output; confirm.
    Formatter formatter = new SimpleHTMLFormatter("<b>", "<b/>");
    Highlighter highlighter = new Highlighter(formatter, queryScorer);

    List<LuceneSearchDTO> analysisResults = new ArrayList<LuceneSearchDTO>();

    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        int docId = topDocs.scoreDocs[i].doc;
        Document doc = searcher.doc(docId);
        // NOTE(review): call is cut off here; the text argument is not visible.
        String attr = highlighter.getBestFragment(new StandardAnalyzer(Version.LUCENE_30), analysisTarget,
        // The stored primary-key field is converted to an Integer for the DTO.
        analysisResults.add(new LuceneSearchDTO(Integer.valueOf(doc.get(primaryKeyByHibernateEntity)), attr));
    return analysisResults;

From source file:uk.ac.ebi.arrayexpress.utils.saxon.search.QueryHighlighter.java

License:Apache License

public String highlightQuery(QueryInfo queryInfo, String fieldName, String text) {
    try {/*from www  . j a va  2  s . c  o  m*/
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(HIT_OPEN_MARK, HIT_CLOSE_MARK);
        Highlighter highlighter = new Highlighter(htmlFormatter,
                new QueryScorer(queryInfo.getQuery(), fieldName, this.env.defaultField));
        highlighter.setTextFragmenter(new NullFragmenter());

        String str = highlighter.getBestFragment(this.env.indexAnalyzer,
                "".equals(fieldName) ? this.env.defaultField : fieldName, text);

        return null != str ? str : text;
    } catch (Exception x) {
        logger.error("Caught an exception:", x);
    return text;

From source file:uk.ac.ebi.arrayexpress.utils.search.EFOExpandedHighlighter.java

License:Apache License

private String doHighlightQuery(Query query, String fieldName, String text, String openMark, String closeMark) {
    try {/*from  w w  w  .ja  va2 s .c o  m*/
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(openMark, closeMark);
        Highlighter highlighter = new Highlighter(htmlFormatter,
                new QueryScorer(query, fieldName, this.env.defaultField));
        highlighter.setTextFragmenter(new NullFragmenter());

        String str = highlighter.getBestFragment(this.env.indexAnalyzer,
                "".equals(fieldName) ? this.env.defaultField : fieldName, text);

        return null != str ? str : text;

    catch (Exception x) {
        logger.error("Caught an exception:", x);
    return text;


From source file:uk.ac.ebi.biostudies.efo.EFOExpandedHighlighter.java

License:Apache License

private String doHighlightQuery(Query query, String fieldName, String text, String openMark, String closeMark,
        boolean fragmentOnly) {
    try {/*  w ww  .j  a  v a 2 s  .  com*/
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(openMark, closeMark);
        QueryScorer scorer = new QueryScorer(query, fieldName, indexConfig.getDefaultField());
        Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
                fragmentOnly ? new SimpleSpanFragmenter(scorer, indexConfig.getSearchSnippetFragmentSize())
                        : new NullFragmenter());
        String str = highlighter.getBestFragment(new ExperimentTextAnalyzer(),
                "".equals(fieldName) ? indexConfig.getDefaultField() : fieldName, text);
        return str;
    } catch (Exception x) {
        logger.error("Caught an exception:", x);
    return text;
