Example usage for org.apache.lucene.search.highlight Highlighter setTextFragmenter

List of usage examples for org.apache.lucene.search.highlight Highlighter setTextFragmenter

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight Highlighter setTextFragmenter.

Prototype

public void setTextFragmenter(Fragmenter fragmenter) 

Source Link

Usage

From source file:com.green.common.persistence.BaseDao.java

License:Open Source License

/**
 * /*from   w w w .j a v  a 2 s  .  c o  m*/
 * @param query 
 * @param list 
 * @param subLength ?
 * @param fields ??
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, int subLength, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(subLength));
    for (T entity : list) {
        try {
            for (String field : fields) {
                String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                String description = highlighter.getBestFragment(analyzer, field, text);
                if (description != null) {
                    Reflections.invokeSetter(entity, fields[0], description);
                    break;
                }
                Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, subLength * 2));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
    return list;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is evaluated by Lucene itself, so the query result is already pruned. This requires
 * every node to carry its security info (user and role) in the Lucene index, which makes the
 * search very fast, but security modifications must be applied recursively to every document
 * node in the repository — in big repositories that may take several hours (or days).
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryLucene(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, HibernateException {
    log.debug("runQueryLucene({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    NodeResultSet resultSet = new NodeResultSet();
    List<NodeQueryResult> rows = new ArrayList<NodeQueryResult>();

    FullTextQuery fullTextQuery = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    fullTextQuery.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    fullTextQuery.enableFullTextFilter("readAccess");

    // Pagination window handled by Lucene itself
    fullTextQuery.setFirstResult(offset);
    fullTextQuery.setMaxResults(limit);

    // Highlighter that wraps matches in a CSS-styled span
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<span class='highlight'>", "</span>"),
            scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Projection is { SCORE, THIS }
    for (Iterator<Object[]> it = fullTextQuery.iterate(); it.hasNext();) {
        Object[] projection = it.next();
        addResult(ftSession, rows, highlighter, (Float) projection[0], (NodeBase) projection[1]);
    }

    resultSet.setTotal(fullTextQuery.getResultSize());
    resultSet.setResults(rows);
    log.debug("runQueryLucene: {}", resultSet);
    return resultSet;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene will return all the
 * matched documents and this list needs further pruning by checking the READ permission in the AccessManager.
 * If the returned document list is very big, maybe lots of documents will be pruned because the user has
 * no read access and this would be a time consuming task.
 *
 * This method will read and check documents from the Lucene query result until a given offset is reached.
 * After that it will add every document the user has read access to until the limit is reached. After
 * that it will check whether there is at least one more readable document (so the caller can tell a
 * "next page" exists).
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryAccessManagerMore(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    log.debug("runQueryAccessManagerMore({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    // NOTE(review): the Javadoc says security is NOT evaluated by Lucene, yet the "readAccess"
    // filter is enabled here just like in runQueryLucene — confirm this line is intentional.
    ftq.enableFullTextFilter("readAccess");
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    int count = 0; // readable (READ-granted) nodes seen so far

    // Highlight using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Projection rows are { SCORE, THIS }
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Bypass offset: skip the first 'offset' READABLE nodes — unreadable ones do not count
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    // Collect up to 'limit' readable nodes for this page
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);
        }
    }

    // Check if pending results: probe for at most ONE readable node beyond the current page,
    // so total becomes offset + page size (+1 if more results exist)
    count = results.size() + offset;

    while (it.hasNext() && count < offset + limit + 1) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    result.setTotal(count);
    result.setResults(results);
    log.debug("runQueryAccessManagerMore: {}", result);
    return result;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluate in Lucene but by AccessManager. This means that Lucene will return all the
 * matched documents and this list need further prune by checking the READ permission in the AccessManager.
 * If the returned document list is very big, maybe lots of documents will be pruned because the user has
 * no read access and this would be a time consuming task.
 * /*from w  ww.j  a  va2 s.  c om*/
 * This method will read and check document from the Lucene query result until reach a given offset. After
 * that will add all the given document which the user have read access until the limit is reached. After
 * that will check if there are more documents (2 * limit) the user can read.
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryAccessManagerWindow(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    log.debug("runQueryAccessManagerWindow({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    ftq.enableFullTextFilter("readAccess");
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    int count = 0;

    // Highlight using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Set limits
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Bypass offset
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    // Read limit results
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);
        }
    }

    // Check if pending results
    count = results.size() + offset;

    while (it.hasNext() && count < offset + limit * 2) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    result.setTotal(count);
    result.setResults(results);
    log.debug("runQueryAccessManagerWindow: {}", result);
    return result;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene will return all the
 * matched documents and this list needs further pruning by checking the READ permission in the AccessManager.
 * If the returned document list is very big, maybe lots of documents will be pruned because the user has
 * no read access and this would be a time consuming task.
 *
 * This method will read and check documents from the Lucene query result until a given offset is reached.
 * After that it will add every document the user has read access to until the limit is reached. After
 * that it keeps counting readable documents up to Config.MAX_SEARCH_RESULTS for the reported total.
 */
@SuppressWarnings("unchecked")
private NodeResultSet runQueryAccessManagerLimited(FullTextSession ftSession, Query query, int offset,
        int limit) throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    log.debug("runQueryAccessManagerLimited({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
            NodeMail.class);
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    // NOTE(review): the Javadoc says security is NOT evaluated by Lucene, yet the "readAccess"
    // filter is enabled here just like in runQueryLucene — confirm this line is intentional.
    ftq.enableFullTextFilter("readAccess");
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    int count = 0; // readable (READ-granted) nodes seen so far

    // Highlight using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Projection rows are { SCORE, THIS }
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Bypass offset: skip the first 'offset' READABLE nodes — unreadable ones do not count
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    // Collect up to 'limit' readable nodes for this page
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);
        }
    }

    // Check if pending results: keep counting readable nodes, capped at MAX_SEARCH_RESULTS,
    // so total reflects (an upper-bounded estimate of) all accessible matches
    count = results.size() + offset;

    while (it.hasNext() && count < Config.MAX_SEARCH_RESULTS) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            count++;
        }
    }

    result.setTotal(count);
    result.setResults(results);
    log.debug("Size: {}", results.size());
    log.debug("runQueryAccessManagerLimited: {}", result);
    return result;
}

From source file:com.knowledgetree.indexer.IndexerManager.java

/**
 * Returns a set of hits from Lucene.
 *
 * @param queryString the raw query string, parsed against the "Content" field
 * @param maxHits maximum number of hits to return, or -1 for all hits
 * @param getText when true, a highlighted content fragment is extracted for each hit
 * @return the matching documents as QueryHit objects
 * @throws Exception if parsing, searching or highlighting fails
 */
public QueryHit[] query(String queryString, int maxHits, boolean getText) throws Exception {
    // queryCount is shared mutable state; guard the increment
    synchronized (this) {
        this.queryCount++;
    }

    // Crude check of which fields the query mentions, used below to decide what raw text to highlight
    String tmp = queryString.toLowerCase();
    boolean queryContent = tmp.contains("content");
    boolean queryDiscussion = tmp.contains("discussion");

    QueryParser parser = new QueryParser("Content", this.analyzer);
    Query query = parser.parse(queryString);

    // rewriting is important for complex queries. this is a must-do according to sources!
    query = query.rewrite(this.queryReader);

    // run the search!
    Hits hits = this.querySearcher.search(query);

    // Clamp the number of returned results: -1 means "all hits"
    int max = (maxHits == -1) ? hits.length() : Math.min(maxHits, hits.length());

    QueryHit[] results = new QueryHit[max];

    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(this.resultFragmentSize));

    for (int i = 0; i < max; i++) {
        Document doc = hits.doc(i);

        QueryHit hit = new QueryHit();
        hit.DocumentID = IndexerManager.stringToLong(doc.get("DocumentID"));
        hit.Rank = hits.score(i);
        hit.Title = doc.get("Title");

        if (getText) {
            // Concatenate the stored field values the query appears to target
            StringBuilder sb = new StringBuilder();

            if (queryContent) {
                sb.append(doc.get("Content"));
            }

            if (queryDiscussion) {
                sb.append(doc.get("Discussion"));
            }

            String text = sb.toString();

            // TODO: we can create a field.getReader(). the fragmenting needs to
            // be updated to deal with the reader only. would prefer not having to
            // load the document into a string!
            // NOTE(review): the field name "contents" differs from the indexed field
            // "Content" used by the parser above — confirm this is intentional.
            TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text));

            hit.Content = highlighter.getBestFragments(tokenStream, text, this.resultFragments,
                    this.resultSeperator);
        } else {
            hit.Content = "";
        }

        hit.Version = doc.get("Version");

        results[i] = hit;
    }

    return results;
}

From source file:com.leavesfly.lia.tool.HighlightIt.java

License:Apache License

/**
 * Highlights the query term "term" in the sample text and writes the result
 * as a small HTML page to the file named by the single command-line argument.
 *
 * NOTE(review): 'text' is a field declared elsewhere in this class — it is the
 * document body being highlighted and is not visible in this snippet.
 */
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: HighlightIt <filename-out>");
        System.exit(-1);
    }

    String filename = args[0];

    // #1: parse the search text against field "f"
    String searchText = "term";
    QueryParser parser = new QueryParser(Version.LUCENE_30, "f", new StandardAnalyzer(Version.LUCENE_30));
    Query query = parser.parse(searchText);

    // #2: wrap each match in a CSS-styled span
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");

    // #3: tokenize the raw text the same way it was indexed
    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30).tokenStream("f", new StringReader(text));

    // #4/#5/#6: score fragments against the query and fragment on span boundaries
    QueryScorer scorer = new QueryScorer(query, "f");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    // #7: pick the three best fragments, joined by "..."
    String result = highlighter.getBestFragments(tokens, text, 3, "...");

    // #8: write the HTML page.
    // Fix: close the writer even if a write fails (the original leaked the
    // FileWriter on any exception between open and close).
    FileWriter writer = new FileWriter(filename);
    try {
        writer.write("<html>");
        writer.write("<style>\n" + ".highlight {\n" + " background: yellow;\n" + "}\n" + "</style>");
        writer.write("<body>");
        writer.write(result);
        writer.write("</body></html>");
    } finally {
        writer.close();
    }
}

From source file:com.leavesfly.lia.tool.HighlightTest.java

License:Apache License

/**
 * Verifies that highlighting the "title" field produces a fragment for every
 * hit of a simple term query against the book index.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testHits() throws Exception {
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());

    try {
        TermQuery query = new TermQuery(new Term("title", "action"));
        TopDocs hits = searcher.search(query, 10);

        QueryScorer scorer = new QueryScorer(query, "title");
        Highlighter highlighter = new Highlighter(scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

        Analyzer analyzer = new SimpleAnalyzer();

        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            String title = doc.get("title");

            // Reuse stored term vectors when available, otherwise re-analyze the stored field
            TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                    analyzer);
            String fragment = highlighter.getBestFragment(stream, title);

            System.out.println(fragment);
        }
    } finally {
        // Fix: the original never closed the searcher, leaking the underlying index reader
        searcher.close();
    }
}

From source file:com.liferay.portal.search.lucene.LuceneHelperImpl.java

License:Open Source License

/**
 * Builds a highlighted snippet of {@code s} for the given query and field.
 *
 * @param query the query whose terms are highlighted
 * @param field the field the query is scored against
 * @param s the raw stored text to fragment
 * @param maxNumFragments maximum number of fragments to concatenate
 * @param fragmentLength target size of each fragment
 * @param fragmentSuffix separator/suffix appended between and after fragments
 * @param preTag markup inserted before each highlighted term
 * @param postTag markup inserted after each highlighted term
 * @return the snippet, guaranteed to end with fragmentSuffix when non-empty
 * @throws IOException if tokenization fails or token offsets are invalid
 */
public String getSnippet(Query query, String field, String s, int maxNumFragments, int fragmentLength,
        String fragmentSuffix, String preTag, String postTag) throws IOException {

    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(preTag, postTag);

    QueryScorer queryScorer = new QueryScorer(query, field);

    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer);

    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

    try {
        String snippet = highlighter.getBestFragments(tokenStream, s, maxNumFragments, fragmentSuffix);

        // Ensure the snippet always ends with the suffix so callers can rely on it
        if (Validator.isNotNull(snippet) && !StringUtil.endsWith(snippet, fragmentSuffix)) {

            snippet = snippet.concat(fragmentSuffix);
        }

        return snippet;
    } catch (InvalidTokenOffsetsException itoe) {
        // Fix: preserve the original exception as the cause instead of discarding it
        throw new IOException(itoe.getMessage(), itoe);
    }
}

From source file:com.liferay.portal.search.lucene31.LuceneHelperImpl.java

License:Open Source License

/**
 * Builds a highlighted snippet of {@code s} for the given query and field.
 *
 * @param query the query whose terms are highlighted
 * @param field the field the query is scored against
 * @param s the raw stored text to fragment
 * @param maxNumFragments maximum number of fragments to concatenate
 * @param fragmentLength target size of each fragment
 * @param fragmentSuffix separator/suffix appended between and after fragments
 * @param preTag markup inserted before each highlighted term
 * @param postTag markup inserted after each highlighted term
 * @return the snippet, guaranteed to end with fragmentSuffix when non-empty
 * @throws IOException if tokenization fails or token offsets are invalid
 */
public String getSnippet(Query query, String field, String s, int maxNumFragments, int fragmentLength,
        String fragmentSuffix, String preTag, String postTag) throws IOException {

    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(preTag, postTag);

    QueryScorer queryScorer = new QueryScorer(query, field);

    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer);

    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

    try {
        String snippet = highlighter.getBestFragments(tokenStream, s, maxNumFragments, fragmentSuffix);

        // Ensure the snippet always ends with the suffix so callers can rely on it.
        // Consistency: use concat() like the sibling implementation in the lucene package.
        if (Validator.isNotNull(snippet) && !StringUtil.endsWith(snippet, fragmentSuffix)) {

            snippet = snippet.concat(fragmentSuffix);
        }

        return snippet;
    } catch (InvalidTokenOffsetsException itoe) {
        // Fix: preserve the original exception as the cause instead of discarding it
        throw new IOException(itoe.getMessage(), itoe);
    }
}