From source file:com.green.common.persistence.BaseDao.java

License:Open Source License

 * /*from   w w w .j a v  a 2 s  .  c o  m*/
 * @param query 
 * @param list 
 * @param subLength ?
 * @param fields ??
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, int subLength, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(subLength));
    for (T entity : list) {
        try {
            for (String field : fields) {
                String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                String description = highlighter.getBestFragment(analyzer, field, text);
                if (description != null) {
                    Reflections.invokeSetter(entity, fields[0], description);
                Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, subLength * 2));
        } catch (IOException e) {
        } catch (InvalidTokenOffsetsException e) {
    return list;

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is evaluated by Lucene, so query results are already pruned. This means that every node
 * should have its security (user and role) info stored in Lucene. This provides very quick search
 * but security modifications need to be recursively applied to reach every document node in the
 * repository. This may take several hours (or days) in big repositories.
 */
private NodeResultSet runQueryLucene(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, HibernateException {
    // Builds a full-text query from the Lucene query, iterates its (score, entity) projection
    // and passes every hit to addResult() together with a highlighter.
    // NOTE(review): this listing has lost lines (closing braces, the end of the
    // createFullTextQuery call, the statements under "Set limits", and whatever stores
    // `results` into `result`). Confirm against the original file before relying on it.
    log.debug("runQueryLucene({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
    // NOTE(review): the argument list above is cut off; remaining arguments and ");" are missing.
    // Projection order fixes the layout of each row: [0] = score, [1] = the matched entity.
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);

    // Set limits
    // NOTE(review): no statement applying offset/limit to ftq is visible here.

    // Highlight using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    for (Iterator<Object[]> it = ftq.iterate(); it.hasNext();) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        // Add result
        addResult(ftSession, results, highlighter, score, nBase);

    log.debug("runQueryLucene: {}", result);
    return result;

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene will return all
 * the matched documents and this list needs further pruning by checking the READ permission in the
 * AccessManager. If the returned document list is very big, lots of documents may be pruned because the
 * user has no read access, and this would be a time-consuming task.
 *
 * This method reads and checks documents from the Lucene query result until it reaches the given offset.
 * After that it adds every document the user has read access to until the limit is reached. Finally
 * it checks whether there is one more document the user can read.
 */
private NodeResultSet runQueryAccessManagerMore(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    // Walks the Lucene hits with a single iterator in three phases (skip offset, collect up to
    // `limit` readable hits, then look ahead up to offset + limit + 1), checking READ
    // permission on each hit through the access manager.
    // NOTE(review): this listing has lost lines (closing braces, the end of the
    // createFullTextQuery call, the bodies of two `if` statements, and whatever stores
    // `results`/`count` into `result`). Confirm against the original file.
    log.debug("runQueryAccessManagerMore({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
    // NOTE(review): the argument list above is cut off; remaining arguments and ");" are missing.
    // Projection order fixes the layout of each row: [0] = score, [1] = the matched entity.
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    int count = 0;

    // Highlight using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Set limits
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Bypass offset
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        // NOTE(review): the body of this `if` is missing; presumably it increments `count`.
        if (am.isGranted(nBase, Permission.READ)) {

    // Read limit results
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);

    // Check if pending results
    count = results.size() + offset;

    // Look ahead for at most one readable hit beyond the requested page.
    while (it.hasNext() && count < offset + limit + 1) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        // NOTE(review): the body of this `if` is missing; presumably it increments `count`.
        if (am.isGranted(nBase, Permission.READ)) {

    log.debug("runQueryAccessManagerMore: {}", result);
    return result;

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene will return all
 * the matched documents and this list needs further pruning by checking the READ permission in the
 * AccessManager. If the returned document list is very big, lots of documents may be pruned because the
 * user has no read access, and this would be a time-consuming task.
 *
 * This method reads and checks documents from the Lucene query result until it reaches the given offset.
 * After that it adds every document the user has read access to until the limit is reached. Finally
 * it checks whether there are more documents (up to 2 * limit) the user can read.
 */
private NodeResultSet runQueryAccessManagerWindow(FullTextSession ftSession, Query query, int offset, int limit)
        throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    // Walks the Lucene hits with a single iterator in three phases (skip offset, collect up to
    // `limit` readable hits, then look ahead up to offset + limit * 2), checking READ
    // permission on each hit through the access manager.
    // NOTE(review): this listing has lost lines (closing braces, the end of the
    // createFullTextQuery call, the bodies of two `if` statements, and whatever stores
    // `results`/`count` into `result`). Confirm against the original file.
    log.debug("runQueryAccessManagerWindow({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
    // NOTE(review): the argument list above is cut off; remaining arguments and ");" are missing.
    // Projection order fixes the layout of each row: [0] = score, [1] = the matched entity.
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    int count = 0;

    // Highlight using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Set limits
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Bypass offset
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        // NOTE(review): the body of this `if` is missing; presumably it increments `count`.
        if (am.isGranted(nBase, Permission.READ)) {

    // Read limit results
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);

    // Check if pending results
    count = results.size() + offset;

    // Look ahead for readable hits up to one further page beyond the requested one.
    while (it.hasNext() && count < offset + limit * 2) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        // NOTE(review): the body of this `if` is missing; presumably it increments `count`.
        if (am.isGranted(nBase, Permission.READ)) {

    log.debug("runQueryAccessManagerWindow: {}", result);
    return result;

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Security is not evaluated in Lucene but by the AccessManager. This means that Lucene will return all
 * the matched documents and this list needs further pruning by checking the READ permission in the
 * AccessManager. If the returned document list is very big, lots of documents may be pruned because the
 * user has no read access, and this would be a time-consuming task.
 *
 * This method reads and checks documents from the Lucene query result until it reaches the given offset.
 * After that it adds every document the user has read access to until the limit is reached. Finally
 * it checks whether there are more documents (up to MAX_SEARCH_RESULTS) the user can read.
 */
private NodeResultSet runQueryAccessManagerLimited(FullTextSession ftSession, Query query, int offset,
        int limit) throws IOException, InvalidTokenOffsetsException, DatabaseException, HibernateException {
    // Walks the Lucene hits with a single iterator in three phases (skip offset, collect up to
    // `limit` readable hits, then look ahead until Config.MAX_SEARCH_RESULTS), checking READ
    // permission on each hit through the access manager.
    // NOTE(review): this listing has lost lines (closing braces, the end of the
    // createFullTextQuery call, the bodies of two `if` statements, and whatever stores
    // `results`/`count` into `result`). Confirm against the original file.
    log.debug("runQueryAccessManagerLimited({}, {}, {}, {})", new Object[] { ftSession, query, offset, limit });
    List<NodeQueryResult> results = new ArrayList<NodeQueryResult>();
    NodeResultSet result = new NodeResultSet();
    FullTextQuery ftq = ftSession.createFullTextQuery(query, NodeDocument.class, NodeFolder.class,
    // NOTE(review): the argument list above is cut off; remaining arguments and ");" are missing.
    // Projection order fixes the layout of each row: [0] = score, [1] = the matched entity.
    ftq.setProjection(FullTextQuery.SCORE, FullTextQuery.THIS);
    QueryScorer scorer = new QueryScorer(query, NodeDocument.TEXT_FIELD);
    int count = 0;

    // Highlight using a CSS style
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class='highlight'>", "</span>");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, MAX_FRAGMENT_LEN));

    // Set limits
    Iterator<Object[]> it = ftq.iterate();
    DbAccessManager am = SecurityHelper.getAccessManager();

    // Bypass offset
    while (it.hasNext() && count < offset) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        // NOTE(review): the body of this `if` is missing; presumably it increments `count`.
        if (am.isGranted(nBase, Permission.READ)) {

    // Read limit results
    while (it.hasNext() && results.size() < limit) {
        Object[] qRes = it.next();
        Float score = (Float) qRes[0];
        NodeBase nBase = (NodeBase) qRes[1];

        if (am.isGranted(nBase, Permission.READ)) {
            // Add result
            addResult(ftSession, results, highlighter, score, nBase);

    // Check if pending results
    count = results.size() + offset;

    // Look ahead for readable hits until the configured global cap is reached.
    while (it.hasNext() && count < Config.MAX_SEARCH_RESULTS) {
        Object[] qRes = it.next();
        NodeBase nBase = (NodeBase) qRes[1];

        // NOTE(review): the body of this `if` is missing; presumably it increments `count`.
        if (am.isGranted(nBase, Permission.READ)) {

    log.debug("Size: {}", results.size());
    log.debug("runQueryAccessManagerLimited: {}", result);
    return result;

From source file:com.knowledgetree.indexer.IndexerManager.java

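/**
 * Returns a set of hits from Lucene.
 * @param queryString the query text, parsed with "Content" as the default field
 * @param maxHits the maximum number of hits to return, or -1 to return all of them
 * @param getText whether to fill each hit's content with highlighted fragments
 * @return the matching hits, at most {@code maxHits} of them
 * @throws Exception if parsing or searching fails
 */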
public QueryHit[] query(String queryString, int maxHits, boolean getText) throws Exception {
    // Parses and runs the query, then converts the first `max` hits into QueryHit objects,
    // optionally attaching highlighted text fragments.
    // NOTE(review): this listing has lost lines (closing braces, the body of the synchronized
    // block, and the end of the getBestFragments call). Confirm against the original file.
    synchronized (this) {

    // The lower-cased query string is scanned for the words "content" and "discussion";
    // the result decides which stored fields are concatenated for highlighting below.
    String tmp = queryString.toLowerCase();
    boolean queryContent = tmp.indexOf("content") != -1;
    boolean queryDiscussion = tmp.indexOf("discussion") != -1;

    QueryParser parser = new QueryParser("Content", this.analyzer);
    Query query = parser.parse(queryString);

    // rewriting is important for complex queries. this is a must-do according to sources!
    query = query.rewrite(this.queryReader);

    // run the search!
    Hits hits = this.querySearcher.search(query);

    // now we can apply the maximum hits to the results we return!
    int max = (maxHits == -1) ? hits.length() : maxHits;

    if (hits.length() < max) {
        max = hits.length();

    QueryHit[] results = new QueryHit[max];

    // NOTE(review): `this` is passed where the other snippets pass a formatter; presumably
    // IndexerManager implements the highlighter's Formatter interface — confirm.
    Highlighter highlighter = new Highlighter(this, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(this.resultFragmentSize));
    for (int i = 0; i < max; i++) {
        Document doc = hits.doc(i);

        QueryHit hit = new QueryHit();
        hit.DocumentID = IndexerManager.stringToLong(doc.get("DocumentID"));
        hit.Rank = hits.score(i);
        hit.Title = doc.get("Title");
        if (getText) {
            // NOTE(review): if doc.get(...) can return null, "+=" appends the literal
            // text "null" here — verify that both fields are always stored.
            String text = "";
            if (queryContent) {
                text += doc.get("Content");
            if (queryDiscussion) {
                text += doc.get("Discussion");

            // TODO: we can create a field.getReader(). the fragmenting needs to
            // be updated to deal with the reader only. would prefer not having to
            // load the document into a string!
            TokenStream tokenStream = analyzer.tokenStream("contents", new StringReader(text));

            hit.Content = highlighter.getBestFragments(tokenStream, text, this.resultFragments,
            // NOTE(review): the call above is cut off; its last argument and ");" are missing.
        } else {
            hit.Content = "";

        hit.Version = doc.get("Version");

        results[i] = hit;

    return results;

From source file:com.leavesfly.lia.tool.HighlightIt.java

License:Apache License

public static void main(String[] args) throws Exception {

    if (args.length != 1) {
        System.err.println("Usage: HighlightIt <filename-out>");
        System.exit(-1);/* w  ww  .  j a v  a2  s. c o  m*/

    String filename = args[0];

    String searchText = "term"; // #1
    QueryParser parser = new QueryParser(Version.LUCENE_30, // #1
            "f", // #1
            new StandardAnalyzer(Version.LUCENE_30));// #1
    Query query = parser.parse(searchText); // #1

    SimpleHTMLFormatter formatter = // #2
            new SimpleHTMLFormatter("<span class=\"highlight\">", // #2
                    "</span>"); // #2

    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30) // #3
            .tokenStream("f", new StringReader(text)); // #3

    QueryScorer scorer = new QueryScorer(query, "f"); // #4

    Highlighter highlighter = new Highlighter(formatter, scorer); // #5
    highlighter.setTextFragmenter( // #6
            new SimpleSpanFragmenter(scorer)); // #6

    String result = // #7
            highlighter.getBestFragments(tokens, text, 3, "..."); // #7

    FileWriter writer = new FileWriter(filename); // #8
    writer.write("<html>"); // #8
    writer.write("<style>\n" + // #8
            ".highlight {\n" + // #8
            " background: yellow;\n" + // #8
            "}\n" + // #8
            "</style>"); // #8
    writer.write("<body>"); // #8
    writer.write(result); // #8
    writer.write("</body></html>"); // #8
    writer.close(); // #8

From source file:com.leavesfly.lia.tool.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    // Searches the book index for the term "action" in the "title" field and asks the
    // highlighter for the best fragment of each hit's title.
    // NOTE(review): the listing ends inside the loop; any assertion on `fragment`, any
    // cleanup and the closing braces are not visible here — confirm against the original file.
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    QueryScorer scorer = new QueryScorer(query, "title");
    // No formatter is passed here, so the highlighter's default markup applies.
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    Analyzer analyzer = new SimpleAnalyzer();

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        // Token stream for this hit's "title" field; the reader, the stored document and the
        // analyzer are all handed to TokenSources.
        TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc,
                analyzer);
        String fragment = highlighter.getBestFragment(stream, title);


From source file:com.liferay.portal.search.lucene.LuceneHelperImpl.java

License:Open Source License

public String getSnippet(Query query, String field, String s, int maxNumFragments, int fragmentLength,
        String fragmentSuffix, String preTag, String postTag) throws IOException {

    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(preTag, postTag);

    QueryScorer queryScorer = new QueryScorer(query, field);

    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer);

    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

    try {/*  w  w w . jav a  2  s.  c o  m*/
        String snippet = highlighter.getBestFragments(tokenStream, s, maxNumFragments, fragmentSuffix);

        if (Validator.isNotNull(snippet) && !StringUtil.endsWith(snippet, fragmentSuffix)) {

            snippet = snippet.concat(fragmentSuffix);

        return snippet;
    } catch (InvalidTokenOffsetsException itoe) {
        throw new IOException(itoe.getMessage());

From source file:com.liferay.portal.search.lucene31.LuceneHelperImpl.java

License:Open Source License

public String getSnippet(Query query, String field, String s, int maxNumFragments, int fragmentLength,
        String fragmentSuffix, String preTag, String postTag) throws IOException {

    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(preTag, postTag);

    QueryScorer queryScorer = new QueryScorer(query, field);

    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, queryScorer);

    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

    try {//w  ww . ja va  2 s  .c o m
        String snippet = highlighter.getBestFragments(tokenStream, s, maxNumFragments, fragmentSuffix);

        if (Validator.isNotNull(snippet) && !StringUtil.endsWith(snippet, fragmentSuffix)) {

            snippet = snippet + fragmentSuffix;

        return snippet;
    } catch (InvalidTokenOffsetsException itoe) {
        throw new IOException(itoe.getMessage());