Example usage for org.apache.lucene.search.highlight Highlighter getBestFragment

List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragment

Introduction

This page collects usage examples of the getBestFragment method of org.apache.lucene.search.highlight.Highlighter.

Prototype

public final String getBestFragment(Analyzer analyzer, String fieldName, String text)
        throws IOException, InvalidTokenOffsetsException 

Source Link

Document

Highlights chosen terms in a text, extracting the most relevant section.

Usage

From source file:apm.common.core.DaoImpl.java

License:Open Source License

/**
 * Decorates every entity in {@code list} with a keyword-highlighted excerpt.
 * <p>
 * For each entity the given fields are tried in order. Each field value is read
 * through its getter, passed through {@code StringUtils.replaceHtml} and handed
 * to the highlighter. The first field that yields a fragment wins: the fragment
 * is written to the property named by {@code fields[0]} and the remaining fields
 * are not examined. While no fragment has been found, the property named by
 * {@code fields[0]} is set to {@code StringUtils.abbr(text, 130)} of the field
 * that was just tried.
 * <p>
 * An {@link IOException} or {@code InvalidTokenOffsetsException} raised for an
 * entity is printed via {@code printStackTrace()} and processing continues with
 * the next entity.
 *
 * @param query  query passed to the {@code QueryScorer} that drives the highlighting
 * @param list   entities to decorate; they are modified in place
 * @param fields property names to try in order; the first one also receives the result
 * @return the same {@code list} instance that was passed in
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter htmlFormatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(130));
    for (T entity : list) {
        try {
            for (int i = 0; i < fields.length; i++) {
                String plainText = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, fields[i]));
                String fragment = highlighter.getBestFragment(analyzer, fields[i], plainText);
                if (fragment == null) {
                    // No match in this field: store an abbreviated copy and try the next field.
                    Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(plainText, 130));
                    continue;
                }
                // First field with a match wins.
                Reflections.invokeSetter(entity, fields[0], fragment);
                break;
            }
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return list;
}

From source file:calliope.search.AeseSearch.java

License:Open Source License

/**
 * Search the index for the given expression
 * @param expr the expression to be parsed
 * @param langCode the language of the expression and index
 * @param profile the hit profile (where to start from etc)
 * @return the result docs/*from   w  w  w.  ja  v a 2  s  .co  m*/
 */
public static String searchIndex(String expr, String langCode, HitProfile profile) {
    StringBuilder sb = new StringBuilder();
    try {
        Analyzer analyzer = AeseSearch.createAnalyzer(langCode);
        DirectoryReader reader = DirectoryReader.open(AeseSearch.index);
        if (reader != null) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer);
            Query q = qp.parse(expr);
            TopDocs hits = searcher.search(q, AeseSearch.maxHits);
            ScoreDoc[] docs = hits.scoreDocs;
            for (int j = profile.from; j < profile.to && j < docs.length; j++) {
                Document doc = searcher.doc(docs[j].doc);
                String vid = doc.get(LuceneFields.VID);
                String docID = doc.get(LuceneFields.DOCID);
                Highlighter h = new Highlighter(new QueryScorer(q));
                String text = getCorTexVersion(docID, vid);
                sb.append(formatDocID(docID));
                sb.append(" ");
                sb.append(formatVersionID(vid));
                sb.append(" ");
                String frag = h.getBestFragment(analyzer, "text", text);
                sb.append("<span class=\"found\">");
                sb.append(frag);
                sb.append("</span>\n");
            }
            profile.numHits = docs.length;
        }
        reader.close();
    } catch (Exception e) {
        sb.append(e.getMessage());
    }
    return sb.toString();
}

From source file:com.aistor.common.persistence.BaseDaoImpl.java

License:Open Source License

/**
 * /*  www  .ja va2  s.  com*/
 * @param query 
 * @param list 
 * @param fields ??
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(130));
    for (T entity : list) {
        try {
            for (String field : fields) {
                String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                String desciption = highlighter.getBestFragment(analyzer, field, text);
                if (desciption != null) {
                    Reflections.invokeSetter(entity, fields[0], desciption);
                    break;
                }
                Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130));
            }
            //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
    return list;
}

From source file:com.green.common.persistence.BaseDao.java

License:Open Source License

/**
 * /*  w  w w  .j  av a 2  s .c om*/
 * @param query 
 * @param list 
 * @param subLength ?
 * @param fields ??
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, int subLength, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(subLength));
    for (T entity : list) {
        try {
            for (String field : fields) {
                String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                String description = highlighter.getBestFragment(analyzer, field, text);
                if (description != null) {
                    Reflections.invokeSetter(entity, fields[0], description);
                    break;
                }
                Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, subLength * 2));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
    return list;
}

From source file:com.ikon.dao.SearchDAO.java

License:Open Source License

/**
 * Wraps {@code nBase} in a {@code NodeQueryResult} and appends it to {@code results}.
 * <p>
 * The node is classified as document, mail attachment (a document whose parent
 * is a mail), folder or mail. For documents (including attachments) with
 * non-null text, and otherwise for mails with non-null content, an excerpt is
 * taken from the highlighter. The result is added only when one of the
 * document, folder, mail or attachment slots of the query result is non-null;
 * the matching DAO {@code initialize} call is made first.
 *
 * @param ftSession   session passed to {@code NodeMailDAO.isMail}
 * @param results     list that receives the new query result
 * @param highlighter highlighter used to extract the excerpt
 * @param score       score stored on the query result
 * @param nBase       node to classify and add
 * @throws IOException                  declared for the highlighter calls
 * @throws InvalidTokenOffsetsException declared for the highlighter calls
 */
private void addResult(FullTextSession ftSession, List<NodeQueryResult> results, Highlighter highlighter,
        Float score, NodeBase nBase) throws IOException, InvalidTokenOffsetsException {
    NodeQueryResult qr = new NodeQueryResult();
    NodeDocument nDocument = null;
    NodeMail nMail = null;

    // Step 1: classify the node and fill the matching slot of the result.
    if (nBase instanceof NodeDocument) {
        nDocument = (NodeDocument) nBase;

        if (NodeMailDAO.getInstance().isMail(ftSession, nDocument.getParent())) {
            log.debug("NODE DOCUMENT - ATTACHMENT");
            qr.setAttachment(nDocument);
        } else {
            log.debug("NODE DOCUMENT");
            qr.setDocument(nDocument);
        }
    } else if (nBase instanceof NodeFolder) {
        log.debug("NODE FOLDER");
        qr.setFolder((NodeFolder) nBase);
    } else if (nBase instanceof NodeMail) {
        log.debug("NODE MAIL");
        nMail = (NodeMail) nBase;
        qr.setMail(nMail);
    } else {
        log.warn("NODE UNKNOWN");
    }

    // Step 2: extract an excerpt, preferring document text over mail content.
    String excerpt = null;
    if (nDocument == null || nDocument.getText() == null) {
        if (nMail != null && nMail.getContent() != null) {
            excerpt = highlighter.getBestFragment(analyzer, NodeMail.CONTENT_FIELD, nMail.getContent());
        }
    } else {
        excerpt = highlighter.getBestFragment(analyzer, NodeDocument.TEXT_FIELD, nDocument.getText());
    }

    log.debug("Result: SCORE({}), EXCERPT({}), DOCUMENT({})", new Object[] { score, excerpt, nBase });
    qr.setScore(score);
    qr.setExcerpt(excerpt);

    // Step 3: initialize whichever slot is populated; if none is, nothing is added.
    if (qr.getDocument() != null) {
        NodeDocumentDAO.getInstance().initialize(qr.getDocument(), false);
    } else if (qr.getFolder() != null) {
        NodeFolderDAO.getInstance().initialize(qr.getFolder());
    } else if (qr.getMail() != null) {
        NodeMailDAO.getInstance().initialize(qr.getMail());
    } else if (qr.getAttachment() != null) {
        NodeDocumentDAO.getInstance().initialize(qr.getAttachment(), false);
    } else {
        return;
    }
    results.add(qr);
}

From source file:com.openkm.dao.SearchDAO.java

License:Open Source License

/**
 * Add result/*w  ww  . j a v a  2 s  .  com*/
 */
private void addResult(FullTextSession ftSession, List<NodeQueryResult> results, Highlighter highlighter,
        Float score, NodeBase nBase) throws IOException, InvalidTokenOffsetsException {
    NodeQueryResult qr = new NodeQueryResult();
    NodeDocument nDocument = null;
    NodeMail nMail = null;
    String excerpt = null;

    if (nBase instanceof NodeDocument) {
        nDocument = (NodeDocument) nBase;

        if (NodeMailDAO.getInstance().isMail(ftSession, nDocument.getParent())) {
            log.debug("NODE DOCUMENT - ATTACHMENT");
            qr.setAttachment(nDocument);
        } else {
            log.debug("NODE DOCUMENT");
            qr.setDocument(nDocument);
        }
    } else if (nBase instanceof NodeFolder) {
        log.debug("NODE FOLDER");
        NodeFolder nFld = (NodeFolder) nBase;
        qr.setFolder(nFld);
    } else if (nBase instanceof NodeMail) {
        log.debug("NODE MAIL");
        nMail = (NodeMail) nBase;
        qr.setMail(nMail);
    } else {
        log.warn("NODE UNKNOWN");
    }

    if (nDocument != null && nDocument.getText() != null) {
        excerpt = highlighter.getBestFragment(analyzer, NodeDocument.TEXT_FIELD, nDocument.getText());
    } else if (nMail != null && nMail.getContent() != null) {
        excerpt = highlighter.getBestFragment(analyzer, NodeMail.CONTENT_FIELD, nMail.getContent());
    }

    log.debug("Result: SCORE({}), EXCERPT({}), DOCUMENT({})", new Object[] { score, excerpt, nBase });
    qr.setScore(score);
    qr.setExcerpt(FormatUtil.stripNonValidXMLCharacters(excerpt));

    if (qr.getDocument() != null) {
        NodeDocumentDAO.getInstance().initialize(qr.getDocument(), false);
        results.add(qr);
    } else if (qr.getFolder() != null) {
        NodeFolderDAO.getInstance().initialize(qr.getFolder());
        results.add(qr);
    } else if (qr.getMail() != null) {
        NodeMailDAO.getInstance().initialize(qr.getMail());
        results.add(qr);
    } else if (qr.getAttachment() != null) {
        NodeDocumentDAO.getInstance().initialize(qr.getAttachment(), false);
        results.add(qr);
    }
}

From source file:com.thinkgem.jeesite.common.persistence.BaseDaoImpl.java

License:Open Source License

/**
 * /*from w  ww . j  a va  2  s  . com*/
 * 
 * @param query
 *            
 * @param list
 *            
 * @param fields
 *            ??
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(100));
    for (T entity : list) {
        try {
            for (String field : fields) {
                String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                String desciption = highlighter.getBestFragment(analyzer, field, text);
                if (desciption != null) {
                    Reflections.invokeSetter(entity, fields[0], desciption);
                    break;
                }
                Reflections.invokeSetter(entity, fields[0], StringUtils.abbreviate(text, 100));
            }
            // Reflections.invokeSetter(entity, fields[1],
            // "sdfkjsdlkfjklsdjf");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
    return list;
}

From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java

/**
 * Searches the "contents" field of the index at {@code indexDir} and returns
 * every matching stored document.
 * <p>
 * For each hit the file named by the document's "path" field is read as UTF-8,
 * its first three lines are dropped, and the best matching fragment of the
 * remaining text is added to the document as a stored "highlight" field. When
 * no fragment is found, or highlighting fails with
 * {@code InvalidTokenOffsetsException}, a warning is logged and the document is
 * returned without a highlight.
 *
 * @param queryString query in Lucene query-parser syntax
 * @return all matching documents, in the order returned by the searcher
 * @throws IOException    if the index or one of the referenced files cannot be read
 * @throws ParseException if {@code queryString} cannot be parsed
 */
public List<StoredDocument> doSearch(String queryString) throws IOException, ParseException {
    String field = "contents";
    // NOTE(review): the first three lines of each file are skipped, presumably
    // a header written at indexing time — confirm against the indexer.
    int headerLines = 3;

    // try-with-resources releases the directory and reader even when parsing,
    // searching or reading a document file throws.
    try (FSDirectory directory = FSDirectory.open(Paths.get(indexDir));
            IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        QueryParser parser = new QueryParser(field, analyzer);
        Query query = parser.parse(queryString);
        Highlighter highlighter = new Highlighter(new QueryScorer(query));

        // Count first so the second search can ask for every hit (at least 1,
        // since the searcher rejects a request for zero results).
        TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.search(query, collector);
        TopDocs topDocs = searcher.search(query, Math.max(1, collector.getTotalHits()));

        List<StoredDocument> results = new ArrayList<>();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            StoredDocument doc = searcher.doc(scoreDoc.doc);
            try {
                String content = readContentSkippingHeader(doc.get("path"), headerLines);
                // Tokenize under the searched field name instead of null.
                String highLight = highlighter.getBestFragment(analyzer, field, content);
                if (highLight == null) {
                    LOGGER.warn("No Highlight found");
                } else {
                    doc.add(new TextField("highlight", highLight, Field.Store.YES));
                }
            } catch (InvalidTokenOffsetsException ex) {
                LOGGER.warn("No Highlight found");
            }

            results.add(doc);
        }
        return results;
    }
}

/**
 * Reads a UTF-8 text file and returns its lines after the first
 * {@code headerLines}, joined by single spaces. Files with fewer lines than
 * {@code headerLines} yield an empty string.
 */
private String readContentSkippingHeader(String pathName, int headerLines) throws IOException {
    StringBuilder body = new StringBuilder();
    try (BufferedReader docReader = new BufferedReader(
            new InputStreamReader(Files.newInputStream(Paths.get(pathName)), StandardCharsets.UTF_8))) {
        int lineNo = 0;
        String line;
        // readLine() returning null is the reliable end-of-file signal.
        while ((line = docReader.readLine()) != null) {
            if (lineNo++ < headerLines) {
                continue;
            }
            // Separate lines so the last word of one line is not fused with
            // the first word of the next.
            if (body.length() > 0) {
                body.append(' ');
            }
            body.append(line);
        }
    }
    return body.toString();
}

From source file:fr.mael.microrss.dao.impl.UserArticleDaoImpl.java

License:Open Source License

/**
 * Runs a full-text search over the user's articles and attaches a highlight
 * to each result.
 * <p>
 * The query is parsed against the "article.content" and "article.title" fields
 * and restricted to {@code user}. For each returned article with non-null
 * content, the best fragment is computed with {@code [highlight]} markers; if
 * one is found, text matching the regex {@code \<.*?>} is removed, newlines are
 * replaced by spaces, and the outcome is stored through {@code setHighlight}.
 *
 * @param queryStr query in Lucene query-parser syntax
 * @param user     owner whose articles are searched
 * @param start    index of the first result to return
 * @param nb       maximum number of results to return
 * @return the matching user articles for the requested page
 * @throws ParseException               if {@code queryStr} cannot be parsed
 * @throws IOException                  declared for the highlighter calls
 * @throws InvalidTokenOffsetsException declared for the highlighter calls
 */
public List<UserArticle> search(String queryStr, User user, int start, int nb)
        throws ParseException, IOException, InvalidTokenOffsetsException {
    FullTextSession searchSession = Search.getFullTextSession(getSessionFactory().getCurrentSession());
    // One analyzer serves both query parsing and highlighting, so both see the
    // same tokens and no analyzer is allocated per article.
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_31,
            new String[] { "article.content", "article.title" }, analyzer);
    org.apache.lucene.search.Query query = parser.parse(queryStr);
    FullTextQuery hibQuery = searchSession.createFullTextQuery(query, UserArticle.class);
    Criteria fetchingStrategy = searchSession.createCriteria(UserArticle.class);
    fetchingStrategy.setFetchMode("article.feed", FetchMode.JOIN);
    fetchingStrategy.setFetchMode("userLabels", FetchMode.JOIN);
    fetchingStrategy.add(Property.forName("user").eq(user));
    hibQuery.setCriteriaQuery(fetchingStrategy);
    hibQuery.setFirstResult(start);
    hibQuery.setMaxResults(nb);

    QueryScorer scorer = new QueryScorer(query);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("[highlight]", "[/highlight]");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 200));

    List<UserArticle> userArticles = (List<UserArticle>) hibQuery.list();

    for (UserArticle userArticle : userArticles) {
        String content = userArticle.getArticle().getContent();
        if (content == null) {
            // Nothing to highlight; passing null text to the highlighter would fail.
            continue;
        }
        String highlight = highlighter.getBestFragment(analyzer, "content", content);
        if (highlight != null) {
            highlight = highlight.replaceAll("\\<.*?>", "").replace("\n", " ");
            userArticle.getArticle().setHighlight(highlight);
        }
    }

    return userArticles;
}

From source file:framework.retrieval.engine.query.formatter.impl.HighlighterMaker.java

License:Apache License

/**
 * Returns the best highlighted fragment of {@code keyWord} for {@code query}.
 * <p>
 * A highlighter is built from {@code getFormatter()} and a scorer for the
 * query, with fragments sized by {@code resumeLength}. The result is whatever
 * {@code Highlighter.getBestFragment} returns for the text.
 *
 * @param query        query whose terms are highlighted
 * @param fieldName    field name passed to the analyzer
 * @param keyWord      text to highlight
 * @param resumeLength fragment size passed to {@code SimpleFragmenter}
 * @return the value returned by {@code Highlighter.getBestFragment}
 * @throws RetrievalQueryException wrapping any exception raised while highlighting
 */
public String getHighlighter(Query query, String fieldName, String keyWord, int resumeLength) {
    QueryScorer queryScorer = new QueryScorer(query);
    Highlighter marker = new Highlighter(getFormatter(), queryScorer);
    marker.setTextFragmenter(new SimpleFragmenter(resumeLength));

    try {
        return marker.getBestFragment(analyzer, fieldName, keyWord);
    } catch (Exception e) {
        throw new RetrievalQueryException(e);
    }
}