List of usage examples for org.apache.lucene.search.highlight Highlighter getBestFragment
public final String getBestFragment(Analyzer analyzer, String fieldName, String text) throws IOException, InvalidTokenOffsetsException
From source file:apm.common.core.DaoImpl.java
License:Open Source License
/** * // w w w .j av a 2 s . co m * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:calliope.search.AeseSearch.java
License:Open Source License
/** * Search the index for the given expression * @param expr the expression to be parsed * @param langCode the language of the expression and index * @param profile the hit profile (where to start from etc) * @return the result docs/*from w w w. ja v a 2 s .co m*/ */ public static String searchIndex(String expr, String langCode, HitProfile profile) { StringBuilder sb = new StringBuilder(); try { Analyzer analyzer = AeseSearch.createAnalyzer(langCode); DirectoryReader reader = DirectoryReader.open(AeseSearch.index); if (reader != null) { IndexSearcher searcher = new IndexSearcher(reader); QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer); Query q = qp.parse(expr); TopDocs hits = searcher.search(q, AeseSearch.maxHits); ScoreDoc[] docs = hits.scoreDocs; for (int j = profile.from; j < profile.to && j < docs.length; j++) { Document doc = searcher.doc(docs[j].doc); String vid = doc.get(LuceneFields.VID); String docID = doc.get(LuceneFields.DOCID); Highlighter h = new Highlighter(new QueryScorer(q)); String text = getCorTexVersion(docID, vid); sb.append(formatDocID(docID)); sb.append(" "); sb.append(formatVersionID(vid)); sb.append(" "); String frag = h.getBestFragment(analyzer, "text", text); sb.append("<span class=\"found\">"); sb.append(frag); sb.append("</span>\n"); } profile.numHits = docs.length; } reader.close(); } catch (Exception e) { sb.append(e.getMessage()); } return sb.toString(); }
From source file:com.aistor.common.persistence.BaseDaoImpl.java
License:Open Source License
/** * /* www .ja va2 s. com*/ * @param query * @param list * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(130)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String desciption = highlighter.getBestFragment(analyzer, field, text); if (desciption != null) { Reflections.invokeSetter(entity, fields[0], desciption); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130)); } //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:com.green.common.persistence.BaseDao.java
License:Open Source License
/** * /* w w w .j av a 2 s .c om*/ * @param query * @param list * @param subLength ? * @param fields ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, int subLength, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(subLength)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String description = highlighter.getBestFragment(analyzer, field, text); if (description != null) { Reflections.invokeSetter(entity, fields[0], description); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, subLength * 2)); } } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:com.ikon.dao.SearchDAO.java
License:Open Source License
/**
 * Wraps a matched node in a {@link NodeQueryResult} and appends it to {@code results}.
 * <p>
 * The node is classified as document, mail attachment (a document whose parent is a mail),
 * folder or mail; any other type is logged as unknown and nothing is added. For documents
 * (including attachments) with text, and for mails with content, an excerpt is produced by
 * the highlighter using the {@code analyzer} defined outside this method. The wrapped node
 * is initialized through its DAO before the result is added.
 *
 * @param ftSession   session used to check whether a document's parent is a mail
 * @param results     list that receives the new result
 * @param highlighter highlighter used to build the excerpt
 * @param score       score stored on the result
 * @param nBase       the matched node
 * @throws IOException if the highlighter fails to read the text
 * @throws InvalidTokenOffsetsException if the highlighter meets inconsistent token offsets
 */
private void addResult(FullTextSession ftSession, List<NodeQueryResult> results, Highlighter highlighter,
        Float score, NodeBase nBase) throws IOException, InvalidTokenOffsetsException {
    NodeQueryResult result = new NodeQueryResult();
    NodeDocument document = null;
    NodeMail mail = null;

    if (nBase instanceof NodeDocument) {
        document = (NodeDocument) nBase;
        boolean attachedToMail = NodeMailDAO.getInstance().isMail(ftSession, document.getParent());
        if (attachedToMail) {
            log.debug("NODE DOCUMENT - ATTACHMENT");
            result.setAttachment(document);
        } else {
            log.debug("NODE DOCUMENT");
            result.setDocument(document);
        }
    } else if (nBase instanceof NodeFolder) {
        log.debug("NODE FOLDER");
        result.setFolder((NodeFolder) nBase);
    } else if (nBase instanceof NodeMail) {
        log.debug("NODE MAIL");
        mail = (NodeMail) nBase;
        result.setMail(mail);
    } else {
        log.warn("NODE UNKNOWN");
    }

    // Only documents and mails carry text that can be excerpted.
    String excerpt = null;
    if (document != null && document.getText() != null) {
        excerpt = highlighter.getBestFragment(analyzer, NodeDocument.TEXT_FIELD, document.getText());
    } else if (mail != null && mail.getContent() != null) {
        excerpt = highlighter.getBestFragment(analyzer, NodeMail.CONTENT_FIELD, mail.getContent());
    }

    log.debug("Result: SCORE({}), EXCERPT({}), DOCUMENT({})", new Object[] { score, excerpt, nBase });
    result.setScore(score);
    result.setExcerpt(excerpt);

    if (result.getDocument() != null) {
        NodeDocumentDAO.getInstance().initialize(result.getDocument(), false);
    } else if (result.getFolder() != null) {
        NodeFolderDAO.getInstance().initialize(result.getFolder());
    } else if (result.getMail() != null) {
        NodeMailDAO.getInstance().initialize(result.getMail());
    } else if (result.getAttachment() != null) {
        NodeDocumentDAO.getInstance().initialize(result.getAttachment(), false);
    } else {
        // Unknown node type: there is nothing to report.
        return;
    }
    results.add(result);
}
From source file:com.openkm.dao.SearchDAO.java
License:Open Source License
/** * Add result/*w ww . j a v a 2 s . com*/ */ private void addResult(FullTextSession ftSession, List<NodeQueryResult> results, Highlighter highlighter, Float score, NodeBase nBase) throws IOException, InvalidTokenOffsetsException { NodeQueryResult qr = new NodeQueryResult(); NodeDocument nDocument = null; NodeMail nMail = null; String excerpt = null; if (nBase instanceof NodeDocument) { nDocument = (NodeDocument) nBase; if (NodeMailDAO.getInstance().isMail(ftSession, nDocument.getParent())) { log.debug("NODE DOCUMENT - ATTACHMENT"); qr.setAttachment(nDocument); } else { log.debug("NODE DOCUMENT"); qr.setDocument(nDocument); } } else if (nBase instanceof NodeFolder) { log.debug("NODE FOLDER"); NodeFolder nFld = (NodeFolder) nBase; qr.setFolder(nFld); } else if (nBase instanceof NodeMail) { log.debug("NODE MAIL"); nMail = (NodeMail) nBase; qr.setMail(nMail); } else { log.warn("NODE UNKNOWN"); } if (nDocument != null && nDocument.getText() != null) { excerpt = highlighter.getBestFragment(analyzer, NodeDocument.TEXT_FIELD, nDocument.getText()); } else if (nMail != null && nMail.getContent() != null) { excerpt = highlighter.getBestFragment(analyzer, NodeMail.CONTENT_FIELD, nMail.getContent()); } log.debug("Result: SCORE({}), EXCERPT({}), DOCUMENT({})", new Object[] { score, excerpt, nBase }); qr.setScore(score); qr.setExcerpt(FormatUtil.stripNonValidXMLCharacters(excerpt)); if (qr.getDocument() != null) { NodeDocumentDAO.getInstance().initialize(qr.getDocument(), false); results.add(qr); } else if (qr.getFolder() != null) { NodeFolderDAO.getInstance().initialize(qr.getFolder()); results.add(qr); } else if (qr.getMail() != null) { NodeMailDAO.getInstance().initialize(qr.getMail()); results.add(qr); } else if (qr.getAttachment() != null) { NodeDocumentDAO.getInstance().initialize(qr.getAttachment(), false); results.add(qr); } }
From source file:com.thinkgem.jeesite.common.persistence.BaseDaoImpl.java
License:Open Source License
/** * /*from w ww . j a va 2 s . com*/ * * @param query * * @param list * * @param fields * ?? */ public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) { Analyzer analyzer = new IKAnalyzer(); Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(100)); for (T entity : list) { try { for (String field : fields) { String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field)); String desciption = highlighter.getBestFragment(analyzer, field, text); if (desciption != null) { Reflections.invokeSetter(entity, fields[0], desciption); break; } Reflections.invokeSetter(entity, fields[0], StringUtils.abbreviate(text, 100)); } // Reflections.invokeSetter(entity, fields[1], // "sdfkjsdlkfjklsdjf"); } catch (IOException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } } return list; }
From source file:de.hsmannheim.ss15.alr.searchengine.DefaultLuceneController.java
public List<StoredDocument> doSearch(String queryString) throws IOException, ParseException { String field = "contents"; String queries = null;//from ww w . j ava 2 s .co m boolean raw = false; int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; if (queries != null) { in = Files.newBufferedReader(Paths.get(queries), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse(queryString); Highlighter highlighter = new Highlighter(new QueryScorer(query)); TotalHitCountCollector collector = new TotalHitCountCollector(); searcher.search(query, collector); TopDocs topDocs = searcher.search(query, Math.max(1, collector.getTotalHits())); List<StoredDocument> results = new ArrayList<>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { StoredDocument doc = searcher.doc(scoreDoc.doc); try { File file = new File(doc.get("path")); BufferedReader docReader = new BufferedReader( new InputStreamReader(Files.newInputStream(file.toPath()), StandardCharsets.UTF_8)); List<String> lines = new ArrayList<>(); while (docReader.ready()) { lines.add(docReader.readLine()); } lines.remove(0); lines.remove(0); lines.remove(0); String content = ""; for (String s : lines) { content = content + s; } String highLight = highlighter.getBestFragment(analyzer, null, content); if (highLight == null) { LOGGER.warn("No Highlight found"); } else { doc.add(new TextField("highlight", highLight, Field.Store.YES)); } } catch (InvalidTokenOffsetsException ex) { LOGGER.warn("No Highlight found"); } results.add(doc); } reader.close(); return results; }
From source file:fr.mael.microrss.dao.impl.UserArticleDaoImpl.java
License:Open Source License
/**
 * Full-text search over the given user's articles, with a highlighted excerpt per hit.
 * <p>
 * The query string is parsed against {@code article.content} and {@code article.title}. The
 * results are restricted to {@code user}, fetched with the feed and user labels joined, and
 * paged with {@code start}/{@code nb}. For each returned article that has content, the best
 * matching fragment is marked with {@code [highlight]...[/highlight]}, stripped of markup
 * tags and newlines, and stored via {@code setHighlight}. Articles without content, or with
 * no matching fragment in their content, are returned without a highlight.
 *
 * @param queryStr the query in Lucene query-parser syntax
 * @param user     owner of the articles to search
 * @param start    index of the first result to return
 * @param nb       maximum number of results to return
 * @return the matching user articles
 * @throws ParseException if {@code queryStr} cannot be parsed
 * @throws IOException if the highlighter fails to read the content
 * @throws InvalidTokenOffsetsException if the highlighter meets inconsistent token offsets
 */
public List<UserArticle> search(String queryStr, User user, int start, int nb)
        throws ParseException, IOException, InvalidTokenOffsetsException {
    FullTextSession searchSession = Search.getFullTextSession(getSessionFactory().getCurrentSession());
    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_31,
            new String[] { "article.content", "article.title" }, new StandardAnalyzer(Version.LUCENE_31));
    org.apache.lucene.search.Query query = parser.parse(queryStr);
    FullTextQuery hibQuery = searchSession.createFullTextQuery(query, UserArticle.class);

    Criteria fetchingStrategy = searchSession.createCriteria(UserArticle.class);
    fetchingStrategy.setFetchMode("article.feed", FetchMode.JOIN);
    fetchingStrategy.setFetchMode("userLabels", FetchMode.JOIN);
    fetchingStrategy.add(Property.forName("user").eq(user));
    hibQuery.setCriteriaQuery(fetchingStrategy);
    hibQuery.setFirstResult(start);
    hibQuery.setMaxResults(nb);

    QueryScorer scorer = new QueryScorer(query);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("[highlight]", "[/highlight]");
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 200));
    // One analyzer serves every article; it does not depend on the article being highlighted.
    StandardAnalyzer highlightAnalyzer = new StandardAnalyzer(Version.LUCENE_32);

    @SuppressWarnings("unchecked") // the full-text query was created for UserArticle.class
    List<UserArticle> userArticles = (List<UserArticle>) hibQuery.list();
    for (UserArticle userArticle : userArticles) {
        String content = userArticle.getArticle().getContent();
        if (content == null) {
            // An article can match on its title alone; there is then no content to excerpt.
            continue;
        }
        String highlight = highlighter.getBestFragment(highlightAnalyzer, "content", content);
        if (highlight != null) {
            highlight = highlight.replaceAll("\\<.*?>", "").replace("\n", " ");
            userArticle.getArticle().setHighlight(highlight);
        }
    }
    return userArticles;
}
From source file:framework.retrieval.engine.query.formatter.impl.HighlighterMaker.java
License:Apache License
/**
 * Returns the best highlighted fragment of {@code keyWord} for the given query.
 * <p>
 * The highlighter is built from this object's formatter ({@code getFormatter()}) and a scorer
 * for {@code query}, and splits the text into fragments of {@code resumeLength} characters.
 * The text is tokenized with the {@code analyzer} defined outside this method.
 *
 * @param query        query whose terms are highlighted
 * @param fieldName    field name passed to the analyzer
 * @param keyWord      the text to highlight
 * @param resumeLength fragment size in characters
 * @return the fragment produced by {@code Highlighter.getBestFragment}, which may be
 *         {@code null} when no fragment matches
 * @throws RetrievalQueryException wrapping any exception raised while highlighting
 */
public String getHighlighter(Query query, String fieldName, String keyWord, int resumeLength) {
    QueryScorer queryScorer = new QueryScorer(query);
    Highlighter fragmentHighlighter = new Highlighter(getFormatter(), queryScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleFragmenter(resumeLength));
    try {
        return fragmentHighlighter.getBestFragment(analyzer, fieldName, keyWord);
    } catch (Exception e) {
        throw new RetrievalQueryException(e);
    }
}