Example usage for org.apache.lucene.search.highlight Highlighter setTextFragmenter

Introduction

On this page you can find example usages of org.apache.lucene.search.highlight.Highlighter.setTextFragmenter.

Prototype

public void setTextFragmenter(Fragmenter fragmenter) 
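
All of the examples below follow the same basic pattern: build a QueryScorer for the query, create a Highlighter, install a Fragmenter with setTextFragmenter, then ask the highlighter for the best fragments of the analyzed text. Here is a minimal sketch of that pattern; the field name "content", the query, the analyzer, and the fragment size are placeholders, and exact signatures vary slightly between Lucene versions.

public static String highlight(Query query, Analyzer analyzer, String text) throws Exception {
    // score candidate fragments against the query terms
    QueryScorer scorer = new QueryScorer(query, "content");
    // wrap matched terms in <b>...</b>
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
    // the fragmenter controls how the text is split into candidate snippets (~100 chars each here)
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 100));
    // analyze the stored text and return up to 3 of the best fragments, joined by "..."
    TokenStream tokens = analyzer.tokenStream("content", new StringReader(text));
    return highlighter.getBestFragments(tokens, text, 3, "...");
}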

Usage

From source file:com.bugull.mongo.lucene.BuguHighlighter.java

License:Apache License

public String getResult(String fieldName, String fieldValue) throws Exception {
    // keywords, formatter and maxFragments are instance fields of BuguHighlighter
    BuguIndex index = BuguIndex.getInstance();
    QueryParser parser = new QueryParser(index.getVersion(), fieldName, index.getAnalyzer());
    Query query = parser.parse(keywords);
    TokenStream tokens = index.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));
    QueryScorer scorer = new QueryScorer(query, fieldName);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    return highlighter.getBestFragments(tokens, fieldValue, maxFragments, "...");
}

From source file:com.difference.historybook.index.lucene.LuceneIndex.java

License:Apache License

@Override
public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug)
        throws IndexException {
    try {
        //TODO: make age be a component in the ranking?
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(parser.parse(query), Occur.MUST);
        queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)),
                Occur.FILTER);
        Query baseQuery = queryBuilder.build();

        FunctionQuery boostQuery = new FunctionQuery(
                new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000,
                        new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F));

        Query q = new CustomScoreQuery(baseQuery, boostQuery);

        QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setTextFragmenter(fragmenter);

        GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1)
                .setAllGroups(true).setIncludeMaxScore(true);
        TopGroups<?> groups = gsearch.search(searcher, q, offset, size);

        ArrayList<SearchResult> results = new ArrayList<>(size);
        for (int i = offset; i < offset + size && i < groups.groups.length; i++) {
            ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0];
            Document luceneDoc = searcher.doc(scoreDoc.doc);
            IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc);

            TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH,
                    reader.getTermVectors(scoreDoc.doc), luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH),
                    analyzer, highlighter.getMaxDocCharsToAnalyze() - 1);

            String[] snippets = highlighter.getBestFragments(tokenStream,
                    luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), 3);
            String snippet = Arrays.asList(snippets).stream().collect(Collectors.joining("\n"));
            snippet = Jsoup.clean(snippet, Whitelist.simpleText());

            String debugInfo = null;
            if (includeDebug) {
                Explanation explanation = searcher.explain(q, scoreDoc.doc);
                debugInfo = explanation.toString();
            }

            results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(),
                    doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score));
        }

        SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset)
                .setMaxResultsRequested(size)
                .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0)
                .setResults(results);

        if (includeDebug) {
            wrapper.setDebugInfo(q.toString());
        }

        return wrapper;

    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

public String highlight(String strToHighlight, String fieldName, Query luceneQuery) {
    String highlightedText;
    Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
    try {
        Directory directory = FSDirectory.open(indexDir);
        IndexReader indexReader = DirectoryReader.open(directory);
        Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader);
        QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);
        SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>");
        Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);

        Fragmenter luceneFragmenter;
        // If the string to highlight is longer than 250 characters
        if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
            // Create best fragments of 100 characters each
            luceneFragmenter = new SimpleFragmenter(TAILLE_FRAGMENT);
        } else {
            // Highlight the whole string
            luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE);
        }
        luceneHighlighter.setTextFragmenter(luceneFragmenter);

        TokenStream luceneTokenStream = analyzer.tokenStream(fieldName, new StringReader(strToHighlight));
        String fragment = null;
        if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
            fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight, NB_BEST_FRAGMENT,
                    FRAGMENT_SEP);
        } else {
            fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight);
        }

        if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) {
            fragment = strToHighlight;
        }
        indexReader.close();
        directory.close();

        highlightedText = fragment;
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InvalidTokenOffsetsException e) {
        throw new RuntimeException(e);
    }
    return highlightedText;
}

From source file:com.duroty.application.bookmark.manager.BookmarkManager.java

License:Open Source License

/**
 * DOCUMENT ME!
 *
 * @param repositoryName DOCUMENT ME!
 * @param token DOCUMENT ME!
 * @param page DOCUMENT ME!
 * @param messagesByPage DOCUMENT ME!
 * @param order DOCUMENT ME!
 * @param orderType DOCUMENT ME!
 *
 * @return DOCUMENT ME!
 *
 * @throws BookmarkException DOCUMENT ME!
 * @throws SearchException DOCUMENT ME!
 */
public SearchObj search(String repositoryName, String token, int page, int messagesByPage, int order,
        String orderType, boolean isNotebook) throws BookmarkException {
    String lucenePath = "";

    if (!defaultLucenePath.endsWith(File.separator)) {
        lucenePath = defaultLucenePath + File.separator + repositoryName + File.separator
                + Constants.BOOKMARK_LUCENE_BOOKMARK;
    } else {
        lucenePath = defaultLucenePath + repositoryName + File.separator + Constants.BOOKMARK_LUCENE_BOOKMARK;
    }

    Searcher searcher = null;
    SearchObj searchObj = new SearchObj();
    Highlighter highlighter = null;

    try {
        searcher = BookmarkIndexer.getSearcher(lucenePath);

        Query query = null;
        Hits hits = null;

        if (StringUtils.isBlank(token)) {
            if (isNotebook) {
                query = SimpleQueryParser.parse("notebook:true", new KeywordAnalyzer());
            } else {
                query = new MatchAllDocsQuery();
            }

            hits = searcher.search(query, new Sort(new SortField[] { SortField.FIELD_SCORE,
                    new SortField(Field_insert_date, SortField.STRING, true) }));
        } else {
            query = SimpleQueryParser.parse(token, analyzer);

            StringBuffer buffer = new StringBuffer();

            if (isNotebook) {
                buffer.append("(" + query.toString() + ") AND ");

                QueryParser parser = new QueryParser(Field_notebook, new KeywordAnalyzer());
                parser.setDefaultOperator(Operator.AND);

                Query aux = parser.parse(String.valueOf(true));

                buffer.append("(" + aux.toString() + ") ");
            }

            if (buffer.length() > 0) {
                QueryParser parser = new QueryParser("", new WhitespaceAnalyzer());
                query = parser.parse(buffer.toString());
            }

            hits = searcher.search(query);
        }

        Date searchStart = new Date();

        Date searchEnd = new Date();

        //time in seconds
        double time = ((double) (searchEnd.getTime() - searchStart.getTime())) / (double) 1000;

        int hitsLength = hits.length();

        if (hitsLength <= 0) {
            return null;
        }

        int start = page * messagesByPage;
        int end = start + messagesByPage;

        if (end > 0) {
            end = Math.min(hitsLength, end);
        } else {
            end = hitsLength;
        }

        if (start > end) {
            throw new SearchException("Search index of bound. start > end");
        }

        Vector bookmarks = new Vector();

        for (int j = start; j < end; j++) {
            Document doc = hits.doc(j);

            if (doc != null) {
                LuceneBookmark luceneBookmark = new LuceneBookmark(doc);

                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
                highlighter = new Highlighter(formatter, new QueryScorer(query));
                highlighter.setTextFragmenter(new SimpleFragmenter(150));

                BookmarkObj bookmarkObj = new BookmarkObj();
                bookmarkObj.setCacheDate(luceneBookmark.getCacheDate());
                bookmarkObj.setComments(luceneBookmark.getComments());
                bookmarkObj.setContents(luceneBookmark.getCotents());
                bookmarkObj.setDepth(luceneBookmark.getDepth());
                bookmarkObj.setFlagged(luceneBookmark.isFlagged());
                bookmarkObj.setIdint(luceneBookmark.getIdint());
                bookmarkObj.setInsertDate(luceneBookmark.getInsertDate());
                bookmarkObj.setKeywords(luceneBookmark.getKeywords());
                bookmarkObj.setNotebook(luceneBookmark.isNotebook());
                bookmarkObj.setParent(Long.parseLong(luceneBookmark.getParent()));
                bookmarkObj.setTitle(luceneBookmark.getTitle());
                bookmarkObj.setTitleHighlight(luceneBookmark.getTitle());
                bookmarkObj.setUrl(luceneBookmark.getUrl());

                String contents = luceneBookmark.getCotents();
                String hcontents = null;

                if ((contents != null) && (!contents.trim().equals(""))) {
                    contents = contents.replaceAll("\\s+", " ");

                    TokenStream tokenStream = analyzer.tokenStream(Field_contents, new StringReader(contents));
                    hcontents = highlighter.getBestFragment(tokenStream, contents);

                    if (hcontents != null) {
                        contents = hcontents;
                    } else {
                        contents = null;
                    }
                }

                bookmarkObj.setContentsHighlight(contents);

                String title = luceneBookmark.getTitle();
                String htitle = null;

                if ((title != null) && (!title.trim().equals(""))) {
                    title = title.replaceAll("\\s+", " ");

                    TokenStream tokenStream = analyzer.tokenStream(Field_title, new StringReader(title));
                    htitle = highlighter.getBestFragment(tokenStream, title);

                    if (htitle != null) {
                        title = htitle;
                    }
                }

                bookmarkObj.setTitleHighlight(title);

                bookmarks.addElement(bookmarkObj);
            }
        }

        searchObj.setHits(hitsLength);
        searchObj.setTime(time);
        searchObj.setBookmarks(bookmarks);
    } catch (Exception ex) {
        throw new SearchException(ex);
    } finally {
    }

    return searchObj;
}

From source file:com.edgenius.wiki.search.service.AbstractSearchService.java

License:Open Source License

private int detach(IndexSearcher searcher, List<SearchResultItem> viewableMatchedResults, TopDocs hits,
        Query hlQuery, int from, int to, User user) throws IOException {

    Assert.isTrue(from <= to && from >= 0 && (to >= 0 || to == -1));

    // For performance reasons we simply return the total result-set length without applying the permission filter.
    // This means the total length may be larger than the set the user can actually view, as some results are
    // filtered out when the user lacks permission to see them.
    int len = hits.totalHits;

    if (len > 0 && from < len) {
        to = to == -1 ? len : (len > to ? to : len);
        //security filter from return result

        List<Integer> resultIdx = new ArrayList<Integer>();
        for (int idx = from; idx < to; idx++) {
            // "to" is exclusive: for example, if from:to is 0:10, the returned indices are 0-9

            // TODO: if the page includes results that are invisible to the user, it would be better to display
            // a message telling the user that some results are hidden for security reasons.
            if (!isAllowView(searcher.doc(hits.scoreDocs[idx].doc), user))
                continue;

            resultIdx.add(idx);
        }

        // create a single highlighter used for all fragment parsing
        Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");
        Highlighter hl = null;
        if (hlQuery != null) {
            Scorer scorer = new QueryScorer(hlQuery);
            hl = new Highlighter(formatter, scorer);
            Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_LEN);
            hl.setTextFragmenter(fragmenter);
        }

        for (int idx : resultIdx) {
            SearchResultItem item = new SearchResultItem();

            Document doc = searcher.doc(hits.scoreDocs[idx].doc);
            String docType = doc.get(FieldName.DOC_TYPE);

            //common items in search results
            item.setType(NumberUtils.toInt(docType));
            item.setDatetime(doc.get(FieldName.UPDATE_DATE));
            if (userReadingService != null
                    && !new Integer(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                String username = doc.get(FieldName.CONTRIBUTOR);
                User contirUser = userReadingService.getUserByName(username);
                if (contirUser != null) {
                    item.setContributor(contirUser.getFullname());
                    item.setContributorUsername(username);
                }
            }
            if (Integer.valueOf(SharedConstants.SEARCH_PAGE).toString().equals(docType)) {
                String content = doc.get(FieldName.PAGE_CONTENT);
                item.setTitle(doc.get(FieldName.PAGE_TITLE));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));

                // do not set item.desc() to content, which may be a very big string; there is no need to return it
                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), content)));

            } else if (Integer.valueOf(SharedConstants.SEARCH_COMMENT).toString().equals(docType)) {
                String content = doc.get(FieldName.CONTENT);

                item.setItemUid(doc.get(FieldName.COMMENT_UID));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                item.setTitle(doc.get(FieldName.UNSEARCH_PAGE_TITLE));
                // do not set item.desc() to content, which may be a very big string; there is no need to return it
                item.setFragment(createFragment(hl, content));

            } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE).toString().equals(docType)) {
                String title = doc.get(FieldName.SPACE_NAME);
                item.setTitle(title);
                item.setSpaceUname(doc.get(FieldName.SPACE_UNIXNAME));
                item.setDesc(doc.get(FieldName.SPACE_DESC));
                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), item.getDesc())));

            } else if (Integer.valueOf(SharedConstants.SEARCH_WIDGET).toString().equals(docType)) {
                //wTitle-> title; wDesc-> desc; wTitle(could be pageTitle or markup title) ->spaceUname
                String widgetType = doc.get(FieldName.WIDGET_TYPE);
                String title = doc.get(FieldName.WIDGET_TITLE);

                //does content need transfer back?? so far no
                String content = doc.get(FieldName.WIDGET_CONTENT);
                if (WidgetModel.TYPE_PAGE_LINKER.equals(widgetType)) {
                    //don't use as Highlighter fragment
                    content = "";
                }

                String desc = doc.get(FieldName.WIDGET_DESC);

                item.setDesc(desc);
                item.setTitle(title);

                //add little confuse field mapping :(
                item.setSpaceUname(doc.get(FieldName.WIDGET_KEY));
                item.setItemUid(widgetType);

                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getDesc(), content)));

            } else if (Integer.valueOf(SharedConstants.SEARCH_PAGE_TAG).toString().equals(docType)) {
                //page tag
                item.setTitle(doc.get(FieldName.PAGE_TAG_NAME));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                item.setFragment(createFragment(hl, item.getTitle()));
            } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE_TAG).toString().equals(docType)) {
                //space tag
                item.setTitle(doc.get(FieldName.SPACE_TAG_NAME));
                item.setFragment(createFragment(hl, item.getTitle()));

            } else if (Integer.valueOf(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                String username = doc.get(FieldName.USER_NAME);
                item.setTitle(username);
                String fullname = doc.get(FieldName.USER_FULLNAME);
                // hack - contributor is the current user's fullname
                item.setContributor(fullname);
                if (userReadingService != null)
                    item.setDesc(userReadingService.getUserByName(username).getSetting().getStatus());
                item.setFragment(createFragment(hl, fullname));

            } else if (Integer.valueOf(SharedConstants.SEARCH_ROLE).toString().equals(docType)) {
                item.setSpaceUname(doc.get(FieldName.ROLE_NAME));
                item.setTitle(doc.get(FieldName.ROLE_DISPLAY_NAME));
                item.setDesc(doc.get(FieldName.ROLE_DESC));
                //item.setFragment("");

            } else if (Integer.valueOf(SharedConstants.SEARCH_ATTACHMENT).toString().equals(docType)) {
                item.setTitle(doc.get(FieldName.FILE_NAME));
                item.setDesc(doc.get(FieldName.FILE_COMMENT));
                item.setItemUid(doc.get(FieldName.FILE_NODE_UUID));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                String text = doc.get(FieldName.TEXT);
                // do not highlight the file content fragment, because it is not stored in the index
                String fragment = createFragment(hl, StringUtil.join(" ", item.getDesc(), text));
                item.setFragment(
                        (fragment == null || fragment.trim().length() == 0) ? ("Comment: " + item.getDesc())
                                : fragment);
            }
            viewableMatchedResults.add(item);
        }
    }
    return len;
}

From source file:com.flaptor.hounder.searcher.SnippetSearcher.java

License:Apache License

/**
 * Adds snippets to the search results.
 * How stuff works:
 * For each 'group g' in provided GroupedSearchResults:
 *   For each result in 'g':
 *     Use the lucene highlighter to get the terms highlighted on the required field.
 *     Then call getSnippet(...) to get the resulting snippet
 */
private void addSnippets(GroupedSearchResults res, String snippetOfField, int snippetLength, QueryScorer scorer,
        Formatter simpleHtmlFormatter) throws IOException {

    Highlighter highlighter = new Highlighter(simpleHtmlFormatter, scorer);
    highlighter.setTextFragmenter(NULL_FRAGMENTER);
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); // make sure the whole text will be analyzed
    // Here we store every phrase seen so far. It is used to give a lower score to
    // recurring phrases
    Set<String> usedSnippets = new HashSet<String>();

    for (int j = 0; j < res.groups(); j++) { // for each group
        Vector<Document> resDocs = res.getGroup(j).last();
        int docsLen = resDocs.size();
        for (int i = 0; i < docsLen; i++) { // for each document on that group               
            Document doc = resDocs.get(i); // get the document i
            String text = doc.get(snippetOfField); // text to be snippeted
            if (null == text) {
                logger.warn("Asked to snippet an unexisting field: " + snippetOfField);
                continue;
            }

            TokenStream tokenStream = queryParser.tokenStream(snippetOfField, new StringReader(text));
            TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 1);

            String result = null;
            if (null != fragments && 0 < fragments.length) {
                result = getSnippet(fragments[0].toString(), snippetLength, scorer, usedSnippets);
            }

            if (null == result || 0 == result.length()) { // 
                if (emptySnippetsAllowed) {
                    result = "";
                } else {
                    result = text.substring(0, Math.min(text.length(), snippetLength));
                }
            }
            String snippetF = SNIPPET_FIELDNAME_PREFIX + snippetOfField;
            doc.add(new Field(snippetF, result.toString(), Field.Store.YES, Field.Index.NO));
        }
    }
}

From source file:com.gauronit.tagmata.core.Indexer.java

License:Open Source License

public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle,
        boolean searchInTags, boolean searchInText, boolean superFuzzy) {
    ArrayList<CardSnapshot> cardSnaps = new ArrayList();
    try {
        ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>();

        for (String indexName : indexNames) {
            IndexReader reader = IndexReader
                    .open(FSDirectory.open(new File(indexDir + File.separator + indexName),
                            new SimpleFSLockFactory(indexDir + File.separator + indexName)));
            IndexSearcher searcher = new IndexSearcher(reader);
            searchers.add(searcher);
        }

        BooleanQuery query = new BooleanQuery();
        if (searchInTitle) {
            IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy);
        }
        if (searchInTags) {
            IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy);
        }
        if (searchInText) {
            IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy);
            IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy);
        }

        for (IndexSearcher searcher : searchers) {
            TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);

                TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"),
                        new StandardAnalyzer(Version.LUCENE_20.LUCENE_35));
                QueryScorer scorer = new QueryScorer(query, "analyzedText");
                Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20);
                Highlighter highlighter = new Highlighter(scorer);
                highlighter.setTextFragmenter(fragmenter);
                String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5);
                String highlights = "";

                for (String fragment : fragments) {
                    highlights += fragment + "...";
                }

                if (highlights.equals("")) {
                    String text = doc.get("text");
                    if (text.length() > 100) {
                        highlights += doc.get("text").substring(0, 100);
                    } else {
                        highlights += doc.get("text");
                    }
                }

                cardSnaps.add(new CardSnapshot(highlights, doc));
            }
            searcher.getIndexReader().close();
            searcher.close();
            searcher = null;
        }

    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return cardSnaps;
}

From source file:com.gitblit.LuceneExecutor.java

License:Apache License

/**
 *
 * @param analyzer
 * @param query
 * @param content
 * @param result
 * @return
 * @throws IOException
 * @throws InvalidTokenOffsetsException
 */
private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result)
        throws IOException, InvalidTokenOffsetsException {
    if (content == null) {
        content = "";
    }

    int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;

    QueryScorer scorer = new QueryScorer(query, "content");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);

    // use an artificial delimiter for the token
    String termTag = "!!--[";
    String termTagEnd = "]--!!";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(fragmenter);

    String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
    if (ArrayUtils.isEmpty(fragments)) {
        if (SearchObjectType.blob == result.type) {
            return "";
        }
        // clip commit message
        String fragment = content;
        if (fragment.length() > fragmentLength) {
            fragment = fragment.substring(0, fragmentLength) + "...";
        }
        return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
    }

    // make sure we have unique fragments
    Set<String> uniqueFragments = new LinkedHashSet<String>();
    for (String fragment : fragments) {
        uniqueFragments.add(fragment);
    }
    fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);

    StringBuilder sb = new StringBuilder();
    for (int i = 0, len = fragments.length; i < len; i++) {
        String fragment = fragments[i];
        String tag = "<pre class=\"text\">";

        // resurrect the raw fragment by removing the artificial delimiters
        String raw = fragment.replace(termTag, "").replace(termTagEnd, "");

        // determine position of the raw fragment in the content
        int pos = content.indexOf(raw);

        // restore complete first line of fragment
        int c = pos;
        while (c > 0) {
            c--;
            if (content.charAt(c) == '\n') {
                break;
            }
        }
        if (c > 0) {
            // inject leading chunk of first fragment line
            fragment = content.substring(c + 1, pos) + fragment;
        }

        if (SearchObjectType.blob == result.type) {
            // count lines as offset into the content for this fragment
            int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));

            // create fragment tag with line number and language
            String lang = "";
            String ext = StringUtils.getFileExtension(result.path).toLowerCase();
            if (!StringUtils.isEmpty(ext)) {
                // maintain leading space!
                lang = " lang-" + ext;
            }
            tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);

        }

        sb.append(tag);

        // replace the artificial delimiter with html tags
        String html = StringUtils.escapeForHtml(fragment, false);
        html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
        sb.append(html);
        sb.append("</pre>");
        if (i < len - 1) {
            sb.append("<span class=\"ellipses\">...</span><br/>");
        }
    }
    return sb.toString();
}

From source file:com.gitblit.service.LuceneService.java

License:Apache License

/**
 *
 * @param analyzer
 * @param query
 * @param content
 * @param result
 * @return
 * @throws IOException
 * @throws InvalidTokenOffsetsException
 */
private String getHighlightedFragment(Analyzer analyzer, Query query, String content, SearchResult result)
        throws IOException, InvalidTokenOffsetsException {
    if (content == null) {
        content = "";
    }

    int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
    int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;

    QueryScorer scorer = new QueryScorer(query, "content");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);

    // use an artificial delimiter for the token
    String termTag = "!!--[";
    String termTagEnd = "]--!!";
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(fragmenter);

    String[] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
    if (ArrayUtils.isEmpty(fragments)) {
        if (SearchObjectType.blob == result.type) {
            return "";
        }
        // clip commit message
        String fragment = content;
        if (fragment.length() > fragmentLength) {
            fragment = fragment.substring(0, fragmentLength) + "...";
        }
        return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
    }

    // make sure we have unique fragments
    Set<String> uniqueFragments = new LinkedHashSet<String>();
    for (String fragment : fragments) {
        uniqueFragments.add(fragment);
    }
    fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);

    StringBuilder sb = new StringBuilder();
    for (int i = 0, len = fragments.length; i < len; i++) {
        String fragment = fragments[i];
        String tag = "<pre class=\"text\">";

        // resurrect the raw fragment by removing the artificial delimiters
        String raw = fragment.replace(termTag, "").replace(termTagEnd, "");

        // determine position of the raw fragment in the content
        int pos = content.indexOf(raw);

        // restore complete first line of fragment
        int c = pos;
        while (c > 0) {
            c--;
            if (content.charAt(c) == '\n') {
                break;
            }
        }
        if (c > 0) {
            // inject leading chunk of first fragment line
            fragment = content.substring(c + 1, pos) + fragment;
        }

        if (SearchObjectType.blob == result.type) {
            // count lines as offset into the content for this fragment
            int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));

            // create fragment tag with line number and language
            String lang = "";
            String ext = StringUtils.getFileExtension(result.path).toLowerCase();
            if (!StringUtils.isEmpty(ext)) {
                // maintain leading space!
                lang = " lang-" + ext;
            }
            tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);

        }

        sb.append(tag);

        // replace the artificial delimiter with html tags
        String html = StringUtils.escapeForHtml(fragment, false);
        html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
        sb.append(html);
        sb.append("</pre>");
        if (i < len - 1) {
            sb.append("<span class=\"ellipses\">...</span><br/>");
        }
    }
    return sb.toString();
}

From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java

License:Open Source License

private Highlighter addStringHighlighter(Query query) {
    QueryScorer scorer = new QueryScorer(query);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
    highlighter.setTextFragmenter(fragmenter);
    return highlighter;
}