public Highlighter(Formatter formatter, Scorer fragmentScorer) 

Source Link


From source file:com.aistor.common.persistence.BaseDaoImpl.java

License:Open Source License

 * @param query 
 * @param list 
 * @param fields names of the bean properties to highlight; the result is written to the first one
// Highlights query keywords in the named bean properties of every entity in the list.
// Each property value is read with Reflections.invokeGetter, passed through
// StringUtils.replaceHtml, and handed to a Highlighter that wraps matches in
// <span class="highlight">...</span> using fragments built by SimpleFragmenter(130).
// The outcome is written back through Reflections.invokeSetter to the property fields[0].
// Returns the same list instance that was passed in.
// NOTE(review): this listing appears to have lost its brace-only lines (and possibly
// else/break lines), so the exact nesting of the statements below cannot be confirmed here.
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(130));
    for (T entity : list) {
        try {
            for (String field : fields) {
                String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                // getBestFragment yields null when no query term occurs in the text.
                String desciption = highlighter.getBestFragment(analyzer, field, text);
                if (desciption != null) {
                    Reflections.invokeSetter(entity, fields[0], desciption);
                // Presumably the no-match fallback: fields[0] receives StringUtils.abbr(text, 130).
                Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, 130));
            //Reflections.invokeSetter(entity, fields[1], "sdfkjsdlkfjklsdjf");
        // NOTE(review): no handling is visible for either exception in this listing.
        } catch (IOException e) {
        } catch (InvalidTokenOffsetsException e) {
    return list;

From source file:com.appeligo.alerts.KeywordAlertThread.java

License:Apache License

 * @param searchExecutor callback to get the set of hits for the given query. This can be
 * executed in different ways.
 * @return true if we hit too many consecutive exceptions so we broke out of the loop
// Walks all keyword alerts returned by KeywordAlert.getAllInNormalizedQueryOrder() while
// isActive() holds. For each alert it obtains hits from searchExecutor, and for every hit
// that helper.isNewMatch accepts (and while helper.maxAlertsExceeded is false) it sends
// messages carrying up to three highlighted fragments of the document's "text" field.
// Returns true once consecutiveExceptions reaches maxConsecutiveExceptions, false otherwise.
// NOTE(review): brace-only lines and several statements (e.g. continue/break, rollback/commit
// calls) are missing from this listing, so the comments below describe only what is visible.
private boolean executeKeywordSearch(SearchExecutor searchExecutor, String messagePrefix,
        boolean groupQueries) {
    ChunkedResults<KeywordAlert> results = KeywordAlert.getAllInNormalizedQueryOrder();
    Hits hits = null;
    String lastNormalizedQuery = null;
    Query lastLuceneQuery = null;
    int consecutiveExceptions = 0;
    while (results.next() && isActive()) {
        KeywordAlert keywordAlert = results.get();
        try {
            // Alerts that are deleted, disabled or have no user are only logged here;
            // presumably they are skipped by a statement not shown in this listing.
            if (keywordAlert.isDeleted() || keywordAlert.isDisabled()) {
                if (log.isDebugEnabled())
                    log.debug("keyword alert is deleted or disabled");
            User user = keywordAlert.getUser();
            if (user == null) {
                if (log.isDebugEnabled())
                    log.debug("keyword alert is implicitly deleted (user is null)");

            if (helper.maxAlertsExceeded(keywordAlert)) {

            if (groupQueries) {
                // When grouping, the previous hits are reused as long as the normalized
                // query is the same as for the preceding alert.
                if ((hits == null) || (!keywordAlert.getNormalizedQuery().equals(lastNormalizedQuery))) {
                    hits = searchExecutor.search(null, keywordAlert.getNormalizedQuery());
                    lastLuceneQuery = searchExecutor.getLuceneQuery();
                } else if (log.isDebugEnabled())
                    log.debug("Not searching on " + keywordAlert.getNormalizedQuery() + " again");
            } else {
                // NOTE(review): the remaining argument(s) of this call are cut off in this listing.
                hits = searchExecutor.search(keywordAlert.getUser().getLineupId(),
                // Note that I'm searching with the lineup from the user, which will
                // only ensure that the liveIndex doesn't return shows that don't ever
                // play for this lineup.  However, it does not guarantee that the show
                // on this user's lineup is playing at the same time (meaning alerts
                // might tell the user of a show that is only in the future).
                lastLuceneQuery = searchExecutor.getLuceneQuery();
            lastNormalizedQuery = keywordAlert.getNormalizedQuery();
            // The highlighter scores fragments against the Lucene query of the latest search.
            Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(lastLuceneQuery));
            PorterStemAnalyzer analyzer = new PorterStemAnalyzer(LuceneIndexer.STOP_WORDS);

            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);

                if (!isActive()) {

                //                 if (groupQueries && (!"true".equals(doc.get("lineup-"+keywordAlert.getUser().getLineupId())))) {
                if (groupQueries
                        && (doc.get("lineup-" + keywordAlert.getUser().getLineupId() + "-startTime") == null)) {
                    // This "if" statement checks to make sure the program is or did play on the user's
                    // lineup, which might be on a different station, a different time, past or future.
                    if (log.isDebugEnabled())
                        log.debug(doc.get("programTitle") + " matched on " + keywordAlert.getNormalizedQuery()
                                + " but it isn't airing on this user's lineup anytime soon.");

                // Each candidate hit is handled inside its own Hibernate transaction.
                Transaction transaction = HibernateUtil.currentSession().beginTransaction();
                try {
                    if ((!helper.maxAlertsExceeded(keywordAlert)) && helper.isNewMatch(keywordAlert, doc)) {
                        if (log.isDebugEnabled())
                            log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for "
                                    + keywordAlert.getNormalizedQuery() + "... sending messages");
                        String text = doc.get("text");
                        // fragments stays null when the document has no stored "text" field.
                        String fragments = null;
                        if (text != null) {
                            TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(text));
                            fragments = highlighter.getBestFragments(tokenStream, text, 3, "...");

                        helper.sendMessages(keywordAlert, fragments, doc, messagePrefix);
                    } else if (log.isDebugEnabled())
                        log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for "
                                + keywordAlert.getNormalizedQuery()
                                + " but max exceeded or we already matched this one");
                } catch (Throwable t) {
                            "Error processing keyword alerts when searching live lucene index. Rolling back transaction.",
                } finally {
                    if (!transaction.wasRolledBack()) {
            // Reaching this point without an exception resets the failure streak.
            consecutiveExceptions = 0;
        } catch (Throwable t) {
            User user = keywordAlert.getUser();
            log.error("Caught throwable on keyword " + keywordAlert.getId() + ", " + keywordAlert.getUserQuery()
                    + ", user " + ((user == null) ? null : user.getUsername()), t);
            // NOTE(review): the increment of consecutiveExceptions is not visible in this listing.
            if (consecutiveExceptions >= maxConsecutiveExceptions) {
                return true;
    return false;

From source file:com.appeligo.search.actions.SearchResults.java

License:Apache License

// Runs the current query against the index and builds one page of search results.
// Visible steps: (1) search with the generated Lucene query; (2) if a didYouMeanParser is set
// and the hit count or score is below the configured minimum, evaluate suggested queries and
// possibly switch to the best one; (3) compute genre, "what matched" and program facet counts;
// (4) add up to pageSize documents starting at startIndex, highlighting the "text" field.
// IOException and ParseException are logged; the method returns results.
// NOTE(review): brace-only lines and several partial statements are missing from this listing.
public List<SearchResult> getSearchResults(int startIndex) {

    initializeStatics();

    hasMoreResults = false;
    try {
        IndexSearcher searcher = null;

        try {
            searcher = newIndexSearcher();
            IndexReader reader = searcher.getIndexReader();

            Query luceneQuery = generateLuceneQuery(searcher);
            luceneQuery = luceneQuery.rewrite(reader);
            Hits hits = searcher.search(luceneQuery);

            usingSuggestedQuery = false;
            suggestedQuery = null;
            // Suggestions are attempted only when a parser is configured and the original
            // query produced too few hits or too low a score.
            if ((didYouMeanParser != null)
                    && ((hits.length() < minimumHits) || (calcScore(searcher, getQuery()) < minimumScore))) {
                if (log.isDebugEnabled()) {
                    log.debug("Need to suggest because either num hits " + hits.length() + " < " + minimumHits
                            + "\n or top hit score " + (hits.length() > 0 ? hits.score(0) : "[NO HITS]") + " < "
                            + minimumScore);
                IndexSearcher compositeSearcher = new IndexSearcher(compositeIndexLocation);
                try {
                    log.debug("calling suggest() with query=" + getQuery() + " and composite index from "
                            + compositeIndexLocation);
                    //Query didYouMean = didYouMeanParser.suggest(getQuery(), compositeSearcher.getIndexReader());
                    // NOTE(review): the remaining argument(s) of getSuggestions are cut off in this listing.
                    Query suggestedQueries[] = didYouMeanParser.getSuggestions(getQuery(),
                    TreeSet<Suggestion> suggestions = new TreeSet<Suggestion>();

                    if (suggestedQueries != null) {
                        for (int i = 0; i < suggestedQueries.length; i++) {
                            log.debug("trying suggested query: " + suggestedQueries[i].toString(defaultField));
                            String suggestedQueryString = suggestedQueries[i].toString(defaultField);
                            String constrainedQueryString = suggestedQueryString;
                            // Unquoted suggestions are tightened into a proximity phrase before scoring.
                            if (constrainedQueryString.indexOf('"') < 0
                                    && constrainedQueryString.indexOf('\'') < 0) {
                                constrainedQueryString = "\"" + constrainedQueryString + "\"~5"; // proximity/distance query (within 5 words of each other)
                            Query suggestedLuceneQuery = generateLuceneQuery(constrainedQueryString, searcher);
                            suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader);
                            Hits suggestedHits = searcher.search(suggestedLuceneQuery);

                            float score = calcScore(suggestedQueryString, suggestedHits);

                            log.debug("SCORE = " + score);

                                    new Suggestion(suggestedQueryString, suggestedLuceneQuery, suggestedHits,
                                            score, ((i == 0) ? didYouMeanParser.includesOriginal() : false)));
                            log.debug("hits=" + suggestedHits.length() + ", score=" + score);

                    // The best suggestion is the last (greatest) element of the sorted set.
                    Suggestion best = null;
                    if (suggestions.size() > 0) {
                        best = suggestions.last();

                    if (best != null && !best.isOriginal()) {
                        suggestedQuery = best.getQueryString();
                        // Strip '+' operators from the suggestion when the user's query had none.
                        if (suggestedQuery != null && suggestedQuery.indexOf('+') >= 0
                                && getQuery().indexOf('+') < 0) {
                            suggestedQuery = suggestedQuery.replaceAll("\\+", "");
                        if (hits.length() == 0) {
                            if (best.getHits().length() > 0) {
                                // Requery probably required because we added proximity before
                                String suggestedQueryString = best.getQueryString();
                                luceneQuery = generateLuceneQuery(suggestedQueryString, searcher);
                                luceneQuery = luceneQuery.rewrite(reader);
                                hits = searcher.search(luceneQuery);
                                //hits = best.getHits();
                                //luceneQuery = best.getLuceneQuery();
                                usingSuggestedQuery = true;
                        log.debug("DidYouMeanParser suggested " + suggestedQuery);
                    } else {
                        if (best != null && best.isOriginal()) {
                            log.debug("The suggestion was the original query after all");
                        log.debug("DidYouMeanParser did not suggest anything");
                } finally {
            if (hits.length() == 0 && suggestedQuery != null) {
            // If we didn't find anything at all, go ahead and show them what the suggested query
            // will give them
            Query suggestedLuceneQuery = generateLuceneQuery(suggestedQuery, searcher);
            suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader);
               Hits suggestedHits = searcher.search(suggestedLuceneQuery);
               if (suggestedHits.length() > 0) {
             hits = suggestedHits;
             luceneQuery = suggestedLuceneQuery;
             usingSuggestedQuery = true;
            totalHits = hits.length();
            // Get the genre matches:
            try {
                BitSetFacetHitCounter facetHitCounter = new BitSetFacetHitCounter();
                String baseQueryString = (isUsingSuggestedQuery() ? suggestedQuery : query);
                String quotedQueryString = baseQueryString;
                // Multi-word queries without quotes are quoted for the per-field display strings.
                if (quotedQueryString.indexOf('"') == -1 && quotedQueryString.indexOf(' ') > -1) {
                    quotedQueryString = "\"" + quotedQueryString + "\"";
                facetHitCounter.setBaseQuery(luceneQuery, baseQueryString);

                List<HitCount> subQueries = new ArrayList<HitCount>();
                for (Map.Entry<String, Query> entry : genreQueries.entrySet()) {
                            new HitCount(entry.getKey(), entry.getValue(), entry.getValue().toString(), 0));
                genreCounts = facetHitCounter.getFacetHitCounts(true);

                whatMatchedCounts = new ArrayList<HitCount>();
                        .add(new HitCount("Title", getFieldQuery(baseQueryString, "programTitle", searcher),
                                "programTitle:" + quotedQueryString, 0));
                        new HitCount("Episode Title", getFieldQuery(baseQueryString, "episodeTitle", searcher),
                                "episodeTitle:" + quotedQueryString, 0));
                        new HitCount("Description", getFieldQuery(baseQueryString, "description", searcher),
                                "description:" + quotedQueryString, 0));
                whatMatchedCounts.add(new HitCount("Content", getFieldQuery(baseQueryString, "text", searcher),
                        "text:" + quotedQueryString, 0));
                        .add(new HitCount("Credits", getFieldQuery(baseQueryString, "credits", searcher),
                                "credits:" + quotedQueryString, 0));
                whatMatchedCounts = facetHitCounter.getFacetHitCounts(true);

                //Program Count  -- Not sure if there is a better way to do this.
                HashSet<String> programTitles = new HashSet<String>();
                programCounts = new ArrayList<HitCount>();
                for (int i = 0; i < hits.length() && programCounts.size() < 5; i++) {
                    String title = hits.doc(i).get("programTitle");
                    if (!programTitles.contains(title)) {
                        String queryTitle = title;
                        queryTitle = QueryParser.escape(title);
                        if (queryTitle.indexOf('"') > -1) {
                            // NOTE(review): String.replace returns a new string and the result is
                            // discarded here, so this statement leaves queryTitle unchanged.
                            queryTitle.replace("\"", "\\\"");
                        if (queryTitle.indexOf(' ') > -1) {
                            queryTitle = "\"" + queryTitle + "\"";

                                .add(new HitCount(title, getFieldQuery(queryTitle, "programTitle", searcher),
                                        "programTitle:" + queryTitle, 0));
                programCounts = facetHitCounter.getFacetHitCounts(false);
            // NOTE(review): no handling of this exception is visible in this listing.
            } catch (Exception e) {

            results = new ArrayList<SearchResult>();
            Query userQuery = getContentQuery(query, searcher);
            // Highlighting is scored against the content query restricted to the "text" field.
            Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(userQuery, "text"));

            log.debug("#hits=" + hits.length());

            EPGProvider epgProvider = DefaultEpg.getInstance();

            boolean missingWebPaths = false; // We added this to the index midstream, so some do and some don't.
            // Next index rebuild, and they'll all have it.
            // First pass over the page: detect documents that have no stored "webPath".
            for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                if (hits.doc(i + startIndex).get("webPath") == null) {
                    missingWebPaths = true;
            Program[] programs = null;
            // Programs are fetched from the EPG only when some document lacks a webPath.
            if (missingWebPaths) {
                List<String> programIds = new ArrayList<String>(pageSize);
                for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                    programIds.add(hits.doc(i + startIndex).get("programID"));
                programs = DefaultEpg.getInstance().getProgramList(programIds);
            for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                addDocument(hits.doc(i + startIndex), hits.score(i + startIndex), epgProvider, highlighter,
                        analyzer, null, null, (programs == null ? null : programs[i]));
            // More results exist when the hits extend beyond the page just built.
            if (results.size() + startIndex < hits.length()) {
                hasMoreResults = true;
        } finally {
            if (searcher != null) {
    } catch (IOException e) {
        log.error("Error searching index", e);
    } catch (ParseException e) {
        log.error("Error searching index", e);
    return results;

From source file:com.bewsia.script.safe.lucene.SEntity.java

License:Open Source License

public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments,
        String separator) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    CachingTokenFilter tokenStream = new CachingTokenFilter(
            analyzer.tokenStream(field, new StringReader(text)));
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));
    tokenStream.reset();//  ww  w. j  a  v  a2s .  c o m
    String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator);
    return rv.length() == 0 ? text : rv;

From source file:com.bluedragon.search.search.QueryRun.java

License:Open Source License

// Appends one row to queryResultData for the document with the given docid.
// Columns 1-6 are fixed (id, name, score, searchCount, recordsSearched, rank + 1); every
// stored field of the document then goes into its own column, created on first use and
// tracked in activeColumns. When queryAttributes.getContextPassages() > 0, a "CONTEXT"
// column is filled with highlighted fragments of the document contents.
// NOTE(review): brace-only lines and several statements (e.g. return/continue and parts of
// two calls) are missing from this listing.
private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount,
        int recordsSearched) throws CorruptIndexException, Exception {
    DocumentWrap document = new DocumentWrap(searcher.doc(docid));

    queryResultData.addRow(1);

    // Add in the standard columns that we know we have for every search
    queryResultData.setCell(1, new cfStringData(document.getId()));
    queryResultData.setCell(2, new cfStringData(document.getName()));
    queryResultData.setCell(3, new cfNumberData(score));
    queryResultData.setCell(4, new cfNumberData(searchCount));
    queryResultData.setCell(5, new cfNumberData(recordsSearched));
    queryResultData.setCell(6, new cfNumberData(rank + 1));

    String uC = queryAttributes.getUniqueColumn();

    // Now we do the custom ones
    List<IndexableField> fields = document.getDocument().getFields();
    Iterator<IndexableField> it = fields.iterator();
    while (it.hasNext()) {
        IndexableField fieldable = it.next();

        // Column lookups below use the lower-cased field name as the key.
        String fieldName = fieldable.name().toLowerCase();

        // Check for the unique
        if (uniqueSet != null && fieldName.equals(uC)) {
            if (uniqueSet.contains(fieldable.stringValue())) {
            } else

        // Check to see if we have this column
        if (fieldName.equals("contents") && !queryAttributes.getContentFlag())

        // Unknown fields get a new column whose header is the upper-cased field name.
        if (!activeColumns.containsKey(fieldName)) {
            int newcolumn = queryResultData.addColumnData(fieldable.name().toUpperCase(),
                    cfArrayData.createArray(1), null);
            activeColumns.put(fieldName, newcolumn);

        int column = activeColumns.get(fieldName);
        // Columns 1-6 are the fixed columns written above.
        if (column <= 6)

        queryResultData.setCell(column, new cfStringData(fieldable.stringValue()));

    // Do the context stuff if enable
    if (queryAttributes.getContextPassages() > 0) {

        Scorer scorer = new QueryScorer(queryAttributes.getQuery());
        // NOTE(review): the second constructor argument is cut off in this listing.
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(queryAttributes.getContextHighlightStart(),
        Highlighter highlighter = new Highlighter(formatter, scorer);
        // NOTE(review): no call attaching this fragmenter to the highlighter is visible here.
        Fragmenter fragmenter = new SimpleFragmenter(queryAttributes.getContextBytes());

        // The context cell stays an empty string when there are no contents to highlight.
        String nextContext = "";
        String contents = document.getAttribute(DocumentWrap.CONTENTS);

        if (contents != null) {
            TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream(DocumentWrap.CONTENTS,
                    new StringReader(contents));
            String[] fragments = null;
            try {
                // NOTE(review): the remaining argument(s) of getBestFragments are cut off in this listing.
                fragments = highlighter.getBestFragments(tokenStream, contents,
                if (fragments.length == 1) {
                    nextContext = fragments[0] + "...";
                } else {
                    StringBuilder context = new StringBuilder();
                    for (int f = 0; f < fragments.length; f++) {
                    nextContext = context.toString();
            // NOTE(review): no handling of this exception is visible in this listing.
            } catch (Exception e) {

            // Add in the context
            if (!activeColumns.containsKey("context")) {
                int newcolumn = queryResultData.addColumnData("CONTEXT", cfArrayData.createArray(1), null);
                activeColumns.put("context", newcolumn);

            queryResultData.setCell(activeColumns.get("context"), new cfStringData(nextContext));

From source file:com.bugull.mongo.lucene.BuguHighlighter.java

License:Apache License

public String getResult(String fieldName, String fieldValue) throws Exception {
    BuguIndex index = BuguIndex.getInstance();
    QueryParser parser = new QueryParser(index.getVersion(), fieldName, index.getAnalyzer());
    Query query = parser.parse(keywords);
    TokenStream tokens = index.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));
    QueryScorer scorer = new QueryScorer(query, fieldName);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    return highlighter.getBestFragments(tokens, fieldValue, maxFragments, "...");

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

// Highlights the terms of luceneQuery inside strToHighlight, wrapping matches in
// <span class="hit">...</span>. The query is rewritten against a reader opened on indexDir
// before scoring. Strings longer than TAILLE_CHAINE_NON_FRAGMENTEE go through
// getBestFragments; shorter ones through getBestFragment. When the result is blank and the
// field is "titre" (case-insensitive), the original string is returned instead.
// IOException and InvalidTokenOffsetsException are rethrown wrapped in RuntimeException.
// NOTE(review): brace-only lines and part of one call are missing from this listing.
public String highlight(String strToHighlight, String fieldName, Query luceneQuery) {
    String highlightedText;
    Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
    try {
        // NOTE(review): no close() of the directory or reader is visible in this listing.
        Directory directory = FSDirectory.open(indexDir);
        IndexReader indexReader = DirectoryReader.open(directory);
        Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader);
        QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);
        SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>");
        Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);

        // NOTE(review): no call attaching luceneFragmenter to the highlighter is visible here.
        Fragmenter luceneFragmenter;
        // If the string to highlight exceeds the threshold (the original comment says 250 characters)
        if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
            // Build best fragments of TAILLE_FRAGMENT characters each (the original comment says 100)
            luceneFragmenter = new SimpleFragmenter(TAILLE_FRAGMENT);
        } else {
            // The whole string is highlighted as a single fragment
            luceneFragmenter = new SimpleFragmenter(Integer.MAX_VALUE);

        TokenStream luceneTokenStream = analyzer.tokenStream(fieldName, new StringReader(strToHighlight));
        String fragment = null;
        if (strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE) {
            // NOTE(review): the remaining argument(s) of getBestFragments are cut off in this listing.
            fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight, NB_BEST_FRAGMENT,
        } else {
            fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight);

        // Titles fall back to the raw string when nothing was highlighted.
        if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) {
            fragment = strToHighlight;

        highlightedText = fragment;
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InvalidTokenOffsetsException e) {
        throw new RuntimeException(e);
    return highlightedText;

From source file:com.duroty.application.bookmark.manager.BookmarkManager.java

License:Open Source License

 * DOCUMENT ME!
 * @param repositoryName DOCUMENT ME!
 * @param token DOCUMENT ME!
 * @param page DOCUMENT ME!
 * @param messagesByPage DOCUMENT ME!
 * @param order DOCUMENT ME!
 * @param orderType DOCUMENT ME!
 * @return DOCUMENT ME!
 * @throws BookmarkException DOCUMENT ME!
 * @throws SearchException DOCUMENT ME!
// Searches the bookmark index of the given repository and highlights the hits on one page.
// The index path is built from defaultLucenePath, repositoryName and
// Constants.BOOKMARK_LUCENE_BOOKMARK. A blank token lists everything (or only notebook
// entries when isNotebook is set), sorted by score and then insert date; otherwise the token
// is parsed and, for notebooks, combined with a notebook:true clause. Contents and title of
// each hit in [page * messagesByPage, +messagesByPage) are highlighted with <b>...</b>.
// Returns null when there are no hits; any exception is rethrown wrapped in SearchException.
// NOTE(review): brace-only lines, the code that fills bookmarkObj/searchObj and the finally
// body are missing from this listing.
public SearchObj search(String repositoryName, String token, int page, int messagesByPage, int order,
        String orderType, boolean isNotebook) throws BookmarkException {
    String lucenePath = "";

    // Avoid a doubled separator when defaultLucenePath already ends with one.
    if (!defaultLucenePath.endsWith(File.separator)) {
        lucenePath = defaultLucenePath + File.separator + repositoryName + File.separator
                + Constants.BOOKMARK_LUCENE_BOOKMARK;
    } else {
        lucenePath = defaultLucenePath + repositoryName + File.separator + Constants.BOOKMARK_LUCENE_BOOKMARK;

    Searcher searcher = null;
    SearchObj searchObj = new SearchObj();
    Highlighter highlighter = null;

    try {
        searcher = BookmarkIndexer.getSearcher(lucenePath);

        Query query = null;
        Hits hits = null;

        if (StringUtils.isBlank(token)) {
            if (isNotebook) {
                query = SimpleQueryParser.parse("notebook:true", new KeywordAnalyzer());
            } else {
                query = new MatchAllDocsQuery();

            hits = searcher.search(query, new Sort(new SortField[] { SortField.FIELD_SCORE,
                    new SortField(Field_insert_date, SortField.STRING, true) }));
        } else {
            query = SimpleQueryParser.parse(token, analyzer);

            StringBuffer buffer = new StringBuffer();

            // For notebooks the user query is AND-ed with a parsed "true" clause on Field_notebook.
            if (isNotebook) {
                buffer.append("(" + query.toString() + ") AND ");

                QueryParser parser = new QueryParser(Field_notebook, new KeywordAnalyzer());

                Query aux = parser.parse(String.valueOf(true));

                buffer.append("(" + aux.toString() + ") ");

            // The combined query string is re-parsed only when something was appended.
            if (buffer.length() > 0) {
                QueryParser parser = new QueryParser("", new WhitespaceAnalyzer());
                query = parser.parse(buffer.toString());

            hits = searcher.search(query);

        // NOTE(review): as shown, both timestamps are taken after the search has already run
        // and with nothing in between, so the computed time would be approximately zero.
        Date searchStart = new Date();

        Date searchEnd = new Date();

        //time in seconds
        double time = ((double) (searchEnd.getTime() - searchStart.getTime())) / (double) 1000;

        int hitsLength = hits.length();

        if (hitsLength <= 0) {
            return null;

        int start = page * messagesByPage;
        int end = start + messagesByPage;

        // Clamp the page end to the number of hits; a non-positive end means "all hits".
        if (end > 0) {
            end = Math.min(hitsLength, end);
        } else {
            end = hitsLength;

        if (start > end) {
            throw new SearchException("Search index of bound. start > end");

        Vector bookmarks = new Vector();

        for (int j = start; j < end; j++) {
            Document doc = hits.doc(j);

            if (doc != null) {
                LuceneBookmark luceneBookmark = new LuceneBookmark(doc);

                // A new formatter and highlighter are built for every document on the page.
                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
                highlighter = new Highlighter(formatter, new QueryScorer(query));
                highlighter.setTextFragmenter(new SimpleFragmenter(150));

                BookmarkObj bookmarkObj = new BookmarkObj();

                String contents = luceneBookmark.getCotents();
                String hcontents = null;

                if ((contents != null) && (!contents.trim().equals(""))) {
                    // Collapse runs of whitespace before tokenizing.
                    contents = contents.replaceAll("\\s+", " ");

                    TokenStream tokenStream = analyzer.tokenStream(Field_contents, new StringReader(contents));
                    hcontents = highlighter.getBestFragment(tokenStream, contents);

                    // Contents are kept only when a highlighted fragment was found.
                    if (hcontents != null) {
                        contents = hcontents;
                    } else {
                        contents = null;


                String title = luceneBookmark.getTitle();
                String htitle = null;

                if ((title != null) && (!title.trim().equals(""))) {
                    title = title.replaceAll("\\s+", " ");

                    TokenStream tokenStream = analyzer.tokenStream(Field_title, new StringReader(title));
                    htitle = highlighter.getBestFragment(tokenStream, title);

                    // Unlike contents, the title keeps its plain value when nothing matched.
                    if (htitle != null) {
                        title = htitle;



    } catch (Exception ex) {
        throw new SearchException(ex);
    } finally {

    return searchObj;

From source file:com.edgenius.wiki.search.service.AbstractSearchService.java

License:Open Source License

// Converts the hits in the index range [from, to) into SearchResultItem objects, building a
// highlighted fragment for each according to its document type, and returns hits.totalHits.
// A "to" of -1 means "up to the last hit". Documents for which isAllowView is false are
// excluded from the indexes that get converted.
// NOTE(review): brace-only lines, continue statements and most item setter calls are missing
// from this listing, so several locals below appear unused.
private int detach(IndexSearcher searcher, List<SearchResultItem> viewableMatchedResults, TopDocs hits,
        Query hlQuery, int from, int to, User user) throws IOException {

    Assert.isTrue(from <= to && from >= 0 && (to >= 0 || to == -1));

    //For performance issue, we simply return total result set length without permission filter out.
    //This means is total length might be larger than the set that user can view, as some result will be filter out
    //if user doesn't have permission to see.
    int len = hits.totalHits;

    if (len > 0 && from < len) {
        // Resolve the exclusive upper bound: -1 means all hits, otherwise clamp to len.
        to = to == -1 ? len : (len > to ? to : len);
        //security filter from return result

        List<Integer> resultIdx = new ArrayList<Integer>();
        for (int idx = from; idx < to; idx++) {
            //does not include "to" , For example, from:to is 0:10, then return index is 0-9

            //TODO: if page includes some result that invisible to user, it is better display message to tell user
            //some result is hidden for security reason.
            if (!isAllowView(searcher.doc(hits.scoreDocs[idx].doc), user))

            resultIdx.add(idx);

        //create a highlighter for all fragment parser
        Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");
        // hl stays null when no highlight query was supplied; createFragment receives it as is.
        Highlighter hl = null;
        if (hlQuery != null) {
            Scorer scorer = new QueryScorer(hlQuery);
            hl = new Highlighter(formatter, scorer);
            // NOTE(review): no call attaching this fragmenter to hl is visible in this listing.
            Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_LEN);

        for (int idx : resultIdx) {
            SearchResultItem item = new SearchResultItem();

            Document doc = searcher.doc(hits.scoreDocs[idx].doc);
            String docType = doc.get(FieldName.DOC_TYPE);

            //common items in search results
            // The contributor lookup is done for every document type except SEARCH_USER.
            if (userReadingService != null
                    && !new Integer(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                String username = doc.get(FieldName.CONTRIBUTOR);
                User contirUser = userReadingService.getUserByName(username);
                if (contirUser != null) {
            // Dispatch on the stored document type, compared as the decimal string of each constant.
            if (Integer.valueOf(SharedConstants.SEARCH_PAGE).toString().equals(docType)) {
                String content = doc.get(FieldName.PAGE_CONTENT);

                //does set item.desc() as content, which maybe very big string. no necessary returned
                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), content)));

            } else if (Integer.valueOf(SharedConstants.SEARCH_COMMENT).toString().equals(docType)) {
                String content = doc.get(FieldName.CONTENT);

                //does set item.desc() as content, which maybe very big string. no necessary returned
                item.setFragment(createFragment(hl, content));

            } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE).toString().equals(docType)) {
                String title = doc.get(FieldName.SPACE_NAME);
                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), item.getDesc())));

            } else if (Integer.valueOf(SharedConstants.SEARCH_WIDGET).toString().equals(docType)) {
                //wTitle-> title; wDesc-> desc; wTitle(could be pageTitle or markup title) ->spaceUname
                String widgetType = doc.get(FieldName.WIDGET_TYPE);
                String title = doc.get(FieldName.WIDGET_TITLE);

                //does content need transfer back?? so far no
                String content = doc.get(FieldName.WIDGET_CONTENT);
                if (WidgetModel.TYPE_PAGE_LINKER.equals(widgetType)) {
                    //don't use as Highlighter fragment
                    content = "";

                String desc = doc.get(FieldName.WIDGET_DESC);


                //add little confuse field mapping :(

                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getDesc(), content)));

            } else if (Integer.valueOf(SharedConstants.SEARCH_PAGE_TAG).toString().equals(docType)) {
                //page tag
                item.setFragment(createFragment(hl, item.getTitle()));
            } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE_TAG).toString().equals(docType)) {
                //space tag
                item.setFragment(createFragment(hl, item.getTitle()));

            } else if (Integer.valueOf(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                String username = doc.get(FieldName.USER_NAME);
                String fullname = doc.get(FieldName.USER_FULLNAME);
                //hacker - contributor is current user fullname
                if (userReadingService != null)
                item.setFragment(createFragment(hl, fullname));

            } else if (Integer.valueOf(SharedConstants.SEARCH_ROLE).toString().equals(docType)) {

            } else if (Integer.valueOf(SharedConstants.SEARCH_ATTACHMENT).toString().equals(docType)) {
                String text = doc.get(FieldName.TEXT);
                //does not mark file content fragment, because it does not store in index
                String fragment = createFragment(hl, StringUtil.join(" ", item.getDesc(), text));
                        // A blank fragment is replaced by the item description prefixed with "Comment: ".
                        (fragment == null || fragment.trim().length() == 0) ? ("Comment: " + item.getDesc())
                                : fragment);
    return len;

From source file:com.flaptor.hounder.searcher.SnippetSearcher.java

License:Apache License

 * Adds snippets to the search results.
 * How stuff works:
 * For each 'group g' in provided GroupedSearchResults:
 *   For each result in 'g':
 *     Use the lucene highlighter to get the terms highlighted on the required field.
 *     Then call getSnippet(...) to get the resulting snippet
// For every document in every group of res, highlights the field snippetOfField, turns the
// best fragment into a snippet via getSnippet, and stores it on the document in a field named
// SNIPPET_FIELDNAME_PREFIX + snippetOfField (stored, not indexed). When no snippet is
// produced, the stored value is either "" (emptySnippetsAllowed) or the first snippetLength
// characters of the text.
// NOTE(review): brace-only lines (and possibly a continue after the warning) are missing from
// this listing.
private void addSnippets(GroupedSearchResults res, String snippetOfField, int snippetLength, QueryScorer scorer,
        Formatter simpleHtmlFormatter) throws IOException {

    Highlighter highlighter = new Highlighter(simpleHtmlFormatter, scorer);
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); // make sure the whole text will be analyzed
    // Here we store every seen phrase. It is used to give less score to
    // recurring phrases
    Set<String> usedSnippets = new HashSet<String>();

    for (int j = 0; j < res.groups(); j++) { // for each group
        Vector<Document> resDocs = res.getGroup(j).last();
        int docsLen = resDocs.size();
        for (int i = 0; i < docsLen; i++) { // for each document on that group
            Document doc = resDocs.get(i); // get the document i
            String text = doc.get(snippetOfField); // text to be snippeted
            if (null == text) {
                logger.warn("Asked to snippet an unexisting field: " + snippetOfField);

            TokenStream tokenStream = queryParser.tokenStream(snippetOfField, new StringReader(text));
            // Only the single best fragment is requested (maxNumFragments = 1, no merging).
            TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 1);

            String result = null;
            if (null != fragments && 0 < fragments.length) {
                result = getSnippet(fragments[0].toString(), snippetLength, scorer, usedSnippets);

            if (null == result || 0 == result.length()) { // no usable snippet was produced
                if (emptySnippetsAllowed) {
                    result = "";
                } else {
                    // Fall back to the leading part of the raw text, at most snippetLength characters.
                    result = text.substring(0, Math.min(text.length(), snippetLength));
            String snippetF = SNIPPET_FIELDNAME_PREFIX + snippetOfField;
            doc.add(new Field(snippetF, result.toString(), Field.Store.YES, Field.Index.NO));