Example usage for the org.apache.lucene.search.highlight.Highlighter constructor

Introduction

On this page you can find example usages of the org.apache.lucene.search.highlight.Highlighter constructor.

Prototype

public Highlighter(Formatter formatter, Scorer fragmentScorer)

Source Link

Usage

From source file:com.aistor.common.persistence.BaseDaoImpl.java

License:Open Source License

/**
 * Highlights the query keywords inside every entity of the given list.
 * <p>
 * For each entity the named fields are examined in order. The text of a field is read
 * through its getter and passed through {@code StringUtils.replaceHtml}. The first field
 * that yields a highlighted fragment wins: the fragment is written to the property named
 * by {@code fields[0]} and the remaining fields are skipped. As long as no fragment has
 * been found, {@code fields[0]} is overwritten with {@code StringUtils.abbr(text, 130)}
 * of the field just examined.
 * <p>
 * An {@link IOException} or {@link InvalidTokenOffsetsException} raised while processing
 * an entity is printed to standard error and processing continues with the next entity.
 *
 * @param query  the query whose terms are highlighted
 * @param list   the entities to process; they are modified in place
 * @param fields property names to examine; the first one also receives the result
 * @return the same {@code list} instance that was passed in
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter spanFormatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter fragmentHighlighter = new Highlighter(spanFormatter, new QueryScorer(query));
    fragmentHighlighter.setTextFragmenter(new SimpleFragmenter(130));

    for (T entity : list) {
        try {
            for (String field : fields) {
                String plainText = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                String fragment = fragmentHighlighter.getBestFragment(analyzer, field, plainText);
                if (fragment == null) {
                    // No hit in this field: fall back to the abbreviated text and try the next field.
                    Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(plainText, 130));
                    continue;
                }
                // First field with a hit wins; later fields are not examined.
                Reflections.invokeSetter(entity, fields[0], fragment);
                break;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
    return list;
}

From source file:com.appeligo.alerts.KeywordAlertThread.java

License:Apache License

/**
 * Walks all keyword alerts in normalized-query order, runs each alert's query through the
 * given executor and sends a message for every document that is a new match.
 * <p>
 * Deleted or disabled alerts, alerts whose user is {@code null} (these are marked deleted
 * and saved) and alerts that already exceeded their maximum are skipped. The loop also
 * stops as soon as {@code isActive()} returns {@code false}.
 *
 * @param searchExecutor callback to get the set of hits for the given query. This can be
 * executed in different ways.
 * @param messagePrefix passed through unchanged to {@code helper.sendMessages}
 * @param groupQueries when {@code true}, the search is run without a lineup id and the
 * previous hits are reused while consecutive alerts share the same normalized query; each
 * hit is then filtered by the user's {@code lineup-<id>-startTime} document field. When
 * {@code false}, every alert is searched with its user's lineup id.
 * @return true if we hit too many consecutive exceptions so we broke out of the loop
 */
private boolean executeKeywordSearch(SearchExecutor searchExecutor, String messagePrefix,
        boolean groupQueries) {
    ChunkedResults<KeywordAlert> results = KeywordAlert.getAllInNormalizedQueryOrder();
    // State carried from one alert to the next so that identical queries can share hits.
    Hits hits = null;
    String lastNormalizedQuery = null;
    Query lastLuceneQuery = null;
    int consecutiveExceptions = 0;
    results.beforeFirst();
    while (results.next() && isActive()) {
        KeywordAlert keywordAlert = results.get();
        try {
            if (keywordAlert.isDeleted() || keywordAlert.isDisabled()) {
                if (log.isDebugEnabled())
                    log.debug("keyword alert is deleted or disabled");
                continue;
            }
            User user = keywordAlert.getUser();
            if (user == null) {
                // An alert without a user is treated as deleted and persisted as such.
                if (log.isDebugEnabled())
                    log.debug("keyword alert is implicitly deleted (user is null)");
                keywordAlert.setDeleted(true);
                keywordAlert.save();
                continue;
            }

            if (helper.maxAlertsExceeded(keywordAlert)) {
                continue;
            }

            if (groupQueries) {
                // Only search again when there are no hits yet or the query text changed.
                if ((hits == null) || (!keywordAlert.getNormalizedQuery().equals(lastNormalizedQuery))) {
                    hits = searchExecutor.search(null, keywordAlert.getNormalizedQuery());
                    lastLuceneQuery = searchExecutor.getLuceneQuery();
                } else if (log.isDebugEnabled())
                    log.debug("Not searching on " + keywordAlert.getNormalizedQuery() + " again");
            } else {
                hits = searchExecutor.search(keywordAlert.getUser().getLineupId(),
                        keywordAlert.getNormalizedQuery());
                // Note that I'm searching with the lineup from the user, which will
                // only ensure that the liveIndex doesn't return shows that don't ever
                // play for this lineup.  However, it does not guarantee that the show
                // on this user's lineup is playing at the same time (meaning alerts
                // might tell the user of a show that is only in the future).
                lastLuceneQuery = searchExecutor.getLuceneQuery();
            }
            // Recorded only after a successful search, so a failed search is retried
            // for the next alert even if it has the same query.
            lastNormalizedQuery = keywordAlert.getNormalizedQuery();
            Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(lastLuceneQuery));
            PorterStemAnalyzer analyzer = new PorterStemAnalyzer(LuceneIndexer.STOP_WORDS);

            for (int i = 0; i < hits.length(); i++) {
                Document doc = hits.doc(i);

                if (!isActive()) {
                    break;
                }

                //                 if (groupQueries && (!"true".equals(doc.get("lineup-"+keywordAlert.getUser().getLineupId())))) {
                if (groupQueries
                        && (doc.get("lineup-" + keywordAlert.getUser().getLineupId() + "-startTime") == null)) {
                    // This "if" statement checks to make sure the program is or did play on the user's
                    // lineup, which might be on a different station, a different time, past or future.
                    if (log.isDebugEnabled())
                        log.debug(doc.get("programTitle") + " matched on " + keywordAlert.getNormalizedQuery()
                                + " but it isn't airing on this user's lineup anytime soon.");
                    continue;
                }

                // One transaction per matching document: it is rolled back when processing
                // throws and committed otherwise (see the finally block).
                Transaction transaction = HibernateUtil.currentSession().beginTransaction();
                try {
                    if ((!helper.maxAlertsExceeded(keywordAlert)) && helper.isNewMatch(keywordAlert, doc)) {
                        if (log.isDebugEnabled())
                            log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for "
                                    + keywordAlert.getNormalizedQuery() + "... sending messages");
                        String text = doc.get("text");
                        // Stays null when the document has no "text" field.
                        String fragments = null;
                        if (text != null) {
                            TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(text));
                            fragments = highlighter.getBestFragments(tokenStream, text, 3, "...");
                        }

                        helper.incrementTodaysAlertCount(keywordAlert);
                        helper.sendMessages(keywordAlert, fragments, doc, messagePrefix);
                    } else if (log.isDebugEnabled())
                        log.debug("KeywordAlertThread found match in " + doc.get("programTitle") + " for "
                                + keywordAlert.getNormalizedQuery()
                                + " but max exceeded or we already matched this one");
                } catch (Throwable t) {
                    // A failure on one document is logged and does not abort the remaining hits.
                    log.error(
                            "Error processing keyword alerts when searching live lucene index. Rolling back transaction.",
                            t);
                    transaction.rollback();
                } finally {
                    if (!transaction.wasRolledBack()) {
                        transaction.commit();
                    }
                }
            }
            // The alert was processed without an exception escaping: reset the failure streak.
            consecutiveExceptions = 0;
        } catch (Throwable t) {
            User user = keywordAlert.getUser();
            log.error("Caught throwable on keyword " + keywordAlert.getId() + ", " + keywordAlert.getUserQuery()
                    + ", user " + ((user == null) ? null : user.getUsername()), t);
            consecutiveExceptions++;
            // Give up once too many alerts in a row have failed.
            if (consecutiveExceptions >= maxConsecutiveExceptions) {
                return true;
            }
        }
    }
    return false;
}

From source file:com.appeligo.search.actions.SearchResults.java

License:Apache License

/**
 * Runs the current query against the index and builds one page of search results.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>Search with the generated Lucene query.</li>
 * <li>If a did-you-mean parser is configured and there are fewer than {@code minimumHits}
 * hits or the score is below {@code minimumScore}, try the parser's suggestions. When the
 * original query had no hits and the best suggestion has some, the suggestion replaces
 * the query.</li>
 * <li>Compute the genre, "what matched" and program facet counts. A failure here is
 * logged and does not abort the search.</li>
 * <li>Add up to {@code pageSize} documents, starting at {@code startIndex}, to
 * {@code results} and set {@code hasMoreResults}.</li>
 * </ol>
 *
 * @param startIndex index of the first hit to include in the page
 * @return the {@code results} field; when the search fails with an {@link IOException} or
 *         {@link ParseException} the error is logged and {@code results} is returned in
 *         whatever state it had at that point
 */
public List<SearchResult> getSearchResults(int startIndex) {

    initializeStatics();

    hasMoreResults = false;
    try {
        IndexSearcher searcher = null;

        try {
            searcher = newIndexSearcher();
            IndexReader reader = searcher.getIndexReader();

            Query luceneQuery = generateLuceneQuery(searcher);
            luceneQuery = luceneQuery.rewrite(reader);
            Hits hits = searcher.search(luceneQuery);

            usingSuggestedQuery = false;
            suggestedQuery = null;
            if ((didYouMeanParser != null)
                    && ((hits.length() < minimumHits) || (calcScore(searcher, getQuery()) < minimumScore))) {
                if (log.isDebugEnabled()) {
                    log.debug("Need to suggest because either num hits " + hits.length() + " < " + minimumHits
                            + "\n or top hit score " + (hits.length() > 0 ? hits.score(0) : "[NO HITS]") + " < "
                            + minimumScore);
                }
                IndexSearcher compositeSearcher = new IndexSearcher(compositeIndexLocation);
                try {
                    log.debug("calling suggest() with query=" + getQuery() + " and composite index from "
                            + compositeIndexLocation);
                    //Query didYouMean = didYouMeanParser.suggest(getQuery(), compositeSearcher.getIndexReader());
                    Query suggestedQueries[] = didYouMeanParser.getSuggestions(getQuery(),
                            compositeSearcher.getIndexReader());
                    // Sorted set: the best suggestion is taken from the end via last().
                    TreeSet<Suggestion> suggestions = new TreeSet<Suggestion>();

                    if (suggestedQueries != null) {
                        for (int i = 0; i < suggestedQueries.length; i++) {
                            log.debug("trying suggested query: " + suggestedQueries[i].toString(defaultField));
                            String suggestedQueryString = suggestedQueries[i].toString(defaultField);
                            String constrainedQueryString = suggestedQueryString;
                            if (constrainedQueryString.indexOf('"') < 0
                                    && constrainedQueryString.indexOf('\'') < 0) {
                                constrainedQueryString = "\"" + constrainedQueryString + "\"~5"; // proximity/distance query (within 5 words of each other)
                            }
                            Query suggestedLuceneQuery = generateLuceneQuery(constrainedQueryString, searcher);
                            suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader);
                            Hits suggestedHits = searcher.search(suggestedLuceneQuery);

                            float score = calcScore(suggestedQueryString, suggestedHits);

                            log.debug("=========================================");
                            log.debug("SCORE = " + score);
                            log.debug("=========================================");

                            suggestions.add(
                                    new Suggestion(suggestedQueryString, suggestedLuceneQuery, suggestedHits,
                                            score, ((i == 0) ? didYouMeanParser.includesOriginal() : false)));
                            log.debug("hits=" + suggestedHits.length() + ", score=" + score);
                        }
                    }

                    Suggestion best = null;
                    if (suggestions.size() > 0) {
                        best = suggestions.last();
                    }

                    if (best != null && !best.isOriginal()) {
                        suggestedQuery = best.getQueryString();
                        if (suggestedQuery != null && suggestedQuery.indexOf('+') >= 0
                                && getQuery().indexOf('+') < 0) {
                            suggestedQuery = suggestedQuery.replaceAll("\\+", "");
                        }
                        if (hits.length() == 0) {
                            if (best.getHits().length() > 0) {
                                // Requery probably required because we added proximity before
                                String suggestedQueryString = best.getQueryString();
                                luceneQuery = generateLuceneQuery(suggestedQueryString, searcher);
                                luceneQuery = luceneQuery.rewrite(reader);
                                hits = searcher.search(luceneQuery);
                                //hits = best.getHits();
                                //luceneQuery = best.getLuceneQuery();
                                usingSuggestedQuery = true;
                            }
                        }
                        log.debug("DidYouMeanParser suggested " + suggestedQuery);
                    } else {
                        if (best != null && best.isOriginal()) {
                            log.debug("The suggestion was the original query after all");
                        }
                        log.debug("DidYouMeanParser did not suggest anything");
                    }
                } finally {
                    compositeSearcher.close();
                }
            }
            /*
            if (hits.length() == 0 && suggestedQuery != null) {
            // If we didn't find anything at all, go ahead and show them what the suggested query
            // will give them
            Query suggestedLuceneQuery = generateLuceneQuery(suggestedQuery, searcher);
            suggestedLuceneQuery = suggestedLuceneQuery.rewrite(reader);
               Hits suggestedHits = searcher.search(suggestedLuceneQuery);
               if (suggestedHits.length() > 0) {
             hits = suggestedHits;
             luceneQuery = suggestedLuceneQuery;
             usingSuggestedQuery = true;
               }
            }
               */
            totalHits = hits.length();
            // Get the genre matches:
            try {
                BitSetFacetHitCounter facetHitCounter = new BitSetFacetHitCounter();
                facetHitCounter.setSearcher(searcher);
                String baseQueryString = (isUsingSuggestedQuery() ? suggestedQuery : query);
                // Multi-word queries are quoted for the per-field display strings below.
                String quotedQueryString = baseQueryString;
                if (quotedQueryString.indexOf('"') == -1 && quotedQueryString.indexOf(' ') > -1) {
                    quotedQueryString = "\"" + quotedQueryString + "\"";
                }
                facetHitCounter.setBaseQuery(luceneQuery, baseQueryString);

                List<HitCount> subQueries = new ArrayList<HitCount>();
                for (Map.Entry<String, Query> entry : genreQueries.entrySet()) {
                    subQueries.add(
                            new HitCount(entry.getKey(), entry.getValue(), entry.getValue().toString(), 0));
                }
                facetHitCounter.setSubQueries(subQueries);
                genreCounts = facetHitCounter.getFacetHitCounts(true);

                whatMatchedCounts = new ArrayList<HitCount>();
                whatMatchedCounts
                        .add(new HitCount("Title", getFieldQuery(baseQueryString, "programTitle", searcher),
                                "programTitle:" + quotedQueryString, 0));
                whatMatchedCounts.add(
                        new HitCount("Episode Title", getFieldQuery(baseQueryString, "episodeTitle", searcher),
                                "episodeTitle:" + quotedQueryString, 0));
                whatMatchedCounts.add(
                        new HitCount("Description", getFieldQuery(baseQueryString, "description", searcher),
                                "description:" + quotedQueryString, 0));
                whatMatchedCounts.add(new HitCount("Content", getFieldQuery(baseQueryString, "text", searcher),
                        "text:" + quotedQueryString, 0));
                whatMatchedCounts
                        .add(new HitCount("Credits", getFieldQuery(baseQueryString, "credits", searcher),
                                "credits:" + quotedQueryString, 0));
                facetHitCounter.setSubQueries(whatMatchedCounts);
                whatMatchedCounts = facetHitCounter.getFacetHitCounts(true);

                //Program Count  -- Not sure if there is a better way to do this.
                HashSet<String> programTitles = new HashSet<String>();
                programCounts = new ArrayList<HitCount>();
                for (int i = 0; i < hits.length() && programCounts.size() < 5; i++) {
                    String title = hits.doc(i).get("programTitle");
                    if (!programTitles.contains(title)) {
                        // The original followed this with a String.replace("\"", ...) call whose
                        // result was discarded (Strings are immutable), so it never had any
                        // effect. It is removed rather than "fixed": QueryParser.escape is
                        // expected to escape quote characters already, and escaping them again
                        // would corrupt the query.
                        String queryTitle = QueryParser.escape(title);
                        if (queryTitle.indexOf(' ') > -1) {
                            queryTitle = "\"" + queryTitle + "\"";
                        }

                        programCounts
                                .add(new HitCount(title, getFieldQuery(queryTitle, "programTitle", searcher),
                                        "programTitle:" + queryTitle, 0));
                        programTitles.add(title);
                    }
                }
                facetHitCounter.setSubQueries(programCounts);
                programCounts = facetHitCounter.getFacetHitCounts(false);
            } catch (Exception e) {
                // Facet counts are best-effort: log through the class logger (instead of
                // printing to stderr) and still return the main results.
                log.error("Error computing facet hit counts", e);
            }

            results = new ArrayList<SearchResult>();
            programToSearchResult.clear();
            Query userQuery = getContentQuery(query, searcher);
            // rewrite() returns the rewritten query rather than modifying the receiver, so
            // the result has to be kept (as is done for luceneQuery above). The original
            // discarded it and handed the un-rewritten query to the highlighter.
            userQuery = userQuery.rewrite(reader);
            Highlighter highlighter = new Highlighter(new TermFormatter(), new QueryScorer(userQuery, "text"));

            log.debug("#hits=" + hits.length());

            EPGProvider epgProvider = DefaultEpg.getInstance();

            boolean missingWebPaths = false; // We added this to the index midstream, so some do and some don't.
            // Next index rebuild, and they'll all have it.
            for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                if (hits.doc(i + startIndex).get("webPath") == null) {
                    missingWebPaths = true;
                    break;
                }
            }
            // Programs are only looked up when at least one hit on this page lacks a webPath.
            Program[] programs = null;
            if (missingWebPaths) {
                List<String> programIds = new ArrayList<String>(pageSize);
                for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                    programIds.add(hits.doc(i + startIndex).get("programID"));
                }
                programs = DefaultEpg.getInstance().getProgramList(programIds);
            }
            for (int i = 0; i < pageSize && i + startIndex < hits.length(); i++) {
                addDocument(hits.doc(i + startIndex), hits.score(i + startIndex), epgProvider, highlighter,
                        analyzer, null, null, (programs == null ? null : programs[i]));
            }
            if (results.size() + startIndex < hits.length()) {
                hasMoreResults = true;
            }
        } finally {
            if (searcher != null) {
                searcher.close();
            }
        }
    } catch (IOException e) {
        log.error("Error searching index", e);
    } catch (ParseException e) {
        log.error("Error searching index", e);
    }
    return results;
}

From source file:com.bewsia.script.safe.lucene.SEntity.java

License:Open Source License

/**
 * Returns the best highlighted fragments of {@code text} for the given query.
 * <p>
 * The text is tokenized with a {@code StandardAnalyzer} and highlighted with the default
 * {@code SimpleHTMLFormatter}. When highlighting produces an empty string the original
 * text is returned unchanged.
 *
 * @param query           the query whose terms are highlighted
 * @param text            the text to highlight
 * @param field           the field name handed to the analyzer
 * @param fragmentSize    size passed to the {@code SimpleFragmenter}
 * @param maxNumFragments maximum number of fragments to join
 * @param separator       string placed between the fragments
 * @return the joined fragments, or {@code text} when the result is empty
 * @throws Exception if tokenizing or highlighting fails
 */
public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments,
        String separator) throws Exception {
    Analyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_36);
    CachingTokenFilter cachedTokens = new CachingTokenFilter(
            standardAnalyzer.tokenStream(field, new StringReader(text)));

    Scorer queryScorer = new org.apache.lucene.search.highlight.QueryScorer(query);
    Highlighter fragmentHighlighter = new Highlighter(new SimpleHTMLFormatter(), queryScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

    cachedTokens.reset();
    String highlighted = fragmentHighlighter.getBestFragments(cachedTokens, text, maxNumFragments, separator);
    if (highlighted.length() == 0) {
        return text;
    }
    return highlighted;
}

From source file:com.bluedragon.search.search.QueryRun.java

License:Open Source License

/**
 * Appends one result row for the given document to {@code queryResultData}.
 * <p>
 * Columns 1 to 6 hold the id, name, score, search count, records searched and the
 * one-based rank. Every stored field of the document is then written to its own column,
 * which is created on first use. When a unique column is configured and its value has
 * already been seen, the freshly added row is removed again and the method returns.
 * If context passages are enabled, a {@code CONTEXT} column is filled with highlighted
 * fragments of the document contents.
 *
 * @param searcher        searcher used to load the document
 * @param docid           Lucene document id
 * @param score           score written to column 3
 * @param rank            zero-based rank; written as {@code rank + 1} to column 6
 * @param searchCount     value written to column 4
 * @param recordsSearched value written to column 5
 */
private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount,
        int recordsSearched) throws CorruptIndexException, Exception {
    DocumentWrap document = new DocumentWrap(searcher.doc(docid));

    // Append a row and make it the current one so the setCell calls below target it.
    queryResultData.addRow(1);
    queryResultData.setCurrentRow(queryResultData.getSize());

    // Standard columns that exist for every search.
    queryResultData.setCell(1, new cfStringData(document.getId()));
    queryResultData.setCell(2, new cfStringData(document.getName()));
    queryResultData.setCell(3, new cfNumberData(score));
    queryResultData.setCell(4, new cfNumberData(searchCount));
    queryResultData.setCell(5, new cfNumberData(recordsSearched));
    queryResultData.setCell(6, new cfNumberData(rank + 1));

    String uniqueColumn = queryAttributes.getUniqueColumn();

    // Custom columns, one per document field.
    for (IndexableField field : document.getDocument().getFields()) {
        String columnKey = field.name().toLowerCase();

        // Uniqueness check: a repeated value discards the row that was just added.
        if (uniqueSet != null && columnKey.equals(uniqueColumn)) {
            if (uniqueSet.contains(field.stringValue())) {
                queryResultData.deleteRow(queryResultData.getSize());
                return;
            }
            uniqueSet.add(field.stringValue());
        }

        // The contents field is only exposed when explicitly requested.
        if (columnKey.equals("contents") && !queryAttributes.getContentFlag()) {
            continue;
        }

        // Create the column the first time this field name is seen.
        if (!activeColumns.containsKey(columnKey)) {
            int createdColumn = queryResultData.addColumnData(field.name().toUpperCase(),
                    cfArrayData.createArray(1), null);
            activeColumns.put(columnKey, createdColumn);
        }

        // Columns 1 to 6 are the standard ones and are never overwritten by a field.
        int targetColumn = activeColumns.get(columnKey);
        if (targetColumn > 6) {
            queryResultData.setCell(targetColumn, new cfStringData(field.stringValue()));
        }
    }

    // Context passages are optional.
    if (queryAttributes.getContextPassages() <= 0) {
        return;
    }

    Scorer queryScorer = new QueryScorer(queryAttributes.getQuery());
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(queryAttributes.getContextHighlightStart(),
            queryAttributes.getContextHighlightEnd());
    Highlighter contextHighlighter = new Highlighter(htmlFormatter, queryScorer);
    Fragmenter contextFragmenter = new SimpleFragmenter(queryAttributes.getContextBytes());
    contextHighlighter.setTextFragmenter(contextFragmenter);

    String contents = document.getAttribute(DocumentWrap.CONTENTS);
    if (contents == null) {
        return;
    }

    // Stays empty when highlighting fails.
    String contextText = "";
    TokenStream contentTokens = AnalyzerFactory.get("simple").tokenStream(DocumentWrap.CONTENTS,
            new StringReader(contents));
    try {
        String[] passages = contextHighlighter.getBestFragments(contentTokens, contents,
                queryAttributes.getContextPassages());
        if (passages.length == 1) {
            contextText = passages[0] + "...";
        } else {
            // Each passage is prefixed with "..." and the whole text is closed with "...".
            StringBuilder joined = new StringBuilder();
            for (String passage : passages) {
                joined.append("...").append(passage);
            }
            joined.append("...");
            contextText = joined.toString();
        }
    } catch (Exception ignored) {
        // Best effort: a highlighting failure leaves the context cell empty.
    }

    // Create the CONTEXT column on first use, then store the passages.
    if (!activeColumns.containsKey("context")) {
        int contextColumn = queryResultData.addColumnData("CONTEXT", cfArrayData.createArray(1), null);
        activeColumns.put("context", contextColumn);
    }

    queryResultData.setCell(activeColumns.get("context"), new cfStringData(contextText));
}

From source file:com.bugull.mongo.lucene.BuguHighlighter.java

License:Apache License

/**
 * Highlights the configured keywords inside a field value.
 * <p>
 * The {@code keywords} are parsed into a query for {@code fieldName}, the value is
 * tokenized with the index analyzer, and up to {@code maxFragments} fragments are joined
 * with {@code "..."}.
 *
 * @param fieldName  name of the field the value belongs to
 * @param fieldValue text to highlight
 * @return the highlighted fragments joined with {@code "..."}
 * @throws Exception if the keywords cannot be parsed or highlighting fails
 */
public String getResult(String fieldName, String fieldValue) throws Exception {
    BuguIndex buguIndex = BuguIndex.getInstance();

    // Parse first: a parse failure must surface before any token stream is opened.
    QueryParser keywordParser = new QueryParser(buguIndex.getVersion(), fieldName, buguIndex.getAnalyzer());
    Query keywordQuery = keywordParser.parse(keywords);

    TokenStream valueTokens = buguIndex.getAnalyzer().tokenStream(fieldName, new StringReader(fieldValue));

    // The same scorer drives both the highlighter and the span-aware fragmenter.
    QueryScorer queryScorer = new QueryScorer(keywordQuery, fieldName);
    Highlighter fragmentHighlighter = new Highlighter(formatter, queryScorer);
    fragmentHighlighter.setTextFragmenter(new SimpleSpanFragmenter(queryScorer));

    return fragmentHighlighter.getBestFragments(valueTokens, fieldValue, maxFragments, "...");
}

From source file:com.doculibre.constellio.lucene.BaseLuceneIndexHelper.java

License:Open Source License

/**
 * Highlights the terms of a Lucene query inside a string.
 * <p>
 * The query is rewritten against the index in {@code indexDir} before scoring. Strings
 * longer than {@code TAILLE_CHAINE_NON_FRAGMENTEE} are cut into fragments of
 * {@code TAILLE_FRAGMENT} characters, and the {@code NB_BEST_FRAGMENT} best ones are
 * joined with {@code FRAGMENT_SEP}. Shorter strings are highlighted as a whole. When
 * nothing is highlighted in the {@code "titre"} field (case-insensitive), the original
 * string is returned instead.
 * <p>
 * The index reader and directory are closed in {@code finally} blocks so they are
 * released even when rewriting or highlighting throws.
 *
 * @param strToHighlight the text to highlight
 * @param fieldName      the field the text belongs to
 * @param luceneQuery    the query whose terms are highlighted
 * @return the highlighted text; may be {@code null} or blank when nothing matched and
 *         the field is not {@code "titre"}
 * @throws RuntimeException wrapping any {@link IOException} or
 *         {@link InvalidTokenOffsetsException}
 */
public String highlight(String strToHighlight, String fieldName, Query luceneQuery) {
    Analyzer analyzer = analyzerProvider.getAnalyzer(Locale.FRENCH);
    try {
        Directory directory = FSDirectory.open(indexDir);
        try {
            IndexReader indexReader = DirectoryReader.open(directory);
            try {
                Query rewrittenLuceneQuery = luceneQuery.rewrite(indexReader);
                QueryScorer luceneScorer = new QueryScorer(rewrittenLuceneQuery);
                SimpleHTMLFormatter luceneFormatter = new SimpleHTMLFormatter("<span class=\"hit\">", "</span>");
                Highlighter luceneHighlighter = new Highlighter(luceneFormatter, luceneScorer);

                // Long strings are fragmented; short ones are highlighted as a whole.
                boolean fragmented = strToHighlight.length() > TAILLE_CHAINE_NON_FRAGMENTEE;
                Fragmenter luceneFragmenter = new SimpleFragmenter(
                        fragmented ? TAILLE_FRAGMENT : Integer.MAX_VALUE);
                luceneHighlighter.setTextFragmenter(luceneFragmenter);

                TokenStream luceneTokenStream = analyzer.tokenStream(fieldName,
                        new StringReader(strToHighlight));
                String fragment;
                if (fragmented) {
                    fragment = luceneHighlighter.getBestFragments(luceneTokenStream, strToHighlight,
                            NB_BEST_FRAGMENT, FRAGMENT_SEP);
                } else {
                    fragment = luceneHighlighter.getBestFragment(luceneTokenStream, strToHighlight);
                }

                // A title is always shown, even when no term matched in it.
                if (StringUtils.isBlank(fragment) && fieldName.equalsIgnoreCase("titre")) {
                    fragment = strToHighlight;
                }
                return fragment;
            } finally {
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InvalidTokenOffsetsException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.duroty.application.bookmark.manager.BookmarkManager.java

License:Open Source License

/**
 * DOCUMENT ME!/*  w  w  w .jav  a2 s  .com*/
 *
 * @param repositoryName DOCUMENT ME!
 * @param token DOCUMENT ME!
 * @param page DOCUMENT ME!
 * @param messagesByPage DOCUMENT ME!
 * @param order DOCUMENT ME!
 * @param orderType DOCUMENT ME!
 *
 * @return DOCUMENT ME!
 *
 * @throws BookmarkException DOCUMENT ME!
 * @throws SearchException DOCUMENT ME!
 */
public SearchObj search(String repositoryName, String token, int page, int messagesByPage, int order,
        String orderType, boolean isNotebook) throws BookmarkException {
    String lucenePath = "";

    if (!defaultLucenePath.endsWith(File.separator)) {
        lucenePath = defaultLucenePath + File.separator + repositoryName + File.separator
                + Constants.BOOKMARK_LUCENE_BOOKMARK;
    } else {
        lucenePath = defaultLucenePath + repositoryName + File.separator + Constants.BOOKMARK_LUCENE_BOOKMARK;
    }

    Searcher searcher = null;
    SearchObj searchObj = new SearchObj();
    Highlighter highlighter = null;

    try {
        searcher = BookmarkIndexer.getSearcher(lucenePath);

        Query query = null;
        Hits hits = null;

        if (StringUtils.isBlank(token)) {
            if (isNotebook) {
                query = SimpleQueryParser.parse("notebook:true", new KeywordAnalyzer());
            } else {
                query = new MatchAllDocsQuery();
            }

            hits = searcher.search(query, new Sort(new SortField[] { SortField.FIELD_SCORE,
                    new SortField(Field_insert_date, SortField.STRING, true) }));
        } else {
            query = SimpleQueryParser.parse(token, analyzer);

            StringBuffer buffer = new StringBuffer();

            if (isNotebook) {
                buffer.append("(" + query.toString() + ") AND ");

                QueryParser parser = new QueryParser(Field_notebook, new KeywordAnalyzer());
                parser.setDefaultOperator(Operator.AND);

                Query aux = parser.parse(String.valueOf(true));

                buffer.append("(" + aux.toString() + ") ");
            }

            if (buffer.length() > 0) {
                QueryParser parser = new QueryParser("", new WhitespaceAnalyzer());
                query = parser.parse(buffer.toString());
            }

            hits = searcher.search(query);
        }

        Date searchStart = new Date();

        Date searchEnd = new Date();

        //time in seconds
        double time = ((double) (searchEnd.getTime() - searchStart.getTime())) / (double) 1000;

        int hitsLength = hits.length();

        if (hitsLength <= 0) {
            return null;
        }

        int start = page * messagesByPage;
        int end = start + messagesByPage;

        if (end > 0) {
            end = Math.min(hitsLength, end);
        } else {
            end = hitsLength;
        }

        if (start > end) {
            throw new SearchException("Search index of bound. start > end");
        }

        Vector bookmarks = new Vector();

        for (int j = start; j < end; j++) {
            Document doc = hits.doc(j);

            if (doc != null) {
                LuceneBookmark luceneBookmark = new LuceneBookmark(doc);

                SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
                highlighter = new Highlighter(formatter, new QueryScorer(query));
                highlighter.setTextFragmenter(new SimpleFragmenter(150));

                BookmarkObj bookmarkObj = new BookmarkObj();
                bookmarkObj.setCacheDate(luceneBookmark.getCacheDate());
                bookmarkObj.setComments(luceneBookmark.getComments());
                bookmarkObj.setContents(luceneBookmark.getCotents());
                bookmarkObj.setDepth(luceneBookmark.getDepth());
                bookmarkObj.setFlagged(luceneBookmark.isFlagged());
                bookmarkObj.setIdint(luceneBookmark.getIdint());
                bookmarkObj.setInsertDate(luceneBookmark.getInsertDate());
                bookmarkObj.setKeywords(luceneBookmark.getKeywords());
                bookmarkObj.setNotebook(luceneBookmark.isNotebook());
                bookmarkObj.setParent(Long.parseLong(luceneBookmark.getParent()));
                bookmarkObj.setTitle(luceneBookmark.getTitle());
                bookmarkObj.setTitleHighlight(luceneBookmark.getTitle());
                bookmarkObj.setUrl(luceneBookmark.getUrl());

                String contents = luceneBookmark.getCotents();
                String hcontents = null;

                if ((contents != null) && (!contents.trim().equals(""))) {
                    contents = contents.replaceAll("\\s+", " ");

                    TokenStream tokenStream = analyzer.tokenStream(Field_contents, new StringReader(contents));
                    hcontents = highlighter.getBestFragment(tokenStream, contents);

                    if (hcontents != null) {
                        contents = hcontents;
                    } else {
                        contents = null;
                    }
                }

                bookmarkObj.setContentsHighlight(contents);

                String title = luceneBookmark.getTitle();
                String htitle = null;

                if ((title != null) && (!title.trim().equals(""))) {
                    title = title.replaceAll("\\s+", " ");

                    TokenStream tokenStream = analyzer.tokenStream(Field_title, new StringReader(title));
                    htitle = highlighter.getBestFragment(tokenStream, title);

                    if (htitle != null) {
                        title = htitle;
                    }
                }

                bookmarkObj.setTitleHighlight(title);

                bookmarks.addElement(bookmarkObj);
            }
        }

        searchObj.setHits(hitsLength);
        searchObj.setTime(time);
        searchObj.setBookmarks(bookmarks);
    } catch (Exception ex) {
        throw new SearchException(ex);
    } finally {
    }

    return searchObj;
}

From source file:com.edgenius.wiki.search.service.AbstractSearchService.java

License:Open Source License

/**
 * Converts the requested page of raw search hits into {@link SearchResultItem}s, skipping
 * hits the given user is not allowed to view, and appends them to
 * {@code viewableMatchedResults}.
 *
 * @param searcher searcher used to load the stored documents of the hits
 * @param viewableMatchedResults output list; one item is appended per viewable hit in the page
 * @param hits raw search result
 * @param hlQuery query used to build the highlighter; when {@code null} no highlighter is
 *        created and {@code null} is handed to {@code createFragment}
 * @param from index of the first hit of the page (inclusive), must be {@code >= 0}
 * @param to index after the last hit of the page (exclusive), or {@code -1} for "up to the
 *        last hit"
 * @param user user whose view permission is checked for every hit
 * @return the total hit count of the search. For performance reasons this is NOT reduced by
 *         the hits filtered out for permission reasons, so it may be larger than the number
 *         of results the user can actually see.
 * @throws IOException if a document cannot be loaded from the index
 */
private int detach(IndexSearcher searcher, List<SearchResultItem> viewableMatchedResults, TopDocs hits,
        Query hlQuery, int from, int to, User user) throws IOException {

    // "to == -1" means "no upper bound", so only a real upper bound has to be >= from.
    // (The former check required from <= to unconditionally, which rejected to == -1.)
    Assert.isTrue(from >= 0 && (to == -1 || from <= to));

    //For performance reasons the total result set length is returned without applying the permission filter.
    //The total may therefore be larger than the set the user can view, because some results are filtered out
    //when the user has no permission to see them.
    int len = hits.totalHits;

    if (len > 0 && from < len) {
        to = to == -1 ? len : Math.min(to, len);
        // scoreDocs only holds the collected top hits, which can be fewer than totalHits;
        // never index past the end of that array.
        to = Math.min(to, hits.scoreDocs.length);

        //security filter: keep only the documents the user may view.
        //"to" is exclusive. For example, from:to of 0:10 covers index 0-9.
        //TODO: if the page includes results that are invisible to the user, it would be better to display
        //a message telling the user that some results are hidden for security reasons.
        List<Document> viewableDocs = new ArrayList<Document>();
        for (int idx = from; idx < to; idx++) {
            // load each document once and reuse it below instead of fetching it a second time
            Document candidate = searcher.doc(hits.scoreDocs[idx].doc);
            if (isAllowView(candidate, user)) {
                viewableDocs.add(candidate);
            }
        }

        //create one highlighter shared by all fragment creation below
        Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");
        Highlighter hl = null;
        if (hlQuery != null) {
            Scorer scorer = new QueryScorer(hlQuery);
            hl = new Highlighter(formatter, scorer);
            Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_LEN);
            hl.setTextFragmenter(fragmenter);
        }

        for (Document doc : viewableDocs) {
            SearchResultItem item = new SearchResultItem();

            String docType = doc.get(FieldName.DOC_TYPE);

            //common items in search results
            item.setType(NumberUtils.toInt(docType));
            item.setDatetime(doc.get(FieldName.UPDATE_DATE));
            if (userReadingService != null
                    && !Integer.valueOf(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                String username = doc.get(FieldName.CONTRIBUTOR);
                User contirUser = userReadingService.getUserByName(username);
                if (contirUser != null) {
                    item.setContributor(contirUser.getFullname());
                    item.setContributorUsername(username);
                }
            }
            if (Integer.valueOf(SharedConstants.SEARCH_PAGE).toString().equals(docType)) {
                String content = doc.get(FieldName.PAGE_CONTENT);
                item.setTitle(doc.get(FieldName.PAGE_TITLE));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));

                //item.desc is deliberately not set to the content: it may be a very big string
                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), content)));

            } else if (Integer.valueOf(SharedConstants.SEARCH_COMMENT).toString().equals(docType)) {
                String content = doc.get(FieldName.CONTENT);

                item.setItemUid(doc.get(FieldName.COMMENT_UID));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                item.setTitle(doc.get(FieldName.UNSEARCH_PAGE_TITLE));
                //item.desc is deliberately not set to the content: it may be a very big string
                item.setFragment(createFragment(hl, content));

            } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE).toString().equals(docType)) {
                String title = doc.get(FieldName.SPACE_NAME);
                item.setTitle(title);
                item.setSpaceUname(doc.get(FieldName.SPACE_UNIXNAME));
                item.setDesc(doc.get(FieldName.SPACE_DESC));
                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getTitle(), item.getDesc())));

            } else if (Integer.valueOf(SharedConstants.SEARCH_WIDGET).toString().equals(docType)) {
                //wTitle-> title; wDesc-> desc; wTitle(could be pageTitle or markup title) ->spaceUname
                String widgetType = doc.get(FieldName.WIDGET_TYPE);
                String title = doc.get(FieldName.WIDGET_TITLE);

                //does the content need to be transferred back? so far no
                String content = doc.get(FieldName.WIDGET_CONTENT);
                if (WidgetModel.TYPE_PAGE_LINKER.equals(widgetType)) {
                    //page-linker content is not used as highlighter fragment
                    content = "";
                }

                String desc = doc.get(FieldName.WIDGET_DESC);

                item.setDesc(desc);
                item.setTitle(title);

                //slightly confusing field mapping: widget key -> spaceUname, widget type -> itemUid
                item.setSpaceUname(doc.get(FieldName.WIDGET_KEY));
                item.setItemUid(widgetType);

                item.setFragment(createFragment(hl, StringUtil.join(" ", item.getDesc(), content)));

            } else if (Integer.valueOf(SharedConstants.SEARCH_PAGE_TAG).toString().equals(docType)) {
                //page tag
                item.setTitle(doc.get(FieldName.PAGE_TAG_NAME));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                item.setFragment(createFragment(hl, item.getTitle()));
            } else if (Integer.valueOf(SharedConstants.SEARCH_SPACE_TAG).toString().equals(docType)) {
                //space tag
                item.setTitle(doc.get(FieldName.SPACE_TAG_NAME));
                item.setFragment(createFragment(hl, item.getTitle()));

            } else if (Integer.valueOf(SharedConstants.SEARCH_USER).toString().equals(docType)) {
                String username = doc.get(FieldName.USER_NAME);
                item.setTitle(username);
                String fullname = doc.get(FieldName.USER_FULLNAME);
                //hack - for user hits the contributor is the user's own full name
                item.setContributor(fullname);
                if (userReadingService != null) {
                    // the indexed user may no longer be resolvable; guard like the contributor lookup above
                    User matchedUser = userReadingService.getUserByName(username);
                    if (matchedUser != null) {
                        item.setDesc(matchedUser.getSetting().getStatus());
                    }
                }
                item.setFragment(createFragment(hl, fullname));

            } else if (Integer.valueOf(SharedConstants.SEARCH_ROLE).toString().equals(docType)) {
                item.setSpaceUname(doc.get(FieldName.ROLE_NAME));
                item.setTitle(doc.get(FieldName.ROLE_DISPLAY_NAME));
                item.setDesc(doc.get(FieldName.ROLE_DESC));
                //no fragment is set for roles

            } else if (Integer.valueOf(SharedConstants.SEARCH_ATTACHMENT).toString().equals(docType)) {
                item.setTitle(doc.get(FieldName.FILE_NAME));
                item.setDesc(doc.get(FieldName.FILE_COMMENT));
                item.setItemUid(doc.get(FieldName.FILE_NODE_UUID));
                item.setSpaceUname(doc.get(FieldName.UNSEARCH_SPACE_UNIXNAME));
                String text = doc.get(FieldName.TEXT);
                //file content is not stored in the index, so the fragment may come out empty;
                //fall back to the file comment in that case
                String fragment = createFragment(hl, StringUtil.join(" ", item.getDesc(), text));
                item.setFragment(
                        (fragment == null || fragment.trim().length() == 0) ? ("Comment: " + item.getDesc())
                                : fragment);
            }
            viewableMatchedResults.add(item);
        }
    }
    return len;
}

From source file:com.flaptor.hounder.searcher.SnippetSearcher.java

License:Apache License

/**
 * Adds a snippet field to every document of the given grouped search results.
 * <p>
 * For each group, and for each document in that group:
 * <ol>
 *   <li>the Lucene highlighter marks the query terms in the requested field,</li>
 *   <li>{@code getSnippet(...)} turns the highlighted text into the snippet,</li>
 *   <li>the snippet is stored on the document in a field named
 *       {@code SNIPPET_FIELDNAME_PREFIX + snippetOfField}.</li>
 * </ol>
 * Documents that lack the requested field are skipped with a warning. When no snippet could
 * be produced, the result is either the empty string (if empty snippets are allowed) or the
 * first {@code snippetLength} characters of the field text.
 *
 * @param res grouped results whose documents receive the snippet field
 * @param snippetOfField name of the stored field the snippet is taken from
 * @param snippetLength length passed to {@code getSnippet(...)} and used for the fallback prefix
 * @param scorer query scorer driving the highlighting
 * @param simpleHtmlFormatter formatter used to mark highlighted terms
 * @throws IOException if highlighting the field text fails
 */
private void addSnippets(GroupedSearchResults res, String snippetOfField, int snippetLength, QueryScorer scorer,
        Formatter simpleHtmlFormatter) throws IOException {

    Highlighter termHighlighter = new Highlighter(simpleHtmlFormatter, scorer);
    termHighlighter.setTextFragmenter(NULL_FRAGMENTER);
    // lift the analysis limit so that the whole text is analyzed
    termHighlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

    // Every phrase seen so far; handed to getSnippet(...) so that
    // recurring phrases can be given a lower score.
    Set<String> seenPhrases = new HashSet<String>();

    // The target field name is the same for every document.
    String snippetFieldName = SNIPPET_FIELDNAME_PREFIX + snippetOfField;

    for (int groupIdx = 0; groupIdx < res.groups(); groupIdx++) {
        Vector<Document> groupDocs = res.getGroup(groupIdx).last();
        for (Document current : groupDocs) {
            String fieldText = current.get(snippetOfField);
            if (fieldText == null) {
                logger.warn("Asked to snippet an unexisting field: " + snippetOfField);
                continue;
            }

            TokenStream tokens = queryParser.tokenStream(snippetOfField, new StringReader(fieldText));
            TextFragment[] best = termHighlighter.getBestTextFragments(tokens, fieldText, false, 1);

            String snippet = (best != null && best.length > 0)
                    ? getSnippet(best[0].toString(), snippetLength, scorer, seenPhrases)
                    : null;

            if (snippet == null || snippet.length() == 0) {
                // nothing usable came out of the highlighter: empty snippet or plain text prefix
                snippet = emptySnippetsAllowed
                        ? ""
                        : fieldText.substring(0, Math.min(fieldText.length(), snippetLength));
            }

            current.add(new Field(snippetFieldName, snippet, Field.Store.YES, Field.Index.NO));
        }
    }
}