From source file:org.intermine.api.search.SearchResults.java

License:GNU General Public License

 * Actually filter the web searchable items we have to get a reduced list of matches.
 * @param origQueryString A query to filter the items against. Assumes the query
 *                        string is neither null nor empty.
 * @param target Information about the scope and type of items to receive.
 * @param profileRepo The repository of the user who wants to find something.
 * @return A set of search results.
 * @throws ParseException If the query string cannot be parsed.
 * @throws IOException If there is an issue opening the indices.
// Parses the prepared query string, runs it through the searcher built by prepareSearcher,
// and collects score, tags and highlighted description for every hit into a SearchResults.
// NOTE(review): this excerpt is missing closing braces and at least two statements
// (see the notes below), so it does not compile as shown.
private static SearchResults doFilteredSearch(String origQueryString, SearchTarget target,
        SearchRepository profileRepo) throws ParseException, IOException {

    // Highlighted description per hit; passed to the SearchResults constructor at the end.
    Map<WebSearchable, String> highlightedDescMap = new HashMap<WebSearchable, String>();

    String queryString = prepareQueryString(origQueryString);

    LOG.info("Searching " + target + " for " + " was:" + origQueryString + " now:" + queryString);
    // Start of the timing window that is reported in the second log message.
    long time = System.currentTimeMillis();

    org.apache.lucene.search.Query query;

    // English Snowball analyzer with the standard English stop-word set; the same analyzer
    // is reused further down to tokenize descriptions for highlighting.
    Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_30, "English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);

    // The default search field is the content buffer.
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
    query = queryParser.parse(queryString);

    // Get directories.
    String type = target.getType();
    Map<String, WebSearchable> globalWebSearchables = new HashMap<String, WebSearchable>();
    Set<SearchRepository> globals = SearchRepository.getGlobalSearchRepositories();
    List<Directory> globalDirs = new ArrayList<Directory>();
    // NOTE(review): the body of this loop is not present in the excerpt. As shown, nothing
    // ever populates globalWebSearchables or globalDirs - confirm against the full source.
    for (SearchRepository sr : globals) {
    Map<String, WebSearchable> userWebSearchables = profileRepo.getWebSearchableMap(type);
    Directory userDirectory = profileRepo.getSearchIndex(type);

    MultiSearcher searcher = prepareSearcher(target, userDirectory, globalDirs);

    // required to expand search terms
    query = searcher.rewrite(query);
    TopDocs topDocs = searcher.search(query, 1000); //FIXME: hardcoded limit

    // From here on, time holds the elapsed milliseconds rather than a timestamp.
    time = System.currentTimeMillis() - time;
    LOG.info("Found " + topDocs.totalHits + " document(s) that matched query '" + queryString + "' in " + time
            + " milliseconds:");

    QueryScorer scorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(FORMATTER, scorer);

    // Lucene score and tag set for every matched WebSearchable.
    Map<WebSearchable, Float> hitMap = new HashMap<WebSearchable, Float>();
    Map<WebSearchable, Set<String>> tags = new HashMap<WebSearchable, Set<String>>();

    // NOTE(review): the loop bound is totalHits, but the search above asked for at most 1000
    // results. If totalHits can exceed scoreDocs.length, scoreDocs[i] would go out of range;
    // bounding by scoreDocs.length looks safer - confirm against the Lucene TopDocs contract.
    for (int i = 0; i < topDocs.totalHits; i++) {
        WebSearchable webSearchable = null;
        Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
        //String docScope = doc.get("scope");
        String name = doc.get("name");

        // Resolve the hit by name: user items first, then global items; unknown names abort.
        webSearchable = userWebSearchables.get(name);
        if (webSearchable == null) {
            webSearchable = globalWebSearchables.get(name);
        if (webSearchable == null) {
            throw new RuntimeException("unknown WebSearchable: " + name);

        Float luceneScore = new Float(topDocs.scoreDocs[i].score);
        hitMap.put(webSearchable, luceneScore);

        // NOTE(review): split and asList are helpers imported outside this excerpt; how a
        // document without a tags field is handled cannot be seen from here.
        tags.put(webSearchable, new HashSet<String>(asList(split(doc.get("tags")))));

        try {
            // highlightedDescMap is assigned at the top and never reassigned in the visible
            // code, so this check is always true here.
            if (highlightedDescMap != null) {
                String highlightString = webSearchable.getDescription();
                // A missing description is highlighted as the empty string.
                if (highlightString == null) {
                    highlightString = "";
                TokenStream tokenStream = analyzer.tokenStream("", new StringReader(highlightString));
                // NullFragmenter: highlight the description as a whole rather than in
                // fragments (presumably - confirm against the Lucene highlighter docs).
                highlighter.setTextFragmenter(new NullFragmenter());
                // NOTE(review): the line below is the tail of a statement whose first line is
                // missing from the excerpt; presumably it stores the fragment in
                // highlightedDescMap - confirm against the full source.
                        highlighter.getBestFragment(tokenStream, highlightString));
        } catch (InvalidTokenOffsetsException e) {
            // Highlighting failures are logged and otherwise ignored.
            LOG.warn("Highlighter exception", e);

    // Index the hits by name for the SearchResults constructor.
    Map<String, WebSearchable> wsMap = new HashMap<String, WebSearchable>();
    for (WebSearchable ws : hitMap.keySet()) {
        wsMap.put(ws.getName(), ws);

    return new SearchResults(hitMap, wsMap, highlightedDescMap, tags);

From source file:org.jboss.seam.wiki.core.search.metamodel.SearchSupport.java


 * Returns the hits of the given query as fragments, highlighted, concatenated, and separated.
 * <p>
 * Pass in a <tt>NullFragmenter</tt> if you don't want any fragmentation by terms but
 * simply the hits highlighted. Otherwise, you will most likely use <tt>SimpleFragmenter</tt>.
 * The text you supply must be the same that was indexed, it will go through the same
 * analysis procedure to find the hits. Do not pass a different String than the one indexed
 * by Hibernate Search! If you use transparent string bridge with Hibernate Search, run the
 * bridge before passing the string into this method.
 * <p>
 * This method escapes any dangerous HTML characters in the indexed text and fragments by
 * replacing it with HTML entities. You can use the returned string directly to build a
 * <tt>SearchHit</tt>.
 * @param query the query that produced hits
 * @param fragmenter a fragmenter that can split the indexed text
 * @param indexedText the original text that was analyzed and indexed by Hibernate Search (after any bridges!)
 * @param numOfFragments the number of fragments to include in the returned result
 * @param alternativeLength if there are no hits to highlight, how many characters of the original text to return
 * @return the fragmented, highlighted, and then concatenated substring of the indexed text
// Highlights hits with internal placeholder markers, HTML-escapes the result, then swaps
// the placeholders for the real begin/end hit tags.
// NOTE(review): this excerpt is missing closing braces and the tail of one statement.
protected String escapeBestFragments(Query query, Fragmenter fragmenter, String indexedText, int numOfFragments,
        int alternativeLength) {

    // The HTML escaping forces us to first fragment with internal placeholders...
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(INTERNAL_BEGIN_HIT, INTERNAL_END_HIT),
            new QueryScorer(query));
    // NOTE(review): the fragmenter parameter is not applied to the highlighter anywhere in
    // the visible lines - confirm whether a setTextFragmenter call was lost from the excerpt.
    try {
        // Use the same analyzer as the indexer!
        TokenStream tokenStream = new StandardAnalyzer().tokenStream(null, new StringReader(indexedText));

        // NOTE(review): this call is cut off; its final argument(s) and closing parenthesis
        // are not present in the excerpt.
        String unescapedFragements = highlighter.getBestFragments(tokenStream, indexedText, numOfFragments,

        // Macros are removed before escaping.
        String escapedFragments = WikiUtil.escapeHtml(WikiUtil.removeMacros(unescapedFragements), false, false);

        // .. and then replace the internal placeholders with real tags after HTML has been escaped
        // NOTE(review): String.replaceAll interprets its first argument as a regular expression
        // and gives special meaning to dollar signs and backslashes in the replacement; this
        // is only safe if the placeholder and tag strings contain no such characters - verify.
        escapedFragments = escapedFragments.replaceAll(INTERNAL_BEGIN_HIT, getBeginHitTag());
        escapedFragments = escapedFragments.replaceAll(INTERNAL_END_HIT, getEndHitTag());

        // Strip out macros

        // If no fragments were produced (no hits), return the original text as an alternative
        if (escapedFragments.length() == 0 && alternativeLength != 0) {
            // Escaped prefix of the original text, at most alternativeLength characters long.
            return WikiUtil.escapeHtml(WikiUtil.removeMacros(indexedText.substring(0,
                    indexedText.length() > alternativeLength ? alternativeLength : indexedText.length())),
                    false, false);
        } else if (escapedFragments.length() == 0 && alternativeLength == 0) {
            // An alternativeLength of zero means: return the whole escaped original text.
            return WikiUtil.escapeHtml(WikiUtil.removeMacros(indexedText), false, false);

        return escapedFragments;

    } catch (Exception ex) {
        // Every failure, checked or not, is rethrown wrapped in a RuntimeException.
        throw new RuntimeException(ex);

From source file:org.mskcc.pathdb.lucene.LuceneResults.java

License:Open Source License

/**
 * Builds a highlighter for the given search term.
 * The term is parsed against the default field LuceneConfig.FIELD_ALL, rewritten against
 * the index, and wrapped in a QueryScorer.
 *
 * @param term the search term handed to the query parser
 * @return a highlighter using a default SimpleHTMLFormatter and a scorer for the rewritten query
 * @throws IOException declared by the method; presumably raised when the index cannot be opened
 * @throws ParseException declared by the method; presumably raised when the term cannot be parsed
 */
private Highlighter createHighlighter(String term) throws IOException, ParseException {

    //  Standard Analyzer to extract words using a list of English stop words.
    StandardAnalyzer analyzer = new StandardAnalyzer();

    //  Standard Query Parser
    QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);

    // for the usage of highlighting with wildcards
    // Necessary to expand search terms
    // NOTE(review): the reader opened here is never closed in the visible code - possible
    // resource leak; confirm against the full source.
    IndexReader reader = IndexReader.open(new File(LuceneConfig.getLuceneDirectory()));
    Query luceneQuery = queryParser.parse(term);
    luceneQuery = luceneQuery.rewrite(reader);

    //  Scorer implementation which scores text fragments by the number of
    //  unique query terms found.
    QueryScorer queryScorer = new QueryScorer(luceneQuery);

    //  HTML Formatted surrounds matching text with <B></B> tags.
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();

    //  Highlighter class
    Highlighter highLighter = new Highlighter(htmlFormatter, queryScorer);

    //  XXX Characters Max in Each Fragment
    // NOTE(review): this fragmenter is created but never attached to the highlighter in the
    // visible code; a setTextFragmenter call may have been lost from the excerpt.
    Fragmenter fragmenter = new SimpleFragmenter(100);
    return highLighter;

From source file:org.mskcc.pathdb.lucene.PsiInteractorExtractor.java

License:Open Source License

 * Constructor.
 * @param entrySet PSI-MI Entry Set Object.
 * @param queryStr Query String.
 * @param xdebug   XDebug Object.
 * @throws IOException         Input Output Exception.
 * @throws ParseException      Parsing Exception.
 * @throws ValidationException Validation Exception.
 * @throws MarshalException    Marshaling Exception.
// Stores the arguments, opens an index reader and, when a query string is supplied,
// prepares the rewritten query and a highlighter for it.
// NOTE(review): the excerpt ends inside the finally block, so the remaining work and the
// reader clean-up are not visible here.
public PsiInteractorExtractor(EntrySet entrySet, String queryStr, XDebug xdebug)
        throws IOException, ParseException, ValidationException, MarshalException {
    try {
        this.xdebug = xdebug;
        this.entrySet = entrySet;
        interactors = new HashSet();
        analyzer = new StandardAnalyzer();
        reader = IndexReader.open(LuceneConfig.getLuceneDirectory());
        // Query and highlighter are only set up when a query string was given.
        if (queryStr != null) {
            QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);
            query = queryParser.parse(queryStr);
            // Rewrite against the index so the query is expanded before scoring.
            query = query.rewrite(reader);
            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
            highLighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    } finally {
        // Guard before touching the reader; what follows is cut off in this excerpt.
        if (reader != null) {

From source file:org.mskcc.pathdb.tool.QueryFullText.java

License:Open Source License

 * Executes Full Text Query.
 * @param term Search Term
 * @throws QueryException Lucene Query Error
 * @throws IOException    I/O Error
 * @throws ParseException Lucene Parsing Error
// Runs the search term through LuceneReader, prints the hit count and, for up to ten hits,
// builds highlighted fragments of the FIELD_ALL field.
// NOTE(review): the excerpt ends inside the per-hit loop; what is done with formattedText
// is not visible here.
public static void queryFullText(String term) throws QueryException, IOException, ParseException {
    System.out.println("Using search term:  " + term);
    LuceneReader luceneReader = new LuceneReader();
    Hits hits = luceneReader.executeQuery(term);
    // At most the first ten hits are shown.
    int num = Math.min(10, hits.length());
    System.out.println("Total Number of Hits:  " + hits.length());
    if (hits.length() > 0) {

        //  Standard Analyzer to extract words using a list of English stop words.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        //  Standard Query Parser
        QueryParser queryParser = new QueryParser(LuceneConfig.FIELD_ALL, analyzer);

        // for the usage of highlighting with wildcards
        // Necessary to expand search terms
        // NOTE(review): the reader opened here is not closed anywhere in the visible code -
        // possible resource leak; confirm against the full source.
        IndexReader reader = IndexReader.open(new File(LuceneConfig.getLuceneDirectory()));
        Query luceneQuery = queryParser.parse(term);
        luceneQuery = luceneQuery.rewrite(reader);

        //  Scorer implementation which scores text fragments by the number of
        //  unique query terms found.
        QueryScorer queryScorer = new QueryScorer(luceneQuery);

        //  HTML Formatted surrounds matching text with <B></B> tags.
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();

        //  Highlighter class
        Highlighter highLighter = new Highlighter(htmlFormatter, queryScorer);

        //  XXX Characters Max in Each Fragment
        // NOTE(review): this fragmenter is not attached to the highlighter in the visible
        // code; a setTextFragmenter call may have been lost from the excerpt.
        Fragmenter fragmenter = new SimpleFragmenter(100);

        System.out.println("Showing hits:  0-" + (num - 1));
        for (int i = 0; i < num; i++) {
            System.out.print("Hit " + i + ":  ");

            //  Get the Matching Hit
            Document doc = hits.doc(i);

            //  Get the Field of Interest
            // NOTE(review): field is dereferenced below without a null check - confirm that
            // every indexed document carries FIELD_ALL.
            Field field = doc.getField(LuceneConfig.FIELD_ALL);

            //  Create the Token Stream
            // A fresh StandardAnalyzer is created per hit although the analyzer declared
            // above is still in scope.
            TokenStream tokenStream = new StandardAnalyzer().tokenStream(LuceneConfig.FIELD_ALL,
                    new StringReader(field.stringValue()));

            //  Get the Best Fragment
            // Up to five fragments, joined with an ellipsis separator.
            String formattedText = highLighter.getBestFragments(tokenStream, field.stringValue(), 5, "...");

From source file:org.olat.search.service.searcher.SearchResultsImpl.java

License:Apache License

 * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
 *
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
// Computes highlighted fragments for the content field (falling back to the description
// field when the content yields none) and for the title field of the given document.
// NOTE(review): the excerpt is missing closing braces and the statements that would store
// highlightResult and highlightTitle; resultDocument is not touched in the visible lines.
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc,
        final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(
            new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get 3 best fragments of content and separate with a "..."
    try {
        // highlight content
        // NOTE(review): if a document lacks the content, description or title field, doc.get
        // presumably returns null and the StringReader constructor would fail - confirm that
        // these fields are always stored.
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME,
                new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME,
                    new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        // Title fragments are joined with a single space instead of HIGHLIGHT_SEPARATOR.
        final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
    } catch (final InvalidTokenOffsetsException e) {
        // Highlighting failures are logged (with an empty message) and otherwise ignored.
        log.warn("", e);

From source file:org.openrdf.sail.lucene.LuceneQuery.java

License:BSD License

/**
 * Creates the highlighter for this query and stores it in the highlighter field.
 * NOTE(review): the formatter construction is cut off in this excerpt (its remaining
 * argument and closing parenthesis are missing), and the property parameter is not used
 * in the visible lines - confirm against the full source.
 *
 * @param property not referenced in the visible code
 */
public void highlight(URI property) {
    Formatter formatter = new SimpleHTMLFormatter(SearchFields.HIGHLIGHTER_PRE_TAG,
    highlighter = new Highlighter(formatter, new QueryScorer(query));

From source file:org.openrdf.sail.lucene.LuceneQueryIterator.java

License:BSD License

 * Evaluates one Lucene Query. It distinguishes between two cases,
 * the one where no subject is given and the one where it is given.
 * @param query the lucene query to evaluate
 * @return the lucene hits
// Parses the query string into a Lucene query, registers a highlighter when a snippet is
// requested, and searches the index with or without a subject restriction.
// Returns null when any exception is caught (the failure is only logged).
// NOTE(review): closing braces are missing from this excerpt.
private TopDocs evaluate(QuerySpec query) {
    // get the subject of the query
    Resource subject = query.getSubject();

    try {
        // parse the query string to a lucene query
        Query lucenequery = this.index.parseQuery(query.getQueryString(), query.getPropertyURI());

        // if the query requests for the snippet, create a highlighter using this query
        if (query.getSnippetVariableName() != null) {
            // formatter is not declared in this method; presumably a field of the class.
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(lucenequery));
            // Remember the highlighter under its QuerySpec.
            this.highlighters.put(query, highlighter);

        // distinguish the two cases of subject == null
        if (subject == null) {
            return this.index.search(lucenequery);
        } else {
            return this.index.search(subject, lucenequery);
    } catch (Exception e) {
        // NOTE(review): the closing single quote after the property URI looks missing from
        // this message.
        log.error("There was a problem evaluating query '" + query.getQueryString() + "' for property '"
                + query.getPropertyURI() + "!", e);

    // Reached only after a logged failure; callers must be prepared for a null result.
    return null;

From source file:org.openrdf.sail.lucene.LuceneSailConnection.java

License:BSD License

 * Evaluates one Lucene Query. It distinguishes between two cases, the one
 * where no subject is given and the one where it is given.
 *
 * @param query
 *        the Lucene query to evaluate
 * @return QueryResult consisting of hits and highlighter
// Evaluates the query spec against the Lucene index and returns the hits together with an
// optional highlighter. An empty query string yields an empty TopDocs without searching.
// NOTE(review): closing braces are missing from this excerpt.
private QueryResult evaluate(QuerySpec query) {
    // Both stay null if an exception is caught below; highlighter also stays null when no
    // snippet is requested.
    TopDocs hits = null;
    Highlighter highlighter = null;

    // get the subject of the query
    Resource subject = query.getSubject();

    try {
        // parse the query string to a lucene query

        // NOTE(review): sQuery is dereferenced without a null check - confirm that
        // getQueryString never returns null (a failure here is caught and logged below).
        String sQuery = query.getQueryString();

        if (!sQuery.isEmpty()) {
            Query lucenequery = this.luceneIndex.parseQuery(query.getQueryString(), query.getPropertyURI());

            // if the query requests for the snippet, create a highlighter using
            // this query
            if (query.getSnippetVariableName() != null) {
                Formatter formatter = new SimpleHTMLFormatter();
                highlighter = new Highlighter(formatter, new QueryScorer(lucenequery));

            // distinguish the two cases of subject == null
            if (subject == null) {
                hits = this.luceneIndex.search(lucenequery);
            } else {
                hits = this.luceneIndex.search(subject, lucenequery);
        } else {
            // Empty query string: zero hits, no score docs, max score 0.
            hits = new TopDocs(0, new ScoreDoc[0], 0.0f);
    } catch (Exception e) {
        // NOTE(review): the closing single quote after the property URI looks missing from
        // this message.
        logger.error("There was a problem evaluating query '" + query.getQueryString() + "' for property '"
                + query.getPropertyURI() + "!", e);

    // After a logged failure, hits may still be null here.
    return new QueryResult(hits, highlighter);

From source file:org.schors.evlampia.search.LogEntryAggregator.java

License:Open Source License

/**
 * Returns the best highlighted fragment of the text for the first field that yields one.
 * The highlighter is created on first use and kept in the highlighter field.
 * NOTE(review): closing braces for the loop and the method are missing from this excerpt.
 *
 * @param text the text to highlight; null is passed straight back
 * @param fields field names tried in order
 * @return the first non-null highlighted fragment, the unchanged text if no field
 *         produces one, or null if text is null
 * @throws IOException declared by the method; not raised explicitly in the visible code
 * @throws InvalidTokenOffsetsException declared by the method; not raised explicitly in
 *         the visible code
 */
private final String tryHighlight(String text, String[] fields)
        throws IOException, InvalidTokenOffsetsException {

    if (null == text)
        return null;

    // Lazy initialisation: build the highlighter from the rewritten query on first call.
    if (null == highlighter) {
        final QueryScorer scorer = new QueryScorer(query.rewrite(indexSearcher.getIndexReader()));
        highlighter = new Highlighter(new SimpleHTMLFormatter("<span class='highlighted'>", "</span>"), scorer);
        // Fragment size of 330 is passed to the span fragmenter.
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 330));
    }

    for (final String field : fields) {
        final String highlighted = highlighter.getBestFragment(Constants.analyzer, field, text);
        if (null != highlighted)
            return highlighted;

    // No field produced a highlighted fragment: fall back to the plain text.
    return text;