Example usage for org.apache.lucene.search.highlight Highlighter Highlighter

List of usage examples for org.apache.lucene.search.highlight Highlighter Highlighter

Introduction

In this page you can find the example usage for org.apache.lucene.search.highlight Highlighter Highlighter.

Prototype

public Highlighter(Scorer fragmentScorer) 

Source Link

Usage

From source file:aos.lucene.tools.HighlightTest.java

License:Apache License

public void testHighlighting() throws Exception {
    // Source text containing the single term we expect to be wrapped in <B> tags.
    final String text = "The quick brown fox jumps over the lazy dog";

    // Query for "fox" against the synthetic "field" used below.
    TermQuery query = new TermQuery(new Term("field", "fox"));

    // Analyze the raw text the same way the (hypothetical) index would have.
    TokenStream tokenStream = new SimpleAnalyzer().tokenStream("field", new StringReader(text));

    // Wire scorer -> fragmenter -> highlighter; the default formatter emits <B>...</B>.
    QueryScorer scorer = new QueryScorer(query, "field");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    String fragment = highlighter.getBestFragment(tokenStream, text);
    assertEquals("The quick brown <B>fox</B> jumps over the lazy dog", fragment);
}

From source file:aos.lucene.tools.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    // Search the sample book index for titles containing "action".
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    // A single highlighter instance is reused across every hit.
    QueryScorer scorer = new QueryScorer(query, "title");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);

    Analyzer analyzer = new SimpleAnalyzer();

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        // Re-tokenize the stored "title" field (or reuse term vectors if present).
        TokenStream stream =
                TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);
        LOGGER.info(highlighter.getBestFragment(stream, title));
    }
}

From source file:ca.dracode.ais.indexer.FileSearcher.java

License:Open Source License

/**
 * Takes a list of Documents and highlights information relevant to a given Query
 * @param docs The documents to highlight
 * @param qry The query used to highlight the documents
 * @param type The type of the search, one of QUERY_BOOLEAN,
 *             which just notes the page on which the term exists or QUERY_STANDARD,
 *             which gives highlighted fragments and the page on which they exist.
 * @param term The term that created the query
 * @param maxResults The maximum number of results that will be returned
 * @return A SearchResult containing the results sorted by relevance and page
 */
private SearchResult getHighlightedResults(List<Document> docs, Query qry, int type, String term,
        int maxResults) {
    try {
        int numResults = 0;
        // path -> (page -> fragments); LinkedHashMap preserves relevance order of docs.
        LinkedHashMap<String, LinkedHashMap<Integer, List<String>>> results = new LinkedHashMap<String, LinkedHashMap<Integer, List<String>>>();
        for (int i = 0; i < docs.size() && numResults < maxResults; i++) {
            Document d = docs.get(i);
            int docPage = Integer.parseInt(d.get("page"));
            String name = d.get("path");
            LinkedHashMap<Integer, List<String>> docResult = results.get(name);
            if (docResult == null) {
                docResult = new LinkedHashMap<Integer, List<String>>();
                results.put(name, docResult);
            }
            if (type != FileSearcher.QUERY_BOOLEAN) {
                // QUERY_STANDARD: attach highlighted fragments for this page.
                String contents = d.get("text");
                Highlighter highlighter = new Highlighter(new QueryScorer(qry));

                String[] frag = null;
                try {
                    frag = highlighter.getBestFragments(new SimpleAnalyzer(Version.LUCENE_47), "text", contents,
                            maxResults - numResults);
                    numResults += frag.length;
                } catch (IOException e) {
                    Log.e(TAG, "Error while reading index", e);
                } catch (InvalidTokenOffsetsException e) {
                    Log.e(TAG, "Error while highlighting", e);
                }
                // BUGFIX: guard frag.length > 0 before reading frag[0]. An empty
                // fragment array (query matched the doc but produced no fragments)
                // previously threw ArrayIndexOutOfBoundsException here, which the
                // outer catch turned into a null SearchResult for the whole call.
                if (frag != null && frag.length > 0) {
                    Log.i(TAG, "Frags: " + frag.length + " " + Arrays.toString(frag) + " " + frag[0]);
                }
                ArrayList<String> tmpList = new ArrayList<String>(
                        Arrays.asList(frag != null ? frag : new String[0]));
                Log.i(TAG, "list " + tmpList.getClass().getName());
                docResult.put(docPage, tmpList);
            } else {
                // QUERY_BOOLEAN: only record that the term occurs on this page.
                ArrayList<String> tmp = new ArrayList<String>();
                tmp.add(term);
                docResult.put(docPage, tmp);
            }

        }
        Log.i(TAG, "" + results.size());
        return new SearchResult(results);
    } catch (Exception e) {
        // BUGFIX: use the TAG constant rather than the literal string "TAG",
        // consistent with every other log call in this method.
        Log.e(TAG, "Error while Highlighting", e);
        return null;
    }
}

From source file:calliope.search.AeseSearch.java

License:Open Source License

/**
 * Search the index for the given expression
 * @param expr the expression to be parsed
 * @param langCode the language of the expression and index
 * @param profile the hit profile (where to start from etc)
 * @return the result docs
 */
public static String searchIndex(String expr, String langCode, HitProfile profile) {
    StringBuilder sb = new StringBuilder();
    DirectoryReader reader = null;
    try {
        Analyzer analyzer = AeseSearch.createAnalyzer(langCode);
        reader = DirectoryReader.open(AeseSearch.index);
        if (reader != null) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser qp = new QueryParser(Version.LUCENE_45, "text", analyzer);
            Query q = qp.parse(expr);
            TopDocs hits = searcher.search(q, AeseSearch.maxHits);
            ScoreDoc[] docs = hits.scoreDocs;
            // Page through the hits according to the caller's profile window.
            for (int j = profile.from; j < profile.to && j < docs.length; j++) {
                Document doc = searcher.doc(docs[j].doc);
                String vid = doc.get(LuceneFields.VID);
                String docID = doc.get(LuceneFields.DOCID);
                Highlighter h = new Highlighter(new QueryScorer(q));
                String text = getCorTexVersion(docID, vid);
                sb.append(formatDocID(docID));
                sb.append(" ");
                sb.append(formatVersionID(vid));
                sb.append(" ");
                String frag = h.getBestFragment(analyzer, "text", text);
                sb.append("<span class=\"found\">");
                // BUGFIX: getBestFragment returns null when nothing matched in this
                // version's text; previously the literal string "null" was appended
                // into the HTML output.
                if (frag != null) {
                    sb.append(frag);
                }
                sb.append("</span>\n");
            }
            profile.numHits = docs.length;
        }
    } catch (Exception e) {
        sb.append(e.getMessage());
    } finally {
        // BUGFIX: the reader was only closed on the success path; any exception
        // thrown during parse/search leaked the underlying index files.
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignored) {
                // best-effort cleanup; the result string is already built
            }
        }
    }
    return sb.toString();
}

From source file:com.aurel.track.lucene.search.LuceneSearcher.java

License:Open Source License

/**
 * Runs the given query against the workitem index and returns the matching
 * item IDs, optionally collecting highlighted snippets per item.
 *
 * @param query the parsed Lucene query to execute
 * @param userQueryString the raw string the user typed (debug logging only)
 * @param preprocessedQueryString the query string after preprocessing (debug logging only)
 * @param highlightedTextMap if non-null, filled with itemID -> highlighted fragment
 *                           for every hit whose highlighter field produced a fragment
 * @return the item IDs of all hits (empty array when the searcher is unavailable
 *         or the search itself fails); rethrows BooleanQuery.TooManyClauses
 */
private static int[] getQueryResults(Query query, String userQueryString, String preprocessedQueryString,
        Map<Integer, String> highlightedTextMap) {
    int[] hitIDs = new int[0];
    IndexSearcher indexSearcher = null;
    try {
        long start = 0;
        if (LOGGER.isDebugEnabled()) {
            start = new Date().getTime();
        }
        indexSearcher = getIndexSearcher(LuceneUtil.INDEXES.WORKITEM_INDEX);
        if (indexSearcher == null) {
            // No index available: return the empty array, not null.
            return hitIDs;
        }
        ScoreDoc[] scoreDocs;
        try {
            // Collect at most MAXIMAL_HITS top-scoring documents.
            TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(MAXIMAL_HITS);
            indexSearcher.search(query, collector);
            scoreDocs = collector.topDocs().scoreDocs;
        } catch (IOException e) {
            // NOTE(review): message reads "failed with failed with" — duplicated
            // words in the log string; left as-is here (runtime string).
            LOGGER.warn("Getting the workitem search results failed with failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
            return hitIDs;
        }
        if (LOGGER.isDebugEnabled()) {
            long end = new Date().getTime();
            LOGGER.debug("Found " + scoreDocs.length + " document(s) (in " + (end - start)
                    + " milliseconds) that matched the user query '" + userQueryString
                    + "' the preprocessed query '" + preprocessedQueryString + "' and the query.toString() '"
                    + query.toString() + "'");
        }
        // One highlighter is built once and reused for every hit below.
        QueryScorer queryScorer = new QueryScorer(query/*, LuceneUtil.HIGHLIGHTER_FIELD*/);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer); // Set the best scorer fragments
        highlighter.setTextFragmenter(fragmenter); // Set fragment to highlight
        hitIDs = new int[scoreDocs.length];
        for (int i = 0; i < scoreDocs.length; i++) {
            int docID = scoreDocs[i].doc;
            Document doc = null;
            try {
                doc = indexSearcher.doc(docID);
            } catch (IOException e) {
                // A single unreadable document is logged and skipped; its slot in
                // hitIDs stays 0.
                LOGGER.error("Getting the workitem documents failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
            if (doc != null) {
                Integer itemID = Integer.valueOf(doc.get(LuceneUtil.getFieldName(SystemFields.ISSUENO)));
                if (itemID != null) {
                    hitIDs[i] = itemID.intValue();
                    if (highlightedTextMap != null) {
                        // Re-tokenize the stored highlighter field to produce a snippet.
                        String highligherFieldValue = doc.get(LuceneUtil.HIGHLIGHTER_FIELD);
                        TokenStream tokenStream = null;
                        try {
                            tokenStream = TokenSources.getTokenStream(LuceneUtil.HIGHLIGHTER_FIELD, null,
                                    highligherFieldValue, LuceneUtil.getAnalyzer(), -1);
                        } catch (Exception ex) {
                            // Highlighting is best-effort: a failed token stream only
                            // costs the snippet, never the hit itself.
                            LOGGER.debug(ex.getMessage());
                        }
                        if (tokenStream != null) {
                            String fragment = highlighter.getBestFragment(tokenStream, highligherFieldValue);
                            if (fragment != null) {
                                highlightedTextMap.put(itemID, fragment);
                            }
                        }
                    }
                }
            }
        }
        return hitIDs;
    } catch (BooleanQuery.TooManyClauses e) {
        // Deliberately rethrown so the caller can tell the user to narrow the query.
        LOGGER.error("Searching the query resulted in too many clauses. Try to narrow the query results. "
                + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        throw e;
    } catch (Exception e) {
        LOGGER.error("Searching the workitems failed with " + e.getMessage());
        LOGGER.debug(ExceptionUtils.getStackTrace(e));
        return hitIDs;
    } finally {
        // Always release the searcher and its underlying reader.
        closeIndexSearcherAndUnderlyingIndexReader(indexSearcher, "workItem");
    }
}

From source file:com.difference.historybook.index.lucene.LuceneIndex.java

License:Apache License

@Override
public SearchResultWrapper search(String collection, String query, int offset, int size, boolean includeDebug)
        throws IndexException {
    try {
        //TODO: make age be a component in the ranking?
        // Restrict the user's parsed query to the requested collection; the
        // collection clause is a FILTER so it does not affect scoring.
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        queryBuilder.add(parser.parse(query), Occur.MUST);
        queryBuilder.add(new TermQuery(new Term(IndexDocumentAdapter.FIELD_COLLECTION, collection)),
                Occur.FILTER);
        Query baseQuery = queryBuilder.build();

        // Recency boost: reciprocal of the document age in seconds, folded into
        // the base score via CustomScoreQuery below.
        FunctionQuery boostQuery = new FunctionQuery(
                new ReciprocalFloatFunction(new DurationValueSource(new Date().getTime() / 1000,
                        new LongFieldSource(IndexDocumentAdapter.FIELD_TIMESTAMP)), RECIP, 1F, 1F));

        Query q = new CustomScoreQuery(baseQuery, boostQuery);

        QueryScorer queryScorer = new QueryScorer(q, IndexDocumentAdapter.FIELD_SEARCH);
        Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setTextFragmenter(fragmenter);

        // Collapse hits by URL group, keeping only the best document per group.
        GroupingSearch gsearch = new GroupingSearch(IndexDocumentAdapter.FIELD_URL_GROUP).setGroupDocsLimit(1)
                .setAllGroups(true).setIncludeMaxScore(true);
        TopGroups<?> groups = gsearch.search(searcher, q, offset, size);

        ArrayList<SearchResult> results = new ArrayList<>(size);
        for (int i = offset; i < offset + size && i < groups.groups.length; i++) {
            ScoreDoc scoreDoc = groups.groups[i].scoreDocs[0];
            Document luceneDoc = searcher.doc(scoreDoc.doc);
            IndexDocumentAdapter doc = new IndexDocumentAdapter(luceneDoc);

            // Prefer stored term vectors; falls back to re-analyzing the stored
            // text, capped just under the highlighter's analysis limit.
            TokenStream tokenStream = TokenSources.getTokenStream(IndexDocumentAdapter.FIELD_SEARCH,
                    reader.getTermVectors(scoreDoc.doc), luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH),
                    analyzer, highlighter.getMaxDocCharsToAnalyze() - 1);

            // Join up to three fragments, then strip any markup except simple
            // text-level tags before returning the snippet to the client.
            String[] snippets = highlighter.getBestFragments(tokenStream,
                    luceneDoc.get(IndexDocumentAdapter.FIELD_SEARCH), 3);
            String snippet = Arrays.asList(snippets).stream().collect(Collectors.joining("\n"));
            snippet = Jsoup.clean(snippet, Whitelist.simpleText());

            String debugInfo = null;
            if (includeDebug) {
                Explanation explanation = searcher.explain(q, scoreDoc.doc);
                debugInfo = explanation.toString();
            }

            results.add(new SearchResult(doc.getKey(), doc.getCollection(), doc.getTitle(), doc.getUrl(),
                    doc.getDomain(), doc.getTimestampText(), snippet, debugInfo, scoreDoc.score));
        }

        SearchResultWrapper wrapper = new SearchResultWrapper().setQuery(query).setOffset(offset)
                .setMaxResultsRequested(size)
                .setResultCount(groups.totalGroupCount != null ? groups.totalGroupCount : 0)
                .setResults(results);

        if (includeDebug) {
            wrapper.setDebugInfo(q.toString());
        }

        return wrapper;

    } catch (IOException | ParseException | InvalidTokenOffsetsException e) {
        // All lower-level failures surface to the caller as a single IndexException.
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}

From source file:com.gauronit.tagmata.core.Indexer.java

License:Open Source License

/**
 * Searches the named indexes for the given text and returns card snapshots
 * with highlighted excerpts (or a plain-text prefix when nothing highlights).
 *
 * @param searchText the user's query text
 * @param indexNames the on-disk index directories to search
 * @param searchInTitle whether to match against the "title" field
 * @param searchInTags whether to match against the "tags" field
 * @param searchInText whether to match against the "text"/"analyzedText" fields
 * @param superFuzzy whether IndexerUtil should build fuzzier term queries
 * @return snapshots for every hit across all indexes (empty on failure)
 */
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle,
        boolean searchInTags, boolean searchInText, boolean superFuzzy) {
    ArrayList<CardSnapshot> cardSnaps = new ArrayList<CardSnapshot>();
    ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>();
    try {
        for (String indexName : indexNames) {
            IndexReader reader = IndexReader
                    .open(FSDirectory.open(new File(indexDir + File.separator + indexName),
                            new SimpleFSLockFactory(indexDir + File.separator + indexName)));
            searchers.add(new IndexSearcher(reader));
        }

        // Build one boolean query spanning every requested field.
        BooleanQuery query = new BooleanQuery();
        if (searchInTitle) {
            IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy);
        }
        if (searchInTags) {
            IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy);
        }
        if (searchInText) {
            IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy);
            IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy);
        }

        for (IndexSearcher searcher : searchers) {
            TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            for (ScoreDoc hit : hits) {
                Document doc = searcher.doc(hit.doc);

                // BUGFIX: was Version.LUCENE_20.LUCENE_35 — accessing the
                // LUCENE_35 constant through an unrelated constant; reference
                // the enum constant directly.
                TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"),
                        new StandardAnalyzer(Version.LUCENE_35));
                QueryScorer scorer = new QueryScorer(query, "analyzedText");
                Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20);
                Highlighter highlighter = new Highlighter(scorer);
                highlighter.setTextFragmenter(fragmenter);
                String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5);

                // Join fragments with "..."; avoid O(n^2) string concatenation.
                StringBuilder highlights = new StringBuilder();
                for (String fragment : fragments) {
                    highlights.append(fragment).append("...");
                }
                if (highlights.length() == 0) {
                    // No highlightable match: fall back to a 100-char prefix.
                    String text = doc.get("text");
                    highlights.append(text.length() > 100 ? text.substring(0, 100) : text);
                }

                cardSnaps.add(new CardSnapshot(highlights.toString(), doc));
            }
        }
    } catch (Exception ex) {
        ex.printStackTrace();
    } finally {
        // BUGFIX: previously each searcher was closed only at the end of its loop
        // iteration, so an exception mid-loop leaked every remaining reader.
        for (IndexSearcher searcher : searchers) {
            try {
                searcher.getIndexReader().close();
                searcher.close();
            } catch (Exception ignored) {
                // best-effort cleanup
            }
        }
    }
    return cardSnaps;
}

From source file:com.leavesfly.lia.tool.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    // Search the sample book index for titles containing "action".
    IndexSearcher searcher = new IndexSearcher(TestUtil.getBookIndexDirectory());
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    // A single highlighter instance is reused across every hit.
    QueryScorer scorer = new QueryScorer(query, "title");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);

    Analyzer analyzer = new SimpleAnalyzer();

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        // Re-tokenize the stored "title" field (or reuse term vectors if present).
        TokenStream stream =
                TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);
        System.out.println(highlighter.getBestFragment(stream, title));
    }
}

From source file:com.mathworks.xzheng.tools.HighlightTest.java

License:Apache License

public void testHighlighting() throws Exception {
    // Source text containing the single term we expect to be wrapped in <B> tags.
    final String text = "The quick brown fox jumps over the lazy dog";

    // Query for "fox" against the synthetic "field" used below.
    TermQuery query = new TermQuery(new Term("field", "fox"));

    // Analyze the raw text the same way the (hypothetical) index would have.
    TokenStream tokenStream =
            new SimpleAnalyzer(Version.LUCENE_46).tokenStream("field", new StringReader(text));

    // Wire scorer -> fragmenter -> highlighter; the default formatter emits <B>...</B>.
    QueryScorer scorer = new QueryScorer(query, "field");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    String fragment = highlighter.getBestFragment(tokenStream, text);
    assertEquals("The quick brown <B>fox</B> jumps over the lazy dog", fragment);
}

From source file:com.mathworks.xzheng.tools.HighlightTest.java

License:Apache License

public void testHits() throws Exception {
    // Search the sample book index for titles containing "action".
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(TestUtil.getBookIndexDirectory()));
    TermQuery query = new TermQuery(new Term("title", "action"));
    TopDocs hits = searcher.search(query, 10);

    // A single highlighter instance is reused across every hit.
    QueryScorer scorer = new QueryScorer(query, "title");
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);

    Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_46);

    for (ScoreDoc sd : hits.scoreDocs) {
        Document doc = searcher.doc(sd.doc);
        String title = doc.get("title");

        // Re-tokenize the stored "title" field (or reuse term vectors if present).
        TokenStream stream =
                TokenSources.getAnyTokenStream(searcher.getIndexReader(), sd.doc, "title", doc, analyzer);
        System.out.println(highlighter.getBestFragment(stream, title));
    }
}