On this page you can find example usages of org.apache.lucene.search.highlight.Highlighter#getBestTextFragments.


public final TextFragment[] getBestTextFragments(TokenStream tokenStream, String text,
        boolean mergeContiguousFragments, int maxNumFragments)
        throws IOException, InvalidTokenOffsetsException 

Low-level API to get the most relevant (formatted) sections of the document.


From source file:ci6226.eval_index_reader.java

/**
 * Runs every query string in {@code _searchList} against {@code field} and builds a
 * comma-separated summary string (query index, query text, total hits, cleaned highlight
 * text, hit score) that is written to {@code writer}.
 *
 * NOTE(review): this listing is an excerpt -- at least one statement is cut off and the
 * closing braces of the inner blocks are missing, so the exact nesting of the appends and
 * writes below must be confirmed against the full source.
 *
 * @param reader      index reader used to rewrite each parsed query before it is scored
 * @param searcher    searcher that executes the queries and loads the stored documents
 * @param _analyzer   analyzer handed to the query parser
 * @param field       default field for query parsing; also the stored field that is highlighted
 * @param _searchList query strings to run, in order
 * @param _topn       maximum number of hits requested per query
 * @param writer      destination of the summary lines
 */
public static void Searchit(IndexReader reader, IndexSearcher searcher, Analyzer _analyzer, String field,
        String[] _searchList, int _topn, PrintWriter writer)
        throws org.apache.lucene.queryparser.classic.ParseException, IOException, InvalidTokenOffsetsException {
    Analyzer analyzer = _analyzer;

    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);

    String[] testString = _searchList;//{"to","123","impressed","Geezer","geezer","semi-busy","\"eggs vegetable\"","gs veget","\"gs veget\""};//,"good","I","but","coffee"};

    for (int j = 0; j < testString.length; j++) {
        // Summary line starts with "<query index>,<query text>".
        String lstr = String.valueOf(j) + "," + testString[j];
        Query query = parser.parse(testString[j]);
        System.out.println("Searching for: " + query.toString(field));
        TopDocs topdocs = searcher.search(query, _topn);
        lstr += "," + topdocs.totalHits;
        ScoreDoc[] scoreDocs = topdocs.scoreDocs;
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        // The scorer is built from the rewritten query, not the raw parsed one.
        Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query.rewrite(reader)));
        for (int i = 0; i < scoreDocs.length; i++) {
            int doc = scoreDocs[i].doc;
            Document document = searcher.doc(doc);
            //      System.out.println("Snippet=" + document.get(field));
            String text = document.get(field);
            TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), doc, field,
            // NOTE(review): the remaining arguments of the call above are missing from this excerpt.
            // Ask for up to 10 fragments without merging contiguous ones.
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "...");
            String line = "";
            for (int m = 0; m < frag.length; m++) {

                // Only fragments that actually scored are considered.
                if ((frag[m] != null) && (frag[m].getScore() > 0)) {
                    // 'line' is overwritten (not appended), then stripped of line breaks and
                    // double quotes; commas become spaces so the text fits in one CSV column.
                    line = frag[m].toString();
                    line = line.replaceAll("\n", "");
                    line = line.replaceAll("\r", "");
                    line = line.replaceAll("\"", "");
                    line = line.replaceAll(",", " ");


            lstr += "," + line;
            lstr += "," + String.valueOf(scoreDocs[i].score);

        writer.write(lstr + "\n");
        // Reports the number of returned hits (scoreDocs.length), not topdocs.totalHits.
        System.out.println("Search for:" + testString[j] + " Total hits=" + scoreDocs.length);


From source file:ci6226.loadIndex.java

 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 * When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected.
// NOTE(review): this listing is an excerpt -- the bodies of several branches and most closing
// braces are missing, so control flow described in the comments below is limited to what the
// visible statements show.
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive, Analyzer analyzer) throws IOException, InvalidTokenOffsetsException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    // [start, end) is the window of hits shown on the current page.
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        // The requested window reaches past what was collected so far: offer to re-run the
        // query and collect every hit.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // NOTE(review): BufferedReader.readLine() returns null at end of stream, which would
            // make line.length() throw a NullPointerException -- confirm whether 'in' can hit EOF.
            if (line.length() == 0 || line.charAt(0) == 'n') {

            hits = searcher.search(query, numTotalHits).scoreDocs;

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("review_id");
            if (path != null) {
                // Print the 1-based rank (wrapped in ANSI colour codes) and the hit's score.
                System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
                String title = doc.get("business_id");
                if (title != null) {

                    String text = doc.get("text");
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
                            hits[i].doc, "text", doc, analyzer);//TokenSources.getAnyTokenStream(searcher.getIndexReader() ,"text", analyzer);
                    // Matched terms are wrapped in ANSI_RED ... ANSI_RESET instead of HTML tags.
                    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(ANSI_RED, ANSI_RESET);
                    // SimpleFragmenter fragmenter = new SimpleFragmenter(80);
                    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
                    // Up to 4 fragments, contiguous fragments not merged.
                    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
                    for (int j = 0; j < frag.length; j++) {
                        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                    // NOTE(review): what is done with each scoring fragment is missing from this excerpt.
                    System.out.println("Full Review=\t" + doc.get("text") + "\nBusinessID=\t" + title);
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");


        if (!interactive || end == 0) {

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {

                // Build the navigation prompt; "previous"/"next" are offered only when such a
                // page exists.
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                System.out.println("(q)uit or enter number to jump to a page.");
                int cpage = start / hitsPerPage;
                // NOTE(review): "1 + numTotalHits / hitsPerPage" reports one page too many when
                // numTotalHits is an exact multiple of hitsPerPage.
                System.out.println(String.format("Current page=%d,max page=%d", cpage + 1,
                        1 + numTotalHits / hitsPerPage));
                String line = in.readLine();
                // NOTE(review): same null-at-EOF concern as the readLine() call above.
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                } else {
                    // Anything else is treated as a 1-based page number.
                    // NOTE(review): Integer.parseInt throws NumberFormatException on non-numeric
                    // input; no handler is visible in this excerpt.
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                    } else {
                        System.out.println("No such page");
            if (quit) {
            end = Math.min(numTotalHits, start + hitsPerPage);

From source file:com.flaptor.hounder.searcher.SnippetSearcher.java

 * Adds snippets to the search results.
 * How stuff works:
 * For each 'group g' in provided GroupedSearchResults:
 *   For each result in 'g':
 *     Use the lucene highlighter to get the terms highlighted on the required field.
 *     Then call getSnippet(...) to get the resulting snippet
// NOTE(review): this listing is an excerpt -- the bodies of some guards and the closing braces
// are missing; comments below describe only the visible statements.
private void addSnippets(GroupedSearchResults res, String snippetOfField, int snippetLength, QueryScorer scorer,
        Formatter simpleHtmlFormatter) throws IOException {

    Highlighter highlighter = new Highlighter(simpleHtmlFormatter, scorer);
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); // make sure the whole text will be analyzed
    // Here we store every seen phrase. It is used to give less score to
    // recurring phrases
    Set<String> usedSnippets = new HashSet<String>();

    for (int j = 0; j < res.groups(); j++) { // for each group
        Vector<Document> resDocs = res.getGroup(j).last();
        int docsLen = resDocs.size();
        for (int i = 0; i < docsLen; i++) { // for each document on that group               
            Document doc = resDocs.get(i); // get the document i
            String text = doc.get(snippetOfField); // text to be snippeted
            if (null == text) {
                logger.warn("Asked to snippet an unexisting field: " + snippetOfField);
            // NOTE(review): the rest of the null guard is missing from this excerpt; the
            // StringReader below would throw on a null text, so presumably the document is
            // skipped here -- confirm against the full source.

            TokenStream tokenStream = queryParser.tokenStream(snippetOfField, new StringReader(text));
            // Only the single best fragment is requested.
            TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 1);

            String result = null;
            if (null != fragments && 0 < fragments.length) {
                result = getSnippet(fragments[0].toString(), snippetLength, scorer, usedSnippets);

            // No usable snippet: store either an empty string or the leading
            // snippetLength characters of the text, depending on emptySnippetsAllowed.
            if (null == result || 0 == result.length()) { // 
                if (emptySnippetsAllowed) {
                    result = "";
                } else {
                    result = text.substring(0, Math.min(text.length(), snippetLength));
            // The snippet is added to the document as a stored, non-indexed field named
            // SNIPPET_FIELDNAME_PREFIX + <field name>.
            String snippetF = SNIPPET_FIELDNAME_PREFIX + snippetOfField;
            doc.add(new Field(snippetF, result.toString(), Field.Store.YES, Field.Index.NO));

From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java

/**
 * Searches the term index for {@code pattern} and returns the matching terms together with
 * an HTML-highlighted rendering of each term name.
 *
 * NOTE(review): this listing is an excerpt -- several statements and closing braces (including
 * the body of the {@code finally} block) are missing.
 *
 * @param pattern         user-supplied search pattern; it is escaped before parsing
 * @param includeSynonyms when {@code false}, hits flagged as synonyms are left out
 * @return the highlighted results; an empty list when the escaped pattern is shorter than
 *         {@code EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE}
 * @throws InvalidQuerySyntaxException if the query parser rejects the pattern
 */
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms)
        throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;

    // default QueryParser.escape(pattern) method does not support phrase queries
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);
    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        return Collections.emptyList();

    logger.log(Level.FINE, "Escaped search pattern: " + pattern);

    // NOTE(review): only the lookup of the read lock is visible here; the lock()/unlock()
    // calls are presumably in the lines missing from this excerpt -- confirm.
    Lock lock = rwlock.readLock();
    // A non-null 'exception' field aborts the search with a RuntimeException wrapping it.
    if (exception != null) {
        throw new RuntimeException("Failed to refesh index reader after last commit", exception);

    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);

        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);

        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            // A hit counts as a synonym only when the binary field is exactly one byte with value 1.
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;

            if (!isSynonym || includeSynonyms) {
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM,
                // NOTE(review): the remaining arguments of the call above are missing from this excerpt.
                // One fragment at most, with contiguous fragments merged.
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                // Hits whose best fragment did not score are not added to the results.
                if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                    results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                            frag[0].getScore(), isSynonym));

        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        // Everything else (including Errors, since Throwable is caught) is logged and
        // rethrown wrapped in a RuntimeException.
        String msg = "Failed to perform Lucene seach with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

/**
 * Highlights the values of {@code fieldName} in {@code doc} for {@code query} and adds the
 * resulting snippets to {@code docSummaries}; falls back to {@code alternateField(...)} when
 * no snippet was produced.
 *
 * NOTE(review): this listing is an excerpt -- many statements, branch bodies and closing
 * braces are missing, so the comments below describe only what the visible lines show.
 */
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId,
        Document doc, String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)))
        return;
    // END: Hack

    SolrParams params = req.getParams();
    IndexableField[] docFields = doc.getFields(fieldName);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
    // NOTE(review): the loop body that fills listFields is missing from this excerpt.

    String[] docTexts = listFields.toArray(new String[listFields.size()]);

    // according to Document javadoc, doc.getValues() never returns null. check empty instead of null
    if (docTexts.length == 0)

    TokenStream tokenStream;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    List<TextFragment> frags = new ArrayList<TextFragment>();

    TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
    // The term-vector lookup is commented out, so within the visible lines 'tots' stays null
    // and the analyzer fallback below is always taken.
    try {
        //      TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName);
        //      if (tvStream != null) {
        //        tots = new TermOffsetsTokenStream(tvStream);
        //      }
    } catch (IllegalArgumentException e) {
        // No problem. But we can't use TermOffsets optimization.

    for (int j = 0; j < docTexts.length; j++) {
        if (tots != null) {
            // if we're using TermOffsets optimization, then get the next
            // field value's TokenStream (i.e. get field j's TokenStream) from tots:
            tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length());
        } else {
            // fall back to analyzer
            tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]);

        int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
        // NOTE(review): the default-value argument of the call above is missing from this excerpt.

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            // A negative limit means the stream is cached as-is; otherwise it is first capped
            // with an OffsetLimitTokenFilter.
            if (maxCharsToAnalyze < 0) {
                tokenStream = new CachingTokenFilter(tokenStream);
            } else {
                tokenStream = new CachingTokenFilter(
                        new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze));

            // get highlighter
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream);

            // after highlighter initialization, reset tstream since construction of highlighter already used it
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);

        if (maxCharsToAnalyze < 0) {
        } else {

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j],
                    mergeContiguousFragments, numFragments);
            for (int k = 0; k < bestTextFragments.length; k++) {
                if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
    // sort such that the fragments with the highest score come first
    // NOTE(review): rounding the float difference makes scores that differ by less than 0.5
    // compare as equal, so closely scored fragments are not reliably ordered; Float.compare
    // would order them exactly.
    Collections.sort(frags, new Comparator<TextFragment>() {
        public int compare(TextFragment arg0, TextFragment arg1) {
            return Math.round(arg1.getScore() - arg0.getScore());

    // convert fragments back into text
    // TODO: we can include score and position information in output as snippet attributes
    String[] summaries = null;
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if ((fragment != null) && (fragment.getScore() > 0)) {
            if (fragTexts.size() >= numFragments)
        // NOTE(review): the no-argument ArrayList.toArray() returns an Object[], so this cast
        // throws ClassCastException at runtime; toArray(new String[0]) returns a String[].
        summaries = (String[]) fragTexts.toArray();
        if (summaries.length > 0)
            docSummaries.add(fieldName, summaries);
    // no summaries made, copy text from alternate field
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);

From source file:com.tripod.lucene.service.AbstractLuceneService.java

 * Performs highlighting for a given query and a given document.
 * @param indexSearcher the IndexSearcher performing the query
 * @param query the Tripod LuceneQuery
 * @param scoreDoc the Lucene ScoreDoc
 * @param doc the Lucene Document
 * @param highlighter the Highlighter to use
 * @param result the QueryResult to add the highlights to
 * @throws IOException if an error occurs performing the highlighting
 * @throws InvalidTokenOffsetsException if an error occurs performing the highlighting
// NOTE(review): this listing is an excerpt -- guard bodies, part of the getTokenStream call
// and the closing braces are missing; comments below describe only the visible statements.
protected void performHighlighting(final IndexSearcher indexSearcher, final Q query, final ScoreDoc scoreDoc,
        final Document doc, final Highlighter highlighter, final QR result)
        throws IOException, InvalidTokenOffsetsException {

    // Guard: the query did not request any highlight fields.
    if (query.getHighlightFields() == null || query.getHighlightFields().isEmpty()) {

    final List<Highlight> highlights = new ArrayList<>();
    final List<String> hlFieldNames = getHighlightFieldNames(query, doc);

    // process each field to highlight on
    for (String hlField : hlFieldNames) {
        final String text = doc.get(hlField);
        // Guard: the document has no stored text for this field.
        if (StringUtils.isEmpty(text)) {

        final List<String> snippets = new ArrayList<>();
        // Term vectors of the hit are fetched once per field and handed to TokenSources below.
        final Fields tvFields = indexSearcher.getIndexReader().getTermVectors(scoreDoc.doc);
        final int maxStartOffset = highlighter.getMaxDocCharsToAnalyze() - 1;

        // get the snippets for the given field
        final TokenStream tokenStream = TokenSources.getTokenStream(hlField, tvFields, text, analyzer,
        // NOTE(review): the final argument of the call above is missing from this excerpt
        // (presumably maxStartOffset, which is otherwise unused in the visible lines).
        // Up to 10 fragments, contiguous fragments not merged.
        final TextFragment[] textFragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        for (TextFragment textFragment : textFragments) {
            if (textFragment != null && textFragment.getScore() > 0) {

        // if we have snippets then add a highlight result to the QueryResult
        if (snippets.size() > 0) {
            highlights.add(new Highlight(hlField, snippets));


From source file:Example.lucene.SearchNHilight.java

/**
 * Searches the index under {@code data/indexing} for the query given in {@code args[0]}
 * (default {@code "golf user"}) and builds an HTML preview string from the highlighted
 * fragments of each hit's {@code content} field.
 *
 * NOTE(review): this listing is an excerpt -- at least one statement is cut off and the end
 * of the method is missing.
 */
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    //... Above, create documents with two fields, one with term vectors (tv) and one without (notv)
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    String querystr = args.length > 0 ? args[0] : "golf user";
    // "content" is the only field handed to the parser, so it is the field used
    // when no field is explicitly specified in the query.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. search
    // NOTE(review): hitsPerPage is not used in the visible lines; the search and the loop
    // below both hard-code 10.
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs hits = searcher.search(query, 10);

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    String Preview;
    // NOTE(review): the loop always runs 10 times; when the search returns fewer than 10 hits,
    // hits.scoreDocs[i] throws ArrayIndexOutOfBoundsException. Bounding by
    // hits.scoreDocs.length would avoid that.
    for (int i = 0; i < 10; i++) {
        int id = hits.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        String text;
        Preview = "";
        text = doc.get("content");
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
        // NOTE(review): the remaining arguments of the call above are missing from this excerpt.
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "...");
        // k is compared against 2 below to cap the preview length; the statement that
        // increments it is not visible in this excerpt.
        int k = 0;
        for (TextFragment frag1 : frag) {
            if ((frag1 != null) && (frag1.getScore() > 0)) {
                Preview += (frag1.toString()) + "...<br>";
                // Get 2 Line Preview
                if (k >= 2)
        //Term vector

From source file:io.bdrc.lucene.bo.TibetanAnalyzerTest.java

/**
 * Regression check for highlight offsets: analyzes an EWTS input with {@code TibetanAnalyzer},
 * highlights the phrase query {@code test:"bka'"} and expects the match to be wrapped in
 * {@code ->} / {@code <-} in the first fragment.
 *
 * NOTE(review): the check uses the plain {@code assert} keyword, which is only evaluated when
 * the JVM runs with assertions enabled ({@code -ea}). The closing brace of the method is
 * missing from this excerpt.
 */
public void ewtsOffsetBug2() throws IOException, ParseException, InvalidTokenOffsetsException {
    String input = "(cha) bka' bkan gnyis kyi lung";
    String queryLucene = "test:\"bka'\"";
    // Indexing and query analyzers are built with identical settings.
    Analyzer indexingAnalyzer = new TibetanAnalyzer(false, true, false, "ewts", "");
    Analyzer queryAnalyzer = new TibetanAnalyzer(false, true, false, "ewts", "");
    TokenStream indexTk = indexingAnalyzer.tokenStream("", input);
    QueryParser queryParser = new QueryParser("test", queryAnalyzer);
    Query query = queryParser.parse(queryLucene);
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("->", "<-");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    // Fragment size 10 with merging of contiguous fragments enabled below; the assertion
    // expects the whole input back as the first fragment.
    highlighter.setTextFragmenter(new SimpleFragmenter(10));
    TextFragment[] frags = highlighter.getBestTextFragments(indexTk, input, true, 128);
    final String firstFrag = frags[0].toString();
    assert (firstFrag.equals("(cha) ->bka'<- bkan gnyis kyi lung"));
    // NOTE(review): only indexingAnalyzer is closed in the visible lines; queryAnalyzer is not.
    indexingAnalyzer.close();

From source file:Main.WebAPI.Search.java

 *
 * @param args args[0] is a query
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException 

// NOTE(review): this listing is an excerpt -- some statements are cut off and the end of the
// method is missing; comments below describe only the visible statements.
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    //... Above, create documents with two fields, one with term vectors (tv) and one without (notv)
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    // Query text comes from args[0]; "mike lab" is used when no argument is given.
    String querystr = args.length > 0 ? args[0] : "mike lab";
    // "content" is the only field handed to the parser, so it is the field used
    // when no field is explicitly specified in the query.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
    // NOTE(review): the continuation of the statement above (the parse call) is missing from
    // this excerpt.

    // 3. search
    // NOTE(review): hitsPerPage is not used in the visible lines; the search and the loop
    // below both hard-code 10.
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs hits = searcher.search(query, 10);

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    String Preview;
    // NOTE(review): the loop always runs 10 times; when the search returns fewer than 10 hits,
    // hits.scoreDocs[i] throws ArrayIndexOutOfBoundsException. Bounding by
    // hits.scoreDocs.length would avoid that.
    for (int i = 0; i < 10; i++) {
        int id = hits.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        String text;
        Preview = "";
        text = doc.get("content");
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
        // NOTE(review): the remaining arguments of the call above are missing from this excerpt.
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "...");
        // k is compared against 2 below to cap the preview length; the statement that
        // increments it is not visible in this excerpt.
        int k = 0;
        for (TextFragment frag1 : frag) {
            if ((frag1 != null) && (frag1.getScore() > 0)) {
                Preview += (frag1.toString()) + "...<br>";
                // Get 2 Line Preview
                if (k >= 2)
        //Term vector

From source file:net.riezebos.thoth.content.search.Searcher.java

/**
 * Runs {@code queryExpression} against the content index and returns one page of results,
 * each carrying highlighted fragments where the hit is a document or fragment.
 *
 * NOTE(review): this listing is an excerpt -- several statements are cut off and many closing
 * braces are missing, so the comments below describe only what the visible lines show.
 *
 * @param identity        caller identity, used to check the READ_FRAGMENTS permission
 * @param queryExpression query text in Lucene query-parser syntax
 * @param pageNumber      page to return; the offset arithmetic below treats it as 1-based
 * @param pageSize        maximum number of results per page
 * @return the requested page plus a flag telling whether more hits exist
 * @throws SearchException wrapping any exception raised while searching
 */
public PagedList<SearchResult> search(Identity identity, String queryExpression, int pageNumber, int pageSize)
        throws SearchException {
    try {
        IndexReader reader = getIndexReader(contentManager);
        IndexSearcher searcher = getIndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        // We might need to restrict the results to books if the user does not have access to fragments:
        AccessManager accessManager = contentManager.getAccessManager();
        boolean booksOnly = !accessManager.hasPermission(identity, "", Permission.READ_FRAGMENTS);
        if (booksOnly) {
            // Wrap the caller's expression so that only entries of type TYPE_DOCUMENT can match.
            queryExpression = Indexer.INDEX_TYPE + ":" + Indexer.TYPE_DOCUMENT + " AND (" + queryExpression
                    + ")";

        QueryParser parser = new QueryParser(Indexer.INDEX_CONTENTS, analyzer);
        Query query = parser.parse(queryExpression);

        // We add 1 to determine if there is more to be found after the current page
        int maxResults = pageSize * pageNumber + 1;
        TopDocs results = searcher.search(query, maxResults, Sort.RELEVANCE);
        ScoreDoc[] hits = results.scoreDocs;

        boolean hadMore = (hits.length == maxResults);

        List<SearchResult> searchResults = new ArrayList<>();
        // NOTE(review): idx is compared against the page offset below, but the statement that
        // advances it is not visible in this excerpt.
        int idx = 0;
        for (ScoreDoc scoreDoc : hits) {
            // Page is full (the action taken is missing from this excerpt).
            if (searchResults.size() == pageSize)
            // Hits before the start of the requested page are skipped.
            if (idx >= (pageNumber - 1) * pageSize) {
                Document document = searcher.doc(scoreDoc.doc);
                IndexableField field = document.getField(Indexer.INDEX_PATH);
                String documentPath = field.stringValue();
                SearchResult searchResult = new SearchResult();
                searchResult.setIndexNumber((pageNumber - 1) * pageSize + idx);

                String type = document.get(Indexer.INDEX_TYPE);
                if (Indexer.TYPE_DOCUMENT.equals(type) || Indexer.TYPE_FRAGMENT.equals(type)) {

                    try {
                        // Highlighting runs over the current markdown of the document, loaded
                        // through the content manager rather than taken from the index.
                        MarkDownDocument markDownDocument = contentManager.getMarkDownDocument(documentPath,
                                true, CriticProcessingMode.DO_NOTHING);
                        String contents = markDownDocument.getMarkdown();

                        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
                        Highlighter highlighter = new Highlighter(htmlFormatter,
                                new QueryScorer(query, Indexer.INDEX_CONTENTS));

                        TokenStream tokenStream = analyzer.tokenStream(Indexer.INDEX_CONTENTS, contents);

                        TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, contents, false,
                        // NOTE(review): the last argument of the call above is missing from this excerpt.
                        for (TextFragment frag : frags) {
                            if ((frag != null) && (frag.getScore() > 0)) {
                                String fragmentText = frag.toString();
                                        // NOTE(review): start of this statement is missing; the
                                        // fragment text is HTML-escaped except for "B" tags.
                                        new Fragment(ThothCoreUtil.escapeHtmlExcept("B", fragmentText)));
                    } catch (FileNotFoundException e) {
                                // NOTE(review): start of this statement is missing from this excerpt.
                                "Index contains an invalid file reference); probably need to reindex to get rid of this. File: "
                                        + e.getMessage());
                } else {
                    String extension = ThothUtil.getExtension(documentPath);

                    // Other entry types get the indexed title as their only fragment.
                    searchResult.addFragment(new Fragment(document.get(Indexer.INDEX_TITLE)));
        PagedList<SearchResult> pagedList = new PagedList<>(searchResults, hadMore);
        return pagedList;
    } catch (Exception e) {
        throw new SearchException(e);