This page collects example usages of the org.apache.lucene.search.highlight.Highlighter constructor Highlighter(Formatter, Scorer).


public Highlighter(Formatter formatter, Scorer fragmentScorer) 

Source Link


From source file:Main.WebAPI.Search.java

/**
 * @param args args[0] is a query
 * @throws IOException
 * @throws ParseException
 * @throws InvalidTokenOffsetsException
 */

public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
    // Opens the index under "data/indexing", runs the query given in args[0]
    // (default "mike lab") and builds an HTML preview string from the
    // highlighted fragments of each hit.
    // NOTE(review): this excerpt is truncated; some statements below are cut
    // off mid-expression and the closing braces are missing.
    //... Above, create documents with two fields, one with term vectors (tv) and one without (notv)
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    Directory index = FSDirectory.open(new File("data/indexing"));
    String querystr = args.length > 0 ? args[0] : "mike lab";
    // "content" is the only field handed to the parser, so it is the field
    // searched when the query names no field explicitly.
    Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)

    // 3. search
    // NOTE(review): hitsPerPage is not used in the visible lines; the search
    // call and the loop below use the literal 10 instead.
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs hits = searcher.search(query, 10);

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
    String Preview;
    // NOTE(review): the loop always runs 10 times; confirm hits.scoreDocs
    // cannot be shorter than that, otherwise the index access below fails.
    for (int i = 0; i < 10; i++) {
        int id = hits.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        String text;
        // The preview is rebuilt from scratch for every hit.
        Preview = "";
        text = doc.get("content");
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content",
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "...");
        int k = 0;
        for (TextFragment frag1 : frag) {
            // Only fragments that actually matched the query (score > 0) are appended.
            if ((frag1 != null) && (frag1.getScore() > 0)) {
                Preview += (frag1.toString()) + "...<br>";
                // Get 2 Line Preview
                if (k >= 2)
        //Term vector

From source file:net.riezebos.thoth.content.search.Searcher.java

License:Apache License

/**
 * Runs a paged full-text search and attaches highlighted fragments to each result.
 * <p>
 * NOTE(review): this excerpt is truncated; several statements and closing braces
 * are missing, so only the visible lines are described here.
 *
 * @param identity the identity whose READ_FRAGMENTS permission decides whether the
 *        query is restricted to documents
 * @param queryExpression the query text handed to the {@link QueryParser}
 * @param pageNumber the page to return; presumably 1-based, since offsets are
 *        computed as {@code (pageNumber - 1) * pageSize} (confirm with callers)
 * @param pageSize the number of results per page
 * @return the collected results together with a flag telling whether more hits exist
 * @throws SearchException wrapping any exception raised while searching
 */
public PagedList<SearchResult> search(Identity identity, String queryExpression, int pageNumber, int pageSize)
        throws SearchException {
    try {
        IndexReader reader = getIndexReader(contentManager);
        IndexSearcher searcher = getIndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        // We might need to restrict the results to books if the user does not have access to fragments:
        AccessManager accessManager = contentManager.getAccessManager();
        boolean booksOnly = !accessManager.hasPermission(identity, "", Permission.READ_FRAGMENTS);
        if (booksOnly) {
            // Wrap the user's expression so that only entries of type TYPE_DOCUMENT can match.
            queryExpression = Indexer.INDEX_TYPE + ":" + Indexer.TYPE_DOCUMENT + " AND (" + queryExpression
                    + ")";

        QueryParser parser = new QueryParser(Indexer.INDEX_CONTENTS, analyzer);
        Query query = parser.parse(queryExpression);

        // We add 1 to determine if there is more to be found after the current page
        int maxResults = pageSize * pageNumber + 1;
        TopDocs results = searcher.search(query, maxResults, Sort.RELEVANCE);
        ScoreDoc[] hits = results.scoreDocs;

        // Receiving the extra (+1) hit means at least one more result exists beyond this page.
        boolean hadMore = (hits.length == maxResults);

        List<SearchResult> searchResults = new ArrayList<>();
        int idx = 0;
        for (ScoreDoc scoreDoc : hits) {
            if (searchResults.size() == pageSize)
            // Hits that belong to earlier pages are not turned into results.
            if (idx >= (pageNumber - 1) * pageSize) {
                Document document = searcher.doc(scoreDoc.doc);
                IndexableField field = document.getField(Indexer.INDEX_PATH);
                String documentPath = field.stringValue();
                SearchResult searchResult = new SearchResult();
                searchResult.setIndexNumber((pageNumber - 1) * pageSize + idx);

                String type = document.get(Indexer.INDEX_TYPE);
                // Documents and fragments get highlighted excerpts; any other type
                // falls through to the else branch and gets its indexed title.
                if (Indexer.TYPE_DOCUMENT.equals(type) || Indexer.TYPE_FRAGMENT.equals(type)) {

                    try {
                        MarkDownDocument markDownDocument = contentManager.getMarkDownDocument(documentPath,
                                true, CriticProcessingMode.DO_NOTHING);
                        String contents = markDownDocument.getMarkdown();

                        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
                        Highlighter highlighter = new Highlighter(htmlFormatter,
                                new QueryScorer(query, Indexer.INDEX_CONTENTS));

                        // The markdown text is re-tokenized with the same analyzer that parsed the query.
                        TokenStream tokenStream = analyzer.tokenStream(Indexer.INDEX_CONTENTS, contents);

                        TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, contents, false,
                        for (TextFragment frag : frags) {
                            if ((frag != null) && (frag.getScore() > 0)) {
                                String fragmentText = frag.toString();
                                        new Fragment(ThothCoreUtil.escapeHtmlExcept("B", fragmentText)));
                    } catch (FileNotFoundException e) {
                                "Index contains an invalid file reference); probably need to reindex to get rid of this. File: "
                                        + e.getMessage());
                } else {
                    String extension = ThothUtil.getExtension(documentPath);

                    searchResult.addFragment(new Fragment(document.get(Indexer.INDEX_TITLE)));
        PagedList<SearchResult> pagedList = new PagedList<>(searchResults, hadMore);
        return pagedList;
    } catch (Exception e) {
        // Every failure is reported to the caller as a SearchException carrying the cause.
        throw new SearchException(e);

From source file:net.sf.mmm.search.engine.impl.lucene.LuceneSearchHighlighter.java

License:Apache License

/**
 * The constructor.
 *
 * @param searchAnalyzer is the {@link Analyzer} used by the {@link LuceneSearchEngine search-engine}.
 * @param formatter is the formatter used to highlight terms.
 * @param searchQuery is the {@link Query} of the search. Matching terms of this query shall be highlighted.
 */
public LuceneSearchHighlighter(Analyzer searchAnalyzer, Formatter formatter, Query searchQuery) {

    // Fragments are scored against searchQuery; the given formatter is used to
    // highlight the matching terms.
    this.highlighter = new Highlighter(formatter, new QueryScorer(searchQuery));
    // Stored for later use; where it is used is not visible in this excerpt.
    this.analyzer = searchAnalyzer;

From source file:net.sf.zekr.engine.search.lucene.QuranTextSearcher.java

/**
 * Main search method, for internal use.
 *
 * @param q query string
 * @return a list of highlighted string objects.
 * @throws SearchException
 */
private List<SearchResultItem> internalSearch(String q) throws SearchException {
    // Parses and rewrites q, searches the index (optionally limited to the
    // current search scope) and highlights the contents of each hit.
    // NOTE(review): this excerpt is truncated; some statements and closing
    // braces are missing, so only the visible lines are described.
    IndexSearcher is = null;
    try {
        is = new IndexSearcher(zekrIndexReader.indexReader);

        // analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        // resultTokenStream = new StandardTokenizer(Version.LUCENE_CURRENT, reader);

        QueryParser parser = QueryParserFactory.create(Version.LUCENE_CURRENT, QuranTextIndexer.CONTENTS_FIELD,

        // allow search terms like "*foo" with leading star
        // parser.setFuzzyPrefixLength(10);

        // if this line is not set, highlighter doesn't work in in wildcard queries while query.rewrite() is done.
        // and sorting also doesn't work correctly for wildcard queries.
        // NOTE(review): the statement the two lines above refer to is missing from this excerpt.

        logger.debug("Parse query.");
        query = parser.parse(q);

        logger.debug("Rewrite query.");
        query = query.rewrite(zekrIndexReader.indexReader); // required to expand search terms

        logger.debug("Searching for: " + query.toString());
        // Hits hits;
        TopFieldDocs tops = null;
        is.setDefaultFieldSortScoring(true, true);
        // With a non-empty search scope the hits are filtered by that scope;
        // otherwise the query itself is used as the filter.
        if (searchScope != null && searchScope.getScopeItems().size() > 0) {
            String scopeQuery = makeSearchScope();
            logger.debug("Scope is: " + scopeQuery);
            // hits = is.search(query, new QuranRangeFilter(searchScope), sortResultOrder);
            tops = is.search(query, new QuranRangeFilter(searchScope), maxSearchResult, sortResultOrder);

        } else {
            // hits = is.search(query, new QueryWrapperFilter(query), 20, sortResultOrder);
            tops = is.search(query, new QueryWrapperFilter(query), maxSearchResult, sortResultOrder);

        logger.debug("Highlight search result.");
        Highlighter highlighter = new Highlighter(highlightFormatter, new QueryScorer(query));
        // highlighter.setFragmentScorer(new QueryTermScorer(query));

        // Never process more entries than maxSearchResult, even if more hits were counted.
        int total = Math.min(maxSearchResult, tops.totalHits);
        List<SearchResultItem> res = new ArrayList<SearchResultItem>(total);
        for (int i = 0; i < total; i++) {
            ScoreDoc[] sd = tops.scoreDocs;
            Document doc = is.doc(sd[i].doc);
            final String contents = doc.get(QuranTextIndexer.CONTENTS_FIELD);
            final IQuranLocation location = new QuranLocation(doc.get(QuranTextIndexer.LOCATION_FIELD));
            // The stored contents are re-tokenized so the highlighter can locate the matching terms.
            TokenStream tokenStream = analyzer.tokenStream(QuranTextIndexer.CONTENTS_FIELD,
                    new StringReader(contents));

            // String resultStr = highlighter.getBestFragment(tokenStream, contents);
            String resultStr = highlighter.getBestFragments(tokenStream, contents, 100, "...");
            SearchResultItem sri = new SearchResultItem(resultStr, location);
        // The formatter keeps a running highlight count, which is read back here.
        matchedItemCount = highlightFormatter.getHighlightCount();
        // highlightedTermList = highlightFormatter.getHighlightedTermList();
        return res;
    } catch (Exception e) {
        // Every failure is reported to the caller as a SearchException carrying the cause.
        throw new SearchException(e);
    } finally {
        if (is != null) {
            try {
            } catch (IOException e) {

From source file:net.skyatlas.icd.test.AnsegTest.java

/**
 * Returns the best-matching fragment of the document's "text" field with the query
 * matches wrapped in red {@code <font>} tags.
 * <p>
 * NOTE(review): this excerpt is truncated (the catch bodies and closing braces are
 * missing). The signature declares {@code InvalidTokenOffsetsException} although the
 * visible code also catches it; confirm which is intended.
 *
 * @param analyzer the analyzer used to tokenize the stored field text
 * @param query the query whose matches are highlighted
 * @param doc the document to read the "text" field from
 * @return the highlighted fragment, the plain field text when the highlighter
 *         returns {@code null}, or {@code null} via the final return statement
 */
private String toHighlighter(Analyzer analyzer, Query query, Document doc) throws InvalidTokenOffsetsException {
    String field = "text";
    try {
        SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        TokenStream tokenStream1 = analyzer.tokenStream("text", new StringReader(doc.get(field)));
        String highlighterStr = highlighter.getBestFragment(tokenStream1, doc.get(field));
        // Fall back to the unhighlighted field text when no fragment was produced.
        return highlighterStr == null ? doc.get(field) : highlighterStr;
    } catch (IOException e) {
        // TODO Auto-generated catch block
    } catch (InvalidTokenOffsetsException e) {
        // TODO Auto-generated catch block
    return null;

From source file:net.sourceforge.docfetcher.model.search.HighlightService.java

License:Open Source License

/**
 * Collects the positions of the query matches inside the given text.
 * <p>
 * The highlighter is run with a formatter that records offsets instead of
 * producing formatted text.
 * <p>
 * NOTE(review): this excerpt is truncated; comment delimiters, some statements and
 * closing braces are missing.
 *
 * @param query the query whose matching terms are located
 * @param text the text to scan
 * @return one {@code Range(start, end - start)} per recorded token
 * @throws CheckedOutOfMemoryError if highlighting raises an {@link OutOfMemoryError}
 */
@NotNull
private static List<Range> highlight(@NotNull Query query, @NotNull String text)
        throws CheckedOutOfMemoryError {
    final List<Range> ranges = new ArrayList<Range>();
     * A formatter is supposed to return formatted text, but since we're
     * only interested in the start and end offsets of the search terms, we
     * return null and store the offsets in a list.
    Formatter nullFormatter = new Formatter() {
        public String highlightTerm(String originalText, TokenGroup tokenGroup) {
            for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
                Token token = tokenGroup.getToken(i);
                // NOTE(review): the statement guarded by this check is missing from the
                // excerpt; presumably tokens with a score of 0 are skipped.
                if (tokenGroup.getScore(i) == 0)
                int start = token.startOffset();
                int end = token.endOffset();
                // The second Range argument is the token length, not its end offset.
                ranges.add(new Range(start, end - start));
            return null;
    String key = Fields.CONTENT.key();
    Highlighter highlighter = new Highlighter(nullFormatter, new QueryScorer(query, key));
    highlighter.setTextFragmenter(new NullFragmenter());
    try {
         * This has a return value, but we ignore it since we only want the
         * offsets. Might throw an OutOfMemoryError.
        highlighter.getBestFragment(IndexRegistry.getAnalyzer(), key, text);
    } catch (OutOfMemoryError e) {
        throw new CheckedOutOfMemoryError(e);
    } catch (Exception e) {
    return ranges;

From source file:net.sourceforge.docfetcher.view.PreviewPanel.java

License:Open Source License

/**
 * Sets the file to be displayed, using <tt>parser</tt> to extract the
 * text from the file on the disk. This method does nothing if the given
 * file is null. The <tt>force</tt> parameter specifies whether the
 * preview should be updated even if neither the file nor the search terms
 * have changed in the meantime.
 */
private void setFile(final File file, final Parser parser, final Query query, boolean force) {
    // NOTE(review): this excerpt is truncated; comment delimiters, several
    // statements and closing braces are missing, so only the visible lines
    // are described by the comments below.

    // Remember the previous file and query before overwriting the fields, so
    // the checks below can compare the new request against the last one.
    File lastFile = this.file;
    Query lastQuery = this.query;
    this.file = file;
    this.parser = parser;
    this.query = query;

    // Check input
    if (file == null)
    if (parser == null) // Allowed to be null if file is null, too
        throw new IllegalArgumentException();
    if (!isActive)
    if (file.equals(lastFile) && !force)
        if (query != null && query.equals(lastQuery))

    if (file.isDirectory())
        throw new IllegalStateException("File expected for preview, got directory instead."); //$NON-NLS-1$
    if (!file.exists()) {

    // Use the HTML browser
    if (file.getAbsolutePath().equals(Const.HELP_FILE) || Pref.Bool.PreviewHTML.getValue()) {
        final BrowserPanel browser = browserProvider.getBrowser(previewPanel, browserToolBar, parser);
        if (browser != null) {
            browser.addProgressListener(new ProgressAdapter() {
                public void completed(ProgressEvent event) {
                    occurrenceCounter.setText("0"); //$NON-NLS-1$
        // Browser creation failed, go on to next code block

    // Use text renderers

    // Use monospace font for text files
    if (parser instanceof TextParser) {
        org.eclipse.swt.graphics.Font monoFont = Font.PREVIEW_MONO.getFont();
        if (!textViewer.getFont().equals(monoFont))
    } else {
        org.eclipse.swt.graphics.Font previewFont = Font.PREVIEW.getFont();
        if (!textViewer.getFont().equals(previewFont))

    textViewer.setText(Msg.loading.value()); // display loading message

    new Thread() { // run in a thread because parsing the file takes some time
        public void run() {
            // Extract the raw text from the file
            String text;
            // Cleared when the text below is an error message rather than file contents.
            boolean fileParsed = true;
            try {
                text = parser.renderText(file);
            } catch (ParseException e) {
                text = Msg.cant_read_file.format(e.getMessage());
                fileParsed = false;
            } catch (OutOfMemoryError e) {
                 * We can get here if the user sets a high java heap space
                 * value during indexing and then sets a lower value for
                 * search only usage.
                text = Msg.out_of_jvm_memory.value();
                fileParsed = false;

            if (PreviewPanel.this.file != file)
                return; // Another preview request had been started while we were parsing

             * Create the message that will be displayed if the character limit
             * is reached. It is appended to the file contents later; if it
             * was appended here, some words in it might get highlighted.
            int maxLength = Pref.Int.PreviewLimit.getValue();
            final String msg = "...\n\n\n[" //$NON-NLS-1$
                    + Msg.preview_limit_hint.format(new Object[] { maxLength, Pref.Int.PreviewLimit.name(),
                            Const.USER_PROPERTIES_FILENAME })
                    + "]"; //$NON-NLS-1$
            final boolean exceeded = text.length() > maxLength;
            // The text is cut so that text plus hint message stays within maxLength.
            // NOTE(review): assumes maxLength is at least msg.length(); confirm.
            if (text.length() > maxLength)
                text = text.substring(0, maxLength - msg.length());
            final String fText = text;

             * Create StyleRange ranges (i.e. start-end integer pairs) for
             * search term highlighting. Only tokenize preview text if we're
             * not displaying any info messages and if there are tokens to
             * highlight.
            ranges = new int[0];
            if (fileParsed && query != null) {
                final List<Integer> rangesList = new ArrayList<Integer>();
                Analyzer analyzer = RootScope.analyzer;

                 * A formatter is supposed to return formatted text, but
                 * since we're only interested in the start and end offsets
                 * of the search terms, we return null and store the offsets
                 * in a list.
                Formatter nullFormatter = new Formatter() {
                    public String highlightTerm(String originalText, TokenGroup tokenGroup) {
                        for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
                            Token token = tokenGroup.getToken(i);
                            // NOTE(review): the statement guarded by this check is missing from
                            // the excerpt; presumably tokens with a score of 0 are skipped.
                            if (tokenGroup.getScore(i) == 0)
                            int start = token.startOffset();
                            int end = token.endOffset();
                            // NOTE(review): only the length is added in the visible lines, yet the
                            // counter below divides ranges.length by 2; a line adding 'start' may
                            // be missing from this excerpt.
                            rangesList.add(end - start);
                        return null;

                Highlighter highlighter = new Highlighter(nullFormatter,
                        new QueryScorer(query, Document.contents));
                highlighter.setTextFragmenter(new NullFragmenter());
                try {
                     * This has a return value, but we ignore it since we
                     * only want the offsets.
                    highlighter.getBestFragment(analyzer, Document.contents, fText);
                } catch (Exception e) {
                    // We can do without the search term highlighting

                // List to array (will be used by the method 'setHighlighting(..)')
                ranges = new int[rangesList.size()];
                for (int i = 0; i < ranges.length; i++)
                    ranges[i] = rangesList.get(i);

            // Parsing and tokenizing done; display the results
            final boolean fFileParsed = fileParsed;
            Display.getDefault().syncExec(new Runnable() {
                public void run() {
                    // Enable or disable up and down buttons
                    upBt.setEnabled(ranges.length != 0);
                    downBt.setEnabled(ranges.length != 0);

                    // Highlighting is applied only to parsed file text and only when the preference is on.
                    setHighlighting(fFileParsed && Pref.Bool.HighlightSearchTerms.getValue());
                    occurrenceCounter.setText(Integer.toString(ranges.length / 2));
                    if (exceeded)
                        textViewer.append(msg); // character limit exceeded, append hint

From source file:net.sourceforge.vaticanfetcher.model.search.HighlightService.java

License:Open Source License

/**
 * Collects the positions of the query matches inside the given text.
 * <p>
 * The highlighter is run with a formatter that records offsets instead of
 * producing formatted text.
 * <p>
 * NOTE(review): this excerpt is truncated; comment delimiters, some statements and
 * closing braces are missing.
 *
 * @param query the query whose matching terms are located
 * @param text the text to scan
 * @return one {@code Range(start, end - start)} per recorded token
 * @throws CheckedOutOfMemoryError if highlighting raises an {@link OutOfMemoryError}
 */
@NotNull
private static List<Range> highlight(@NotNull Query query, @NotNull String text)
        throws CheckedOutOfMemoryError {
    final List<Range> ranges = new ArrayList<Range>();
     * A formatter is supposed to return formatted text, but since we're
     * only interested in the start and end offsets of the search terms, we
     * return null and store the offsets in a list.
    Formatter nullFormatter = new Formatter() {
        public String highlightTerm(String originalText, TokenGroup tokenGroup) {
            for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
                Token token = tokenGroup.getToken(i);
                // NOTE(review): the statement guarded by this check is missing from the
                // excerpt; presumably tokens with a score of 0 are skipped.
                if (tokenGroup.getScore(i) == 0)
                int start = token.startOffset();
                int end = token.endOffset();
                // The second Range argument is the token length, not its end offset.
                ranges.add(new Range(start, end - start));
            return null;
    String key = Fields.CONTENT.key();
    Highlighter highlighter = new Highlighter(nullFormatter, new QueryScorer(query, key));
    highlighter.setTextFragmenter(new NullFragmenter());
    try {
         * This has a return value, but we ignore it since we only want the
         * offsets. Might throw an OutOfMemoryError.
        highlighter.getBestFragment(IndexRegistry.analyzer, key, text);
    } catch (OutOfMemoryError e) {
        throw new CheckedOutOfMemoryError(e);
    } catch (Exception e) {
    return ranges;

From source file:org.apache.blur.utils.HighlightHelper.java

License:Apache License

/**
 * NOTE: This method will not preserve the correct field types.
 *
 * @param preTag
 * @param postTag
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
        IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {
    // Builds a new Document from the highlighted values of the given document's
    // fields. Every value added below is a stored StringField, whatever the
    // type of the source field was.
    // NOTE(review): this excerpt is truncated; some statements and closing
    // braces are missing, so only the visible lines are described.

    String fieldLessFieldName = fieldManager.getFieldLessFieldName();

    // Default rewritten query, used for fields that are not "field-less indexed".
    Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);

    Analyzer analyzer = fieldManager.getAnalyzerForQuery();

    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
    Document result = new Document();
    for (IndexableField f : document) {
        String name = f.name();
        // NOTE(review): the body of this check is missing from the excerpt;
        // presumably the field-less field and the excluded fields are skipped.
        if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
        String text = f.stringValue();
        Number numericValue = f.numericValue();

        Query fieldFixedQuery;
        if (fieldManager.isFieldLessIndexed(name)) {
            // Field-less indexed fields get a query rewritten for this specific field name.
            fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
        } else {
            fieldFixedQuery = fixedQuery;

        if (numericValue != null) {
            // Numeric values are not tokenized; the whole value is wrapped in the tags.
            if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
                String numberHighlight = preTag + text + postTag;
                result.add(new StringField(name, numberHighlight, Store.YES));
        } else {
            Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
            TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
            TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
            for (int j = 0; j < frag.length; j++) {
                // Only fragments that actually matched the query (score > 0) are added.
                if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                    result.add(new StringField(name, frag[j].toString(), Store.YES));
    return result;

From source file:org.apache.jena.query.text.TextIndexLucene.java

License:Apache License

/**
 * Builds a {@code TextHit} for each scored document, highlighting the value of the
 * given field when the document has one.
 * <p>
 * NOTE(review): this excerpt is truncated; some statements and closing braces are
 * missing (for example, the line that adds each hit to the result list is not visible).
 *
 * @param sDocs the scored documents to convert
 * @param indexSearcher the searcher used to load each document
 * @param query the query whose matches are highlighted
 * @param field the name of the field whose value is highlighted
 * @param highlight the option string parsed into {@code HighlightOpts}
 * @return the list of hits
 * @throws IOException declared by the signature; presumably raised by the index or
 *         token stream access
 * @throws InvalidTokenOffsetsException declared by the signature; presumably raised
 *         by the highlighter
 */
private List<TextHit> highlightResults(ScoreDoc[] sDocs, IndexSearcher indexSearcher, Query query, String field,
        String highlight) throws IOException, InvalidTokenOffsetsException {
    List<TextHit> results = new ArrayList<>();

    HighlightOpts opts = new HighlightOpts(highlight);

    // Start/end markers and fragment size all come from the parsed options.
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(opts.start, opts.end);
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(opts.fragSize));

    for (ScoreDoc sd : sDocs) {
        Document doc = indexSearcher.doc(sd.doc);
        log.trace("highlightResults[{}]: {}", sd.doc, doc);
        String entity = doc.get(docDef.getEntityField());

        Node literal = null; // assigned below only when the document has a value for the field
        String lexical = doc.get(field);
        if (lexical != null) {
            String docLang = doc.get(docDef.getLangField());
            TokenStream tokenStream = analyzer.tokenStream(field, lexical);
            TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, lexical, opts.joinFrags,
            String rez = frags2string(frags, opts);

            // The highlighted text becomes a literal tagged with the document's language value.
            literal = NodeFactory.createLiteral(rez, docLang);

        // The graph node is resolved only when a graph field is configured and present.
        String graf = docDef.getGraphField() != null ? doc.get(docDef.getGraphField()) : null;
        Node graph = graf != null ? TextQueryFuncs.stringToNode(graf) : null;

        Node subject = TextQueryFuncs.stringToNode(entity);
        TextHit hit = new TextHit(subject, sd.score, literal, graph);
    return results;