Example usage for org.apache.lucene.search.highlight Highlighter setMaxDocCharsToAnalyze


Introduction

On this page you can find example usage for org.apache.lucene.search.highlight Highlighter setMaxDocCharsToAnalyze.

Prototype

public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) 
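
setMaxDocCharsToAnalyze limits how many characters of the document text the highlighter tokenizes and scores; text past that offset is ignored when fragments are chosen, which is why several of the examples below pass Integer.MAX_VALUE to force the whole field to be analyzed. The following is a minimal sketch of the typical call sequence, not taken from the examples on this page; the "contents" field name, the StandardAnalyzer, the 50000-character limit, and a recent Lucene version are assumptions made for illustration.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;

public class MaxDocCharsExample {
    public static String bestFragment(Query query, String text)
            throws IOException, InvalidTokenOffsetsException {
        QueryScorer scorer = new QueryScorer(query, "contents");
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 100));
        // Only the first 50000 characters of 'text' are analyzed for highlighting;
        // pass Integer.MAX_VALUE instead to analyze the whole text.
        highlighter.setMaxDocCharsToAnalyze(50000);
        Analyzer analyzer = new StandardAnalyzer();
        return highlighter.getBestFragment(analyzer, "contents", text);
    }
}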

Usage

From source file:com.flaptor.hounder.searcher.SnippetSearcher.java

License:Apache License

/**
 * Adds snippets to the search results.
 * How stuff works:
 * For each 'group g' in provided GroupedSearchResults:
 *   For each result in 'g':
 *     Use the lucene highlighter to get the terms highlighted on the required field.
 *     Then call getSnippet(...) to get the resulting snippet
 */
private void addSnippets(GroupedSearchResults res, String snippetOfField, int snippetLength, QueryScorer scorer,
        Formatter simpleHtmlFormatter) throws IOException {

    Highlighter highlighter = new Highlighter(simpleHtmlFormatter, scorer);
    highlighter.setTextFragmenter(NULL_FRAGMENTER);
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); // make sure the whole text will be analyzed
    // Here we store every phrase we have already seen; it is used to give a
    // lower score to recurring phrases.
    Set<String> usedSnippets = new HashSet<String>();

    for (int j = 0; j < res.groups(); j++) { // for each group
        Vector<Document> resDocs = res.getGroup(j).last();
        int docsLen = resDocs.size();
        for (int i = 0; i < docsLen; i++) { // for each document in that group
            Document doc = resDocs.get(i); // get the document i
            String text = doc.get(snippetOfField); // text to be snippeted
            if (null == text) {
                logger.warn("Asked to snippet an unexisting field: " + snippetOfField);
                continue;
            }

            TokenStream tokenStream = queryParser.tokenStream(snippetOfField, new StringReader(text));
            TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 1);

            String result = null;
            if (null != fragments && 0 < fragments.length) {
                result = getSnippet(fragments[0].toString(), snippetLength, scorer, usedSnippets);
            }

            if (null == result || 0 == result.length()) {
                if (emptySnippetsAllowed) {
                    result = "";
                } else {
                    result = text.substring(0, Math.min(text.length(), snippetLength));
                }
            }
            String snippetF = SNIPPET_FIELDNAME_PREFIX + snippetOfField;
            doc.add(new Field(snippetF, result, Field.Store.YES, Field.Index.NO));
        }
    }
}

From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java

License:Apache License

@Override
public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms)
        throws InvalidQuerySyntaxException {
    Analyzer analyzer = null;

    // default QueryParser.escape(pattern) method does not support phrase queries
    pattern = QuerySyntaxUtil.escapeQueryPattern(pattern);
    if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) {
        return Collections.emptyList();
    }

    logger.log(Level.FINE, "Escaped search pattern: " + pattern);

    Lock lock = rwlock.readLock();
    lock.lock();
    if (exception != null) {
        lock.unlock();
        throw new RuntimeException("Failed to refesh index reader after last commit", exception);
    }

    try {
        List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>();
        analyzer = new TermNameAnalyzer(false);

        QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer);
        Query query = parser.parse(pattern);

        logger.log(Level.FINE, "Query: " + query);

        // For highlighting words in query results
        QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM);
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
        SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder();
        Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer);
        highlighter.setMaxDocCharsToAnalyze(MAX_CHARS);
        scorer.setExpandMultiTermQuery(true);

        // Perform search
        ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            int id = hits[i].doc;
            Document doc = searcher.doc(id);
            String ontology = doc.get(FIELD_ONTOLOGY);
            String referenceId = doc.get(FIELD_ID);
            String term = doc.get(FIELD_TERM);
            byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM);
            boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1;

            if (!isSynonym || includeSynonyms) {
                Analyzer highlighterAnalyzer = new TermNameAnalyzer(true);
                TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM,
                        highlighterAnalyzer);
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, term, true, 1);
                if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) {
                    results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(),
                            frag[0].getScore(), isSynonym));
                }
                highlighterAnalyzer.close();
            }
        }

        return results;
    } catch (ParseException e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (TokenMgrError e) {
        throw new InvalidQuerySyntaxException(e.getMessage(), e);
    } catch (Throwable e) {
        String msg = "Failed to perform Lucene seach with pattern: " + pattern;
        logger.log(Level.WARNING, msg, e);
        throw new RuntimeException(msg, e);
    } finally {
        close(analyzer);
        lock.unlock();
    }
}

From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java

License:Apache License

private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId,
        Document doc, String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)))
        return;
    // END: Hack

    SolrParams params = req.getParams();
    IndexableField[] docFields = doc.getFields(fieldName);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
        listFields.add(field.stringValue());
    }

    String[] docTexts = listFields.toArray(new String[listFields.size()]);

    // according to Document javadoc, doc.getValues() never returns null. check empty instead of null
    if (docTexts.length == 0)
        return;

    TokenStream tokenStream;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    List<TextFragment> frags = new ArrayList<TextFragment>();

    TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
    try {
        //      TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName);
        //      if (tvStream != null) {
        //        tots = new TermOffsetsTokenStream(tvStream);
        //      }
    } catch (IllegalArgumentException e) {
        // No problem. But we can't use TermOffsets optimization.
    }

    for (int j = 0; j < docTexts.length; j++) {
        if (tots != null) {
            // if we're using TermOffsets optimization, then get the next
            // field value's TokenStream (i.e. get field j's TokenStream) from tots:
            tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length());
        } else {
            // fall back to analyzer
            tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
        }

        int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS,
                Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            if (maxCharsToAnalyze < 0) {
                tokenStream = new CachingTokenFilter(tokenStream);
            } else {
                tokenStream = new CachingTokenFilter(
                        new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze));
            }

            // get highlighter
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream);

            // after highlighter initialization, reset tstream since construction of highlighter already used it
            tokenStream.reset();
        } else {
            // use "the old way"
            highlighter = getHighlighter(query, fieldName, req);
        }

        if (maxCharsToAnalyze < 0) {
            highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
        } else {
            highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        }

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j],
                    mergeContiguousFragments, numFragments);
            for (int k = 0; k < bestTextFragments.length; k++) {
                if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                    frags.add(bestTextFragments[k]);
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }
    // sort such that the fragments with the highest score come first
    Collections.sort(frags, new Comparator<TextFragment>() {
        public int compare(TextFragment arg0, TextFragment arg1) {
            // Float.compare avoids the bug in Math.round(arg1 - arg0), which treats
            // score differences smaller than 0.5 as equality.
            return Float.compare(arg1.getScore(), arg0.getScore());
        }
    });

    // convert fragments back into text
    // TODO: we can include score and position information in output as snippet attributes
    String[] summaries = null;
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if ((fragment != null) && (fragment.getScore() > 0)) {
                fragTexts.add(fragment.toString());
            }
            if (fragTexts.size() >= numFragments)
                break;
        }
        summaries = fragTexts.toArray(new String[0]); // toArray() with no argument returns Object[], so a String[] cast would fail
        if (summaries.length > 0)
            docSummaries.add(fieldName, summaries);
    }
    // no summaries made, copy text from alternate field
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);
    }
}

From source file:docet.engine.SimpleDocetDocSearcher.java

License:Apache License

@Override
public List<DocetPage> searchForMatchingDocuments(final String searchText, final String lang,
        final int maxNumResults) throws DocetDocumentSearchException {
    final List<DocetPage> results = new ArrayList<>();
    final String fallbackLang = this.getFallbackLangForLang(lang);
    final String actualSearchLang;
    if (fallbackLang.isEmpty()) {
        actualSearchLang = lang;
    } else {
        actualSearchLang = fallbackLang;
    }
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final Analyzer analyzer = new AnalyzerBuilder().language(actualSearchLang).build();
        QueryParser queryParser = new QueryParser(LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang, analyzer);
        final Query query = queryParser.parse(constructLucenePhraseTermSearchQuery(searchText));
        final QueryScorer queryScorer = new QueryScorer(query, LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang);

        final Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        final Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        highlighter.setTextFragmenter(fragmenter);

        final TopDocs res = searcher.search(query, maxNumResults);
        final float maxScore = res.getMaxScore();
        final List<ScoreDoc> scoreDocs = Arrays.asList(res.scoreDocs);
        Map<org.apache.lucene.document.Document, String> docs = new HashMap<>();
        Map<String, ScoreDoc> scoresForDocs = new HashMap<>();
        for (final ScoreDoc sd : scoreDocs) {
            final org.apache.lucene.document.Document doc = searcher.doc(sd.doc);
            final String contents = doc.get(LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang);
            final String docId = doc.get("id");
            final String[] fragments = highlighter.getBestFragments(analyzer,
                    LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang, contents, MAX_NUM_FRAGMENTS);
            List<String> fragmentList = Arrays.asList(fragments);
            fragmentList = fragmentList.stream().map(s1 -> s1.trim().split("\n"))
                    .map(s1 -> Arrays.asList(s1).stream().filter(s -> !s.trim().isEmpty())
                            .reduce((sa, sb) -> sa + MACHING_EXCERPTS_SEPARATOR + sb)
                            .orElse(MACHING_EXCERPTS_SEPARATOR))
                    .collect(Collectors.toList());
            docs.put(doc,
                    MACHING_EXCERPTS_SEPARATOR
                            + fragmentList.stream().filter(s -> !s.isEmpty())
                                    .reduce((s1, s2) -> s1 + "..." + s2).orElse("")
                            + MACHING_EXCERPTS_SEPARATOR);
            scoresForDocs.putIfAbsent(docId, sd);
        }
        docs.entrySet().stream().forEach(e -> {
            final int relevance = Math.round((scoresForDocs.get(e.getKey().get("id")).score / maxScore) * 100);
            results.add(DocetPage.toDocetDocument(e.getKey(), e.getValue(), relevance));
        });
        return results;
    } catch (ParseException | IOException | InvalidTokenOffsetsException ex) {
        throw new DocetDocumentSearchException(
                "Error on searching query " + searchText + " for lang " + actualSearchLang, ex);
    }
}

From source file:natural.language.qa.LuceneSearch.java

License:Apache License

public List<LuceneSearchResult> search(String queryString, int maxRes) throws Exception {
    IndexSearcher searcher = null;
    List<LuceneSearchResult> results = new ArrayList<LuceneSearchResult>();
    try {
        Properties indexConf = new Properties();
        FileInputStream fis = new FileInputStream("index.properties");
        indexConf.load(fis);

        String index = indexConf.getProperty("index");
        String field = "contents";

        Directory indexDir = FSDirectory.open(new File(index));

        searcher = new IndexSearcher(indexDir);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

        QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);

        queryString = queryString.trim();
        if (queryString.length() == 0) {
            return results;
        }

        Query query = parser.parse(queryString);
        System.out.println("Searching for: " + query.toString(field));

        // ================================================
        Formatter f = new SimpleHTMLFormatter("", "");
        Encoder e = new DefaultEncoder();
        QueryScorer fs = new QueryScorer(query);
        Fragmenter fragmenter = new SimpleSpanFragmenter(fs, 50);// new SentenceFragmenter();
        Highlighter h = new Highlighter(f, e, fs);
        h.setTextFragmenter(fragmenter);
        h.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

        // ================================================

        // Collect docs
        TopDocs res = searcher.search(query, maxRes);
        int numTotalHits = res.totalHits;
        ScoreDoc[] scoreDocs = res.scoreDocs;

        for (ScoreDoc scoreDoc : scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String path = doc.get("path");
            String content = readDocument(path);
            String bestFragment = h.getBestFragment(analyzer, field, content);
            String frag = bestFragment;
            //System.out.println(frag);
            LuceneSearchResult hit = new LuceneSearchResult(scoreDoc.doc, path, frag);
            results.add(hit);
        }
        System.out.println(numTotalHits + " total matching documents");
    } finally {
        if (searcher != null) {
            searcher.close();
        }
    }
    return results;
}

From source file:net.chwise.documents.HighlightedFragmentsRetriever.java

License:Open Source License

public String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String fieldName,
        String fieldContents, int fragmentNumber, int fragmentSize)
        throws IOException, InvalidTokenOffsetsException {

    TokenStream stream = TokenSources.getTokenStream(fieldName, fieldContents, analyzer);
    QueryScorer scorer = new QueryScorer(query, fieldName);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentSize);

    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(fragmenter);
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

    String[] fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber);

    if (fragments.length == 0) {
        //Return starting piece of fieldContents fragment
        fragments = new String[1];
        fragments[0] = fieldContents.substring(0, Math.min(fragmentSize, fieldContents.length()));
    }

    return fragments;
}

From source file:net.riezebos.thoth.content.search.Searcher.java

License:Apache License

public PagedList<SearchResult> search(Identity identity, String queryExpression, int pageNumber, int pageSize)
        throws SearchException {
    try {
        IndexReader reader = getIndexReader(contentManager);
        IndexSearcher searcher = getIndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();

        // We might need to restrict the results to books if the user does not have access to fragments:
        AccessManager accessManager = contentManager.getAccessManager();
        boolean booksOnly = !accessManager.hasPermission(identity, "", Permission.READ_FRAGMENTS);
        if (booksOnly) {
            queryExpression = Indexer.INDEX_TYPE + ":" + Indexer.TYPE_DOCUMENT + " AND (" + queryExpression
                    + ")";
        }

        QueryParser parser = new QueryParser(Indexer.INDEX_CONTENTS, analyzer);
        Query query = parser.parse(queryExpression);

        // We add 1 to determine if there is more to be found after the current page
        int maxResults = pageSize * pageNumber + 1;
        TopDocs results = searcher.search(query, maxResults, Sort.RELEVANCE);
        ScoreDoc[] hits = results.scoreDocs;

        boolean hadMore = (hits.length == maxResults);

        List<SearchResult> searchResults = new ArrayList<>();
        int idx = 0;
        for (ScoreDoc scoreDoc : hits) {
            if (searchResults.size() == pageSize)
                break;
            idx++;
            if (idx >= (pageNumber - 1) * pageSize) {
                Document document = searcher.doc(scoreDoc.doc);
                IndexableField field = document.getField(Indexer.INDEX_PATH);
                String documentPath = field.stringValue();
                SearchResult searchResult = new SearchResult();
                searchResult.setIndexNumber((pageNumber - 1) * pageSize + idx);
                searchResult.setDocument(documentPath);

                String type = document.get(Indexer.INDEX_TYPE);
                if (Indexer.TYPE_DOCUMENT.equals(type) || Indexer.TYPE_FRAGMENT.equals(type)) {
                    searchResult.setResource(false);

                    try {
                        MarkDownDocument markDownDocument = contentManager.getMarkDownDocument(documentPath,
                                true, CriticProcessingMode.DO_NOTHING);
                        String contents = markDownDocument.getMarkdown();

                        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
                        Highlighter highlighter = new Highlighter(htmlFormatter,
                                new QueryScorer(query, Indexer.INDEX_CONTENTS));
                        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);

                        TokenStream tokenStream = analyzer.tokenStream(Indexer.INDEX_CONTENTS, contents);

                        TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, contents, false,
                                99999);
                        for (TextFragment frag : frags) {
                            if ((frag != null) && (frag.getScore() > 0)) {
                                String fragmentText = frag.toString();
                                searchResult.addFragment(
                                        new Fragment(ThothCoreUtil.escapeHtmlExcept("B", fragmentText)));
                            }
                        }
                    } catch (FileNotFoundException e) {
                        LOG.warn(
                                "Index contains an invalid file reference); probably need to reindex to get rid of this. File: "
                                        + e.getMessage());
                    }
                } else {
                    searchResult.setResource(true);
                    String extension = ThothUtil.getExtension(documentPath);
                    searchResult.setImage(getConfiguration().isImageExtension(extension));

                    searchResult.addFragment(new Fragment(document.get(Indexer.INDEX_TITLE)));
                }
                searchResults.add(searchResult);
            }
        }
        reader.close();
        linkBooks(searchResults);
        PagedList<SearchResult> pagedList = new PagedList<>(searchResults, hadMore);
        return pagedList;
    } catch (Exception e) {
        throw new SearchException(e);
    }
}

From source file:net.sourceforge.docfetcher.model.search.HighlightService.java

License:Open Source License

@MutableCopy
@NotNull
private static List<Range> highlight(@NotNull Query query, @NotNull String text)
        throws CheckedOutOfMemoryError {
    final List<Range> ranges = new ArrayList<Range>();
    /*
     * A formatter is supposed to return formatted text, but since we're
     * only interested in the start and end offsets of the search terms, we
     * return null and store the offsets in a list.
     */
    Formatter nullFormatter = new Formatter() {
        public String highlightTerm(String originalText, TokenGroup tokenGroup) {
            for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
                Token token = tokenGroup.getToken(i);
                if (tokenGroup.getScore(i) == 0)
                    continue;
                int start = token.startOffset();
                int end = token.endOffset();
                ranges.add(new Range(start, end - start));
            }
            return null;
        }
    };
    String key = Fields.CONTENT.key();
    Highlighter highlighter = new Highlighter(nullFormatter, new QueryScorer(query, key));
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
    highlighter.setTextFragmenter(new NullFragmenter());
    try {
        /*
         * This has a return value, but we ignore it since we only want the
         * offsets. Might throw an OutOfMemoryError.
         */
        highlighter.getBestFragment(IndexRegistry.getAnalyzer(), key, text);
    } catch (OutOfMemoryError e) {
        throw new CheckedOutOfMemoryError(e);
    } catch (Exception e) {
        Util.printErr(e);
    }
    return ranges;
}

From source file:net.sourceforge.docfetcher.view.PreviewPanel.java

License:Open Source License

/**
 * Sets the file to be displayed, using <tt>parser</tt> to extract the
 * text from the file on the disk. This method does nothing if the given
 * file is null. The <tt>force</tt> parameter specifies whether the
 * preview should be updated even if neither the file nor the search terms
 * have changed in the meantime.
 */
private void setFile(final File file, final Parser parser, final Query query, boolean force) {
    File lastFile = this.file;
    Query lastQuery = this.query;
    this.file = file;
    this.parser = parser;
    this.query = query;

    // Check input
    if (file == null)
        return;
    if (parser == null) // Allowed to be null if file is null, too
        throw new IllegalArgumentException();
    if (!isActive)
        return;
    if (file.equals(lastFile) && !force)
        if (query != null && query.equals(lastQuery))
            return;

    if (file.isDirectory())
        throw new IllegalStateException("File expected for preview, got directory instead."); //$NON-NLS-1$
    if (!file.exists()) {
        textViewer.setText(Msg.file_not_found.value());
        showViewer(textViewerContainer);
        return;
    }

    // Use the HTML browser
    if (file.getAbsolutePath().equals(Const.HELP_FILE) || Pref.Bool.PreviewHTML.getValue()) {
        final BrowserPanel browser = browserProvider.getBrowser(previewPanel, browserToolBar, parser);
        if (browser != null) {
            browser.addProgressListener(new ProgressAdapter() {
                public void completed(ProgressEvent event) {
                    showViewer(browser);
                    upBt.setEnabled(false);
                    downBt.setEnabled(false);
                    occurrenceCounter.setText("0"); //$NON-NLS-1$
                }
            });
            browser.setFile(file);
            return;
        }
        // Browser creation failed, go on to next code block
    }

    // Use text renderers
    showViewer(textViewerContainer);

    // Use monospace font for text files
    if (parser instanceof TextParser) {
        org.eclipse.swt.graphics.Font monoFont = Font.PREVIEW_MONO.getFont();
        if (!textViewer.getFont().equals(monoFont))
            textViewer.setFont(monoFont);
    } else {
        org.eclipse.swt.graphics.Font previewFont = Font.PREVIEW.getFont();
        if (!textViewer.getFont().equals(previewFont))
            textViewer.setFont(previewFont);
    }

    textViewer.setText(Msg.loading.value()); // display loading message

    new Thread() { // run in a thread because parsing the file takes some time
        public void run() {
            // Extract the raw text from the file
            String text;
            boolean fileParsed = true;
            try {
                text = parser.renderText(file);
            } catch (ParseException e) {
                text = Msg.cant_read_file.format(e.getMessage());
                fileParsed = false;
            } catch (OutOfMemoryError e) {
                /*
                 * We can get here if the user sets a high java heap space
                 * value during indexing and then sets a lower value for
                 * search only usage.
                 */
                text = Msg.out_of_jvm_memory.value();
                fileParsed = false;
            }

            if (PreviewPanel.this.file != file)
                return; // Another preview request had been started while we were parsing

            /*
             * Create the message that will be displayed if the character limit
             * is reached. It is appended to the file contents later; if it
             * was appended here, some words in it might get highlighted.
             */
            int maxLength = Pref.Int.PreviewLimit.getValue();
            final String msg = "...\n\n\n[" //$NON-NLS-1$
                    + Msg.preview_limit_hint.format(new Object[] { maxLength, Pref.Int.PreviewLimit.name(),
                            Const.USER_PROPERTIES_FILENAME })
                    + "]"; //$NON-NLS-1$
            final boolean exceeded = text.length() > maxLength;
            if (text.length() > maxLength)
                text = text.substring(0, maxLength - msg.length());
            final String fText = text;

            /*
             * Create StyleRange ranges (i.e. start-end integer pairs) for
             * search term highlighting. Only tokenize preview text if we're
             * not displaying any info messages and if there are tokens to
             * highlight.
             */
            ranges = new int[0];
            if (fileParsed && query != null) {
                final List<Integer> rangesList = new ArrayList<Integer>();
                Analyzer analyzer = RootScope.analyzer;

                /*
                 * A formatter is supposed to return formatted text, but
                 * since we're only interested in the start and end offsets
                 * of the search terms, we return null and store the offsets
                 * in a list.
                 */
                Formatter nullFormatter = new Formatter() {
                    public String highlightTerm(String originalText, TokenGroup tokenGroup) {
                        for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
                            Token token = tokenGroup.getToken(i);
                            if (tokenGroup.getScore(i) == 0)
                                continue;
                            int start = token.startOffset();
                            int end = token.endOffset();
                            rangesList.add(start);
                            rangesList.add(end - start);
                        }
                        return null;
                    }
                };

                Highlighter highlighter = new Highlighter(nullFormatter,
                        new QueryScorer(query, Document.contents));
                highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
                highlighter.setTextFragmenter(new NullFragmenter());
                try {
                    /*
                     * This has a return value, but we ignore it since we
                     * only want the offsets.
                     */
                    highlighter.getBestFragment(analyzer, Document.contents, fText);
                } catch (Exception e) {
                    // We can do without the search term highlighting
                }

                // List to array (will be used by the method 'setHighlighting(..)')
                ranges = new int[rangesList.size()];
                for (int i = 0; i < ranges.length; i++)
                    ranges[i] = rangesList.get(i);
            }

            // Parsing and tokenizing done; display the results
            final boolean fFileParsed = fileParsed;
            Display.getDefault().syncExec(new Runnable() {
                public void run() {
                    // Enable or disable up and down buttons
                    upBt.setEnabled(ranges.length != 0);
                    downBt.setEnabled(ranges.length != 0);

                    textViewer.setText(fText);
                    setHighlighting(fFileParsed && Pref.Bool.HighlightSearchTerms.getValue());
                    occurrenceCounter.setText(Integer.toString(ranges.length / 2));
                    if (exceeded)
                        textViewer.append(msg); // character limit exceeded, append hint
                }
            });
        }
    }.start();
}

From source file:net.sourceforge.vaticanfetcher.model.search.HighlightService.java

License:Open Source License

@MutableCopy
@NotNull
private static List<Range> highlight(@NotNull Query query, @NotNull String text)
        throws CheckedOutOfMemoryError {
    final List<Range> ranges = new ArrayList<Range>();
    /*
     * A formatter is supposed to return formatted text, but since we're
     * only interested in the start and end offsets of the search terms, we
     * return null and store the offsets in a list.
     */
    Formatter nullFormatter = new Formatter() {
        public String highlightTerm(String originalText, TokenGroup tokenGroup) {
            for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
                Token token = tokenGroup.getToken(i);
                if (tokenGroup.getScore(i) == 0)
                    continue;
                int start = token.startOffset();
                int end = token.endOffset();
                ranges.add(new Range(start, end - start));
            }
            return null;
        }
    };
    String key = Fields.CONTENT.key();
    Highlighter highlighter = new Highlighter(nullFormatter, new QueryScorer(query, key));
    highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
    highlighter.setTextFragmenter(new NullFragmenter());
    try {
        /*
         * This has a return value, but we ignore it since we only want the
         * offsets. Might throw an OutOfMemoryError.
         */
        highlighter.getBestFragment(IndexRegistry.analyzer, key, text);
    } catch (OutOfMemoryError e) {
        throw new CheckedOutOfMemoryError(e);
    } catch (Exception e) {
        Util.printErr(e);
    }
    return ranges;
}