List of usage examples for org.apache.lucene.search.highlight.Highlighter#setMaxDocCharsToAnalyze
public void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze)
From source file:com.flaptor.hounder.searcher.SnippetSearcher.java
License:Apache License
/** * Adds snippets to the search results./*from w w w . j ava2s. c o m*/ * How stuff works: * For each 'group g' in provided GroupedSearchResults: * For each result in 'g': * Use the lucene highlighter to get the terms highlighted on the required field. * Then call getSnippet(...) to get the resulting snippet */ private void addSnippets(GroupedSearchResults res, String snippetOfField, int snippetLength, QueryScorer scorer, Formatter simpleHtmlFormatter) throws IOException { Highlighter highlighter = new Highlighter(simpleHtmlFormatter, scorer); highlighter.setTextFragmenter(NULL_FRAGMENTER); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); // make sure the whole text will be analyzed // Here we store every seen phrase. It is used to give less score to // recurrying phrases Set<String> usedSnippets = new HashSet<String>(); for (int j = 0; j < res.groups(); j++) { // for each group Vector<Document> resDocs = res.getGroup(j).last(); int docsLen = resDocs.size(); for (int i = 0; i < docsLen; i++) { // for each document on that group Document doc = resDocs.get(i); // get the document i String text = doc.get(snippetOfField); // text to be snippeted if (null == text) { logger.warn("Asked to snippet an unexisting field: " + snippetOfField); continue; } TokenStream tokenStream = queryParser.tokenStream(snippetOfField, new StringReader(text)); TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 1); String result = null; if (null != fragments && 0 < fragments.length) { result = getSnippet(fragments[0].toString(), snippetLength, scorer, usedSnippets); } if (null == result || 0 == result.length()) { // if (emptySnippetsAllowed) { result = ""; } else { result = text.substring(0, Math.min(text.length(), snippetLength)); } } String snippetF = SNIPPET_FIELDNAME_PREFIX + snippetOfField; doc.add(new Field(snippetF, result.toString(), Field.Store.YES, Field.Index.NO)); } } }
From source file:com.novartis.pcs.ontology.service.search.OntologySearchServiceImpl.java
License:Apache License
@Override public List<HTMLSearchResult> search(String pattern, boolean includeSynonyms) throws InvalidQuerySyntaxException { Analyzer analyzer = null;/*from w w w . j av a2 s.co m*/ // default QueryParser.escape(pattern) method does not support phrase queries pattern = QuerySyntaxUtil.escapeQueryPattern(pattern); if (pattern.length() < EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE) { return Collections.emptyList(); } logger.log(Level.FINE, "Escaped search pattern: " + pattern); Lock lock = rwlock.readLock(); lock.lock(); if (exception != null) { lock.unlock(); throw new RuntimeException("Failed to refesh index reader after last commit", exception); } try { List<HTMLSearchResult> results = new ArrayList<HTMLSearchResult>(); analyzer = new TermNameAnalyzer(false); QueryParser parser = new QueryParser(Version.LUCENE_30, FIELD_TERM, analyzer); Query query = parser.parse(pattern); logger.log(Level.FINE, "Query: " + query); // For highlighting words in query results QueryScorer scorer = new QueryScorer(query, reader, FIELD_TERM); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); SimpleHTMLEncoder htmlEncoder = new SimpleHTMLEncoder(); Highlighter highlighter = new Highlighter(htmlFormatter, htmlEncoder, scorer); highlighter.setMaxDocCharsToAnalyze(MAX_CHARS); scorer.setExpandMultiTermQuery(true); // Perform search ScoreDoc[] hits = searcher.search(query, numberOfDocuments).scoreDocs; for (int i = 0; i < hits.length; i++) { int id = hits[i].doc; Document doc = searcher.doc(id); String ontology = doc.get(FIELD_ONTOLOGY); String referenceId = doc.get(FIELD_ID); String term = doc.get(FIELD_TERM); byte[] synonymBytes = doc.getBinaryValue(FIELD_SYNONYM); boolean isSynonym = synonymBytes != null && synonymBytes.length == 1 && synonymBytes[0] == 1; if (!isSynonym || includeSynonyms) { Analyzer highlighterAnalyzer = new TermNameAnalyzer(true); TokenStream tokenStream = TokenSources.getTokenStream(reader, id, FIELD_TERM, highlighterAnalyzer); TextFragment[] frag = 
highlighter.getBestTextFragments(tokenStream, term, true, 1); if (frag.length > 0 && frag[0] != null && frag[0].getScore() > 0) { results.add(new HTMLSearchResult(ontology, referenceId, term, frag[0].toString(), frag[0].getScore(), isSynonym)); } highlighterAnalyzer.close(); } } return results; } catch (ParseException e) { throw new InvalidQuerySyntaxException(e.getMessage(), e); } catch (TokenMgrError e) { throw new InvalidQuerySyntaxException(e.getMessage(), e); } catch (Throwable e) { String msg = "Failed to perform Lucene seach with pattern: " + pattern; logger.log(Level.WARNING, msg, e); throw new RuntimeException(msg, e); } finally { close(analyzer); lock.unlock(); } }
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;/*from ww w . j a v a 2s . c o m*/ // END: Hack SolrParams params = req.getParams(); IndexableField[] docFields = doc.getFields(fieldName); List<String> listFields = new ArrayList<String>(); for (IndexableField field : docFields) { listFields.add(field.stringValue()); } String[] docTexts = listFields.toArray(new String[listFields.size()]); // according to Document javadoc, doc.getValues() never returns null. check empty instead of null if (docTexts.length == 0) return; TokenStream tokenStream; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization try { // TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName); // if (tvStream != null) { // tots = new TermOffsetsTokenStream(tvStream); // } } catch (IllegalArgumentException e) { // No problem. But we can't use TermOffsets optimization. } for (int j = 0; j < docTexts.length; j++) { if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. 
get field j's TokenStream) from tots: tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length()); } else { // fall back to analyzer tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tokenStream = new CachingTokenFilter(tokenStream); } else { tokenStream = new CachingTokenFilter( new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream); // after highlighter initialization, reset tstream since construction of highlighter already used it tokenStream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first Collections.sort(frags, new Comparator<TextFragment>() { public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes String[] summaries = null; if (frags.size() > 0) { ArrayList<String> fragTexts = new 
ArrayList<String>(); for (TextFragment fragment : frags) { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } if (fragTexts.size() >= numFragments) break; } summaries = (String[]) fragTexts.toArray(); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }
From source file:docet.engine.SimpleDocetDocSearcher.java
License:Apache License
/**
 * Searches the content field of the given language (or of its fallback
 * language, when one is configured) and returns the matching pages with
 * highlighted excerpts and a relevance percentage relative to the best hit.
 *
 * Results are returned in the order delivered by the searcher.
 *
 * @param searchText    the text to search for
 * @param lang          the requested language
 * @param maxNumResults maximum number of hits to retrieve
 * @return the matching pages
 * @throws DocetDocumentSearchException if parsing, searching or highlighting fails
 */
@Override
public List<DocetPage> searchForMatchingDocuments(final String searchText, final String lang,
        final int maxNumResults) throws DocetDocumentSearchException {
    final List<DocetPage> results = new ArrayList<>();
    final String fallbackLang = this.getFallbackLangForLang(lang);
    final String actualSearchLang;
    if (fallbackLang.isEmpty()) {
        actualSearchLang = lang;
    } else {
        actualSearchLang = fallbackLang;
    }
    final String contentField = LUCENE_QUERY_CONTENT_PREFIX + actualSearchLang;
    try {
        final IndexSearcher searcher = new IndexSearcher(reader);
        final Analyzer analyzer = new AnalyzerBuilder().language(actualSearchLang).build();
        QueryParser queryParser = new QueryParser(contentField, analyzer);
        final Query query = queryParser.parse(constructLucenePhraseTermSearchQuery(searchText));
        final QueryScorer queryScorer = new QueryScorer(query, contentField);

        final Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer);
        final Highlighter highlighter = new Highlighter(queryScorer);
        highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        highlighter.setTextFragmenter(fragmenter);

        final TopDocs res = searcher.search(query, maxNumResults);
        final float maxScore = res.getMaxScore();
        final List<ScoreDoc> scoreDocs = Arrays.asList(res.scoreDocs);
        // Insertion-ordered so that the pages come out in hit order; a plain
        // HashMap keyed by Document would iterate in arbitrary order.
        Map<org.apache.lucene.document.Document, String> docs = new java.util.LinkedHashMap<>();
        Map<String, ScoreDoc> scoresForDocs = new HashMap<>();
        for (final ScoreDoc sd : scoreDocs) {
            final org.apache.lucene.document.Document doc = searcher.doc(sd.doc);
            final String contents = doc.get(contentField);
            final String docId = doc.get("id");
            final String[] fragments = highlighter.getBestFragments(analyzer, contentField, contents,
                    MAX_NUM_FRAGMENTS);
            // Within each fragment, drop blank lines and join the rest with the separator.
            List<String> fragmentList = Arrays.stream(fragments)
                    .map(fragment -> fragment.trim().split("\n"))
                    .map(lines -> Arrays.stream(lines)
                            .filter(line -> !line.trim().isEmpty())
                            .reduce((sa, sb) -> sa + MACHING_EXCERPTS_SEPARATOR + sb)
                            .orElse(MACHING_EXCERPTS_SEPARATOR))
                    .collect(Collectors.toList());
            docs.put(doc,
                    MACHING_EXCERPTS_SEPARATOR
                            + fragmentList.stream().filter(s -> !s.isEmpty())
                                    .reduce((s1, s2) -> s1 + "..." + s2).orElse("")
                            + MACHING_EXCERPTS_SEPARATOR);
            scoresForDocs.putIfAbsent(docId, sd);
        }
        docs.entrySet().stream().forEach(e -> {
            final int relevance = Math.round((scoresForDocs.get(e.getKey().get("id")).score / maxScore) * 100);
            results.add(DocetPage.toDocetDocument(e.getKey(), e.getValue(), relevance));
        });
        return results;
    } catch (ParseException | IOException | InvalidTokenOffsetsException ex) {
        throw new DocetDocumentSearchException(
                "Error on searching query " + searchText + " for lang " + actualSearchLang, ex);
    }
}
From source file:natural.language.qa.LuceneSearch.java
License:Apache License
public List<LuceneSearchResult> search(String queryString, int maxRes) throws Exception { IndexSearcher searcher = null;//from w w w . j av a 2s . c o m List<LuceneSearchResult> results = new ArrayList<LuceneSearchResult>(); try { Properties indexConf = new Properties(); FileInputStream fis = new FileInputStream("index.properties"); indexConf.load(fis); String index = indexConf.getProperty("index"); String field = "contents"; Directory indexDir = FSDirectory.open(new File(index)); searcher = new IndexSearcher(indexDir); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer); queryString = queryString.trim(); if (queryString.length() == 0) { return results; } Query query = parser.parse(queryString); System.out.println("Searching for: " + query.toString(field)); // ================================================ Formatter f = new SimpleHTMLFormatter("", ""); Encoder e = new DefaultEncoder(); QueryScorer fs = new QueryScorer(query); Fragmenter fragmenter = new SimpleSpanFragmenter(fs, 50);// new SentenceFragmenter(); Highlighter h = new Highlighter(f, e, fs); h.setTextFragmenter(fragmenter); h.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); // ================================================ // Collect docs TopDocs res = searcher.search(query, maxRes); int numTotalHits = res.totalHits; ScoreDoc[] scoreDocs = res.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { Document doc = searcher.doc(scoreDoc.doc); String path = doc.get("path"); String content = readDocument(path); String bestFragment = h.getBestFragment(analyzer, field, content); String frag = bestFragment; //System.out.println(frag); LuceneSearchResult hit = new LuceneSearchResult(scoreDoc.doc, path, frag); results.add(hit); } System.out.println(numTotalHits + " total matching documents"); } finally { if (searcher != null) { searcher.close(); } } return results; }
From source file:net.chwise.documents.HighlightedFragmentsRetriever.java
License:Open Source License
public String[] getFragmentsWithHighlightedTerms(Analyzer analyzer, Query query, String fieldName, String fieldContents, int fragmentNumber, int fragmentSize) throws IOException, InvalidTokenOffsetsException { TokenStream stream = TokenSources.getTokenStream(fieldName, fieldContents, analyzer); QueryScorer scorer = new QueryScorer(query, fieldName); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentSize); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(fragmenter); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); String[] fragments = highlighter.getBestFragments(stream, fieldContents, fragmentNumber); if (fragments.length == 0) { //Return starting piece of fieldContents fragment fragments = new String[1]; fragments[0] = fieldContents.substring(0, Math.min(fragmentSize, fieldContents.length())); }//w ww . j a v a 2 s. co m return fragments; }
From source file:net.riezebos.thoth.content.search.Searcher.java
License:Apache License
public PagedList<SearchResult> search(Identity identity, String queryExpression, int pageNumber, int pageSize) throws SearchException { try {/*ww w . j a va2s. c om*/ IndexReader reader = getIndexReader(contentManager); IndexSearcher searcher = getIndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); // We might need to restrict the results to books of the user does not have access to fragments: AccessManager accessManager = contentManager.getAccessManager(); boolean booksOnly = !accessManager.hasPermission(identity, "", Permission.READ_FRAGMENTS); if (booksOnly) { queryExpression = Indexer.INDEX_TYPE + ":" + Indexer.TYPE_DOCUMENT + " AND (" + queryExpression + ")"; } QueryParser parser = new QueryParser(Indexer.INDEX_CONTENTS, analyzer); Query query = parser.parse(queryExpression); // We add 1 to determine if there is more to be found after the current page int maxResults = pageSize * pageNumber + 1; TopDocs results = searcher.search(query, maxResults, Sort.RELEVANCE); ScoreDoc[] hits = results.scoreDocs; boolean hadMore = (hits.length == maxResults); List<SearchResult> searchResults = new ArrayList<>(); int idx = 0; for (ScoreDoc scoreDoc : hits) { if (searchResults.size() == pageSize) break; idx++; if (idx >= (pageNumber - 1) * pageSize) { Document document = searcher.doc(scoreDoc.doc); IndexableField field = document.getField(Indexer.INDEX_PATH); String documentPath = field.stringValue(); SearchResult searchResult = new SearchResult(); searchResult.setIndexNumber((pageNumber - 1) * pageSize + idx); searchResult.setDocument(documentPath); String type = document.get(Indexer.INDEX_TYPE); if (Indexer.TYPE_DOCUMENT.equals(type) || Indexer.TYPE_FRAGMENT.equals(type)) { searchResult.setResource(false); try { MarkDownDocument markDownDocument = contentManager.getMarkDownDocument(documentPath, true, CriticProcessingMode.DO_NOTHING); String contents = markDownDocument.getMarkdown(); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter 
highlighter = new Highlighter(htmlFormatter, new QueryScorer(query, Indexer.INDEX_CONTENTS)); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); TokenStream tokenStream = analyzer.tokenStream(Indexer.INDEX_CONTENTS, contents); TextFragment[] frags = highlighter.getBestTextFragments(tokenStream, contents, false, 99999); for (TextFragment frag : frags) { if ((frag != null) && (frag.getScore() > 0)) { String fragmentText = frag.toString(); searchResult.addFragment( new Fragment(ThothCoreUtil.escapeHtmlExcept("B", fragmentText))); } } } catch (FileNotFoundException e) { LOG.warn( "Index contains an invalid file reference); probably need to reindex to get rid of this. File: " + e.getMessage()); } } else { searchResult.setResource(true); String extension = ThothUtil.getExtension(documentPath); searchResult.setImage(getConfiguration().isImageExtension(extension)); searchResult.addFragment(new Fragment(document.get(Indexer.INDEX_TITLE))); } searchResults.add(searchResult); } } reader.close(); linkBooks(searchResults); PagedList<SearchResult> pagedList = new PagedList<>(searchResults, hadMore); return pagedList; } catch (Exception e) { throw new SearchException(e); } }
From source file:net.sourceforge.docfetcher.model.search.HighlightService.java
License:Open Source License
@MutableCopy @NotNull//from w w w . j ava2 s . c o m private static List<Range> highlight(@NotNull Query query, @NotNull String text) throws CheckedOutOfMemoryError { final List<Range> ranges = new ArrayList<Range>(); /* * A formatter is supposed to return formatted text, but since we're * only interested in the start and end offsets of the search terms, we * return null and store the offsets in a list. */ Formatter nullFormatter = new Formatter() { public String highlightTerm(String originalText, TokenGroup tokenGroup) { for (int i = 0; i < tokenGroup.getNumTokens(); i++) { Token token = tokenGroup.getToken(i); if (tokenGroup.getScore(i) == 0) continue; int start = token.startOffset(); int end = token.endOffset(); ranges.add(new Range(start, end - start)); } return null; } }; String key = Fields.CONTENT.key(); Highlighter highlighter = new Highlighter(nullFormatter, new QueryScorer(query, key)); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); highlighter.setTextFragmenter(new NullFragmenter()); try { /* * This has a return value, but we ignore it since we only want the * offsets. Might throw an OutOfMemoryError. */ highlighter.getBestFragment(IndexRegistry.getAnalyzer(), key, text); } catch (OutOfMemoryError e) { throw new CheckedOutOfMemoryError(e); } catch (Exception e) { Util.printErr(e); } return ranges; }
From source file:net.sourceforge.docfetcher.view.PreviewPanel.java
License:Open Source License
/** * Sets the file to be displayed, using <tt>parser</tt> to extract the * text from the file on the disk. This method does nothing if the given * file is null. The <tt>force</tt> parameter specifies whether the * preview should be updated even if neither the file nor the search terms * have changed in the meantime.//from w w w .jav a2 s . c o m */ private void setFile(final File file, final Parser parser, final Query query, boolean force) { File lastFile = this.file; Query lastQuery = this.query; this.file = file; this.parser = parser; this.query = query; // Check input if (file == null) return; if (parser == null) // Allowed to be null if file is null, too throw new IllegalArgumentException(); if (!isActive) return; if (file.equals(lastFile) && !force) if (query != null && query.equals(lastQuery)) return; if (file.isDirectory()) throw new IllegalStateException("File expected for preview, got directory instead."); //$NON-NLS-1$ if (!file.exists()) { textViewer.setText(Msg.file_not_found.value()); showViewer(textViewerContainer); return; } // Use the HTML browser if (file.getAbsolutePath().equals(Const.HELP_FILE) || Pref.Bool.PreviewHTML.getValue()) { final BrowserPanel browser = browserProvider.getBrowser(previewPanel, browserToolBar, parser); if (browser != null) { browser.addProgressListener(new ProgressAdapter() { public void completed(ProgressEvent event) { showViewer(browser); upBt.setEnabled(false); downBt.setEnabled(false); occurrenceCounter.setText("0"); //$NON-NLS-1$ } }); browser.setFile(file); return; } // Browser creation failed, go on to next code block } // Use text renderers showViewer(textViewerContainer); // Use monospace font for text files if (parser instanceof TextParser) { org.eclipse.swt.graphics.Font monoFont = Font.PREVIEW_MONO.getFont(); if (!textViewer.getFont().equals(monoFont)) textViewer.setFont(monoFont); } else { org.eclipse.swt.graphics.Font previewFont = Font.PREVIEW.getFont(); if (!textViewer.getFont().equals(previewFont)) 
textViewer.setFont(previewFont); } textViewer.setText(Msg.loading.value()); // display loading message new Thread() { // run in a thread because parsing the file takes some time public void run() { // Extract the raw text from the file String text; boolean fileParsed = true; try { text = parser.renderText(file); } catch (ParseException e) { text = Msg.cant_read_file.format(e.getMessage()); fileParsed = false; } catch (OutOfMemoryError e) { /* * We can get here if the user sets a high java heap space * value during indexing and then sets a lower value for * search only usage. */ text = Msg.out_of_jvm_memory.value(); fileParsed = false; } if (PreviewPanel.this.file != file) return; // Another preview request had been started while we were parsing /* * Create the message that will be displayed if the character limit * is reached. It is appended to the file contents later; if it * was appended here, some words in it might get highlighted. */ int maxLength = Pref.Int.PreviewLimit.getValue(); final String msg = "...\n\n\n[" //$NON-NLS-1$ + Msg.preview_limit_hint.format(new Object[] { maxLength, Pref.Int.PreviewLimit.name(), Const.USER_PROPERTIES_FILENAME }) + "]"; //$NON-NLS-1$ final boolean exceeded = text.length() > maxLength; if (text.length() > maxLength) text = text.substring(0, maxLength - msg.length()); final String fText = text; /* * Create StyleRange ranges (i.e. start-end integer pairs) for * search term highlighting. Only tokenize preview text if we're * not displaying any info messages and if there are tokens to * highlight. */ ranges = new int[0]; if (fileParsed && query != null) { final List<Integer> rangesList = new ArrayList<Integer>(); Analyzer analyzer = RootScope.analyzer; /* * A formatter is supposed to return formatted text, but * since we're only interested in the start and end offsets * of the search terms, we return null and store the offsets * in a list. 
*/ Formatter nullFormatter = new Formatter() { public String highlightTerm(String originalText, TokenGroup tokenGroup) { for (int i = 0; i < tokenGroup.getNumTokens(); i++) { Token token = tokenGroup.getToken(i); if (tokenGroup.getScore(i) == 0) continue; int start = token.startOffset(); int end = token.endOffset(); rangesList.add(start); rangesList.add(end - start); } return null; } }; Highlighter highlighter = new Highlighter(nullFormatter, new QueryScorer(query, Document.contents)); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); highlighter.setTextFragmenter(new NullFragmenter()); try { /* * This has a return value, but we ignore it since we * only want the offsets. */ highlighter.getBestFragment(analyzer, Document.contents, fText); } catch (Exception e) { // We can do without the search term highlighting } // List to array (will be used by the method 'setHighlighting(..)') ranges = new int[rangesList.size()]; for (int i = 0; i < ranges.length; i++) ranges[i] = rangesList.get(i); } // Parsing and tokenizing done; display the results final boolean fFileParsed = fileParsed; Display.getDefault().syncExec(new Runnable() { public void run() { // Enable or disable up and down buttons upBt.setEnabled(ranges.length != 0); downBt.setEnabled(ranges.length != 0); textViewer.setText(fText); setHighlighting(fFileParsed && Pref.Bool.HighlightSearchTerms.getValue()); occurrenceCounter.setText(Integer.toString(ranges.length / 2)); if (exceeded) textViewer.append(msg); // character limit exceeded, append hint } }); } }.start(); }
From source file:net.sourceforge.vaticanfetcher.model.search.HighlightService.java
License:Open Source License
@MutableCopy @NotNull// w ww . j ava 2 s .co m private static List<Range> highlight(@NotNull Query query, @NotNull String text) throws CheckedOutOfMemoryError { final List<Range> ranges = new ArrayList<Range>(); /* * A formatter is supposed to return formatted text, but since we're * only interested in the start and end offsets of the search terms, we * return null and store the offsets in a list. */ Formatter nullFormatter = new Formatter() { public String highlightTerm(String originalText, TokenGroup tokenGroup) { for (int i = 0; i < tokenGroup.getNumTokens(); i++) { Token token = tokenGroup.getToken(i); if (tokenGroup.getScore(i) == 0) continue; int start = token.startOffset(); int end = token.endOffset(); ranges.add(new Range(start, end - start)); } return null; } }; String key = Fields.CONTENT.key(); Highlighter highlighter = new Highlighter(nullFormatter, new QueryScorer(query, key)); highlighter.setMaxDocCharsToAnalyze(Integer.MAX_VALUE); highlighter.setTextFragmenter(new NullFragmenter()); try { /* * This has a return value, but we ignore it since we only want the * offsets. Might throw an OutOfMemoryError. */ highlighter.getBestFragment(IndexRegistry.analyzer, key, text); } catch (OutOfMemoryError e) { throw new CheckedOutOfMemoryError(e); } catch (Exception e) { Util.printErr(e); } return ranges; }