List of usage examples for org.apache.lucene.search.TopScoreDocCollector.create
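public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) (Note: the examples below were written against older Lucene releases and call an earlier two-argument overload whose second argument is a boolean, e.g. create(10, false).)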
From source file:com.leavesfly.lia.advsearching.TimeLimitingCollectorTest.java
License:Apache License
public void testTimeLimitingCollector() throws Exception { Directory dir = TestUtil.getBookIndexDirectory(); IndexSearcher searcher = new IndexSearcher(dir); Query q = new MatchAllDocsQuery(); int numAllBooks = TestUtil.hitCount(searcher, q); TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false); Collector collector = new TimeLimitingCollector(topDocs, // #A 1000); // #A try {/* www .ja v a2 s . c o m*/ searcher.search(q, collector); assertEquals(numAllBooks, topDocs.getTotalHits()); // #B } catch (TimeExceededException tee) { // #C System.out.println("Too much time taken."); // #C } // #C searcher.close(); dir.close(); }
From source file:com.mathworks.xzheng.advsearching.TimeLimitingCollectorTest.java
License:Apache License
public void testTimeLimitingCollector() throws Exception { Directory dir = TestUtil.getBookIndexDirectory(); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir)); Query q = new MatchAllDocsQuery(); int numAllBooks = TestUtil.hitCount(searcher, q); TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false); Collector collector = new TimeLimitingCollector(topDocs, // #A null, 1000); // #A try {//from w w w . java 2 s . c o m searcher.search(q, collector); assertEquals(numAllBooks, topDocs.getTotalHits()); // #B } catch (TimeExceededException tee) { // #C System.out.println("Too much time taken."); // #C } // #C dir.close(); }
From source file:com.paladin.action.SearchAction.java
License:Apache License
/** * search using lucene//from ww w. j a v a 2 s . c o m * * @param jsonObject * @param request * @param _query * @throws IOException * @throws ParseException */ private void _search(JSONObject jsonObject, HttpServletRequest request, String _query, String _table) throws IOException, ParseException, InvalidTokenOffsetsException { // Bean ??? final String index_dir = Constants.LUCENE_INDEX_ROOT + _table; IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index_dir))); QueryParser parser = new QueryParser(Version.LUCENE_33, INDEX_FIELDS, new IKAnalyzer(false)); TopScoreDocCollector collector = TopScoreDocCollector.create(10000, true); for (String key : _query.split(" ")) { Query query = parser.parse(key); searcher.search(query, collector); // IKSimilarity searcher.setSimilarity(new IKSimilarity()); // int size = collector.getTotalHits(); total_pages = (size + Constants.NUM_PER_PAGE_SEARCH - 1) / Constants.NUM_PER_PAGE_SEARCH; curr_page_number = getCurrentPage(request, 1, total_pages); // ? first_page = curr_page_number - 5 > 0 ? curr_page_number - 5 : 1; last_page = first_page + 10 >= total_pages ? total_pages : first_page + 10; // ? int begin = (curr_page_number - 1) * Constants.NUM_PER_PAGE_SEARCH; ScoreDoc[] score_docs = collector.topDocs(begin, Constants.NUM_PER_PAGE_SEARCH).scoreDocs; List<Document> doc_list = new ArrayList<Document>(); for (ScoreDoc score_doc : score_docs) doc_list.add(searcher.doc(score_doc.doc)); List<Map<String, String>> blog_list = getBlogListFromDocList(query, doc_list); jsonObject.put(_table + "_list", blog_list); jsonObject.put("p_start_" + _table, first_page); jsonObject.put("p_end_" + _table, last_page); jsonObject.put("curr_page_" + _table, curr_page_number); jsonObject.put("total_page_" + _table, total_pages); jsonObject.put("total_count_" + _table, size); } }
From source file:com.partydj.search.LuceneSearchProvider.java
License:Open Source License
@Override public Collection<MediaFile> find(Map<String, Collection<String>> queryParameters) { StringBuilder queryString = new StringBuilder(); if (queryString == null || queryString.length() == 0) { return Collections.emptyList(); } else {// w ww. j a v a2 s .c om String any = getFirst(queryParameters, ANY); if (any != null) { queryString.append(INDEX_ENCODER.encode(any)); } else { //$MR todo } List<MediaFile> found = new ArrayList(); try { Query query = new QueryParser(Version.LUCENE_CURRENT, ANY, analyzer).parse(queryString.toString()); Integer hitsPerPage = getFirstInteger(queryParameters, MAX_RESULTS); if (hitsPerPage == null) { hitsPerPage = DEFAULT_MAX_RESULTS; } IndexSearcher searcher = new IndexSearcher(index, true); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); //$MR jaro-winlker scorer searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { Document d = searcher.doc(hit.doc); found.add(indexToMediaFile.get(d)); } } catch (Exception e) { e.printStackTrace(); } return found; } }
From source file:com.scsb.crpro.lucene.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.//from w ww . j a v a 2 s . com * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int numTotalHits = collector.getTotalHits(); System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } collector = TopScoreDocCollector.create(numTotalHits, false); searcher.search(query, collector); hits = collector.topDocs().scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:com.searchlocal.lucene.ContentSearcher.java
License:Open Source License
/** * ? /*from w w w .j av a 2s . c om*/ * * @param param ? * @param indexlocal ? * @return list */ public static List<ResultBean> query(SearchParam param) throws IOException, LogicException { // ? String indexPath = param.getIndexPath(); // if (null == fsd) { fsd = SimpleFSDirectory.open(new File(indexPath)); } List<ResultBean> beanList = new ArrayList<ResultBean>(); try { // ?KeyWord Analyzer analyzer = new PaodingAnalyzer(); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer); Query query; query = parser.parse(param.getKeyWord()); TopScoreDocCollector collector = TopScoreDocCollector.create(100, true); // ? if (null == is) { is = new IndexSearcher(fsd, true); } is.search(query, collector); ScoreDoc[] scoreDoc = collector.topDocs().scoreDocs; SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(CONTENTS_SHOW_LENGTH)); if (scoreDoc.length == 0) { return beanList; } int startRow = param.getStartRow(); int endRow = param.getEndRow(); endRow = scoreDoc.length > endRow ? endRow : scoreDoc.length; for (int i = startRow; i < endRow; i++) { Document doc = is.doc(scoreDoc[i].doc); String content = doc.get("content"); // TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content)); content = highlighter.getBestFragment(tokenStream, content); ResultBean bean = BeanUtil.getBean(doc, content); beanList.add(bean); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } return beanList; }
From source file:com.searchlocal.lucene.ContentSearcher.java
License:Open Source License
/** * ?//from ww w . j a v a 2 s . c o m * * @param param ? * @return int */ public static int getCount(SearchParam param) throws IOException { // ? String indexPath = param.getIndexPath(); FSDirectory fsd = SimpleFSDirectory.open(new File(indexPath)); int count = 0; try { // ? Analyzer analyzer = new PaodingAnalyzer(); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer); // Query query = parser.parse(param.getKeyWord()); TopScoreDocCollector collector = TopScoreDocCollector.create(100, true); IndexSearcher is = new IndexSearcher(fsd, true); is.search(query, collector); ScoreDoc[] scoreDoc = collector.topDocs().scoreDocs; count = scoreDoc.length; } catch (ParseException e) { e.printStackTrace(); } return count; }
From source file:com.tilab.ca.sse.core.lucene.SimpleSearcher.java
License:Open Source License
private ScoreDoc[] getTopResults(Query query, int numResults) throws IOException { ScoreDoc[] hits;//ww w. ja v a 2 s .c o m LOG.debug("[getTopResults] - BEGIN"); TopScoreDocCollector collector = TopScoreDocCollector.create(numResults, false); indexSearcher.search(query, collector); hits = collector.topDocs().scoreDocs; LOG.debug("[getTopResults] - END"); return hits; }
From source file:com.wrmsr.search.dsl.SearcherImpl.java
License:Apache License
/**
 * Wraps {@code query} in a {@code ComputedScoreQuery} driven by {@code scoreSupplier},
 * runs it against {@code indexSearcher}, and returns the collected top hits.
 *
 * @param query         the query selecting the documents
 * @param scoreSupplier supplier passed to the {@code ComputedScoreQuery}
 * @param maxHits       maximum number of hits to collect
 * @return the score docs gathered by the collector
 * @throws IOException if the search fails
 */
@Override
public ScoreDoc[] search(Query query, Supplier<Float> scoreSupplier, int maxHits) throws IOException {
    Query scoredQuery = new ComputedScoreQuery(new DocSpecific.Composite(docSpecificSet), scoreSupplier, query);
    // Parameterized instead of the raw TopDocsCollector type.
    TopDocsCollector<ScoreDoc> topDocsCollector = TopScoreDocCollector.create(maxHits, true);
    indexSearcher.search(scoredQuery, topDocsCollector);
    return topDocsCollector.topDocs().scoreDocs;
}
From source file:com.xiaomi.linden.core.search.LindenCoreImpl.java
License:Apache License
public LindenResult search(LindenSearchRequest request) throws IOException { SearcherTaxonomyManager.SearcherAndTaxonomy searcherAndTaxonomy = lindenNRTSearcherManager.acquire(); try {/* w w w .jav a 2s.c o m*/ IndexSearcher indexSearcher = searcherAndTaxonomy.searcher; Filter filter = FilterConstructor.constructFilter(request.getFilter(), config); Sort sort = SortConstructor.constructSort(request, indexSearcher, config); indexSearcher.setSimilarity(config.getSearchSimilarityInstance()); Query query = QueryConstructor.constructQuery(request.getQuery(), config); if (filter != null) { query = new FilteredQuery(query, filter); } int from = request.getOffset(); int size = request.getLength(); LindenResultParser resultParser = new LindenResultParser(config, request, indexSearcher, snippetGenerator, query, filter, sort); // very common search, no group, no facet, no early termination, no search time limit if (!request.isSetGroupParam() && !request.isSetFacet() && !request.isSetEarlyParam() && config.getSearchTimeLimit() <= 0) { TopDocs docs; if (sort != null) { docs = indexSearcher.search(query, from + size, sort); } else { docs = indexSearcher.search(query, from + size); } return resultParser.parse(docs, null, null, null); } // group param will suppress facet, group, early termination and search time limit parameters if (request.isSetGroupParam()) { String groupField = request.getGroupParam().getGroupField(); GroupingSearch groupingSearch = new GroupingSearch(groupField); groupingSearch.setGroupDocsLimit(request.getGroupParam().getGroupInnerLimit()); if (sort != null) { groupingSearch.setGroupSort(sort); groupingSearch.setSortWithinGroup(sort); groupingSearch.setFillSortFields(true); } groupingSearch.setCachingInMB(8.0, true); groupingSearch.setAllGroups(true); TopGroups<TopDocs> topGroupedDocs = groupingSearch.search(indexSearcher, query, 0, from + size); return resultParser.parse(null, topGroupedDocs, null, null); } TopDocsCollector topDocsCollector; if (sort != null) 
{ topDocsCollector = TopFieldCollector.create(sort, from + size, null, true, false, false, false); } else { topDocsCollector = TopScoreDocCollector.create(from + size, false); } LindenDocsCollector lindenDocsCollector; if (request.isSetEarlyParam()) { MergePolicy mergePolicy = indexWriter.getConfig().getMergePolicy(); Sort mergePolicySort = null; if (mergePolicy instanceof SortingMergePolicyDecorator) { mergePolicySort = ((SortingMergePolicyDecorator) mergePolicy).getSort(); } EarlyTerminationCollector earlyTerminationCollector = new EarlyTerminationCollector( topDocsCollector, mergePolicySort, request.getEarlyParam().getMaxNum()); lindenDocsCollector = new LindenDocsCollector(earlyTerminationCollector); } else { lindenDocsCollector = new LindenDocsCollector(topDocsCollector); } Collector collector = lindenDocsCollector; if (config.getSearchTimeLimit() > 0) { collector = new TimeLimitingCollector(lindenDocsCollector, TimeLimitingCollector.getGlobalCounter(), config.getSearchTimeLimit()); } // no facet param if (!request.isSetFacet()) { indexSearcher.search(query, collector); return resultParser.parse(lindenDocsCollector.topDocs(), null, null, null); } // facet search LindenFacet facetRequest = request.getFacet(); FacetsCollector facetsCollector = new FacetsCollector(); lindenDocsCollector.wrap(facetsCollector); Facets facets = null; if (facetRequest.isSetDrillDownDimAndPaths()) { // drillDown or drillSideways DrillDownQuery drillDownQuery = new DrillDownQuery(facetsConfig, query); List<LindenFacetDimAndPath> drillDownDimAndPaths = facetRequest.getDrillDownDimAndPaths(); for (int i = 0; i < drillDownDimAndPaths.size(); ++i) { String fieldName = drillDownDimAndPaths.get(i).dim; if (drillDownDimAndPaths.get(i).path != null) { drillDownQuery.add(fieldName, drillDownDimAndPaths.get(i).path.split("/")); } else { drillDownQuery.add(fieldName); } } // drillSideways if (facetRequest.getFacetDrillingType() == FacetDrillingType.DRILLSIDEWAYS) { DrillSideways dillSideways = 
new DrillSideways(indexSearcher, facetsConfig, searcherAndTaxonomy.taxonomyReader); DrillSideways.DrillSidewaysResult drillSidewaysResult = dillSideways.search(drillDownQuery, collector); facets = drillSidewaysResult.facets; } else { // drillDown indexSearcher.search(drillDownQuery, collector); facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig, facetsCollector); } } else { indexSearcher.search(query, collector); // Simple facet browsing if (facetRequest.isSetFacetParams()) { facets = new FastTaxonomyFacetCounts(searcherAndTaxonomy.taxonomyReader, facetsConfig, facetsCollector); } } return resultParser.parse(lindenDocsCollector.topDocs(), null, facets, facetsCollector); } catch (Exception e) { throw new IOException(Throwables.getStackTraceAsString(e)); } finally { lindenNRTSearcherManager.release(searcherAndTaxonomy); } }