List of usage examples for org.apache.lucene.search TopScoreDocCollector create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
From source file:org.apache.derby.optional.lucene.LuceneQueryVTI.java
License:Apache License
/**
 * Initialize the metadata and scan: resolve the table/column being queried,
 * derive the return-column layout, verify privileges, open the Lucene index,
 * parse the query text, and run the search.
 *
 * @throws SQLException wrapping any IO, query-parse, or privilege failure
 */
private void initScan() throws SQLException {
    try {
        // read the execution context for this AwareVTI
        VTIContext context = getContext();

        _schema = context.vtiSchema();
        String[] nameParts = LuceneSupport.decodeFunctionName(context.vtiTable());
        _table = nameParts[LuceneSupport.TABLE_PART];
        _column = nameParts[LuceneSupport.COLUMN_PART];

        // divine the column names
        VTITemplate.ColumnDescriptor[] returnColumns = getReturnTableSignature(_connection);
        String[] columnNames = new String[returnColumns.length];
        for (int i = 0; i < returnColumns.length; i++) {
            columnNames[i] = returnColumns[i].columnName;
        }
        setColumnNames(columnNames);

        // fixed layout: ... keys ..., docID, score (score is the last column)
        _scoreColumnID = getColumnCount();
        _docIDColumnID = _scoreColumnID - 1;
        _maxKeyID = _docIDColumnID - 1;
        _minKeyID = 1;

        // make sure the user has SELECT privilege on all relevant columns of the underlying table
        vetPrivileges();

        DerbyLuceneDir derbyLuceneDir = LuceneSupport.getDerbyLuceneDir(_connection, _schema, _table, _column);
        StorageFile propertiesFile = LuceneSupport.getIndexPropertiesFile(derbyLuceneDir);
        Properties indexProperties = readIndexProperties(propertiesFile);
        String indexDescriptorMaker = indexProperties.getProperty(LuceneSupport.INDEX_DESCRIPTOR_MAKER);
        LuceneIndexDescriptor indexDescriptor = getIndexDescriptor(indexDescriptorMaker);
        QueryParser qp = indexDescriptor.getQueryParser();

        // refuse to read an index written by an incompatible Lucene version
        vetLuceneVersion(indexProperties.getProperty(LuceneSupport.LUCENE_VERSION));

        _indexReader = getIndexReader(derbyLuceneDir);
        _searcher = new IndexSearcher(_indexReader);

        Query luceneQuery = qp.parse(_queryText);

        // FIX: build exactly one collector. The original unconditionally created a
        // plain collector and then threw it away whenever a score ceiling was set.
        // (Also dropped the unused local Analyzer the original fetched and ignored.)
        TopScoreDocCollector tsdc;
        if (_scoreCeiling != null) {
            tsdc = TopScoreDocCollector.create(_windowSize, new ScoreDoc(0, _scoreCeiling), true);
        } else {
            tsdc = TopScoreDocCollector.create(_windowSize, true);
        }

        searchAndScore(luceneQuery, tsdc);
    } catch (IOException ioe) {
        throw LuceneSupport.wrap(ioe);
    } catch (ParseException pe) {
        throw LuceneSupport.wrap(pe);
    } catch (PrivilegedActionException pae) {
        throw LuceneSupport.wrap(pae);
    }
}
From source file:org.apache.fuzzydb.demo.SearchFiles.java
License:Open Source License
/** * This demonstrates a typical paging search scenario, where the search engine presents pages of size n to the user. The user can * then go to the next page if interested in the next hits. * //from ww w.java 2 s . co m * When the query is executed for the first time, then only enough results are collected to fill 5 result pages. If the user wants * to page beyond this limit, then the query is executed another time and all hits are collected. * */ public static ScoreDoc[] doPagingSearch(Searcher searcher, Query query, int noOfPages) throws IOException { // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(noOfPages, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int numTotalHits = collector.getTotalHits(); // System.out.println("Confidence Score : : "+hits.length); System.out.println(numTotalHits + " total matching documents"); return hits; }
From source file:org.apache.solr.search.federated.DJoinQParserPlugin.java
License:Apache License
/**
 * Produces a QParser whose parse() yields a RankQuery wrapper. The wrapper
 * delegates rewrite/createWeight to the wrapped main query, supplies a
 * DJoinMergeStrategy for merging distributed results, and builds either a
 * score-ordered or a field-sorted top-docs collector depending on the sort.
 */
@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new QParser(qstr, localParams, params, req) {
        @Override
        public Query parse() throws SyntaxError {
            return new RankQuery() {
                // The query this RankQuery wraps; set by wrap(), used by rewrite()/createWeight().
                private Query mainQuery;

                @Override
                @SuppressWarnings("rawtypes")
                public TopDocsCollector getTopDocsCollector(int len, QueryCommand cmd, IndexSearcher searcher)
                        throws IOException {
                    Sort sort = cmd.getSort();
                    if (sort == null) {
                        // No explicit sort: collect by relevance score (out-of-order docs not accepted).
                        return TopScoreDocCollector.create(len, false);
                    } else {
                        // Explicit sort: rewrite it against the searcher, then collect by field.
                        return TopFieldCollector.create(sort.rewrite(searcher), len, false, true, true, false);
                    }
                }

                @Override
                public MergeStrategy getMergeStrategy() {
                    return new DJoinMergeStrategy();
                }

                @Override
                public RankQuery wrap(Query mainQuery) {
                    this.mainQuery = mainQuery;
                    return this;
                }

                @Override
                public Query rewrite(IndexReader reader) throws IOException {
                    // Delegate to the wrapped query.
                    return mainQuery.rewrite(reader);
                }

                @Override
                public Weight createWeight(IndexSearcher searcher) throws IOException {
                    // Delegate to the wrapped query.
                    return mainQuery.createWeight(searcher);
                }
            };
        }
    };
}
From source file:org.apache.solr.search.grouping.distributed.command.QueryCommand.java
License:Apache License
/**
 * Builds the collector chain for this query command: a top-docs collector
 * (score-ordered for relevance sort, field-ordered otherwise) wrapped in a
 * FilterCollector restricted to the given doc set.
 *
 * @return a single-element list holding the filter collector
 * @throws IOException if collector construction fails
 */
@Override
public List<Collector> create() throws IOException {
    // Relevance (or absent) sort collects by score; any other sort needs a field collector.
    boolean scoreOrdered = sort == null || sort == Sort.RELEVANCE;
    collector = scoreOrdered
            ? TopScoreDocCollector.create(docsToCollect, true)
            : TopFieldCollector.create(sort, docsToCollect, true, needScores, needScores, true);
    filterCollector = new FilterCollector(docSet, collector);
    return Arrays.asList((Collector) filterCollector);
}
From source file:org.apache.solr.search.SolrIndexSearcher.java
License:Apache License
/**
 * Runs the command's query and computes both the DocList (top hits, with
 * optional scores) and the full matching DocSet in a single pass.
 * Results are stored on the QueryResult; the return value is the DocSet to
 * cache (only when there is no filter/post-filter) or null.
 *
 * NOTE(review): collector wrapping order (early-termination, then time limit,
 * then post-filter) is significant — do not reorder.
 */
private DocSet getDocListAndSetNC(QueryResult qr, QueryCommand cmd) throws IOException {
    int len = cmd.getSupersetMaxDoc();
    int last = len;
    // Clamp the requested window to the index size.
    if (last < 0 || last > maxDoc())
        last = maxDoc();
    final int lastDocRequested = last;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;
    DocSet set;

    boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
    boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
    int maxDoc = maxDoc();
    // Sizing hint for the small-set representation of the DocSet collector.
    int smallSetSize = maxDoc >> 6;

    ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
    final Filter luceneFilter = pf.filter;

    Query query = QueryUtils.makeQueryable(cmd.getQuery());
    final long timeAllowed = cmd.getTimeAllowed();

    // handle zero case...
    if (lastDocRequested <= 0) {
        // No doc list requested: only the DocSet (and, if needed, the max score) is computed.
        final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };

        Collector collector;
        DocSetCollector setCollector;

        if (!needScores) {
            collector = setCollector = new DocSetCollector(smallSetSize, maxDoc);
        } else {
            // Delegate collector tracks the running maximum score while the DocSet is built.
            collector = setCollector = new DocSetDelegateCollector(smallSetSize, maxDoc, new Collector() {
                Scorer scorer;

                @Override
                public void setScorer(Scorer scorer) {
                    this.scorer = scorer;
                }

                @Override
                public void collect(int doc) throws IOException {
                    float score = scorer.score();
                    if (score > topscore[0])
                        topscore[0] = score;
                }

                @Override
                public void setNextReader(AtomicReaderContext context) {
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                    return false;
                }
            });
        }

        if (terminateEarly) {
            collector = new EarlyTerminatingCollector(collector, cmd.len);
        }
        if (timeAllowed > 0) {
            collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(),
                    timeAllowed);
        }
        if (pf.postFilter != null) {
            pf.postFilter.setLastDelegate(collector);
            collector = pf.postFilter;
        }

        try {
            super.search(query, luceneFilter, collector);
            if (collector instanceof DelegatingCollector) {
                ((DelegatingCollector) collector).finish();
            }
        } catch (TimeLimitingCollector.TimeExceededException x) {
            // Time limit hit: keep whatever was collected and mark the result partial.
            log.warn("Query: " + query + "; " + x.getMessage());
            qr.setPartialResults(true);
        }

        set = setCollector.getDocSet();

        nDocsReturned = 0;
        ids = new int[nDocsReturned];
        scores = new float[nDocsReturned];
        totalHits = set.size();
        maxScore = totalHits > 0 ? topscore[0] : 0.0f;
    } else {
        // Doc list requested: collect top docs and the DocSet together.
        TopDocsCollector topCollector;
        if (cmd.getSort() == null) {
            topCollector = TopScoreDocCollector.create(len, true);
        } else {
            topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores,
                    true);
        }

        DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, topCollector);
        Collector collector = setCollector;

        if (terminateEarly) {
            collector = new EarlyTerminatingCollector(collector, cmd.len);
        }
        if (timeAllowed > 0) {
            collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(),
                    timeAllowed);
        }
        if (pf.postFilter != null) {
            pf.postFilter.setLastDelegate(collector);
            collector = pf.postFilter;
        }

        try {
            super.search(query, luceneFilter, collector);
            if (collector instanceof DelegatingCollector) {
                ((DelegatingCollector) collector).finish();
            }
        } catch (TimeLimitingCollector.TimeExceededException x) {
            log.warn("Query: " + query + "; " + x.getMessage());
            qr.setPartialResults(true);
        }

        set = setCollector.getDocSet();

        totalHits = topCollector.getTotalHits();
        assert (totalHits == set.size());

        TopDocs topDocs = topCollector.topDocs(0, len);
        maxScore = totalHits > 0 ? topDocs.getMaxScore() : 0.0f;
        nDocsReturned = topDocs.scoreDocs.length;

        ids = new int[nDocsReturned];
        scores = (cmd.getFlags() & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
        for (int i = 0; i < nDocsReturned; i++) {
            ScoreDoc scoreDoc = topDocs.scoreDocs[i];
            ids[i] = scoreDoc.doc;
            if (scores != null)
                scores[i] = scoreDoc.score;
        }
    }

    int sliceLen = Math.min(lastDocRequested, nDocsReturned);
    if (sliceLen < 0)
        sliceLen = 0;

    qr.setDocList(new DocSlice(0, sliceLen, ids, scores, totalHits, maxScore));
    // TODO: if we collect results before the filter, we just need to intersect with
    // that filter to generate the DocSet for qr.setDocSet()
    qr.setDocSet(set);

    // TODO: currently we don't generate the DocSet for the base query,
    // but the QueryDocSet == CompleteDocSet if filter==null.
    return pf.filter == null && pf.postFilter == null ? qr.getDocSet() : null;
}
From source file:org.apache.tika.example.RecentFiles.java
License:Apache License
/**
 * Builds an RSS document listing files indexed within the last five minutes.
 *
 * @param indexFile directory of the Lucene index to search
 * @return the complete RSS text (headers + one item per recent hit + footers)
 * @throws CorruptIndexException if the index is damaged
 * @throws IOException           if the index cannot be read
 */
public String generateRSS(File indexFile) throws CorruptIndexException, IOException {
    // FIX: StringBuilder instead of StringBuffer — the buffer is method-local,
    // so the synchronization StringBuffer provides is pure overhead.
    StringBuilder output = new StringBuilder();
    output.append(getRSSHeaders());
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(new SimpleFSDirectory(indexFile));
        searcher = new IndexSearcher(reader);

        // Build the [now - 5 minutes, now] date window in ISO-8601 form.
        GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
        gc.setTime(new Date());
        String nowDateTime = ISO8601.format(gc);
        gc.add(java.util.GregorianCalendar.MINUTE, -5);
        String fiveMinsAgo = ISO8601.format(gc);

        // Inclusive range query over the document date field.
        TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), fiveMinsAgo, nowDateTime, true, true);
        TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
        searcher.search(query, collector);

        for (ScoreDoc hit : collector.topDocs().scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            output.append(getRSSItem(doc));
        }
    } finally {
        // Preserve original close order: reader first, then searcher.
        if (reader != null)
            reader.close();
        if (searcher != null)
            searcher.close();
    }
    output.append(getRSSFooters());
    return output.toString();
}
From source file:org.chenillekit.lucene.services.impl.SearcherServiceImpl.java
License:Apache License
/**
 * Runs the given query and returns the matching documents.
 *
 * @param fieldName unused here; kept for interface compatibility
 * @param query     the Lucene query to execute
 * @param howMany   maximum number of hits to collect, or null for MAX_SCORE_DOC
 * @return the matched documents, best first
 * @throws ChenilleKitLuceneRuntimeException wrapping any index-access failure
 */
public List<Document> search(String fieldName, Query query, Integer howMany) {
    int total = howMany != null ? howMany.intValue() : MAX_SCORE_DOC;
    TopScoreDocCollector collector = TopScoreDocCollector.create(total, true);

    IndexSearcher indexSearcher = indexSource.createIndexSearcher();
    boolean completed = false;
    try {
        ScoreDoc[] scores;
        try {
            indexSearcher.search(query, collector);
            scores = collector.topDocs().scoreDocs;
        } catch (IOException ioe) {
            logger.error(String.format("Unable to access the index for searching: '%s'", ioe.getMessage()), ioe);
            throw new ChenilleKitLuceneRuntimeException(ioe);
        }

        List<Document> docs = new ArrayList<Document>(scores.length);
        for (int i = 0; i < scores.length; i++) {
            int docId = scores[i].doc;
            try {
                docs.add(indexSearcher.doc(docId));
            } catch (CorruptIndexException cie) {
                logger.error(String.format("The index result corrupted: '%s'", cie.getMessage()), cie);
                throw new ChenilleKitLuceneRuntimeException(cie);
            } catch (IOException ioe) {
                logger.error(String.format("Unable to access the index for searching: '%s'", ioe.getMessage()),
                        ioe);
                throw new ChenilleKitLuceneRuntimeException(ioe);
            }
        }
        completed = true;
        return docs;
    } finally {
        // FIX: the original only closed the searcher on the success path, leaking
        // it whenever search()/doc() threw. Close unconditionally; only propagate
        // a close failure when no earlier exception is already in flight.
        try {
            indexSearcher.close();
        } catch (IOException e) {
            logger.error(String.format("Unable to close the index for searching: '%s'", e.getMessage()), e);
            if (completed)
                throw new ChenilleKitLuceneRuntimeException(e);
        }
    }
}
From source file:org.crosswire.jsword.index.lucene.LuceneIndex.java
License:Open Source License
/**
 * Searches the Lucene index for the given query string and returns the
 * matching verses as a Key — a scored PassageTally when the search modifier
 * asks for ranked results, otherwise a plain key list.
 *
 * @param search the raw query text; a null search yields an empty result
 * @return never null: a (possibly empty) PassageTally or key list
 * @throws BookException if parsing, searching, or verse resolution fails
 */
public Key find(String search) throws BookException {
    checkActive();

    SearchModifier modifier = getSearchModifier();
    Key results = null;

    if (search != null) {
        try {
            Analyzer analyzer = new LuceneAnalyzer(book);

            QueryParser parser = new QueryParser(Version.LUCENE_29, LuceneIndex.FIELD_BODY, analyzer);
            parser.setAllowLeadingWildcard(true);
            Query query = parser.parse(search);
            log.info("ParsedQuery-" + query.toString());

            // For ranking we use a PassageTally
            if (modifier != null && modifier.isRanked()) {
                PassageTally tally = new PassageTally();
                // Suppress change events and normalization while bulk-adding hits.
                tally.raiseEventSuppresion();
                tally.raiseNormalizeProtection();
                results = tally;

                TopScoreDocCollector collector = TopScoreDocCollector.create(modifier.getMaxResults(), false);
                searcher.search(query, collector);
                tally.setTotal(collector.getTotalHits());
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    int docId = hits[i].doc;
                    Document doc = searcher.doc(docId);
                    Key key = VerseFactory.fromString(doc.get(LuceneIndex.FIELD_KEY));
                    // PassageTally understands a score of 0 as the verse
                    // not participating, so shift scores to start at 1.
                    int score = (int) (hits[i].score * 100 + 1);
                    tally.add(key, score);
                }
                tally.lowerNormalizeProtection();
                tally.lowerEventSuppresionAndTest();
            } else {
                results = book.createEmptyKeyList();
                // If we have an abstract passage,
                // make sure it does not try to fire change events.
                AbstractPassage passage = null;
                if (results instanceof AbstractPassage) {
                    passage = (AbstractPassage) results;
                    passage.raiseEventSuppresion();
                    passage.raiseNormalizeProtection();
                }

                // Unranked search: stream hits straight into the key list.
                searcher.search(query, new VerseCollector(searcher, results));

                if (passage != null) {
                    passage.lowerNormalizeProtection();
                    passage.lowerEventSuppresionAndTest();
                }
            }
        } catch (IOException e) {
            // The VerseCollector may throw IOExceptions that merely wrap a
            // NoSuchVerseException
            Throwable cause = e.getCause();
            if (cause instanceof NoSuchVerseException) {
                // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
                throw new BookException(UserMsg.gettext("Search failed."), cause);
            }

            // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
            throw new BookException(UserMsg.gettext("Search failed."), e);
        } catch (NoSuchVerseException e) {
            // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
            throw new BookException(UserMsg.gettext("Search failed."), e);
        } catch (ParseException e) {
            // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
            throw new BookException(UserMsg.gettext("Search failed."), e);
        } finally {
            Activator.deactivate(this);
        }
    }

    // Never return null: fall back to an empty result of the appropriate kind.
    if (results == null) {
        if (modifier != null && modifier.isRanked()) {
            results = new PassageTally();
        } else {
            results = book.createEmptyKeyList();
        }
    }
    return results;
}
From source file:org.d2.plugins.lucene.LuceneIndexer.java
License:Apache License
/**
 * Runs the given D2 query against the index and returns the "id" field of
 * up to 20 top-scoring documents.
 *
 * @param query the D2 query to translate and execute
 * @return the ids of the matching documents, best first
 * @throws RuntimeException (via Util.wrap) on index IO failure
 */
public List<String> findIdByQuery(D2Query query) {
    LuceneReaderAndSearcher searcher = null;
    try {
        searcher = manager.getSearcher();

        TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
        searcher.getSearcher().search(makeLuceneQuery(query), collector);

        List<String> ids = new ArrayList<String>();
        for (ScoreDoc hit : collector.topDocs().scoreDocs) {
            Document doc = searcher.getSearcher().doc(hit.doc);
            ids.add(doc.get("id"));
        }
        return ids;
    } catch (IOException e) {
        throw Util.wrap(e);
    } finally {
        // Always hand the searcher back to the manager, even on failure.
        manager.releaseSearcher(searcher);
    }
}
From source file:org.dbpedia.spotlight.lucene.search.BaseSearcher.java
License:Apache License
/** * Basic search method used by all searches to the index. * @param query// ww w . ja v a 2s. c o m * @param n * @return * @throws SearchException */ public ScoreDoc[] getHits(Query query, int n, int timeout, Filter filter) throws SearchException { ScoreDoc[] hits = null; try { //LOG.debug("Start search. timeout="+timeout); long start = System.nanoTime(); TopScoreDocCollector collector = TopScoreDocCollector.create(n, false); //TimeLimitingCollector collector = new TimeLimitingCollector(tCollector, timeout); //TODO try to bring this back later mSearcher.search(query, filter, collector); //mSearcher. hits = collector.topDocs().scoreDocs; long end = System.nanoTime(); LOG.debug( String.format("Done search in %f ms. hits.length=%d", (end - start) / 1000000.0, hits.length)); } catch (TimeLimitingCollector.TimeExceededException timedOutException) { throw new TimeoutException("Timeout (>" + timeout + "ms searching for surface form " + query.toString(), timedOutException); } catch (Exception e) { throw new SearchException("Error searching for surface form " + query.toString(), e); } //LOG.debug(hits.length+" hits found."); return hits; }