Example usage for org.apache.lucene.search TopScoreDocCollector create

Introduction

On this page you can find example usages of org.apache.lucene.search.TopScoreDocCollector.create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Document

Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately. Note that this two-int signature dates from Lucene 8.0; most of the examples below were written against older Lucene releases, in which the second argument was the boolean docsScoredInOrder, and one uses the three-argument overload that also takes a ScoreDoc.
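
As a quick orientation before the examples, here is a minimal sketch against the two-int signature shown above (Lucene 8.0 or later); the index path is a placeholder and the query is supplied by the caller:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

public class TopHitsSketch {
    static ScoreDoc[] topHits(Query query) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Keep the best 10 hits; count total hits exactly only up to 1000,
            // after which the reported total becomes a lower bound.
            TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
            searcher.search(query, collector);
            return collector.topDocs().scoreDocs;
        }
    }
}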

Usage

From source file: org.apache.derby.optional.lucene.LuceneQueryVTI.java

License: Apache License

/** Initialize the metadata and scan */
private void initScan() throws SQLException {
    try {
        // read the execution context for this AwareVTI
        VTIContext context = getContext();
        _schema = context.vtiSchema();
        String[] nameParts = LuceneSupport.decodeFunctionName(context.vtiTable());
        _table = nameParts[LuceneSupport.TABLE_PART];
        _column = nameParts[LuceneSupport.COLUMN_PART];

        // divine the column names
        VTITemplate.ColumnDescriptor[] returnColumns = getReturnTableSignature(_connection);
        String[] columnNames = new String[returnColumns.length];
        for (int i = 0; i < returnColumns.length; i++) {
            columnNames[i] = returnColumns[i].columnName;
        }
        setColumnNames(columnNames);

        _scoreColumnID = getColumnCount();
        _docIDColumnID = _scoreColumnID - 1;
        _maxKeyID = _docIDColumnID - 1;
        _minKeyID = 1;

        // make sure the user has SELECT privilege on all relevant columns of the underlying table
        vetPrivileges();

        DerbyLuceneDir derbyLuceneDir = LuceneSupport.getDerbyLuceneDir(_connection, _schema, _table, _column);
        StorageFile propertiesFile = LuceneSupport.getIndexPropertiesFile(derbyLuceneDir);
        Properties indexProperties = readIndexProperties(propertiesFile);
        String indexDescriptorMaker = indexProperties.getProperty(LuceneSupport.INDEX_DESCRIPTOR_MAKER);
        LuceneIndexDescriptor indexDescriptor = getIndexDescriptor(indexDescriptorMaker);
        Analyzer analyzer = indexDescriptor.getAnalyzer();
        QueryParser qp = indexDescriptor.getQueryParser();

        vetLuceneVersion(indexProperties.getProperty(LuceneSupport.LUCENE_VERSION));

        _indexReader = getIndexReader(derbyLuceneDir);
        _searcher = new IndexSearcher(_indexReader);

        Query luceneQuery = qp.parse(_queryText);
        TopScoreDocCollector tsdc = TopScoreDocCollector.create(_windowSize, true);
        if (_scoreCeiling != null) {
            tsdc = TopScoreDocCollector.create(_windowSize, new ScoreDoc(0, _scoreCeiling), true);
        }

        searchAndScore(luceneQuery, tsdc);
    } catch (IOException ioe) {
        throw LuceneSupport.wrap(ioe);
    } catch (ParseException pe) {
        throw LuceneSupport.wrap(pe);
    } catch (PrivilegedActionException pae) {
        throw LuceneSupport.wrap(pae);
    }
}
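
The second create call above uses the three-argument Lucene 4.x overload, create(int numHits, ScoreDoc after, boolean docsScoredInOrder). It is normally meant for deep paging (collect only hits that rank after a previously seen ScoreDoc), and Derby reuses it to enforce a score ceiling: the synthetic ScoreDoc(0, _scoreCeiling) makes the collector accept only hits that rank below that score. A stripped-down sketch with hypothetical values:

float scoreCeiling = 2.0f; // hypothetical ceiling
int windowSize = 100;      // hypothetical window size
// Collect up to windowSize hits ranking after the synthetic ScoreDoc,
// i.e. hits whose score falls below the ceiling.
TopScoreDocCollector tsdc = TopScoreDocCollector.create(windowSize, new ScoreDoc(0, scoreCeiling), true);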

From source file: org.apache.fuzzydb.demo.SearchFiles.java

License: Open Source License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents pages of size n to the user. The user can
 * then go to the next page if interested in the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected to fill 5 result pages. If the user wants
 * to page beyond this limit, then the query is executed another time and all hits are collected.
 * 
 */
public static ScoreDoc[] doPagingSearch(Searcher searcher, Query query, int noOfPages) throws IOException {

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(noOfPages, true);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int numTotalHits = collector.getTotalHits();
    // System.out.println("Confidence Score : : "+hits.length);
    System.out.println(numTotalHits + " total matching documents");
    return hits;
}
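
For the second phase the javadoc describes (paging within the collected window), TopDocsCollector.topDocs(int start, int howMany) slices the already-collected hits without re-running the query. A sketch against the same Lucene 3.x/4.x API, with searcher and query supplied by the caller:

static ScoreDoc[] fetchPage(IndexSearcher searcher, Query query, int page, int hitsPerPage)
        throws IOException {
    // Collect enough docs for 5 pages up front, as the demo above does.
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, true);
    searcher.search(query, collector);
    // Slice out one zero-based page from the collected hits.
    return collector.topDocs(page * hitsPerPage, hitsPerPage).scoreDocs;
}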

From source file: org.apache.solr.search.federated.DJoinQParserPlugin.java

License: Apache License

@Override
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new QParser(qstr, localParams, params, req) {

        @Override
        public Query parse() throws SyntaxError {
            return new RankQuery() {

                private Query mainQuery;

                @Override
                @SuppressWarnings("rawtypes")
                public TopDocsCollector getTopDocsCollector(int len, QueryCommand cmd, IndexSearcher searcher)
                        throws IOException {
                    Sort sort = cmd.getSort();
                    if (sort == null) {
                        return TopScoreDocCollector.create(len, false);
                    } else {
                        return TopFieldCollector.create(sort.rewrite(searcher), len, false, true, true, false);
                    }
                }

                @Override
                public MergeStrategy getMergeStrategy() {
                    return new DJoinMergeStrategy();
                }

                @Override
                public RankQuery wrap(Query mainQuery) {
                    this.mainQuery = mainQuery;
                    return this;
                }

                @Override
                public Query rewrite(IndexReader reader) throws IOException {
                    return mainQuery.rewrite(reader);
                }

                @Override
                public Weight createWeight(IndexSearcher searcher) throws IOException {
                    return mainQuery.createWeight(searcher);
                }

            };
        }

    };
}

From source file: org.apache.solr.search.grouping.distributed.command.QueryCommand.java

License: Apache License

@Override
public List<Collector> create() throws IOException {
    if (sort == null || sort == Sort.RELEVANCE) {
        collector = TopScoreDocCollector.create(docsToCollect, true);
    } else {
        collector = TopFieldCollector.create(sort, docsToCollect, true, needScores, needScores, true);
    }
    filterCollector = new FilterCollector(docSet, collector);
    return Arrays.asList((Collector) filterCollector);
}

From source file: org.apache.solr.search.SolrIndexSearcher.java

License: Apache License

private DocSet getDocListAndSetNC(QueryResult qr, QueryCommand cmd) throws IOException {
    int len = cmd.getSupersetMaxDoc();
    int last = len;
    if (last < 0 || last > maxDoc())
        last = maxDoc();
    final int lastDocRequested = last;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;
    DocSet set;

    boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
    boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
    int maxDoc = maxDoc();
    int smallSetSize = maxDoc >> 6;

    ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
    final Filter luceneFilter = pf.filter;

    Query query = QueryUtils.makeQueryable(cmd.getQuery());
    final long timeAllowed = cmd.getTimeAllowed();

    // handle zero case...
    if (lastDocRequested <= 0) {
        final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };

        Collector collector;
        DocSetCollector setCollector;

        if (!needScores) {
            collector = setCollector = new DocSetCollector(smallSetSize, maxDoc);
        } else {
            collector = setCollector = new DocSetDelegateCollector(smallSetSize, maxDoc, new Collector() {
                Scorer scorer;

                @Override
                public void setScorer(Scorer scorer) {
                    this.scorer = scorer;
                }

                @Override
                public void collect(int doc) throws IOException {
                    float score = scorer.score();
                    if (score > topscore[0])
                        topscore[0] = score;
                }

                @Override
                public void setNextReader(AtomicReaderContext context) {
                }

                @Override
                public boolean acceptsDocsOutOfOrder() {
                    return false;
                }
            });
        }
        if (terminateEarly) {
            collector = new EarlyTerminatingCollector(collector, cmd.len);
        }
        if (timeAllowed > 0) {
            collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(),
                    timeAllowed);
        }
        if (pf.postFilter != null) {
            pf.postFilter.setLastDelegate(collector);
            collector = pf.postFilter;
        }

        try {
            super.search(query, luceneFilter, collector);
            if (collector instanceof DelegatingCollector) {
                ((DelegatingCollector) collector).finish();
            }
        } catch (TimeLimitingCollector.TimeExceededException x) {
            log.warn("Query: " + query + "; " + x.getMessage());
            qr.setPartialResults(true);
        }

        set = setCollector.getDocSet();

        nDocsReturned = 0;
        ids = new int[nDocsReturned];
        scores = new float[nDocsReturned];
        totalHits = set.size();
        maxScore = totalHits > 0 ? topscore[0] : 0.0f;
    } else {

        TopDocsCollector topCollector;

        if (cmd.getSort() == null) {
            topCollector = TopScoreDocCollector.create(len, true);
        } else {
            topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores,
                    needScores, true);
        }

        DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, topCollector);
        Collector collector = setCollector;
        if (terminateEarly) {
            collector = new EarlyTerminatingCollector(collector, cmd.len);
        }
        if (timeAllowed > 0) {
            collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(),
                    timeAllowed);
        }
        if (pf.postFilter != null) {
            pf.postFilter.setLastDelegate(collector);
            collector = pf.postFilter;
        }
        try {
            super.search(query, luceneFilter, collector);
            if (collector instanceof DelegatingCollector) {
                ((DelegatingCollector) collector).finish();
            }
        } catch (TimeLimitingCollector.TimeExceededException x) {
            log.warn("Query: " + query + "; " + x.getMessage());
            qr.setPartialResults(true);
        }

        set = setCollector.getDocSet();

        totalHits = topCollector.getTotalHits();
        assert (totalHits == set.size());

        TopDocs topDocs = topCollector.topDocs(0, len);
        maxScore = totalHits > 0 ? topDocs.getMaxScore() : 0.0f;
        nDocsReturned = topDocs.scoreDocs.length;

        ids = new int[nDocsReturned];
        scores = (cmd.getFlags() & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
        for (int i = 0; i < nDocsReturned; i++) {
            ScoreDoc scoreDoc = topDocs.scoreDocs[i];
            ids[i] = scoreDoc.doc;
            if (scores != null)
                scores[i] = scoreDoc.score;
        }
    }

    int sliceLen = Math.min(lastDocRequested, nDocsReturned);
    if (sliceLen < 0)
        sliceLen = 0;

    qr.setDocList(new DocSlice(0, sliceLen, ids, scores, totalHits, maxScore));
    // TODO: if we collect results before the filter, we just need to intersect with
    // that filter to generate the DocSet for qr.setDocSet()
    qr.setDocSet(set);

    // TODO: currently we don't generate the DocSet for the base query,
    // but the QueryDocSet == CompleteDocSet if filter==null.
    return pf.filter == null && pf.postFilter == null ? qr.getDocSet() : null;
}

From source file: org.apache.tika.example.RecentFiles.java

License: Apache License

public String generateRSS(File indexFile) throws CorruptIndexException, IOException {
    StringBuffer output = new StringBuffer();
    output.append(getRSSHeaders());
    IndexSearcher searcher = null;
    try {
        reader = IndexReader.open(new SimpleFSDirectory(indexFile));
        searcher = new IndexSearcher(reader);
        GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
        gc.setTime(new Date());
        String nowDateTime = ISO8601.format(gc);
        gc.add(java.util.GregorianCalendar.MINUTE, -5);
        String fiveMinsAgo = ISO8601.format(gc);
        TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(), fiveMinsAgo, nowDateTime, true,
                true);
        TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            output.append(getRSSItem(doc));
        }

    } finally {
        if (reader != null)
            reader.close();
        if (searcher != null)
            searcher.close();
    }

    output.append(getRSSFooters());
    return output.toString();
}

From source file: org.chenillekit.lucene.services.impl.SearcherServiceImpl.java

License: Apache License

public List<Document> search(String fieldName, Query query, Integer howMany) {
    ScoreDoc[] scores;

    int total = howMany != null ? howMany.intValue() : MAX_SCORE_DOC;

    TopScoreDocCollector collector = TopScoreDocCollector.create(total, true);

    IndexSearcher indexSearcher = indexSource.createIndexSearcher();

    try {
        indexSearcher.search(query, collector);

        scores = collector.topDocs().scoreDocs;
    } catch (IOException ioe) {
        logger.error(String.format("Unable to access the index for searching: '%s'", ioe.getMessage()), ioe);
        throw new ChenilleKitLuceneRuntimeException(ioe);
    }

    List<Document> docs = new ArrayList<Document>();

    for (int i = 0; i < scores.length; i++) {
        int docId = scores[i].doc;

        try {

            docs.add(indexSearcher.doc(docId));
        } catch (CorruptIndexException cie) {
            logger.error(String.format("The index result corrupted: '%s'", cie.getMessage()), cie);
            throw new ChenilleKitLuceneRuntimeException(cie);
        } catch (IOException ioe) {
            logger.error(String.format("Unable to access the index for searching: '%s'", ioe.getMessage()),
                    ioe);
            throw new ChenilleKitLuceneRuntimeException(ioe);
        }
    }

    try {
        indexSearcher.close();
    } catch (IOException e) {
        logger.error(String.format("Unable to close the index for searching: '%s'", e.getMessage()), e);
        throw new ChenilleKitLuceneRuntimeException(e);
    }

    return docs;
}

From source file: org.crosswire.jsword.index.lucene.LuceneIndex.java

License: Open Source License

public Key find(String search) throws BookException {
    checkActive();

    SearchModifier modifier = getSearchModifier();
    Key results = null;

    if (search != null) {
        try {
            Analyzer analyzer = new LuceneAnalyzer(book);

            QueryParser parser = new QueryParser(Version.LUCENE_29, LuceneIndex.FIELD_BODY, analyzer);
            parser.setAllowLeadingWildcard(true);
            Query query = parser.parse(search);
            log.info("ParsedQuery-" + query.toString());

            // For ranking we use a PassageTally
            if (modifier != null && modifier.isRanked()) {
                PassageTally tally = new PassageTally();
                tally.raiseEventSuppresion();
                tally.raiseNormalizeProtection();
                results = tally;

                TopScoreDocCollector collector = TopScoreDocCollector.create(modifier.getMaxResults(), false);
                searcher.search(query, collector);
                tally.setTotal(collector.getTotalHits());
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    int docId = hits[i].doc;
                    Document doc = searcher.doc(docId);
                    Key key = VerseFactory.fromString(doc.get(LuceneIndex.FIELD_KEY));
                    // PassageTally understands a score of 0 as the verse
                    // not participating
                    int score = (int) (hits[i].score * 100 + 1);
                    tally.add(key, score);
                }
                tally.lowerNormalizeProtection();
                tally.lowerEventSuppresionAndTest();
            } else {
                results = book.createEmptyKeyList();
                // If we have an abstract passage,
                // make sure it does not try to fire change events.
                AbstractPassage passage = null;
                if (results instanceof AbstractPassage) {
                    passage = (AbstractPassage) results;
                    passage.raiseEventSuppresion();
                    passage.raiseNormalizeProtection();
                }
                searcher.search(query, new VerseCollector(searcher, results));
                if (passage != null) {
                    passage.lowerNormalizeProtection();
                    passage.lowerEventSuppresionAndTest();
                }
            }
        } catch (IOException e) {
            // The VerseCollector may throw IOExceptions that merely wrap a
            // NoSuchVerseException
            Throwable cause = e.getCause();
            if (cause instanceof NoSuchVerseException) {
                // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
                throw new BookException(UserMsg.gettext("Search failed."), cause);
            }

            // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
            throw new BookException(UserMsg.gettext("Search failed."), e);
        } catch (NoSuchVerseException e) {
            // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
            throw new BookException(UserMsg.gettext("Search failed."), e);
        } catch (ParseException e) {
            // TRANSLATOR: Error condition: An unexpected error happened that caused search to fail.
            throw new BookException(UserMsg.gettext("Search failed."), e);
        } finally {
            Activator.deactivate(this);
        }
    }

    if (results == null) {
        if (modifier != null && modifier.isRanked()) {
            results = new PassageTally();
        } else {
            results = book.createEmptyKeyList();
        }
    }
    return results;
}
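
A note on the score mapping above: Lucene scores are floats, but PassageTally treats 0 as "verse not participating", so the code shifts every hit into a strictly positive integer. For example, a Lucene score of 0.87 becomes (int) (0.87f * 100 + 1) = 88, and even a score of 0.0 still maps to 1.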

From source file: org.d2.plugins.lucene.LuceneIndexer.java

License: Apache License

public List<String> findIdByQuery(D2Query query) {
    LuceneReaderAndSearcher searcher = null;
    try {
        searcher = manager.getSearcher();
        List<String> pList = new ArrayList<String>();

        TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
        searcher.getSearcher().search(makeLuceneQuery(query), collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.getSearcher().doc(hits[i].doc);
            pList.add(doc.get("id"));
        }

        return pList;
    } catch (IOException e) {
        throw Util.wrap(e);
    } finally {
        manager.releaseSearcher(searcher);
    }
}

From source file: org.dbpedia.spotlight.lucene.search.BaseSearcher.java

License: Apache License

/**
 * Basic search method used by all searches to the index.
 * @param query the Lucene query to run
 * @param n the maximum number of hits to collect
 * @param timeout the search timeout in milliseconds (time limiting is currently disabled; see the commented-out collector below)
 * @param filter an optional filter restricting the search, or null
 * @return the matching hits, ranked by score
 * @throws SearchException if the underlying search fails
 */
public ScoreDoc[] getHits(Query query, int n, int timeout, Filter filter) throws SearchException {
    ScoreDoc[] hits = null;
    try {
        //LOG.debug("Start search. timeout="+timeout);
        long start = System.nanoTime();
        TopScoreDocCollector collector = TopScoreDocCollector.create(n, false);
        //TimeLimitingCollector collector = new TimeLimitingCollector(tCollector, timeout);  //TODO try to bring this back later
        mSearcher.search(query, filter, collector);
        //mSearcher.
        hits = collector.topDocs().scoreDocs;
        long end = System.nanoTime();
        LOG.debug(
                String.format("Done search in %f ms. hits.length=%d", (end - start) / 1000000.0, hits.length));
    } catch (TimeLimitingCollector.TimeExceededException timedOutException) {
        throw new TimeoutException("Timeout (>" + timeout + "ms searching for surface form " + query.toString(),
                timedOutException);
    } catch (Exception e) {
        throw new SearchException("Error searching for surface form " + query.toString(), e);
    }
    //LOG.debug(hits.length+" hits found.");
    return hits;
}