Example usage for org.apache.lucene.search TopScoreDocCollector create

List of usage examples for org.apache.lucene.search TopScoreDocCollector create

Introduction

On this page you can find example usages of org.apache.lucene.search TopScoreDocCollector create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Source Link

Document

Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately. Note: this two-int prototype is the modern (Lucene 8+) signature; most of the examples below were written against older Lucene releases whose second argument was a boolean (`docsScoredInOrder`), so they will not compile unchanged against the current API.

Usage

From source file:aos.lucene.search.advanced.TimeLimitingCollectorTest.java

License:Apache License

public void testTimeLimitingCollector() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(dir);
    Query matchAll = new MatchAllDocsQuery();
    int expectedHitCount = TestUtil.hitCount(searcher, matchAll);

    // Wrap a plain relevance collector so the search aborts after 1000 ms.
    TopScoreDocCollector topScoreCollector = TopScoreDocCollector.create(10, false);
    Collector timeLimited = new TimeLimitingCollector(topScoreCollector, 1000);
    try {
        searcher.search(matchAll, timeLimited);
        // Search finished within the budget: every book must have been counted.
        assertEquals(expectedHitCount, topScoreCollector.getTotalHits());
    } catch (TimeExceededException tee) {
        // The time budget ran out before the collector saw every document.
        LOGGER.info("Too much time taken.");
    }
    searcher.close();
    dir.close();
}

From source file:aplicacion.sistema.indexer.test.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 *
 * @param in          console input driving the interactive paging loop
 * @param searcher    searcher the query is executed against
 * @param query       the parsed user query
 * @param hitsPerPage number of hits shown per result page
 * @param raw         if true, print raw doc id/score instead of stored fields
 * @param interactive if false, print only the first page and return
 * @throws IOException on index or console I/O errors
 */
public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        // User paged past what was collected so far: offer to re-run the
        // query collecting every hit.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            // NOTE(review): readLine() returns null at end of stream, which would
            // NPE on the next line — confirm the input source always supplies a line.
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            // Re-execute, this time collecting all hits.
            collector = TopScoreDocCollector.create(numTotalHits, false);
            searcher.search(query, collector);
            hits = collector.topDocs().scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        // Print the current page [start, end).
        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String txt = doc.get("contents");
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
                if (txt != null) {
                    System.out.println("   Content: " + doc.get("contents"));
                }

            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }

        }

        if (!interactive) {
            break;
        }

        // Prompt for previous/next page, an explicit page number, or quit.
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // NOTE(review): parseInt throws NumberFormatException on
                    // non-numeric input and aborts the method — confirm acceptable.
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }

    }

}

From source file:arena.lucene.LuceneIndexSearcherImpl.java

License:Open Source License

/**
 * Runs a search, choosing the cheapest collection strategy for whichever
 * combination of query / filter / sort is actually present.
 *
 * @param searcher       searcher over the index
 * @param query          scoring query; may be null
 * @param filter         restriction on matching docs; may be null
 * @param sort           result ordering; may be null (index order)
 * @param collectorLimit maximum number of hits to return
 * @return the top hits for the given inputs
 * @throws IOException on index access errors
 */
protected TopDocs executeSearch(IndexSearcher searcher, Query query, Filter filter, Sort sort,
        int collectorLimit) throws IOException {
    // Decide on how to search based on which elements of the lucene query model are available
    if (query != null) {
        // Full scoring search
        TopDocsCollector<? extends ScoreDoc> collector = null;
        if (sort == null) {
            collector = TopScoreDocCollector.create(collectorLimit, true);
        } else {
            // A sort that is just "score descending" is equivalent to the plain
            // relevance collector, which is cheaper than a field collector.
            SortField sortFields[] = sort.getSort();
            if (sortFields != null && sortFields.length > 0 && sortFields[0].getType() == SortField.SCORE
                    && !sortFields[0].getReverse()) {
                collector = TopScoreDocCollector.create(collectorLimit, true);
            } else {
                // NOTE(review): boolean args presumed to be (fillFields,
                // trackDocScores, trackMaxScore, docsScoredInOrder) for this
                // Lucene version — confirm against the API in use.
                collector = TopFieldCollector.create(sort, collectorLimit, false, true, true, true);
            }
        }
        searcher.search(query, filter, collector);
        return collector.topDocs();

    } else if (filter != null) {
        // No query = no need for scoring, just dump the results into a hit collector that runs 
        // off the results in the order we want 
        // NOTE(review): some Lucene versions permit getDocIdSet() to return null
        // (meaning "matches nothing"), which would NPE here — confirm the filters used.
        DocIdSetIterator filterMatchesIterator = filter.getDocIdSet(searcher.getIndexReader()).iterator();
        if (sort == null) {
            // no sort available, so the natural iteration order is fine
            // if we have an iterator that means sorting is already handled, so just pull off the first n rows into the output
            ScoreDoc[] scoreDocs = new ScoreDoc[collectorLimit];
            int found = 0;
            int docId;
            while (found < collectorLimit
                    && (docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                scoreDocs[found++] = new ScoreDoc(docId, 1f);
            }
            // Trim the array when fewer than collectorLimit docs matched.
            return new TopDocs(found, found < collectorLimit ? Arrays.copyOf(scoreDocs, found) : scoreDocs, 1f);
        } else {
            // Feed every filter match through a field collector so the sort applies.
            TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit,
                    false, true, true, true);
            int docId;
            while ((docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                collector.collect(docId);
            }
            return collector.topDocs();

        }
    } else if (sort != null) {
        // no query and no filter so no score but add every doc in the index for non-score sorting            
        TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit, false,
                true, true, true);
        int numDocs = searcher.getIndexReader().numDocs();
        for (int n = 0; n < numDocs; n++) {
            collector.collect(n);
        }
        return collector.topDocs();
    } else {
        // no query filter or sort: return the top n docs
        // NOTE(review): assumes doc ids are dense from 0 (no deletions) — confirm.
        ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(collectorLimit, searcher.getIndexReader().numDocs())];

        for (int n = 0; n < scoreDocs.length; n++) {
            scoreDocs[n] = new ScoreDoc(n, 1f);
        }
        return new TopDocs(scoreDocs.length, scoreDocs, 1f);
    }
}

From source file:bajavista.Buscador.java

/**
 * Searches the "ES" index for the given query string and returns the
 * matching items converted to Informacion beans.
 *
 * NOTE(review): 'info' and 'listaInfo' appear to be instance fields, so
 * results accumulate across successive calls — confirm that is intended.
 * The analyzer, directory and searcher are also never closed here.
 *
 * @param busqueda raw user query, parsed against the "text" field
 * @return the (accumulated) list of matching items
 * @throws IOException    on index access errors
 * @throws ParseException if the query string cannot be parsed
 */
public ArrayList<Informacion> buscarContenido(String busqueda) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);

    // 1. Open the index (the non-ES variant is kept commented out).
    File indexDirES = new File(dirIndexES);
    Directory indexES = FSDirectory.open(indexDirES);
    //File indexDirNONES = new File(dirIndexNONES);
    //Directory indexNONES = FSDirectory.open(indexDirNONES);

    // 2. Build the query from the raw search string.
    String querystr = busqueda;

    Query q = new QueryParser(Version.LUCENE_43, "text", analyzer).parse(querystr);
    //Query qNONES = new QueryParser(Version.LUCENE_43, "contenido", analyzer).parse(querystr);

    // 3. Search, keeping up to 1024 top-scoring hits.
    int hitsPage = 1024;
    IndexReader reader = DirectoryReader.open(indexES);
    IndexSearcher searcher = new IndexSearcher(reader);

    //IndexReader readerNONES = DirectoryReader.open(indexNONES);
    //IndexSearcher searcherNONES = new IndexSearcher(readerNONES);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPage, true);
    //TopScoreDocCollector collectorNONES = TopScoreDocCollector.create(hitsPage, true);

    searcher.search(q, collector);
    //searcherNONES.search(q, collectorNONES);

    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    // ScoreDoc[] hitsNONES = collectorNONES.topDocs().scoreDocs;

    // 4. Convert each stored document into an Informacion bean.
    // NOTE(review): assumes every stored field is present and numeric where
    // parsed — a missing field would throw here; confirm index schema.
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document data = searcher.doc(docId);
        info = new Informacion(Integer.parseInt(data.get("idUser")), Long.parseLong(data.get("timestamp")),
                data.get("text"), Double.parseDouble(data.get("objective")),
                Double.parseDouble(data.get("subjective")), Double.parseDouble(data.get("positive")),
                Double.parseDouble(data.get("negative")), Integer.parseInt(data.get("need")));
        listaInfo.add(info);
    }

    /*System.out.println("No ES Found " + hitsNONES.length + " hits.");
     for(int i=0;i<hitsNONES.length;++i) {
     int docId = hitsNONES[i].doc;
     Document d = searcherNONES.doc(docId);
     System.out.println((i + 1) + ". " + d.get("es") + "\t" + d.get("contenido"));
     }*/
    reader.close();
    //readerNONES.close();

    return listaInfo;
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.LuceneTermRecommender.java

License:Apache License

@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;//from w w w  . j a  v a 2  s  .  com
    try {
        Map<String, Double> terms = ratingsDao.getRatings(userid);
        Query query = buildQuery(terms);
        int hitsPerPage = count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        Filter filter = new SeenArticlesFilter(viewsDao, userid);
        searcher = manager.acquire();
        manager.maybeRefresh();
        searcher.search(query, filter, collector);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int stop = (start + count < hits.length ? start + count : hits.length);
        List<RecommendedNewsItem> results = new ArrayList<>(stop - start);

        for (int i = start; i < stop; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            results.add(toNewsitem(d, docId, hits[i].score, "termRecommender"));
            //System.out.println(docId);
            //System.out.println(searcher.explain(query, docId).toString());
        }

        return results;

    } catch (RatingsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        try {
            manager.release(searcher);
        } catch (IOException ex) {
            logger.error(ex);
        }
        searcher = null;
    }
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.PersonalAndTrendingRecommender.java

License:Apache License

@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    count = count / 2;//from w w w .  j  av  a  2 s  .  c om

    List<RecommendedNewsItem> results = super.recommend(userid, start, count);

    IndexSearcher searcher = null;
    try {
        Map<String, Double> terms = ratingsDao.getRatings(userid);
        Query query = buildQuery(terms);
        int hitsPerPage = start + count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        Filter f1 = new UniqueResultsFilter(results);
        Filter f2 = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);
        Filter f = new ChainedFilter(new Filter[] { f1, f2 }, ChainedFilter.AND);

        searcher = manager.acquire();
        manager.maybeRefresh();
        searcher.search(query, f, collector);

        ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs;

        for (ScoreDoc s : hits) {
            int docId = s.doc;
            Document d = searcher.doc(docId);
            RecommendedNewsItem item = toNewsitem(d, docId, s.score, "personal");
            results.add(item);
        }
        //Collections.sort(results);
    } catch (RatingsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    }
    return results;
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TopNRecommender.java

License:Apache License

@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;/* ww w . j  ava 2  s.  c  om*/
    try {
        List<Long> ids = viewsDao.getNMostSeenArticles(start, start + count);
        Query query = buildQuery(ids);
        int hitsPerPage = count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        Filter filter = new SeenArticlesFilter(viewsDao, userid);
        searcher = manager.acquire();
        searcher.search(query, filter, collector);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int stop = (start + count < hits.length ? start + count : hits.length);
        List<RecommendedNewsItem> results = new ArrayList<>(stop - start);

        for (int i = start; i < stop; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            results.add(toNewsitem(d, docId, hits[i].score, "topN"));
        }

        return results;

    } catch (ViewsDaoException | IOException ex) {

        throw new RecommendationException(ex);
    } finally {
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
            searcher = null;
        }
    }
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TrendingTopicRecommender.java

License:Apache License

@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;/* ww  w . j a v  a2  s .c  o  m*/
    try {
        String[] trends = trendsDao.getTrends(250);
        Query query = buildQuery(trends);
        int hitsPerPage = start + count;

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);

        //Filter filter = new SeenArticlesFilter(viewsDao, userid);
        Filter f = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);

        manager.maybeRefresh();
        searcher = manager.acquire();

        searcher.search(query, f, collector);

        ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs;

        List<RecommendedNewsItem> results = new ArrayList<>(hits.length);

        for (ScoreDoc hit : hits) {
            int docId = hit.doc;
            Document d = searcher.doc(docId);
            RecommendedNewsItem item = toNewsitem(d, docId, hit.score, "trending");
            results.add(item);
        }

        return results;

    } catch (TrendsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        try {
            if (searcher != null) {
                manager.release(searcher);
            }
        } catch (IOException ex) {
            logger.error(ex);
        }
        searcher = null;
    }
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.scorers.DatabaseLuceneScorer.java

License:Apache License

/**
 * Looks up the single best document matching field=value and returns its
 * term map, or an empty map when no such document exists.
 *
 * @param field field name to match on
 * @param value exact term value to look up
 * @return the document's terms, or an empty map if not found
 * @throws IOException on index access errors
 */
protected Map<String, Double> getTopTerms(String field, String value) throws IOException {
    manager.maybeRefreshBlocking();
    IndexSearcher searcher = manager.acquire();
    try {
        // FIX: the reader belongs to the SearcherManager's searcher — it must
        // NOT be closed here. The old try-with-resources closed it, invalidating
        // the managed searcher for subsequent callers.
        IndexReader reader = searcher.getIndexReader();
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Query q = new TermQuery(new Term(field, value));
        searcher.search(q, collector);
        if (collector.getTotalHits() > 0) {
            int docNr = collector.topDocs().scoreDocs[0].doc;
            Document doc = reader.document(docNr);
            NewsItem nitem = NewsItemLuceneDocConverter.documentToNewsItem(doc);
            return nitem.getTerms();
        }
        logger.warn("Could not find document with " + field + "=" + value);
        return new HashMap<>();
    } finally {
        // FIX: release in finally so the searcher is returned to the manager
        // even when the search throws (previously it leaked on any exception).
        manager.release(searcher);
    }
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.termExtract.LuceneTopTermExtract.java

License:Apache License

/**
 * Returns the 10 most important terms in the document with the specified
 * id./*from   www  .j  av  a 2s .  c  o  m*/
 *
 * @param id
 * @param reader
 * @param numberOfTerms
 * @return
 */
public Map<String, Double> getTopTerms(String id, IndexReader reader, int numberOfTerms) {
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Query q = new TermQuery(new Term("id", id));
        searcher.search(q, collector);
        if (collector.getTotalHits() > 0) {
            int docNr = collector.topDocs().scoreDocs[0].doc;
            return getTopTerms(docNr, reader, numberOfTerms);
        } else {
            logger.warn("No document found with id=" + id);
        }
    } catch (IOException ex) {
        logger.error(ex);
    }
    return new HashMap<>(0);
}