List of usage examples for org.apache.lucene.search TopScoreDocCollector create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)

Note: the examples below come from projects built against Lucene 4.x and earlier, where the second parameter was a boolean, create(int numHits, boolean docsScoredInOrder). In current Lucene releases the second parameter is an int totalHitsThreshold: the hit count is exact up to that threshold and a lower bound beyond it.
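A minimal sketch of the current two-argument form, assuming an existing index in a Directory named dir; the field name "contents" and the query term are placeholders:

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;

public class CreateExample {
    public static void search(Directory dir) throws Exception {
        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("contents", "lucene"));
            // Keep the top 10 hits; count total hits exactly only up to 1000,
            // beyond that the reported total is a lower bound.
            TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
            searcher.search(query, collector);
            for (ScoreDoc hit : collector.topDocs().scoreDocs) {
                System.out.println("doc=" + hit.doc + " score=" + hit.score);
            }
        }
    }
}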
From source file:aos.lucene.search.advanced.TimeLimitingCollectorTest.java
License:Apache License
public void testTimeLimitingCollector() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    IndexSearcher searcher = new IndexSearcher(dir);
    Query q = new MatchAllDocsQuery();
    int numAllBooks = TestUtil.hitCount(searcher, q);

    TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false);
    Collector collector = new TimeLimitingCollector(topDocs,   // #A
            1000);                                             // #A
    try {
        searcher.search(q, collector);
        assertEquals(numAllBooks, topDocs.getTotalHits());     // #B
    } catch (TimeExceededException tee) {                      // #C
        LOGGER.info("Too much time taken.");                   // #C
    }                                                          // #C
    searcher.close();
    dir.close();
}
From source file:aplicacion.sistema.indexer.test.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            collector = TopScoreDocCollector.create(numTotalHits, false);
            searcher.search(query, collector);
            hits = collector.topDocs().scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String txt = doc.get("contents");
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
                if (txt != null) {
                    System.out.println("   Content: " + doc.get("contents"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:arena.lucene.LuceneIndexSearcherImpl.java
License:Open Source License
protected TopDocs executeSearch(IndexSearcher searcher, Query query, Filter filter, Sort sort,
        int collectorLimit) throws IOException {
    // Decide on how to search based on which elements of the lucene query model are available
    if (query != null) {
        // Full scoring search
        TopDocsCollector<? extends ScoreDoc> collector = null;
        if (sort == null) {
            collector = TopScoreDocCollector.create(collectorLimit, true);
        } else {
            SortField sortFields[] = sort.getSort();
            if (sortFields != null && sortFields.length > 0 && sortFields[0].getType() == SortField.SCORE
                    && !sortFields[0].getReverse()) {
                collector = TopScoreDocCollector.create(collectorLimit, true);
            } else {
                collector = TopFieldCollector.create(sort, collectorLimit, false, true, true, true);
            }
        }
        searcher.search(query, filter, collector);
        return collector.topDocs();

    } else if (filter != null) {
        // No query = no need for scoring, just dump the results into a hit collector that runs
        // off the results in the order we want
        DocIdSetIterator filterMatchesIterator = filter.getDocIdSet(searcher.getIndexReader()).iterator();
        if (sort == null) {
            // no sort available, so the natural iteration order is fine
            // if we have an iterator that means sorting is already handled, so just pull off the
            // first n rows into the output
            ScoreDoc[] scoreDocs = new ScoreDoc[collectorLimit];
            int found = 0;
            int docId;
            while (found < collectorLimit
                    && (docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                scoreDocs[found++] = new ScoreDoc(docId, 1f);
            }
            return new TopDocs(found, found < collectorLimit ? Arrays.copyOf(scoreDocs, found) : scoreDocs, 1f);
        } else {
            TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit,
                    false, true, true, true);
            int docId;
            while ((docId = filterMatchesIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                collector.collect(docId);
            }
            return collector.topDocs();
        }

    } else if (sort != null) {
        // no query and no filter so no score, but add every doc in the index for non-score sorting
        TopDocsCollector<? extends ScoreDoc> collector = TopFieldCollector.create(sort, collectorLimit, false,
                true, true, true);
        int numDocs = searcher.getIndexReader().numDocs();
        for (int n = 0; n < numDocs; n++) {
            collector.collect(n);
        }
        return collector.topDocs();

    } else {
        // no query, filter or sort: return the top n docs
        ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(collectorLimit, searcher.getIndexReader().numDocs())];
        for (int n = 0; n < scoreDocs.length; n++) {
            scoreDocs[n] = new ScoreDoc(n, 1f);
        }
        return new TopDocs(scoreDocs.length, scoreDocs, 1f);
    }
}
From source file:bajavista.Buscador.java
public ArrayList<Informacion> buscarContenido(String busqueda) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    File indexDirES = new File(dirIndexES);
    Directory indexES = FSDirectory.open(indexDirES);
    //File indexDirNONES = new File(dirIndexNONES);
    //Directory indexNONES = FSDirectory.open(indexDirNONES);

    // 2. Query
    String querystr = busqueda;
    Query q = new QueryParser(Version.LUCENE_43, "text", analyzer).parse(querystr);
    //Query qNONES = new QueryParser(Version.LUCENE_43, "contenido", analyzer).parse(querystr);

    // 3. Search
    int hitsPage = 1024;
    IndexReader reader = DirectoryReader.open(indexES);
    IndexSearcher searcher = new IndexSearcher(reader);
    //IndexReader readerNONES = DirectoryReader.open(indexNONES);
    //IndexSearcher searcherNONES = new IndexSearcher(readerNONES);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPage, true);
    //TopScoreDocCollector collectorNONES = TopScoreDocCollector.create(hitsPage, true);

    searcher.search(q, collector);
    //searcherNONES.search(q, collectorNONES);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    //ScoreDoc[] hitsNONES = collectorNONES.topDocs().scoreDocs;

    // 4. Return results
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document data = searcher.doc(docId);
        info = new Informacion(Integer.parseInt(data.get("idUser")), Long.parseLong(data.get("timestamp")),
                data.get("text"), Double.parseDouble(data.get("objective")),
                Double.parseDouble(data.get("subjective")), Double.parseDouble(data.get("positive")),
                Double.parseDouble(data.get("negative")), Integer.parseInt(data.get("need")));
        listaInfo.add(info);
    }

    /*System.out.println("No ES Found " + hitsNONES.length + " hits.");
    for (int i = 0; i < hitsNONES.length; ++i) {
        int docId = hitsNONES[i].doc;
        Document d = searcherNONES.doc(docId);
        System.out.println((i + 1) + ". " + d.get("es") + "\t" + d.get("contenido"));
    }*/

    reader.close();
    //readerNONES.close();
    return listaInfo;
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.LuceneTermRecommender.java
License:Apache License
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;
    try {
        Map<String, Double> terms = ratingsDao.getRatings(userid);
        Query query = buildQuery(terms);
        int hitsPerPage = count;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        Filter filter = new SeenArticlesFilter(viewsDao, userid);

        searcher = manager.acquire();
        manager.maybeRefresh();
        searcher.search(query, filter, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int stop = (start + count < hits.length ? start + count : hits.length);
        List<RecommendedNewsItem> results = new ArrayList<>(stop - start);
        for (int i = start; i < stop; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            results.add(toNewsitem(d, docId, hits[i].score, "termRecommender"));
            //System.out.println(docId);
            //System.out.println(searcher.explain(query, docId).toString());
        }
        return results;
    } catch (RatingsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        try {
            manager.release(searcher);
        } catch (IOException ex) {
            logger.error(ex);
        }
        searcher = null;
    }
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.PersonalAndTrendingRecommender.java
License:Apache License
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    count = count / 2;
    List<RecommendedNewsItem> results = super.recommend(userid, start, count);
    IndexSearcher searcher = null;
    try {
        Map<String, Double> terms = ratingsDao.getRatings(userid);
        Query query = buildQuery(terms);
        int hitsPerPage = start + count;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        Filter f1 = new UniqueResultsFilter(results);
        Filter f2 = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);
        Filter f = new ChainedFilter(new Filter[] { f1, f2 }, ChainedFilter.AND);

        searcher = manager.acquire();
        manager.maybeRefresh();
        searcher.search(query, f, collector);
        ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs;

        for (ScoreDoc s : hits) {
            int docId = s.doc;
            Document d = searcher.doc(docId);
            RecommendedNewsItem item = toNewsitem(d, docId, s.score, "personal");
            results.add(item);
        }
        //Collections.sort(results);
    } catch (RatingsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        // give the acquired searcher back to the SearcherManager
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
        }
    }
    return results;
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TopNRecommender.java
License:Apache License
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;
    try {
        List<Long> ids = viewsDao.getNMostSeenArticles(start, start + count);
        Query query = buildQuery(ids);
        int hitsPerPage = count;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        Filter filter = new SeenArticlesFilter(viewsDao, userid);

        searcher = manager.acquire();
        searcher.search(query, filter, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        int stop = (start + count < hits.length ? start + count : hits.length);
        List<RecommendedNewsItem> results = new ArrayList<>(stop - start);
        for (int i = start; i < stop; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            results.add(toNewsitem(d, docId, hits[i].score, "topN"));
        }
        return results;
    } catch (ViewsDaoException | IOException ex) {
        throw new RecommendationException(ex);
    } finally {
        if (searcher != null) {
            try {
                manager.release(searcher);
            } catch (IOException ex) {
                logger.error(ex);
            }
            searcher = null;
        }
    }
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.recommenders.TrendingTopicRecommender.java
License:Apache License
@Override
public List<RecommendedNewsItem> recommend(long userid, int start, int count) throws RecommendationException {
    IndexSearcher searcher = null;
    try {
        String[] trends = trendsDao.getTrends(250);
        Query query = buildQuery(trends);
        int hitsPerPage = start + count;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        //Filter filter = new SeenArticlesFilter(viewsDao, userid);
        Filter f = new RecentFilter("timestamp", 1000 * 60 * 60 * 24);

        manager.maybeRefresh();
        searcher = manager.acquire();
        searcher.search(query, f, collector);
        ScoreDoc[] hits = collector.topDocs(start, count).scoreDocs;

        List<RecommendedNewsItem> results = new ArrayList<>(hits.length);
        for (ScoreDoc hit : hits) {
            int docId = hit.doc;
            Document d = searcher.doc(docId);
            RecommendedNewsItem item = toNewsitem(d, docId, hit.score, "trending");
            results.add(item);
        }
        return results;
    } catch (TrendsDaoException | IOException ex) {
        logger.error(ex);
        throw new RecommendationException(ex);
    } finally {
        try {
            if (searcher != null) {
                manager.release(searcher);
            }
        } catch (IOException ex) {
            logger.error(ex);
        }
        searcher = null;
    }
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.recommend.scorers.DatabaseLuceneScorer.java
License:Apache License
protected Map<String, Double> getTopTerms(String field, String value) throws IOException {
    manager.maybeRefreshBlocking();
    IndexSearcher searcher = manager.acquire();

    try (IndexReader reader = searcher.getIndexReader()) {
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Query q = new TermQuery(new Term(field, value));
        searcher.search(q, collector);

        if (collector.getTotalHits() > 0) {
            int docNr = collector.topDocs().scoreDocs[0].doc;
            Document doc = reader.document(docNr);
            NewsItem nitem = NewsItemLuceneDocConverter.documentToNewsItem(doc);
            return nitem.getTerms();
        } else {
            logger.warn("Could not find document with " + field + "=" + value);
        }
    }
    manager.release(searcher);
    return new HashMap<>();
}
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.termExtract.LuceneTopTermExtract.java
License:Apache License
/**
 * Returns the 10 most important terms in the document with the specified id.
 *
 * @param id
 * @param reader
 * @param numberOfTerms
 * @return
 */
public Map<String, Double> getTopTerms(String id, IndexReader reader, int numberOfTerms) {
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Query q = new TermQuery(new Term("id", id));
        searcher.search(q, collector);

        if (collector.getTotalHits() > 0) {
            int docNr = collector.topDocs().scoreDocs[0].doc;
            return getTopTerms(docNr, reader, numberOfTerms);
        } else {
            logger.warn("No document found with id=" + id);
        }
    } catch (IOException ex) {
        logger.error(ex);
    }
    return new HashMap<>(0);
}