List of usage examples for org.apache.lucene.search TopScoreDocCollector create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
From source file:be.ugent.tiwi.sleroux.newsrec.recommendationstester.LuceneTopTermExtract.java
License:Apache License
/**
 * Finds the document whose "id" field matches {@code id} and delegates to the
 * doc-number based overload of {@code getTopTerms}.
 *
 * @param id            value searched for in the "id" field
 * @param reader        index reader the search runs against
 * @param numberOfTerms forwarded unchanged to the doc-number based overload
 * @return the overload's result for the first hit; an empty map when no
 *         document matches or when an {@link IOException} is caught
 */
public Map<String, Double> getTopTerms(String id, IndexReader reader, int numberOfTerms) {
    try {
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        // Only the single best hit is needed.
        TopScoreDocCollector firstHit = TopScoreDocCollector.create(1, true);
        indexSearcher.search(new TermQuery(new Term("id", id)), firstHit);

        if (firstHit.getTotalHits() <= 0) {
            logger.warn("No document found with id=" + id);
        } else {
            return getTopTerms(firstHit.topDocs().scoreDocs[0].doc, reader, numberOfTerms);
        }
    } catch (IOException ex) {
        logger.error(ex);
    }
    return new HashMap<>(0);
}
From source file:br.com.crawlerspring.model.Searcher.java
/**
 * Searches the "title" field with the regular expression {@code .*parameters.*}
 * and converts up to ten hits into model documents.
 *
 * <p>NOTE(review): {@code parameters} is concatenated into the regular expression
 * without escaping, so regex metacharacters in it are interpreted as such —
 * confirm this is intended.
 *
 * @param parameters text inserted between {@code .*} wildcards to form the title pattern
 * @return model documents carrying the "title" and "content" stored fields of each hit
 * @throws Exception if opening the index or searching fails
 */
public List<br.com.crawlerspring.model.Document> parametrizeDocuments(String parameters) throws Exception {
    List<br.com.crawlerspring.model.Document> parametrizedDocuments = new ArrayList<br.com.crawlerspring.model.Document>();
    RegexQuery q = new RegexQuery(new Term("title", ".*" + parameters + ".*"));
    int hitsPerPage = 10;

    IndexReader reader = DirectoryReader.open(index);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);

        for (ScoreDoc hit : collector.topDocs().scoreDocs) {
            org.apache.lucene.document.Document luceneDocument = searcher.doc(hit.doc);
            br.com.crawlerspring.model.Document document = new br.com.crawlerspring.model.Document();
            document.setTitle(luceneDocument.get("title"));
            document.setContent(luceneDocument.get("content"));
            parametrizedDocuments.add(document);
        }
    } finally {
        // A reader is opened on every call; release it even when the search throws.
        reader.close();
    }
    return parametrizedDocuments;
}
From source file:byrne.mitre.MitreQuery.java
License:Apache License
/**
 * Tokenizes the entry's full name into "ngrams" terms, ORs them into one
 * boolean query, runs it, and writes one
 * {@code entryId|docId|formattedScore} line per hit to {@code out}.
 *
 * <p>An {@link IOException} does not propagate; it is logged at SEVERE level
 * through {@code java.util.logging} instead of being silently discarded.
 */
public void run() {
    try {
        BooleanQuery bq = new BooleanQuery();
        TokenStream tokenStream = analyzer.tokenStream("ngrams", new StringReader(entry.getFullName()));
        try {
            // The attribute is looked up once here instead of once per token.
            TermAttribute termAttribute = tokenStream.getAttribute(TermAttribute.class);
            while (tokenStream.incrementToken()) {
                Term t = new Term("ngrams", termAttribute.term());
                bq.add(new TermQuery(t), BooleanClause.Occur.SHOULD);
            }
        } finally {
            // Release the stream (and its underlying reader) even if tokenizing fails.
            tokenStream.close();
        }

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(bq, collector);

        for (ScoreDoc hit : collector.topDocs().scoreDocs) {
            Document d = searcher.doc(hit.doc);
            out.write(entry.getID() + "|" + d.get("id") + "|" + df.format(hit.score) + "\n");
        }
    } catch (IOException ioe) {
        // Previously swallowed; keep the non-throwing contract but make the failure visible.
        java.util.logging.Logger.getLogger(getClass().getName())
                .log(java.util.logging.Level.SEVERE, "Query failed for entry " + entry.getID(), ioe);
    }
}
From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java
License:Apache License
@Override public List<Category> queryCategories(String pQueryString) { List<Category> searchResult = new ArrayList<Category>(); try {/*from www . j av a 2 s . c om*/ DirectoryReader reader = DirectoryReader.open(aDirectory); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector results = TopScoreDocCollector.create(MAX_NUM_RESULTS, true); // Search category names Query categoryNameQuery = new QueryParser(VERSION, CATEGORY_NAME, aAnalyzer).parse(pQueryString); searcher.search(categoryNameQuery, results); // Search flattened text (only product names for now) Query flattenedTextQuery = new QueryParser(VERSION, FLATTENED_TEXT, aAnalyzer).parse(pQueryString); searcher.search(flattenedTextQuery, results); for (ScoreDoc scoredResult : results.topDocs().scoreDocs) { Document doc = searcher.doc(scoredResult.doc); Category resultCategory = aDataStore.getCategory(doc.get(CATEGORY_ID)); if (!searchResult.contains(resultCategory) && resultCategory.getNumberOfProducts() > 0) { searchResult.add(resultCategory); } } } catch (IOException e) { LOG.error(e.getMessage()); } catch (ParseException e) { LOG.error(e.getMessage()); } return searchResult; }
From source file:com.andreig.jetty.Search.java
License:GNU General Public License
public Document[] search(String dbid, String k, String v, int count) throws IOException, ParseException { Term t = new Term(k, v); Query q = new TermQuery(t); Query q2 = add_dbid(q, dbid); TopScoreDocCollector collector = TopScoreDocCollector.create(count, true); IndexSearcher searcher = sm.acquire(); Document docs[] = null;//from ww w. j a v a 2 s. c o m try { searcher.search(q2, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; if (hits.length == 0) return null; docs = new Document[hits.length]; for (int i = 0; i < hits.length; i++) { int doc_id = hits[i].doc; docs[i] = searcher.doc(doc_id); } } finally { sm.release(searcher); } return docs; }
From source file:com.andreig.jetty.Search.java
License:GNU General Public License
public Document[] search2(String dbid, String q, int count) throws IOException, ParseException { Query query = tl.get().parse(QueryParser.escape(q)); Query q2 = add_dbid(query, dbid); TopScoreDocCollector collector = TopScoreDocCollector.create(count, true); IndexSearcher searcher = sm.acquire(); Document docs[] = null;//from w w w . j a v a2 s . c o m try { searcher.search(q2, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; if (hits.length == 0) return null; docs = new Document[hits.length]; for (int i = 0; i < hits.length; i++) { int doc_id = hits[i].doc; docs[i] = searcher.doc(doc_id); } } finally { sm.release(searcher); } return docs; }
From source file:com.barchart.feed.ddf.resolver.provider.ResolverDDF.java
License:BSD License
/**
 * Executes {@code query} and returns the stored documents of the best hits,
 * capped at {@code limit} entries.
 *
 * @param query the query to run
 * @return stored documents in hit order
 * @throws Exception if obtaining the searcher, searching, or loading a document fails
 */
private List<Document> searchDocument(final Query query) throws Exception {
    final IndexSearcher indexSearcher = getSearcher();
    final TopScoreDocCollector topHits = TopScoreDocCollector.create(limit, true);
    indexSearcher.search(query, topHits);

    final ScoreDoc[] scoreDocs = topHits.topDocs().scoreDocs;
    final int size = Math.min(scoreDocs.length, limit);
    log.debug("hits size : {}", size);

    final List<Document> documents = new ArrayList<Document>(size);
    int position = 0;
    while (position < size) {
        documents.add(indexSearcher.doc(scoreDocs[position].doc));
        position++;
    }
    return documents;
}
From source file:com.bizosys.hsearch.dictionary.DictionaryValues.java
License:Apache License
/**
 * Splits {@code query} on spaces, parses every word against field "k", and
 * assembles the extracted terms into one zero-slop phrase query whose single
 * best hit is returned.
 *
 * <p>{@code words} and {@code terms} serve as scratch collections: both are
 * filled during the call and cleared again before the search runs.
 *
 * <p>NOTE(review): {@code idx} is not used here; the search goes through the
 * {@code searcher} field — confirm that is intended.
 *
 * @param idx      not read by this method
 * @param query    text to split into words
 * @param analyzer analyzer given to the query parser
 * @param words    scratch list receiving the split words
 * @param terms    scratch set receiving the terms of each parsed word
 * @return at most one score doc for the phrase
 */
private ScoreDoc[] searchTop(Directory idx, String query, Analyzer analyzer, List<String> words, Set<Term> terms)
        throws ParseException, CorruptIndexException, IOException {
    fastSplit(words, query, ' ');

    QueryParser wordParser = new QueryParser(Version.LUCENE_35, "k", analyzer);
    PhraseQuery phrase = new PhraseQuery();
    int position = 0;
    for (String word : words) {
        wordParser.parse(word).extractTerms(terms);
        for (Term extracted : terms) {
            phrase.add(extracted, position);
            position++;
        }
        // Reset the scratch set so the next word starts empty.
        terms.clear();
    }
    words.clear();
    phrase.setSlop(0);

    final int hitsPerPage = 1;
    TopScoreDocCollector best = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(phrase, best);
    return best.topDocs().scoreDocs;
}
From source file:com.bsiag.smartfield.server.services.lookup.CityLookupService.java
License:Open Source License
/**
 * Runs {@code query} and turns each hit into a lookup row keyed by the stored
 * "zip" field, with the text {@code "<zip> <city>"}.
 *
 * @param query       the query to run
 * @param maxRowCount maximum number of hits to collect
 * @return one row per hit, in hit order
 * @throws IOException if searching or loading a document fails
 */
private LookupRow[] queryIndex(Query query, int maxRowCount) throws IOException, CorruptIndexException {
    TopScoreDocCollector topHits = TopScoreDocCollector.create(maxRowCount, true);
    searcher.search(query, topHits);
    ScoreDoc[] scoreDocs = topHits.topDocs().scoreDocs;

    // One row per hit, so the result array can be filled directly.
    LookupRow[] result = new LookupRow[scoreDocs.length];
    int next = 0;
    for (ScoreDoc scoreDoc : scoreDocs) {
        Document stored = searcher.doc(scoreDoc.doc);
        String zip = stored.get("zip");
        result[next++] = new LookupRow(zip, zip + " " + stored.get("city"));
    }
    return result;
}
From source file:com.chimpler.example.FacetLuceneAdvancedSearcher.java
License:Apache License
public static void main(String args[]) throws Exception { if (args.length != 5) { System.err.println(/* w w w .j a va 2 s .c om*/ "Parameters: [index directory] [taxonomy directory] [query] [field drilldown] [value drilldown]"); System.exit(1); } String indexDirectory = args[0]; String taxonomyDirectory = args[1]; String query = args[2]; String fieldDrilldown = args[3]; String valueDrilldown = args[4]; IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); CategoryPath drillDownCategoryPath = new CategoryPath(fieldDrilldown + "/" + valueDrilldown, '/'); FacetSearchParams searchParams = new FacetSearchParams(); searchParams.facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); searchParams.facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); searchParams.facetRequests.add(new CountFacetRequest(drillDownCategoryPath, 100)); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); //luceneQuery = DrillDownQuery.query(luceneQuery, drillDownCategoryPath); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), 
document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); }