List of usage examples for org.apache.lucene.search.TopScoreDocCollector.create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)

Note: this two-int signature is the Lucene 8+ form. The examples below were written against Lucene 4.x and earlier, where the second parameter was a boolean: create(int numHits, boolean docsScoredInOrder).
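Before the per-project examples, here is a minimal sketch of the call against the Lucene 8+ signature shown above. The index path ("/tmp/index"), field name ("contents"), and query string are assumptions for illustration only; adjust them to your own index.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

public class TopScoreDocCollectorSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location and field; replace with your own.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query q = new QueryParser("contents", new StandardAnalyzer()).parse("hello");

            // numHits = 10: keep the 10 top-scoring documents.
            // totalHitsThreshold = 100: the total hit count is exact only up
            // to 100; beyond that it is reported as a lower bound.
            TopScoreDocCollector collector = TopScoreDocCollector.create(10, 100);
            searcher.search(q, collector);

            for (ScoreDoc hit : collector.topDocs().scoreDocs) {
                Document d = searcher.doc(hit.doc);
                System.out.println(hit.score + "  " + d.get("contents"));
            }
        }
    }
}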
From source file:org.xeustechnologies.treeing.TreeingTest.java
License:Open Source License
@Test
public void testSearch() throws Exception {
    FSDirectory index = FSDirectory.open(new File("C:/test/luc"));
    String querystr = "hello";
    Query q = new QueryParser(Version.LUCENE_CURRENT, "contents",
            new StandardAnalyzer(Version.LUCENE_CURRENT)).parse(querystr);
    int hitsPerPage = 10;
    IndexSearcher searcher = new IndexSearcher(index, true);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    System.out.println(collector.getTotalHits());
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println((i + 1) + ". " + d.get("url"));
    }
}
From source file:parts.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 */
public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            collector = TopScoreDocCollector.create(numTotalHits, false);
            searcher.search(query, collector);
            hits = collector.topDocs().scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    System.out.println("   Title: " + doc.get("title"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:persistence.lucene.search.QueryExecutorHelper.java
License:Open Source License
List<QueryResult> queryDocuments(final String query, final String[] fields, final String user)
        throws ParseException, IOException, InvalidTokenOffsetsException {
    int hitsPerPage = 10;
    MultiFieldQueryParser queryParser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
    Query luceneQuery = queryParser.parse(query);
    Highlighter highlighter = new Highlighter(new QueryScorer(luceneQuery));
    DirectoryReader indexReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    TermsFilter filter = new TermsFilter(new Term(FullTextSearchResource.DOCUMENT_OWNER, user.toLowerCase()));
    searcher.search(luceneQuery, filter, collector);
    ScoreDoc[] docs = collector.topDocs().scoreDocs;
    List<QueryResult> resultBeans = new ArrayList<>(docs.length);
    for (ScoreDoc doc : docs) {
        Document document = searcher.doc(doc.doc);
        String text = document.get(FullTextSearchResource.DOCUMENT_CONTENT_FIELD);
        TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), doc.doc,
                FullTextSearchResource.DOCUMENT_CONTENT_FIELD, standardAnalyzer);
        TextFragment[] fragments = highlighter.getBestTextFragments(tokenStream, text, false, 10);
        resultBeans.add(new QueryResult(doc.doc, doc.score, document, fragments));
    }
    indexReader.close();
    return resultBeans;
}
From source file:pt.ua.ieeta.biomedhub.dictionaries.wrappers.CTakesLucene.java
License:Open Source License
public Multimap readBlocks(int blockSize) throws IOException {
    try {
        if (!DirectoryReader.indexExists(index)) {
            return null;
        }
    } catch (IOException ex) {
        Logger.getLogger("lucene").log(Level.SEVERE, null, ex);
    }

    String querystr = "*:*";
    Query q = null;
    try {
        q = new QueryParser(Version.LUCENE_4_9, "title", analyzer).parse(querystr);
    } catch (ParseException ex) {
        Logger.getLogger(CTakesLucene.class.getName()).log(Level.SEVERE, null, ex);
    }

    int hitsPerPage = 100; // note: declared but unused; the collector below is sized explicitly
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(200000, true);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    System.out.println("Found " + hits.length + " hits.");
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        //System.out.println((i + 1) + ". " + d.get("first_word") + "\t" + d.get("code"));
        System.out.print(".");
        synons.put(d.get("code"), d.get("first_word"));
    }
    return synons;
}
From source file:retrievability.SampledQuery.java
TopDocs execute(IndexSearcher searcher) throws Exception {
    TopScoreDocCollector collector = TopScoreDocCollector.create(sampler.getNumWanted(), true);
    searcher.search(this.lucquery, collector);
    return collector.topDocs();
}
From source file:retrievability.UserQueryIndex.java
public static void printing_indexes() throws Exception {
    ScoreDoc[] hits = null;
    TopDocs topDocs = null;
    System.out.println("Searching for --- ");
    Directory index = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(index);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    //Searcher searcher = new Searcher();
    Query queryDocid = new TermQuery(new Term("user_id", "ayaisthebest"));
    //Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_CURRENT);
    //QueryParser queryParser = new QueryParser(Version.LUCENE_3_0, user_id, analyzer);
    TopScoreDocCollector collectorDocSearch = TopScoreDocCollector.create(100, true);
    indexSearcher.search(queryDocid, collectorDocSearch);
    topDocs = collectorDocSearch.topDocs();
    hits = topDocs.scoreDocs;
    System.out.println(hits.length);
    if (hits.length == 0) { // scoreDocs is never null here, so test for emptiness
        System.out.println("Nothing found");
        //return -1;
    } else {
        for (int i = 0; i < hits.length; i++) {
            System.out.println("printing ayaisthebest---" + hits[i]);
            int docId = hits[i].doc;
            Document d = indexSearcher.doc(docId);
            String prev_search = d.get("search_query");
            // do something with current hit
            System.err.println("docID:" + docId + d.get("search_query"));
            System.err.println("");
        }
        //return hits[0].doc;
    }
}
From source file:retriever.TermFreq.java
public float batchRetrieveTREC(CubicBezierTF dtfFunc) throws Exception {
    System.out.println("Batch retrieving for TREC " + this.getTrecCode());
    Similarity sim = new GeneralizedTfIdfSimilarity(dtfFunc);
    searcher.setSimilarity(sim);

    List<TRECQuery> queries = constructQueries(trecCode);
    float map = 0f;

    for (TRECQuery query : queries) {
        System.out.println("Retrieving for query " + query.id + ": " + query);
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        Query luceneQuery = buildQuery(query.title);
        searcher.search(luceneQuery, collector);
        TopDocs initialList = collector.topDocs();
        map += evaluator.computeAP(query.id, initialList);
        /*
        // Re-rank based on the custom tf functions for doc and qry
        DoclistReranker reranker = new DoclistReranker(reader, dtfFunc, luceneQuery, initialList);
        TopDocs rerankedDocs = reranker.rerank();
        map += evaluator.computeAP(query.id, rerankedDocs);
        */
    }

    float numQueries = (float) queries.size();
    map /= numQueries;
    System.out.println("MAP " + dtfFunc.toString() + ": " + map);

    // Evaluate
    // TODO: Write code here to evaluate and keep track of the
    // function settings which yields the highest MAP till now.
    dtfFunc.setMAP(map);
    return map;
}
From source file:reviews.searching.SearchReviews.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected.
 */
public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            collector = TopScoreDocCollector.create(numTotalHits, false);
            searcher.search(query, collector);
            hits = collector.topDocs().scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                String title = doc.get("title");
                if (title != null) {
                    // System.out.println("Title: " + doc.get("title"));
                }
                // HTMLParser parser = new HTMLParser(new File(path));
                // Reader reader = parser.getReader();
                /*
                SentenceTokenizer sentTok = new SentenceTokenizer(reader);
                while (sentTok.incrementToken()) {
                    displayTokenStream(sentTok);
                }
                */
                /*
                String contents = "";
                int c;
                while ((c = reader.read()) != -1) {
                    char buf[] = Character.toChars(c);
                    contents += String.valueOf(buf);
                }
                //System.out.println("contents ");
                //System.out.println(contents);

                // creates a StanfordCoreNLP object, with POS tagging, parsing
                Properties props = new Properties();
                props.put("annotators", "tokenize, ssplit");
                StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
                Annotation document = new Annotation(contents);

                // run all Annotators on this text
                pipeline.annotate(document);
                List<CoreMap> sentences = document.get(SentencesAnnotation.class);
                for (CoreMap sentence : sentences) {
                    //System.out.println("sentence: " + sentence.toString());
                    System.out.println("sentence: " + sentence.get(TextAnnotation.class));
                    // traversing the words in the current sentence
                    System.out.println("Has the following words: ");
                    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                        // this is the text of the token
                        String word = token.get(TextAnnotation.class);
                        System.out.print(word + " ");
                    }
                    System.out.println();
                }
                */
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
        }

        if (!interactive) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit) break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:searchenginelucene.LuceneSearchEngine.java
public static void searchForQuery(String indexLocation, String s, BufferedReader br) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)));
    IndexSearcher searcher = new IndexSearcher(reader);
    PrintWriter writer_query = new PrintWriter("..\\Query-1.csv");
    s = "";
    while (!s.equalsIgnoreCase("q")) {
        TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
        try {
            System.out.println("Enter the search query (q=quit):");
            s = br.readLine();
            if (s.equalsIgnoreCase("q")) {
                break;
            }
            Query q = new QueryParser(Version.LUCENE_47, "contents", sAnalyzer).parse(s);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            // Write the results to a file
            System.out.println("Found " + hits.length + " hits.");
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                writer_query.println((i + 1) + "," + s + ", " + d.get("filename") + "," + hits[i].score);
            }

            // Term termInstance = new Term("contents", s);
            // System.out.println("The term is:" + termInstance.toString());
            // long termFreq = reader.totalTermFreq(termInstance);
            // long docCount = reader.docFreq(termInstance);
            // System.out.println(s + " Term Frequency " + termFreq
            //         + " - Document Frequency " + docCount);
        } catch (Exception e) {
            System.out.println("Error searching " + s + " : " + e.getMessage());
            break;
        }
    }
    // Close the writer once, after the loop. The original closed it at the end of
    // every iteration, which silently discarded results for all queries after the first.
    writer_query.close();
}
From source file:searcher.CollStat.java
private String retrieve(IndexReader reader, String queryStr) throws Exception {
    ScoreDoc[] hits = null;
    TopDocs topDocs = null;
    Query query = buildQuery(queryStr);
    TopScoreDocCollector collector = TopScoreDocCollector.create(numTopDocs, true);
    IndexSearcher searcher = initSearcher(reader);
    searcher.search(query, collector);
    topDocs = collector.topDocs();
    hits = topDocs.scoreDocs;
    if (hits == null || hits.length == 0) {
        return "Nothing found!!";
    }
    return constructJSONForRetrievedSet(reader, query, hits);
}