List of usage examples for org.apache.lucene.search TopScoreDocCollector create
public static TopScoreDocCollector create(int numHits, boolean docsScoredInOrder)

Note: the examples below were written against Lucene 3.x/4.x, where the second argument is a boolean (docsScoredInOrder). In recent Lucene releases (8.0 and later) that overload was replaced by create(int numHits, int totalHitsThreshold).
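For reference, here is a minimal sketch of the newer two-argument form, assuming Lucene 8.x or later; the index path "/path/to/index" and the "content" field are placeholders chosen for illustration, not part of any example below.

    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.TopScoreDocCollector;
    import org.apache.lucene.store.FSDirectory;
    import java.nio.file.Paths;

    public class TopScoreDocCollectorExample {
        public static void main(String[] args) throws Exception {
            // Placeholder index path and field name, for illustration only.
            try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new TermQuery(new Term("content", "lucene"));

                // Collect the top 10 hits; count total hits exactly only up to 1000.
                TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
                searcher.search(query, collector);

                TopDocs topDocs = collector.topDocs();
                System.out.println("Hits: " + topDocs.totalHits);
            }
        }
    }

With a totalHitsThreshold of 1000, the collector may stop counting hits exactly beyond that bound, which is why topDocs.totalHits reports a lower-bound relation in newer Lucene versions.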
From source file:searcher.CollStat.java
public TopDocs retrieve(HashMap<Integer, Integer> indexOrder, String queryStr, int indexNum) throws Exception {
    TopDocs topDocs = null;
    Query query = buildQuery(queryStr);
    TopScoreDocCollector collector = TopScoreDocCollector.create(numTopDocs, true);
    System.out.println("Hitmap: ");
    System.out.println(indexOrder);
    System.out.println("Multireader: " + multiReader);
    IndexReader reader = multiReader != null ? multiReader : getReaderInOrder(indexOrder, indexNum);
    IndexSearcher searcher = initSearcher(reader);
    searcher.search(query, collector);
    topDocs = collector.topDocs();
    return topDocs;
}
From source file:searcher.CollStat.java
public String getHTMLFromDocId(String indexNumStr, String docId) throws Exception {
    TopScoreDocCollector collector;
    TopDocs topDocs;
    int indexNum = indexNumStr == null ? -1 : Integer.parseInt(indexNumStr);
    System.out.println("Docid Query = |" + docId + "|");
    IndexReader reader = indexNum == -1 ? multiReader : readers[indexNum];
    Query query = new TermQuery(new Term(TrecDocIndexer.FIELD_ID, docId.trim()));
    collector = TopScoreDocCollector.create(1, true);
    IndexSearcher searcher = initSearcher(reader);
    searcher.search(query, collector);
    topDocs = collector.topDocs();
    ScoreDoc sd = topDocs.scoreDocs[0];
    Document doc = reader.document(sd.doc);
    String htmlDecompressed = IndexHtmlToText.decompress(doc.getBinaryValue(WTDocument.WTDOC_FIELD_HTML).bytes);
    return htmlDecompressed;
}
From source file:searchingfrommanyindexes.SearchingFromManyIndexes.java
public static void testMulti() throws Exception {
    MultiSearcher searcher = new MultiSearcher(searchers);
    // Search both indexes for animals in the term range [h, t]
    TermRangeQuery query = new TermRangeQuery("animal", "h", "t", true, true);
    TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
    searcher.search(query, collector);
    TopDocs hits = collector.topDocs();
    assertEquals("tarantula not included", 12, hits.totalHits);
    // Display results
    System.out.println("Found " + hits.scoreDocs.length + " hits.");
    for (int i = 0; i < hits.scoreDocs.length; ++i) {
        Document d = searcher.doc(hits.scoreDocs[i].doc);
        System.out.println((i + 1) + ". " + d.get("animal"));
    }
    // Close the multi searcher
    searcher.close();
}
From source file:selfy.QueryAnswerer.java
public List<String> searchLucene(String query) {
    query = query.replaceAll("#", " hashtags:");
    Set<String> results = new HashSet<>(50);
    ArrayList<String> ranking = new ArrayList<>();
    try {
        Query q = new QueryParser(Version.LUCENE_CURRENT, "body", analyzer).parse(query);
        TopScoreDocCollector collector = TopScoreDocCollector.create(64, true);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            if (!results.contains(d.get("id"))) {
                results.add(d.get("id"));
                ranking.add(d.get("id"));
            }
            //System.out.println((i + 1) + ". " + d.get("body") + "\t" + d.get("hashtags") + "\t" + d.get("id"));
        }
    } catch (ParseException ex) {
        Logger.getLogger(QueryAnswerer.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(QueryAnswerer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ranking;
}
From source file:Tweet_search.TweetSearcher.java
public void retrieveAll() throws IOException, Exception {
    ScoreDoc[] hits = null;
    TopDocs topDocs = null;
    DocVector docVector = new DocVector(propFileName);
    // queries = constructQueries();
    // queries holds the raw data read from the query file:
    // query_num, paper_title, paper_abstract, context, etc.
    System.out.println("Using BOW query:");
    File file = new File(resultsFile);
    System.out.println("creating directory: " + file.getParentFile().getAbsolutePath());
    if (!file.getParentFile().exists()) {
        System.out.println("creating directory: " + file.getParentFile().getName());
        boolean result = false;
        try {
            result = file.getParentFile().mkdir();
        } catch (SecurityException se) {
            // handle it
        }
        if (result) {
            System.out.println("DIR created");
        }
    }
    FileWriter fw = new FileWriter(resultsFile);
    for (int indexcount = 0; indexcount < tweet_ends_from_date - tweet_starts_from_date + 1; indexcount++) {
        int query_searched_count = 0;
        for (TRECQuery q : queries) {
            System.out.println("Query: " + q.qId + ": ");
            setAnalyzer();
            Query qry = q.getBOWQuery(getAnalyzer());
            TopScoreDocCollector collector = TopScoreDocCollector.create(num_wanted, true);
            //System.out.println(qry.toString());
            searcher[indexcount].search(qry, collector);
            topDocs = collector.topDocs();
            hits = topDocs.scoreDocs;
            if (hits == null)
                System.out.println("Nothing found");

            /* write the results to the file */
            StringBuilder buff = new StringBuilder();
            String d[], date;
            int hits_length = hits.length;
            System.err.println("Searching in index of date:" + (indexcount + tweet_starts_from_date));
            int j = 1;
            for (int i = 0; i < hits_length; ++i) {
                d = docVector.reader[indexcount].document(hits[i].doc).get("time").split(" ");
                date = d[5].concat("08" + d[2]);
                if ((Integer.parseInt(d[2]) - tweet_starts_from_date) == indexcount
                        && Integer.parseInt(date) > 20160000 && j <= 100) {
                    buff.append(date + " ").append(q.qId).append(" Q0 ")
                            .append(docVector.reader[indexcount].document(hits[i].doc).get("tweettime"))
                            .append(" ").append(j++).append(" ").append(hits[i].score).append(" ")
                            .append(run_name).append("\n");
                    // .append(docVector.reader[indexcount].document(hits[i].doc).get("DOCNO")).append(" ")
                    // .append(date).append("\n");
                }
            }
            fw.write(buff.toString());
            /* done writing the results */
            query_searched_count++;
        }
        System.out.println(query_searched_count + " queries searched");
    }
    fw.close();
}
From source file:ubic.gemma.core.search.LuceneTest.java
License:Apache License
/**
 * Searching uses a RAM index to deal with queries using logical operators, though it can often be finicky.
 */
@Test
public void luceneRamIndexTest() throws Exception {
    try (RAMDirectory idx = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36)) {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        try (IndexWriter writer = new IndexWriter(idx, iwc)) {
            Document doc = new Document();
            Field f = new Field("content", "I have a small braintest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);

            doc = new Document();
            f = new Field("content", "I have a small braddintest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);

            doc = new Document();
            f = new Field("content", "I have a small brasaaafintest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);

            doc = new Document();
            f = new Field("content", "I have a small braidagagntest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);
        }
        try (IndexReader ir = IndexReader.open(idx); IndexSearcher searcher = new IndexSearcher(ir)) {
            TopDocsCollector<ScoreDoc> hc = TopScoreDocCollector.create(1, true);
            QueryParser parser = new QueryParser(Version.LUCENE_36, "content", analyzer);
            Query parsedQuery = parser.parse("braintest");
            searcher.search(parsedQuery, hc);
            TopDocs topDocs = hc.topDocs();
            int hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);
        }
    }
}
From source file:ubic.gemma.core.search.LuceneTest.java
License:Apache License
private void luceneTestB(Analyzer analyzer) throws Exception {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    try (RAMDirectory idx = new RAMDirectory(); IndexWriter writer = new IndexWriter(idx, iwc)) {
        Document doc = new Document();
        Field f = new Field("content", "Parkinson's disease", Field.Store.YES, Field.Index.ANALYZED);
        doc.add(f);
        writer.addDocument(doc);

        doc = new Document();
        f = new Field("content", "fooo", Field.Store.YES, Field.Index.ANALYZED);
        doc.add(f);
        writer.addDocument(doc);
        LuceneTest.log.info(doc);
        writer.close();

        try (IndexReader ir = IndexReader.open(idx); IndexSearcher searcher = new IndexSearcher(ir)) {
            //noinspection UnusedAssignment
            // testing the create method
            TopDocsCollector<ScoreDoc> hc = TopScoreDocCollector.create(1, true);
            QueryParser parser = new QueryParser(Version.LUCENE_36, "content", analyzer);
            parser.setAutoGeneratePhraseQueries(true);
            parser.setEnablePositionIncrements(true);
            Query parsedQuery;

            parsedQuery = parser.parse("Parkinson's disease");
            LuceneTest.log.info(parsedQuery.toString());
            hc = TopScoreDocCollector.create(1, true);
            searcher.search(parsedQuery, hc);
            TopDocs topDocs = hc.topDocs();
            int hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);
            LuceneTest.log.info(searcher.doc(topDocs.scoreDocs[0].doc).getFieldable("content"));

            parsedQuery = parser.parse("parkinson's disease");
            LuceneTest.log.info(parsedQuery.toString());
            hc = TopScoreDocCollector.create(1, true);
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);

            parsedQuery = parser.parse("\"parkinson's disease\"");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);

            parsedQuery = parser.parse("\"parkinsons disease \"");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);

            parsedQuery = parser.parse("\"parkinson disease \"");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);

            parsedQuery = parser.parse("parkinsons");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("parkinson");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("parkinson*");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);
            LuceneTest.log.info(searcher.doc(topDocs.scoreDocs[0].doc).getFieldable("index"));

            parsedQuery = parser.parse("park*");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("parkinson's AND disease");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("'parkinson's AND disease'");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("parkinson disease");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);
            LuceneTest.log.info(searcher.doc(topDocs.scoreDocs[0].doc).getFieldable("content"));

            // parsedQuery = parser.parse("parknson");
            // hc = TopScoreDocCollector.create(1, true);
            // log.info(parsedQuery.toString());
            // searcher.search(parsedQuery, hc);
            // topDocs = hc.topDocs();
            // hitcount = topDocs.totalHits;
            // assertTrue(hitcount > 0);
            // log.info(searcher.doc(topDocs.scoreDocs[0].doc).getFieldable("content"));
        }
    }
}
From source file:uk.ac.ebi.mdk.service.query.AbstractLuceneService.java
License:Open Source License
/**
 * Convenience method to access the first score document for a given query.
 * If multiple documents are found, a warning is logged.
 *
 * @param query searchable query
 *
 * @return the first score document for the query, or null if there were no hits
 */
public ScoreDoc first(Query query) {
    ScoreDoc[] scoreDocs = search(query, TopScoreDocCollector.create(5, true));
    if (scoreDocs.length > 1) {
        LOGGER.warn("Expected a single hit for " + query);
    }
    return scoreDocs.length > 0 ? scoreDocs[0] : null;
}
From source file:uk.ac.ebi.mdk.service.query.AbstractLuceneService.java
License:Open Source License
/**
 * Search the index with the provided query. A new TopScoreDocCollector is
 * created and constrained using the value of {@link #getMaxResults()}.
 * If an exception occurs, an empty array of ScoreDocs is returned.
 *
 * @param query searchable query
 *
 * @return the score documents for the query
 */
public ScoreDoc[] search(Query query) {
    return search(query, TopScoreDocCollector.create(getMaxResults(), true));
}
From source file:uk.ac.ebi.mdk.service.query.crossreference.AbstractCrossreferenceService.java
License:Open Source License
/**
 * {@inheritDoc}
 */
@Override
public Collection<? extends Identifier> getCrossReferences(I identifier) {
    Query query = construct(identifier.getAccession(), IDENTIFIER);
    Collection<Identifier> crossreferences = new ArrayList<Identifier>();
    TopScoreDocCollector collector = TopScoreDocCollector.create(5, true);
    try {
        getSearcher().search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc document : hits) {
            Class c = getIdentifierClass(value(document, DATABASE_IDENTIFIER_INDEX.field()));
            String accession = value(document, DATABASE_ACCESSION.field());
            crossreferences.add(DefaultIdentifierFactory.getInstance().ofClass(c, accession));
        }
    } catch (IOException ex) {
        LOGGER.error("IO Exception occurred on service: " + ex.getMessage());
    }
    return crossreferences;
}