List of usage examples for org.apache.lucene.search.TopScoreDocCollector.create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception { // if (args.length != 3) { // System.err.println("Parameters: [index directory] [taxonomy directory] [json file]"); // System.exit(1); // }//from w w w.j a va 2s . c o m String indexDirectory = "index"; String taxonomyDirectory = "taxonomy"; String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json"; IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, new WhitespaceAnalyzer(LUCENE_VERSION)); writerConfig.setOpenMode(OpenMode.APPEND); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); String content = IOUtils.toString(new FileInputStream(jsonFileName)); JSONArray bookArray = new JSONArray(content); Field idField = new IntField("id", 0, Store.YES); Field titleField = new TextField("title", "", Store.YES); Field authorsField = new TextField("authors", "", Store.YES); Field bookCategoryField = new TextField("book_category", "", Store.YES); indexWriter.deleteAll(); FacetFields facetFields = new FacetFields(taxonomyWriter); for (int i = 0; i < bookArray.length(); i++) { Document document = new Document(); JSONObject book = bookArray.getJSONObject(i); int id = book.getInt("id"); String title = book.getString("title"); String bookCategory = book.getString("book_category"); List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>(); String authorsString = ""; JSONArray authors = book.getJSONArray("authors"); for (int j = 0; j < authors.length(); j++) { String author = authors.getString(j); if (j > 0) { authorsString += ", "; } categoryPaths.add(new CategoryPath("author", author)); authorsString += author; } categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/')); 
idField.setIntValue(id); titleField.setStringValue(title); authorsField.setStringValue(authorsString); bookCategoryField.setStringValue(bookCategory); facetFields.addFields(document, categoryPaths); document.add(idField); document.add(titleField); document.add(authorsField); document.add(bookCategoryField); indexWriter.addDocument(document); System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory, authors); } taxonomyWriter.prepareCommit(); try { taxonomyWriter.commit(); } catch (Exception e) { taxonomyWriter.rollback(); } // taxonomyWriter.close(); // // indexWriter.commit(); // indexWriter.close(); String query = "story"; IndexReader indexReader = DirectoryReader.open(indexWriter, false); IndexReader indexReader2 = DirectoryReader.open(indexWriter, false); System.out.println(indexReader == indexReader2); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader); if (newTaxonomyReader != null) { TaxonomyReader tmp = taxonomyReader; taxonomyReader = newTaxonomyReader; tmp.close(); } else { System.out.println("null"); } ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); FacetSearchParams searchParams = new FacetSearchParams(facetRequests); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); 
System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); taxonomyWriter.commit(); taxonomyWriter.close(); indexWriter.commit(); indexWriter.close(); }
From source file:com.chimpler.example.FacetLuceneSearcher.java
License:Apache License
public static void main(String args[]) throws Exception { // if (args.length != 3) { // System.err.println("Parameters: [index directory] [taxonomy directory] [query]"); // System.exit(1); // }/*from w ww . j a va2 s. c o m*/ String indexDirectory = "index"; String taxonomyDirectory = "taxonomy"; String query = "story"; IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); FacetSearchParams searchParams = new FacetSearchParams(facetRequests); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { 
System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); }
From source file:com.dubture.indexing.core.index.DocumentManager.java
License:Open Source License
public void search(Query pathQuery, final IResultHandler handler) { //TODO: howto handle this? int hitsPerPage = 100; TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); try {/* w w w . j a v a 2s.c om*/ searcher.search(pathQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; handler.handle(searcher.doc(docId)); } } catch (IOException e) { IndexingCorePlugin.logException(e); } }
From source file:com.edu.lucene.SearchFiles.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search * engine presents pages of size n to the user. The user can then go to the * next page if interested in the next hits. * //from ww w. j a v a 2 s. co m * When the query is executed for the first time, then only enough results * are collected to fill 5 result pages. If the user wants to page beyond * this limit, then the query is executed another time and all hits are * collected. * */ public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int numTotalHits = collector.getTotalHits(); System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } collector = TopScoreDocCollector.create(numTotalHits, false); searcher.search(query, collector); hits = collector.topDocs().scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); String title = doc.get("title"); if (title != null) { System.out.println(" Title: " + doc.get("title")); } } else { System.out.println((i + 1) + ". 
" + "No path for this document"); } } if (!interactive) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.print("r<space><number> to read file, "); System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } else if (line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else if (line.charAt(0) == 'r') { int number = Integer.parseInt(line.substring(2)); if (number > numTotalHits || number < 1) continue; Document document = searcher.doc(hits[number - 1].doc); String pathString = document.get("path"); System.out.println(readFile(pathString)); System.out.print("Continue (Y) ?"); String request = in.readLine(); if (request.trim().toLowerCase().equals("y")) break; else System.exit(0); } else { int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) System.exit(0); end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
/**
 * Lists the indexes registered in the main index.
 *
 * <p>Every document of the main index that has an {@code indexName} term is
 * returned as a two-element array: {@code [indexName, displayName]}. At most
 * 10000 entries are collected.
 *
 * @return a list of {@code String[2]} entries, or {@code null} if the main
 *         index could not be opened or searched (the error is printed)
 */
public ArrayList getIndexNames() {
    IndexSearcher mainIndexSearcher = null;
    IndexReader ir = null;
    try {
        ir = IndexReader.open(FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX),
                new SimpleFSLockFactory(indexDir + File.separator + MAIN_INDEX)));
        // A single searcher is enough; the original created a second, redundant one.
        mainIndexSearcher = new IndexSearcher(ir);

        ArrayList<String[]> indexNames = new ArrayList<String[]>();

        Query q = new WildcardQuery(new Term("indexName", "*"));
        TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
        mainIndexSearcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            Document doc = mainIndexSearcher.doc(hit.doc);
            String indexName = doc.get("indexName");
            String indexDisplayName = doc.get("displayName");
            indexNames.add(new String[] { indexName, indexDisplayName });
        }

        return indexNames;
    } catch (Exception ex) {
        ex.printStackTrace();
        return null;
    } finally {
        try {
            // Either reference is still null when opening the index failed.
            if (mainIndexSearcher != null) {
                mainIndexSearcher.close();
            }
            if (ir != null) {
                ir.close();
            }
        } catch (IOException e) {
            logger.info("Error: Unable to close index.");
            // Print before exiting; the trace was unreachable after System.exit.
            e.printStackTrace();
            // NOTE(review): terminating the JVM on a close failure is kept from the original - confirm intent.
            System.exit(0);
        }
    }
}
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public ArrayList<CardSnapshot> getBookmarks() { ArrayList<CardSnapshot> cardSnaps = new ArrayList(); try {//from ww w . ja v a2 s . c o m IndexReader ir = IndexReader.open(FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX), new SimpleFSLockFactory(indexDir + File.separator + MAIN_INDEX))); IndexSearcher mainIndexSearcher = new IndexSearcher(ir); Query q = new WildcardQuery(new Term("qcId", "*")); TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false); mainIndexSearcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { Document doc = mainIndexSearcher.doc(hit.doc); IndexReader reader = IndexReader .open(FSDirectory.open(new File(indexDir + File.separator + doc.get("qcIndexName")), new SimpleFSLockFactory(indexDir + File.separator + doc.get("qcIndexName")))); IndexSearcher searcher = new IndexSearcher(reader); q = new TermQuery(new Term("id", doc.get("qcId"))); collector = TopScoreDocCollector.create(10000, false); searcher.search(q, collector); ScoreDoc[] hits2 = collector.topDocs().scoreDocs; doc = searcher.doc(hits2[0].doc); cardSnaps.add(new CardSnapshot("", doc)); reader.close(); searcher.close(); reader = null; searcher = null; } ir.close(); mainIndexSearcher.close(); ir = null; mainIndexSearcher = null; } catch (Exception ex) { ex.printStackTrace(); } return cardSnaps; }
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public ArrayList<CardSnapshot> search(String searchText, ArrayList<String> indexNames, boolean searchInTitle, boolean searchInTags, boolean searchInText, boolean superFuzzy) { ArrayList<CardSnapshot> cardSnaps = new ArrayList(); try {//from www. j a v a2 s . co m ArrayList<IndexSearcher> searchers = new ArrayList<IndexSearcher>(); for (String indexName : indexNames) { IndexReader reader = IndexReader .open(FSDirectory.open(new File(indexDir + File.separator + indexName), new SimpleFSLockFactory(indexDir + File.separator + indexName))); IndexSearcher searcher = new IndexSearcher(reader); searchers.add(searcher); } BooleanQuery query = new BooleanQuery(); if (searchInTitle) { IndexerUtil.getTokenizedQuery(query, "title", searchText, superFuzzy); } if (searchInTags) { IndexerUtil.getTokenizedQuery(query, "tags", searchText, superFuzzy); } if (searchInText) { IndexerUtil.getTokenizedQuery(query, "text", searchText, superFuzzy); IndexerUtil.getTokenizedQuery(query, "analyzedText", searchText, superFuzzy); } for (IndexSearcher searcher : searchers) { TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); TokenStream stream = TokenSources.getTokenStream("text", doc.get("analyzedText"), new StandardAnalyzer(Version.LUCENE_20.LUCENE_35)); QueryScorer scorer = new QueryScorer(query, "analyzedText"); Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 20); Highlighter highlighter = new Highlighter(scorer); highlighter.setTextFragmenter(fragmenter); String[] fragments = highlighter.getBestFragments(stream, doc.get("text"), 5); String highlights = ""; for (String fragment : fragments) { highlights += fragment + "..."; } if (highlights.equals("")) { String text = doc.get("text"); if (text.length() > 100) { highlights += doc.get("text").substring(0, 100); } else { highlights += doc.get("text"); } } 
cardSnaps.add(new CardSnapshot(highlights, doc)); } searcher.getIndexReader().close(); searcher.close(); searcher = null; } } catch (Exception ex) { ex.printStackTrace(); } return cardSnaps; }
From source file:com.gitblit.LuceneExecutor.java
License:Apache License
/** * Searches the specified repositories for the given text or query * //from w w w .j av a 2s. co m * @param text * if the text is null or empty, null is returned * @param page * the page number to retrieve. page is 1-indexed. * @param pageSize * the number of elements to return for this page * @param repositories * a list of repositories to search. if no repositories are * specified null is returned. * @return a list of SearchResults in order from highest to the lowest score * */ public List<SearchResult> search(String text, int page, int pageSize, String... repositories) { if (StringUtils.isEmpty(text)) { return null; } if (ArrayUtils.isEmpty(repositories)) { return null; } Set<SearchResult> results = new LinkedHashSet<SearchResult>(); StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); try { // default search checks summary and content BooleanQuery query = new BooleanQuery(); QueryParser qp; qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer); qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer); qp.setAllowLeadingWildcard(true); query.add(qp.parse(text), Occur.SHOULD); IndexSearcher searcher; if (repositories.length == 1) { // single repository search searcher = getIndexSearcher(repositories[0]); } else { // multiple repository search List<IndexReader> readers = new ArrayList<IndexReader>(); for (String repository : repositories) { IndexSearcher repositoryIndex = getIndexSearcher(repository); readers.add(repositoryIndex.getIndexReader()); } IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]); MultiSourceReader reader = new MultiSourceReader(rdrs); searcher = new IndexSearcher(reader); } Query rewrittenQuery = searcher.rewrite(query); logger.debug(rewrittenQuery.toString()); TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true); searcher.search(rewrittenQuery, collector); int offset = Math.max(0, (page - 1) * 
pageSize); ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs; int totalHits = collector.getTotalHits(); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document doc = searcher.doc(docId); SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits); if (repositories.length == 1) { // single repository search result.repository = repositories[0]; } else { // multi-repository search MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader(); int index = reader.getSourceIndex(docId); result.repository = repositories[index]; } String content = doc.get(FIELD_CONTENT); result.fragment = getHighlightedFragment(analyzer, query, content, result); results.add(result); } } catch (Exception e) { logger.error(MessageFormat.format("Exception while searching for {0}", text), e); } return new ArrayList<SearchResult>(results); }
From source file:com.ikon.analysis.SearchDemo.java
License:Open Source License
/** * Search in documents/*from w ww. j a va2 s. co m*/ */ private static void search(Directory index, Analyzer analyzer, String str) throws ParseException, CorruptIndexException, IOException { IndexReader reader = IndexReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(NUM_HITS, true); //Query q = new QueryParser(Config.LUCENE_VERSION, DOC_FIELD, analyzer).parse(str); Query q = new WildcardQuery(new Term(DOC_FIELD, str)); System.out.println("Query: " + q); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get(DOC_FIELD)); } searcher.close(); }
From source file:com.knowledgebooks.rdf.implementation.LuceneRdfManager.java
License:Open Source License
/** * @param search_query/*from www . j ava2s . co m*/ * @return * @throws org.apache.lucene.queryParser.ParseException * * @throws java.io.IOException */ public List<List<String>> searchIndex(String search_query) throws ParseException, IOException { File index_dir = new File(data_store_file_root + "/lucene_index"); reader = IndexReader.open(FSDirectory.open(index_dir), true); List<List<String>> ret = new ArrayList<List<String>>(); Searcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "object", analyzer); Query query = parser.parse(search_query); TopScoreDocCollector collector = TopScoreDocCollector.create(10, false); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (int i = 0; i < hits.length; i += 1) { Document doc = searcher.doc(hits[i].doc); List<String> as2 = new ArrayList<String>(23); as2.add(doc.get("subject")); as2.add(doc.get("predicate")); as2.add(doc.get("object")); ret.add(as2); } reader.close(); return ret; }