Usage examples for org.apache.lucene.search.TopScoreDocCollector.create

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)

Note: the two-int signature above is the current one (Lucene 8.0+), where totalHitsThreshold caps exact hit counting. The examples below all call the older Lucene 3.x/4.x overload, create(int numHits, boolean docsScoredInOrder), which was removed in Lucene 5.0.
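For reference, a minimal sketch of the newer two-argument form, assuming Lucene 8.x; the index path "index" and the field/term in the query are placeholders:

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

public class TopScoreDocCollectorSketch {
    public static void main(String[] args) throws Exception {
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Keep the 10 best-scoring hits; count total hits exactly only up
            // to 1000, beyond which the reported count may be a lower bound.
            TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
            searcher.search(new TermQuery(new Term("title", "lucene")), collector);
            for (ScoreDoc hit : collector.topDocs().scoreDocs) {
                System.out.println(hit.doc + "\t" + hit.score);
            }
        }
    }
}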
From source file:edu.ucdenver.ccp.nlp.index.Search.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String index = "index";
    String queries = null;
    String queryString = null;
    int hitsPerPage = 100;

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    EnglishAnalyzer analyzer = new EnglishAnalyzer(Version.LUCENE_40);

    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));

    // Query building starts here: search the title, abstract and mentions fields.
    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_40,
            new String[] { "title", "abs", "mentions" }, analyzer);

    while (true) {
        if (queries == null && queryString == null) {
            // Prompt the user (e.g. "c" for cisplatin).
            System.out.println("Enter query: ");
        }
        String line = queryString != null ? queryString : in.readLine();
        if (line == null) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // Display results.
        System.out.println("Found " + hits.length + " hits.");
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println((i + 1) + ". " + d.get("pmid") + "\t" + d.get("title"));
        }

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
From source file:edu.wayne.cs.severe.ir4se.lucene.SearchFiles.java
License:Apache License
@SuppressWarnings("deprecation")
public static void search(String system, int documentType, int queryNumber, String indexDirectoryPath,
        String queryString, String fileOutput, String[] targetClasses, boolean runIndividualTerms,
        boolean append) throws Exception {
    String index = indexDirectoryPath;

    // Record any target documents that are missing from the index.
    FileWriter f = new FileWriter(index + "../NotFound.txt", true);
    for (int i = 0; i < targetClasses.length; i++) {
        String target = targetClasses[i];
        boolean found = Indexer.isFileDocInIndex(indexDirectoryPath, target);
        if (!found)
            f.append("Target doc " + i + " - " + target + " not found in index!\n");
    }
    f.close();

    IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true);
    int numDocs = reader.numDocs();
    System.out.println("The number of documents in the index is: " + numDocs);

    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    String[] fields = new String[] { "contents" };

    if (!runIndividualTerms) {
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        int hitsPerPage = numDocs;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        Query query = parser.parse(queryString);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        System.out.println("The number of hits is: " + hits.length);

        // File with the results (score and position) only for the relevant documents.
        // The file contains entries in the following format:
        // (queryNumber,relDoc1,posRelDoc1,scoreRelDoc1,relDoc2,posRelDoc2,scoreRelDoc2,...)
        FileWriter fwRelevant = new FileWriter(fileOutput, append);
        String path = "";
        String docName = "";
        String docPathAndName = "";
        for (String target : targetClasses) {
            boolean found = false;
            for (int i = 0; i < hits.length; i++) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                path = d.get("path");
                float score = hits[i].score;
                if (documentType == 2) {
                    docName = d.get("docName");
                    docPathAndName = path.toLowerCase() + "." + docName.toLowerCase();
                    if (target.equalsIgnoreCase(docPathAndName)) {
                        fwRelevant.write(system + ";" + queryNumber + ";" + target + ";" + (i + 1) + ";"
                                + hits.length + ";" + numDocs + ";" + score + "\n");
                        found = true;
                        break;
                    }
                } else if (documentType == 1) {
                    File pathDir = new File(path.trim());
                    String fileName = pathDir.getName();
                    docName = fileName.replaceAll(".txt", "");
                    fwRelevant.write((i + 1) + ". doc = " + docName + " score = " + score + "\n");
                }
            }
            if (found == false)
                fwRelevant.write(system + ";" + queryNumber + ";" + target + "; NOT_RETRIEVED" + "\n");
        }
        fwRelevant.close();
        reader.close();
    } else { // runIndividualTerms == true
        /*
         * Each query is divided into its constituent terms and each term is run as a
         * separate query. This is useful to determine the similarity of each term in a
         * query to a target document, i.e., which terms tend to lead to the best
         * results (finding the targets sooner).
         */
        SearchFiles.search(system, documentType, queryNumber, indexDirectoryPath, queryString,
                fileOutput.replaceAll(".txt", "_wholeQuery.txt"), targetClasses, false, append);

        FileWriter fw = new FileWriter(fileOutput.replaceAll(".txt", "_terms.txt"));
        fw.write("\n\n\n------------------------------------------------------------------------------------\n\n");
        fw.write(" Results for query " + queryNumber + "\n");
        fw.write("------------------------------------------------------------------------------------\n\n");

        // File with the results (score and position) only for the relevant documents.
        // The file contains entries in the following format:
        // (queryNumber,term1,term1TF,term1DF,relDoc1,posRelDoc1Term1,scoreRelDoc1Term1,...)
        // (queryNumber,term2,term2TF,term2DF,relDoc1,posRelDoc1Term2,scoreRelDoc1Term2,...)
        // ...
        FileWriter fwRelevant = new FileWriter(
                fileOutput.replaceAll(".txt", "_terms_RelevantDocsPositions.txt"));

        String[] queryTerms = queryString.split(" ");
        for (int l = 0; l < queryTerms.length; l++) {
            MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
            int hitsPerPage = numDocs;
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
            String q = queryTerms[l];
            Query query = parser.parse(q);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            fw.write("TERM " + (l + 1) + ": " + q + "\n\n");
            fwRelevant.write("\n" + queryNumber + "," + q);
            for (int i = 0; i < hits.length; i++) {
                int docId = hits[i].doc;
                Document d = searcher.doc(docId);
                String path = d.get("path");
                float score = hits[i].score;
                if (documentType == 2) {
                    String docName = d.get("docName");
                    fw.write((i + 1) + ". doc = " + path + " " + docName + " - score = " + score + "\n");
                    for (int k = 0; k < targetClasses.length; k++) {
                        if (docName.equalsIgnoreCase(targetClasses[k])) {
                            String contents = d.get("contents");
                            int frequency = countOccurrences(contents, q); // tf
                            fwRelevant.write("," + frequency);
                            fwRelevant.write("," + reader.docFreq(new Term("contents", q))); // df
                            fwRelevant.write("," + path + "." + docName + "," + (i + 1) + "," + score);
                            break;
                        }
                    }
                } else if (documentType == 1) {
                    File pathDir = new File(path);
                    String fileName = pathDir.getName();
                    String docName = fileName.replaceAll(".txt", "");
                    fw.write((i + 1) + ". doc = " + docName + " score = " + score + "\n");
                }
            }
            fw.write("\n\n\n");
        }
        fw.close();
        fwRelevant.close();
        reader.close();
    }
}
From source file:es.eucm.ead.editor.indexes.Index.java
License:Open Source License
/**
 * Query the index.
 *
 * @param queryText textual query.
 * @return an object with the results of the search: matches will be objects
 *         that have fields with contents that matched the query.
 */
public Array<Match> search(String queryText) {
    Array<Match> matches = new Array<Match>();
    try {
        Query query = getQueryParser().parse(queryText + FUZZY_SEARCH_SYMBOL + fuzzyFactor);
        TopScoreDocCollector collector = TopScoreDocCollector.create(maxSearchHits, true);
        indexSearcher.search(query, collector);
        for (ScoreDoc hit : collector.topDocs().scoreDocs) {
            Document doc = indexSearcher.doc(hit.doc);
            Integer id = Integer.parseInt(doc.getFieldable(DOCUMENT_ID_FIELD_NAME).stringValue());
            SearchNode node = idsToNodes.get(id);
            Match match = new Match(node, hit.score);
            matches.add(match);
        }
        matches.sort();
    } catch (Exception e) {
        Gdx.app.error("Index", "Error parsing or looking up " + queryText, e);
    }
    return matches;
}
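The suffix appended here (FUZZY_SEARCH_SYMBOL + fuzzyFactor) is presumably Lucene's classic fuzzy operator, e.g. lucene~0.7, which makes the query parser emit a FuzzyQuery. A minimal sketch of building the equivalent query programmatically, with the field name "title" as a placeholder (Lucene 4.x takes a maximum edit distance rather than the 3.x similarity float):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;

public class FuzzySketch {
    // Matches terms within maxEdits edits of the given text in the "title" field.
    public static Query fuzzyTitleQuery(String text, int maxEdits) {
        return new FuzzyQuery(new Term("title", text), maxEdits);
    }
}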
From source file:es.eucm.ead.editor.model.ModelIndex.java
License:Open Source License
/**
 * Query the index. The fields "eid", "is" and "has" are interpreted as follows:
 * <ul>
 * <li>eid - exact editor-id match
 * <li>is - node class-name match
 * <li>has - node contents class-name match
 * </ul>
 * @param field field that is being searched
 * @param queryText contents of the query
 * @param quick
 * @return an object with the results of the search
 */
public SearchResult search(String field, String queryText, boolean quick) {
    // Short-circuited queries.
    if (field.equals(isClassQueryField)) {
        return searchByClass(queryText);
    } else if (field.equals(editorIdQueryField)) {
        return searchById(queryText);
    } else if (field.equals(hasContentClassQueryField)) {
        return searchByContentClass(queryText);
    }
    // Normal queries.
    try {
        IndexReader reader = IndexReader.open(searchIndex);
        Query query = (field.isEmpty()) ? getQueryAllParser().parse(queryText)
                : new QueryParser(Version.LUCENE_35, field, searchAnalyzer).parse(queryText);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_SEARCH_HITS, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        SearchResult sr = new SearchResult(searcher, query, quick, hits, model.getNodesById());
        return sr;
    } catch (Exception e) {
        logger.warn("Error parsing or looking up query '{}' in index", queryText, e);
    }
    return new SearchResult();
}
From source file:eu.eexcess.sourceselection.redde.dbsampling.DBSampler.java
License:Apache License
/**
 * Collects samples from the general database (index).
 *
 * @param queryString the search string
 * @param searcher
 * @param maxHitsPerQuery maximum number of documents stored from the result for queryString
 * @throws ParseException
 * @throws IOException
 */
private void collectSamples(String queryString, IndexSearcher searcher, int maxHitsPerQuery)
        throws ParseException, IOException {
    Query query = new QueryParser(Settings.IndexFields.IndexTextField, new EnglishAnalyzer())
            .parse(queryString);
    TopScoreDocCollector collector = TopScoreDocCollector.create(maxHitsPerQuery, true);
    searcher.search(query, collector);
    writeToIndex(searcher, collector, maxHitsPerQuery);
}
From source file:eu.eexcess.sourceselection.redde.indexer.topterm.DBDomainSampler.java
License:Apache License
public void sample(Set<SampleArguments> sampleArgs) throws IllegalStateException, ParseException, IOException {
    if (domainToTermsTree == null) {
        throw new IllegalStateException("no terms aligned");
    }
    for (SampleArguments subSample : sampleArgs) {
        // Merge the requested domain terms.
        Set<String> terms = distinctUnifyValues(subSample.sampleDomains);
        // Sample with a domain-term dependent query; the terms are joined with a
        // space so the query parser sees them as separate terms.
        String queryString = String.join(" ", terms);
        Query query = new QueryParser(DBDomainSampler.fieldOfInterest, new EnglishAnalyzer())
                .parse(queryString);
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, false);
        new IndexSearcher(inIndexReader).search(query, collector);
        // ScoreDoc[] docs = collector.topDocs().scoreDocs;
        // TODO: create and store docs in a new index called subSample.name
    }
    throw new UnsupportedOperationException("not implemented yet");
}
From source file:Example.lucene.HelloLucene.java
public static void main(String[] args) throws IOException, ParseException {
    // 0. Specify the analyzer for tokenizing text.
    //    The same analyzer should be used for indexing and searching.
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    // 1. Create the index.
    Directory index = FSDirectory.open(new File("indexing"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
    try (IndexWriter w = new IndexWriter(index, config)) {
        addDoc(w, "Lucene in Action 123.456", "193398817");
        addDoc(w, "Lucene for Dummies 123 456", "55320055Z");
        addDoc(w, "Managing Gigabytes 123456", "55063554A");
        addDoc(w, "", "9900333X");
        addDoc(w, "?", "9900333X");
    }

    // 2. Query.
    String querystr = args.length > 0 ? args[0] : "";
    // The "title" arg specifies the default field to use
    // when no field is explicitly specified in the query.
    Query q = new QueryParser(Version.LUCENE_45, "title", analyzer).parse(querystr);

    // 3. Search.
    int hitsPerPage = 10;
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // 4. Display results.
        System.out.println("Found " + hits.length + " hits.");
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
        }
    }
}
From source file:Example.lucene.TestIndexer.java
public static void main(String[] args) throws IOException, ParseException {
    // 0. Specify the analyzer for tokenizing text.
    //    The same analyzer should be used for indexing and searching.
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);

    String InDirName = "data/test_snipped";
    File InDir = new File(InDirName);

    // 1. Create the index.
    Directory index = FSDirectory.open(new File("data/indexingonly"));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
    try (IndexWriter w = new IndexWriter(index, config)) {
        String[] s;
        int id = 1;
        for (File f : InDir.listFiles()) {
            try (ArcReader ar = new ArcReader(f)) {
                System.out.println(f.getName());
                while (ar.Next()) {
                    s = ar.Record.ArchiveContent.split("\n");
                    switch (s.length) {
                    case 2:
                        addDoc(w, id++, ar.Record.URL, s[0], s[1]);
                        break;
                    case 1:
                        addDoc(w, id++, ar.Record.URL, s[0], "");
                        break;
                    default:
                        break;
                    }
                }
            }
        }
    }

    // 2. Query.
    String querystr = args.length > 0 ? args[0] : "";
    // The "title" arg specifies the default field to use
    // when no field is explicitly specified in the query.
    Query q = new QueryParser(Version.LUCENE_45, "title", analyzer).parse(querystr);

    // 3. Search.
    int hitsPerPage = 10;
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // 4. Display results.
        System.out.println("Found " + hits.length + " hits.");
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println((i + 1) + ". " + d.get("id") + "\t" + d.get("url") + "\t" + d.get("title")
                    + "\t" + d.get("content"));
        }
    }
}
From source file:Example.lucene.TestSearch.java
public static void main(String[] args) throws ParseException, IOException {
    Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45);
    Directory index = FSDirectory.open(new File("data/indexingonly"));

    // 2. Query.
    String querystr = args.length > 0 ? args[0] : "golf user";
    // "content" is the default field to use
    // when no field is explicitly specified in the query.
    Query q = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer)
            .parse(querystr);

    // 3. Search.
    int hitsPerPage = 10;
    try (IndexReader reader = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        TopDocs td = collector.topDocs(5);
        ScoreDoc[] hits = td.scoreDocs;

        // 4. Display results.
        System.out.println("Found " + hits.length + " hits. from " + td.totalHits + " docs.");
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println((i + 1) + ". " + d.get("id") + "\t" + d.get("url") + "\t" + d.get("title")
                    + "\t" + d.get("content"));
        }
    }
}
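Unlike the other examples, this one pages through results: TopDocsCollector.topDocs(int start) returns the collected hits from rank start onward, so collector.topDocs(5) yields at most hits 6 through 10 here. A small sketch of page-by-page retrieval using the two-argument topDocs(start, howMany), assuming the same Lucene 4.x setup; the method name and parameters are placeholders:

// Minimal paging sketch: fetch one page of results by rank.
static ScoreDoc[] fetchPage(IndexSearcher searcher, Query query, int page, int pageSize)
        throws IOException {
    // Collect enough top hits to cover every page up to the requested one.
    TopScoreDocCollector collector = TopScoreDocCollector.create((page + 1) * pageSize, true);
    searcher.search(query, collector);
    // topDocs(start, howMany) returns the hits ranked [start, start + howMany).
    return collector.topDocs(page * pageSize, pageSize).scoreDocs;
}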
From source file:game.TermFreq.java
void loadDoc() throws Exception {
    IndexReader reader = retriever.getReader();
    IndexSearcher searcher = retriever.getSearcher();

    // Look the document up by its id field; a single-hit collector suffices,
    // assuming the id is unique and present in the index.
    Term docIdTerm = new Term(TrecDocRetriever.FIELD_ID, this.docIdToGuess);
    TermQuery tq = new TermQuery(docIdTerm);
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
    searcher.search(tq, collector);

    this.luceneDocIdToGuess = collector.topDocs().scoreDocs[0].doc;
    this.docToGuess = reader.document(luceneDocIdToGuess);
    this.contentOfDocToGuess = docToGuess.get(FIELD_ANALYZED_CONTENT);
}
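Collecting a single hit like this maps an application-level id to a Lucene doc id, but it throws ArrayIndexOutOfBoundsException if the id is not in the index. A hedged alternative using searcher.search(query, n), which builds a top-n collector internally (same assumed fields as above):

static Document lookupById(IndexSearcher searcher, IndexReader reader, String id) throws IOException {
    TermQuery tq = new TermQuery(new Term(TrecDocRetriever.FIELD_ID, id));
    // search(query, n) is equivalent to collecting with a size-1 TopScoreDocCollector.
    TopDocs td = searcher.search(tq, 1);
    return td.scoreDocs.length > 0 ? reader.document(td.scoreDocs[0].doc) : null;
}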