List of usage examples for org.apache.lucene.search.TopScoreDocCollector.create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold)
From source file:GUIFrame.java
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput, DefaultListModel DocList) throws ParseException, IOException { // The query/* w w w . ja v a 2s . co m*/ userInput = userInput.replace("\"", ""); Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput); // The search int hitsPerPage = 20; // return 20 top documents IndexReader indoReader = DirectoryReader.open(Index); IndexSearcher indoSearcher = new IndexSearcher(indoReader); TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true); indoSearcher.search(q, docCollector); ScoreDoc[] hits = docCollector.topDocs().scoreDocs; // Copy results to list models for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = indoSearcher.doc(docId); DocList.addElement(d.get("docID")); SumListModel.addElement(d.get("summary")); } GetTerms(Index, Analyzer, "summary", userInput); return hits.length; }
From source file:SimpleLuceneSearch.java
License:Apache License
public String getCuid(String meshId, String preferred) throws IOException, ParseException { //meshId:D001416 AND preferred:true String field = "meshId"; String userQuery = "meshId:" + meshId + " AND preferred:" + preferred; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); //System.out.println("Searching for: " + query.toString(field)+" in meSH MRCONSO"); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector);/*from www .j a v a2 s. c o m*/ int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); return d.get("cuId"); } else if (numTotalHits > 1) { System.out.println("WARNING: there is several corresping CUI"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping CUI"); } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * /* www. ja va 2 s. c om*/ * @param CUI * @return preferred * @throws IOException * @throws ParseException */ public String getPreferredNameFromCui(String cui) throws IOException, ParseException { //term: ?? String field = "cuId"; String userQuery = "cuId:" + cui + " AND preferred:true"; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); //System.out.println("Searching for: " + query.toString(field)+" in meSH MRCONSO"); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); //System.out.println(" =>"+d.get("term")); return d.get("term"); } else if (numTotalHits > 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; for (int i = 0; i < results.length; ++i) { int docId = results[i].doc; Document d = SEARCHER.doc(docId); if (d.get("cuId").toLowerCase().equals(cui.toLowerCase())) { return d.get("term"); } System.out.println((i + 1) + ". 
" + d.get("cuId") + ", term= " + d.get("term")); } System.out.println("WARNING: there is several corresping term and no one matches with the cui"); } else if (numTotalHits == 0) { //let's get the first non preferred term userQuery = "cuId:" + cui; Query query2 = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); int hitsPerPage2 = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage2, true); SEARCHER.search(query2, collector2); int numTotalHits2 = collector2.getTotalHits(); if (numTotalHits2 > 0) { ScoreDoc[] results2 = collector2.topDocs().scoreDocs; int docId = results2[0].doc; Document d = SEARCHER.doc(docId); return d.get("term"); } else { System.out.println("WARNING: there is no corresping term for " + cui); } } return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * /*from www .j a va 2 s .com*/ * @param meshId * @param preferred * @return * @throws IOException * @throws ParseException */ public String getCuidFromLabel(String term) throws IOException, ParseException { //term: ?? String field = "term"; String userQuery = "\"" + term + "\""; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); // System.out.println("Searching for: " + query.toString(field)+" in meSH MRCONSO"); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); //System.out.println(" =>"+d.get("cuId")); return d.get("cuId"); } else if (numTotalHits > 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; for (int i = 0; i < results.length; ++i) { int docId = results[i].doc; Document d = SEARCHER.doc(docId); if (d.get("term").toLowerCase().equals(term.toLowerCase())) { return d.get("cuId"); } } System.out.println("WARNING: there is several corresping CUI and no one matches"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping CUI"); } return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
public String getCuidFromMimId(String mimId) throws IOException, ParseException { //term: ?? String field = "mimId"; String userQuery = mimId;//from w ww . j av a 2 s.c o m // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + " in OMIM MRCONSO"); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); System.out.println(" =>" + d.get("cuId")); return d.get("cuId"); } else if (numTotalHits > 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; // WE TAKE THE FIRST ONE Document d = SEARCHER.doc(docId); return d.get("cuId"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping CUI"); } return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * /*from w w w . jav a 2 s. com*/ * @param symbol * @return * @throws IOException * @throws ParseException */ public String getPharmgkbIdForDrug(String drugName) throws IOException, ParseException { //symbol:warfarin String field = "name"; String userQuery = drugName; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + "in PharmGKB drugs "); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); return d.get("paId"); } else if (numTotalHits > 1) { System.out.println("WARNING: there is several corresping paId"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping paId"); // let's try alternate gene symbols String userQuery2 = drugName; Query query2 = new QueryParser(Version.LUCENE_35, "genericNames", ANALYSER).parse(userQuery2); TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query2, collector2); int numTotalHits2 = collector2.getTotalHits(); if (numTotalHits2 > 1) { ScoreDoc[] results2 = collector2.topDocs().scoreDocs; int docId = results2[0].doc; // WE TAKE THE FIRST ONE Document d = SEARCHER.doc(docId); return d.get("paId"); } } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * get the PA ID from the gene symbol by querying a lucen index make form the genes.tsv file * @param symbol//from w ww. j a v a2 s . c o m * @return * @throws IOException * @throws ParseException */ public String getPharmgkbIdForGene(String symbol) throws IOException, ParseException { //symbol:CYP2C9 String field = "symbol"; String userQuery = symbol; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + "in PharmGKB genes "); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); return d.get("paId"); } else if (numTotalHits > 1) { System.out.println("WARNING: there is several corresping paId"); } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping paId"); // let's try alternate gene symbols String userQuery2 = symbol; Query query2 = new QueryParser(Version.LUCENE_35, "alternateSymbols", ANALYSER).parse(userQuery2); TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query2, collector2); int numTotalHits2 = collector2.getTotalHits(); if (numTotalHits2 > 1) { ScoreDoc[] results2 = collector2.topDocs().scoreDocs; int docId = results2[0].doc; // WE TAKE THE FIRST ONE Document d = SEARCHER.doc(docId); return d.get("paId"); } } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * // w w w . j av a 2 s. c o m * @param diseaseLabel * @return * @throws IOException * @throws ParseException */ public String getPharmgkbIdForDisease(String diseaseLabel) throws IOException, ParseException { //symbol:warfarin String field = "diseaseLabel"; String userQuery = diseaseLabel; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); System.out.println("Searching for: " + query.toString(field) + "in PharmGKB diseases "); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); return d.get("paId"); } else if (numTotalHits > 1) { System.out.println("WARNING: there is several corresping paId"); // let's see if one of the results has a label exactly similar ScoreDoc[] results = collector.topDocs().scoreDocs; for (int i = 0; i < results.length; ++i) { int docId = results[i].doc; Document d = SEARCHER.doc(docId); if (d.get("diseaseLabel").toLowerCase().equals(diseaseLabel.toLowerCase())) { return d.get("paId"); } } } else if (numTotalHits == 0) { System.out.println("WARNING: there is no corresping paId"); } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return ""; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * does an AE form sider is frequent or note (ie is minFreq is >= 0.01) * @param stitchId//from w w w. j a va2 s .c om * @param aeCui * @return * @throws IOException * @throws CorruptIndexException * @throws ParseException */ public boolean isFrequentAe(String stitchId, String aeCui) throws CorruptIndexException, IOException, ParseException { boolean isFreq = false; String field = "stitchId"; String userQuery = "stitchId:" + stitchId + " AND aeCuiId:" + aeCui; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); //System.out.println("Searching for: " + query.toString(field)+" in meSH MRCONSO"); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits > 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; for (int i = 0; i < results.length; ++i) { int docId = results[i].doc; Document d = SEARCHER.doc(docId); if (!d.get("placebo").equals("placebo") && Float.parseFloat(d.get("minFreq")) >= 0.01) { return true; } } } //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred")); return isFreq; }
From source file:SimpleLuceneSearch.java
License:Apache License
/** * //from w ww. j a v a 2 s .c om * @param MeshId * @return CUI * @throws IOException * @throws ParseException */ public String getCuiFromMeshId(String meshId) throws IOException, ParseException { //term: ?? String field = "meshId"; String userQuery = "meshId:" + meshId; // only searching, so read-only=true Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); //System.out.println("Searching for: " + query.toString(field)+" in meSH MRCONSO"); int hitsPerPage = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); SEARCHER.search(query, collector); int numTotalHits = collector.getTotalHits(); //display results //System.out.println("Found " + numTotalHits + " hits."); if (numTotalHits == 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; int docId = results[0].doc; Document d = SEARCHER.doc(docId); //System.out.println(" =>"+d.get("term")); return d.get("cuId"); } else if (numTotalHits > 1) { ScoreDoc[] results = collector.topDocs().scoreDocs; for (int i = 0; i < results.length; ++i) { int docId = results[i].doc; Document d = SEARCHER.doc(docId); if (d.get("meshId").toLowerCase().equals(meshId.toLowerCase())) { return d.get("cuId"); } } System.out.println("WARNING: there is several corresping term and no one matches with the cui"); } else if (numTotalHits == 0) { //let's get the first non preferred term userQuery = "meshId:" + meshId; Query query2 = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery); int hitsPerPage2 = 10;// result is ordered with lucene scored then true TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage2, true); SEARCHER.search(query2, collector2); int numTotalHits2 = collector2.getTotalHits(); if (numTotalHits2 > 0) { ScoreDoc[] results2 = collector2.topDocs().scoreDocs; int docId = results2[0].doc; Document d = SEARCHER.doc(docId); return d.get("cuId"); } else { System.out.println("WARNING: 
there is no corresping term for " + meshId); } } return ""; }