Example usage for org.apache.lucene.search TopScoreDocCollector create

List of usage examples for org.apache.lucene.search TopScoreDocCollector create

Introduction

This page collects example usages of org.apache.lucene.search.TopScoreDocCollector.create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Source Link

Document

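Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately. Note: the examples below were written against older Lucene releases (they reference Version.LUCENE_35 and Version.LATEST) and therefore call the earlier overload create(int numHits, boolean docsScoredInOrder) rather than the (int, int) prototype shown above.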

Usage

From source file:GUIFrame.java

/**
 * Runs the user's query against the "summary" field of the given index and copies the
 * top hits into the list models.
 *
 * <p>Double quotes are stripped from {@code userInput} before it is parsed. For every hit the
 * stored "docID" value is appended to {@code DocList} and the stored "summary" value to
 * {@code SumListModel}. Afterwards {@code GetTerms} is invoked with the same index, analyzer
 * and the quote-stripped input.
 *
 * @param Analyzer analyzer used to parse the query
 * @param Index directory holding the index to search
 * @param userInput raw query text
 * @param DocList list model that receives the "docID" value of each hit
 * @return the number of hits copied into the list models (at most 20)
 * @throws ParseException if the query text cannot be parsed
 * @throws IOException if the index cannot be opened or read
 */
public int SearchResults(PorterStemAnalyzer Analyzer, Directory Index, String userInput,
        DefaultListModel DocList) throws ParseException, IOException {
    // The query
    userInput = userInput.replace("\"", "");
    Query q = new QueryParser(Version.LATEST, "summary", Analyzer).parse(userInput);

    // The search
    int hitsPerPage = 20; // return 20 top documents
    int hitCount;
    // The reader is opened per call, so it must also be closed per call; previously it leaked.
    try (IndexReader indoReader = DirectoryReader.open(Index)) {
        IndexSearcher indoSearcher = new IndexSearcher(indoReader);
        TopScoreDocCollector docCollector = TopScoreDocCollector.create(hitsPerPage, true);
        indoSearcher.search(q, docCollector);
        ScoreDoc[] hits = docCollector.topDocs().scoreDocs;

        // Copy results to list models while the reader is still open
        for (ScoreDoc hit : hits) {
            Document d = indoSearcher.doc(hit.doc);
            DocList.addElement(d.get("docID"));
            SumListModel.addElement(d.get("summary"));
        }
        hitCount = hits.length;
    }

    GetTerms(Index, Analyzer, "summary", userInput);

    return hitCount;
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * Looks up the "cuId" stored for a MeSH identifier, using a query of the form
 * {@code meshId:D001416 AND preferred:true}.
 *
 * @param meshId value matched against the "meshId" field
 * @param preferred value matched against the "preferred" field
 * @return the "cuId" of the hit when exactly one document matches; otherwise the empty
 *         string, after a warning has been printed to standard output
 * @throws IOException propagated from the searcher
 * @throws ParseException if the assembled query cannot be parsed
 */
public String getCuid(String meshId, String preferred) throws IOException, ParseException {
    final String defaultField = "meshId";
    final String queryText = "meshId:" + meshId + " AND preferred:" + preferred;
    Query parsed = new QueryParser(Version.LUCENE_35, defaultField, ANALYSER).parse(queryText);

    // Collect at most ten hits; the total hit count is what drives the decision below.
    final int maxHits = 10;
    TopScoreDocCollector topHits = TopScoreDocCollector.create(maxHits, true);
    SEARCHER.search(parsed, topHits);
    int matchCount = topHits.getTotalHits();

    if (matchCount == 1) {
        int onlyDocId = topHits.topDocs().scoreDocs[0].doc;
        Document onlyDoc = SEARCHER.doc(onlyDocId);
        return onlyDoc.get("cuId");
    }

    // Ambiguous or missing: report it and fall through to the empty result.
    if (matchCount > 1) {
        System.out.println("WARNING: there is several corresping CUI");
    } else if (matchCount == 0) {
        System.out.println("WARNING: there is no corresping CUI");
    }
    return "";
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * /* www. ja va  2  s. c om*/
 * @param CUI
 * @return preferred
 * @throws IOException
 * @throws ParseException
 */
public String getPreferredNameFromCui(String cui) throws IOException, ParseException { //term: ??

    String field = "cuId";
    String userQuery = "cuId:" + cui + " AND preferred:true";

    // only searching, so read-only=true
    Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery);

    //System.out.println("Searching for: " + query.toString(field)+" in meSH MRCONSO");
    int hitsPerPage = 10;// result is ordered with lucene scored then true
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    SEARCHER.search(query, collector);
    int numTotalHits = collector.getTotalHits();

    //display results
    //System.out.println("Found " + numTotalHits + " hits.");
    if (numTotalHits == 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        int docId = results[0].doc;
        Document d = SEARCHER.doc(docId);
        //System.out.println("         =>"+d.get("term"));
        return d.get("term");
    } else if (numTotalHits > 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        for (int i = 0; i < results.length; ++i) {
            int docId = results[i].doc;
            Document d = SEARCHER.doc(docId);
            if (d.get("cuId").toLowerCase().equals(cui.toLowerCase())) {
                return d.get("term");
            }
            System.out.println((i + 1) + ". " + d.get("cuId") + ", term= " + d.get("term"));
        }
        System.out.println("WARNING: there is several corresping term and no one matches with the cui");
    } else if (numTotalHits == 0) {
        //let's get the first non preferred term
        userQuery = "cuId:" + cui;
        Query query2 = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery);
        int hitsPerPage2 = 10;// result is ordered with lucene scored then true
        TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage2, true);
        SEARCHER.search(query2, collector2);
        int numTotalHits2 = collector2.getTotalHits();
        if (numTotalHits2 > 0) {
            ScoreDoc[] results2 = collector2.topDocs().scoreDocs;
            int docId = results2[0].doc;
            Document d = SEARCHER.doc(docId);
            return d.get("term");
        } else {
            System.out.println("WARNING: there is no corresping term for " + cui);
        }
    }
    return "";
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * /*from www  .j  a  va  2 s .com*/
 * @param meshId
 * @param preferred
 * @return
 * @throws IOException
 * @throws ParseException
 */
public String getCuidFromLabel(String term) throws IOException, ParseException { //term: ??

    String field = "term";
    String userQuery = "\"" + term + "\"";

    // only searching, so read-only=true
    Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery);

    // System.out.println("Searching for: " + query.toString(field)+" in meSH MRCONSO");
    int hitsPerPage = 10;// result is ordered with lucene scored then true
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    SEARCHER.search(query, collector);
    int numTotalHits = collector.getTotalHits();

    //display results
    //System.out.println("Found " + numTotalHits + " hits.");
    if (numTotalHits == 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        int docId = results[0].doc;
        Document d = SEARCHER.doc(docId);
        //System.out.println("         =>"+d.get("cuId"));
        return d.get("cuId");
    } else if (numTotalHits > 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        for (int i = 0; i < results.length; ++i) {
            int docId = results[i].doc;
            Document d = SEARCHER.doc(docId);
            if (d.get("term").toLowerCase().equals(term.toLowerCase())) {
                return d.get("cuId");
            }
        }
        System.out.println("WARNING: there is several corresping CUI and no one matches");
    } else if (numTotalHits == 0) {
        System.out.println("WARNING: there is no corresping CUI");
    }
    return "";
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * Returns the stored "cuId" of the top-scored document matching the given identifier in
 * the "mimId" field.
 *
 * <p>The parsed query is always printed. With exactly one match the resolved "cuId" is
 * printed as well; with several matches the first returned hit is used silently.
 *
 * @param mimId query text parsed against the "mimId" field
 * @return the "cuId" of the first hit, or the empty string when nothing matches (a
 *         warning is printed in that case)
 * @throws IOException propagated from the searcher
 * @throws ParseException if {@code mimId} cannot be parsed as a query
 */
public String getCuidFromMimId(String mimId) throws IOException, ParseException {
    final String defaultField = "mimId";
    Query parsed = new QueryParser(Version.LUCENE_35, defaultField, ANALYSER).parse(mimId);
    System.out.println("Searching for: " + parsed.toString(defaultField) + " in OMIM MRCONSO");

    final int maxHits = 10;
    TopScoreDocCollector topHits = TopScoreDocCollector.create(maxHits, true);
    SEARCHER.search(parsed, topHits);
    int matchCount = topHits.getTotalHits();

    if (matchCount >= 1) {
        // One or many: the first returned hit wins either way.
        ScoreDoc[] ranked = topHits.topDocs().scoreDocs;
        Document best = SEARCHER.doc(ranked[0].doc);
        if (matchCount == 1) {
            System.out.println("         =>" + best.get("cuId"));
        }
        return best.get("cuId");
    }

    if (matchCount == 0) {
        System.out.println("WARNING: there is no corresping CUI");
    }
    return "";
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * /*from w w w .  jav  a 2  s. com*/
 * @param symbol
 * @return
 * @throws IOException
 * @throws ParseException
 */
public String getPharmgkbIdForDrug(String drugName) throws IOException, ParseException { //symbol:warfarin

    String field = "name";
    String userQuery = drugName;
    // only searching, so read-only=true
    Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery);

    System.out.println("Searching for: " + query.toString(field) + "in PharmGKB drugs ");
    int hitsPerPage = 10;// result is ordered with lucene scored then true
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    SEARCHER.search(query, collector);
    int numTotalHits = collector.getTotalHits();

    //display results
    //System.out.println("Found " + numTotalHits + " hits.");
    if (numTotalHits == 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        int docId = results[0].doc;
        Document d = SEARCHER.doc(docId);
        return d.get("paId");
    } else if (numTotalHits > 1) {
        System.out.println("WARNING: there is several corresping paId");
    } else if (numTotalHits == 0) {
        System.out.println("WARNING: there is no corresping paId");
        // let's try alternate gene symbols
        String userQuery2 = drugName;
        Query query2 = new QueryParser(Version.LUCENE_35, "genericNames", ANALYSER).parse(userQuery2);
        TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage, true);
        SEARCHER.search(query2, collector2);
        int numTotalHits2 = collector2.getTotalHits();
        if (numTotalHits2 > 1) {
            ScoreDoc[] results2 = collector2.topDocs().scoreDocs;
            int docId = results2[0].doc; // WE TAKE THE FIRST ONE
            Document d = SEARCHER.doc(docId);
            return d.get("paId");
        }
    }
    //System.out.println((i + 1) + ". " + d.get("cuId")+ ", meshId= " + d.get("meshId")+ ", term= " + d.get("term")+ ", preferred= " + d.get("preferred"));
    return "";
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * Returns the stored "paId" for a gene symbol by querying a Lucene index (per the original
 * note, one built from the genes.tsv file).
 *
 * <p>The "symbol" field is searched first. If exactly one document matches, its "paId" is
 * returned; if several match, a warning is printed and the empty string is returned. If
 * nothing matches, the same text is searched in the "alternateSymbols" field and the
 * "paId" of the first hit, if any, is returned.
 *
 * @param symbol query text, e.g. {@code CYP2C9}
 * @return the resolved "paId", or the empty string when it cannot be resolved
 * @throws IOException propagated from the searcher
 * @throws ParseException if {@code symbol} cannot be parsed as a query
 */
public String getPharmgkbIdForGene(String symbol) throws IOException, ParseException {

    String field = "symbol";
    String userQuery = symbol;
    Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery);

    System.out.println("Searching for: " + query.toString(field) + "in PharmGKB genes ");
    int hitsPerPage = 10; // only the ten best-scored hits are collected
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    SEARCHER.search(query, collector);
    int numTotalHits = collector.getTotalHits();

    if (numTotalHits == 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        Document d = SEARCHER.doc(results[0].doc);
        return d.get("paId");
    } else if (numTotalHits > 1) {
        System.out.println("WARNING: there is several corresping paId");
    } else if (numTotalHits == 0) {
        System.out.println("WARNING: there is no corresping paId");
        // Fall back to the alternate gene symbols.
        Query query2 = new QueryParser(Version.LUCENE_35, "alternateSymbols", ANALYSER).parse(symbol);
        TopScoreDocCollector collector2 = TopScoreDocCollector.create(hitsPerPage, true);
        SEARCHER.search(query2, collector2);
        int numTotalHits2 = collector2.getTotalHits();
        // "> 0", not "> 1": a single fallback hit is a valid answer and must not be dropped.
        if (numTotalHits2 > 0) {
            ScoreDoc[] results2 = collector2.topDocs().scoreDocs;
            Document d = SEARCHER.doc(results2[0].doc); // we take the first one
            return d.get("paId");
        }
    }
    return "";
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * Returns the stored "paId" for a disease label searched in the "diseaseLabel" field.
 *
 * <p>A single match is returned directly. With several matches a warning is printed and
 * the returned hits are scanned in order for one whose lower-cased "diseaseLabel" equals
 * the lower-cased argument.
 *
 * @param diseaseLabel query text parsed against the "diseaseLabel" field
 * @return the resolved "paId", or the empty string when there is no hit or no exact
 *         match among several hits
 * @throws IOException propagated from the searcher
 * @throws ParseException if {@code diseaseLabel} cannot be parsed as a query
 */
public String getPharmgkbIdForDisease(String diseaseLabel) throws IOException, ParseException {
    final String defaultField = "diseaseLabel";
    Query parsed = new QueryParser(Version.LUCENE_35, defaultField, ANALYSER).parse(diseaseLabel);
    System.out.println("Searching for: " + parsed.toString(defaultField) + "in PharmGKB diseases ");

    final int maxHits = 10;
    TopScoreDocCollector topHits = TopScoreDocCollector.create(maxHits, true);
    SEARCHER.search(parsed, topHits);
    int matchCount = topHits.getTotalHits();

    if (matchCount == 1) {
        int onlyDocId = topHits.topDocs().scoreDocs[0].doc;
        return SEARCHER.doc(onlyDocId).get("paId");
    }

    if (matchCount > 1) {
        System.out.println("WARNING: there is several corresping paId");
        // Look for a hit whose label is exactly the requested one, ignoring letter case.
        for (ScoreDoc hit : topHits.topDocs().scoreDocs) {
            Document candidate = SEARCHER.doc(hit.doc);
            if (candidate.get("diseaseLabel").toLowerCase().equals(diseaseLabel.toLowerCase())) {
                return candidate.get("paId");
            }
        }
    } else if (matchCount == 0) {
        System.out.println("WARNING: there is no corresping paId");
    }
    return "";
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * Tells whether an adverse event (AE) from SIDER is frequent for a drug, i.e. whether at
 * least one matching non-placebo record has a "minFreq" of 0.01 or more.
 *
 * <p>Only the ten best-scored records matching {@code stitchId} and {@code aeCui} are
 * examined. A record counts as placebo when its stored "placebo" value is the string
 * "placebo"; records without a stored "minFreq" are skipped.
 *
 * @param stitchId value matched against the "stitchId" field
 * @param aeCui value matched against the "aeCuiId" field
 * @return {@code true} if a non-placebo record with minFreq >= 0.01 is found
 * @throws IOException propagated from the searcher
 * @throws CorruptIndexException propagated from the searcher
 * @throws ParseException if the assembled query cannot be parsed
 * @throws NumberFormatException if a stored "minFreq" value is not a number
 */
public boolean isFrequentAe(String stitchId, String aeCui)
        throws CorruptIndexException, IOException, ParseException {

    String field = "stitchId";
    String userQuery = "stitchId:" + stitchId + " AND aeCuiId:" + aeCui;
    Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery);

    int hitsPerPage = 10;
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    SEARCHER.search(query, collector);

    // Every returned record is inspected, including the case of exactly one match, which
    // the former "numTotalHits > 1" guard skipped.
    for (ScoreDoc hit : collector.topDocs().scoreDocs) {
        Document d = SEARCHER.doc(hit.doc);
        if ("placebo".equals(d.get("placebo"))) {
            continue;
        }
        String minFreq = d.get("minFreq");
        // Parse as double: the float nearest to 0.01 is slightly below the double literal
        // 0.01, so a stored "0.01" used to fail the ">= 0.01" comparison.
        if (minFreq != null && Double.parseDouble(minFreq) >= 0.01) {
            return true;
        }
    }
    return false;
}

From source file:SimpleLuceneSearch.java

License:Apache License

/**
 * Returns the stored "cuId" for a MeSH identifier searched in the "meshId" field.
 *
 * <p>A single match is returned directly. With several matches the returned hits are
 * scanned in order and the "cuId" of the first one whose lower-cased "meshId" equals the
 * lower-cased argument is returned.
 *
 * @param meshId value matched against the "meshId" field
 * @return the resolved "cuId", or the empty string when there is no hit or no exact
 *         match among several hits (a warning is printed in both cases)
 * @throws IOException propagated from the searcher
 * @throws ParseException if the assembled query cannot be parsed
 */
public String getCuiFromMeshId(String meshId) throws IOException, ParseException {

    String field = "meshId";
    String userQuery = "meshId:" + meshId;
    Query query = new QueryParser(Version.LUCENE_35, field, ANALYSER).parse(userQuery);

    int hitsPerPage = 10; // only the ten best-scored hits are collected
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    SEARCHER.search(query, collector);
    int numTotalHits = collector.getTotalHits();

    if (numTotalHits == 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        Document d = SEARCHER.doc(results[0].doc);
        return d.get("cuId");
    } else if (numTotalHits > 1) {
        ScoreDoc[] results = collector.topDocs().scoreDocs;
        for (int i = 0; i < results.length; ++i) {
            Document d = SEARCHER.doc(results[i].doc);
            if (d.get("meshId").toLowerCase().equals(meshId.toLowerCase())) {
                return d.get("cuId");
            }
        }
        System.out.println("WARNING: there is several corresping term and no one matches with the cui");
    } else if (numTotalHits == 0) {
        // The former fallback re-ran this exact query, which could only come back empty
        // again, so the miss is reported right away.
        System.out.println("WARNING: there is no corresping term for " + meshId);
    }
    return "";
}