Example usage for org.apache.lucene.search TopScoreDocCollector create

List of usage examples for org.apache.lucene.search TopScoreDocCollector create

Introduction

On this page you can find example usages of org.apache.lucene.search TopScoreDocCollector create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Source Link

Document

Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately.

Usage

From source file:edu.ku.brc.specify.toycode.HelpTargetFinder.java

License:Open Source License

/**
 * @param query/*  w  ww .  ja v a  2s.  com*/
 * @param hitsPerPage
 * @return
 * @throws IOException
 */
protected int getTotalHits(final Query query, int hitsPerPage) throws IOException {
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(query, collector);

    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    int numTotalHits = collector.getTotalHits();
    //System.out.println(numTotalHits + " total matching documents");
    return numTotalHits;
}

From source file:edu.ku.brc.specify.toycode.mexconabio.CopyFromGBIF.java

License:Open Source License

/**
 * Runs a fixed sample query against the Lucene index under {@code INDEX_DIR} and, for every
 * hit, prints the matching row(s) of the {@code raw} table read through {@code srcDBConn}.
 *
 * <p>SQL, I/O and query-parse failures are printed with {@code printStackTrace()} and not
 * rethrown. The searcher and reader opened by this call, and the JDBC statement, are released
 * in the {@code finally} block so they are not leaked when a step fails part-way through.
 */
public void testSearch() {
    Statement stmt = null;
    // Remember what this call opened so cleanup only touches those instances.
    IndexReader openedReader = null;
    IndexSearcher openedSearcher = null;

    String querystr = "Andrew AND Bentley AND Apogon AND angustatus";
    String term = "contents";
    try {
        stmt = srcDBConn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);

        if (analyzer == null) {
            analyzer = new StandardAnalyzer(Version.LUCENE_36);
        }
        openedReader = IndexReader.open(FSDirectory.open(INDEX_DIR), true);
        reader = openedReader;

        long startTime = System.currentTimeMillis();
        Query q = new QueryParser(Version.LUCENE_36, term, analyzer).parse(querystr);
        int hitsPerPage = 10;
        openedSearcher = new IndexSearcher(openedReader);
        searcher = openedSearcher;
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        System.out.println("Found " + hits.length + " hits.");
        for (int i = 0; i < hits.length; ++i) {
            Document d = searcher.doc(hits[i].doc);
            String id = d.get("id");
            System.out.println((i + 1) + ". " + id);

            ResultSet rs = stmt.executeQuery(
                    "SELECT id, catalogue_number, genus, species, collector_num, collector_name, year, month, day FROM raw WHERE id = "
                            + id);
            try {
                ResultSetMetaData rsmd = rs.getMetaData();
                int columnCount = rsmd.getColumnCount();

                while (rs.next()) {
                    for (int j = 1; j <= columnCount; j++) {
                        System.out.print(rs.getObject(j) + "\t");
                    }
                    System.out.println();
                }
            } finally {
                // Close even if printing a row fails, so the statement can be reused cleanly.
                rs.close();
            }
        }
        System.out.println(String.format("Time: %8.2f", (System.currentTimeMillis() - startTime) / 1000.0));
        analyzer.close();

    } catch (SQLException e) {
        e.printStackTrace();

    } catch (IOException e) {
        e.printStackTrace();

    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // Each resource is closed independently so one failing close() cannot leak the others.
        if (openedSearcher != null) {
            try {
                openedSearcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (openedReader != null) {
            try {
                openedReader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:edu.ku.brc.specify.toycode.mexconabio.GBIFFullTextSearchFrame.java

License:Open Source License

/**
 * Parses {@code serachStr} as a query on the {@code contents} field, collects up to 1000 hits
 * and hands their {@code id} values to {@code createAndFillModels}. When nothing matches, the
 * table's model is cleared if it is a {@code DataObjTableModel}. The elapsed time is printed
 * and also shown in {@code status}.
 *
 * <p>Any exception is printed with {@code printStackTrace()} and otherwise ignored.
 *
 * @param serachStr the query text to parse
 */
private void doSearch(final String serachStr) {
    final long begin = System.currentTimeMillis();
    try {
        final Query parsed = new QueryParser(Version.LUCENE_36, "contents", analyzer).parse(serachStr);
        final int maxHits = 1000;

        final TopScoreDocCollector topCollector = TopScoreDocCollector.create(maxHits, true);
        searcher.search(parsed, topCollector);
        final ScoreDoc[] found = topCollector.topDocs().scoreDocs;

        if (found.length == 0) {
            // Nothing matched: empty the table instead of leaving stale rows behind.
            final TableModel current = table.getModel();
            if (current instanceof DataObjTableModel) {
                ((DataObjTableModel) current).clear();
            }
        } else {
            final ArrayList<String> matchedIds = new ArrayList<String>(found.length);

            System.out.println("Found " + found.length + " hits.");
            int rank = 0;
            for (ScoreDoc hit : found) {
                rank++;
                final String id = searcher.doc(hit.doc).get("id");
                System.out.println(rank + ". " + id);
                matchedIds.add(id);
            }
            createAndFillModels(matchedIds);
        }

        final double elapsedForLog = (System.currentTimeMillis() - begin) / 1000.0;
        System.out.println(String.format("Time: %8.2f", elapsedForLog));

        // Measured again on purpose: the status text reports the time up to this point.
        final double elapsedForStatus = (System.currentTimeMillis() - begin) / 1000.0;
        status.setText(String.format("Found %d items in %8.2f", found.length, elapsedForStatus));

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:edu.rpi.tw.linkipedia.search.searching.EntityAnnotator.java

License:Open Source License

/**
 * Builds a query from {@code label} and {@code contexts} via {@code wquery}, runs it, and
 * wraps each of the top hits in an {@code Annotation} carrying the hit's {@code url} field
 * and score.
 *
 * <p>If there are no hits, the list contains a single placeholder annotation with an empty
 * URL and score 0. Parse/search failures are printed and whatever the collector gathered
 * (possibly nothing) is still processed.
 *
 * @param label the surface text to look up
 * @param contexts context strings passed to the query builder
 * @param numResult maximum number of hits to collect
 * @return the annotations for the collected hits, or a single placeholder when there are none
 */
public ArrayList<Annotation> mysearch(String label, String[] contexts, int numResult) {
    ArrayList<Annotation> results = new ArrayList<Annotation>();
    TopScoreDocCollector collector = TopScoreDocCollector.create(numResult, false);

    try {
        Query finalQuery = wquery.parse(label, contexts);
        System.out.println("QUERY: " + finalQuery.toString() + "\n");
        searcher.search(finalQuery, collector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int end = Math.min(collector.getTotalHits(), numResult);
    if (end == 0) {
        results.add(new Annotation(label, "", 0));
    }
    for (int i = 0; i < end; i++) {
        Document doc;
        try {
            doc = searcher.doc(hits[i].doc);
        } catch (Exception e) {
            // The stored document could not be loaded; skip this hit instead of
            // dereferencing a null document below.
            e.printStackTrace();
            continue;
        }
        results.add(new Annotation(label, doc.get("url"), hits[i].score));
    }
    return results;
}

From source file:edu.rpi.tw.linkipedia.search.searching.EntityLinker.java

License:Open Source License

/**
 * Builds a query from {@code label} and {@code contexts} via {@code wquery}, runs it, and
 * wraps each of the top hits in an {@code Annotation} carrying the hit's {@code url} field
 * and score.
 *
 * <p>When there are no hits, the call is delegated to {@code fuzzSearch} if {@code fuzzy} is
 * set; otherwise a single placeholder annotation (empty URL, score 0) is returned.
 * Parse/search failures are printed and whatever the collector gathered is still processed.
 *
 * @param label the surface text to look up
 * @param contexts context strings passed to the query builder
 * @param numResult maximum number of hits to collect
 * @return the annotations for the collected hits; never {@code null}
 */
public ArrayList<Annotation> mysearch(String label, String[] contexts, int numResult) {
    // Allocated up front so a failure while building the query cannot leave this null.
    ArrayList<Annotation> results = new ArrayList<Annotation>();
    TopScoreDocCollector collector = TopScoreDocCollector.create(numResult, false);
    try {
        Query finalQuery = wquery.parse(label, contexts);
        System.out.println("QUERY: " + finalQuery.toString() + "\n");

        searcher.search(finalQuery, collector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int end = Math.min(collector.getTotalHits(), numResult);
    if (end == 0) {
        if (fuzzy) {
            return fuzzSearch(label, contexts, numResult);
        }
        results.add(new Annotation(label, "", 0));
        return results;
    }
    for (int i = 0; i < end; i++) {
        Document doc;
        try {
            doc = searcher.doc(hits[i].doc);
        } catch (Exception e) {
            // The stored document could not be loaded; skip this hit instead of
            // dereferencing a null document below.
            e.printStackTrace();
            continue;
        }
        String url = doc.get("url");
        if (debug) {
            System.out.println(url + " boost: " + doc.get("boost") + " score: " + hits[i].score);
        }
        results.add(new Annotation(label, url, hits[i].score));
    }

    return results;
}

From source file:edu.rpi.tw.linkipedia.search.searching.EntityLinker.java

License:Open Source License

/**
 * Fuzzy variant of the search: builds the query with {@code wquery.getFuzzyQuery} and wraps
 * each of the top hits in an {@code Annotation} carrying the hit's {@code url} field and score.
 * Intended to be used only when the regular search returned no result.
 *
 * <p>If there are no hits, the list contains a single placeholder annotation with an empty
 * URL and score 0. Query/search failures are printed and whatever the collector gathered is
 * still processed.
 *
 * @param label the surface text to look up
 * @param contexts context strings passed to the query builder
 * @param numResult maximum number of hits to collect
 * @return the annotations for the collected hits, or a single placeholder when there are none
 */
public ArrayList<Annotation> fuzzSearch(String label, String[] contexts, int numResult) {
    ArrayList<Annotation> results = new ArrayList<Annotation>();
    TopScoreDocCollector collector = TopScoreDocCollector.create(numResult, false);
    try {
        Query finalQuery = wquery.getFuzzyQuery(label, contexts);
        System.out.println("QUERY: " + finalQuery.toString() + "\n");
        searcher.search(finalQuery, collector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int end = Math.min(collector.getTotalHits(), numResult);
    if (end == 0) {
        results.add(new Annotation(label, "", 0));
        return results;
    }
    for (int i = 0; i < end; i++) {
        Document doc;
        try {
            doc = searcher.doc(hits[i].doc);
        } catch (Exception e) {
            // The stored document could not be loaded; skip this hit instead of
            // dereferencing a null document below.
            e.printStackTrace();
            continue;
        }
        String url = doc.get("url");
        if (debug) {
            System.out.println(url + " boost: " + doc.get("boost") + " score: " + hits[i].score);
        }
        results.add(new Annotation(label, url, hits[i].score));
    }

    return results;
}

From source file:edu.rpi.tw.linkipedia.search.searching.EntitySearcher.java

License:Open Source License

/**
 * Builds a query from {@code label} and {@code contexts} via {@code wquery}, runs it, and
 * wraps each of the top hits in an {@code Annotation} carrying the hit's {@code url} field
 * and score. Each annotation's content is set to one {@code "label: <value>"} line per
 * {@code label} field value of the hit.
 *
 * <p>When there are no hits the call is delegated to {@code fuzzSearch}. Parse/search
 * failures are printed and whatever the collector gathered is still processed.
 *
 * @param label the surface text to look up
 * @param contexts context strings passed to the query builder
 * @param numResult maximum number of hits to collect
 * @return the annotations for the collected hits, or the result of {@code fuzzSearch}
 */
public ArrayList<Annotation> mysearch(String label, String[] contexts, int numResult) {
    ArrayList<Annotation> results = new ArrayList<Annotation>();
    TopScoreDocCollector collector = TopScoreDocCollector.create(numResult, false);

    try {
        Query finalQuery = wquery.parse(label, contexts);
        searcher.search(finalQuery, collector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int end = Math.min(collector.getTotalHits(), numResult);
    if (end == 0) {
        return fuzzSearch(label, contexts, numResult);
    }
    for (int i = 0; i < end; i++) {
        Document doc;
        try {
            doc = searcher.doc(hits[i].doc);
        } catch (Exception e) {
            // The stored document could not be loaded; skip this hit instead of
            // dereferencing a null document below.
            e.printStackTrace();
            continue;
        }

        // StringBuilder avoids re-copying the accumulated text for every label value.
        StringBuilder contents = new StringBuilder();
        for (String value : doc.getValues("label")) {
            contents.append("label: ").append(value).append("\n");
        }

        Annotation annotation = new Annotation(label, doc.get("url"), hits[i].score);
        annotation.setContent(contents.toString());
        if (debug) {
            System.out.println(annotation + " " + doc.get("boost"));
        }
        results.add(annotation);
    }
    return results;
}

From source file:edu.rpi.tw.linkipedia.search.searching.EntitySearcher.java

License:Open Source License

/**
 * Fuzzy variant of the search: builds the query with {@code wquery.getFuzzyQuery} and wraps
 * each of the top hits in an {@code Annotation} carrying the hit's {@code url} field and score.
 *
 * <p>If there are no hits, the list contains a single placeholder annotation with an empty
 * URL and score 0. Query/search failures are printed and whatever the collector gathered is
 * still processed.
 *
 * @param label the surface text to look up
 * @param contexts context strings passed to the query builder
 * @param numResult maximum number of hits to collect
 * @return the annotations for the collected hits, or a single placeholder when there are none
 */
public ArrayList<Annotation> fuzzSearch(String label, String[] contexts, int numResult) {
    ArrayList<Annotation> results = new ArrayList<Annotation>();
    TopScoreDocCollector collector = TopScoreDocCollector.create(numResult, false);

    try {
        Query finalQuery = wquery.getFuzzyQuery(label, contexts);
        searcher.search(finalQuery, collector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int end = Math.min(collector.getTotalHits(), numResult);
    if (end == 0) {
        results.add(new Annotation(label, "", 0));
        return results;
    }
    for (int i = 0; i < end; i++) {
        Document doc;
        try {
            doc = searcher.doc(hits[i].doc);
        } catch (Exception e) {
            // The stored document could not be loaded; skip this hit instead of
            // dereferencing a null document below.
            e.printStackTrace();
            continue;
        }
        String url = doc.get("url");
        if (debug) {
            System.out.println(url + " boost: " + doc.get("boost") + " score: " + hits[i].score);
        }
        results.add(new Annotation(label, url, hits[i].score));
    }

    return results;
}

From source file:edu.rpi.tw.linkipedia.search.searching.SurfaceFormSearcher.java

License:Open Source License

/**
 * Counts the documents whose {@code label} field matches {@code query} as a single-term
 * phrase query.
 *
 * <p>Search failures are printed with {@code printStackTrace()}; the count gathered by the
 * collector up to that point (possibly 0) is returned.
 *
 * @param query the term text to look for in the {@code label} field
 * @param context currently unused
 * @return the total hit count reported by the collector
 */
public int lookup(String query, String context) {
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, false);
    try {
        PhraseQuery luceneQuery = new PhraseQuery();
        luceneQuery.add(new Term("label", query));
        searcher.search(luceneQuery, collector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Only the count is needed here, so the ranked hits are never materialised.
    return collector.getTotalHits();
}

From source file:edu.rpi.tw.linkipedia.search.searching.SurfaceFormSearcher.java

License:Open Source License

/**
 * Searches for {@code query} and stores the top hits in the {@code results} field, which is
 * also returned.
 *
 * <p>The query combines a payload term query on {@code label} (boost 10) with the parsed
 * form of {@code query} on {@code analyzedLabel}, both optional clauses, and wraps them in a
 * {@code CustomScoreQuery} driven by the {@code boost} float field. Each hit becomes an
 * {@code Annotation} with the hit's {@code url}, score and {@code triple} field values.
 *
 * <p>Query/search failures are printed and whatever the collector gathered is still processed.
 *
 * @param query the text to search for
 * @return the annotations for up to 10 hits; empty when nothing matched
 */
public ArrayList<Annotation> mysearch(String query) {
    // Single source of truth for both the collector size and the loop bound.
    final int maxHits = 10;
    TopScoreDocCollector collector = TopScoreDocCollector.create(maxHits, false);
    results = new ArrayList<Annotation>();
    try {
        BooleanQuery luceneQuery = new BooleanQuery();
        Term term = new Term("label", query);
        PayloadTermQuery ptq = new PayloadTermQuery(term, payloadFunction);
        ptq.setBoost(10);
        Analyzer analyzer = DefaultAnalyzer.getAnalyzer();
        QueryParser parser = new QueryParser(Version.LUCENE_47, "analyzedLabel", analyzer);
        Query parsedQuery = parser.parse(query);

        luceneQuery.add(ptq, BooleanClause.Occur.SHOULD);
        luceneQuery.add(parsedQuery, BooleanClause.Occur.SHOULD);
        FunctionQuery boostQuery = new FunctionQuery(new FloatFieldSource("boost"));
        Query finalQuery = new CustomScoreQuery(luceneQuery, boostQuery);
        System.out.println(finalQuery);
        searcher.search(finalQuery, collector);
    } catch (Exception e) {
        e.printStackTrace();
    }
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int end = Math.min(collector.getTotalHits(), maxHits);

    for (int i = 0; i < end; i++) {
        Document doc;
        try {
            doc = searcher.doc(hits[i].doc);
        } catch (Exception e) {
            // The stored document could not be loaded; skip this hit instead of
            // dereferencing a null document below.
            e.printStackTrace();
            continue;
        }
        String[] triples = doc.getValues("triple");
        Annotation annotation = new Annotation(query, doc.get("url"), hits[i].score);
        annotation.setTriples(new ArrayList<String>(Arrays.asList(triples)));
        results.add(annotation);
    }
    return results;
}