Example usage for org.apache.lucene.search TopScoreDocCollector create

List of usage examples for org.apache.lucene.search TopScoreDocCollector create

Introduction

On this page you can find example usage for org.apache.lucene.search TopScoreDocCollector create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Document

Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately. Note that most of the examples below were written against older Lucene releases (3.x/4.x), where the second argument to create is a boolean docsScoredInOrder rather than the int totalHitsThreshold introduced in Lucene 8.
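Before the collected examples, here is a minimal, self-contained sketch against the two-argument (numHits, totalHitsThreshold) overload documented above, assuming a Lucene 8.x or 9.x dependency; the class name, field name and index contents are illustrative only.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class TopScoreDocCollectorCreateSketch {

    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
                StandardAnalyzer analyzer = new StandardAnalyzer()) {

            // Index a single example document.
            try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
                Document doc = new Document();
                doc.add(new TextField("content", "a small example document", Field.Store.YES));
                writer.addDocument(doc);
            }

            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new QueryParser("content", analyzer).parse("example");

                // Collect at most 10 top hits; count the total hit count exactly only up to 1000.
                TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1000);
                searcher.search(query, collector);

                for (ScoreDoc sd : collector.topDocs().scoreDocs) {
                    System.out.println(searcher.doc(sd.doc).get("content") + " score=" + sd.score);
                }
            }
        }
    }
}

Above totalHitsThreshold the reported total hit count may be a lower bound rather than an exact count, which is what allows the collector to skip non-competitive documents.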

Usage

From source file:searcher.CollStat.java

public TopDocs retrieve(HashMap<Integer, Integer> indexOrder, String queryStr, int indexNum) throws Exception {
    TopDocs topDocs = null;

    Query query = buildQuery(queryStr);
    TopScoreDocCollector collector = TopScoreDocCollector.create(numTopDocs, true);

    System.out.println("Hitmap: ");
    System.out.println(indexOrder);

    System.out.println("Multireader: " + multiReader);

    IndexReader reader = multiReader != null ? multiReader : getReaderInOrder(indexOrder, indexNum);

    IndexSearcher searcher = initSearcher(reader);
    searcher.search(query, collector);
    topDocs = collector.topDocs();
    return topDocs;
}

From source file:searcher.CollStat.java

public String getHTMLFromDocId(String indexNumStr, String docId) throws Exception {

    TopScoreDocCollector collector;
    TopDocs topDocs;

    int indexNum = indexNumStr == null ? -1 : Integer.parseInt(indexNumStr);

    System.out.println("Docid Query = |" + docId + "|");
    IndexReader reader = indexNum == -1 ? multiReader : readers[indexNum];

    Query query = new TermQuery(new Term(TrecDocIndexer.FIELD_ID, docId.trim()));
    collector = TopScoreDocCollector.create(1, true);

    IndexSearcher searcher = initSearcher(reader);
    searcher.search(query, collector);
    topDocs = collector.topDocs();
    // Assumes the docId term matched at least one document; otherwise scoreDocs is empty.
    ScoreDoc sd = topDocs.scoreDocs[0];

    Document doc = reader.document(sd.doc);
    String htmlDecompressed = IndexHtmlToText.decompress(doc.getBinaryValue(WTDocument.WTDOC_FIELD_HTML).bytes);

    return htmlDecompressed;
}

From source file:searchingfrommanyindexes.SearchingFromManyIndexes.java

public static void testMulti() throws Exception {
    MultiSearcher searcher = new MultiSearcher(searchers);
    TermRangeQuery query = new TermRangeQuery("animal", "h", "t", true, true);

    // Search both indexes using the collector
    TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
    searcher.search(query, collector);
    TopDocs hits = collector.topDocs();
    assertEquals("tarantula not included", 12, hits.totalHits);
    // Display results
    System.out.println("Found " + hits.scoreDocs.length + " hits.");
    for (int i = 0; i < hits.scoreDocs.length; ++i) {
        Document d = searcher.doc(hits.scoreDocs[i].doc);
        System.out.println((i + 1) + ". " + d.get("animal"));
    }
    // Close the multi searcher

}

From source file:selfy.QueryAnswerer.java

public List<String> searchLucene(String query) {
    query = query.replaceAll("#", " hashtags:");
    Set<String> results = new HashSet<>(50);
    ArrayList<String> ranking = new ArrayList<>();
    try {
        Query q = new QueryParser(Version.LUCENE_CURRENT, "body", analyzer).parse(query);

        TopScoreDocCollector collector = TopScoreDocCollector.create(64, true);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            if (!results.contains(d.get("id"))) {
                results.add(d.get("id"));
                ranking.add(d.get("id"));
            }
            //System.out.println((i + 1) + ". " + d.get("body") + "\t" + d.get("hashtags") + "\t" + d.get("id"));
        }

    } catch (ParseException ex) {
        Logger.getLogger(QueryAnswerer.class.getName()).log(Level.SEVERE, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(QueryAnswerer.class.getName()).log(Level.SEVERE, null, ex);
    }
    return ranking;
}

From source file:Tweet_search.TweetSearcher.java

public void retrieveAll() throws IOException, Exception {
    ScoreDoc[] hits = null;
    TopDocs topDocs = null;

    DocVector docVector = new DocVector(propFileName);
    //        queries = constructQueries();
    //        /* queries has all the raw data read from the query file like: 
    //            query_num, paper_title, paper_abtract, context etc.
    //        */

    System.out.println("Using BOW query:");

    File file = new File(resultsFile);
    System.out.println("creating directory: " + file.getParentFile().getAbsolutePath());
    if (!file.getParentFile().exists()) {
        System.out.println("creating directory: " + file.getParentFile().getName());
        boolean result = false;

        try {
            file.getParentFile().mkdir();
            result = true;
        } catch (SecurityException se) {
            //handle it
        }
        if (result) {
            System.out.println("DIR created");
        }
    }

    FileWriter fw = new FileWriter(resultsFile);

    for (int indexcount = 0; indexcount < tweet_ends_from_date - tweet_starts_from_date + 1; indexcount++) {
        int query_searched_count = 0;
        for (TRECQuery q : queries) {

            System.out.println("Query: " + q.qId + ": ");

            setAnalyzer();
            Query qry = q.getBOWQuery(getAnalyzer());

            TopScoreDocCollector collector = TopScoreDocCollector.create(num_wanted, true);

            //System.out.println(qry.toString());
            searcher[indexcount].search(qry, collector);

            topDocs = collector.topDocs();
            hits = topDocs.scoreDocs;
            if (hits == null)
                System.out.println("Nothing found");

            /* writing the result in file */
            StringBuilder buff = new StringBuilder();
            String d[], date;

            int hits_length = hits.length;
            System.err.println("Searching in index of date:" + (indexcount + tweet_starts_from_date));
            int j = 1;
            for (int i = 0; i < hits_length; ++i) {
                d = docVector.reader[indexcount].document(hits[i].doc).get("time").split(" ");
                date = d[5].concat("08" + d[2]);
                if ((Integer.parseInt(d[2]) - tweet_starts_from_date) == indexcount
                        && Integer.parseInt(date) > 20160000 && j <= 100) {
                    buff.append(date + " ").append(q.qId).append(" Q0 ")
                            .append(docVector.reader[indexcount].document(hits[i].doc).get("tweettime"))
                            .append(" ").append(j++).append(" ").append(hits[i].score).append(" ")
                            .append(run_name).append("\n");
                    //append(docVector.reader[indexcount].document(hits[i].doc).get("DOCNO")).append(" ").
                    // append(date).append("\n"); 
                }
            }
            fw.write(buff.toString());
            /* writing the result in file ends */
            query_searched_count++;
        }

        System.out.println(query_searched_count + " queries searched");
    }
    fw.close();

}

From source file:ubic.gemma.core.search.LuceneTest.java

License:Apache License

/**
 * Searching uses a RAM index to deal with queries using logical operators, though it can often be finicky.
 */
@Test
public void luceneRamIndexTest() throws Exception {
    try (RAMDirectory idx = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36)) {

        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        try (IndexWriter writer = new IndexWriter(idx, iwc)) {
            Document doc = new Document();
            Field f = new Field("content", "I have a small braintest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);
            doc = new Document();
            f = new Field("content", "I have a small braddintest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);
            doc = new Document();
            f = new Field("content", "I have a small brasaaafintest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);
            doc = new Document();
            f = new Field("content", "I have a small braidagagntest", Field.Store.YES, Field.Index.ANALYZED);
            doc.add(f);
            writer.addDocument(doc);
        }

        try (IndexReader ir = IndexReader.open(idx); IndexSearcher searcher = new IndexSearcher(ir)) {

            TopDocsCollector<ScoreDoc> hc = TopScoreDocCollector.create(1, true);

            QueryParser parser = new QueryParser(Version.LUCENE_36, "content", analyzer);
            Query parsedQuery;

            parsedQuery = parser.parse("braintest");

            searcher.search(parsedQuery, hc);

            TopDocs topDocs = hc.topDocs();

            int hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);
        }
    }
}

From source file:ubic.gemma.core.search.LuceneTest.java

License:Apache License

private void luceneTestB(Analyzer analyzer) throws Exception {

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
    try (RAMDirectory idx = new RAMDirectory(); IndexWriter writer = new IndexWriter(idx, iwc)) {
        Document doc = new Document();
        Field f = new Field("content", "Parkinson's disease", Field.Store.YES, Field.Index.ANALYZED);
        doc.add(f);
        writer.addDocument(doc);

        doc = new Document();
        f = new Field("content", "fooo", Field.Store.YES, Field.Index.ANALYZED);
        doc.add(f);
        writer.addDocument(doc);
        LuceneTest.log.info(doc);

        writer.close();

        try (IndexReader ir = IndexReader.open(idx); IndexSearcher searcher = new IndexSearcher(ir)) {
            //noinspection UnusedAssignment // testing create method
            TopDocsCollector<ScoreDoc> hc = TopScoreDocCollector.create(1, true);

            QueryParser parser = new QueryParser(Version.LUCENE_36, "content", analyzer);
            parser.setAutoGeneratePhraseQueries(true);
            parser.setEnablePositionIncrements(true);

            Query parsedQuery;

            parsedQuery = parser.parse("Parkinson's disease");
            LuceneTest.log.info(parsedQuery.toString());
            hc = TopScoreDocCollector.create(1, true);
            searcher.search(parsedQuery, hc);
            TopDocs topDocs = hc.topDocs();
            int hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);
            LuceneTest.log.info(searcher.doc(topDocs.scoreDocs[0].doc).getFieldable("content"));

            parsedQuery = parser.parse("parkinson's disease");
            LuceneTest.log.info(parsedQuery.toString());
            hc = TopScoreDocCollector.create(1, true);
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);

            parsedQuery = parser.parse("\"parkinson's disease\"");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);

            parsedQuery = parser.parse("\"parkinsons disease \"");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);

            parsedQuery = parser.parse("\"parkinson disease \"");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(parsedQuery.toString(), hitcount > 0);
            parsedQuery = parser.parse("parkinsons");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);
            parsedQuery = parser.parse("parkinson");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("parkinson*");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);
            LuceneTest.log.info(searcher.doc(topDocs.scoreDocs[0].doc).getFieldable("index"));

            parsedQuery = parser.parse("park*");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("parkinson's AND disease");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("'parkinson's AND disease'");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);

            parsedQuery = parser.parse("parkinson disease");
            hc = TopScoreDocCollector.create(1, true);
            LuceneTest.log.info(parsedQuery.toString());
            searcher.search(parsedQuery, hc);
            topDocs = hc.topDocs();
            hitcount = topDocs.totalHits;
            assertTrue(hitcount > 0);
            LuceneTest.log.info(searcher.doc(topDocs.scoreDocs[0].doc).getFieldable("content"));

            // parsedQuery = parser.parse( "parknson" );
            // hc = TopScoreDocCollector.create( 1, true );
            // log.info( parsedQuery.toString() );
            // searcher.search( parsedQuery, hc );
            // topDocs = hc.topDocs();
            // hitcount = topDocs.totalHits;
            // assertTrue( hitcount > 0 );
            // log.info( searcher.doc( topDocs.scoreDocs[0].doc ).getFieldable( "content" ) );
        }
    }
}

From source file:uk.ac.ebi.mdk.service.query.AbstractLuceneService.java

License:Open Source License

/**
 * Convenience method to access the first score document for a given query.
 * If multiple documents are found then a warning is logged.
 *
 * @param query search-able query
 *
 * @return the first score document for the query
 */
public ScoreDoc first(Query query) {

    ScoreDoc[] scoreDocs = search(query, TopScoreDocCollector.create(5, true));

    if (scoreDocs.length > 1) {
        LOGGER.warn("Expected a single hit for " + query);
    }

    return scoreDocs.length > 0 ? scoreDocs[0] : null;

}

From source file:uk.ac.ebi.mdk.service.query.AbstractLuceneService.java

License:Open Source License

/**
 * Search the index with the provided query. A new TopScoreDocCollector is
 * created and constrained using the value of {@link #getMaxResults()}.
 * If an exception occurs, an empty array of ScoreDocs is returned.
 *
 * @param query search-able query
 *
 * @return the score documents for the query
 */
public ScoreDoc[] search(Query query) {
    return search(query, TopScoreDocCollector.create(getMaxResults(), true));
}

From source file:uk.ac.ebi.mdk.service.query.crossreference.AbstractCrossreferenceService.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@Override
public Collection<? extends Identifier> getCrossReferences(I identifier) {

    Query query = construct(identifier.getAccession(), IDENTIFIER);

    Collection<Identifier> crossreferences = new ArrayList<Identifier>();

    TopScoreDocCollector collector = TopScoreDocCollector.create(5, true);
    try {

        getSearcher().search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        for (ScoreDoc document : hits) {
            Class c = getIdentifierClass(value(document, DATABASE_IDENTIFIER_INDEX.field()));
            String accession = value(document, DATABASE_ACCESSION.field());
            crossreferences.add(DefaultIdentifierFactory.getInstance().ofClass(c, accession));
        }
    } catch (IOException ex) {
        LOGGER.error("IO Exception occurred on service: " + ex.getMessage());
    }

    return crossreferences;
}