Example usage for org.apache.lucene.search TopScoreDocCollector create

List of usage examples for org.apache.lucene.search TopScoreDocCollector create

Introduction

On this page you can find usage examples for org.apache.lucene.search.TopScoreDocCollector.create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Source Link

Document

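Creates a new TopScoreDocCollector given the number of hits to collect and the number of hits to count accurately. Note: the usage examples below call the older create(int numHits, boolean docsScoredInOrder) overload from Lucene 3.x/4.x rather than the prototype shown above.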

Usage

From source file:be.ugent.tiwi.sleroux.newsrec.recommendationstester.LuceneTopTermExtract.java

License:Apache License

/**
 * Looks up the document whose "id" field equals {@code id} and returns its top terms.
 *
 * <p>Only the single best-scoring hit is collected; its document number is handed to the
 * document-number overload of {@code getTopTerms}.
 *
 * @param id            value of the "id" field to search for
 * @param reader        index reader to search; it is not closed by this method
 * @param numberOfTerms passed through unchanged to the overload
 * @return the overload's result, or an empty map when no document matches or the search
 *         fails with an {@link IOException}
 */
public Map<String, Double> getTopTerms(String id, IndexReader reader, int numberOfTerms) {
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        // One hit is enough: only the first matching document is used.
        TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
        Query q = new TermQuery(new Term("id", id));
        searcher.search(q, collector);
        if (collector.getTotalHits() > 0) {
            int docNr = collector.topDocs().scoreDocs[0].doc;
            return getTopTerms(docNr, reader, numberOfTerms);
        }
        logger.warn("No document found with id=" + id);
    } catch (IOException ex) {
        // Pass the exception as the cause so the stack trace is kept in the log.
        logger.error("Failed to look up document with id=" + id, ex);
    }
    return new HashMap<>(0);
}

From source file:br.com.crawlerspring.model.Searcher.java

/**
 * Finds indexed documents whose "title" field matches the regular expression
 * {@code .*<parameters>.*} and copies the title and content of the top hits (at most ten)
 * into model documents.
 *
 * <p>{@code parameters} is inserted into the pattern verbatim, so any regex metacharacters
 * it contains are interpreted as regex syntax.
 *
 * @param parameters text to embed in the title regular expression
 * @return the matching documents in the order returned by the collector; empty when nothing matches
 * @throws Exception if opening, searching or closing the index fails
 */
public List<br.com.crawlerspring.model.Document> parametrizeDocuments(String parameters) throws Exception {
    List<br.com.crawlerspring.model.Document> parametrizedDocuments = new ArrayList<br.com.crawlerspring.model.Document>();

    RegexQuery q = new RegexQuery(new Term("title", ".*" + parameters + ".*"));
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // Stored fields must be read while the reader is still open.
        for (int cont = 0; cont < hits.length; ++cont) {
            br.com.crawlerspring.model.Document document = new br.com.crawlerspring.model.Document();
            int docId = hits[cont].doc;
            org.apache.lucene.document.Document luceneDocument = searcher.doc(docId);
            document.setTitle(luceneDocument.get("title"));
            document.setContent(luceneDocument.get("content"));

            parametrizedDocuments.add(document);
        }
    } finally {
        // The reader is opened by this call, so release it here even when the search fails.
        reader.close();
    }
    return parametrizedDocuments;
}

From source file:byrne.mitre.MitreQuery.java

License:Apache License

/**
 * Builds an OR query from the "ngrams" tokens of the entry's full name, searches for the
 * top {@code hitsPerPage} documents and writes one line per hit to {@code out} in the form
 * {@code <entry id>|<hit id>|<formatted score>}.
 *
 * <p>An {@link IOException} from tokenizing, searching or writing does not propagate;
 * it is reported on standard error together with the entry id.
 */
public void run() {

    try {

        TokenStream tokenStream = analyzer.tokenStream("ngrams", new StringReader(entry.getFullName()));

        BooleanQuery bq = new BooleanQuery();
        try {
            // Every token becomes an optional clause, so a document matching any n-gram is a hit.
            while (tokenStream.incrementToken()) {
                Term t = new Term("ngrams", tokenStream.getAttribute(TermAttribute.class).term());
                bq.add(new TermQuery(t), BooleanClause.Occur.SHOULD);
            }
        } finally {
            // Release the token stream once all tokens have been consumed (or on failure).
            tokenStream.close();
        }

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(bq, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        for (int i = 0; i < hits.length; ++i) {

            int docId = hits[i].doc;
            Document d = searcher.doc(docId);

            out.write(entry.getID() + "|" + d.get("id") + "|" + df.format(hits[i].score) + "\n");
        }
    } catch (IOException ioe) {
        // Previously swallowed silently; report the failure so lost results can be noticed.
        System.err.println("Query failed for entry " + entry.getID() + ": " + ioe);
    }
}

From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java

License:Apache License

/**
 * Searches the index for categories matching {@code pQueryString}, first against the
 * category-name field and then against the flattened-text field.
 *
 * <p>Both searches feed the same collector. Each hit is resolved to a category through the
 * data store; a category is added once, and only if it has at least one product.
 *
 * @param pQueryString query text, parsed with the query parser for each field
 * @return the matching categories; empty when nothing matches. If the index cannot be read
 *         or the query cannot be parsed, the error is logged and whatever was gathered so
 *         far is returned.
 */
@Override
public List<Category> queryCategories(String pQueryString) {
    List<Category> searchResult = new ArrayList<Category>();
    try {
        DirectoryReader reader = DirectoryReader.open(aDirectory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector results = TopScoreDocCollector.create(MAX_NUM_RESULTS, true);

            // Search category names
            Query categoryNameQuery = new QueryParser(VERSION, CATEGORY_NAME, aAnalyzer).parse(pQueryString);
            searcher.search(categoryNameQuery, results);

            // Search flattened text (only product names for now)
            Query flattenedTextQuery = new QueryParser(VERSION, FLATTENED_TEXT, aAnalyzer).parse(pQueryString);
            searcher.search(flattenedTextQuery, results);

            for (ScoreDoc scoredResult : results.topDocs().scoreDocs) {
                Document doc = searcher.doc(scoredResult.doc);
                Category resultCategory = aDataStore.getCategory(doc.get(CATEGORY_ID));

                if (!searchResult.contains(resultCategory) && resultCategory.getNumberOfProducts() > 0) {
                    searchResult.add(resultCategory);
                }
            }
        } finally {
            // A reader is opened on every call, so it must be released on every call.
            reader.close();
        }
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
    } catch (ParseException e) {
        LOG.error(e.getMessage(), e);
    }
    return searchResult;
}

From source file:com.andreig.jetty.Search.java

License:GNU General Public License

/**
 * Runs a term query for field {@code k} with value {@code v}, combined with {@code dbid}
 * through {@code add_dbid}, and loads the top {@code count} matching documents.
 *
 * @param dbid  database id handed to {@code add_dbid} together with the term query
 * @param k     field name of the term
 * @param v     field value of the term
 * @param count maximum number of hits to collect
 * @return the matching documents in collector order, or {@code null} when there are no hits
 * @throws IOException    if searching or loading a document fails
 * @throws ParseException declared by the signature
 */
public Document[] search(String dbid, String k, String v, int count) throws IOException, ParseException {
    Query termQuery = new TermQuery(new Term(k, v));
    Query scopedQuery = add_dbid(termQuery, dbid);

    TopScoreDocCollector topHits = TopScoreDocCollector.create(count, true);
    IndexSearcher searcher = sm.acquire();
    try {
        searcher.search(scopedQuery, topHits);
        ScoreDoc[] matches = topHits.topDocs().scoreDocs;
        if (matches.length == 0) {
            return null;
        }

        Document[] found = new Document[matches.length];
        int slot = 0;
        for (ScoreDoc match : matches) {
            found[slot++] = searcher.doc(match.doc);
        }
        return found;
    } finally {
        // Every acquired searcher is handed back, including on the early return above.
        sm.release(searcher);
    }
}

From source file:com.andreig.jetty.Search.java

License:GNU General Public License

/**
 * Parses the escaped query text {@code q}, combines it with {@code dbid} through
 * {@code add_dbid}, and loads the top {@code count} matching documents.
 *
 * @param dbid  database id handed to {@code add_dbid} together with the parsed query
 * @param q     raw query text; it is passed through {@code QueryParser.escape} before parsing
 * @param count maximum number of hits to collect
 * @return the matching documents in collector order, or {@code null} when there are no hits
 * @throws IOException    if searching or loading a document fails
 * @throws ParseException if the escaped text cannot be parsed
 */
public Document[] search2(String dbid, String q, int count) throws IOException, ParseException {
    String escapedText = QueryParser.escape(q);
    Query parsedQuery = tl.get().parse(escapedText);
    Query scopedQuery = add_dbid(parsedQuery, dbid);

    TopScoreDocCollector topHits = TopScoreDocCollector.create(count, true);
    IndexSearcher searcher = sm.acquire();
    try {
        searcher.search(scopedQuery, topHits);
        ScoreDoc[] matches = topHits.topDocs().scoreDocs;
        if (matches.length == 0) {
            return null;
        }

        Document[] found = new Document[matches.length];
        int slot = 0;
        for (ScoreDoc match : matches) {
            found[slot++] = searcher.doc(match.doc);
        }
        return found;
    } finally {
        // Every acquired searcher is handed back, including on the early return above.
        sm.release(searcher);
    }
}

From source file:com.barchart.feed.ddf.resolver.provider.ResolverDDF.java

License:BSD License

/**
 * Runs {@code query} against the current searcher and loads the matching documents.
 *
 * @param query the query to execute
 * @return at most {@code limit} documents, in collector order; empty when nothing matches
 * @throws Exception if obtaining the searcher, searching or loading a document fails
 */
private List<Document> searchDocument(final Query query) throws Exception {
    final IndexSearcher indexSearcher = getSearcher();
    final TopScoreDocCollector topHits = TopScoreDocCollector.create(limit, true);
    indexSearcher.search(query, topHits);

    final ScoreDoc[] matches = topHits.topDocs().scoreDocs;
    final int size = Math.min(matches.length, limit);
    log.debug("hits size : {}", size);

    final List<Document> documents = new ArrayList<Document>(size);
    int position = 0;
    while (position < size) {
        documents.add(indexSearcher.doc(matches[position].doc));
        position++;
    }
    return documents;
}

From source file:com.bizosys.hsearch.dictionary.DictionaryValues.java

License:Apache License

/**
 * Builds an exact (slop 0) phrase query on field "k" from the words of {@code query} and
 * returns the single best hit.
 *
 * <p>{@code words} and {@code terms} are caller-supplied scratch collections: both are
 * filled while the query is built and are empty again when this method returns normally.
 * The {@code idx} argument is not used here; the search runs against the {@code searcher}
 * available in the enclosing scope.
 *
 * @param idx      not used by this method
 * @param query    raw query text, passed to {@code fastSplit} with a space separator
 * @param analyzer analyzer used to parse each word
 * @param words    scratch list receiving the split words
 * @param terms    scratch set receiving the terms extracted from each parsed word
 * @return the score docs of the top hit (at most one element)
 * @throws ParseException        if a word cannot be parsed
 * @throws CorruptIndexException declared by the signature
 * @throws IOException           if the search fails
 */
private ScoreDoc[] searchTop(Directory idx, String query, Analyzer analyzer, List<String> words,
        Set<Term> terms) throws ParseException, CorruptIndexException, IOException {

    fastSplit(words, query, ' ');

    QueryParser wordParser = new QueryParser(Version.LUCENE_35, "k", analyzer);
    PhraseQuery phrase = new PhraseQuery();
    int position = 0;
    for (String word : words) {
        Query parsedWord = wordParser.parse(word);
        parsedWord.extractTerms(terms);
        // Each extracted term takes the next phrase position.
        for (Term extracted : terms) {
            phrase.add(extracted, position);
            position++;
        }
        terms.clear();
    }
    words.clear();
    phrase.setSlop(0);

    final int maxHits = 1;
    TopScoreDocCollector topHit = TopScoreDocCollector.create(maxHits, true);
    searcher.search(phrase, topHit);
    return topHit.topDocs().scoreDocs;
}

From source file:com.bsiag.smartfield.server.services.lookup.CityLookupService.java

License:Open Source License

/**
 * Executes {@code query} and turns each hit into a lookup row keyed by the "zip" field,
 * with the display text {@code "<zip> <city>"}.
 *
 * @param query       the query to execute
 * @param maxRowCount maximum number of hits to collect
 * @return one row per hit, in collector order; an empty array when nothing matches
 * @throws IOException           if searching or loading a document fails
 * @throws CorruptIndexException declared by the signature
 */
private LookupRow[] queryIndex(Query query, int maxRowCount) throws IOException, CorruptIndexException {
    TopScoreDocCollector topHits = TopScoreDocCollector.create(maxRowCount, true);
    searcher.search(query, topHits);
    ScoreDoc[] matches = topHits.topDocs().scoreDocs;

    LookupRow[] result = new LookupRow[matches.length];
    for (int i = 0; i < matches.length; i++) {
        Document stored = searcher.doc(matches[i].doc);
        String zip = stored.get("zip");
        String label = zip + " " + stored.get("city");
        result[i] = new LookupRow(zip, label);
    }
    return result;
}

From source file:com.chimpler.example.FacetLuceneAdvancedSearcher.java

License:Apache License

/**
 * Command-line entry point: runs a title query against an index and prints the top ten
 * hits followed by facet counts.
 *
 * <p>Expects exactly five arguments: index directory, taxonomy directory, query text,
 * drill-down field and drill-down value. With any other argument count it prints a usage
 * line to standard error and exits with status 1. Facet counts are requested for "author",
 * "book_category" and the drill-down path {@code <field>/<value>}.
 *
 * @param args the five positional arguments described above
 * @throws Exception if opening the index or taxonomy, parsing the query or searching fails
 */
public static void main(String args[]) throws Exception {
    if (args.length != 5) {
        System.err.println(
                "Parameters: [index directory] [taxonomy directory] [query] [field drilldown] [value drilldown]");
        System.exit(1);
    }

    final String indexPath = args[0];
    final String taxonomyPath = args[1];
    final String queryText = args[2];
    final String drillDownField = args[3];
    final String drillDownValue = args[4];

    IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyPath)));

    CategoryPath drillDownPath = new CategoryPath(drillDownField + "/" + drillDownValue, '/');

    // Request counts (up to 100 each) for the two fixed dimensions and the drill-down path.
    FacetSearchParams facetParams = new FacetSearchParams();
    CategoryPath[] countedPaths = { new CategoryPath("author"), new CategoryPath("book_category"),
            drillDownPath };
    for (CategoryPath countedPath : countedPaths) {
        facetParams.facetRequests.add(new CountFacetRequest(countedPath, 100));
    }

    StandardAnalyzer titleAnalyzer = new StandardAnalyzer(LUCENE_VERSION);
    ComplexPhraseQueryParser titleParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            titleAnalyzer);
    Query titleQuery = titleParser.parse(queryText);

    // One search pass feeds both the top-hits collector and the facets collector.
    TopScoreDocCollector topHits = TopScoreDocCollector.create(10, true);
    FacetsCollector facets = FacetsCollector.create(facetParams, indexReader, taxonomyReader);
    indexSearcher.search(titleQuery, MultiCollector.wrap(topHits, facets));

    System.out.println("Found:");
    ScoreDoc[] matches = topHits.topDocs().scoreDocs;
    for (int i = 0; i < matches.length; i++) {
        ScoreDoc match = matches[i];
        Document book = indexReader.document(match.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                book.get("id"), book.get("title"), book.get("book_category"),
                book.get("authors"), match.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facets.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode child : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", child.label.toString(), child.value);
            for (FacetResultNode grandChild : child.subResults) {
                System.out.printf("        - %s (%f)\n", grandChild.label.toString(),
                        grandChild.value);
            }
        }
    }

    taxonomyReader.close();
    indexReader.close();
}