Example usage for org.apache.lucene.search TopScoreDocCollector create

List of usage examples for org.apache.lucene.search TopScoreDocCollector create

Introduction

In this page you can find the example usage for org.apache.lucene.search TopScoreDocCollector create.

Prototype

public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) 

Source Link

Document

Creates a new TopScoreDocCollector given the number of hits to collect and the threshold up to which the total hit count is computed accurately.

Usage

From source file:net.chwise.websearch.SearchServlet.java

License:Open Source License

@Override
public void doGet(HttpServletRequest req, HttpServletResponse resp) throws IOException {
    // Read the free-text query ("q") and any SMILES structure queries ("sq").
    String queryText = req.getParameter("q");
    if (queryText == null)
        queryText = "";
    String[] smilesQueriesString = req.getParameterValues("sq");

    // Join the text query with the structure queries into a single Lucene query string.
    StringBuilder sb = new StringBuilder();
    boolean nonEmptyQuery = isQuery(queryText);
    if (nonEmptyQuery)
        sb.append(queryText);

    if (smilesQueriesString != null) {
        for (String structSmiles : smilesQueriesString) {

            if (!isQuery(structSmiles))
                continue;

            // Escape Lucene query-syntax characters inside the SMILES string.
            String escapedSmiles = QueryParser.escape(structSmiles);

            if (nonEmptyQuery) {
                sb.append(" AND ");
            }

            sb.append(" smiles:");
            sb.append(escapedSmiles);
            nonEmptyQuery = true;
        }
    }

    String joinedTextChemicalQuery = sb.toString();

    LOGGER.log(Level.INFO, "Query: {0}", joinedTextChemicalQuery);

    // Paging parameters: "from" is the offset, "numShow" the page size (capped at 20).
    int from = 0;
    int numShow = 10;

    String strFrom = req.getParameter("from");
    String strNumShow = req.getParameter("numShow");

    if (strFrom != null)
        from = Integer.parseInt(strFrom);

    if (strNumShow != null)
        numShow = Math.min(Integer.parseInt(strNumShow), 20);

    int to = from + numShow;

    Integer[] fromTo = { Integer.valueOf(from), Integer.valueOf(to) };
    LOGGER.log(Level.INFO, "Requested results range: from {0} to {1}", fromTo);

    JSONObject jsonResponse = new JSONObject();
    JSONArray jsonResult = new JSONArray();
    IndexReader reader = null;
    try {
        // Prepare for search
        String directorySourceClassName = getServletConfig().getInitParameter("directorySourceClassName");
        String directorySourceParams = getServletConfig().getInitParameter("directorySourceParams");
        Directory directory = directorySource.getDirectory(directorySourceClassName, directorySourceParams);

        reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);

        // Perform query
        Analyzer analyzer = getAnalyzer();
        Query query = new MultiFieldQueryParser(Version.LUCENE_43, getTextFields(), analyzer, getFieldWeights())
                .parse(joinedTextChemicalQuery);

        // Collect the top "to" hits; hits before "from" are skipped below.
        TopScoreDocCollector collector = TopScoreDocCollector.create(to, true); //TODO: use from, to
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        int totalResults = collector.getTotalHits();

        LOGGER.log(Level.INFO, "Found {0} documents", hits.length);

        // Wrap results into json object
        HighlightedFragmentsRetriever highlighter = new HighlightedFragmentsRetriever();

        to = Math.min(to, hits.length);

        for (int i = from; i < to; ++i) {
            ScoreDoc hit = hits[i];
            Document foundDoc = searcher.doc(hit.doc);

            JSONObject jsonDoc = extractJSON(query, analyzer, highlighter, foundDoc);
            jsonResult.put(jsonDoc);
        }

        jsonResponse.put("result", jsonResult);
        jsonResponse.put("total", totalResults);

    } catch (ParseException e) {
        // The user typed a query Lucene cannot parse; report it as a soft failure.
        JSONObject jsonFailure = SearchFailureJSONResponse.create("info", "We couldn't understand query",
                "Use quotes for phrase search. Use AND,OR,NOT for boolean search");
        try {
            jsonResponse.put("failure", jsonFailure);
        } catch (JSONException e1) {
            e1.printStackTrace();
            throw new RuntimeException(e1);
        }
    } catch (RuntimeException e) {
        if (e.getCause() instanceof InvalidSmilesException) {
            JSONObject jsonFailure = SearchFailureJSONResponse.create("info", "We couldn't understand query",
                    "Your structure formula doesn't seem like correct SMILES. Use structure editor for generating correct SMILES structures");
            try {
                jsonResponse.put("failure", jsonFailure);
            } catch (JSONException e1) {
                e1.printStackTrace();
                throw new RuntimeException(e1);
            }
        } else {
            e.printStackTrace();
            throw e;
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Exception in servlet SearchServlet", e);
    } finally {
        // FIX: close the index reader; it was previously leaked on every request.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    resp.setContentType("application/json");
    PrintWriter out = resp.getWriter();

    out.print(jsonResponse);
    out.flush();
}

From source file:net.paissad.waqtsalat.utils.geoip.WorldCitiesLucene.java

License:Open Source License

/**
 * @param queryStr
 *            The <tt>String</tt>(location) to search into the
 *            index/database.
 * @return A {@link List} of arrays of String containing the results.
 * 
 * @throws IOException
 * @throws ParseException
 * 
 */
/**
 * Searches the world-cities index for locations matching the query string.
 * The index connection is opened for the duration of the call and closed in
 * all cases via {@code closeIndex()}.
 */
public static List<String[]> search(String queryStr) throws IOException, ParseException {

    try {
        List<String[]> locations = new ArrayList<String[]>();
        connectToIndex();
        int hitsPerPage = 500;

        // TODO: when the String entry is escaped, the search is less
        // productive ... will try to figure it out in the future
        // queryStr = QueryParser.escape(queryStr);

        Query q = new QueryParser(LUCENE_VERSION, FIELD_LOCATION, analyzer).parse(queryStr);
        IndexReader reader = IndexReader.open(getIndexDir());
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
            searcher.search(q, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            logger.debug("Found {} hits.", hits.length);
            // Collect the location field values of every matching document.
            for (int i = 0; i < hits.length; i++) {
                Document doc = searcher.doc(hits[i].doc);
                locations.add(doc.getValues(FIELD_LOCATION));
            }
            return locations;
        } finally {
            // FIX: the reader opened above was previously never closed (resource leak).
            reader.close();
        }
    } finally {
        closeIndex();
    }
}

From source file:net.sf.okapi.lib.tmdb.lucene.Seeker.java

License:Open Source License

/**
 * Runs the query against the index, restricted by TM id and attribute
 * fields, and returns the de-duplicated candidate hits.
 */
private List<TmHit> getTopHits(Query query, String tmId, String locale, HashMap<String, String> attributes)
        throws IOException {
    IndexSearcher searcher = getIndexSearcher();
    List<TmHit> candidates = new ArrayList<TmHit>(maxTopDocuments);

    String gtextField = TmEntry.GTEXT_PREFIX + locale;
    String codesField = TmEntry.CODES_PREFIX + locale;

    // Build the filter query from the TM id (if any) plus the attribute fields.
    BooleanQuery filterQuery = null;
    if (tmId != null) {
        filterQuery = new BooleanQuery();
        filterQuery.add(new TermQuery(new Term(TmEntry.TMID_FIELDNAME, tmId)), BooleanClause.Occur.MUST);
    }
    filterQuery = createQuery(attributes, filterQuery);
    QueryWrapperFilter filter = (filterQuery == null) ? null : new QueryWrapperFilter(filterQuery);

    // Grow the collection window in steps of maxTopDocuments until the
    // collector has room for every possible candidate hit.
    TopScoreDocCollector topCollector;
    int window = 0;
    do {
        window += maxTopDocuments;
        topCollector = TopScoreDocCollector.create(window, true);
        searcher.search(query, filter, topCollector);
    } while (topCollector.getTotalHits() >= window);

    // Convert each scored document into a TmHit.
    for (ScoreDoc scoreDoc : topCollector.topDocs().scoreDocs) {
        Document doc = getIndexSearcher().doc(scoreDoc.doc);
        TmHit tmHit = new TmHit();
        tmHit.setId(getFieldValue(doc, TmEntry.ID_FIELDNAME));
        tmHit.setScore(scoreDoc.score);
        tmHit.setSegKey(getFieldValue(doc, TmEntry.SEGKEY_FIELDNAME));
        tmHit.setVariant(new Variant(locale, getFieldValue(doc, gtextField), getFieldValue(doc, codesField)));
        candidates.add(tmHit);
    }

    // Remove duplicate hits while preserving first-seen order.
    return new ArrayList<TmHit>(new LinkedHashSet<TmHit>(candidates));
}

From source file:net.sourceforge.docfetcher.model.ScopeRegistry.java

License:Open Source License

/**
 * Performs a search on the given list of indexes for <tt>searchString</tt>
 * and returns an array of results.
 * 
 * @param searchString
 *            The search string
 * @param searchScopes
 *            The list of indexes to use
 * @return An array of results
 * @throws SearchException
 *             Thrown if no indexes have been created yet, if the
 *             <tt>searchString</tt> is invalid, or if an IOException
 *             occurred.
 */
public ResultDocument[] search(final String searchString, List<RootScope> searchScopes) throws SearchException {
    MultiSearcher multiSearcher = null;
    Searchable[] searchables = null;
    try {
        // Default to searching all registered indexes.
        if (searchScopes == null) {
            searchScopes = new ArrayList<RootScope>();

            for (final RootScope rootScope : rootScopes) {
                searchScopes.add(rootScope);
            }
        }

        // Build a lucene query object
        QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, Document.contents,
                RootScope.analyzer);
        queryParser.setAllowLeadingWildcard(true);
        queryParser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
        if (!Pref.Bool.UseOrOperator.getValue())
            queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.parse(searchString);

        // Check that all indexes still exist
        for (RootScope rootScope : searchScopes)
            if (!rootScope.getIndexDir().exists())
                throw new SearchException(Msg.folders_not_found.value() + "\n" + //$NON-NLS-1$
                        rootScope.getIndexDir().getAbsolutePath());

        // Open one searcher per index and combine them for the search.
        searchables = new Searchable[searchScopes.size()];
        int i = 0;
        for (RootScope rootScope : searchScopes) {
            Directory luceneIndexDir = new SimpleFSDirectory(rootScope.getIndexDir());
            searchables[i++] = new IndexSearcher(luceneIndexDir);
        }
        multiSearcher = new MultiSearcher(searchables);

        TopScoreDocCollector collector = TopScoreDocCollector.create(Pref.Int.MaxResultsTotal.getValue(),
                false);
        multiSearcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // Process results
        final ResultDocument[] results = new ResultDocument[hits.length];
        for (i = 0; i < results.length; i++)
            results[i] = new ResultDocument(multiSearcher.doc(hits[i].doc), hits[i].score, query);

        return results;
    } catch (final ParseException e) {
        throw new SearchException(Msg.invalid_query.value() + "\n" + e.getLocalizedMessage()); //$NON-NLS-1$
    } catch (final IOException e) {
        throw new SearchException(e.getLocalizedMessage());
    } finally {
        if (multiSearcher != null) {
            try {
                // Closing the MultiSearcher also closes the underlying searchables.
                multiSearcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else if (searchables != null) {
            // FIX: if an error occurred before the MultiSearcher was constructed,
            // close any searchers that were already opened (previously leaked).
            for (Searchable searchable : searchables) {
                if (searchable == null)
                    continue;
                try {
                    searchable.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

From source file:newseman.lucene.whisperer.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search engine presents 
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 * 
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");

    // start/end delimit the page currently being displayed (indices into hits).
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        // The user paged beyond what was collected so far: offer to re-run the
        // query collecting ALL hits, or stop here.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            collector = TopScoreDocCollector.create(numTotalHits, false);
            searcher.search(query, collector);
            hits = collector.topDocs().scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        // Print the current page, either raw (doc id + score) or formatted
        // from the stored "key"/"value"/"mode" fields.
        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }

            Document doc = searcher.doc(hits[i].doc);
            String key = doc.get("key");
            if (key != null) {
                System.out.println((i + 1) + ". " + key);
                String value = doc.get("value");
                if (value != null) {
                    System.out.println("   " + value + " [" + doc.get("mode") + "]");
                }
            } else {
                System.out.println((i + 1) + ". " + "No key for this document");
            }

        }

        // Non-interactive mode shows only the first page.
        if (!interactive) {
            break;
        }

        // Prompt loop: (p)revious / (n)ext / (q)uit / page number.
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    try {
                        // Any other input is interpreted as a 1-based page number.
                        int page = Integer.parseInt(line);
                        if ((page - 1) * hitsPerPage < numTotalHits) {
                            start = (page - 1) * hitsPerPage;
                            break;
                        } else {
                            System.out.println("No such page");
                        }
                    } catch (Exception e) {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }

    }

}

From source file:NewsIR_search.NewsIRSearcher.java

/**
 * Runs every query in {@code queries} against the index and writes the
 * ranked results to {@code resultsFile} in a TREC-run-like format.
 * The parent directory of the results file is created if missing.
 */
public void retrieveAll() throws IOException, Exception {
    ScoreDoc[] hits = null;
    TopDocs topDocs = null;

    DocVector docVector = new DocVector(propFileName);

    System.out.println("Using BOW query:");

    // Make sure the parent directory of the results file exists.
    File file = new File(resultsFile);
    System.out.println("creating directory: " + file.getParentFile().getAbsolutePath());
    if (!file.getParentFile().exists()) {
        System.out.println("creating directory: " + file.getParentFile().getName());
        boolean result = false;

        try {
            file.getParentFile().mkdir();
            result = true;
        } catch (SecurityException se) {
            // Creation not permitted; the FileWriter below will fail and surface the problem.
        }
        if (result) {
            System.out.println("DIR created");
        }
    }

    FileWriter fw = new FileWriter(resultsFile);
    try {
        int query_searched_count = 0;
        for (TRECQuery q : queries) {

            System.out.println("Query: " + q.qId + ": ");

            setAnalyzer();
            Query qry = q.getBOWQuery(getAnalyzer());

            // Collect the top num_wanted documents for this query.
            TopScoreDocCollector collector = TopScoreDocCollector.create(num_wanted, true);

            searcher.search(qry, collector);

            topDocs = collector.topDocs();
            hits = topDocs.scoreDocs;
            if (hits == null) {
                System.out.println("Nothing found");
            }

            /* writing the result in file */
            StringBuilder buff = new StringBuilder();

            int hits_length = hits.length;
            System.err.println("Searching in index :");
            for (int i = 0; i < hits_length; ++i) {
                // FIX: fetch the document once per hit instead of twice.
                Document hitDoc = docVector.reader.document(hits[i].doc);
                buff.append(q.qId).append(" Q0 ").append(hits[i].score).append(" ").append(run_name).append(" ")
                        .append(hitDoc.get("DOCNO")).append(" ")
                        .append(hitDoc.get("source")).append(" \n");
            }
            fw.write(buff.toString());
            /* writing the result in file ends */
            query_searched_count++;
        }

        System.out.println(query_searched_count + " queries searched");
    } finally {
        // FIX: close the writer even when a search throws (previously leaked).
        fw.close();
    }
}

From source file:openlr.mapviewer.linesearch.model.LineNameModel.java

License:Apache License

/**
 * Performs a search on the index using the given query string.
 *
 * @param queryStr
 *            A valid Lucene query string.
 * @return the matching index elements
 * @throws ParseException
 *             If an error occurs evaluating the search string.
 * @throws IOException
 *             If an IO error occurs reading the index.
 */
private Collection<Document> searchMatchingDocs(final String queryStr) throws ParseException, IOException {

    Query query = QUERY_PARSER.parse(queryStr);

    int hitsPerPage = MAX_HITS; // if the single
    IndexReader reader = IndexReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    try {
        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        if (LOG.isDebugEnabled()) {
            LOG.debug("Query " + queryStr + " Found " + hits.length + " hits.");
        }

        // Materialize the matching documents.
        Collection<Document> result = new ArrayList<Document>(hits.length);
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            result.add(searcher.doc(docId));
        }
        return result;
    } finally {
        // FIX: always close the searcher (previously skipped on exception),
        // and close the reader, which was previously never closed at all.
        searcher.close();
        reader.close();
    }
}

From source file:org.aksw.lucene.search.IndexSearch.java

License:Apache License

@Override
public List<Place> findByDescription(Integer hitsPerPage, String indexPath, String queryString)
        throws IOException, ParseException {

    List<Place> result = new ArrayList<Place>();

    // Parse the user query against the description field.
    Query query = new QueryParser(Version.LUCENE_43, IndexField.DESCRIPTION, analyzer).parse(queryString);

    File indexDir = new File(indexPath);

    IndexReader reader = IndexReader.open(FSDirectory.open(indexDir));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);

        TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
        searcher.search(query, collector);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        // Map each stored document onto a Place bean.
        for (int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document doc = searcher.doc(docId);

            Place place = new Place();
            place.setName(doc.get(IndexField.DESCRIPTION));
            place.setLatitude(doc.get(IndexField.LATITUDE));
            place.setLongitude(doc.get(IndexField.LONGITUDE));
            place.setUrl(doc.get(IndexField.URL));
            place.setTypes(doc.get(IndexField.TYPES));
            place.setCity(doc.get(IndexField.CITY));

            result.add(place);
        }
    } finally {
        // FIX: close the reader even when parsing/searching throws
        // (previously skipped on the exception path).
        reader.close();
    }

    return result;
}

From source file:org.apache.camel.component.lucene.LuceneSearcher.java

License:Apache License

/**
 * Parses the search phrase against the "contents" field, runs it on the
 * index searcher, stores the scored hits, and returns the hit count.
 */
private int doSearch(String searchPhrase, int maxNumberOfHits, Version luenceVersion)
        throws NullPointerException, ParseException, IOException {
    LOG.trace("*** Search Phrase: {} ***", searchPhrase);

    Query parsedQuery = new QueryParser(luenceVersion, "contents", analyzer).parse(searchPhrase);
    TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(maxNumberOfHits, true);
    indexSearcher.search(parsedQuery, topDocsCollector);
    hits = topDocsCollector.topDocs().scoreDocs;

    LOG.trace("*** Search generated {} hits ***", hits.length);
    return hits.length;
}

From source file:org.apache.clerezza.rdf.cris.GraphIndexer.java

License:Apache License

/**
 * Find resources using conditions and collect facets and specify a sort
 * order.
 *
 * This method allows to specify the indices of the query results to return
 * (e.g. for pagination).
 *
 * @param conditions a list of conditions to construct a query from.
 * @param facetCollectors Facet collectors to apply to the query result. Can
 * be {@link Collections#EMPTY_LIST}, if not used.
 * @param sortSpecification Specifies the sort order. Can be null, if not
 * used.
 * @param from return results starting from this index (inclusive).
 * @param to return results until this index (exclusive).
 * @return a list of resources that match the query.
 *
 * @throws ParseException when the resulting query is illegal.
 */
public List<NonLiteral> findResources(List<? extends Condition> conditions, SortSpecification sortSpecification,
        List<FacetCollector> facetCollectors, int from, int to) throws ParseException {

    // Normalize the requested result window.
    if (from < 0) {
        from = 0;
    }

    if (to < from) {
        to = from + 1;
    }

    if (facetCollectors == null) {
        // FIX: typed factory instead of the raw EMPTY_LIST constant
        // (avoids an unchecked assignment).
        facetCollectors = Collections.<FacetCollector>emptyList();
    }

    // AND all conditions together into one boolean query.
    BooleanQuery booleanQuery = new BooleanQuery();
    for (Condition c : conditions) {
        booleanQuery.add(c.query(), BooleanClause.Occur.MUST);
    }

    IndexSearcher searcher = luceneTools.getIndexSearcher();
    ScoreDoc[] hits = null;
    try {
        if (sortSpecification != null) {
            // Reuse cached Sort instances keyed by their sort-field array.
            SortFieldArrayWrapper fieldKey = new SortFieldArrayWrapper(sortSpecification.getSortFields());
            Sort sort = sortCache.get(fieldKey);
            if (sort == null) {
                sort = new Sort(sortSpecification.getSortFields());
                sortCache.put(fieldKey, sort);
            }
            //searcher.setDefaultFieldSortScoring(true, true);
            TopFieldDocs topFieldDocs = searcher.search(booleanQuery, null, to, sort);
            hits = topFieldDocs.scoreDocs;
        } else {
            TopScoreDocCollector collector = TopScoreDocCollector.create(to, true);
            searcher.search(booleanQuery, collector);
            hits = collector.topDocs().scoreDocs;
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    // Materialize the slice [from, hits.length) and feed each document
    // to the facet collectors along the way.
    List<NonLiteral> result = new ArrayList<NonLiteral>();

    for (int i = from; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d;
        try {
            d = searcher.doc(docId);
            collectFacets(facetCollectors, d);
            result.add(getResource(d));
        } catch (IOException ex) {
            logger.error("CRIS Error: ", ex);
        }
    }

    for (FacetCollector facetCollector : facetCollectors) {
        facetCollector.postProcess();
    }

    return result;
}