List of usage examples for org.apache.lucene.search TopScoreDocCollector create
public static TopScoreDocCollector create(int numHits, int totalHitsThreshold) — note: this is the modern (Lucene 8+) signature; the examples below use the older Lucene 3.x/4.x overload create(int numHits, boolean docsScoredInOrder), which has since been removed.
From source file:net.chwise.websearch.SearchServlet.java
License:Open Source License
@Override public void doGet(HttpServletRequest req, HttpServletResponse resp) throws IOException { String queryText = req.getParameter("q"); if (queryText == null) queryText = ""; String[] smilesQueriesString = req.getParameterValues("sq"); //Join text query with structures query StringBuffer sb = new StringBuffer(); boolean nonEmptyQuery = isQuery(queryText); if (nonEmptyQuery) sb.append(queryText);//from w ww. ja va 2 s.co m if (smilesQueriesString != null) { for (String structSmiles : smilesQueriesString) { if (!isQuery(structSmiles)) continue; String escapedSmiles = QueryParser.escape(structSmiles); if (nonEmptyQuery) { sb.append(" AND "); } sb.append(" smiles:"); sb.append(escapedSmiles); nonEmptyQuery = true; } } String joinedTextChemicalQuery = sb.toString(); LOGGER.log(Level.INFO, "Query: {0}", joinedTextChemicalQuery); int from = 0; int numShow = 10; String strFrom = req.getParameter("from"); String strNumShow = req.getParameter("numShow"); if (strFrom != null) from = Integer.parseInt(strFrom); if (strNumShow != null) numShow = Math.min(Integer.parseInt(strNumShow), 20); int to = from + numShow; Integer[] fromTo = { new Integer(from), new Integer(to) }; LOGGER.log(Level.INFO, "Requested results range: from {0} to {1}", fromTo); JSONObject jsonResponse = new JSONObject(); JSONArray jsonResult = new JSONArray(); try { //Preapre for search String directorySourceClassName = getServletConfig().getInitParameter("directorySourceClassName"); String directorySourceParams = getServletConfig().getInitParameter("directorySourceParams"); Directory directory = directorySource.getDirectory(directorySourceClassName, directorySourceParams); IndexReader reader = null; reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); //Perform query Query query = null; Analyzer analyzer = getAnalyzer(); query = new MultiFieldQueryParser(Version.LUCENE_43, getTextFields(), analyzer, getFieldWeights()) .parse(joinedTextChemicalQuery); 
TopScoreDocCollector collector = TopScoreDocCollector.create(to, true); //TODO: use from, to searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int totalResults = collector.getTotalHits(); LOGGER.log(Level.INFO, "Found {0} documents", hits.length); //Wrap results into json object HighlightedFragmentsRetriever highlighter = new HighlightedFragmentsRetriever(); to = Math.min(to, hits.length); for (int i = from; i < to; ++i) { ScoreDoc hit = hits[i]; Document foundDoc = searcher.doc(hit.doc); JSONObject jsonDoc = extractJSON(query, analyzer, highlighter, foundDoc); jsonResult.put(jsonDoc); } jsonResponse.put("result", jsonResult); jsonResponse.put("total", totalResults); } catch (ParseException e) { JSONObject jsonFailure = SearchFailureJSONResponse.create("info", "We couldn't understand query", "Use quotes for phrase search. Use AND,OR,NOT for boolean search"); try { jsonResponse.put("failure", jsonFailure); } catch (JSONException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } } catch (RuntimeException e) { if (e.getCause() instanceof InvalidSmilesException) { JSONObject jsonFailure = SearchFailureJSONResponse.create("info", "We couldn't understand query", "Your structure formula doesn't seem like correct SMILES. Use structure editor for generating correct SMILES structures"); try { jsonResponse.put("failure", jsonFailure); } catch (JSONException e1) { e1.printStackTrace(); throw new RuntimeException(e1); } } else { e.printStackTrace(); throw e; } } catch (Exception e) { e.printStackTrace(); throw new RuntimeException("Exception in servlet SearchServlet", e); } resp.setContentType("application/json"); PrintWriter out = resp.getWriter(); out.print(jsonResponse); out.flush(); }
From source file:net.paissad.waqtsalat.utils.geoip.WorldCitiesLucene.java
License:Open Source License
/** * @param queryStr// www . java2s .co m * The <tt>String</tt>(location) to search into the * index/database. * @return A {@link List} of arrays of String containing the results. * * @throws IOException * @throws ParseException * */ public static List<String[]> search(String queryStr) throws IOException, ParseException { try { List<String[]> locations = new ArrayList<String[]>(); connectToIndex(); int hitsPerPage = 500; // TODO: when the String entry is escaped, the search is less // productive ... // will try to figure it out in the future // queryStr = QueryParser.escape(queryStr); Query q = new QueryParser(LUCENE_VERSION, FIELD_LOCATION, analyzer).parse(queryStr); IndexSearcher searcher = new IndexSearcher(IndexReader.open(getIndexDir())); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; logger.debug("Found {} hits.", hits.length); for (int i = 0; i < hits.length; i++) { int docID = hits[i].doc; Document doc = searcher.doc(docID); locations.add(doc.getValues(FIELD_LOCATION)); } return locations; } finally { closeIndex(); } }
From source file:net.sf.okapi.lib.tmdb.lucene.Seeker.java
License:Open Source License
private List<TmHit> getTopHits(Query query, String tmId, String locale, HashMap<String, String> attributes) throws IOException { IndexSearcher is = getIndexSearcher(); int maxHits = 0; List<TmHit> tmHitCandidates = new ArrayList<TmHit>(maxTopDocuments); String gtextFName = TmEntry.GTEXT_PREFIX + locale; String codesFName = TmEntry.CODES_PREFIX + locale; // Set filter data (TM id and other fields) QueryWrapperFilter filter = null;// ww w .ja v a2s.co m BooleanQuery bq = null; if (tmId != null) { bq = new BooleanQuery(); bq.add(new TermQuery(new Term(TmEntry.TMID_FIELDNAME, tmId)), BooleanClause.Occur.MUST); } bq = createQuery(attributes, bq); if (bq != null) { filter = new QueryWrapperFilter(bq); } // Collect hits in increments of maxTopDocuments until we have all the possible candidate hits TopScoreDocCollector topCollector; do { maxHits += maxTopDocuments; topCollector = TopScoreDocCollector.create(maxHits, true); is.search(query, filter, topCollector); } while (topCollector.getTotalHits() >= maxHits); // Go through the candidates and create TmHits from them TopDocs topDocs = topCollector.topDocs(); for (int i = 0; i < topDocs.scoreDocs.length; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; Document doc = getIndexSearcher().doc(scoreDoc.doc); // Build the hit TmHit tmHit = new TmHit(); tmHit.setId(getFieldValue(doc, TmEntry.ID_FIELDNAME)); tmHit.setScore(scoreDoc.score); tmHit.setSegKey(getFieldValue(doc, TmEntry.SEGKEY_FIELDNAME)); Variant variant = new Variant(locale, getFieldValue(doc, gtextFName), getFieldValue(doc, codesFName)); tmHit.setVariant(variant); // Add it to the list tmHitCandidates.add(tmHit); } // Remove duplicate hits ArrayList<TmHit> noDups = new ArrayList<TmHit>(new LinkedHashSet<TmHit>(tmHitCandidates)); return noDups; }
From source file:net.sourceforge.docfetcher.model.ScopeRegistry.java
License:Open Source License
/**
 * Performs a search on the given list of indexes for <tt>searchString</tt>
 * and returns an array of results.
 *
 * @param searchString
 *            The search string
 * @param searchScopes
 *            The list of indexes to use; when null, every registered root
 *            scope is searched
 * @return An array of results
 * @throws SearchException
 *             Thrown if no indexes have been created yet, if the
 *             <tt>searchString</tt> is invalid, or if an IOException
 *             occurred.
 */
public ResultDocument[] search(final String searchString, List<RootScope> searchScopes) throws SearchException {
    MultiSearcher multiSearcher = null;
    try {
        // Default to searching every registered index.
        if (searchScopes == null) {
            searchScopes = new ArrayList<RootScope>();
            for (final RootScope rootScope : rootScopes) {
                searchScopes.add(rootScope);
            }
        }
        // Build a lucene query object.
        QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, Document.contents, RootScope.analyzer);
        queryParser.setAllowLeadingWildcard(true);
        queryParser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
        // User preference decides whether terms are ORed or ANDed by default.
        if (!Pref.Bool.UseOrOperator.getValue())
            queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = queryParser.parse(searchString);
        // Check that all indexes still exist on disk before searching.
        for (RootScope rootScope : searchScopes)
            if (!rootScope.getIndexDir().exists())
                throw new SearchException(Msg.folders_not_found.value() + "\n" + //$NON-NLS-1$
                        rootScope.getIndexDir().getAbsolutePath());
        // Perform search: one IndexSearcher per index directory, combined
        // through a MultiSearcher.
        Searchable[] searchables = new Searchable[searchScopes.size()];
        int i = 0;
        for (RootScope rootScope : searchScopes) {
            Directory luceneIndexDir = new SimpleFSDirectory(rootScope.getIndexDir());
            searchables[i++] = new IndexSearcher(luceneIndexDir);
        }
        multiSearcher = new MultiSearcher(searchables);
        TopScoreDocCollector collector = TopScoreDocCollector.create(Pref.Int.MaxResultsTotal.getValue(), false);
        multiSearcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        // Process results.
        final ResultDocument[] results = new ResultDocument[hits.length];
        for (i = 0; i < results.length; i++)
            results[i] = new ResultDocument(multiSearcher.doc(hits[i].doc), hits[i].score, query);
        return results;
    } catch (final ParseException e) {
        throw new SearchException(Msg.invalid_query.value() + "\n" + e.getLocalizedMessage()); //$NON-NLS-1$
    } catch (final IOException e) {
        throw new SearchException(e.getLocalizedMessage());
    } finally {
        // Close the MultiSearcher; presumably this also releases the wrapped
        // per-directory searchers — TODO confirm against the Lucene version used.
        if (multiSearcher != null) {
            try {
                multiSearcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:newseman.lucene.whisperer.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search engine presents
 * pages of size n to the user. The user can then go to the next page if interested in
 * the next hits.
 *
 * When the query is executed for the first time, then only enough results are collected
 * to fill 5 result pages. If the user wants to page beyond this limit, then the query
 * is executed another time and all hits are collected.
 *
 * @param in          console input for the interactive prompts
 * @param searcher    searcher to run the query against
 * @param query       the parsed query
 * @param hitsPerPage page size n
 * @param raw         when true, print raw doc id / score instead of fields
 * @param interactive when false, print the first page and return
 */
public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {
    // Collect enough docs to show 5 pages.
    TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
    searcher.search(query, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;
    int numTotalHits = collector.getTotalHits();
    System.out.println(numTotalHits + " total matching documents");
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);
    while (true) {
        // The user paged past what we collected: offer to re-run the query
        // collecting every hit.
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            collector = TopScoreDocCollector.create(numTotalHits, false);
            searcher.search(query, collector);
            hits = collector.topDocs().scoreDocs;
        }
        end = Math.min(hits.length, start + hitsPerPage);
        // Print the current page [start, end).
        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            String key = doc.get("key");
            if (key != null) {
                System.out.println((i + 1) + ". " + key);
                String value = doc.get("value");
                if (value != null) {
                    System.out.println(" " + value + " [" + doc.get("mode") + "]");
                }
            } else {
                System.out.println((i + 1) + ". " + "No key for this document");
            }
        }
        if (!interactive) {
            break;
        }
        // Interactive navigation: previous/next page, jump to page, or quit.
        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");
                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    // Anything else is interpreted as a 1-based page number.
                    try {
                        int page = Integer.parseInt(line);
                        if ((page - 1) * hitsPerPage < numTotalHits) {
                            start = (page - 1) * hitsPerPage;
                            break;
                        } else {
                            System.out.println("No such page");
                        }
                    } catch (Exception e) {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}
From source file:NewsIR_search.NewsIRSearcher.java
public void retrieveAll() throws IOException, Exception { ScoreDoc[] hits = null;/*from ww w .ja v a 2 s . c om*/ TopDocs topDocs = null; DocVector docVector = new DocVector(propFileName); // queries = constructQueries(); // /* queries has all the raw data read from the query file like: // query_num, paper_title, paper_abtract, context etc. // */ System.out.println("Using BOW query:"); File file = new File(resultsFile); System.out.println("creating directory: " + file.getParentFile().getAbsolutePath()); if (!file.getParentFile().exists()) { System.out.println("creating directory: " + file.getParentFile().getName()); boolean result = false; try { file.getParentFile().mkdir(); result = true; } catch (SecurityException se) { //handle it } if (result) { System.out.println("DIR created"); } } FileWriter fw = new FileWriter(resultsFile); int query_searched_count = 0; for (TRECQuery q : queries) { System.out.println("Query: " + q.qId + ": "); setAnalyzer(); Query qry = q.getBOWQuery(getAnalyzer()); TopScoreDocCollector collector = TopScoreDocCollector.create(num_wanted, true); //System.out.println(qry.toString()); searcher.search(qry, collector); topDocs = collector.topDocs(); hits = topDocs.scoreDocs; if (hits == null) { System.out.println("Nothing found"); } /* writing the result in file */ StringBuilder buff = new StringBuilder(); String d[], date; int hits_length = hits.length; System.err.println("Searching in index :"); for (int i = 0; i < hits_length; ++i) { buff.append(q.qId).append(" Q0 ").append(hits[i].score).append(" ").append(run_name).append(" ") .append(docVector.reader.document(hits[i].doc).get("DOCNO")).append(" ") .append(docVector.reader.document(hits[i].doc).get("source")).append(" \n"); } fw.write(buff.toString()); /* writing the result in file ends */ query_searched_count++; } System.out.println(query_searched_count + " queries searched"); fw.close(); }
From source file:openlr.mapviewer.linesearch.model.LineNameModel.java
License:Apache License
/** * Performs a search on the index using the given query string. * //from ww w.j a v a 2s .c o m * @param queryStr * A valid Lucene query string. * @return the matching index elements * @throws ParseException * If an error occurs evaluating the search string. * @throws IOException * If an IO error occurs reading the index. */ private Collection<Document> searchMatchingDocs(final String queryStr) throws ParseException, IOException { Collection<Document> result; Query query = QUERY_PARSER.parse(queryStr); int hitsPerPage = MAX_HITS; // if the single IndexReader reader = IndexReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; if (LOG.isDebugEnabled()) { LOG.debug("Query " + queryStr + " Found " + hits.length + " hits."); } result = new ArrayList<Document>(hits.length); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; result.add(searcher.doc(docId)); } searcher.close(); return result; }
From source file:org.aksw.lucene.search.IndexSearch.java
License:Apache License
@Override public List<Place> findByDescription(Integer hitsPerPage, String indexPath, String queryString) throws IOException, ParseException { List<Place> result = new ArrayList<Place>(); Query query = null;/*from ww w . j a v a 2 s . c o m*/ query = new QueryParser(Version.LUCENE_43, IndexField.DESCRIPTION, analyzer).parse(queryString); File indexDir = new File(indexPath); IndexReader reader = IndexReader.open(FSDirectory.open(indexDir)); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document doc = searcher.doc(docId); List<String> docs = new ArrayList<String>(); Place place = new Place(); place.setName(doc.get(IndexField.DESCRIPTION)); place.setLatitude(doc.get(IndexField.LATITUDE)); place.setLongitude(doc.get(IndexField.LONGITUDE)); place.setUrl(doc.get(IndexField.URL)); place.setTypes(doc.get(IndexField.TYPES)); place.setCity(doc.get(IndexField.CITY)); result.add(place); } reader.close(); return result; }
From source file:org.apache.camel.component.lucene.LuceneSearcher.java
License:Apache License
private int doSearch(String searchPhrase, int maxNumberOfHits, Version luenceVersion) throws NullPointerException, ParseException, IOException { LOG.trace("*** Search Phrase: {} ***", searchPhrase); QueryParser parser = new QueryParser(luenceVersion, "contents", analyzer); Query query = parser.parse(searchPhrase); TopScoreDocCollector collector = TopScoreDocCollector.create(maxNumberOfHits, true); indexSearcher.search(query, collector); hits = collector.topDocs().scoreDocs; LOG.trace("*** Search generated {} hits ***", hits.length); return hits.length; }
From source file:org.apache.clerezza.rdf.cris.GraphIndexer.java
License:Apache License
/**
 * Find resources using conditions and collect facets and specify a sort
 * order.
 *
 * This method allows to specify the indices of the query results to return
 * (e.g. for pagination).
 *
 * @param conditions a list of conditions to construct a query from.
 * @param facetCollectors Facet collectors to apply to the query result. Can
 * be {@link Collections#EMPTY_LIST}, if not used.
 * @param sortSpecification Specifies the sort order. Can be null, if not
 * used.
 * @param from return results starting from this index (inclusive).
 * @param to return results until this index (exclusive).
 * @return a list of resources that match the query.
 *
 * @throws ParseException when the resulting query is illegal.
 */
public List<NonLiteral> findResources(List<? extends Condition> conditions, SortSpecification sortSpecification,
        List<FacetCollector> facetCollectors, int from, int to) throws ParseException {
    // Normalize the window: negative offsets start at 0, and an inverted
    // window is widened to a single element.
    if (from < 0) {
        from = 0;
    }
    if (to < from) {
        to = from + 1;
    }
    if (facetCollectors == null) {
        facetCollectors = Collections.EMPTY_LIST;
    }
    // AND all conditions together into one boolean query.
    BooleanQuery booleanQuery = new BooleanQuery();
    for (Condition c : conditions) {
        booleanQuery.add(c.query(), BooleanClause.Occur.MUST);
    }
    IndexSearcher searcher = luceneTools.getIndexSearcher();
    ScoreDoc[] hits = null;
    try {
        if (sortSpecification != null) {
            // Sorted search: reuse a cached Sort keyed by the sort-field array.
            SortFieldArrayWrapper fieldKey = new SortFieldArrayWrapper(sortSpecification.getSortFields());
            Sort sort = sortCache.get(fieldKey);
            if (sort == null) {
                sort = new Sort(sortSpecification.getSortFields());
                sortCache.put(fieldKey, sort);
            }
            //searcher.setDefaultFieldSortScoring(true, true);
            TopFieldDocs topFieldDocs = searcher.search(booleanQuery, null, to, sort);
            hits = topFieldDocs.scoreDocs;
        } else {
            // Unsorted search: collect the top 'to' docs by score.
            TopScoreDocCollector collector = TopScoreDocCollector.create(to, true);
            searcher.search(booleanQuery, collector);
            hits = collector.topDocs().scoreDocs;
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
    List<NonLiteral> result = new ArrayList<NonLiteral>();
    // Skip the first 'from' hits; the searches above are already capped at
    // 'to', so iterating to hits.length honors the requested window.
    for (int i = from; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d;
        try {
            d = searcher.doc(docId);
            collectFacets(facetCollectors, d);
            result.add(getResource(d));
        } catch (IOException ex) {
            // A single unreadable document is logged and skipped.
            logger.error("CRIS Error: ", ex);
        }
    }
    // Let each facet collector finalize its counts.
    for (FacetCollector facetCollector : facetCollectors) {
        facetCollector.postProcess();
    }
    return result;
}