List of usage examples for the org.apache.lucene.search.Sort constructor
public Sort(SortField... fields)
From source file:info.extensiblecatalog.OAIToolkit.db.LuceneSearcher.java
License:Open Source License
public String getLatestDatestamp() { String latest = null;/*from w w w . jav a 2s . c o m*/ try { IndexSearcher searcher = getSearcher(); Document doc = searcher.doc(searcher.maxDoc() - 1); Field[] flds = doc.getFields("modification_date"); // this field is stored in order (if it weren't we'd have to sort them first) // most recent is at the top of the list latest = flds[0].stringValue(); prglog.info("getLatestDatestamp:" + latest); // It's extremely possible that the max doc id is NOT the most recent record // we just need to narrow our range search to something reasonable Sort sort = new Sort(new SortField("modification_date", SortField.STRING, true)); String queryString = "+modification_date:[\"" + latest + "\" TO \"" + TextUtil.utcToMysqlTimestamp(TextUtil.nowInUTC()) + "\"]"; prglog.info("queryString for latest datestamp:" + queryString); TopDocs hits = search(queryString, sort, 1); if (hits.scoreDocs.length > 0) { int id = hits.scoreDocs[0].doc; doc = searcher.doc(id); flds = doc.getFields("modification_date"); latest = flds[0].stringValue(); } prglog.info("getLatestDatestamp pass two:" + latest); } catch (Exception e) { prglog.error("[PRG] " + e); } return latest; }
From source file:info.extensiblecatalog.OAIToolkit.oai.dataproviders.LuceneFacadeDataProvider.java
License:Open Source License
public int prepareQuery() { if (null != tokenId) { ResumptionTokenDTO tokenDTO = getSQLsFromResumptionToken(tokenId); if (tokenDTO == null) { badResumptionTokenError = true; } else {/*www . j a v a2 s .c om*/ queryString = tokenDTO.getQuery(); metadataPrefix = tokenDTO.getMetadataPrefix(); } if (cachedFullHarvestTokenIds.contains(tokenId)) { cachedFullHarvest = true; } else if (initialHarvest == 1) { // Uh-oh. This harvester using this resumption token had used the cached full harvest prior to this server's restart // This means it's STALE. We need to throw an exception here. //TODO: throw invalid resumption token error prglog.warn( "[PRG] A prior harvester is attempting to harvest via STALE (no longer viable) cached full harvest resumptionToken."); return -1; } } else { extractQueriesFromParameters(from, until, set); if (0 >= queryString.length()) { prglog.error("[PRG] query string is null"); } // Can we use the cached full harvest? (fast!) initializeCachedFullHarvest(); if (cachedFullHarvestIds == null) { prglog.warn("[PRG] The cached full harvest was not created for some reason (???)"); } else { if (set == null) { boolean fromIsTooRecent = true; boolean untilIsTooRecent = true; boolean untilIsTooOld = true; if (until == null) { until = TextUtil.nowInUTC(); } try { String queryString = "+modification_date:{\"" + cachedFullHarvestExpiry + "\" TO \"" + TextUtil.utcToMysqlTimestamp(until) + "\"}"; //prglog.info("testing if untilIsTooRecent, queryString:" + queryString); TopDocs h = ApplInfo.luceneSearcher.search(queryString); if (h.totalHits < 1) untilIsTooRecent = false; Date uts = TextUtil.utcToDate(until); Date lts = TextUtil.luceneToDate(cachedFullHarvestExpiry); //prglog.info("testing if untilTimestamp:" + uts + " is more recent than the oldest record:" + lts); if (uts.after(lts)) { untilIsTooOld = false; } } catch (ParseException pe) { prglog.error("[PRG] " + pe); } if (from == null) { fromIsTooRecent = false; } else { try { Date fts = 
TextUtil.utcToDate(from); if (fts.before(cachedFullHarvestEarliestDate)) fromIsTooRecent = false; //prglog.info("testing if fromTimestamp:" + fts + " is before oldest created rec:" + cachedFullHarvestEarliestDate); } catch (ParseException pe) { prglog.error("[PRG] " + pe); } } if (!fromIsTooRecent && !untilIsTooRecent && !untilIsTooOld) { cachedFullHarvest = true; } prglog.info("fromIsTooRecent:" + fromIsTooRecent + " untilIsTooRecent:" + untilIsTooRecent + " untilIsTooOld:" + untilIsTooOld); } } } if (cachedFullHarvest) { prglog.info("[PRG] We are using the cached full harvest for extra speed! (That's good!)"); // for all others, we perform the search each time } else { prglog.info("[PRG] We are not using the cached full harvest. (Standard query.)"); Sort sort = new Sort(new SortField("xc_id", SortField.INT)); try { // query recordLimit+1 (one extra) so that way we'll know if we're done with our list if (lastRecordRead > 0) { String from = String.format("%d", lastRecordRead); hits = ApplInfo.luceneSearcher.searchRange(queryString, "xc_id", Integer.valueOf(from), null, false, false, sort, recordLimit + 1); } else { hits = ApplInfo.luceneSearcher.search(queryString, sort, recordLimit + 1); } } catch (Exception ex) { hits = null; } } return cachedFullHarvest ? 1 : 0; }
From source file:io.crate.action.sql.query.CrateResultSorter.java
License:Apache License
/** * copied from SearchPhaseController, to manually set offset * * @param resultsArr Shard result holder * @param offset the number of results to skip * @param limit the number of results to return at max *//*w ww . ja va2 s .c o m*/ public ScoreDoc[] sortDocs(AtomicArray<? extends QuerySearchResultProvider> resultsArr, int offset, int limit) throws IOException { List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> results = resultsArr.asList(); if (results.isEmpty()) { return SearchPhaseController.EMPTY_DOCS; } if (optimizeSingleShard) { boolean canOptimize = false; QuerySearchResult result = null; int shardIndex = -1; if (results.size() == 1) { canOptimize = true; result = results.get(0).value.queryResult(); shardIndex = results.get(0).index; } else { // lets see if we only got hits from a single shard, if so, we can optimize... for (AtomicArray.Entry<? extends QuerySearchResultProvider> entry : results) { if (entry.value.queryResult().topDocs().scoreDocs.length > 0) { if (result != null) { // we already have one, can't really optimize canOptimize = false; break; } canOptimize = true; result = entry.value.queryResult(); shardIndex = entry.index; } } } if (canOptimize) { ScoreDoc[] scoreDocs = result.topDocs().scoreDocs; if (scoreDocs.length == 0 || scoreDocs.length < offset) { return SearchPhaseController.EMPTY_DOCS; } int resultDocsSize = result.size(); if ((scoreDocs.length - offset) < resultDocsSize) { resultDocsSize = scoreDocs.length - offset; } ScoreDoc[] docs = new ScoreDoc[resultDocsSize]; for (int i = 0; i < resultDocsSize; i++) { ScoreDoc scoreDoc = scoreDocs[offset + i]; scoreDoc.shardIndex = shardIndex; docs[i] = scoreDoc; } return docs; } } @SuppressWarnings("unchecked") AtomicArray.Entry<? 
extends QuerySearchResultProvider>[] sortedResults = results .toArray(new AtomicArray.Entry[results.size()]); Arrays.sort(sortedResults, SearchPhaseController.QUERY_RESULT_ORDERING); QuerySearchResultProvider firstResult = sortedResults[0].value; final Sort sort; if (firstResult.queryResult().topDocs() instanceof TopFieldDocs) { TopFieldDocs firstTopDocs = (TopFieldDocs) firstResult.queryResult().topDocs(); sort = new Sort(firstTopDocs.fields); } else { sort = null; } // Need to use the length of the resultsArr array, since the slots will be based on the position in the resultsArr array TopDocs[] shardTopDocs = new TopDocs[resultsArr.length()]; if (firstResult.includeFetch()) { // if we did both query and fetch on the same go, we have fetched all the docs from each shards already, use them... // this is also important since we shortcut and fetch only docs from "from" and up to "size" limit *= sortedResults.length; } for (AtomicArray.Entry<? extends QuerySearchResultProvider> sortedResult : sortedResults) { TopDocs topDocs = sortedResult.value.queryResult().topDocs(); // the 'index' field is the position in the resultsArr atomic array shardTopDocs[sortedResult.index] = topDocs; } // TopDocs#merge can't deal with null shard TopDocs for (int i = 0; i < shardTopDocs.length; i++) { if (shardTopDocs[i] == null) { shardTopDocs[i] = Lucene.EMPTY_TOP_DOCS; } } TopDocs mergedTopDocs = TopDocs.merge(sort, offset, limit, shardTopDocs); return mergedTopDocs.scoreDocs; }
From source file:io.crate.action.sql.query.CrateSearchService.java
License:Apache License
/**
 * Builds a Lucene sort with one sort field per symbol, in list order.
 *
 * @param context      search context passed to each sort-symbol context
 * @param symbols      symbols to sort by
 * @param reverseFlags per-symbol reverse flag, indexed like {@code symbols}
 * @param nullsFirst   per-symbol nulls-first setting, indexed like {@code symbols}
 * @return the sort, or {@code null} when {@code symbols} is empty
 */
@Nullable
private Sort generateLuceneSort(SearchContext context, List<Symbol> symbols, boolean[] reverseFlags,
        Boolean[] nullsFirst) {
    if (symbols.isEmpty()) {
        return null;
    }
    SortField[] fields = new SortField[symbols.size()];
    int position = 0;
    for (Symbol symbol : symbols) {
        SortSymbolContext symbolContext = new SortSymbolContext(context, reverseFlags[position],
                nullsFirst[position]);
        fields[position] = sortSymbolVisitor.generateSortField(symbol, symbolContext);
        position++;
    }
    return new Sort(fields);
}
From source file:io.crate.action.sql.query.LuceneSortGenerator.java
License:Apache License
/**
 * Translates an {@code OrderBy} into a Lucene sort.
 *
 * @param context            collector context handed to the sort-field generation
 * @param orderBy            supplies the symbols, reverse flags and nulls-first settings
 * @param inputSymbolVisitor visitor used to construct the {@code SortSymbolVisitor}
 * @return the sort, or {@code null} when {@code orderBy} has no symbols
 */
@Nullable
public static Sort generateLuceneSort(CollectorContext context, OrderBy orderBy,
        CollectInputSymbolVisitor<?> inputSymbolVisitor) {
    if (!orderBy.orderBySymbols().isEmpty()) {
        SortSymbolVisitor visitor = new SortSymbolVisitor(inputSymbolVisitor);
        return new Sort(visitor.generateSortFields(orderBy.orderBySymbols(), context, orderBy.reverseFlags(),
                orderBy.nullsFirst()));
    }
    return null;
}
From source file:io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollectorTest.java
License:Apache License
/**
 * Runs the "search after" query built for {@code lastCollected} and returns
 * the first sort value of every hit.
 *
 * @param reader        index reader to search
 * @param lastCollected last doc of the previous page, fed to the query builder
 * @param reverseFlag   reverse flag used for both the order-by and the sort field
 * @param nullFirst     nulls-first setting used for the order-by
 * @return one entry per hit; a value equal to the configured missing value is
 *         reported as {@code null}
 * @throws IOException declared by this method's signature
 */
private Long[] nextPageQuery(IndexReader reader, FieldDoc lastCollected, boolean reverseFlag,
        @Nullable Boolean nullFirst) throws IOException {
    OrderBy orderBy = new OrderBy(ImmutableList.of(REFERENCE), new boolean[] { reverseFlag },
            new Boolean[] { nullFirst });

    SortField valueSortField = new SortedNumericSortField("value", SortField.Type.LONG, reverseFlag);
    Long missingValue = (Long) LuceneMissingValue.missingValue(orderBy, 0);
    valueSortField.setMissingValue(missingValue);
    Sort sort = new Sort(valueSortField);

    OptimizeQueryForSearchAfter searchAfterBuilder = new OptimizeQueryForSearchAfter(orderBy,
            mock(QueryShardContext.class), name -> valueFieldType);
    Query pageQuery = searchAfterBuilder.apply(lastCollected);
    TopFieldDocs page = search(reader, pageQuery, sort);

    Long[] values = new Long[page.scoreDocs.length];
    for (int hit = 0; hit < values.length; hit++) {
        FieldDoc fieldDoc = (FieldDoc) page.scoreDocs[hit];
        Long sortValue = (Long) fieldDoc.fields[0];
        // The missing-value placeholder stands in for a document without a value.
        if (sortValue.equals(missingValue)) {
            values[hit] = null;
        } else {
            values[hit] = sortValue;
        }
    }
    return values;
}
From source file:io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollectorTest.java
License:Apache License
/**
 * Creates a {@code LuceneOrderedDocCollector} sorted by score, with the given
 * minimum score, score collection enabled and a batch size of 2.
 *
 * @param searcher         searcher the collector runs against
 * @param columnReferences expressions passed as both expression arguments
 * @param query            query to execute
 * @param minScore         minimum score passed to the collector; may be {@code null}
 * @return the configured collector
 */
private LuceneOrderedDocCollector collectorWithMinScore(IndexSearcher searcher,
        List<LuceneCollectorExpression<?>> columnReferences, Query query, @Nullable Float minScore) {
    ShardId shardId = new ShardId("dummy", UUIDs.base64UUID(), 0);
    CollectorContext collectorContext = new CollectorContext(mappedFieldType -> null,
            new CollectorFieldsVisitor(0));
    Sort byScore = new Sort(SortField.FIELD_SCORE);
    return new LuceneOrderedDocCollector(shardId, searcher, query, minScore, true, 2, collectorContext,
            f -> null, byScore, columnReferences, columnReferences);
}
From source file:io.crate.execution.engine.collect.collectors.OrderedLuceneBatchIteratorBenchmark.java
License:Apache License
private LuceneOrderedDocCollector createOrderedCollector(IndexSearcher searcher, String sortByColumnName) { List<LuceneCollectorExpression<?>> expressions = Collections .singletonList(new OrderByCollectorExpression(reference, orderBy, o -> o)); return new LuceneOrderedDocCollector(dummyShardId, searcher, new MatchAllDocsQuery(), null, false, 10_000_000, collectorContext, f -> null, new Sort(new SortedNumericSortField(sortByColumnName, SortField.Type.INT, reverseFlags[0])), expressions, expressions);//from ww w .j a v a2 s. c o m }
From source file:io.crate.execution.engine.collect.collectors.OrderedLuceneBatchIteratorFactoryTest.java
License:Apache License
private LuceneOrderedDocCollector createOrderedCollector(IndexSearcher searcher, int shardId) { CollectorContext collectorContext = new CollectorContext(mappedFieldType -> null, new CollectorFieldsVisitor(0)); List<LuceneCollectorExpression<?>> expressions = Collections .singletonList(new OrderByCollectorExpression(reference, orderBy, o -> o)); return new LuceneOrderedDocCollector(new ShardId("dummy", UUIDs.randomBase64UUID(), shardId), searcher, new MatchAllDocsQuery(), null, false, 5, // batchSize < 10 to have at least one searchMore call. collectorContext, f -> null, new Sort(new SortedNumericSortField(columnName, SortField.Type.LONG, reverseFlags[0])), expressions, expressions);/*from ww w .j a v a2 s . c om*/ }
From source file:io.crate.execution.engine.sort.LuceneSortGenerator.java
License:Apache License
/**
 * Translates an {@code OrderBy} into a Lucene sort.
 *
 * @param context         collector context handed to the sort-field generation
 * @param orderBy         supplies the symbols, reverse flags and nulls-first settings
 * @param docInputFactory factory used to construct the {@code SortSymbolVisitor}
 * @param fieldTypeLookup lookup used to construct the {@code SortSymbolVisitor}
 * @return the sort, or {@code null} when {@code orderBy} has no symbols
 */
@Nullable
public static Sort generateLuceneSort(CollectorContext context, OrderBy orderBy, DocInputFactory docInputFactory,
        FieldTypeLookup fieldTypeLookup) {
    if (!orderBy.orderBySymbols().isEmpty()) {
        SortSymbolVisitor visitor = new SortSymbolVisitor(docInputFactory, fieldTypeLookup);
        return new Sort(visitor.generateSortFields(orderBy.orderBySymbols(), context, orderBy.reverseFlags(),
                orderBy.nullsFirst()));
    }
    return null;
}