Example usage for the org.apache.lucene.search.Sort constructor Sort(SortField...)

Introduction

On this page you can find example usages of the org.apache.lucene.search.Sort constructor Sort(SortField... fields).

Prototype

public Sort(SortField... fields)

Source Link

Document

Sets the sort to the given criteria in succession: the first SortField is checked first, but if it produces a tie, then the second SortField is used to break the tie, etc.

Usage

From source file:info.extensiblecatalog.OAIToolkit.db.LuceneSearcher.java

License:Open Source License

/**
 * Determines the most recent {@code modification_date} value stored in the index.
 *
 * <p>Pass one reads the field from the document with the highest document id. Pass two
 * runs a range query from that value up to "now", sorted by {@code modification_date}
 * in reverse order, and takes the first hit if there is one.
 *
 * @return the latest datestamp found, or {@code null} if the index is empty, the last
 *         document has no {@code modification_date} field, or an error occurred
 */
public String getLatestDatestamp() {
    String latest = null;
    try {
        IndexSearcher searcher = getSearcher();
        int maxDoc = searcher.maxDoc();
        if (maxDoc < 1) {
            // Nothing indexed yet; asking for document -1 would only raise an exception.
            prglog.info("getLatestDatestamp: index is empty");
            return null;
        }
        Document doc = searcher.doc(maxDoc - 1);
        Field[] flds = doc.getFields("modification_date");
        if (flds == null || flds.length == 0) {
            prglog.info("getLatestDatestamp: last document has no modification_date");
            return null;
        }
        // this field is stored in order (if it weren't we'd have to sort them first)
        // most recent is at the top of the list
        latest = flds[0].stringValue();
        prglog.info("getLatestDatestamp:" + latest);

        // It's extremely possible that the max doc id is NOT the most recent record
        // we just need to narrow our range search to something reasonable
        Sort sort = new Sort(new SortField("modification_date", SortField.STRING, true));
        String queryString = "+modification_date:[\"" + latest + "\" TO \""
                + TextUtil.utcToMysqlTimestamp(TextUtil.nowInUTC()) + "\"]";
        prglog.info("queryString for latest datestamp:" + queryString);
        TopDocs hits = search(queryString, sort, 1);

        if (hits != null && hits.scoreDocs.length > 0) {
            Document newest = searcher.doc(hits.scoreDocs[0].doc);
            Field[] newestFlds = newest.getFields("modification_date");
            // Keep the pass-one value when the hit carries no modification_date.
            if (newestFlds != null && newestFlds.length > 0) {
                latest = newestFlds[0].stringValue();
            }
        }
        prglog.info("getLatestDatestamp pass two:" + latest);

    } catch (Exception e) {
        // Best effort: whatever was determined so far (possibly null) is returned.
        prglog.error("[PRG] " + e);
    }
    return latest;
}

From source file:info.extensiblecatalog.OAIToolkit.oai.dataproviders.LuceneFacadeDataProvider.java

License:Open Source License

/**
 * Prepares the search for the current request and decides whether the cached full
 * harvest can be used instead of running a query.
 *
 * <p>With a resumption token, the query string and metadata prefix are restored from the
 * token. Without one, the queries are derived from {@code from}/{@code until}/{@code set},
 * and the cached full harvest is chosen only when no set is given and all three date
 * window checks pass. When the cached full harvest is not used, the search sorted by
 * {@code xc_id} is executed and its result stored in {@code hits} ({@code null} if the
 * search failed).
 *
 * @return {@code 1} if the cached full harvest is used, {@code 0} if a standard query
 *         was run, or {@code -1} if the resumption token belongs to a stale cached full
 *         harvest
 */
public int prepareQuery() {
    if (null != tokenId) {
        ResumptionTokenDTO tokenDTO = getSQLsFromResumptionToken(tokenId);
        if (tokenDTO == null) {
            badResumptionTokenError = true;
        } else {
            queryString = tokenDTO.getQuery();
            metadataPrefix = tokenDTO.getMetadataPrefix();
        }
        if (cachedFullHarvestTokenIds.contains(tokenId)) {
            cachedFullHarvest = true;
        } else if (initialHarvest == 1) {
            // Uh-oh.  This harvester using this resumption token had used the cached full harvest prior to this server's restart
            // This means it's STALE.  We need to throw an exception here.
            //TODO: throw invalid resumption token error
            prglog.warn(
                    "[PRG] A prior harvester is attempting to harvest via STALE (no longer viable) cached full harvest resumptionToken.");
            return -1;
        }
    } else {
        extractQueriesFromParameters(from, until, set);
        // Null-safe: the message talks about a null query string, so do not dereference one.
        if (queryString == null || queryString.length() == 0) {
            prglog.error("[PRG] query string is null");
        }

        // Can we use the cached full harvest? (fast!)
        initializeCachedFullHarvest();
        if (cachedFullHarvestIds == null) {

            prglog.warn("[PRG] The cached full harvest was not created for some reason (???)");

        } else {

            if (set == null) {
                // Each flag starts pessimistic and is cleared only by a successful check.
                boolean fromIsTooRecent = true;
                boolean untilIsTooRecent = true;
                boolean untilIsTooOld = true;
                if (until == null) {
                    until = TextUtil.nowInUTC();
                }

                try {
                    // Named distinctly so it does not shadow the queryString field.
                    String expiryRangeQuery = "+modification_date:{\"" + cachedFullHarvestExpiry + "\" TO \""
                            + TextUtil.utcToMysqlTimestamp(until) + "\"}";
                    //prglog.info("testing if untilIsTooRecent, queryString:" + expiryRangeQuery);
                    TopDocs h = ApplInfo.luceneSearcher.search(expiryRangeQuery);
                    if (h.totalHits < 1) {
                        untilIsTooRecent = false;
                    }

                    Date uts = TextUtil.utcToDate(until);
                    Date lts = TextUtil.luceneToDate(cachedFullHarvestExpiry);
                    //prglog.info("testing if untilTimestamp:" + uts + " is more recent than the oldest record:" + lts);
                    if (uts.after(lts)) {
                        untilIsTooOld = false;
                    }
                } catch (ParseException pe) {
                    prglog.error("[PRG] " + pe);
                }

                if (from == null) {
                    fromIsTooRecent = false;
                } else {
                    try {
                        Date fts = TextUtil.utcToDate(from);
                        if (fts.before(cachedFullHarvestEarliestDate)) {
                            fromIsTooRecent = false;
                        }
                        //prglog.info("testing if fromTimestamp:" + fts + " is before oldest created rec:" + cachedFullHarvestEarliestDate);
                    } catch (ParseException pe) {
                        prglog.error("[PRG] " + pe);
                    }
                }
                if (!fromIsTooRecent && !untilIsTooRecent && !untilIsTooOld) {
                    cachedFullHarvest = true;
                }
                prglog.info("fromIsTooRecent:" + fromIsTooRecent + " untilIsTooRecent:" + untilIsTooRecent
                        + " untilIsTooOld:" + untilIsTooOld);

            }
        }
    }

    if (cachedFullHarvest) {

        prglog.info("[PRG] We are using the cached full harvest for extra speed! (That's good!)");

        // for all others, we perform the search each time
    } else {

        prglog.info("[PRG] We are not using the cached full harvest. (Standard query.)");

        Sort sort = new Sort(new SortField("xc_id", SortField.INT));
        try {
            // query recordLimit+1 (one extra) so that way we'll know if we're done with our list
            if (lastRecordRead > 0) {
                // Named distinctly so it does not shadow the from field.
                String lastReadId = String.format("%d", lastRecordRead);
                hits = ApplInfo.luceneSearcher.searchRange(queryString, "xc_id", Integer.valueOf(lastReadId), null,
                        false, false, sort, recordLimit + 1);
            } else {
                hits = ApplInfo.luceneSearcher.search(queryString, sort, recordLimit + 1);
            }
        } catch (Exception ex) {
            // Do not fail silently: record why there are no hits.
            prglog.error("[PRG] " + ex);
            hits = null;
        }
    }

    return cachedFullHarvest ? 1 : 0;

}

From source file:io.crate.action.sql.query.CrateResultSorter.java

License:Apache License

/**
 * Combines the per-shard query results into one array of hits, skipping {@code offset}
 * entries. Copied from SearchPhaseController, to manually set offset.
 *
 * <p>When {@code optimizeSingleShard} is set and only one shard contributed hits, that
 * shard's hits are sliced directly; otherwise the shard results are merged with
 * {@code TopDocs.merge}.
 *
 * @param resultsArr Shard result holder
 * @param offset the number of results to skip
 * @param limit the number of results to return at max
 * @return the selected hits, or {@code SearchPhaseController.EMPTY_DOCS} when there is
 *         nothing to return
 */
public ScoreDoc[] sortDocs(AtomicArray<? extends QuerySearchResultProvider> resultsArr, int offset, int limit)
        throws IOException {
    List<? extends AtomicArray.Entry<? extends QuerySearchResultProvider>> results = resultsArr.asList();
    if (results.isEmpty()) {
        return SearchPhaseController.EMPTY_DOCS;
    }

    if (optimizeSingleShard) {
        QuerySearchResult onlyResult = null;
        int onlyShardIndex = -1;
        boolean singleShard = false;
        if (results.size() == 1) {
            singleShard = true;
            onlyResult = results.get(0).value.queryResult();
            onlyShardIndex = results.get(0).index;
        } else {
            // Look for exactly one shard with hits; a second one rules the shortcut out.
            for (AtomicArray.Entry<? extends QuerySearchResultProvider> candidate : results) {
                if (candidate.value.queryResult().topDocs().scoreDocs.length == 0) {
                    continue;
                }
                if (onlyResult != null) {
                    singleShard = false;
                    break;
                }
                singleShard = true;
                onlyResult = candidate.value.queryResult();
                onlyShardIndex = candidate.index;
            }
        }
        if (singleShard) {
            ScoreDoc[] shardHits = onlyResult.topDocs().scoreDocs;
            if (shardHits.length == 0 || shardHits.length < offset) {
                return SearchPhaseController.EMPTY_DOCS;
            }

            // Never hand out more than what remains after the offset.
            int pageSize = Math.min(onlyResult.size(), shardHits.length - offset);
            ScoreDoc[] page = new ScoreDoc[pageSize];
            for (int src = offset, dst = 0; dst < pageSize; src++, dst++) {
                ScoreDoc hit = shardHits[src];
                hit.shardIndex = onlyShardIndex;
                page[dst] = hit;
            }
            return page;
        }
    }

    @SuppressWarnings("unchecked")
    AtomicArray.Entry<? extends QuerySearchResultProvider>[] orderedResults = results
            .toArray(new AtomicArray.Entry[results.size()]);
    Arrays.sort(orderedResults, SearchPhaseController.QUERY_RESULT_ORDERING);
    QuerySearchResultProvider head = orderedResults[0].value;

    // A sort is only available when the first result carries its sort fields.
    TopDocs headTopDocs = head.queryResult().topDocs();
    final Sort sort = (headTopDocs instanceof TopFieldDocs)
            ? new Sort(((TopFieldDocs) headTopDocs).fields)
            : null;

    int mergeLimit = limit;
    if (head.includeFetch()) {
        // if we did both query and fetch on the same go, we have fetched all the docs from each shards already, use them...
        // this is also important since we shortcut and fetch only docs from "from" and up to "size"
        mergeLimit *= orderedResults.length;
    }

    // Need to use the length of the resultsArr array, since the slots will be based on the position in the resultsArr array
    TopDocs[] perShard = new TopDocs[resultsArr.length()];
    for (AtomicArray.Entry<? extends QuerySearchResultProvider> shardResult : orderedResults) {
        // the 'index' field is the position in the resultsArr atomic array
        perShard[shardResult.index] = shardResult.value.queryResult().topDocs();
    }
    // TopDocs#merge can't deal with null shard TopDocs
    for (int slot = 0; slot < perShard.length; slot++) {
        if (perShard[slot] == null) {
            perShard[slot] = Lucene.EMPTY_TOP_DOCS;
        }
    }
    return TopDocs.merge(sort, offset, mergeLimit, perShard).scoreDocs;
}

From source file:io.crate.action.sql.query.CrateSearchService.java

License:Apache License

/**
 * Builds a Lucene {@code Sort} with one {@code SortField} per symbol, in list order.
 *
 * @param context      search context passed on to each {@code SortSymbolContext}
 * @param symbols      symbols to sort by
 * @param reverseFlags per-symbol flag, read at the symbol's position
 * @param nullsFirst   per-symbol flag, read at the symbol's position
 * @return the sort, or {@code null} if {@code symbols} is empty
 */
@Nullable
private Sort generateLuceneSort(SearchContext context, List<Symbol> symbols, boolean[] reverseFlags,
        Boolean[] nullsFirst) {
    if (symbols.isEmpty()) {
        return null;
    }
    SortField[] fields = new SortField[symbols.size()];
    int position = 0;
    for (Symbol symbol : symbols) {
        fields[position] = sortSymbolVisitor.generateSortField(symbol,
                new SortSymbolContext(context, reverseFlags[position], nullsFirst[position]));
        position++;
    }
    return new Sort(fields);
}

From source file:io.crate.action.sql.query.LuceneSortGenerator.java

License:Apache License

/**
 * Creates a Lucene {@code Sort} from the given order-by clause.
 *
 * @param context            collector context handed to the sort field generation
 * @param orderBy            supplies the order-by symbols, reverse flags and nulls-first flags
 * @param inputSymbolVisitor used to construct the {@code SortSymbolVisitor}
 * @return the sort, or {@code null} if {@code orderBy} has no symbols
 */
@Nullable
public static Sort generateLuceneSort(CollectorContext context, OrderBy orderBy,
        CollectInputSymbolVisitor<?> inputSymbolVisitor) {
    if (!orderBy.orderBySymbols().isEmpty()) {
        SortSymbolVisitor visitor = new SortSymbolVisitor(inputSymbolVisitor);
        return new Sort(visitor.generateSortFields(orderBy.orderBySymbols(), context,
                orderBy.reverseFlags(), orderBy.nullsFirst()));
    }
    return null;
}

From source file:io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollectorTest.java

License:Apache License

/**
 * Runs the query that {@code OptimizeQueryForSearchAfter} produces for
 * {@code lastCollected} and returns the first sort value of every hit.
 *
 * @param reader        index to search
 * @param lastCollected the document the next page should continue after
 * @param reverseFlag   reverse flag used for both the order-by and the sort field
 * @param nullFirst     nulls-first setting of the order-by
 * @return one entry per hit; hits whose value equals the configured missing value are
 *         reported as {@code null}
 */
private Long[] nextPageQuery(IndexReader reader, FieldDoc lastCollected, boolean reverseFlag,
        @Nullable Boolean nullFirst) throws IOException {
    OrderBy orderBy = new OrderBy(ImmutableList.of(REFERENCE), new boolean[] { reverseFlag },
            new Boolean[] { nullFirst });

    Long missingValue = (Long) LuceneMissingValue.missingValue(orderBy, 0);
    SortField sortField = new SortedNumericSortField("value", SortField.Type.LONG, reverseFlag);
    sortField.setMissingValue(missingValue);

    Query nextPageQuery = new OptimizeQueryForSearchAfter(orderBy, mock(QueryShardContext.class),
            name -> valueFieldType).apply(lastCollected);
    TopFieldDocs result = search(reader, nextPageQuery, new Sort(sortField));

    int hitCount = result.scoreDocs.length;
    // Slots start out null, so only values that differ from the missing value are stored.
    Long[] values = new Long[hitCount];
    for (int i = 0; i < hitCount; i++) {
        FieldDoc hit = (FieldDoc) result.scoreDocs[i];
        Long sortValue = (Long) hit.fields[0];
        if (!sortValue.equals(missingValue)) {
            values[i] = sortValue;
        }
    }
    return values;
}

From source file:io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollectorTest.java

License:Apache License

/**
 * Creates a {@code LuceneOrderedDocCollector} for a "dummy" shard that sorts by
 * {@code SortField.FIELD_SCORE} and is given the supplied minimum score.
 *
 * @param searcher         searcher the collector runs against
 * @param columnReferences expressions passed to the collector twice (both trailing arguments)
 * @param query            query to execute
 * @param minScore         minimum score handed to the collector, may be {@code null}
 * @return the configured collector
 */
private LuceneOrderedDocCollector collectorWithMinScore(IndexSearcher searcher,
        List<LuceneCollectorExpression<?>> columnReferences, Query query, @Nullable Float minScore) {
    ShardId shardId = new ShardId("dummy", UUIDs.base64UUID(), 0);
    CollectorContext collectorContext = new CollectorContext(mappedFieldType -> null,
            new CollectorFieldsVisitor(0));
    Sort scoreSort = new Sort(SortField.FIELD_SCORE);
    return new LuceneOrderedDocCollector(shardId, searcher, query, minScore, true, 2, collectorContext,
            f -> null, scoreSort, columnReferences, columnReferences);
}

From source file:io.crate.execution.engine.collect.collectors.OrderedLuceneBatchIteratorBenchmark.java

License:Apache License

/**
 * Creates a {@code LuceneOrderedDocCollector} over all documents, sorted on the given
 * column as an INT sorted-numeric field.
 *
 * @param searcher         searcher the collector runs against
 * @param sortByColumnName name of the field to sort by
 * @return the configured collector
 */
private LuceneOrderedDocCollector createOrderedCollector(IndexSearcher searcher, String sortByColumnName) {
    List<LuceneCollectorExpression<?>> orderByExpressions = Collections
            .singletonList(new OrderByCollectorExpression(reference, orderBy, o -> o));
    // Sort direction comes from the first reverse flag.
    Sort sort = new Sort(new SortedNumericSortField(sortByColumnName, SortField.Type.INT, reverseFlags[0]));
    return new LuceneOrderedDocCollector(dummyShardId, searcher, new MatchAllDocsQuery(), null, false,
            10_000_000, collectorContext, f -> null, sort, orderByExpressions, orderByExpressions);
}

From source file:io.crate.execution.engine.collect.collectors.OrderedLuceneBatchIteratorFactoryTest.java

License:Apache License

/**
 * Creates a {@code LuceneOrderedDocCollector} over all documents of a "dummy" shard,
 * sorted on {@code columnName} as a LONG sorted-numeric field.
 *
 * @param searcher searcher the collector runs against
 * @param shardId  numeric id used for the {@code ShardId}
 * @return the configured collector
 */
private LuceneOrderedDocCollector createOrderedCollector(IndexSearcher searcher, int shardId) {
    ShardId dummyShard = new ShardId("dummy", UUIDs.randomBase64UUID(), shardId);
    CollectorContext collectorContext = new CollectorContext(mappedFieldType -> null,
            new CollectorFieldsVisitor(0));
    List<LuceneCollectorExpression<?>> orderByExpressions = Collections
            .singletonList(new OrderByCollectorExpression(reference, orderBy, o -> o));
    Sort sort = new Sort(new SortedNumericSortField(columnName, SortField.Type.LONG, reverseFlags[0]));
    // batchSize < 10 to have at least one searchMore call.
    return new LuceneOrderedDocCollector(dummyShard, searcher, new MatchAllDocsQuery(), null, false, 5,
            collectorContext, f -> null, sort, orderByExpressions, orderByExpressions);
}

From source file:io.crate.execution.engine.sort.LuceneSortGenerator.java

License:Apache License

/**
 * Creates a Lucene {@code Sort} from the given order-by clause.
 *
 * @param context         collector context handed to the sort field generation
 * @param orderBy         supplies the order-by symbols, reverse flags and nulls-first flags
 * @param docInputFactory used to construct the {@code SortSymbolVisitor}
 * @param fieldTypeLookup used to construct the {@code SortSymbolVisitor}
 * @return the sort, or {@code null} if {@code orderBy} has no symbols
 */
@Nullable
public static Sort generateLuceneSort(CollectorContext context, OrderBy orderBy,
        DocInputFactory docInputFactory, FieldTypeLookup fieldTypeLookup) {
    boolean nothingToSortBy = orderBy.orderBySymbols().isEmpty();
    if (nothingToSortBy) {
        return null;
    }
    SortSymbolVisitor visitor = new SortSymbolVisitor(docInputFactory, fieldTypeLookup);
    return new Sort(visitor.generateSortFields(orderBy.orderBySymbols(), context,
            orderBy.reverseFlags(), orderBy.nullsFirst()));
}