org.apache.solr.search.federated.DJoinMergeStrategy.java Source code

Introduction

Here is the source code for org.apache.solr.search.federated.DJoinMergeStrategy.java
Source

package org.apache.solr.search.federated;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardDoc;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.CursorMark;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;

/**
 * Merge strategy for distributed requests: during merge, when encountering
 * docs with the same id as seen before, do not ignore them; rather, group
 * them together in the results.
 *
 * <p>
 * Every document of every successful shard response is turned into a
 * {@link ShardDoc} and pushed through a {@code ShardFieldSortedHitQueue}; the
 * surviving entries are recorded in an {@code AllShardsResultIds} map and a
 * {@code DuplicateDocumentList}. NOTE(review): the actual grouping of
 * same-id documents is presumably implemented by those helper classes, which
 * are not part of this file -- confirm there.
 */
public class DJoinMergeStrategy implements MergeStrategy {

    /**
     * @return always {@code true}; this strategy implements
     *         {@link #merge(ResponseBuilder, ShardRequest)}
     */
    @Override
    public boolean mergesIds() {
        return true;
    }

    /**
     * Merges the per-shard responses of {@code sreq} into {@code rb}.
     *
     * <p>
     * Side effects on {@code rb}: sets {@code rb.resultIds}, the response docs,
     * the next cursor mark (cursor requests only), logs the total hit count
     * under {@code "hits"}, optionally adds the {@code shards.info} section, and
     * flags {@code partialResults} in the response header when any shard failed
     * or itself reported partial results.
     *
     * @param rb   response builder of the distributed request being merged
     * @param sreq shard request whose {@code responses} are merged
     */
    @Override
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public void merge(ResponseBuilder rb, ShardRequest sreq) {
        SortSpec ss = rb.getSortSpec();
        Sort sort = ss.getSort();

        // Without an explicit sort, order by score.
        SortField[] sortFields = (sort != null) ? sort.getSort() : new SortField[] { SortField.FIELD_SCORE };

        IndexSchema schema = rb.req.getSchema();
        SchemaField uniqueKeyField = schema.getUniqueKeyField();

        // Merge the docs via a priority queue so we don't have to sort *all* of the
        // documents... we only need to order the top (rows+start)
        Map<String, NamedList> sortFieldValuesMap = new HashMap<>();
        Map<String, NamedList> unmarshalledSortFieldValuesMap = new HashMap<>();
        ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(unmarshalledSortFieldValuesMap, sortFields,
                ss.getOffset() + ss.getCount(), rb.req.getSearcher());

        // Only populated when the request asked for per-shard diagnostics.
        NamedList<Object> shardInfo = null;
        if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
            shardInfo = new SimpleOrderedMap<>();
            rb.rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
        }

        long numFound = 0;
        Float maxScore = null;
        boolean partialResults = false;
        for (ShardResponse srsp : sreq.responses) {
            String shard = srsp.getShard();
            // this hack is needed for test code since ShardResponse is so unfriendly
            if (shard == null) {
                shard = (String) srsp.getSolrResponse().getResponse().get("shard");
            }

            SolrDocumentList docs = null;

            if (shardInfo != null) {
                SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();

                if (srsp.getException() != null) {
                    // Report the underlying failure; falls back to the wrapper itself
                    // when it carries no cause (getCause() may legitimately be null).
                    Throwable t = unwrapShardException(srsp.getException());
                    nl.add("error", t.toString());
                    StringWriter trace = new StringWriter();
                    t.printStackTrace(new PrintWriter(trace));
                    nl.add("trace", trace.toString());
                    if (srsp.getShardAddress() != null) {
                        nl.add("shardAddress", srsp.getShardAddress());
                    }
                } else {
                    docs = (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");
                    nl.add("numFound", docs.getNumFound());
                    nl.add("maxScore", docs.getMaxScore());
                    nl.add("shardAddress", srsp.getShardAddress());
                }
                if (srsp.getSolrResponse() != null) {
                    nl.add("time", srsp.getSolrResponse().getElapsedTime());
                }

                shardInfo.add(shard, nl);
            }
            // now that we've added the shard info, let's only proceed if we have no error.
            if (srsp.getException() != null) {
                partialResults = true;
                continue;
            }

            if (docs == null) { // could have been initialized in the shards info block above
                docs = (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");
            }

            NamedList<?> responseHeader = (NamedList<?>) srsp.getSolrResponse().getResponse().get("responseHeader");
            if (responseHeader != null && Boolean.TRUE.equals(responseHeader.get("partialResults"))) {
                partialResults = true;
            }

            // calculate global maxScore and numDocsFound
            if (docs.getMaxScore() != null) {
                maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
            }
            numFound += docs.getNumFound();

            NamedList sortFieldValues = (NamedList) (srsp.getSolrResponse().getResponse().get("sort_values"));
            if (sortFieldValues == null) {
                // A response without a "sort_values" section is treated as carrying
                // no sort values instead of failing with a NullPointerException.
                sortFieldValues = new NamedList();
            }
            sortFieldValuesMap.put(shard, sortFieldValues);
            NamedList unmarshalledSortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema);
            unmarshalledSortFieldValuesMap.put(shard, unmarshalledSortFieldValues);

            // go through every doc in this response, construct a ShardDoc, and
            // put it in the priority queue so it can be ordered.
            for (int i = 0; i < docs.size(); i++) {
                SolrDocument doc = docs.get(i);
                Object id = doc.getFieldValue(uniqueKeyField.getName());
                Float score = parseScore(doc.getFieldValue("score"));

                ShardDoc shardDoc = new ShardDoc();
                shardDoc.id = id;
                shardDoc.shard = shard;
                shardDoc.orderInShard = i;
                if (score != null) {
                    shardDoc.score = score;
                }

                queue.insertWithReplacement(shardDoc);
            } // end for-each-doc-in-response
        } // end for-each-response

        // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
        // So we want to pop the last documents off the queue to get
        // the docs offset -> queuesize
        int resultSize = queue.size() - ss.getOffset();
        resultSize = Math.max(0, resultSize); // there may not be any docs in range

        // build resultIds, which is used to request fields from each shard, and initialise responseDocs
        DuplicateDocumentList responseDocs = new DuplicateDocumentList(resultSize, maxScore, numFound,
                ss.getOffset());
        Map<Object, ShardDoc> resultIds = new AllShardsResultIds(sreq.actualShards);
        // Popped entries are assigned positions from the back of the page to the front.
        for (int i = resultSize - 1; i >= 0; i--) {
            ShardDoc shardDoc = queue.pop();
            shardDoc.positionInResponse = i;

            // Need the toString() for correlation with other lists that must
            // be strings (like keys in highlighting, explain, etc)
            resultIds.put(shardDoc.id.toString(), shardDoc);

            // pre-populate responseDocs with this doc's (still marshalled) sort values
            NamedList docSortValues = sortFieldValuesMap.get(shardDoc.shard);
            NamedList sortValue = new NamedList();
            for (int j = 0; j < docSortValues.size(); ++j) {
                String fieldName = docSortValues.getName(j);
                List values = (List) docSortValues.getVal(j);
                sortValue.add(fieldName, values.get(shardDoc.orderInShard));
            }
            responseDocs.setParentDoc(shardDoc.positionInResponse, docSortValues.size() > 0 ? sortValue : null,
                    shardDoc.score);
        }

        // Add hits for distributed requests
        // https://issues.apache.org/jira/browse/SOLR-3518
        rb.rsp.addToLog("hits", numFound);

        // save these results in a private area so we can access them
        // again when retrieving stored fields.
        // TODO: use ResponseBuilder (w/ comments) or the request context?
        rb.resultIds = resultIds;
        rb.setResponseDocs(responseDocs);

        populateNextCursorMarkFromMergedShards(rb, unmarshalledSortFieldValuesMap);

        if (partialResults) {
            if (rb.rsp.getResponseHeader().get("partialResults") == null) {
                rb.rsp.getResponseHeader().add("partialResults", Boolean.TRUE);
            }
        }
    }

    /**
     * Picks the throwable to report for a failed shard: the cause of a
     * {@link SolrServerException} when one is present, otherwise the throwable
     * itself.
     *
     * @param t the exception recorded on the shard response; must not be null
     * @return a non-null throwable to describe in the shard info
     */
    private static Throwable unwrapShardException(Throwable t) {
        if (t instanceof SolrServerException && t.getCause() != null) {
            return t.getCause();
        }
        return t;
    }

    /**
     * Converts the raw {@code "score"} field value of a shard document to a
     * {@link Float}.
     *
     * @param scoreObj raw value; may be null, a {@link Number} or something whose
     *                 {@code toString()} parses as a float (e.g. a {@link String})
     * @return the score, or null when {@code scoreObj} is null
     * @throws NumberFormatException if a non-numeric value cannot be parsed
     */
    private static Float parseScore(Object scoreObj) {
        if (scoreObj == null) {
            return null;
        }
        if (scoreObj instanceof Float) {
            return (Float) scoreObj;
        }
        if (scoreObj instanceof Number) {
            return ((Number) scoreObj).floatValue();
        }
        return Float.parseFloat(scoreObj.toString());
    }

    /**
     * For cursor-based requests, computes the next cursor mark from the sort
     * values of the last document on the merged page and stores it on
     * {@code rb}. Does nothing when the request has no cursor mark; re-uses the
     * incoming cursor mark when the page is empty.
     *
     * @param rb                 response builder; {@code rb.resultIds} must
     *                           already be set
     * @param sortFieldValuesMap unmarshalled sort values per shard, keyed by shard
     *                           name, each mapping a sort field name to the list
     *                           of values in shard order
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private void populateNextCursorMarkFromMergedShards(ResponseBuilder rb,
            Map<String, NamedList> sortFieldValuesMap) {
        final CursorMark lastCursorMark = rb.getCursorMark();
        if (null == lastCursorMark) {
            // Not a cursor based request
            return; // NOOP
        }

        assert null != rb.resultIds : "resultIds was not set in ResponseBuilder";

        Collection<ShardDoc> docsOnThisPage = rb.resultIds.values();

        if (0 == docsOnThisPage.size()) {
            // nothing more matching query, re-use existing totem so user can "resume"
            // search later if it makes sense for this sort.
            rb.setNextCursorMark(lastCursorMark);
            return;
        }

        // The last doc of the page is the one with the highest positionInResponse.
        ShardDoc lastDoc = null;
        // ShardDoc and rb.resultIds are weird structures to work with...
        for (ShardDoc eachDoc : docsOnThisPage) {
            if (null == lastDoc || lastDoc.positionInResponse < eachDoc.positionInResponse) {
                lastDoc = eachDoc;
            }
        }
        SortField[] sortFields = lastCursorMark.getSortSpec().getSort().getSort();
        List<Object> nextCursorMarkValues = new ArrayList<>(sortFields.length);
        for (SortField sf : sortFields) {
            if (sf.getType().equals(SortField.Type.SCORE)) {
                nextCursorMarkValues.add(lastDoc.score);
            } else {
                assert null != sf.getField() : "SortField has null field";
                NamedList sortFieldValues = sortFieldValuesMap.get(lastDoc.shard);
                List<Object> fieldVals = (List<Object>) sortFieldValues.get(sf.getField());
                nextCursorMarkValues.add(fieldVals.get(lastDoc.orderInShard));
            }
        }
        CursorMark nextCursorMark = lastCursorMark.createNext(nextCursorMarkValues);
        assert null != nextCursorMark : "null nextCursorMark";
        rb.setNextCursorMark(nextCursorMark);
    }

    /**
     * Converts the marshalled sort values returned by one shard into their
     * unmarshalled form, field by field, using each sort field's
     * {@link FieldType#unmarshalSortValue(Object)}. Sort fields of type SCORE and
     * DOC are skipped because they have no entry in {@code sortFieldValues};
     * fields without a schema field are copied through unchanged.
     *
     * @param sortSpec        sort specification of the request
     * @param sortFieldValues marshalled values from the shard, one list per
     *                        serialized sort field; null is treated as empty
     * @param schema          index schema (not read by this method)
     * @return a new list mapping each sort field name to its unmarshalled values;
     *         empty when there is nothing to unmarshal
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private NamedList unmarshalSortValues(SortSpec sortSpec, NamedList sortFieldValues, IndexSchema schema) {
        NamedList unmarshalledSortValsPerField = new NamedList();

        if (null == sortFieldValues || 0 == sortFieldValues.size()) {
            return unmarshalledSortValsPerField;
        }

        List<SchemaField> schemaFields = sortSpec.getSchemaFields();
        SortField[] sortFields = sortSpec.getSort().getSort();

        // Index into sortFieldValues; advances only for fields that were serialized.
        int marshalledFieldNum = 0;
        for (int sortFieldNum = 0; sortFieldNum < sortFields.length; sortFieldNum++) {
            final SortField sortField = sortFields[sortFieldNum];
            final SortField.Type type = sortField.getType();

            // :TODO: would be simpler to always serialize every position of SortField[]
            if (type == SortField.Type.SCORE || type == SortField.Type.DOC) {
                continue;
            }

            final String sortFieldName = sortField.getField();
            final String valueFieldName = sortFieldValues.getName(marshalledFieldNum);
            assert sortFieldName
                    .equals(valueFieldName) : "sortFieldValues name key does not match expected SortField.getField";

            List sortVals = (List) sortFieldValues.getVal(marshalledFieldNum);

            final SchemaField schemaField = schemaFields.get(sortFieldNum);
            if (null == schemaField) {
                unmarshalledSortValsPerField.add(sortField.getField(), sortVals);
            } else {
                FieldType fieldType = schemaField.getType();
                List unmarshalledSortVals = new ArrayList(sortVals.size());
                for (Object sortVal : sortVals) {
                    unmarshalledSortVals.add(fieldType.unmarshalSortValue(sortVal));
                }
                unmarshalledSortValsPerField.add(sortField.getField(), unmarshalledSortVals);
            }
            marshalledFieldNum++;
        }
        return unmarshalledSortValsPerField;
    }

    /**
     * @return always {@code false}; this strategy does not implement field
     *         merging (see {@link #handleMergeFields(ResponseBuilder, SolrIndexSearcher)})
     */
    @Override
    public boolean handlesMergeFields() {
        return false;
    }

    /**
     * Intentionally a no-op, consistent with {@link #handlesMergeFields()}
     * returning {@code false}.
     *
     * @param rb       unused
     * @param searcher unused
     * @throws IOException never thrown by this implementation
     */
    @Override
    public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException {
        // do nothing (since handlesMergeFields is false)
    }

    /**
     * @return always {@code 0}. NOTE(review): how the cost is used to order
     *         strategies is defined by {@link MergeStrategy}, not here -- confirm.
     */
    @Override
    public int getCost() {
        return 0;
    }

}