org.apache.solr.search.MultiCollector.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.solr.search.MultiCollector.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource;

import java.io.IOException;
import java.util.*;

/**
 * A {@link Collector} that fans every callback out to a fixed array of
 * delegate collectors, in array order.
 */
public class MultiCollector extends Collector {
    /** The delegates; each callback is forwarded to every element in order. */
    final Collector[] collectors;
    /** True only when every delegate accepts out-of-order documents. */
    final boolean acceptsDocsOutOfOrder;

    /**
     * Returns the single element of {@code collectors} when the list holds
     * exactly one collector; otherwise wraps the whole list in a new
     * {@code MultiCollector}.
     *
     * @param collectors the collectors to combine
     * @return the lone collector, or a {@code MultiCollector} over the list
     */
    public static Collector wrap(List<? extends Collector> collectors) {
        if (collectors.size() == 1) {
            return collectors.get(0);
        }
        return new MultiCollector(collectors);
    }

    /**
     * Returns the delegates of {@code collector} when it is a
     * {@code MultiCollector} (the internal array itself, not a copy);
     * otherwise a one-element array containing {@code collector}.
     *
     * @param collector the collector to unwrap
     * @return the underlying collectors
     */
    public static Collector[] subCollectors(Collector collector) {
        if (!(collector instanceof MultiCollector)) {
            return new Collector[] { collector };
        }
        MultiCollector multi = (MultiCollector) collector;
        return multi.collectors;
    }

    /**
     * Creates a collector that delegates to the elements of {@code collectors}.
     *
     * @param collectors the delegates, copied into an array
     */
    public MultiCollector(List<? extends Collector> collectors) {
        this(collectors.toArray(new Collector[collectors.size()]));
    }

    /**
     * Creates a collector that delegates to {@code collectors}. The array is
     * stored as given (no defensive copy).
     *
     * @param collectors the delegates
     */
    public MultiCollector(Collector[] collectors) {
        this.collectors = collectors;
        this.acceptsDocsOutOfOrder = allAcceptOutOfOrder(collectors);
    }

    /** Returns false as soon as one delegate refuses out-of-order docs, true otherwise. */
    private static boolean allAcceptOutOfOrder(Collector[] delegates) {
        for (int i = 0; i < delegates.length; i++) {
            if (!delegates[i].acceptsDocsOutOfOrder()) {
                return false;
            }
        }
        return true;
    }

    /** Forwards the scorer to every delegate. */
    @Override
    public void setScorer(Scorer scorer) throws IOException {
        for (int i = 0; i < collectors.length; i++) {
            collectors[i].setScorer(scorer);
        }
    }

    /** Forwards the document to every delegate. */
    @Override
    public void collect(int doc) throws IOException {
        for (int i = 0; i < collectors.length; i++) {
            collectors[i].collect(doc);
        }
    }

    /** Forwards the new reader and its doc base to every delegate. */
    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
        for (int i = 0; i < collectors.length; i++) {
            collectors[i].setNextReader(reader, docBase);
        }
    }

    /** Returns the flag computed at construction time. */
    @Override
    public boolean acceptsDocsOutOfOrder() {
        return acceptsDocsOutOfOrder;
    }
}

/**
 * Mutable per-group state tracked by the first-phase group collectors in this
 * file ({@code TopGroupCollector} and {@code TopGroupSortCollector}).
 */
class SearchGroup {
    // Value identifying the group; the collectors use it as the key of their groupMap.
    public MutableValue groupValue;
    // Incremented by the collectors for each collected doc that maps to this group
    // (reset to 1 when the instance is reused for a different group).
    int matches;
    // Global doc id (docBase + doc) of the group's current top document.
    int topDoc;
    // float topDocScore;  // currently unused
    // Slot in the collector's FieldComparators that holds the sort values of topDoc.
    int comparatorSlot;

    // currently only used when sort != sort.group
    // Per-group comparators, allocated by TopGroupSortCollector with a single slot (slot 0).
    FieldComparator[] sortGroupComparators;
    // Per-comparator multiplier: -1 when the sort field is reversed, otherwise 1.
    int[] sortGroupReversed;

    /***
    @Override
    public int hashCode() {
      return super.hashCode();
    }
        
    @Override
    public boolean equals(Object obj) {
      return groupValue.equalsSameType(((SearchGroup)obj).groupValue);
    }
    ***/
}

/**
 * Finds the top set of groups, grouped by groupByVS when sort == group.sort.
 *
 * <p>Each group is represented by its best document seen so far. Up to
 * {@code nGroups} groups are retained; once that many exist, a document that
 * starts a new group must beat the current worst group to replace it.
 * Documents must be collected in doc id order (see {@link #acceptsDocsOutOfOrder()}).
 */
class TopGroupCollector extends Collector {
    /** Maximum number of groups to retain. */
    final int nGroups;
    /** Retained groups keyed by their group value. */
    final HashMap<MutableValue, SearchGroup> groupMap;
    /** Retained groups in sort order (best first); built lazily once the map is full. */
    TreeSet<SearchGroup> orderedGroups;
    final ValueSource vs;
    final Map context;
    /** One comparator per sort field, each with {@code nGroups + 1} slots. */
    final FieldComparator[] comparators;
    /** Per-comparator multiplier: -1 when the sort field is reversed, otherwise 1. */
    final int[] reversed;

    DocValues docValues;
    DocValues.ValueFiller filler;
    /** Value object that {@link #filler} overwrites on every {@code fillValue} call. */
    MutableValue mval;
    Scorer scorer;
    int docBase;
    /** Comparator slot not owned by any group; used as scratch space for comparisons. */
    int spareSlot;

    /** Total number of documents passed to {@link #collect(int)}. */
    int matches;

    /**
     * @param groupByVS source of the per-document group value
     * @param vsContext context passed to {@code groupByVS.getValues}
     * @param sort      sort used both between groups and to pick a group's top document
     * @param nGroups   maximum number of groups to retain
     */
    public TopGroupCollector(ValueSource groupByVS, Map vsContext, Sort sort, int nGroups) throws IOException {
        this.vs = groupByVS;
        this.context = vsContext;
        this.nGroups = nGroups;

        SortField[] sortFields = sort.getSort();
        this.comparators = new FieldComparator[sortFields.length];
        this.reversed = new int[sortFields.length];
        for (int i = 0; i < sortFields.length; i++) {
            SortField sortField = sortFields[i];
            reversed[i] = sortField.getReverse() ? -1 : 1;
            // use nGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot)
            comparators[i] = sortField.getComparator(nGroups + 1, i);
        }
        this.spareSlot = nGroups;

        this.groupMap = new HashMap<MutableValue, SearchGroup>(nGroups);
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
        for (FieldComparator fc : comparators)
            fc.setScorer(scorer);
    }

    /**
     * Counts the document, resolves its group value and either starts/replaces
     * a group or updates the existing group's top document.
     */
    @Override
    public void collect(int doc) throws IOException {
        matches++;
        filler.fillValue(doc);
        SearchGroup group = groupMap.get(mval);
        if (group == null) {
            collectNewGroup(doc);
        } else {
            updateExistingGroup(group, doc);
        }
    }

    /** Handles a document whose group is not currently retained. */
    private void collectNewGroup(int doc) throws IOException {
        if (groupMap.size() < nGroups) {
            // Still filling up: the next free slot index equals the current group count.
            SearchGroup sg = new SearchGroup();
            sg.groupValue = mval.duplicate();
            sg.comparatorSlot = groupMap.size();
            sg.matches = 1;
            sg.topDoc = docBase + doc;
            // sg.topDocScore = scorer.score();
            for (FieldComparator fc : comparators)
                fc.copy(sg.comparatorSlot, doc);
            groupMap.put(sg.groupValue, sg);
            return;
        }

        if (orderedGroups == null) {
            buildSet();
        }

        if (orderedGroups.isEmpty()) {
            // Only reachable when no group may be retained (nGroups == 0):
            // there is no worst group to replace and no bottom to compare against.
            return;
        }

        for (int i = 0;; i++) {
            final int c = reversed[i] * comparators[i].compareBottom(doc);
            if (c < 0) {
                // Definitely not competitive.
                return;
            } else if (c > 0) {
                // Definitely competitive.
                break;
            } else if (i == comparators.length - 1) {
                // Here c=0. If we're at the last comparator, this doc is not
                // competitive, since docs are visited in doc Id order, which means
                // this doc cannot compete with any other document in the queue.
                return;
            }
        }

        // remove current smallest group
        SearchGroup smallest = orderedGroups.pollLast();
        groupMap.remove(smallest.groupValue);

        // reuse the removed SearchGroup
        smallest.groupValue.copy(mval);
        smallest.matches = 1;
        smallest.topDoc = docBase + doc;
        // smallest.topDocScore = scorer.score();
        for (FieldComparator fc : comparators)
            fc.copy(smallest.comparatorSlot, doc);

        groupMap.put(smallest.groupValue, smallest);
        orderedGroups.add(smallest);

        updateBottom(orderedGroups.last());
    }

    /** Handles a document that belongs to an already retained group. */
    private void updateExistingGroup(SearchGroup group, int doc) throws IOException {
        group.matches++; // TODO: these aren't valid if the group is ever discarded then re-added.  keep track if there have been discards?

        for (int i = 0;; i++) {
            FieldComparator fc = comparators[i];
            fc.copy(spareSlot, doc);

            final int c = reversed[i] * fc.compare(group.comparatorSlot, spareSlot);
            if (c < 0) {
                // Definitely not competitive.
                return;
            } else if (c > 0) {
                // Definitely competitive.
                // Set remaining comparators
                for (int j = i + 1; j < comparators.length; j++)
                    comparators[j].copy(spareSlot, doc);
                break;
            } else if (i == comparators.length - 1) {
                // Here c=0. If we're at the last comparator, this doc is not
                // competitive, since docs are visited in doc Id order, which means
                // this doc cannot compete with any other document in the queue.
                return;
            }
        }

        // remove before updating the group since lookup is done via comparators
        // TODO: optimize this
        SearchGroup prevLast = null;
        if (orderedGroups != null) {
            prevLast = orderedGroups.last();
            orderedGroups.remove(group);
        }

        group.topDoc = docBase + doc;
        // group.topDocScore = scorer.score();
        int tmp = spareSlot;
        spareSlot = group.comparatorSlot;
        group.comparatorSlot = tmp; // swap slots

        // re-add the changed group
        if (orderedGroups != null) {
            orderedGroups.add(group);

            // The bottom must follow the worst group. It is stale if the worst
            // group is now a different one, or if the updated group is still
            // the worst (its values and slot just changed).
            SearchGroup newLast = orderedGroups.last();
            if (group == newLast || prevLast != newLast) {
                updateBottom(newLast);
            }
        }
    }

    /** Points every comparator's bottom at the slot of {@code last}, the worst retained group. */
    private void updateBottom(SearchGroup last) {
        for (FieldComparator fc : comparators)
            fc.setBottom(last.comparatorSlot);
    }

    /**
     * Builds {@link #orderedGroups} from the current contents of
     * {@link #groupMap}, ordering groups by the comparator slots of their top
     * documents and breaking ties by top doc id, then sets the comparators'
     * bottom to the last (worst) group. Leaves the bottom untouched when
     * there are no groups.
     */
    void buildSet() {
        Comparator<SearchGroup> comparator = new Comparator<SearchGroup>() {
            public int compare(SearchGroup o1, SearchGroup o2) {
                for (int i = 0;; i++) {
                    FieldComparator fc = comparators[i];
                    int c = reversed[i] * fc.compare(o1.comparatorSlot, o2.comparatorSlot);
                    if (c != 0) {
                        return c;
                    } else if (i == comparators.length - 1) {
                        // topDoc is computed as docBase + doc; assuming both are
                        // non-negative doc ids, the subtraction cannot overflow.
                        return o1.topDoc - o2.topDoc;
                    }
                }
            }
        };

        orderedGroups = new TreeSet<SearchGroup>(comparator);
        orderedGroups.addAll(groupMap.values());
        if (orderedGroups.size() == 0)
            return;
        updateBottom(orderedGroups.last());
    }

    /**
     * Switches to the next index reader: re-resolves the group value source
     * for it and forwards the reader to every sort comparator.
     */
    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
        this.docBase = docBase;
        docValues = vs.getValues(context, reader);
        filler = docValues.getValueFiller();
        mval = filler.getValue();
        for (FieldComparator fc : comparators)
            fc.setNextReader(reader, docBase);
    }

    /** Always false: the tie handling in {@code collect} relies on doc id order. */
    @Override
    public boolean acceptsDocsOutOfOrder() {
        return false;
    }

    /** Returns the total number of documents collected so far. */
    public int getMatches() {
        return matches;
    }
}

/**
 * This class allows a different sort within a group than what is used between groups.
 * Sorting between groups is done by the sort value of the first (highest ranking)
 * document in that group.
 *
 * <p>Each {@code SearchGroup} created here carries its own single-slot
 * comparators built from {@code groupSort}; slot 0 (also set as the bottom)
 * holds the values of the group's current top document. The inherited
 * {@code comparators} (built from {@code sort}) still hold one slot per group
 * and drive the ordering of {@code orderedGroups}.
 */
class TopGroupSortCollector extends TopGroupCollector {

    // Most recent reader passed to setNextReader; null until the first call.
    IndexReader reader;
    // Sort used to pick the top document within a group.
    Sort groupSort;

    /**
     * @param groupByVS source of the per-document group value
     * @param vsContext context passed to the value source
     * @param sort      sort handed to the superclass (ordering between groups)
     * @param groupSort sort used within a group
     * @param nGroups   maximum number of groups to retain
     */
    public TopGroupSortCollector(ValueSource groupByVS, Map vsContext, Sort sort, Sort groupSort, int nGroups)
            throws IOException {
        super(groupByVS, vsContext, sort, nGroups);
        this.groupSort = groupSort;
    }

    /**
     * Fills {@code comparators} and {@code reversed} from {@code sortFields},
     * creating each comparator with {@code size} slots. Comparators created
     * mid-search are brought up to date with the current scorer and reader
     * when those have already been set.
     */
    void constructComparators(FieldComparator[] comparators, int[] reversed, SortField[] sortFields, int size)
            throws IOException {
        for (int i = 0; i < sortFields.length; i++) {
            SortField sortField = sortFields[i];
            reversed[i] = sortField.getReverse() ? -1 : 1;
            comparators[i] = sortField.getComparator(size, i);
            if (scorer != null)
                comparators[i].setScorer(scorer);
            if (reader != null)
                comparators[i].setNextReader(reader, docBase);
        }
    }

    /** Forwards the scorer to the superclass and to every retained group's own comparators. */
    @Override
    public void setScorer(Scorer scorer) throws IOException {
        super.setScorer(scorer);
        for (SearchGroup searchGroup : groupMap.values()) {
            for (FieldComparator fc : searchGroup.sortGroupComparators) {
                fc.setScorer(scorer);
            }
        }
    }

    /**
     * Same overall flow as the superclass, but "is this document better?"
     * decisions are made with the per-group {@code groupSort} comparators.
     * This override does not call {@code super.collect}.
     */
    @Override
    public void collect(int doc) throws IOException {
        matches++;
        filler.fillValue(doc);
        SearchGroup group = groupMap.get(mval);
        if (group == null) {
            int num = groupMap.size();
            if (groupMap.size() < nGroups) {
                // Still filling up: create the group together with its own
                // single-slot within-group comparators.
                SearchGroup sg = new SearchGroup();
                SortField[] sortGroupFields = groupSort.getSort();
                sg.sortGroupComparators = new FieldComparator[sortGroupFields.length];
                sg.sortGroupReversed = new int[sortGroupFields.length];
                constructComparators(sg.sortGroupComparators, sg.sortGroupReversed, sortGroupFields, 1);

                sg.groupValue = mval.duplicate();
                sg.comparatorSlot = num++;
                sg.matches = 1;
                sg.topDoc = docBase + doc;
                // sg.topDocScore = scorer.score();
                for (FieldComparator fc : comparators)
                    fc.copy(sg.comparatorSlot, doc);
                // Slot 0 doubles as the bottom: it always holds the group's current top doc.
                for (FieldComparator fc : sg.sortGroupComparators) {
                    fc.copy(0, doc);
                    fc.setBottom(0);
                }
                groupMap.put(sg.groupValue, sg);
                return;
            }

            if (orderedGroups == null) {
                buildSet();
            }

            // NOTE(review): competitiveness of a brand-new group is decided with the
            // last group's *within-group* comparators, although orderedGroups is ordered
            // by the inherited between-group comparators (whose bottom is maintained
            // below but never consulted via compareBottom in this class) — confirm
            // this is intended.
            SearchGroup leastSignificantGroup = orderedGroups.last();
            for (int i = 0;; i++) {
                final int c = leastSignificantGroup.sortGroupReversed[i]
                        * leastSignificantGroup.sortGroupComparators[i].compareBottom(doc);
                if (c < 0) {
                    // Definitely not competitive.
                    return;
                } else if (c > 0) {
                    // Definitely competitive.
                    break;
                } else if (i == leastSignificantGroup.sortGroupComparators.length - 1) {
                    // Here c=0. If we're at the last comparator, this doc is not
                    // competitive, since docs are visited in doc Id order, which means
                    // this doc cannot compete with any other document in the queue.
                    return;
                }
            }

            // remove current smallest group
            SearchGroup smallest = orderedGroups.pollLast();
            groupMap.remove(smallest.groupValue);

            // reuse the removed SearchGroup
            smallest.groupValue.copy(mval);
            smallest.matches = 1;
            smallest.topDoc = docBase + doc;
            // smallest.topDocScore = scorer.score();
            for (FieldComparator fc : comparators)
                fc.copy(smallest.comparatorSlot, doc);
            for (FieldComparator fc : smallest.sortGroupComparators) {
                fc.copy(0, doc);
                fc.setBottom(0);
            }

            groupMap.put(smallest.groupValue, smallest);
            orderedGroups.add(smallest);

            for (FieldComparator fc : comparators)
                fc.setBottom(orderedGroups.last().comparatorSlot);
            // NOTE(review): repeats the setBottom(0) already issued on these same
            // comparators a few lines above; looks redundant.
            for (FieldComparator fc : smallest.sortGroupComparators)
                fc.setBottom(0);

            return;
        }

        //
        // update existing group
        //

        group.matches++; // TODO: these aren't valid if the group is ever discarded then re-added.  keep track if there have been discards?

        for (int i = 0;; i++) {
            FieldComparator fc = group.sortGroupComparators[i];

            // Compare against the group's current top doc (held as the bottom of slot 0).
            final int c = group.sortGroupReversed[i] * fc.compareBottom(doc);
            if (c < 0) {
                // Definitely not competitive.
                return;
            } else if (c > 0) {
                // Definitely competitive.
                // Record the new top doc in every within-group comparator...
                for (int j = 0; j < group.sortGroupComparators.length; j++) {
                    group.sortGroupComparators[j].copy(0, doc);
                    group.sortGroupComparators[j].setBottom(0);
                }
                // ...and stage its between-group sort values in the spare slot.
                for (FieldComparator comparator : comparators)
                    comparator.copy(spareSlot, doc);
                break;
            } else if (i == group.sortGroupComparators.length - 1) {
                // Here c=0. If we're at the last comparator, this doc is not
                // competitive, since docs are visited in doc Id order, which means
                // this doc cannot compete with any other document in the queue.
                return;
            }
        }

        // remove before updating the group since lookup is done via comparators
        // TODO: optimize this
        if (orderedGroups != null)
            orderedGroups.remove(group);

        group.topDoc = docBase + doc;
        // group.topDocScore = scorer.score();
        int tmp = spareSlot;
        spareSlot = group.comparatorSlot;
        group.comparatorSlot = tmp; // swap slots

        // re-add the changed group
        if (orderedGroups != null)
            orderedGroups.add(group);
    }

    /**
     * Forwards the new reader to the superclass, remembers it for comparators
     * created later, and forwards it to every retained group's own comparators.
     */
    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
        super.setNextReader(reader, docBase);
        this.reader = reader;
        for (SearchGroup searchGroup : groupMap.values()) {
            for (FieldComparator fc : searchGroup.sortGroupComparators) {
                fc.setNextReader(reader, docBase);
            }
        }
    }

}

/**
 * Second-phase collector: for every group retained by a first-phase
 * {@code TopGroupCollector}, gathers that group's top documents into its own
 * {@code TopFieldCollector}. Documents whose group value is not among the
 * retained groups are ignored.
 */
class Phase2GroupCollector extends Collector {
    /** Per-group document collectors keyed by group value. */
    final HashMap<MutableValue, SearchGroupDocs> groupMap;
    final ValueSource vs;
    final Map context;

    DocValues docValues;
    DocValues.ValueFiller filler;
    /** Value object that {@link #filler} overwrites on every {@code fillValue} call. */
    MutableValue mval;
    Scorer scorer;
    int docBase;

    // TODO: may want to decouple from the phase1 collector
    /**
     * @param topGroups    first-phase collector whose retained groups are collected here
     * @param groupByVS    source of the per-document group value
     * @param vsContext    context passed to {@code groupByVS.getValues}
     * @param sort         sort applied to the documents within each group
     * @param docsPerGroup number of documents each per-group collector is created for
     * @param getScores    passed to {@code TopFieldCollector.create} for both score flags
     */
    public Phase2GroupCollector(TopGroupCollector topGroups, ValueSource groupByVS, Map vsContext, Sort sort,
            int docsPerGroup, boolean getScores) throws IOException {
        this.vs = groupByVS;
        this.context = vsContext;

        final boolean fillFields = false;
        final HashMap<MutableValue, SearchGroup> phase1Groups = topGroups.groupMap;

        this.groupMap = new HashMap<MutableValue, SearchGroupDocs>(phase1Groups.size());
        for (SearchGroup phase1Group : phase1Groups.values()) {
            SearchGroupDocs entry = new SearchGroupDocs();
            // The group value object is shared with the phase-1 group, not copied.
            entry.groupValue = phase1Group.groupValue;
            entry.collector = TopFieldCollector.create(sort, docsPerGroup, fillFields, getScores, getScores,
                    true);
            this.groupMap.put(entry.groupValue, entry);
        }
    }

    /** Remembers the scorer and forwards it to every per-group collector. */
    @Override
    public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
        for (SearchGroupDocs entry : groupMap.values()) {
            entry.collector.setScorer(scorer);
        }
    }

    /** Routes the document to its group's collector, if that group is tracked. */
    @Override
    public void collect(int doc) throws IOException {
        filler.fillValue(doc);
        SearchGroupDocs target = groupMap.get(mval);
        if (target != null) {
            target.matches++;
            target.collector.collect(doc);
        }
    }

    /**
     * Switches to the next index reader: re-resolves the group value source
     * for it and forwards the reader to every per-group collector.
     */
    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
        this.docBase = docBase;
        this.docValues = vs.getValues(context, reader);
        this.filler = docValues.getValueFiller();
        this.mval = filler.getValue();
        for (SearchGroupDocs entry : groupMap.values()) {
            entry.collector.setNextReader(reader, docBase);
        }
    }

    /** Always false: documents must be delivered in doc id order. */
    @Override
    public boolean acceptsDocsOutOfOrder() {
        return false;
    }
}

// TODO: merge with SearchGroup or not?
// ad: don't need to build a new hashmap
// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
/**
 * Per-group state of {@code Phase2GroupCollector}: the group's value, how many
 * documents were routed to it, and the collector holding its top documents.
 */
class SearchGroupDocs {
    // Value identifying the group; used as the key of Phase2GroupCollector.groupMap.
    public MutableValue groupValue;
    // Incremented by Phase2GroupCollector.collect for every document routed to this group.
    int matches;
    // Collects this group's documents; created by Phase2GroupCollector's constructor.
    TopFieldCollector collector;
}