org.apache.druid.query.groupby.GroupByQuery.java Source code

Introduction

Here is the source code for org.apache.druid.query.groupby.GroupByQuery.java, the class that represents Apache Druid's native groupBy query and its fluent Builder.
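
Before the full listing, here is a minimal usage sketch (not part of the original file) showing how the query is typically assembled through the fluent Builder. The datasource name, interval, and column names below are hypothetical placeholders.

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.groupby.GroupByQuery;

public class GroupByQueryExample {
    public static GroupByQuery buildExample() {
        return GroupByQuery.builder()
                .setDataSource("wikipedia")                              // hypothetical datasource name
                .setInterval("2020-01-01/2020-02-01")                    // hypothetical query interval
                .setGranularity(Granularities.ALL)
                .addDimension("countryName")                             // hypothetical dimension column
                .setAggregatorSpecs(new CountAggregatorFactory("rows"))  // count rows per group
                .build();
    }
}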

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.query.groupby;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Longs;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.Queries;
import org.apache.druid.query.Query;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.groupby.having.HavingSpec;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.LimitSpec;
import org.apache.druid.query.groupby.orderby.NoopLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.ordering.StringComparator;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.spec.LegacySegmentSpec;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ValueType;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Represents a Druid "groupBy" query: the dimensions to group on, the aggregators and post-aggregators to
 * compute, plus optional filter, having, limit/ordering, and subtotals specs.
 */
public class GroupByQuery extends BaseQuery<ResultRow> {
    public static final String CTX_KEY_SORT_BY_DIMS_FIRST = "sortByDimsFirst";
    private static final String CTX_KEY_FUDGE_TIMESTAMP = "fudgeTimestamp";

    private static final Comparator<ResultRow> NON_GRANULAR_TIME_COMP = (ResultRow lhs, ResultRow rhs) -> Longs
            .compare(lhs.getLong(0), rhs.getLong(0));

    public static Builder builder() {
        return new Builder();
    }

    private final VirtualColumns virtualColumns;
    private final LimitSpec limitSpec;
    @Nullable
    private final HavingSpec havingSpec;
    @Nullable
    private final DimFilter dimFilter;
    private final List<DimensionSpec> dimensions;
    private final List<AggregatorFactory> aggregatorSpecs;
    private final List<PostAggregator> postAggregatorSpecs;
    @Nullable
    private final List<List<String>> subtotalsSpec;

    private final boolean applyLimitPushDown;
    private final Function<Sequence<ResultRow>, Sequence<ResultRow>> postProcessingFn;
    private final List<String> resultRowOrder;
    private final Object2IntMap<String> resultRowPositionLookup;

    /**
     * This is set when we know that all rows will have the same timestamp, and allows us to not actually store
     * and track it throughout the query execution process.
     */
    @Nullable
    private final DateTime universalTimestamp;

    @JsonCreator
    public GroupByQuery(@JsonProperty("dataSource") DataSource dataSource,
            @JsonProperty("intervals") QuerySegmentSpec querySegmentSpec,
            @JsonProperty("virtualColumns") VirtualColumns virtualColumns,
            @JsonProperty("filter") @Nullable DimFilter dimFilter,
            @JsonProperty("granularity") Granularity granularity,
            @JsonProperty("dimensions") List<DimensionSpec> dimensions,
            @JsonProperty("aggregations") List<AggregatorFactory> aggregatorSpecs,
            @JsonProperty("postAggregations") List<PostAggregator> postAggregatorSpecs,
            @JsonProperty("having") @Nullable HavingSpec havingSpec, @JsonProperty("limitSpec") LimitSpec limitSpec,
            @JsonProperty("subtotalsSpec") @Nullable List<List<String>> subtotalsSpec,
            @JsonProperty("context") Map<String, Object> context) {
        this(dataSource, querySegmentSpec, virtualColumns, dimFilter, granularity, dimensions, aggregatorSpecs,
                postAggregatorSpecs, havingSpec, limitSpec, subtotalsSpec, null, context);
    }

    private Function<Sequence<ResultRow>, Sequence<ResultRow>> makePostProcessingFn() {
        Function<Sequence<ResultRow>, Sequence<ResultRow>> postProcessingFn = limitSpec.build(this);

        if (havingSpec != null) {
            postProcessingFn = Functions.compose(postProcessingFn, (Sequence<ResultRow> input) -> {
                havingSpec.setQuery(this);
                return Sequences.filter(input, havingSpec::eval);
            });
        }
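        // Note (editorial): Functions.compose(g, f) applies f first, so when a havingSpec is present the
        // having filter runs before the limit/ordering function produced by limitSpec.build(this).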
        return postProcessingFn;
    }

    /**
     * A private constructor that avoids recomputing postProcessingFn.
     */
    private GroupByQuery(final DataSource dataSource, final QuerySegmentSpec querySegmentSpec,
            final VirtualColumns virtualColumns, final @Nullable DimFilter dimFilter, final Granularity granularity,
            final @Nullable List<DimensionSpec> dimensions, final @Nullable List<AggregatorFactory> aggregatorSpecs,
            final @Nullable List<PostAggregator> postAggregatorSpecs, final @Nullable HavingSpec havingSpec,
            final LimitSpec limitSpec, final @Nullable List<List<String>> subtotalsSpec,
            final @Nullable Function<Sequence<ResultRow>, Sequence<ResultRow>> postProcessingFn,
            final Map<String, Object> context) {
        super(dataSource, querySegmentSpec, false, context, granularity);

        this.virtualColumns = VirtualColumns.nullToEmpty(virtualColumns);
        this.dimFilter = dimFilter;
        this.dimensions = dimensions == null ? ImmutableList.of() : dimensions;
        for (DimensionSpec spec : this.dimensions) {
            Preconditions.checkArgument(spec != null, "dimensions has null DimensionSpec");
        }

        this.aggregatorSpecs = aggregatorSpecs == null ? ImmutableList.of() : aggregatorSpecs;
        this.postAggregatorSpecs = Queries.prepareAggregations(
                this.dimensions.stream().map(DimensionSpec::getOutputName).collect(Collectors.toList()),
                this.aggregatorSpecs, postAggregatorSpecs == null ? ImmutableList.of() : postAggregatorSpecs);

        this.universalTimestamp = computeUniversalTimestamp();
        this.resultRowOrder = computeResultRowOrder();
        this.resultRowPositionLookup = computeResultRowOrderLookup();
        this.havingSpec = havingSpec;
        this.limitSpec = LimitSpec.nullToNoopLimitSpec(limitSpec);
        this.subtotalsSpec = verifySubtotalsSpec(subtotalsSpec, this.dimensions);

        // Verify no duplicate names between dimensions, aggregators, and postAggregators.
        // They will all end up in the same namespace in the returned Rows and we can't have them clobbering each other.
        // We're not counting __time, even though that name is problematic. See: https://github.com/apache/incubator-druid/pull/3684
        verifyOutputNames(this.dimensions, this.aggregatorSpecs, this.postAggregatorSpecs);

        this.postProcessingFn = postProcessingFn != null ? postProcessingFn : makePostProcessingFn();

        // Check if limit push down configuration is valid and check if limit push down will be applied
        this.applyLimitPushDown = determineApplyLimitPushDown();
    }

    @Nullable
    private List<List<String>> verifySubtotalsSpec(@Nullable List<List<String>> subtotalsSpec,
            List<DimensionSpec> dimensions) {
        // if subtotalsSpec exists then validate that all are subsets of dimensions spec.
        if (subtotalsSpec != null) {
            for (List<String> subtotalSpec : subtotalsSpec) {
                for (String s : subtotalSpec) {
                    boolean found = false;
                    for (DimensionSpec ds : dimensions) {
                        if (s.equals(ds.getOutputName())) {
                            found = true;
                            break;
                        }
                    }
                    if (!found) {
                        throw new IAE("Subtotal spec %s is either not a subset of top level dimensions.",
                                subtotalSpec);
                    }
                }
            }
        }

        return subtotalsSpec;
    }

    @JsonProperty
    public VirtualColumns getVirtualColumns() {
        return virtualColumns;
    }

    @Nullable
    @JsonProperty("filter")
    public DimFilter getDimFilter() {
        return dimFilter;
    }

    @JsonProperty
    public List<DimensionSpec> getDimensions() {
        return dimensions;
    }

    @JsonProperty("aggregations")
    public List<AggregatorFactory> getAggregatorSpecs() {
        return aggregatorSpecs;
    }

    @JsonProperty("postAggregations")
    public List<PostAggregator> getPostAggregatorSpecs() {
        return postAggregatorSpecs;
    }

    @JsonProperty("having")
    public HavingSpec getHavingSpec() {
        return havingSpec;
    }

    @JsonProperty
    public LimitSpec getLimitSpec() {
        return limitSpec;
    }

    @JsonInclude(JsonInclude.Include.NON_NULL)
    @JsonProperty("subtotalsSpec")
    @Nullable
    public List<List<String>> getSubtotalsSpec() {
        return subtotalsSpec;
    }

    /**
     * Returns a list of field names, of the same size as {@link #getResultRowSizeWithPostAggregators()}, in the
     * order that they will appear in ResultRows for this query.
     *
     * @see ResultRow for documentation about the order that fields will be in
     */
    public List<String> getResultRowOrder() {
        return resultRowOrder;
    }

    /**
     * Returns the size of ResultRows for this query when they do not include post-aggregators.
     */
    public int getResultRowSizeWithoutPostAggregators() {
        return getResultRowPostAggregatorStart();
    }

    /**
     * Returns the size of ResultRows for this query when they include post-aggregators.
     */
    public int getResultRowSizeWithPostAggregators() {
        return resultRowOrder.size();
    }

    /**
     * Returns a map that can be used to look up the position within ResultRows of certain field names. The map's
     * {@link Object2IntMap#getInt(Object)} method will return -1 if the field is not found.
     */
    public Object2IntMap<String> getResultRowPositionLookup() {
        return resultRowPositionLookup;
    }
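
    // Illustrative example (not in the original source): for a query with a non-ALL granularity,
    // dimensions [d1, d2], aggregators [a1], and post-aggregator [p1], resultRowOrder is
    // [__time, d1, d2, a1, p1]; with granularity ALL (a universal timestamp) the __time entry is
    // omitted and the order is [d1, d2, a1, p1]. resultRowPositionLookup.getInt("a1") would return
    // 3 in the first case, 2 in the second, and -1 for any unknown field name.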

    /**
     * If this query has a single universal timestamp, return it. Otherwise return null.
     *
     * This method will return a nonnull timestamp in the following two cases:
     *
     * 1) CTX_KEY_FUDGE_TIMESTAMP is set (in which case this timestamp will be returned).
     * 2) Granularity is "ALL".
     *
     * If this method returns null, then {@link #getResultRowHasTimestamp()} will return true. The reverse is also true:
     * if this method returns nonnull, then {@link #getResultRowHasTimestamp()} will return false.
     */
    @Nullable
    public DateTime getUniversalTimestamp() {
        return universalTimestamp;
    }

    /**
     * Returns true if ResultRows for this query include timestamps, false otherwise.
     *
     * @see #getUniversalTimestamp() for details about when timestamps are included in ResultRows
     */
    public boolean getResultRowHasTimestamp() {
        return universalTimestamp == null;
    }

    /**
     * Returns the position of the first dimension in ResultRows for this query.
     */
    public int getResultRowDimensionStart() {
        return getResultRowHasTimestamp() ? 1 : 0;
    }

    /**
     * Returns the position of the first aggregator in ResultRows for this query.
     */
    public int getResultRowAggregatorStart() {
        return getResultRowDimensionStart() + dimensions.size();
    }

    /**
     * Returns the position of the first post-aggregator in ResultRows for this query.
     */
    public int getResultRowPostAggregatorStart() {
        return getResultRowAggregatorStart() + aggregatorSpecs.size();
    }
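
    // Worked example (editorial): continuing the layout above with dimensions [d1, d2] and aggregators [a1],
    // a non-ALL granularity gives dimensionStart = 1, aggregatorStart = 1 + 2 = 3, postAggregatorStart = 3 + 1 = 4;
    // with granularity ALL the row has no timestamp, so the offsets shift down to 0, 2, and 3.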

    @Override
    public boolean hasFilters() {
        return dimFilter != null;
    }

    @Override
    @Nullable
    public DimFilter getFilter() {
        return dimFilter;
    }

    @Override
    public String getType() {
        return GROUP_BY;
    }

    @JsonIgnore
    public boolean getContextSortByDimsFirst() {
        return getContextBoolean(CTX_KEY_SORT_BY_DIMS_FIRST, false);
    }

    @JsonIgnore
    public boolean isApplyLimitPushDown() {
        return applyLimitPushDown;
    }

    @JsonIgnore
    public boolean getApplyLimitPushDownFromContext() {
        return getContextBoolean(GroupByQueryConfig.CTX_KEY_APPLY_LIMIT_PUSH_DOWN, true);
    }

    @Override
    public Ordering getResultOrdering() {
        final Ordering<ResultRow> rowOrdering = getRowOrdering(false);

        return Ordering.from((lhs, rhs) -> {
            if (lhs instanceof ResultRow) {
                return rowOrdering.compare((ResultRow) lhs, (ResultRow) rhs);
            } else {
                //noinspection unchecked (Probably bySegment queries; see BySegmentQueryRunner for details)
                return ((Ordering) Comparators.naturalNullsFirst()).compare(lhs, rhs);
            }
        });
    }

    private boolean validateAndGetForceLimitPushDown() {
        final boolean forcePushDown = getContextBoolean(GroupByQueryConfig.CTX_KEY_FORCE_LIMIT_PUSH_DOWN, false);
        if (forcePushDown) {
            if (!(limitSpec instanceof DefaultLimitSpec)) {
                throw new IAE("When forcing limit push down, a limit spec must be provided.");
            }

            if (!((DefaultLimitSpec) limitSpec).isLimited()) {
                throw new IAE("When forcing limit push down, the provided limit spec must have a limit.");
            }

            if (havingSpec != null) {
                throw new IAE("Cannot force limit push down when a having spec is present.");
            }

            for (OrderByColumnSpec orderBySpec : ((DefaultLimitSpec) limitSpec).getColumns()) {
                if (OrderByColumnSpec.getPostAggIndexForOrderBy(orderBySpec, postAggregatorSpecs) > -1) {
                    throw new UnsupportedOperationException(
                            "Limit push down when sorting by a post aggregator is not supported.");
                }
            }
        }
        return forcePushDown;
    }

    private Object2IntMap<String> computeResultRowOrderLookup() {
        final Object2IntMap<String> indexes = new Object2IntOpenHashMap<>();
        indexes.defaultReturnValue(-1);

        int index = 0;
        for (String columnName : resultRowOrder) {
            indexes.put(columnName, index++);
        }

        return indexes;
    }

    private List<String> computeResultRowOrder() {
        final List<String> retVal = new ArrayList<>();

        if (universalTimestamp == null) {
            retVal.add(ColumnHolder.TIME_COLUMN_NAME);
        }

        dimensions.stream().map(DimensionSpec::getOutputName).forEach(retVal::add);
        aggregatorSpecs.stream().map(AggregatorFactory::getName).forEach(retVal::add);
        postAggregatorSpecs.stream().map(PostAggregator::getName).forEach(retVal::add);

        return retVal;
    }

    private boolean determineApplyLimitPushDown() {
        if (subtotalsSpec != null) {
            return false;
        }

        final boolean forceLimitPushDown = validateAndGetForceLimitPushDown();

        if (limitSpec instanceof DefaultLimitSpec) {
            DefaultLimitSpec defaultLimitSpec = (DefaultLimitSpec) limitSpec;

            // If only applying an orderby without a limit, don't try to push down
            if (!defaultLimitSpec.isLimited()) {
                return false;
            }

            if (forceLimitPushDown) {
                return true;
            }

            if (!getApplyLimitPushDownFromContext()) {
                return false;
            }

            if (havingSpec != null) {
                return false;
            }

            // If the sorting order only uses columns in the grouping key, we can always push the limit down
            // to the buffer grouper without affecting result accuracy
            boolean sortHasNonGroupingFields = DefaultLimitSpec
                    .sortingOrderHasNonGroupingFields((DefaultLimitSpec) limitSpec, getDimensions());

            return !sortHasNonGroupingFields;
        }

        return false;
    }

    /**
     * When limit push down is applied, the partial results are sorted by the ordering specified by the
     * limit/order spec (unlike the non-push-down case, where results always use the default natural ascending
     * order), so the merge of these partial result streams must use the same ordering to produce correct results.
     */
    private Ordering<ResultRow> getRowOrderingForPushDown(final boolean granular,
            final DefaultLimitSpec limitSpec) {
        final boolean sortByDimsFirst = getContextSortByDimsFirst();

        final IntList orderedFieldNumbers = new IntArrayList();
        final Set<Integer> dimsInOrderBy = new HashSet<>();
        final List<Boolean> needsReverseList = new ArrayList<>();
        final List<ValueType> dimensionTypes = new ArrayList<>();
        final List<StringComparator> comparators = new ArrayList<>();

        for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
            boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
            int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
            if (dimIndex >= 0) {
                DimensionSpec dim = dimensions.get(dimIndex);
                orderedFieldNumbers.add(resultRowPositionLookup.getInt(dim.getOutputName()));
                dimsInOrderBy.add(dimIndex);
                needsReverseList.add(needsReverse);
                final ValueType type = dimensions.get(dimIndex).getOutputType();
                dimensionTypes.add(type);
                comparators.add(orderSpec.getDimensionComparator());
            }
        }

        for (int i = 0; i < dimensions.size(); i++) {
            if (!dimsInOrderBy.contains(i)) {
                orderedFieldNumbers.add(resultRowPositionLookup.getInt(dimensions.get(i).getOutputName()));
                needsReverseList.add(false);
                final ValueType type = dimensions.get(i).getOutputType();
                dimensionTypes.add(type);
                comparators.add(StringComparators.LEXICOGRAPHIC);
            }
        }

        final Comparator<ResultRow> timeComparator = getTimeComparator(granular);

        if (timeComparator == null) {
            return Ordering.from((lhs, rhs) -> compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList,
                    dimensionTypes, comparators, lhs, rhs));
        } else if (sortByDimsFirst) {
            return Ordering.from((lhs, rhs) -> {
                final int cmp = compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes,
                        comparators, lhs, rhs);
                if (cmp != 0) {
                    return cmp;
                }

                return timeComparator.compare(lhs, rhs);
            });
        } else {
            return Ordering.from((lhs, rhs) -> {
                final int timeCompare = timeComparator.compare(lhs, rhs);

                if (timeCompare != 0) {
                    return timeCompare;
                }

                return compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes,
                        comparators, lhs, rhs);
            });
        }
    }
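
    // Illustrative example (not in the original source): with granularity ALL, dimensions [a, b], and a
    // limitSpec of ORDER BY a DESC LIMIT 10, the push-down merge ordering compares "a" first using the
    // order-by column's comparator in reverse (descending), then falls back to "b" lexicographically
    // ascending, matching the ordering the partial results were sorted with before the limit was applied.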

    public Ordering<ResultRow> getRowOrdering(final boolean granular) {
        if (applyLimitPushDown) {
            if (!DefaultLimitSpec.sortingOrderHasNonGroupingFields((DefaultLimitSpec) limitSpec, dimensions)) {
                return getRowOrderingForPushDown(granular, (DefaultLimitSpec) limitSpec);
            }
        }

        final boolean sortByDimsFirst = getContextSortByDimsFirst();
        final Comparator<ResultRow> timeComparator = getTimeComparator(granular);

        if (timeComparator == null) {
            return Ordering.from((lhs, rhs) -> compareDims(dimensions, lhs, rhs));
        } else if (sortByDimsFirst) {
            return Ordering.from((lhs, rhs) -> {
                final int cmp = compareDims(dimensions, lhs, rhs);
                if (cmp != 0) {
                    return cmp;
                }

                return timeComparator.compare(lhs, rhs);
            });
        } else {
            return Ordering.from((lhs, rhs) -> {
                final int timeCompare = timeComparator.compare(lhs, rhs);

                if (timeCompare != 0) {
                    return timeCompare;
                }

                return compareDims(dimensions, lhs, rhs);
            });
        }
    }

    @Nullable
    private Comparator<ResultRow> getTimeComparator(boolean granular) {
        if (Granularities.ALL.equals(getGranularity())) {
            return null;
        } else {
            if (!getResultRowHasTimestamp()) {
                // Sanity check (should never happen).
                throw new ISE("Cannot do time comparisons!");
            }

            if (granular) {
                return (lhs, rhs) -> Longs.compare(
                        getGranularity().bucketStart(DateTimes.utc(lhs.getLong(0))).getMillis(),
                        getGranularity().bucketStart(DateTimes.utc(rhs.getLong(0))).getMillis());
            } else {
                return NON_GRANULAR_TIME_COMP;
            }
        }
    }

    private int compareDims(List<DimensionSpec> dimensions, ResultRow lhs, ResultRow rhs) {
        final int dimensionStart = getResultRowDimensionStart();

        for (int i = 0; i < dimensions.size(); i++) {
            DimensionSpec dimension = dimensions.get(i);
            final int dimCompare = DimensionHandlerUtils.compareObjectsAsType(lhs.get(dimensionStart + i),
                    rhs.get(dimensionStart + i), dimension.getOutputType());
            if (dimCompare != 0) {
                return dimCompare;
            }
        }

        return 0;
    }

    /**
     * Computes the timestamp that will be returned by {@link #getUniversalTimestamp()}.
     */
    @Nullable
    private DateTime computeUniversalTimestamp() {
        final String timestampStringFromContext = getContextValue(CTX_KEY_FUDGE_TIMESTAMP, "");
        final Granularity granularity = getGranularity();

        if (!timestampStringFromContext.isEmpty()) {
            return DateTimes.utc(Long.parseLong(timestampStringFromContext));
        } else if (Granularities.ALL.equals(granularity)) {
            final DateTime timeStart = getIntervals().get(0).getStart();
            return granularity.getIterable(new Interval(timeStart, timeStart.plus(1))).iterator().next().getStart();
        } else {
            return null;
        }
    }
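
    // Illustrative example (not in the original source): with granularity ALL, intervals
    // ["2020-01-01/2020-02-01"], and no "fudgeTimestamp" in the context, this returns
    // 2020-01-01T00:00:00.000Z, the start of the first query interval; with any finer granularity
    // it returns null and each ResultRow carries its own __time value.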

    private static int compareDimsForLimitPushDown(final IntList fields, final List<Boolean> needsReverseList,
            final List<ValueType> dimensionTypes, final List<StringComparator> comparators, final ResultRow lhs,
            final ResultRow rhs) {
        for (int i = 0; i < fields.size(); i++) {
            final int fieldNumber = fields.getInt(i);
            final StringComparator comparator = comparators.get(i);
            final ValueType dimensionType = dimensionTypes.get(i);

            final int dimCompare;
            final Object lhsObj = lhs.get(fieldNumber);
            final Object rhsObj = rhs.get(fieldNumber);

            if (ValueType.isNumeric(dimensionType)) {
                if (comparator.equals(StringComparators.NUMERIC)) {
                    dimCompare = DimensionHandlerUtils.compareObjectsAsType(lhsObj, rhsObj, dimensionType);
                } else {
                    dimCompare = comparator.compare(String.valueOf(lhsObj), String.valueOf(rhsObj));
                }
            } else {
                dimCompare = comparator.compare((String) lhsObj, (String) rhsObj);
            }

            if (dimCompare != 0) {
                return needsReverseList.get(i) ? -dimCompare : dimCompare;
            }
        }
        return 0;
    }

    /**
     * Apply the havingSpec and limitSpec. Because havingSpecs are not thread safe, and because they are applied during
     * accumulation of the returned sequence, callers must take care to avoid accumulating two different Sequences
     * returned by this method in two different threads.
     *
     * @param results sequence of rows to apply havingSpec and limitSpec to
     *
     * @return sequence of rows after applying havingSpec and limitSpec
     */
    public Sequence<ResultRow> postProcess(Sequence<ResultRow> results) {
        return postProcessingFn.apply(results);
    }

    @Override
    public GroupByQuery withOverriddenContext(Map<String, Object> contextOverride) {
        return new Builder(this).overrideContext(contextOverride).build();
    }

    @Override
    public GroupByQuery withQuerySegmentSpec(QuerySegmentSpec spec) {
        return new Builder(this).setQuerySegmentSpec(spec).build();
    }

    public GroupByQuery withDimFilter(@Nullable final DimFilter dimFilter) {
        return new Builder(this).setDimFilter(dimFilter).build();
    }

    @Override
    public Query<ResultRow> withDataSource(DataSource dataSource) {
        return new Builder(this).setDataSource(dataSource).build();
    }

    public GroupByQuery withDimensionSpecs(final List<DimensionSpec> dimensionSpecs) {
        return new Builder(this).setDimensions(dimensionSpecs).build();
    }

    public GroupByQuery withLimitSpec(LimitSpec limitSpec) {
        return new Builder(this).setLimitSpec(limitSpec).build();
    }

    public GroupByQuery withAggregatorSpecs(final List<AggregatorFactory> aggregatorSpecs) {
        return new Builder(this).setAggregatorSpecs(aggregatorSpecs).build();
    }

    public GroupByQuery withSubtotalsSpec(@Nullable final List<List<String>> subtotalsSpec) {
        return new Builder(this).setSubtotalsSpec(subtotalsSpec).build();
    }

    public GroupByQuery withPostAggregatorSpecs(final List<PostAggregator> postAggregatorSpecs) {
        return new Builder(this).setPostAggregatorSpecs(postAggregatorSpecs).build();
    }

    private static void verifyOutputNames(List<DimensionSpec> dimensions, List<AggregatorFactory> aggregators,
            List<PostAggregator> postAggregators) {
        final Set<String> outputNames = new HashSet<>();
        for (DimensionSpec dimension : dimensions) {
            if (!outputNames.add(dimension.getOutputName())) {
                throw new IAE("Duplicate output name[%s]", dimension.getOutputName());
            }
        }

        for (AggregatorFactory aggregator : aggregators) {
            if (!outputNames.add(aggregator.getName())) {
                throw new IAE("Duplicate output name[%s]", aggregator.getName());
            }
        }

        for (PostAggregator postAggregator : postAggregators) {
            if (!outputNames.add(postAggregator.getName())) {
                throw new IAE("Duplicate output name[%s]", postAggregator.getName());
            }
        }

        if (outputNames.contains(ColumnHolder.TIME_COLUMN_NAME)) {
            throw new IAE("'%s' cannot be used as an output name for dimensions, aggregators, or post-aggregators.",
                    ColumnHolder.TIME_COLUMN_NAME);
        }
    }
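
    // Illustrative example (not in the original source): a query declaring a dimension with output name
    // "count" alongside an aggregator also named "count" fails here with IAE("Duplicate output name[count]"),
    // and using "__time" as any output name is likewise rejected.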

    public static class Builder {
        @Nullable
        private static List<List<String>> copySubtotalSpec(@Nullable List<List<String>> subtotalsSpec) {
            if (subtotalsSpec == null) {
                return null;
            }
            return subtotalsSpec.stream().map(ArrayList::new).collect(Collectors.toList());
        }

        private DataSource dataSource;
        private QuerySegmentSpec querySegmentSpec;
        private VirtualColumns virtualColumns;
        @Nullable
        private DimFilter dimFilter;
        private Granularity granularity;
        @Nullable
        private List<DimensionSpec> dimensions;
        @Nullable
        private List<AggregatorFactory> aggregatorSpecs;
        @Nullable
        private List<PostAggregator> postAggregatorSpecs;
        @Nullable
        private HavingSpec havingSpec;

        private Map<String, Object> context;

        @Nullable
        private List<List<String>> subtotalsSpec = null;
        @Nullable
        private LimitSpec limitSpec = null;
        @Nullable
        private Function<Sequence<ResultRow>, Sequence<ResultRow>> postProcessingFn;
        private List<OrderByColumnSpec> orderByColumnSpecs = new ArrayList<>();
        private int limit = Integer.MAX_VALUE;

        public Builder() {
        }

        public Builder(GroupByQuery query) {
            dataSource = query.getDataSource();
            querySegmentSpec = query.getQuerySegmentSpec();
            virtualColumns = query.getVirtualColumns();
            dimFilter = query.getDimFilter();
            granularity = query.getGranularity();
            dimensions = query.getDimensions();
            aggregatorSpecs = query.getAggregatorSpecs();
            postAggregatorSpecs = query.getPostAggregatorSpecs();
            havingSpec = query.getHavingSpec();
            limitSpec = query.getLimitSpec();
            subtotalsSpec = query.subtotalsSpec;
            postProcessingFn = query.postProcessingFn;
            context = query.getContext();
        }

        public Builder(Builder builder) {
            dataSource = builder.dataSource;
            querySegmentSpec = builder.querySegmentSpec;
            virtualColumns = builder.virtualColumns;
            dimFilter = builder.dimFilter;
            granularity = builder.granularity;
            dimensions = builder.dimensions;
            aggregatorSpecs = builder.aggregatorSpecs;
            postAggregatorSpecs = builder.postAggregatorSpecs;
            havingSpec = builder.havingSpec;
            limitSpec = builder.limitSpec;
            subtotalsSpec = copySubtotalSpec(builder.subtotalsSpec);
            postProcessingFn = builder.postProcessingFn;
            limit = builder.limit;
            orderByColumnSpecs = new ArrayList<>(builder.orderByColumnSpecs);
            context = builder.context;
        }

        public Builder setDataSource(DataSource dataSource) {
            this.dataSource = dataSource;
            return this;
        }

        public Builder setDataSource(String dataSource) {
            this.dataSource = new TableDataSource(dataSource);
            return this;
        }

        public Builder setDataSource(Query query) {
            this.dataSource = new QueryDataSource(query);
            return this;
        }

        public Builder setInterval(QuerySegmentSpec interval) {
            return setQuerySegmentSpec(interval);
        }

        public Builder setInterval(List<Interval> intervals) {
            return setQuerySegmentSpec(new LegacySegmentSpec(intervals));
        }

        public Builder setInterval(Interval interval) {
            return setQuerySegmentSpec(new LegacySegmentSpec(interval));
        }

        public Builder setInterval(String interval) {
            return setQuerySegmentSpec(new LegacySegmentSpec(interval));
        }

        public Builder setVirtualColumns(VirtualColumns virtualColumns) {
            this.virtualColumns = Preconditions.checkNotNull(virtualColumns, "virtualColumns");
            return this;
        }

        public Builder setVirtualColumns(VirtualColumn... virtualColumns) {
            this.virtualColumns = VirtualColumns.create(Arrays.asList(virtualColumns));
            return this;
        }

        public Builder setLimit(int limit) {
            ensureExplicitLimitSpecNotSet();
            this.limit = limit;
            this.postProcessingFn = null;
            return this;
        }

        public Builder setSubtotalsSpec(@Nullable List<List<String>> subtotalsSpec) {
            this.subtotalsSpec = subtotalsSpec;
            return this;
        }

        public Builder addOrderByColumn(String dimension) {
            return addOrderByColumn(dimension, null);
        }

        public Builder addOrderByColumn(String dimension, @Nullable OrderByColumnSpec.Direction direction) {
            return addOrderByColumn(new OrderByColumnSpec(dimension, direction));
        }

        public Builder addOrderByColumn(OrderByColumnSpec columnSpec) {
            ensureExplicitLimitSpecNotSet();
            this.orderByColumnSpecs.add(columnSpec);
            this.postProcessingFn = null;
            return this;
        }

        public Builder setLimitSpec(LimitSpec limitSpec) {
            Preconditions.checkNotNull(limitSpec);
            ensureFluentLimitsNotSet();
            this.limitSpec = limitSpec;
            this.postProcessingFn = null;
            return this;
        }

        private void ensureExplicitLimitSpecNotSet() {
            if (limitSpec != null) {
                throw new ISE("Ambiguous build, limitSpec[%s] already set", limitSpec);
            }
        }

        private void ensureFluentLimitsNotSet() {
            if (!(limit == Integer.MAX_VALUE && orderByColumnSpecs.isEmpty())) {
                throw new ISE("Ambiguous build, limit[%s] or columnSpecs[%s] already set.", limit,
                        orderByColumnSpecs);
            }
        }

        public Builder setQuerySegmentSpec(QuerySegmentSpec querySegmentSpec) {
            this.querySegmentSpec = querySegmentSpec;
            return this;
        }

        public Builder setDimFilter(@Nullable DimFilter dimFilter) {
            this.dimFilter = dimFilter;
            return this;
        }

        public Builder setGranularity(Granularity granularity) {
            this.granularity = granularity;
            return this;
        }

        public Builder addDimension(String column) {
            return addDimension(column, column);
        }

        public Builder addDimension(String column, String outputName) {
            return addDimension(new DefaultDimensionSpec(column, outputName));
        }

        public Builder addDimension(DimensionSpec dimension) {
            if (dimensions == null) {
                dimensions = new ArrayList<>();
            }

            dimensions.add(dimension);
            this.postProcessingFn = null;
            return this;
        }

        public Builder setDimensions(List<DimensionSpec> dimensions) {
            this.dimensions = Lists.newArrayList(dimensions);
            this.postProcessingFn = null;
            return this;
        }

        public Builder setDimensions(DimensionSpec... dimensions) {
            this.dimensions = new ArrayList<>(Arrays.asList(dimensions));
            this.postProcessingFn = null;
            return this;
        }

        public Builder addAggregator(AggregatorFactory aggregator) {
            if (aggregatorSpecs == null) {
                aggregatorSpecs = new ArrayList<>();
            }

            aggregatorSpecs.add(aggregator);
            this.postProcessingFn = null;
            return this;
        }

        public Builder setAggregatorSpecs(List<AggregatorFactory> aggregatorSpecs) {
            this.aggregatorSpecs = Lists.newArrayList(aggregatorSpecs);
            this.postProcessingFn = null;
            return this;
        }

        public Builder setAggregatorSpecs(AggregatorFactory... aggregatorSpecs) {
            this.aggregatorSpecs = new ArrayList<>(Arrays.asList(aggregatorSpecs));
            this.postProcessingFn = null;
            return this;
        }

        public Builder setPostAggregatorSpecs(List<PostAggregator> postAggregatorSpecs) {
            this.postAggregatorSpecs = Lists.newArrayList(postAggregatorSpecs);
            this.postProcessingFn = null;
            return this;
        }

        public Builder setContext(Map<String, Object> context) {
            this.context = context;
            return this;
        }

        public Builder overrideContext(Map<String, Object> contextOverride) {
            this.context = computeOverriddenContext(context, contextOverride);
            return this;
        }

        public Builder setHavingSpec(@Nullable HavingSpec havingSpec) {
            this.havingSpec = havingSpec;
            this.postProcessingFn = null;
            return this;
        }

        public Builder copy() {
            return new Builder(this);
        }

        public GroupByQuery build() {
            final LimitSpec theLimitSpec;
            if (limitSpec == null) {
                if (orderByColumnSpecs.isEmpty() && limit == Integer.MAX_VALUE) {
                    theLimitSpec = NoopLimitSpec.instance();
                } else {
                    theLimitSpec = new DefaultLimitSpec(orderByColumnSpecs, limit);
                }
            } else {
                theLimitSpec = limitSpec;
            }

            return new GroupByQuery(dataSource, querySegmentSpec, virtualColumns, dimFilter, granularity,
                    dimensions, aggregatorSpecs, postAggregatorSpecs, havingSpec, theLimitSpec, subtotalsSpec,
                    postProcessingFn, context);
        }
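
        // Illustrative note (not in the original source): build() reconciles the two ways of specifying a
        // limit. If setLimitSpec() was never called, a fluent setLimit(100) plus addOrderByColumn("x")
        // becomes new DefaultLimitSpec([x ascending], 100); if neither fluent limits nor an explicit
        // limitSpec were given, NoopLimitSpec.instance() is used.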
    }

    @Override
    public String toString() {
        return "GroupByQuery{" + "dataSource='" + getDataSource() + '\'' + ", querySegmentSpec="
                + getQuerySegmentSpec() + ", virtualColumns=" + virtualColumns + ", limitSpec=" + limitSpec
                + ", dimFilter=" + dimFilter + ", granularity=" + getGranularity() + ", dimensions=" + dimensions
                + ", aggregatorSpecs=" + aggregatorSpecs + ", postAggregatorSpecs=" + postAggregatorSpecs
                + ", havingSpec=" + havingSpec + ", context=" + getContext() + '}';
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        final GroupByQuery that = (GroupByQuery) o;
        return Objects.equals(virtualColumns, that.virtualColumns) && Objects.equals(limitSpec, that.limitSpec)
                && Objects.equals(havingSpec, that.havingSpec) && Objects.equals(dimFilter, that.dimFilter)
                && Objects.equals(dimensions, that.dimensions)
                && Objects.equals(aggregatorSpecs, that.aggregatorSpecs)
                && Objects.equals(postAggregatorSpecs, that.postAggregatorSpecs);
    }

    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), virtualColumns, limitSpec, havingSpec, dimFilter, dimensions,
                aggregatorSpecs, postAggregatorSpecs);
    }
}