Java tutorial: Apache Hive's SharedWorkOptimizer (shared computation optimizer)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.optimizer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.parse.GenTezUtils;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multiset;
import com.google.common.collect.TreeMultiset;

/**
 * Shared computation optimizer.
 *
 * <p>Originally, this rule would find scan operators over the same table
 * in the query plan and merge them if they met some preconditions.
 *
 *  TS   TS             TS
 *  |    |      ->     /  \
 *  Op   Op           Op  Op
 *
 * <p>Now the rule has been extended to find opportunities to other operators
 * downstream, not only a single table scan.
 *
 *  TS1   TS2    TS1   TS2          TS1   TS2
 *   |     |      |     |            |     |
 *   |    RS      |    RS            |    RS
 *    \   /        \   /      ->      \   /
 *   MapJoin      MapJoin            MapJoin
 *      |            |                /   \
 *      Op           Op              Op   Op
 *
 * <p>A limitation in the current implementation is that the optimizer does not
 * go beyond a work boundary.
 *
 * <p>The optimization only works with the Tez execution engine.
 */
public class SharedWorkOptimizer extends Transform {

  private final static Logger LOG = LoggerFactory.getLogger(SharedWorkOptimizer.class);

  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {

    final Map<String, TableScanOperator> topOps = pctx.getTopOps();
    if (topOps.size() < 2) {
      // Nothing to do, bail out
      return pctx;
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Before SharedWorkOptimizer:\n" + Operator.toString(pctx.getTopOps().values()));
    }

    // Cache to use during optimization
    SharedWorkOptimizerCache optimizerCache = new SharedWorkOptimizerCache();

    // Gather information about the DPP table scans and store it in the cache
    gatherDPPTableScanOps(pctx, optimizerCache);

    // Map of dbName.TblName -> TSOperator
    Multimap<String, TableScanOperator> tableNameToOps = splitTableScanOpsByTable(pctx);

    // We enforce a certain order when we do the reutilization.
    // In particular, we use size of table x number of reads to
    // rank the tables.
    List<Entry<String, Long>> sortedTables = rankTablesByAccumulatedSize(pctx);
    LOG.debug("Sorted tables by size: {}", sortedTables);

    // Execute optimization
    Multimap<String, TableScanOperator> existingOps = ArrayListMultimap.create();
    Set<Operator<?>> removedOps = new HashSet<>();
    for (Entry<String, Long> tablePair : sortedTables) {
      String tableName = tablePair.getKey();
      for (TableScanOperator discardableTsOp : tableNameToOps.get(tableName)) {
        if (removedOps.contains(discardableTsOp)) {
          LOG.debug("Skip {} as it has been already removed", discardableTsOp);
          continue;
        }
        Collection<TableScanOperator> prevTsOps = existingOps.get(tableName);
        for (TableScanOperator retainableTsOp : prevTsOps) {
          if (removedOps.contains(retainableTsOp)) {
            LOG.debug("Skip {} as it has been already removed", retainableTsOp);
            continue;
          }
          // First we quickly check if the two table scan operators can actually be merged
          boolean mergeable = areMergeable(pctx, optimizerCache, retainableTsOp, discardableTsOp);
          if (!mergeable) {
            // Skip
            LOG.debug("{} and {} cannot be merged", retainableTsOp, discardableTsOp);
            continue;
          }
          // Secondly, we extract information about the part of the tree that can be merged
          // as well as some structural information (memory consumption) that needs to be
          // used to determine whether the merge can happen
          SharedResult sr = extractSharedOptimizationInfo(
              pctx, optimizerCache, retainableTsOp, discardableTsOp);
          // It seems these two operators can be merged.
          // Check that plan meets some preconditions before doing it.
          // In particular, in the presence of map joins in the upstream plan:
          // - we cannot exceed the noconditional task size, and
          // - if we already merged the big table, we cannot merge the broadcast
          //   tables.
          if (!validPreConditions(pctx, optimizerCache, sr)) {
            // Skip
            LOG.debug("{} and {} do not meet preconditions", retainableTsOp, discardableTsOp);
            continue;
          }
          // We can merge
          if (sr.retainableOps.size() > 1) {
            // More than TS operator
            Operator<?> lastRetainableOp = sr.retainableOps.get(sr.retainableOps.size() - 1);
            Operator<?> lastDiscardableOp = sr.discardableOps.get(sr.discardableOps.size() - 1);
            if (lastDiscardableOp.getNumChild() != 0) {
              List<Operator<? extends OperatorDesc>> allChildren =
                  Lists.newArrayList(lastDiscardableOp.getChildOperators());
              for (Operator<? extends OperatorDesc> op : allChildren) {
                lastDiscardableOp.getChildOperators().remove(op);
                op.replaceParent(lastDiscardableOp, lastRetainableOp);
                lastRetainableOp.getChildOperators().add(op);
              }
            }
            LOG.debug("Merging subtree starting at {} into subtree starting at {}",
                discardableTsOp, retainableTsOp);
          } else {
            // Only TS operator
            ExprNodeGenericFuncDesc exprNode = null;
            if (retainableTsOp.getConf().getFilterExpr() != null) {
              // Push filter on top of children
              pushFilterToTopOfTableScan(optimizerCache, retainableTsOp);
              // Clone to push to table scan
              exprNode = (ExprNodeGenericFuncDesc) retainableTsOp.getConf().getFilterExpr();
            }
            if (discardableTsOp.getConf().getFilterExpr() != null) {
              // Push filter on top
              pushFilterToTopOfTableScan(optimizerCache, discardableTsOp);
              ExprNodeGenericFuncDesc tsExprNode = discardableTsOp.getConf().getFilterExpr();
              if (exprNode != null && !exprNode.isSame(tsExprNode)) {
                // We merge filters from previous scan by ORing with filters from current scan
                if (exprNode.getGenericUDF() instanceof GenericUDFOPOr) {
                  List<ExprNodeDesc> newChildren = new ArrayList<>(exprNode.getChildren().size() + 1);
                  for (ExprNodeDesc childExprNode : exprNode.getChildren()) {
                    if (childExprNode.isSame(tsExprNode)) {
                      // We do not need to do anything, it is in the OR expression
                      break;
                    }
                    newChildren.add(childExprNode);
                  }
                  if (exprNode.getChildren().size() == newChildren.size()) {
                    newChildren.add(tsExprNode);
                    exprNode = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), newChildren);
                  }
                } else {
                  exprNode = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(),
                      Arrays.<ExprNodeDesc>asList(exprNode, tsExprNode));
                }
              }
            }
            // Replace filter
            retainableTsOp.getConf().setFilterExpr(exprNode);
            // Replace table scan operator
            List<Operator<? extends OperatorDesc>> allChildren =
                Lists.newArrayList(discardableTsOp.getChildOperators());
            for (Operator<? extends OperatorDesc> op : allChildren) {
              discardableTsOp.getChildOperators().remove(op);
              op.replaceParent(discardableTsOp, retainableTsOp);
              retainableTsOp.getChildOperators().add(op);
            }
            LOG.debug("Merging {} into {}", discardableTsOp, retainableTsOp);
          }

          // First we remove the input operators of the expression that
          // we are going to eliminate
          for (Operator<?> op : sr.discardableInputOps) {
            OperatorUtils.removeOperator(op);
            optimizerCache.removeOp(op);
            removedOps.add(op);
            // Remove DPP predicates
            if (op instanceof ReduceSinkOperator) {
              SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
              if (sjbi != null && !sr.discardableOps.contains(sjbi.getTsOp())
                  && !sr.discardableInputOps.contains(sjbi.getTsOp())) {
                GenTezUtils.removeSemiJoinOperator(pctx, (ReduceSinkOperator) op, sjbi.getTsOp());
              }
            } else if (op instanceof AppMasterEventOperator) {
              DynamicPruningEventDesc dped = (DynamicPruningEventDesc) op.getConf();
              if (!sr.discardableOps.contains(dped.getTableScan())
                  && !sr.discardableInputOps.contains(dped.getTableScan())) {
                GenTezUtils.removeSemiJoinOperator(pctx, (AppMasterEventOperator) op,
                    dped.getTableScan());
              }
            }
            LOG.debug("Input operator removed: {}", op);
          }
          // Then we merge the operators of the works we are going to merge
          optimizerCache.removeOpAndCombineWork(discardableTsOp, retainableTsOp);
          removedOps.add(discardableTsOp);
          // Finally we remove the expression from the tree
          for (Operator<?> op : sr.discardableOps) {
            OperatorUtils.removeOperator(op);
            optimizerCache.removeOp(op);
            removedOps.add(op);
            if (sr.discardableOps.size() == 1) {
              // If there is a single discardable operator, it is a TableScanOperator
              // and it means that we have merged filter expressions for it. Thus, we
              // might need to remove DPP predicates from the retainable TableScanOperator
              Collection<Operator<?>> c =
                  optimizerCache.tableScanToDPPSource.get((TableScanOperator) op);
              for (Operator<?> dppSource : c) {
                if (dppSource instanceof ReduceSinkOperator) {
                  GenTezUtils.removeSemiJoinOperator(pctx, (ReduceSinkOperator) dppSource,
                      (TableScanOperator) sr.retainableOps.get(0));
                } else if (dppSource instanceof AppMasterEventOperator) {
                  GenTezUtils.removeSemiJoinOperator(pctx, (AppMasterEventOperator) dppSource,
                      (TableScanOperator) sr.retainableOps.get(0));
                }
              }
            }
            LOG.debug("Operator removed: {}", op);
          }

          break;
        }

        if (removedOps.contains(discardableTsOp)) {
          // This operator has been removed, remove it from the list of existing operators
          existingOps.remove(tableName, discardableTsOp);
        } else {
          // This operator has not been removed, include it in the list of existing operators
          existingOps.put(tableName, discardableTsOp);
        }
      }
    }

    // Remove unused table scan operators
    Iterator<Entry<String, TableScanOperator>> it = topOps.entrySet().iterator();
    while (it.hasNext()) {
      Entry<String, TableScanOperator> e = it.next();
      if (e.getValue().getNumChild() == 0) {
        it.remove();
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("After SharedWorkOptimizer:\n" + Operator.toString(pctx.getTopOps().values()));
    }

    return pctx;
  }

  /**
   * This method gathers the TS operators with DPP from the context and
   * stores them into the input optimization cache.
   */
  private static void gatherDPPTableScanOps(
      ParseContext pctx, SharedWorkOptimizerCache optimizerCache) throws SemanticException {
    // Find TS operators with partition pruning enabled in plan
    // because these TS may potentially read different data for
    // different pipelines.
    // These can be:
    // 1) TS with DPP.
    // 2) TS with semijoin DPP.
    Map<String, TableScanOperator> topOps = pctx.getTopOps();
    Collection<Operator<? extends OperatorDesc>> tableScanOps =
        Lists.<Operator<?>>newArrayList(topOps.values());
    Set<AppMasterEventOperator> s =
        OperatorUtils.findOperators(tableScanOps, AppMasterEventOperator.class);
    for (AppMasterEventOperator a : s) {
      if (a.getConf() instanceof DynamicPruningEventDesc) {
        DynamicPruningEventDesc dped = (DynamicPruningEventDesc) a.getConf();
        optimizerCache.tableScanToDPPSource.put(dped.getTableScan(), a);
      }
    }
    for (Entry<ReduceSinkOperator, SemiJoinBranchInfo> e
        : pctx.getRsToSemiJoinBranchInfo().entrySet()) {
      optimizerCache.tableScanToDPPSource.put(e.getValue().getTsOp(), e.getKey());
    }
    LOG.debug("DPP information stored in the cache: {}", optimizerCache.tableScanToDPPSource);
  }

  private static Multimap<String, TableScanOperator> splitTableScanOpsByTable(ParseContext pctx) {
    Multimap<String, TableScanOperator> tableNameToOps = ArrayListMultimap.create();
    for (Entry<String, TableScanOperator> e : pctx.getTopOps().entrySet()) {
      TableScanOperator tsOp = e.getValue();
      tableNameToOps.put(tsOp.getConf().getTableMetadata().getDbName() + "."
          + tsOp.getConf().getTableMetadata().getTableName(), tsOp);
    }
    return tableNameToOps;
  }

  private static List<Entry<String, Long>> rankTablesByAccumulatedSize(ParseContext pctx) {
    Map<String, Long> tableToTotalSize = new HashMap<>();
    for (Entry<String, TableScanOperator> e : pctx.getTopOps().entrySet()) {
      TableScanOperator tsOp = e.getValue();
      String tableName = tsOp.getConf().getTableMetadata().getDbName() + "."
          + tsOp.getConf().getTableMetadata().getTableName();
      long tableSize = tsOp.getStatistics() != null ?
          tsOp.getStatistics().getDataSize() : 0L;
      Long totalSize = tableToTotalSize.get(tableName);
      if (totalSize != null) {
        tableToTotalSize.put(tableName, StatsUtils.safeAdd(totalSize, tableSize));
      } else {
        tableToTotalSize.put(tableName, tableSize);
      }
    }
    List<Entry<String, Long>> sortedTables = new LinkedList<>(tableToTotalSize.entrySet());
    Collections.sort(sortedTables, Collections.reverseOrder(
        new Comparator<Map.Entry<String, Long>>() {
          public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
            return (o1.getValue()).compareTo(o2.getValue());
          }
        }));
    return sortedTables;
  }

  private static boolean areMergeable(ParseContext pctx, SharedWorkOptimizerCache optimizerCache,
      TableScanOperator tsOp1, TableScanOperator tsOp2) throws SemanticException {
    // First we check if the two table scan operators can actually be merged
    // If schemas do not match, we currently do not merge
    List<String> prevTsOpNeededColumns = tsOp1.getNeededColumns();
    List<String> tsOpNeededColumns = tsOp2.getNeededColumns();
    if (prevTsOpNeededColumns.size() != tsOpNeededColumns.size()) {
      return false;
    }
    boolean notEqual = false;
    for (int i = 0; i < prevTsOpNeededColumns.size(); i++) {
      if (!prevTsOpNeededColumns.get(i).equals(tsOpNeededColumns.get(i))) {
        notEqual = true;
        break;
      }
    }
    if (notEqual) {
      return false;
    }
    // If row limit does not match, we currently do not merge
    if (tsOp1.getConf().getRowLimit() != tsOp2.getConf().getRowLimit()) {
      return false;
    }
    // If partitions do not match, we currently do not merge
    PrunedPartitionList prevTsOpPPList = pctx.getPrunedPartitions(tsOp1);
    PrunedPartitionList tsOpPPList = pctx.getPrunedPartitions(tsOp2);
    if (!prevTsOpPPList.getPartitions().equals(tsOpPPList.getPartitions())) {
      return false;
    }
    // If it is a DPP table scan, check whether it actually refers to the same target, column, etc.
    // Further, the DPP value needs to be generated from same subtree
    List<Operator<?>> dppsOp1 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp1));
    List<Operator<?>> dppsOp2 = new ArrayList<>(optimizerCache.tableScanToDPPSource.get(tsOp2));
    if (dppsOp1.isEmpty() && dppsOp2.isEmpty()) {
      return true;
    }
    for (int i = 0; i < dppsOp1.size(); i++) {
      Operator<?> op = dppsOp1.get(i);
      if (op instanceof ReduceSinkOperator) {
        Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
        if (ascendants.contains(tsOp2)) {
          dppsOp1.remove(i);
          i--;
        }
      }
    }
    for (int i = 0; i < dppsOp2.size(); i++) {
      Operator<?> op = dppsOp2.get(i);
      if (op instanceof ReduceSinkOperator) {
        Set<Operator<?>> ascendants = findAscendantWorkOperators(pctx, optimizerCache, op);
        if (ascendants.contains(tsOp1)) {
          dppsOp2.remove(i);
          i--;
        }
      }
    }
    if (dppsOp1.size() != dppsOp2.size()) {
      // Only first or second operator contains DPP pruning
      return false;
    }
    // Check if DPP branches are equal
    for (int i = 0; i < dppsOp1.size(); i++) {
      Operator<?> dppOp1 = dppsOp1.get(i);
      BitSet bs = new BitSet();
      for (int j = 0; j < dppsOp2.size(); j++) {
        if (!bs.get(j)) {
          // If not visited yet
          Operator<?> dppOp2 = dppsOp2.get(j);
          if (compareAndGatherOps(pctx, dppOp1, dppOp2) != null) {
            // The DPP operator/branch are equal
            bs.set(j);
            break;
          }
        }
      }
      if (bs.cardinality() == i) {
        return false;
      }
    }
    return true;
  }

  private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp,
      TableScanOperator discardableTsOp) throws SemanticException {
    Set<Operator<?>> retainableOps = new LinkedHashSet<>();
    Set<Operator<?>> discardableOps = new LinkedHashSet<>();
    Set<Operator<?>> discardableInputOps = new HashSet<>();
    long dataSize = 0L;
    long maxDataSize = 0L;

    retainableOps.add(retainableTsOp);
    discardableOps.add(discardableTsOp);
    Operator<?> equalOp1 = retainableTsOp;
    Operator<?> equalOp2 = discardableTsOp;
    if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
      // TODO: Support checking multiple child operators to merge further.
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
      return new SharedResult(retainableOps, discardableOps, discardableInputOps,
          dataSize, maxDataSize);
    }
    Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
    Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);

    // Special treatment for Filter operator that ignores the DPP predicates
    if (currentOp1 instanceof FilterOperator && currentOp2 instanceof FilterOperator) {
      boolean equalFilters = false;
      FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
      FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();

      if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred()
          && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
        Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
        Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
        if (conjsOp1String.equals(conjsOp2String)) {
          equalFilters = true;
        }
      }

      if (equalFilters) {
        equalOp1 = currentOp1;
        equalOp2 = currentOp2;
        retainableOps.add(equalOp1);
        discardableOps.add(equalOp2);
        if (currentOp1.getChildOperators().size() > 1
            || currentOp2.getChildOperators().size() > 1) {
          // TODO: Support checking multiple child operators to merge further.
          discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps));
          discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
          discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
              discardableInputOps));
          return new SharedResult(retainableOps, discardableOps, discardableInputOps,
              dataSize, maxDataSize);
        }
        currentOp1 = currentOp1.getChildOperators().get(0);
        currentOp2 = currentOp2.getChildOperators().get(0);
      } else {
        // Bail out
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps));
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
            discardableInputOps));
        return new SharedResult(retainableOps, discardableOps, discardableInputOps,
            dataSize, maxDataSize);
      }
    }

    // Try to merge rest of operators
    while (!(currentOp1 instanceof ReduceSinkOperator)) {
      // Check whether current operators are equal
      if (!compareOperator(pctx, currentOp1, currentOp2)) {
        // If they are not equal, we could zip up till here
        break;
      }
      if (currentOp1.getParentOperators().size() != currentOp2.getParentOperators().size()) {
        // If they are not equal, we could zip up till here
        break;
      }
      if (currentOp1.getParentOperators().size() > 1) {
        List<Operator<?>> discardableOpsForCurrentOp = new ArrayList<>();
        int idx = 0;
        for (; idx < currentOp1.getParentOperators().size(); idx++) {
          Operator<?> parentOp1 = currentOp1.getParentOperators().get(idx);
          Operator<?> parentOp2 = currentOp2.getParentOperators().get(idx);
          if (parentOp1 == equalOp1 && parentOp2 == equalOp2) {
            continue;
          }
          if ((parentOp1 == equalOp1 && parentOp2 != equalOp2)
              || (parentOp1 != equalOp1 && parentOp2 == equalOp2)) {
            // Input operator is not in the same position
            break;
          }
          // Compare input
          List<Operator<?>> removeOpsForCurrentInput =
              compareAndGatherOps(pctx, parentOp1, parentOp2);
          if (removeOpsForCurrentInput == null) {
            // Inputs are not the same, bail out
            break;
          }
          // Add inputs to ops to remove
          discardableOpsForCurrentOp.addAll(removeOpsForCurrentInput);
        }
        if (idx != currentOp1.getParentOperators().size()) {
          // If inputs are not equal, we could zip up till here
          break;
        }
        discardableInputOps.addAll(discardableOpsForCurrentOp);
      }
      equalOp1 = currentOp1;
      equalOp2 = currentOp2;
      retainableOps.add(equalOp1);
      discardableOps.add(equalOp2);
      if (equalOp1 instanceof MapJoinOperator) {
        MapJoinOperator mop = (MapJoinOperator) equalOp1;
        dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
        maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
      }
      if (currentOp1.getChildOperators().size() > 1
          || currentOp2.getChildOperators().size() > 1) {
        // TODO: Support checking multiple child operators to merge further.
        break;
      }
      // Update for next iteration
      currentOp1 = currentOp1.getChildOperators().get(0);
      currentOp2 = currentOp2.getChildOperators().get(0);
    }

    // Add the rest to the memory consumption
    Set<Operator<?>> opsWork1 = findWorkOperators(optimizerCache, currentOp1);
    for (Operator<?> op : opsWork1) {
      if (op instanceof MapJoinOperator && !retainableOps.contains(op)) {
        MapJoinOperator mop = (MapJoinOperator) op;
        dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
        maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
      }
    }
    Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, currentOp2);
    for (Operator<?> op : opsWork2) {
      if (op instanceof MapJoinOperator && !discardableOps.contains(op)) {
        MapJoinOperator mop = (MapJoinOperator) op;
        dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
        maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
      }
    }

    discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableInputOps));
    discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
    discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
        discardableInputOps));
    return new SharedResult(retainableOps, discardableOps, discardableInputOps,
        dataSize, maxDataSize);
  }

  private static Multiset<String> extractConjsIgnoringDPPPreds(ExprNodeDesc predicate) {
    List<ExprNodeDesc> conjsOp = ExprNodeDescUtils.split(predicate);
    Multiset<String> conjsOpString = TreeMultiset.create();
    for (int i = 0; i < conjsOp.size(); i++) {
      if (conjsOp.get(i) instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc func = (ExprNodeGenericFuncDesc) conjsOp.get(i);
        if (GenericUDFInBloomFilter.class == func.getGenericUDF().getClass()) {
          continue;
        } else if (GenericUDFBetween.class == func.getGenericUDF().getClass()
            && (func.getChildren().get(2) instanceof ExprNodeDynamicValueDesc
                || func.getChildren().get(3) instanceof ExprNodeDynamicValueDesc)) {
          continue;
        }
      } else if (conjsOp.get(i) instanceof ExprNodeDynamicListDesc) {
        continue;
      }
      conjsOpString.add(conjsOp.get(i).toString());
    }
    return conjsOpString;
  }

  private static Set<Operator<?>> gatherDPPBranchOps(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, Set<Operator<?>> ops) {
    Set<Operator<?>> dppBranches = new HashSet<>();
    for (Operator<?> op : ops) {
      if (op instanceof TableScanOperator) {
        Collection<Operator<?>> c =
            optimizerCache.tableScanToDPPSource.get((TableScanOperator) op);
        for (Operator<?> dppSource : c) {
          // Remove the branches
          Operator<?> currentOp = dppSource;
          while (currentOp.getNumChild() <= 1) {
            dppBranches.add(currentOp);
            currentOp = currentOp.getParentOperators().get(0);
          }
        }
      }
    }
    return dppBranches;
  }

  private static Set<Operator<?>> gatherDPPBranchOps(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, Set<Operator<?>> ops,
      Set<Operator<?>> discardedOps) {
    Set<Operator<?>> dppBranches = new HashSet<>();
    for (Operator<?> op : ops) {
      if (op instanceof TableScanOperator) {
        Collection<Operator<?>> c =
            optimizerCache.tableScanToDPPSource.get((TableScanOperator) op);
        for (Operator<?> dppSource : c) {
          Set<Operator<?>> ascendants =
              findAscendantWorkOperators(pctx, optimizerCache, dppSource);
          if (!Collections.disjoint(ascendants, discardedOps)) {
            // Remove branch
            Operator<?> currentOp = dppSource;
            while (currentOp.getNumChild() <= 1) {
              dppBranches.add(currentOp);
              currentOp = currentOp.getParentOperators().get(0);
            }
          }
        }
      }
    }
    return dppBranches;
  }
  private static List<Operator<?>> compareAndGatherOps(ParseContext pctx,
      Operator<?> op1, Operator<?> op2) throws SemanticException {
    List<Operator<?>> result = new ArrayList<>();
    boolean mergeable = compareAndGatherOps(pctx, op1, op2, result, true);
    if (!mergeable) {
      return null;
    }
    return result;
  }

  private static boolean compareAndGatherOps(ParseContext pctx, Operator<?> op1, Operator<?> op2,
      List<Operator<?>> result, boolean gather) throws SemanticException {
    if (!compareOperator(pctx, op1, op2)) {
      LOG.debug("Operators not equal: {} and {}", op1, op2);
      return false;
    }

    if (gather) {
      result.add(op2);
    }

    List<Operator<? extends OperatorDesc>> op1ParentOperators = op1.getParentOperators();
    List<Operator<? extends OperatorDesc>> op2ParentOperators = op2.getParentOperators();
    if (op1ParentOperators != null && op2ParentOperators != null) {
      if (op1ParentOperators.size() != op2ParentOperators.size()) {
        return false;
      }
      for (int i = 0; i < op1ParentOperators.size(); i++) {
        Operator<?> op1ParentOp = op1ParentOperators.get(i);
        Operator<?> op2ParentOp = op2ParentOperators.get(i);
        boolean mergeable;
        if (gather && op2ParentOp.getChildOperators().size() < 2) {
          mergeable = compareAndGatherOps(pctx, op1ParentOp, op2ParentOp, result, true);
        } else {
          mergeable = compareAndGatherOps(pctx, op1ParentOp, op2ParentOp, result, false);
        }
        if (!mergeable) {
          return false;
        }
      }
    } else if (op1ParentOperators != null || op2ParentOperators != null) {
      return false;
    }

    return true;
  }

  @SuppressWarnings({ "rawtypes", "unchecked" })
  private static boolean compareOperator(ParseContext pctx, Operator<?> op1, Operator<?> op2)
      throws SemanticException {
    if (!op1.getClass().getName().equals(op2.getClass().getName())) {
      return false;
    }

    // We handle ReduceSinkOperator here as we can safely ignore table alias
    // and the current comparator implementation does not.
    // We can ignore table alias since when we compare ReduceSinkOperator, all
    // its ancestors need to match (down to table scan), thus we make sure that
    // both plans are the same.
    if (op1 instanceof ReduceSinkOperator) {
      ReduceSinkDesc op1Conf = ((ReduceSinkOperator) op1).getConf();
      ReduceSinkDesc op2Conf = ((ReduceSinkOperator) op2).getConf();

      if (StringUtils.equals(op1Conf.getKeyColString(), op2Conf.getKeyColString())
          && StringUtils.equals(op1Conf.getValueColsString(), op2Conf.getValueColsString())
          && StringUtils.equals(op1Conf.getParitionColsString(), op2Conf.getParitionColsString())
          && op1Conf.getTag() == op2Conf.getTag()
          && StringUtils.equals(op1Conf.getOrder(), op2Conf.getOrder())
          && op1Conf.getTopN() == op2Conf.getTopN()
          && op1Conf.isAutoParallel() == op2Conf.isAutoParallel()) {
        return true;
      } else {
        return false;
      }
    }

    // We handle TableScanOperator here as we can safely ignore table alias
    // and the current comparator implementation does not.
    if (op1 instanceof TableScanOperator) {
      TableScanOperator tsOp1 = (TableScanOperator) op1;
      TableScanOperator tsOp2 = (TableScanOperator) op2;
      TableScanDesc op1Conf = tsOp1.getConf();
      TableScanDesc op2Conf = tsOp2.getConf();
      if (StringUtils.equals(
              op1Conf.getTableMetadata().getDbName() + "."
                  + op1Conf.getTableMetadata().getTableName(),
              op2Conf.getTableMetadata().getDbName() + "."
                  + op2Conf.getTableMetadata().getTableName())
          && op1Conf.getNeededColumns().equals(op2Conf.getNeededColumns())
          && StringUtils.equals(op1Conf.getFilterExprString(), op2Conf.getFilterExprString())
          && pctx.getPrunedPartitions(tsOp1).getPartitions()
              .equals(pctx.getPrunedPartitions(tsOp2).getPartitions())
          && op1Conf.getRowLimit() == op2Conf.getRowLimit()) {
        return true;
      } else {
        return false;
      }
    }

    OperatorComparatorFactory.OperatorComparator operatorComparator =
        OperatorComparatorFactory.getOperatorComparator(op1.getClass());
    return operatorComparator.equals(op1, op2);
  }

  private static boolean validPreConditions(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, SharedResult sr) {
    // We check whether merging the works would cause the size of
    // the data in memory to grow too large.
    // TODO: Currently ignores GBY and PTF which may also buffer data in memory.
    if (sr.dataSize > sr.maxDataSize) {
      // Size surpasses limit, we cannot convert
      LOG.debug("accumulated data size: {} / max size: {}", sr.dataSize, sr.maxDataSize);
      return false;
    }

    TableScanOperator tsOp1 = (TableScanOperator) sr.retainableOps.get(0);
    TableScanOperator tsOp2 = (TableScanOperator) sr.discardableOps.get(0);

    // 1) The set of operators in the works of the TS operators need to meet
    // some requirements. In particular:
    // 1.1. None of the works that contain the TS operators can contain a Union
    // operator. This is not supported yet as we might end up with cycles in
    // the Tez DAG.
    // 1.2. There cannot be more than one DummyStore operator in the new resulting
    // work when the TS operators are merged. This is due to an assumption in
    // MergeJoinProc that needs to be further explored.
    // If any of these conditions are not met, we cannot merge.
    // TODO: Extend rule so it can be applied for these cases.
    final Set<Operator<?>> workOps1 = findWorkOperators(optimizerCache, tsOp1);
    final Set<Operator<?>> workOps2 = findWorkOperators(optimizerCache, tsOp2);
    boolean foundDummyStoreOp = false;
    for (Operator<?> op : workOps1) {
      if (op instanceof UnionOperator) {
        // We cannot merge (1.1)
        return false;
      }
      if (op instanceof DummyStoreOperator) {
        foundDummyStoreOp = true;
      }
    }
    for (Operator<?> op : workOps2) {
      if (op instanceof UnionOperator) {
        // We cannot merge (1.1)
        return false;
      }
      if (foundDummyStoreOp && op instanceof DummyStoreOperator) {
        // We cannot merge (1.2)
        return false;
      }
    }

    // 2) We check whether the output works will collide when we merge the operators.
    //
    //   Work1   Work2    (merge TS in W1 & W2)      Work1
    //       \   /                ->                  | |     X
    //       Work3                                   Work3
    //
    // If they do, we cannot merge. The reason is that Tez currently does
    // not support parallel edges, i.e., multiple edges from same work x
    // into same work y.
    final Set<Operator<?>> outputWorksOps1 = findChildWorkOperators(pctx, optimizerCache, tsOp1);
    final Set<Operator<?>> outputWorksOps2 = findChildWorkOperators(pctx, optimizerCache, tsOp2);
    if (!Collections.disjoint(outputWorksOps1, outputWorksOps2)) {
      // We cannot merge
      return false;
    }

    // 3) We check whether we will end up with the same operators inputting on the same work.
    //
    //       Work1       (merge TS in W2 & W3)       Work1
    //       /   \                ->                  | |     X
    //   Work2   Work3                               Work2
    //
    // If we do, we cannot merge. The reason is the same as above: Tez
    // currently does not support parallel edges.
    final Set<Operator<?>> inputWorksOps1 =
        findParentWorkOperators(pctx, optimizerCache, tsOp1);
    final Set<Operator<?>> inputWorksOps2 =
        findParentWorkOperators(pctx, optimizerCache, tsOp2, sr.discardableInputOps);
    if (!Collections.disjoint(inputWorksOps1, inputWorksOps2)) {
      // We cannot merge
      return false;
    }

    // 4) We check whether one of the operators is part of a work that is an input for
    // the work of the other operator.
    //
    //   Work1            (merge TS in W1 & W3)      Work1
    //    |                       ->                  |     X
    //   Work2                                       Work2
    //    |                                           |
    //   Work3                                       Work1
    //
    // If we do, we cannot merge, as we would end up with a cycle in the DAG.
    final Set<Operator<?>> descendantWorksOps1 =
        findDescendantWorkOperators(pctx, optimizerCache, tsOp1, sr.discardableInputOps);
    final Set<Operator<?>> descendantWorksOps2 =
        findDescendantWorkOperators(pctx, optimizerCache, tsOp2, sr.discardableInputOps);
    if (!Collections.disjoint(descendantWorksOps1, workOps2)
        || !Collections.disjoint(workOps1, descendantWorksOps2)) {
      return false;
    }

    return true;
  }

  private static Set<Operator<?>> findParentWorkOperators(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, Operator<?> start) {
    return findParentWorkOperators(pctx, optimizerCache, start, ImmutableSet.of());
  }

  private static Set<Operator<?>> findParentWorkOperators(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, Operator<?> start,
      Set<Operator<?>> excludeOps) {
    // Find operators in work
    Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start);
    // Gather input works operators
    Set<Operator<?>> set = new HashSet<Operator<?>>();
    for (Operator<?> op : workOps) {
      if (op.getParentOperators() != null) {
        for (Operator<?> parent : op.getParentOperators()) {
          if (parent instanceof ReduceSinkOperator && !excludeOps.contains(parent)) {
            set.addAll(findWorkOperators(optimizerCache, parent));
          }
        }
      } else if (op instanceof TableScanOperator) {
        // Check for DPP and semijoin DPP
        for (Operator<?> parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) {
          if (!excludeOps.contains(parent)) {
            set.addAll(findWorkOperators(optimizerCache, parent));
          }
        }
      }
    }
    return set;
  }

  private static Set<Operator<?>> findAscendantWorkOperators(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, Operator<?> start) {
    // Find operators in work
    Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start);
    // Gather input works operators
    Set<Operator<?>> result = new HashSet<Operator<?>>();
    Set<Operator<?>> set;
    while (!workOps.isEmpty()) {
      set = new HashSet<Operator<?>>();
      for (Operator<?> op : workOps) {
        if (op.getParentOperators() != null) {
          for (Operator<?> parent : op.getParentOperators()) {
            if (parent instanceof ReduceSinkOperator) {
              set.addAll(findWorkOperators(optimizerCache, parent));
            }
          }
        } else if (op instanceof TableScanOperator) {
          // Check for DPP and semijoin DPP
          for (Operator<?> parent : optimizerCache.tableScanToDPPSource.get((TableScanOperator) op)) {
            set.addAll(findWorkOperators(optimizerCache, parent));
          }
        }
      }
      workOps = set;
      result.addAll(set);
    }
    return result;
  }

  private static Set<Operator<?>> findChildWorkOperators(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, Operator<?> start) {
    // Find operators in work
    Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start);
    // Gather output works operators
    Set<Operator<?>> set = new HashSet<Operator<?>>();
    for (Operator<?> op : workOps) {
      if (op instanceof ReduceSinkOperator) {
        if (op.getChildOperators() != null) {
          // All children of RS are descendants
          for (Operator<?> child : op.getChildOperators()) {
            set.addAll(findWorkOperators(optimizerCache, child));
          }
        }
        // Semijoin DPP work is considered a child because work needs
        // to finish for it to execute
        SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
        if (sjbi != null) {
          set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp()));
        }
      } else if (op.getConf() instanceof DynamicPruningEventDesc) {
        // DPP work is considered a child because work needs
        // to finish for it to execute
        set.addAll(findWorkOperators(
            optimizerCache, ((DynamicPruningEventDesc) op.getConf()).getTableScan()));
      }
    }
    return set;
  }

  private static Set<Operator<?>> findDescendantWorkOperators(ParseContext pctx,
      SharedWorkOptimizerCache optimizerCache, Operator<?> start,
      Set<Operator<?>> excludeOps) {
    // Find operators in work
    Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start);
    // Gather output works operators
    Set<Operator<?>> result = new HashSet<Operator<?>>();
    Set<Operator<?>> set;
    while (!workOps.isEmpty()) {
      set = new HashSet<Operator<?>>();
      for (Operator<?> op : workOps) {
        if (excludeOps.contains(op)) {
          continue;
        }
        if (op instanceof ReduceSinkOperator) {
          if (op.getChildOperators() != null) {
            // All children of RS are descendants
            for (Operator<?> child : op.getChildOperators()) {
              set.addAll(findWorkOperators(optimizerCache, child));
            }
          }
          // Semijoin DPP work is considered a descendant because work needs
          // to finish for it to execute
          SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op);
          if (sjbi != null) {
            set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp()));
          }
        } else if (op.getConf() instanceof DynamicPruningEventDesc) {
          // DPP work is considered a descendant because work needs
          // to finish for it to execute
          set.addAll(findWorkOperators(
              optimizerCache, ((DynamicPruningEventDesc) op.getConf()).getTableScan()));
        }
      }
      workOps = set;
      result.addAll(set);
    }
    return result;
  }

  // Stores result in cache
  private static Set<Operator<?>> findWorkOperators(
      SharedWorkOptimizerCache optimizerCache, Operator<?> start) {
    Set<Operator<?>> c = optimizerCache.operatorToWorkOperators.get(start);
    if (!c.isEmpty()) {
      return c;
    }
    c = findWorkOperators(start, new HashSet<Operator<?>>());
    for (Operator<?> op : c) {
      optimizerCache.operatorToWorkOperators.putAll(op, c);
    }
    return c;
  }

  private static Set<Operator<?>> findWorkOperators(Operator<?> start, Set<Operator<?>> found) {
    found.add(start);
    if (start.getParentOperators() != null) {
      for (Operator<?> parent : start.getParentOperators()) {
        if (parent instanceof ReduceSinkOperator) {
          continue;
        }
        if (!found.contains(parent)) {
          findWorkOperators(parent, found);
        }
      }
    }
    if (start instanceof ReduceSinkOperator) {
      return found;
    }
    if (start.getChildOperators() != null) {
      for (Operator<?> child : start.getChildOperators()) {
        if (!found.contains(child)) {
          findWorkOperators(child, found);
        }
      }
    }
    return found;
  }

  private static void pushFilterToTopOfTableScan(
      SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp)
          throws UDFArgumentException {
    ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
    List<Operator<? extends OperatorDesc>> allChildren =
        Lists.newArrayList(tsOp.getChildOperators());
    for (Operator<? extends OperatorDesc> op : allChildren) {
      if (op instanceof FilterOperator) {
        FilterOperator filterOp = (FilterOperator) op;
        ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
        if (tableScanExprNode.isSame(filterExprNode)) {
          // We do not need to do anything
          return;
        }
        if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
          for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
            if (childExprNode.isSame(filterExprNode)) {
              // We do not need to do anything, it is in the OR expression
              // so probably we pushed previously
              return;
            }
          }
        }
        ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance(
            new GenericUDFOPAnd(),
            Arrays.<ExprNodeDesc>asList(tableScanExprNode.clone(), filterExprNode));
        filterOp.getConf().setPredicate(newPred);
      } else {
        Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(),
            new FilterDesc(tableScanExprNode.clone(), false),
            new RowSchema(tsOp.getSchema().getSignature()));
        tsOp.replaceChild(op, newOp);
        newOp.getParentOperators().add(tsOp);
        op.replaceParent(tsOp, newOp);
        newOp.getChildOperators().add(op);
        // Add to cache (same group as tsOp)
        optimizerCache.putIfWorkExists(newOp, tsOp);
      }
    }
  }

  private static class SharedResult {
    final List<Operator<?>> retainableOps;
    final List<Operator<?>> discardableOps;
    final Set<Operator<?>> discardableInputOps;
    final long dataSize;
    final long maxDataSize;

    private SharedResult(Collection<Operator<?>> retainableOps,
        Collection<Operator<?>> discardableOps, Set<Operator<?>> discardableInputOps,
        long dataSize, long maxDataSize) {
      this.retainableOps = ImmutableList.copyOf(retainableOps);
      this.discardableOps = ImmutableList.copyOf(discardableOps);
      this.discardableInputOps = ImmutableSet.copyOf(discardableInputOps);
      this.dataSize = dataSize;
      this.maxDataSize = maxDataSize;
    }
  }

  /** Cache to accelerate optimization */
  private static class SharedWorkOptimizerCache {
    // Operators that belong to each work
    final HashMultimap<Operator<?>, Operator<?>> operatorToWorkOperators =
        HashMultimap.<Operator<?>, Operator<?>>create();
    // Table scan operators to DPP sources
    final Multimap<TableScanOperator, Operator<?>> tableScanToDPPSource =
        HashMultimap.<TableScanOperator, Operator<?>>create();

    // Add new operator to cache work group of existing operator (if group exists)
    void putIfWorkExists(Operator<?> opToAdd, Operator<?> existingOp) {
      List<Operator<?>> c = ImmutableList.copyOf(operatorToWorkOperators.get(existingOp));
      if (!c.isEmpty()) {
        for (Operator<?> op : c) {
          operatorToWorkOperators.get(op).add(opToAdd);
        }
        operatorToWorkOperators.putAll(opToAdd, c);
        operatorToWorkOperators.put(opToAdd, opToAdd);
      }
    }

    // Remove operator
    void removeOp(Operator<?> opToRemove) {
      Set<Operator<?>> s = operatorToWorkOperators.get(opToRemove);
      s.remove(opToRemove);
      List<Operator<?>> c1 = ImmutableList.copyOf(s);
      if (!c1.isEmpty()) {
        for (Operator<?> op1 : c1) {
          operatorToWorkOperators.remove(op1, opToRemove); // Remove operator
        }
        operatorToWorkOperators.removeAll(opToRemove); // Remove entry for operator
      }
    }

    // Remove operator and combine
    void removeOpAndCombineWork(Operator<?> opToRemove, Operator<?> replacementOp) {
      Set<Operator<?>> s = operatorToWorkOperators.get(opToRemove);
      s.remove(opToRemove);
      List<Operator<?>> c1 = ImmutableList.copyOf(s);
      List<Operator<?>> c2 = ImmutableList.copyOf(operatorToWorkOperators.get(replacementOp));
      if (!c1.isEmpty() && !c2.isEmpty()) {
        for (Operator<?> op1 : c1) {
          operatorToWorkOperators.remove(op1, opToRemove); // Remove operator
          operatorToWorkOperators.putAll(op1, c2); // Add ops of new collection
        }
        operatorToWorkOperators.removeAll(opToRemove); // Remove entry for operator
        for (Operator<?> op2 : c2) {
          operatorToWorkOperators.putAll(op2, c1); // Add ops to existing collection
        }
      }
    }
  }
}
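
When the optimizer keeps only one of two table scans over the same table, the transform() method above replaces the surviving scan's filter with the OR of both scans' filters (the GenericUDFOPOr handling), while pushFilterToTopOfTableScan keeps each consumer's original, narrower predicate on top of the shared scan. The short, self-contained sketch below illustrates why that is safe. It deliberately uses java.util.function.Predicate and plain integers instead of Hive's ExprNodeDesc classes; the class name MergedScanFilterDemo and all variable names are hypothetical and are not part of the Hive code above.

import java.util.List;
import java.util.function.Predicate;
import java.util.stream.Collectors;

public class MergedScanFilterDemo {

  public static void main(String[] args) {
    // A toy "table" with one integer column.
    List<Integer> table = List.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);

    // Filters that the two original table scans would have pushed down.
    Predicate<Integer> scan1Filter = x -> x < 4;   // e.g. col < 4
    Predicate<Integer> scan2Filter = x -> x > 7;   // e.g. col > 7

    // After the merge, the single remaining scan uses the OR of both filters,
    // analogous to building an OR ExprNodeGenericFuncDesc in the optimizer.
    Predicate<Integer> mergedScanFilter = scan1Filter.or(scan2Filter);

    List<Integer> sharedScanOutput = table.stream()
        .filter(mergedScanFilter)
        .collect(Collectors.toList());

    // Each consumer re-applies its original predicate on top of the shared scan,
    // which is the role pushFilterToTopOfTableScan plays in the optimizer.
    List<Integer> branch1 = sharedScanOutput.stream().filter(scan1Filter).collect(Collectors.toList());
    List<Integer> branch2 = sharedScanOutput.stream().filter(scan2Filter).collect(Collectors.toList());

    System.out.println("shared scan: " + sharedScanOutput); // [1, 2, 3, 8, 9, 10]
    System.out.println("branch 1:    " + branch1);          // [1, 2, 3]
    System.out.println("branch 2:    " + branch2);          // [8, 9, 10]
  }
}

Each branch still sees exactly the rows its original scan would have produced, which is the property the merge relies on.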
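
SharedWorkOptimizerCache keeps, for every operator, the set of all operators that belong to the same work, and removeOpAndCombineWork rewires those sets when two works are fused. The standalone sketch below mimics that bookkeeping with plain strings and Guava's HashMultimap, under the assumption that operator identity can be modeled by a name; WorkGroupMergeDemo, TS1, FIL1, etc. are illustrative names only and do not appear in the optimizer.

import java.util.List;
import java.util.Set;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;

public class WorkGroupMergeDemo {

  public static void main(String[] args) {
    // operator -> all operators that live in the same work (including itself)
    HashMultimap<String, String> operatorToWorkOperators = HashMultimap.create();

    // Work A: TS1 -> FIL1; Work B: TS2 -> FIL2
    registerWork(operatorToWorkOperators, "TS1", "FIL1");
    registerWork(operatorToWorkOperators, "TS2", "FIL2");

    // Merge: TS2 is discarded and its work is combined into TS1's work.
    removeOpAndCombineWork(operatorToWorkOperators, "TS2", "TS1");

    // FIL2 now belongs to the merged work {TS1, FIL1, FIL2}; TS2 is gone.
    System.out.println(operatorToWorkOperators.get("FIL2"));
  }

  // Every operator in a work maps to every operator of that work, itself included.
  private static void registerWork(HashMultimap<String, String> cache, String... ops) {
    for (String op1 : ops) {
      for (String op2 : ops) {
        cache.put(op1, op2);
      }
    }
  }

  // Mirrors the cache logic above: unlink opToRemove everywhere and cross-link the
  // remaining members of its work with the members of replacementOp's work.
  private static void removeOpAndCombineWork(HashMultimap<String, String> cache,
      String opToRemove, String replacementOp) {
    Set<String> s = cache.get(opToRemove);
    s.remove(opToRemove);
    List<String> c1 = ImmutableList.copyOf(s);
    List<String> c2 = ImmutableList.copyOf(cache.get(replacementOp));
    if (!c1.isEmpty() && !c2.isEmpty()) {
      for (String op1 : c1) {
        cache.remove(op1, opToRemove);  // forget the discarded operator
        cache.putAll(op1, c2);          // link to the surviving work
      }
      cache.removeAll(opToRemove);      // drop the discarded operator's own entry
      for (String op2 : c2) {
        cache.putAll(op2, c1);          // link the surviving work back
      }
    }
  }
}

The symmetric cross-linking is what lets findWorkOperators answer "which operators share a work with X" from any member of the merged group after the optimization has rewritten the plan.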