org.apache.hadoop.hive.ql.parse.CommonSubtreeDetect.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hive.ql.parse.CommonSubtreeDetect.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.ExtractOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

/**
 * Detects the common sub-trees of two operator trees.
 */
public class CommonSubtreeDetect {

    private static final Log LOG = LogFactory.getLog(CommonSubtreeDetect.class.getName());

    // Linearized form of the first operator tree: operators interleaved with
    // "X" padding strings, as produced by genOptree.
    private List<Object> opTree1;
    // Linearized form of the second operator tree, built the same way.
    private List<Object> opTree2;

    // opCmpResTab[i][j] is the length of the run of matching entries that ends
    // at opTree1[i] / opTree2[j]; 0 when the two entries do not match.
    private int[][] opCmpResTab;

    // Maps an operator of the first tree to the operator of the second tree
    // it was matched with.
    private HashMap<Operator<?>, Operator<?>> sameOpToOp;

    // Parse context used for table lookups and updated when shadow operators
    // are removed.
    private ParseContext pCtx;

    /**
     * Creates a detector without any state: no operator lists, comparison
     * table, operator mapping or parse context are set up here.
     */
    public CommonSubtreeDetect() {
        super();
    }

    /**
     * Linearizes both operator trees and fills the comparison table for them.
     *
     * @param rootOp1 operator from which the first operator list is generated
     * @param rootOp2 operator from which the second operator list is generated
     * @param pCtx parse context kept for later lookups
     */
    public CommonSubtreeDetect(Operator<? extends OperatorDesc> rootOp1, Operator<? extends OperatorDesc> rootOp2,
            ParseContext pCtx) {
        this.pCtx = pCtx;
        this.sameOpToOp = new HashMap<Operator<?>, Operator<?>>();
        this.opTree1 = new ArrayList<Object>(genOptree(rootOp1));
        this.opTree2 = new ArrayList<Object>(genOptree(rootOp2));
        // one row per entry of opTree1, one column per entry of opTree2
        this.opCmpResTab = new int[this.opTree1.size()][this.opTree2.size()];
        buildResTab();
    }

    /**
     * Fills the comparison table. A cell holds the length of the run of
     * matching entries ending at that row/column pair: one more than the
     * upper-left neighbour when the two entries match, 0 for a mismatch.
     * Two operators match when their names are equal and either opCmp or
     * transitiveOpCmp accepts them; two strings match when they are equal.
     */
    private void buildResTab() {
        for (int row = 0; row < opCmpResTab.length; row++) {
            Object left = opTree1.get(row);
            for (int col = 0; col < opCmpResTab[row].length; col++) {
                Object right = opTree2.get(col);
                boolean hasDiagonal = row >= 1 && col >= 1;

                if (left instanceof Operator && right instanceof Operator) {
                    Operator<?> leftOp = (Operator<?>) left;
                    Operator<?> rightOp = (Operator<?>) right;
                    boolean same = leftOp.getName().equals(rightOp.getName())
                            && (opCmp(leftOp, rightOp) || transitiveOpCmp(leftOp, rightOp));
                    if (!same) {
                        opCmpResTab[row][col] = 0;
                    } else {
                        opCmpResTab[row][col] = hasDiagonal ? opCmpResTab[row - 1][col - 1] + 1 : 1;
                    }
                } else if (left instanceof String && right instanceof String) {
                    String leftStr = (String) left;
                    String rightStr = (String) right;
                    // a mismatching pair of strings leaves the cell untouched
                    if (leftStr.toString().equals(rightStr.toString())) {
                        opCmpResTab[row][col] = hasDiagonal ? opCmpResTab[row - 1][col - 1] + 1 : 1;
                    }
                } else {
                    opCmpResTab[row][col] = 0;
                }
            }
        }
    }

    /**
     * Collects the top operators reachable from the given operator by
     * following parent links.
     *
     * @param rootOp operator to start the upward walk from
     * @return the top operators found, in visiting order
     */
    public List<Operator<?>> getTsOps(Operator<?> rootOp) {
        List<Operator<?>> topOps = new ArrayList<Operator<?>>();
        getTsOps(topOps, rootOp);
        return topOps;
    }

    /**
     * Recursively walks up the parent links and records every operator that
     * has no parent but at least one child.
     *
     * @param visited accumulator for the top operators found so far
     * @param op operator currently being inspected
     */
    private void getTsOps(List<Operator<?>> visited, Operator<?> op) {
        int parentCount = op.getNumParent();
        if (parentCount == 0) {
            if (op.getNumChild() != 0) {
                visited.add(op);
            }
            return;
        }
        if (parentCount > 0) {
            for (Operator<?> ancestor : op.getParentOperators()) {
                getTsOps(visited, ancestor);
            }
        }
    }

    /**
     * Linearizes the operator tree that ends at {@code rootOp} into a list.
     * For every top operator the operators on the way down to
     * {@code rootOp} are appended (operators already in the list are
     * skipped), followed by one "X" padding string per operator that was
     * newly appended. For example, the tree
     *
     *       FS[13]
     *         |
     *      SEL[12]
     *         |
     *      JOIN[11]
     *       /   \
     *      /     \
     *     /       \
     *   RS[10]  RS[9]
     *     |       |
     *   TS[7]   TS[8]
     *
     * is converted into the following list:
     * TS[7]-RS[10]-JOIN[11]-SEL[12]-FS[13]-X-X-X-X-X-TS[8]-RS[9]-X-X
     *
     * @param rootOp the last operator of the tree
     * @return list of operators interleaved with "X" padding strings
     */
    private List<?> genOptree(Operator<?> rootOp) {
        List<Object> linked = new ArrayList<Object>();
        List<Operator<?>> walked = new ArrayList<Operator<?>>();
        for (Operator<?> topOp : getTsOps(rootOp)) {
            genOptree(walked, topOp, rootOp);

            // Iterate instead of draining with remove(0), which shifts the
            // whole backing array on every call.
            int validLen = 0;
            for (Operator<?> curOp : walked) {
                if (!linked.contains(curOp)) {
                    linked.add(curOp);
                    validLen++;
                }
            }
            walked.clear();

            // one padding entry per operator contributed by this top operator
            for (int k = 0; k < validLen; k++) {
                linked.add("X");
            }
        }
        return linked;
    }

    /**
     * Appends {@code op} and, recursively, its descendants to {@code walked},
     * stopping the descent at {@code rootOp}.
     *
     * @param walked accumulator of visited operators, in visiting order
     * @param op operator currently being visited
     * @param rootOp operator at which the descent stops
     */
    private void genOptree(List<Operator<?>> walked, Operator<?> op, Operator<?> rootOp) {
        walked.add(op);
        boolean reachedRoot = op.equals(rootOp);
        if (reachedRoot || op.getNumChild() <= 0) {
            return;
        }
        for (Operator<?> next : op.getChildOperators()) {
            genOptree(walked, next, rootOp);
        }
    }

    /**
     * Extracts the string version of the common sub-trees: repeatedly takes
     * the largest remaining common sub-string from the comparison table,
     * reverses it, collects it, and finally validates the collection.
     *
     * @return the validated set of common sub-strings
     */
    public HashSet<List<Object>> getCSTree() {
        HashSet<List<Object>> found = new HashSet<List<Object>>();

        // every row starts out as usable
        boolean[] valid = new boolean[opCmpResTab.length];
        for (int row = 0; row < valid.length; row++) {
            valid[row] = true;
        }

        List<Object> opList = getLargestCSTree(valid);
        while (opList != null && !opList.isEmpty()) {
            Collections.reverse(opList);
            found.add(opList);
            opList = getLargestCSTree(valid);
        }

        validate(found);
        return found;
    }

    /**
     * Second-chance comparison for two table scan operators based on the
     * version information kept in the inter-query flow context: they are
     * treated as equal when their versions are identical or when the
     * same-version map links one version to the other.
     *
     * Returns false, instead of failing with a ClassCastException or
     * NullPointerException, when either operator is not a table scan, when the
     * parse context is not a MultiParseContext, or when one of the operators
     * has no entry in the flow context.
     *
     * @param op1 first operator; callers only pass operators with equal names
     * @param op2 second operator
     * @return true when the two table scans should be considered the same
     */
    private boolean transitiveOpCmp(Operator<?> op1, Operator<?> op2) {
        if (!(op1 instanceof TableScanOperator) || !(op2 instanceof TableScanOperator)) {
            return false;
        }
        if (!(pCtx instanceof MultiParseContext)) {
            return false;
        }

        InterQueryFlowCtx flowCtx = ((MultiParseContext) pCtx).getQueryFlowCtx();
        if (flowCtx == null || flowCtx.getCsqSameVerMap().isEmpty()) {
            return false;
        }

        TableScanOperator ts1 = (TableScanOperator) op1;
        TableScanOperator ts2 = (TableScanOperator) op2;
        if (flowCtx.getOpToTabRd().get(ts1) == null || flowCtx.getOpToTabRd().get(ts2) == null) {
            return false;
        }

        HashMap<Integer, Integer> verMap = flowCtx.getCsqSameVerMap();
        int ver1 = flowCtx.getOpToTabRd().get(ts1).getKey();
        int ver2 = flowCtx.getOpToTabRd().get(ts2).getKey();
        Integer mappedFrom1 = verMap.get(ver1);
        Integer mappedFrom2 = verMap.get(ver2);

        boolean sameVersion = ver1 == ver2
                || (mappedFrom1 != null && mappedFrom1.intValue() == ver2)
                || (mappedFrom2 != null && mappedFrom2.intValue() == ver1);
        if (sameVersion) {
            LOG.info("Adjust comparing result of " + op1 + " and " + op2);
        }
        return sameVersion;
    }

    /**
     * Finds the longest common sub-string that ends on a still-valid row of
     * the comparison table, marks its rows as used and records the operator
     * pairs it consists of in sameOpToOp.
     *
     * The scan visits the diagonals that end in the last column and the
     * diagonals that end in the last row at a column index of at least
     * (row count - 1). On ties the candidate met last wins.
     * NOTE(review): diagonals ending in the last row at a smaller column
     * index are never visited; confirm this is intended for non-symmetric
     * tables.
     *
     * @param valid per-row flags; rows of the returned sub-string are set to
     *        false so that later calls do not start from them
     * @return the entries of the first operator list that form the sub-string,
     *         last entry first; empty when no sub-string of length 2 or more
     *         is left or when the table has no rows
     */
    private List<Object> getLargestCSTree(boolean[] valid) {
        int threshold = 2; // The smallest CSTree should be TS-RS
        int r_idx = -1, c_idx = -1;
        List<Object> opList = new ArrayList<Object>();

        // An empty table has no row 0 to take the column count from.
        if (opCmpResTab.length == 0) {
            return opList;
        }
        final int lastRow = opCmpResTab.length - 1;
        final int lastCol = opCmpResTab[0].length - 1;

        // Diagonals ending in the last column.
        for (int r = lastRow; r >= 0; r--) {
            for (int or = r, oc = lastCol; or >= 0 && oc >= 0; or--, oc--) {
                if (opCmpResTab[or][oc] >= threshold && valid[or]) {
                    threshold = opCmpResTab[or][oc];
                    r_idx = or;
                    c_idx = oc;
                }
            }
        }

        // Chances are one of the subString is longer, so further search is needed:
        // diagonals ending in the last row.
        for (int c = lastCol - 1; c >= lastRow; c--) {
            for (int or = lastRow, oc = c; or >= 0 && oc >= 0; or--, oc--) {
                if (opCmpResTab[or][oc] >= threshold && valid[or]) {
                    threshold = opCmpResTab[or][oc];
                    r_idx = or;
                    c_idx = oc;
                }
            }
        }

        if (r_idx == -1 || c_idx == -1) {
            return opList;
        }
        LOG.info("Length of the longest common substring: " + threshold);

        /* Update the valid set so that none of the largest CSTree we've
         * found could share any common operator.
         */
        for (int i = r_idx; i > r_idx - threshold; i--) {
            valid[i] = false;
        }

        /* At this point, we've got the longest common substring.
         * What we have to do next is to find and record the corresponding
         * common sub-OPtree (CSTree).
         *
         * POTENTIAL THREAT:
         * Chances are the corresponding CSTree might not exist.
         */
        for (; r_idx >= 0 && c_idx >= 0 && opCmpResTab[r_idx][c_idx] > 0; r_idx--, c_idx--) {
            Object r_obj = getOpTree1().get(r_idx);
            opList.add(r_obj);

            if (r_obj instanceof Operator<?>) {
                Operator<?> r_op = (Operator<?>) r_obj;
                Operator<?> l_op = (Operator<?>) getOpTree2().get(c_idx);
                Operator<?> known = sameOpToOp.get(r_op);
                if (known == null) {
                    sameOpToOp.put(r_op, l_op);
                } else if (!known.equals(l_op)) {
                    // This branch should never been taken
                    LOG.info("Potential Threat: reset csq mapping for operator " + r_op + " from "
                            + known + " to " + l_op);
                    sameOpToOp.put(r_op, l_op);
                }
            }
        }
        LOG.info(opList.toString());
        return opList;
    }

    /**
     * Prunes the candidate sub-strings until nothing changes any more.
     *
     * A sub-tree should be considered as a common sub-tree if and only if all
     * of its sub-trees are common sub-trees. An operator is therefore dropped
     * (from its list and from sameOpToOp) when one of its parents has no
     * mapping in sameOpToOp, or when the operator right before it in the same
     * list was dropped. A non-operator entry resets that "previous operator
     * was dropped" state. Lists left without any valid operator are removed
     * from the set altogether.
     *
     * A list that has to be shortened is taken out of the set before it is
     * modified and put back after the pass: changing a list while it is an
     * element of a HashSet changes its hash code, which makes later removals
     * of that element unreliable.
     *
     * @param candidates set of common sub-strings; modified in place
     */
    private void validate(HashSet<List<Object>> candidates) {
        boolean changed = true;
        int iterId = 0;

        while (changed) {
            changed = false;
            // lists shortened during this pass, re-inserted once iteration is over
            List<List<Object>> trimmed = new ArrayList<List<Object>>();
            Iterator<List<Object>> iter = candidates.iterator();
            LOG.debug("# of iteration: " + iterId++);

            while (iter.hasNext()) {
                List<Object> obj = iter.next();
                List<Object> invalidList = new ArrayList<Object>();
                boolean last_valid = true;
                LOG.debug("current sub-string: " + obj.toString());

                for (Object item : obj) {
                    if (!(item instanceof Operator<?>)) {
                        // padding entry: the next operator starts a new branch
                        last_valid = true;
                        continue;
                    }
                    Operator<?> op = (Operator<?>) item;
                    if (last_valid) {
                        // the operator stays valid only if every parent is mapped
                        List<Operator<?>> parents = op.getParentOperators();
                        if (parents != null) {
                            for (Operator<?> parent : parents) {
                                if (sameOpToOp.get(parent) == null) {
                                    last_valid = false;
                                    break;
                                }
                            }
                        }
                    }
                    if (!last_valid) {
                        invalidList.add(op);
                        sameOpToOp.remove(op);
                    }
                }

                if (!invalidList.isEmpty()) {
                    changed = true;
                }

                // count the operators that survive; padding strings do not count
                int validOps = 0;
                for (Object item : obj) {
                    if (item instanceof Operator<?> && !invalidList.contains(item)) {
                        validOps++;
                    }
                }

                if (validOps <= 0) {
                    LOG.debug("remove a whole string: " + obj.toString());
                    iter.remove();
                } else if (!invalidList.isEmpty()) {
                    LOG.debug("remove a sub string: " + invalidList.toString());
                    // detach first, while the stored hash still matches
                    iter.remove();
                    obj.removeAll(invalidList);
                    trimmed.add(obj);
                }
            }
            candidates.addAll(trimmed);
        }
        LOG.info("After validation: ");
        for (List<Object> candidate : candidates) {
            LOG.info(candidate.toString());
        }
    }

    /**
     * Transforms the string version of the common sub-trees into SubOpTree
     * objects. Note that different common sub-strings may belong to the same
     * common sub-query/tree.
     *
     * @param res set of common sub-strings (operators interleaved with
     *        non-operator padding entries)
     * @return the common sub-trees that pass the final validation step
     */
    public List<SubOpTree> genCSTree(HashSet<List<Object>> res) {
        HashMap<Operator<?>, List<Object>> cstCandidates = new HashMap<Operator<?>, List<Object>>();
        HashMap<Operator<?>, Operator<?>> rootMap = new HashMap<Operator<?>, Operator<?>>();
        List<SubOpTree> cstList = new ArrayList<SubOpTree>();

        /* Step 1. identify root node candidates
         * Nodes whose child operators can not be found in the current
         * common substring are considered as candidates: an operator that is
         * the last entry of its list or is followed by a non-operator entry.
         */
        for (List<Object> opList : res) {
            for (int i = 0; i < opList.size(); i++) {
                boolean endsOperatorRun = opList.get(i) instanceof Operator<?>
                        && (i + 1 == opList.size() || !(opList.get(i + 1) instanceof Operator<?>));
                if (!endsOperatorRun) {
                    continue;
                }
                // A potential common sub-tree is found: collect the run of
                // operators ending here, candidate first.
                Operator<?> op = (Operator<?>) opList.get(i);
                List<Object> cstCandidate = new ArrayList<Object>();
                for (int j = i; j >= 0 && opList.get(j) instanceof Operator<?>; j--) {
                    cstCandidate.add(opList.get(j));
                }
                cstCandidates.put(op, cstCandidate);
                LOG.debug("cst candidate: " + cstCandidate.toString());
            }
        }

        /* Step2. Identify real root nodes and create the initial CSTree for
         * each of them.
         * If all of the children of some root candidate could not be found
         * in any other substring, the candidate is considered to be a real
         * root node of a common sub-tree. Candidates whose children exist
         * in other substrings should be merged into the subtrees that their
         * children belong to.
         *
         * 2.1 identify real root nodes
         */
        Set<Operator<?>> rootCandidates = cstCandidates.keySet();
        Set<Map.Entry<Operator<?>, List<Object>>> entries = cstCandidates.entrySet();
        for (Operator<?> rootCandidate : rootCandidates) {
            List<Operator<?>> children = rootCandidate.getChildOperators();
            if (children == null) {
                continue;
            }
            for (Operator<?> child : children) {
                for (Map.Entry<Operator<?>, List<Object>> e : entries) {
                    if (!e.getKey().equals(rootCandidate) && e.getValue().contains(child)) {
                        rootMap.put(rootCandidate, e.getKey());
                        LOG.debug("subTree lead by " + rootCandidate + " should be merged"
                                + " into the one lead by " + e.getKey());
                        break;
                    }
                }
            }
        }
        // 2.2 create the initial CSTree for each root node
        for (Operator<?> rootCandidate : rootCandidates) {
            if (rootMap.get(rootCandidate) == null) {
                // a real root is found
                LOG.info("generate initial CSTree for root " + rootCandidate);
                SubOpTree cst = mergeIntoCSTree(rootCandidate, cstCandidates.get(rootCandidate), null);
                cstList.add(cst);
            }
        }

        // Step 3. merge left substrings into recent generated CSTrees
        for (Operator<?> rootCandidate : rootCandidates) {
            Operator<?> mapOp = rootMap.get(rootCandidate);
            if (mapOp == null) {
                continue;
            }
            // follow the merge chain up to the real root
            while (rootMap.get(mapOp) != null) {
                mapOp = rootMap.get(mapOp);
            }
            for (SubOpTree cst : cstList) {
                if (mapOp.equals(cst.getRootNode())) {
                    LOG.info("merge the subTree lead by " + rootCandidate + " into the one lead by "
                            + cst.getRootNode());
                    mergeIntoCSTree(rootCandidate, cstCandidates.get(rootCandidate), cst);
                    break;
                }
            }
        }

        // Step 4. validation: drop shallow trees without joins and with fewer
        // than two aggregations, unless the first child of the root is a
        // file sink, extract or limit operator.
        for (int i = cstList.size() - 1; i >= 0; i--) {
            SubOpTree cst = cstList.get(i);
            if (cst.getMaxDepth() < 2 && cst.getJoinNodes().size() <= 0 && cst.getAggrNodes().size() < 2) {
                List<Operator<?>> childOps = cst.getRootNode().getChildOperators();
                // The null/empty guard must cover all three type checks;
                // without the parentheses get(0) ran on a null or empty list.
                boolean keptByFirstChild = childOps != null && !childOps.isEmpty()
                        && (childOps.get(0) instanceof FileSinkOperator
                                || childOps.get(0) instanceof ExtractOperator
                                || childOps.get(0) instanceof LimitOperator);
                if (keptByFirstChild) {
                    continue;
                }
                LOG.info("remove invalid CSTree lead by " + cst.getRootNode());
                cstList.remove(i);
            }
        }
        return cstList;
    }

    /**
     * Adds the operators of {@code opList} to a common sub-tree, creating the
     * sub-tree for {@code root} when none is passed in. Join, group-by and
     * table scan operators are additionally registered as join, aggregation
     * and leaf nodes; the number of reduce sink operators in the list raises
     * the tree's maximum depth when it exceeds the current value.
     *
     * @param root root operator used when a new sub-tree has to be created
     * @param opList entries to merge; non-operator entries are ignored
     * @param cst sub-tree to merge into, or null to create a new one
     * @return the sub-tree merged into; {@code cst} unchanged when
     *         {@code opList} is null
     */
    private SubOpTree mergeIntoCSTree(Operator<?> root, List<Object> opList, SubOpTree cst) {
        if (opList == null) {
            return cst;
        }
        SubOpTree target = (cst != null) ? cst : new SubOpTree(root);

        int reduceSinks = 0;
        for (Object element : opList) {
            if (!(element instanceof Operator<?>)) {
                continue;
            }
            Operator<?> op = (Operator<?>) element;
            target.getCommonNodes().add(op);
            if (op instanceof JoinOperator) {
                target.getJoinNodes().add(op);
            }
            if (op instanceof GroupByOperator) {
                target.getAggrNodes().add(op);
            }
            if (op instanceof ReduceSinkOperator) {
                reduceSinks++;
            }
            if (op instanceof TableScanOperator) {
                target.getLeafNodes().add(op);
            }
        }

        if (target.getMaxDepth() < reduceSinks) {
            target.setMaxDepth(reduceSinks);
        }
        return target;
    }

    /**
     * Rewrites the plan so that, for every common sub-tree, the matching
     * ("shadow") operators are removed and the children of the shadow root
     * are attached to the root of the sub-tree instead.
     *
     * Processing stops at the first sub-tree that cannot be handled; sub-trees
     * processed before that point stay rewritten.
     *
     * @param cstList common sub-trees to reuse
     * @return true when every sub-tree was processed; false when a root has
     *         no shadow operator recorded or removeShadow reported a
     *         SemanticException
     */
    public boolean reuseCSTree(List<SubOpTree> cstList) {
        for (SubOpTree cst : cstList) {
            LOG.info("data reusing optimization for Common SubTree:" + cst.toString());
            Operator<?> root = cst.getRootNode();
            /* Preprocessing: for CSQs lead by rsops, set the direct ancestor
             * of each rsop as the root for each CSQ respectively in order to
             * avoid violations in JFC or TC detection.
             */
            if (root instanceof ReduceSinkOperator) {
                root = root.getParentOperators().get(0);
            }
            Operator<?> shadow = sameOpToOp.get(root);
            if (shadow == null) {
                // Bail out before touching the plan rather than failing with a
                // NullPointerException half-way through the rewrite.
                LOG.warn("no shadow operator recorded for " + root + ", skip data reusing");
                return false;
            }
            try {
                // Step 1: iteratively delete ancestors of shadowOp
                List<Operator<?>> parentOps = root.getParentOperators();
                if (parentOps != null) {
                    for (Operator<?> parent : parentOps) {
                        removeShadow(parent, cst);
                    }
                }
                // Step 2. merge shadowOp with rootOp
                List<Operator<?>> childOfShadow = shadow.getChildOperators();
                if (childOfShadow != null) { // Will it be null ?
                    for (Operator<?> child : childOfShadow) {
                        LOG.info("replace parentOp of " + child + " from shadowOp " + shadow + " to " + root);
                        child.replaceParent(shadow, root);
                        root.getChildOperators().add(child);
                    }
                    shadow.getChildOperators().removeAll(childOfShadow);
                }
                // Step 3. remove shadowOp itself
                LOG.info("remove the shadowOp itself");
                pCtx.getOpParseCtx().remove(shadow);
                if (shadow instanceof JoinOperator) {
                    pCtx.getJoinContext().remove(shadow);
                }
            } catch (SemanticException e) {
                // keep the reason visible instead of dropping it silently
                LOG.warn("data reusing failed for Common SubTree:" + cst.toString(), e);
                return false;
            }
        }
        return true;
    }

    /**
     * Removes the shadow of {@code root} and, before that, the shadows of all
     * of its ancestors from the parse context, and detaches the shadow from
     * its children.
     *
     * @param root operator of the common sub-tree whose shadow is removed
     * @param cst common sub-tree {@code root} has to belong to
     * @throws SemanticException when {@code root} is not one of the common
     *         nodes of {@code cst}
     */
    private void removeShadow(Operator<?> root, SubOpTree cst) throws SemanticException {
        boolean known = cst.getCommonNodes().contains(root);
        if (!known) {
            throw new SemanticException(root + " to be detected could not be found in CSTree");
        }

        Operator<?> shadow = sameOpToOp.get(root);

        // ancestors first, so the shadow chain is removed top-down
        List<Operator<?>> ancestors = root.getParentOperators();
        if (ancestors != null) {
            for (Operator<?> ancestor : ancestors) {
                removeShadow(ancestor, cst);
            }
        }

        LOG.info("remove ancestors of the shadowOp: " + shadow);
        if (shadow instanceof TableScanOperator) {
            // look up the alias under which the shadow is registered as a top operator
            String alias = "";
            for (String candidate : pCtx.getTopOps().keySet()) {
                if (pCtx.getTopOps().get(candidate).equals(shadow)) {
                    alias = candidate;
                    break;
                }
            }
            pCtx.getTopOps().remove(alias);
            pCtx.getTopToTable().remove(shadow);
        }
        if (shadow instanceof JoinOperator) {
            pCtx.getJoinContext().remove(shadow);
        }
        pCtx.getOpParseCtx().remove(shadow);

        for (Operator<?> orphan : shadow.getChildOperators()) {
            orphan.removeParent(shadow);
        }
    }

    /**
     * Eliminates redundant entries from the given set: lists that contain no
     * operator at all, and lists whose elements are all contained in another,
     * different list of the set.
     *
     * @param commonList set of common sub-strings; modified in place
     */
    private void removeCommonPart(HashSet<List<Object>> commonList) {
        HashSet<List<Object>> covered = new HashSet<List<Object>>();
        Iterator<List<Object>> outer = commonList.iterator();
        while (outer.hasNext()) {
            List<Object> current = outer.next();

            // remember every other list that is fully covered by the current one
            for (List<Object> other : commonList) {
                if (!current.equals(other) && current.containsAll(other)) {
                    covered.add(other);
                }
            }

            // drop lists that do not include any operator
            boolean hasOperator = false;
            for (Object element : current) {
                if (element instanceof Operator<?>) {
                    hasOperator = true;
                    break;
                }
            }
            if (!hasOperator) {
                outer.remove();
            }
        }

        // finally drop the covered lists
        for (List<Object> redundant : covered) {
            commonList.remove(redundant);
        }
    }

    public class SubOpTree {
        private final Operator<? extends OperatorDesc> rootNode;
        // the value that the longest branch of optree
        private int maxDepth;
        // the value that the shortest branch.
        private int minDepth;
        private List<Operator<?>> leafNodes;
        private List<Operator<?>> joinNodes;
        private List<Operator<?>> aggrNodes;
        private Set<Operator<?>> commonNodes;

        public SubOpTree(Operator<?> rootNode) {
            this.rootNode = rootNode;
            this.maxDepth = 0;
            this.minDepth = 0;
            this.leafNodes = new ArrayList<Operator<?>>();
            this.joinNodes = new ArrayList<Operator<?>>();
            this.aggrNodes = new ArrayList<Operator<?>>();
            this.commonNodes = new HashSet<Operator<?>>();
        }

        public SubOpTree(Operator<?> rootNode, int maxDepth, int minDepth, List<Operator<?>> leafNodes,
                List<Operator<?>> joinNodes) {
            this.rootNode = rootNode;
            this.maxDepth = maxDepth;
            this.minDepth = minDepth;
            this.leafNodes = leafNodes;
            this.joinNodes = joinNodes;
            this.aggrNodes = new ArrayList<Operator<?>>();
        }

        public Operator<? extends OperatorDesc> getRootNode() {
            return rootNode;
        }

        public int getMaxDepth() {
            return maxDepth;
        }

        public int getMinDepth() {
            return minDepth;
        }

        public void setMaxDepth(int max) {
            maxDepth = max;
        }

        public void setMinDepth(int min) {
            minDepth = min;
        }

        public List<Operator<?>> getLeafNodes() {
            return leafNodes;
        }

        public void setLeafNodes(List<Operator<?>> leafNodes) {
            this.leafNodes = leafNodes;
        }

        public List<Operator<?>> getJoinNodes() {
            return joinNodes;
        }

        public void setJoinNodes(List<Operator<?>> joinNodes) {
            this.joinNodes = joinNodes;
        }

        public List<Operator<?>> getAggrNodes() {
            return aggrNodes;
        }

        public void setAggrNodes(List<Operator<?>> aggrNodes) {
            this.aggrNodes = aggrNodes;
        }

        public Set<Operator<?>> getCommonNodes() {
            return commonNodes;
        }

        public void setCommonNodes(Set<Operator<?>> commonNodes) {
            this.commonNodes = commonNodes;
        }

        public boolean check(Set<Operator<?>> allCommonNodes) {

            boolean res[] = { true };
            for (Operator<?> leafNode : leafNodes) {
                checkNode(allCommonNodes, leafNode, res);
                if (res[0] == false) {
                    break;
                } else {
                }
            }

            return res[0];
        }

        private void checkNode(Set<Operator<?>> allCommonNodes, Operator<?> op, boolean res[]) {
            if (op.equals(rootNode)) {
                return;
            }
            if (op.getNumChild() > 0) {
                List<Operator<?>> children = op.getChildOperators();
                for (int i = 0; i < children.size(); i++) {
                    if (!allCommonNodes.contains(children.get(i))) {
                        return;
                    }
                    List<Operator<? extends OperatorDesc>> parentOps = children.get(i).getParentOperators();
                    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
                        //            if( ! allCommonNodes.contains(parentOp ) && !(parentOp instanceof FilterOperator)){
                        if (!allCommonNodes.contains(parentOp)) {
                            res[0] = false;
                            return;
                        }
                    }
                    checkNode(allCommonNodes, children.get(i), res);
                }
            }
        }

        public String walker() {
            StringBuilder builder = new StringBuilder();
            Set<String> visited = new HashSet<String>();
            for (Operator<?> leafNode : leafNodes) {
                walker(builder, visited, leafNode, 0);
            }
            return builder.toString();
        }

        /**
         * Appends {@code op} and, the first time its string form is seen, its
         * children to {@code builder}. Recursion stops at {@code rootNode}.
         *
         * @param builder output buffer
         * @param visited string forms of operators that were already printed
         * @param op operator to print
         * @param start column at which this operator's text begins
         */
        private void walker(StringBuilder builder, Set<String> visited, Operator<?> op, int start) {
            if (op.equals(rootNode)) {
                return;
            }

            final String label = op.toString();
            final boolean firstVisit = visited.add(label);

            int column = start;
            if (column > 0) {
                // Not the first operator on the line: draw the connector.
                builder.append("-");
                column++;
            }
            builder.append(label);
            column += label.length();

            // Repeated operators are printed but not expanded again.
            if (!firstVisit || op.getNumChild() <= 0) {
                return;
            }

            List<Operator<?>> kids = op.getChildOperators();
            for (int idx = 0; idx < kids.size(); idx++) {
                if (idx > 0) {
                    // Further children go on their own line, padded to this column.
                    builder.append('\n');
                    for (int pad = 0; pad < column; pad++) {
                        builder.append(' ');
                    }
                }
                walker(builder, visited, kids.get(idx), column);
            }
        }

        /**
         * Summarises this object as
         * {@code {root: ... maxDepth: ... minDepth: ... leaves: ...}}.
         */
        @Override
        public String toString() {
            return "{root: " + getRootNode()
                    + " maxDepth: " + getMaxDepth()
                    + " minDepth: " + getMinDepth()
                    + " leaves: " + getLeafNodes()
                    + "}";
        }
    }

    /**
     * Stores the parse context used by this detector; {@code opCmp} reads its
     * top-to-table map when comparing table scan operators.
     *
     * @param pCtx the parse context to keep
     */
    public void setPctx(ParseContext pCtx) {
        this.pCtx = pCtx;
    }

    /**
     * Returns the parse context currently held by this detector.
     *
     * @return the stored parse context
     */
    public ParseContext getPctx() {
        return this.pCtx;
    }

    /**
     * Returns the internal {@code sameOpToOp} map itself (not a copy), so
     * changes made by the caller are visible to this object.
     * NOTE(review): presumably maps an operator to its equivalent operator
     * found in the other tree -- confirm against the code that fills the map.
     *
     * @return the operator-to-operator map
     */
    public HashMap<Operator<?>, Operator<?>> getSameOpToOp() {
        return sameOpToOp;
    }

    /**
     * Looks up {@code op} in the {@code sameOpToOp} map.
     *
     * @param op the operator used as the lookup key
     * @return the operator mapped to {@code op}, or {@code null} if the map
     *         holds no entry for it
     */
    public Operator<?> getSameOpToOp(Operator<?> op) {
        return sameOpToOp.get(op);
    }

    /**
     * Compares two operators for equivalence.
     *
     * <p>The operators must report the same name; after that the criterion
     * depends on the concrete type of {@code op1}:
     * <ul>
     * <li>table scan: same table name in the parse context's top-to-table map</li>
     * <li>select: same column expressions and output column names</li>
     * <li>reduce sink: same key, value and partition columns and same order</li>
     * <li>file sink: same table info, destination table id and final dir name</li>
     * <li>join: same expressions, output column names, filters, conditions
     *     and skew-join flag</li>
     * <li>group by: same mode, keys, aggregators, bucket-group flag and
     *     output column names</li>
     * <li>union: same number of inputs</li>
     * <li>limit: same limit and least-rows values</li>
     * <li>extract: same column expression</li>
     * <li>filter: same predicate</li>
     * </ul>
     * Any other operator type is reported as different.
     *
     * @param op1 first operator
     * @param op2 second operator
     * @return {@code true} if the operators are considered equal
     */
    private boolean opCmp(Operator<?> op1, Operator<?> op2) {

        // Operators with different names never match.
        if (!op1.getName().equals(op2.getName())) {
            return false;
        }

        if (op1 instanceof TableScanOperator) {
            TableScanOperator scan1 = (TableScanOperator) op1;
            TableScanOperator scan2 = (TableScanOperator) op2;

            // The two fallbacks differ on purpose ("null" vs "NULL"): scans that
            // are both missing from the top-to-table map must not compare equal.
            String leftTable = pCtx.getTopToTable().containsKey(scan1)
                    ? pCtx.getTopToTable().get(scan1).getTableName()
                    : "null";
            String rightTable = pCtx.getTopToTable().containsKey(scan2)
                    ? pCtx.getTopToTable().get(scan2).getTableName()
                    : "NULL";

            if (leftTable.equals(rightTable)) {
                return true;
            }
        }

        if (op1 instanceof SelectOperator) {
            SelectOperator sel1 = (SelectOperator) op1;
            SelectOperator sel2 = (SelectOperator) op2;
            if (sameKeys(sel1.getConf().getColList(), sel2.getConf().getColList())
                    && sel1.getConf().getOutputColumnNames().equals(sel2.getConf().getOutputColumnNames())) {
                LOG.debug("Two SEL operator have same column List " + exprToString(sel1.getConf().getColList())
                        + " the same output column name " + sel1.getConf().getOutputColumnNames());
                return true;
            }
        }

        if (op1 instanceof ReduceSinkOperator) {
            // Equal reduce sinks share key columns, value columns,
            // partitioning columns and sort order.
            ReduceSinkOperator sink1 = (ReduceSinkOperator) op1;
            ReduceSinkOperator sink2 = (ReduceSinkOperator) op2;
            ArrayList<ExprNodeDesc> leftKeys = sink1.getConf().getKeyCols();
            ArrayList<ExprNodeDesc> rightKeys = sink2.getConf().getKeyCols();
            ArrayList<ExprNodeDesc> leftValues = sink1.getConf().getValueCols();
            ArrayList<ExprNodeDesc> rightValues = sink2.getConf().getValueCols();
            ArrayList<ExprNodeDesc> leftParts = sink1.getConf().getPartitionCols();
            ArrayList<ExprNodeDesc> rightParts = sink2.getConf().getPartitionCols();
            String leftOrder = sink1.getConf().getOrder();
            String rightOrder = sink2.getConf().getOrder();

            if (sameKeys(leftKeys, rightKeys) && sameKeys(leftValues, rightValues)
                    && sameKeys(leftParts, rightParts) && sameOrder(leftOrder, rightOrder)) {
                LOG.debug("Two RS have same key columns " + exprToString(leftKeys) + " the same values columns "
                        + exprToString(leftValues) + " the same partition key " + exprToString(leftParts)
                        + " the same order " + leftOrder);
                return true;
            }
        }

        if (op1 instanceof FileSinkOperator) {
            // Equal file sinks share table info, destination table id
            // and final directory name.
            FileSinkOperator fileSink1 = (FileSinkOperator) op1;
            FileSinkOperator fileSink2 = (FileSinkOperator) op2;

            if (fileSink1.getConf().getTableInfo().equals(fileSink2.getConf().getTableInfo())
                    && fileSink1.getConf().getDestTableId() == fileSink2.getConf().getDestTableId()
                    && fileSink1.getConf().getFinalDirName().equals(fileSink2.getConf().getFinalDirName())) {
                LOG.debug("Two fileSinkOperator have  same  tableInfo , destTableID");
                return true;
            }
        }

        if (op1 instanceof JoinOperator) {
            JoinOperator join1 = (JoinOperator) op1;
            JoinOperator join2 = (JoinOperator) op2;

            if (sameMap(join1.getConf().getExprs(), join2.getConf().getExprs())
                    && join1.getConf().getOutputColumnNames().equals(join2.getConf().getOutputColumnNames())
                    && sameMap(join1.getConf().getFilters(), join2.getConf().getFilters())
                    && sameCondList(join1.getConf().getCondsList(), join2.getConf().getCondsList())
                    && join1.getConf().getHandleSkewJoin() == join2.getConf().getHandleSkewJoin()) {
                LOG.debug("Two JoinOperator is same ");
                return true;
            }
        }

        if (op1 instanceof GroupByOperator) {
            GroupByOperator gby1 = (GroupByOperator) op1;
            GroupByOperator gby2 = (GroupByOperator) op2;

            if (gby1.getConf().getMode().equals(gby2.getConf().getMode())
                    && sameKeys(gby1.getConf().getKeys(), gby2.getConf().getKeys())
                    && sameAggrList(gby1.getConf().getAggregators(), gby2.getConf().getAggregators())
                    && gby1.getConf().getBucketGroup() == gby2.getConf().getBucketGroup()
                    && gby1.getConf().getOutputColumnNames().equals(gby2.getConf().getOutputColumnNames())) {
                LOG.debug("Two GroupByOperator is same ");
                return true;
            }
        }

        if (op1 instanceof UnionOperator) {
            UnionOperator union1 = (UnionOperator) op1;
            UnionOperator union2 = (UnionOperator) op2;

            if (union1.getConf().getNumInputs() == union2.getConf().getNumInputs()) {
                LOG.debug("Two UnionOperator is same ");
                return true;
            }
        }

        if (op1 instanceof LimitOperator) {
            LimitOperator limit1 = (LimitOperator) op1;
            LimitOperator limit2 = (LimitOperator) op2;

            if (limit1.getConf().getLimit() == limit2.getConf().getLimit()
                    && limit1.getConf().getLeastRows() == limit2.getConf().getLeastRows()) {
                LOG.debug("Two LimitOperator is same ");
                return true;
            }
        }

        if (op1 instanceof ExtractOperator) {
            ExtractOperator extract1 = (ExtractOperator) op1;
            ExtractOperator extract2 = (ExtractOperator) op2;

            if (extract1.getConf().getCol().isSame(extract2.getConf().getCol())) {
                LOG.debug("Two ExtractOperator is same ");
                return true;
            }
        }

        if (op1 instanceof FilterOperator) {
            FilterOperator filter1 = (FilterOperator) op1;
            FilterOperator filter2 = (FilterOperator) op2;

            if (filter1.getConf().getPredicate().isSame(filter2.getConf().getPredicate())) {
                LOG.debug("Two FilterOperator is same ");
                return true;
            }
            LOG.debug("Two FilterOperator is different : " + filter1.toString() + " [ "
                    + filter2.toString() + " ]");
        }

        // Same name but no type-specific criterion matched.
        return false;
    }

    /**
     * Checks whether two aggregation lists are pairwise equivalent.
     *
     * <p>The lists must have the same length and, position by position, the
     * same mode, the same parameter expressions (per {@code sameKeys}) and
     * the same expression string.
     *
     * <p>Diagnostics go through the class logger at debug level rather than
     * to standard output, so they follow the logging configuration.
     *
     * @param l1 first aggregation list
     * @param l2 second aggregation list
     * @return {@code true} if both lists describe the same aggregations
     */
    private boolean sameAggrList(ArrayList<AggregationDesc> l1, ArrayList<AggregationDesc> l2) {
        if (l1.size() != l2.size()) {
            return false;
        }

        // Sizes are equal here, so checking one list is enough.
        if (l1.isEmpty()) {
            LOG.debug("AggregationDesc is null");
            return true;
        }

        for (int i = 0; i < l1.size(); i++) {
            AggregationDesc l1Aggr = l1.get(i);
            AggregationDesc l2Aggr = l2.get(i);
            // Only build the message when it will actually be written.
            if (LOG.isDebugEnabled()) {
                LOG.debug("AggregationDesc -> getExprString getGenericUDAFName " + l1Aggr.getExprString()
                        + l2Aggr.getExprString() + "        " + l1Aggr.getGenericUDAFName()
                        + l2Aggr.getGenericUDAFName());
            }
            if (!l1Aggr.getMode().equals(l2Aggr.getMode())
                    || !sameKeys(l1Aggr.getParameters(), l2Aggr.getParameters())) {
                return false;
            }
            if (!l1Aggr.getExprString().equals(l2Aggr.getExprString())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether two join-condition lists have the same length and, at
     * every position, the same join condition string.
     *
     * @param l1 first condition list
     * @param l2 second condition list
     * @return {@code true} if all conditions match pairwise
     */
    private boolean sameCondList(List<JoinCondDesc> l1, List<JoinCondDesc> l2) {
        if (l1.size() != l2.size()) {
            return false;
        }
        for (int pos = 0; pos < l1.size(); pos++) {
            JoinCondDesc left = l1.get(pos);
            JoinCondDesc right = l2.get(pos);
            boolean matches = left.getJoinCondString().equals(right.getJoinCondString());
            if (!matches) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether two maps of expression lists are equivalent: they must
     * have the same key set and, for every key, expression lists that are
     * equal according to {@code sameKeys}.
     *
     * <p>Maps of equal size but with different keys are reported as
     * different. A key mapped to {@code null} matches only a {@code null}
     * on the other side.
     *
     * @param map1 first map
     * @param map2 second map
     * @return {@code true} if both maps hold the same keys with equivalent
     *         expression lists
     */
    private boolean sameMap(Map<?, List<ExprNodeDesc>> map1, Map<?, List<ExprNodeDesc>> map2) {
        if (map1.size() != map2.size()) {
            return false;
        }
        // Equal size does not imply equal keys; differing key sets mean
        // the maps cannot be the same.
        if (!map1.keySet().equals(map2.keySet())) {
            return false;
        }
        for (Object key : map1.keySet()) {
            List<ExprNodeDesc> left = map1.get(key);
            List<ExprNodeDesc> right = map2.get(key);
            if (left == null || right == null) {
                // sameKeys cannot take null lists; null only equals null.
                if (left != right) {
                    return false;
                }
                continue;
            }
            if (!sameKeys(left, right)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares two expression lists position by position.
     *
     * <p>The lists match when they have the same size and every pair of
     * entries is either both {@code null} or satisfies
     * {@code first.isSame(second)}.
     *
     * @param k1 first expression list
     * @param k2 second expression list
     * @return {@code true} if every position matches
     */
    private boolean sameKeys(List<ExprNodeDesc> k1, List<ExprNodeDesc> k2) {
        if (k1.size() != k2.size()) {
            return false;
        }
        for (int pos = 0; pos < k1.size(); pos++) {
            ExprNodeDesc left = k1.get(pos);
            ExprNodeDesc right = k2.get(pos);
            // A null entry only matches another null entry.
            boolean matches = (left == null) ? (right == null) : left.isSame(right);
            if (!matches) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares two sort-order strings, ignoring surrounding whitespace.
     * {@code null} and blank strings are treated alike, so two missing
     * orders are equal and a missing order never equals a non-blank one.
     *
     * @param order1 first order string, may be {@code null}
     * @param order2 second order string, may be {@code null}
     * @return {@code true} if both orders are equivalent
     */
    private boolean sameOrder(String order1, String order2) {
        // Collapse null to the empty string; blank strings trim to it too.
        String left = (order1 == null) ? "" : order1.trim();
        String right = (order2 == null) ? "" : order2.trim();
        return left.equals(right);
    }

    /**
     * Builds a log-friendly string from the names of the given expressions,
     * each name preceded by a tab character.
     *
     * @param exprs expressions whose names are listed
     * @return the tab-prefixed names, or an empty string for an empty list
     */
    private String exprToString(List<ExprNodeDesc> exprs) {
        StringBuilder names = new StringBuilder();
        for (ExprNodeDesc expr : exprs) {
            names.append("\t").append(expr.getName());
        }
        return names.toString();
    }

    /**
     * Returns the internal {@code opCmpResTab} array itself (not a copy).
     * NOTE(review): presumably the table of pairwise operator comparison
     * results between the two trees -- confirm against the code that fills it.
     *
     * @return the two-dimensional comparison table
     */
    public int[][] getOpCmpResTab() {
        return opCmpResTab;
    }

    /**
     * Returns the internal {@code opTree1} list itself (not a copy).
     * NOTE(review): presumably the flattened first operator tree being
     * compared -- confirm against the code that builds it.
     *
     * @return the first tree's list
     */
    public List<Object> getOpTree1() {
        return opTree1;
    }

    /**
     * Returns the internal {@code opTree2} list itself (not a copy).
     * NOTE(review): presumably the flattened second operator tree being
     * compared -- confirm against the code that builds it.
     *
     * @return the second tree's list
     */
    public List<Object> getOpTree2() {
        return opTree2;
    }

}