// Source: org.apache.hadoop.hive.ql.parse.mr3.MR3Compiler.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
 * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
 * for the specific language governing permissions and limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse.mr3;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lib.CompositeProcessor;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.TypeRule;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc;
import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism;
import org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinHintOptimizer;
import org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer;
import org.apache.hadoop.hive.ql.parse.AppMasterEventProcessor;
import org.apache.hadoop.hive.ql.parse.FileSinkProcessor;
import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.TaskCompiler;
import org.apache.hadoop.hive.ql.parse.UnionProcessor;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

/**
 * MR3Compiler translates the operator plan into MR3Tasks.
 */
/**
 * MR3Compiler translates the optimized operator plan into MR3 tasks.
 *
 * <p>It hooks into the generic {@link TaskCompiler} lifecycle: operator-plan
 * optimization is currently a stub (the intended walker is sketched in the
 * commented-out section of {@link #optimizeOperatorPlan}), while
 * {@link #generateTaskTree} performs the real work of splitting the operator
 * tree into units of work via a rule-driven graph walk.
 */
public class MR3Compiler extends TaskCompiler {
    private static final String CLASS_NAME = MR3Compiler.class.getName();
    private static final PerfLogger PERF_LOGGER = PerfLogger.getPerfLogger();
    protected final Log LOG = LogFactory.getLog(MR3Compiler.class);

    public MR3Compiler() {
    }

    /**
     * Runs MR3-specific optimizations over the operator plan.
     *
     * <p>Currently only brackets the phase with perf-logger markers and logs the
     * top operators for debugging; the actual rule-based walk (reducer
     * parallelism, join optimizations) is still commented out below, pending
     * completion of OptimizeMR3ProcContext.
     */
    @Override
    protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs)
            throws SemanticException {
        PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.MR3_OPTIMIZE_OPERATOR_TREE);

        // Sequence of TableScan operators to be walked
        Deque<Operator<? extends OperatorDesc>> deque = new LinkedList<Operator<? extends OperatorDesc>>();
        deque.addAll(pCtx.getTopOps().values());

        // Diagnostic only: dump the top operators. Guarded and demoted to debug
        // level so production runs are not spammed; entrySet() avoids the
        // redundant per-key map lookup of keySet() + get(key).
        if (LOG.isDebugEnabled()) {
            for (Map.Entry<String, TableScanOperator> topOp : pCtx.getTopOps().entrySet()) {
                LOG.debug("top operator key " + topOp.getKey() + " -> " + topOp.getValue());
            }
        }

        //    // Create the context for the walker
        //    OptimizeMR3ProcContext procCtx = new OptimizeMR3ProcContext(conf, pCtx, inputs, outputs, deque);
        //
        //    // create a walker which walks the tree in a DFS manner while maintaining
        //    // the operator stack.
        //    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
        //    opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName()
        //        + "%"), new SetSparkReducerParallelism());
        //
        //    opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
        //
        //    opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
        //
        //    // The dispatcher fires the processor corresponding to the closest matching
        //    // rule and passes the context along
        //    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
        //    GraphWalker ogw = new DefaultGraphWalker(disp);
        //
        //    // Create a list of topop nodes
        //    ArrayList<Node> topNodes = new ArrayList<Node>();
        //    topNodes.addAll(pCtx.getTopOps().values());
        //    ogw.startWalking(topNodes, null);

        PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.MR3_OPTIMIZE_OPERATOR_TREE);
    }

    /**
     * Walks the operator tree and breaks it into MR3 work units, populating
     * {@code rootTasks}.
     *
     * <p>The walk is rule-dispatched: ReduceSink/FileSink/DummyStore boundaries
     * split work, map-join and merge-join rules stop traversal on their edges,
     * and TableScan/Union/AppMasterEvent rules record state in the proc context
     * for the post-processing passes below.
     *
     * @param rootTasks output list that receives the generated root tasks
     * @param pCtx      parse context for the current query
     * @param mvTask    move tasks to be linked after file sinks
     * @param inputs    read entities of the query
     * @param outputs   write entities of the query
     * @throws SemanticException if plan generation fails
     */
    @Override
    protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
            List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs)
            throws SemanticException {

        GenMR3Utils.getUtils().resetSequenceNumber();

        ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
        GenMR3Work genMR3Work = new GenMR3Work(GenMR3Utils.getUtils());

        GenMR3ProcContext procCtx = new GenMR3ProcContext(conf, tempParseContext, mvTask, rootTasks, inputs,
                outputs);

        // create a walker which walks the tree in a DFS manner while maintaining
        // the operator stack.
        // The dispatcher generates the plan from the operator tree.
        // LinkedHashMap keeps rule insertion order, which determines dispatch
        // priority among equally-close matches.
        Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
        opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"),
                genMR3Work);

        opRules.put(
                new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"),
                new ReduceSinkMapJoinProc());

        opRules.put(new RuleRegExp(
                "Recognize a Sorted Merge Join operator to setup the right edge and"
                        + " stop traversing the DummyStore-MapJoin",
                CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());

        opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"),
                new CompositeProcessor(new FileSinkProcessor(), genMR3Work));

        opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + "%"),
                genMR3Work);

        opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"),
                new MR3ProcessAnalyzeTable(GenMR3Utils.getUtils()));

        opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new UnionProcessor());

        opRules.put(new RuleRegExp("AppMasterEventOperator", AppMasterEventOperator.getOperatorName() + "%"),
                new AppMasterEventProcessor());

        // The dispatcher fires the processor corresponding to the closest matching
        // rule and passes the context along
        Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
        List<Node> topNodes = new ArrayList<Node>();
        topNodes.addAll(pCtx.getTopOps().values());
        GraphWalker ogw = new GenMR3WorkWalker(disp, procCtx);
        ogw.startWalking(topNodes, null);

        // we need to clone some operator plans and remove union operators still
        for (BaseWork w : procCtx.workWithUnionOperators) {
            GenMR3Utils.getUtils().removeUnionOperators(conf, procCtx, w);
        }

        // then we make sure the file sink operators are set up right
        for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
            GenMR3Utils.getUtils().processFileSink(procCtx, fileSink);
        }

        // and finally we hook up any events that need to be sent to the MR3 AM
        LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
        for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
            LOG.debug("Handling AppMasterEventOperator: " + event);
            GenMR3Utils.getUtils().processAppMasterEvent(procCtx, event);
        }

        // write dag to log
        // try {
        // MR3Graph dag = generateGraph(rootTasks);
        // // System.err.println(dag.generateGraphViz());
        // dag.save("test.dot");
        // new MR3Log(dag.generateGraphViz()).run();
        // } catch (IOException e) {
        // e.printStackTrace();
        // }
    }

    /** Not yet implemented for MR3: input formats are left untouched. */
    @Override
    protected void setInputFormat(Task<? extends Serializable> rootTask) {
        // TODO: set the input format on map work, as the MR/Tez compilers do.
    }

    /** Not yet implemented for MR3: the generated task plan is returned as-is. */
    @Override
    protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, Context ctx)
            throws SemanticException {
        // TODO: add physical optimizations (e.g. vectorization, stage merging).
    }

    /** Not yet implemented for MR3: execution mode is not restricted here. */
    @Override
    protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
            GlobalLimitCtx globalLimitCtx) throws SemanticException {
        // TODO: decide local vs. cluster execution based on input size/limits.
    }

    // generate graph of tasks
    // private MR3Graph generateGraph(List<Task<? extends Serializable>> tasks) {
    // MR3Graph dag = new MR3Graph("test");
    // for (Task<? extends Serializable> t : tasks) {
    // String s = t.getId() + "_" + t.getClass().getSimpleName();
    // Node n = dag.getNode(t.getId());
    // n.setLabel(s);
    // dag = addChildsOfGraph(dag, n, t.getChildTasks());
    // }
    // return dag;
    // }
    //
    // private MR3Graph addChildsOfGraph(MR3Graph dag, Node parent,
    // List<Task<? extends Serializable>> childTasks) {
    // if (childTasks != null)
    // for (Task<? extends Serializable> t : childTasks) {
    // String s = t.getId() + "_" + t.getClass().getSimpleName();
    // Node n = dag.getNode(t.getId());
    // n.setLabel(s);
    // parent.addEdge(n, "input=, output=");
    // dag = addChildsOfGraph(dag, n, t.getChildTasks());
    // }
    // return dag;
    // }
}