// Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable
 * law or agreed to in writing, software distributed under the License is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License
 * for the specific language governing permissions and limitations under the License.
 */
package org.apache.hadoop.hive.ql.parse.mr3;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator;
import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lib.CompositeProcessor;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.TypeRule;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc;
import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism;
import org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinHintOptimizer;
import org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer;
import org.apache.hadoop.hive.ql.parse.AppMasterEventProcessor;
import org.apache.hadoop.hive.ql.parse.FileSinkProcessor;
import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.TaskCompiler;
import org.apache.hadoop.hive.ql.parse.UnionProcessor;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

/**
 * MR3Compiler translates the operator plan into MR3Tasks.
 *
 * <p>It plugs into the Hive compilation pipeline as a {@link TaskCompiler}: after semantic
 * analysis it walks the operator tree, splits it into work units at ReduceSink/FileSink
 * boundaries, and wires the resulting {@code BaseWork} pieces into MR3 tasks.
 */
public class MR3Compiler extends TaskCompiler {

  private static final String CLASS_NAME = MR3Compiler.class.getName();
  private static final PerfLogger PERF_LOGGER = PerfLogger.getPerfLogger();

  protected final Log LOG = LogFactory.getLog(MR3Compiler.class);

  public MR3Compiler() {
  }

  /**
   * Runs MR3-specific optimizations over the operator plan before task generation.
   *
   * <p>NOTE(review): the walker/rule machinery below is currently commented out, so this method
   * only brackets the phase with perf-logger markers; the {@code deque} of top operators is built
   * but not yet consumed. It is kept so the disabled walker code can be re-enabled unchanged.
   *
   * @param pCtx    the parse context holding the operator tree
   * @param inputs  read entities of the query
   * @param outputs write entities of the query
   * @throws SemanticException on optimizer failure
   */
  @Override
  protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs,
      Set<WriteEntity> outputs) throws SemanticException {
    PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.MR3_OPTIMIZE_OPERATOR_TREE);

    // Sequence of TableScan operators to be walked
    Deque<Operator<? extends OperatorDesc>> deque =
        new LinkedList<Operator<? extends OperatorDesc>>();
    deque.addAll(pCtx.getTopOps().values());

    // (leftover "bubu" debug loop that dumped every top operator to the user console removed)

    //
    // Create the context for the walker
    // OptimizeMR3ProcContext procCtx = new OptimizeMR3ProcContext(conf, pCtx, inputs, outputs, deque);
    //
    // // create a walker which walks the tree in a DFS manner while maintaining
    // // the operator stack.
    // Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    // opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName()
    //     + "%"), new SetSparkReducerParallelism());
    //
    // opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
    //
    // opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
    //
    // // The dispatcher fires the processor corresponding to the closest matching
    // // rule and passes the context along
    // Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    // GraphWalker ogw = new DefaultGraphWalker(disp);
    //
    // // Create a list of topop nodes
    // ArrayList<Node> topNodes = new ArrayList<Node>();
    // topNodes.addAll(pCtx.getTopOps().values());
    // ogw.startWalking(topNodes, null);

    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.MR3_OPTIMIZE_OPERATOR_TREE);
  }

  /**
   * Walks the operator tree and generates the MR3 task DAG.
   *
   * <p>Registers the work-splitting rules (ReduceSink, FileSink, DummyStore, merge-join,
   * analyze-table, union, app-master-event), runs a DFS walk over the top operators, then
   * post-processes the generated work: clones plans containing union operators, finalizes
   * file sinks, and hooks up app-master events.
   *
   * @param rootTasks receives the generated root tasks
   * @param pCtx      the parse context holding the operator tree
   * @param mvTask    move tasks to attach after file sinks
   * @param inputs    read entities of the query
   * @param outputs   write entities of the query
   * @throws SemanticException on plan-generation failure
   */
  @Override
  protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
      List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs)
      throws SemanticException {
    GenMR3Utils.getUtils().resetSequenceNumber();

    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenMR3Work genMR3Work = new GenMR3Work(GenMR3Utils.getUtils());

    GenMR3ProcContext procCtx = new GenMR3ProcContext(conf, tempParseContext, mvTask, rootTasks,
        inputs, outputs);

    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Split Work - ReduceSink",
        ReduceSinkOperator.getOperatorName() + "%"), genMR3Work);
    opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin",
        MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
    opRules.put(new RuleRegExp("Recoginze a Sorted Merge Join operator to setup the right edge and"
        + " stop traversing the DummyStore-MapJoin",
        CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
        FileSinkOperator.getOperatorName() + "%"),
        new CompositeProcessor(new FileSinkProcessor(), genMR3Work));
    opRules.put(new RuleRegExp("Split work - DummyStore",
        DummyStoreOperator.getOperatorName() + "%"), genMR3Work);
    opRules.put(new RuleRegExp("Handle Potential Analyze Command",
        TableScanOperator.getOperatorName() + "%"),
        new MR3ProcessAnalyzeTable(GenMR3Utils.getUtils()));
    opRules.put(new RuleRegExp("Remember union",
        UnionOperator.getOperatorName() + "%"), new UnionProcessor());
    opRules.put(new RuleRegExp("AppMasterEventOperator",
        AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());

    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    GraphWalker ogw = new GenMR3WorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);

    // we need to clone some operator plans and remove union operators still
    for (BaseWork w : procCtx.workWithUnionOperators) {
      GenMR3Utils.getUtils().removeUnionOperators(conf, procCtx, w);
    }

    // then we make sure the file sink operators are set up right
    for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
      GenMR3Utils.getUtils().processFileSink(procCtx, fileSink);
    }

    // and finally we hook up any events that need to be sent to the MR3 AM
    LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
    for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
      LOG.debug("Handling AppMasterEventOperator: " + event);
      GenMR3Utils.getUtils().processAppMasterEvent(procCtx, event);
    }

    // write dag to log
    // try {
    //   MR3Graph dag = generateGraph(rootTasks);
    //   // System.err.println(dag.generateGraphViz());
    //   dag.save("test.dot");
    //   new MR3Log(dag.generateGraphViz()).run();
    // } catch (IOException e) {
    //   e.printStackTrace();
    // }
  }

  /** Not yet implemented for MR3; intentionally a no-op. */
  @Override
  protected void setInputFormat(Task<? extends Serializable> rootTask) {
    // TODO Auto-generated method stub
  }

  /** Not yet implemented for MR3; intentionally a no-op. */
  @Override
  protected void optimizeTaskPlan(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
      Context ctx) throws SemanticException {
    // TODO Auto-generated method stub
  }

  /** Not yet implemented for MR3; intentionally a no-op. */
  @Override
  protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
      GlobalLimitCtx globalLimitCtx) throws SemanticException {
    // TODO Auto-generated method stub
  }

  // generate graph of tasks
  // private MR3Graph generateGraph(List<Task<? extends Serializable>> tasks) {
  //   MR3Graph dag = new MR3Graph("test");
  //   for (Task<? extends Serializable> t : tasks) {
  //     String s = t.getId() + "_" + t.getClass().getSimpleName();
  //     Node n = dag.getNode(t.getId());
  //     n.setLabel(s);
  //     dag = addChildsOfGraph(dag, n, t.getChildTasks());
  //   }
  //   return dag;
  // }
  //
  // private MR3Graph addChildsOfGraph(MR3Graph dag, Node parent,
  //     List<Task<? extends Serializable>> childTasks) {
  //   if (childTasks != null)
  //     for (Task<? extends Serializable> t : childTasks) {
  //       String s = t.getId() + "_" + t.getClass().getSimpleName();
  //       Node n = dag.getNode(t.getId());
  //       n.setLabel(s);
  //       parent.addEdge(n, "input=, output=");
  //       dag = addChildsOfGraph(dag, n, t.getChildTasks());
  //     }
  //   return dag;
  // }
}