Java example: org.apache.hadoop.hive.ql.MultiDriver_BAK — an experimental multi-query driver for Apache Hive, adapted from Hive's Driver class.
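The class below compiles and executes several HiveQL statements as one merged plan: multiCompile parses and analyzes each statement into a ParseContext, mergePctx combines them into a MultiParseContext, and multiRun acquires locks, executes the merged plan, and prints each query's result set. As a rough orientation, a minimal calling sketch follows. It is not part of the original source; it assumes a configured Hive classpath, an active SessionState, and that the Pair class used here has a (key, value) constructor matching the getKey()/getValue() calls in multiCompile.

// Hypothetical usage sketch (assumptions noted above); MultiDriverExample is not part of the original file.
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.MultiDriver_BAK;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;

public class MultiDriverExample {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();   // picks up hive-site.xml from the classpath
    SessionState.start(conf);         // execute() and the result-printing code expect an active session

    // One (statement, per-query configuration) pair for each query to be co-planned.
    ArrayList<Pair<String, Configuration>> cmds = new ArrayList<Pair<String, Configuration>>();
    // Pair(key, value) constructor is assumed; the driver only calls getKey()/getValue().
    cmds.add(new Pair<String, Configuration>("SELECT count(*) FROM src", new HiveConf(conf)));
    cmds.add(new Pair<String, Configuration>("SELECT key FROM src WHERE key > '10'", new HiveConf(conf)));

    MultiDriver_BAK driver = new MultiDriver_BAK(conf);
    CommandProcessorResponse resp = driver.multiRun(cmds);   // compile, lock, execute, print results
    System.exit(resp.getResponseCode());
  }
}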
package org.apache.hadoop.hive.ql; /** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.DataInput; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.Pair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.FakeOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.TaskResult; import org.apache.hadoop.hive.ql.exec.TaskRunner; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; import org.apache.hadoop.hive.ql.hooks.Hook; import org.apache.hadoop.hive.ql.hooks.HookContext; import org.apache.hadoop.hive.ql.hooks.HookUtils; import org.apache.hadoop.hive.ql.hooks.PostExecute; import org.apache.hadoop.hive.ql.hooks.PreExecute; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.lockmgr.HiveLock; import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager; import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx; import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode; import 
org.apache.hadoop.hive.ql.lockmgr.HiveLockObj; import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject; import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData; import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.AuthorizationException; import org.apache.hadoop.hive.ql.metadata.DummyPartition; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.formatting.JsonMetaDataFormatter; import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils; import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatter; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.CommonSubtreeReuse; import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl; import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.InterQueryFlowCtx; import org.apache.hadoop.hive.ql.parse.MultiParseContext; import org.apache.hadoop.hive.ql.parse.OpParseContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.ParseDriver; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.QB; import org.apache.hadoop.hive.ql.parse.QBJoinTree; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.SplitSample; import org.apache.hadoop.hive.ql.parse.VariableSubstitution; import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FakeDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ReflectionUtils; public class MultiDriver_BAK implements CommandProcessor { static final private Log LOG = LogFactory.getLog(Driver.class.getName()); static final private LogHelper console = new LogHelper(LOG); private static final Object compileMonitor = new Object(); private int maxRows = 100; ByteStream.Output bos = new 
ByteStream.Output();
  private HiveConf conf;
  private DataInput resStream;
  private Context ctx;
  private QueryPlan plan;
  private Schema schema;
  private HiveLockManager hiveLockMgr;

  private String errorMessage;
  private String SQLState;
  private Throwable downstreamError;

  // A limit on the number of threads that can be launched
  private int maxthreads;
  private static final int SLEEP_TIME = 2000;
  protected int tryCount = Integer.MAX_VALUE;

  // new variables for multi query
  private final ArrayList<Schema> schemas = new ArrayList<Schema>();
  private final ArrayList<String> cmds = new ArrayList<String>();
  private final ArrayList<HiveConf> hconfs = new ArrayList<HiveConf>();
  private int multiQueryNum;
  private final HashMap<Integer, ParseContext> multiPctx = new HashMap<Integer, ParseContext>();

  private boolean checkLockManager() {
    boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
    if (!supportConcurrency) {
      return false;
    }
    if ((hiveLockMgr == null)) {
      try {
        setLockManager();
      } catch (SemanticException e) {
        errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
        SQLState = ErrorMsg.findSQLState(e.getMessage());
        downstreamError = e;
        console.printError(errorMessage, "\n"
            + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return false;
      }
    }
    // The reason that we set the lock manager for the ctx here is because each
    // query has its own ctx object. The hiveLockMgr is shared across the
    // same instance of Driver, which can run multiple queries.
    ctx.setHiveLockMgr(hiveLockMgr);
    return hiveLockMgr != null;
  }

  private void setLockManager() throws SemanticException {
    boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
    if (supportConcurrency) {
      String lockMgr = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER);
      if ((lockMgr == null) || (lockMgr.isEmpty())) {
        throw new SemanticException(ErrorMsg.LOCKMGR_NOT_SPECIFIED.getMsg());
      }
      try {
        hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf);
        hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
      } catch (Exception e) {
        // set hiveLockMgr to null just in case this invalid manager got set to
        // the next query's ctx.
        if (hiveLockMgr != null) {
          try {
            hiveLockMgr.close();
          } catch (LockException e1) {
            // nothing we can do here
          }
          hiveLockMgr = null;
        }
        throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage());
      }
    }
  }

  public void init() {
    Operator.resetId();
  }

  /**
   * Return the status information about the Map-Reduce cluster.
   */
  public ClusterStatus getClusterStatus() throws Exception {
    ClusterStatus cs;
    try {
      JobConf job = new JobConf(conf);
      JobClient jc = new JobClient(job);
      cs = jc.getClusterStatus();
    } catch (Exception e) {
      e.printStackTrace();
      throw e;
    }
    LOG.info("Returning cluster status: " + cs.toString());
    return cs;
  }

  public Schema getSchema() {
    return schema;
  }

  public ArrayList<Schema> getSchemas() {
    return schemas;
  }

  /**
   * Get a Schema with fields represented with native Hive types.
   */
  public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
    Schema schema = null;
    // If we have a plan, prefer its logical result schema if it's
    // available; otherwise, try digging out a fetch task; failing that,
    // give up.
if (sem == null) { // can't get any info without a plan } else if (sem.getResultSchema() != null) { List<FieldSchema> lst = sem.getResultSchema(); schema = new Schema(lst, null); } else if (sem.getFetchTask() != null) { FetchTask ft = sem.getFetchTask(); TableDesc td = ft.getTblDesc(); // partitioned tables don't have tableDesc set on the FetchTask. Instead // they have a list of PartitionDesc objects, each with a table desc. // Let's // try to fetch the desc for the first partition and use it's // deserializer. if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) { if (ft.getWork().getPartDesc().size() > 0) { td = ft.getWork().getPartDesc().get(0).getTableDesc(); } } if (td == null) { LOG.info("No returning schema."); } else { String tableName = "result"; List<FieldSchema> lst = null; try { lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer()); } catch (Exception e) { LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e)); } if (lst != null) { schema = new Schema(lst, null); } } } if (schema == null) { schema = new Schema(); } LOG.info("Returning Hive schema: " + schema); return schema; } /** * Get a Schema with fields represented with Thrift DDL types */ public Schema getThriftSchema() throws Exception { Schema schema; try { schema = getSchema(); if (schema != null) { List<FieldSchema> lst = schema.getFieldSchemas(); // Go over the schema and convert type to thrift type if (lst != null) { for (FieldSchema f : lst) { f.setType(MetaStoreUtils.typeToThriftType(f.getType())); } } } } catch (Exception e) { e.printStackTrace(); throw e; } LOG.info("Returning Thrift schema: " + schema); return schema; } /** * Return the maximum number of rows returned by getResults */ public int getMaxRows() { return maxRows; } /** * Set the maximum number of rows returned by getResults */ public void setMaxRows(int maxRows) { this.maxRows = maxRows; } public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) { if (tasks == null) { return false; } boolean hasReduce = false; for (Task<? extends Serializable> task : tasks) { if (task.hasReduce()) { return true; } hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks())); } return hasReduce; } /** * for backwards compatibility with current tests */ public MultiDriver_BAK(HiveConf conf) { this.conf = conf; } public MultiDriver_BAK() { if (SessionState.get() != null) { conf = SessionState.get().getConf(); } } /** * Compile a new query. Any currently-planned query associated with this Driver is discarded. * * @param command * The SQL query to compile. 
*/ public int multiCompile(ArrayList<Pair<String, Configuration>> multiCmds) { return multiCompile(multiCmds, true); } /** * Hold state variables specific to each query being executed, that may not * be consistent in the overall SessionState */ private static class QueryState { private HiveOperation op; private String cmd; private boolean init = false; /** * Initialize the queryState with the query state variables */ public void init(HiveOperation op, String cmd) { this.op = op; this.cmd = cmd; this.init = true; } public boolean isInitialized() { return this.init; } public HiveOperation getOp() { return this.op; } public String getCmd() { return this.cmd; } } public void saveSession(QueryState qs) { SessionState oldss = SessionState.get(); if (oldss != null && oldss.getHiveOperation() != null) { qs.init(oldss.getHiveOperation(), oldss.getCmd()); } } public void restoreSession(QueryState qs) { SessionState ss = SessionState.get(); if (ss != null && qs != null && qs.isInitialized()) { ss.setCmd(qs.getCmd()); ss.setCommandType(qs.getOp()); } } /** * Compile a new query, but potentially reset taskID counter. Not resetting task counter * is useful for generating re-entrant QL queries. * * @param command * The HiveQL query to compile * @param resetTaskIds * Resets taskID counter if true. * @return 0 for ok */ public int multiCompile(ArrayList<Pair<String, Configuration>> multiCmds, boolean resetTaskIds) { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(LOG, PerfLogger.MULTICOMPILE); // holder for parent command type/string when executing reentrant queries QueryState queryState = new QueryState(); if (plan != null) { close(); plan = null; } if (resetTaskIds) { TaskFactory.resetId(); } saveSession(queryState); SemanticAnalyzer sem = null; try { for (int i = 0; i < multiCmds.size(); i++) { Pair<String, Configuration> cmdConf = multiCmds.get(i); String command = cmdConf.getKey(); cmds.add(command); conf = (HiveConf) cmdConf.getValue(); hconfs.add(conf); command = new VariableSubstitution().substitute(conf, command); ctx = new Context(conf); ctx.setTryCount(getTryCount()); ctx.setCmd(command); ctx.setHDFSCleanup(true); perfLogger.PerfLogBegin(LOG, PerfLogger.MULTIPARSE); ParseDriver pd = new ParseDriver(); ASTNode tree = pd.parse(command, ctx); tree = ParseUtils.findRootNonNullToken(tree); perfLogger.PerfLogEnd(LOG, PerfLogger.MULTIPARSE); perfLogger.PerfLogBegin(LOG, PerfLogger.MULTIANALYZE1); sem = new SemanticAnalyzer(conf); List<HiveSemanticAnalyzerHook> saHooks = getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class); // Do semantic analysis and plan generation if (saHooks != null) { HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl(); hookCtx.setConf(conf); for (HiveSemanticAnalyzerHook hook : saHooks) { tree = hook.preAnalyze(hookCtx, tree); } // do ananlyze to get pctx and store in multiPctx sem.initSem(ctx); multiPctx.put(i, sem.analyzePhase1(tree)); hookCtx.update(sem); for (HiveSemanticAnalyzerHook hook : saHooks) { hook.postAnalyze(hookCtx, sem.getRootTasks()); } } else { sem.initSem(ctx); multiPctx.put(i, sem.analyzePhase1(tree)); } LOG.info("Multi Semantic Analysis Phase1 Completed"); // validate the plan sem.validate(); perfLogger.PerfLogEnd(LOG, PerfLogger.MULTIANALYZE1); // get the output schema schemas.add(getSchema(sem, conf)); // do the authorization check if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { try { perfLogger.PerfLogBegin(LOG, 
PerfLogger.DO_AUTHORIZATION); doAuthorization(sem); } catch (AuthorizationException authExp) { errorMessage = "Authorization failed:" + authExp.getMessage() + ". Use show grant to get more details."; console.printError(errorMessage); return 403; } finally { perfLogger.PerfLogEnd(LOG, PerfLogger.DO_AUTHORIZATION); } } } // Optree output LOG.info("======Print Multi Query Optree After sem.analyzePhase1 ======="); for (int key = 0; key < multiPctx.size(); key++) { ParseContext pCtx = multiPctx.get(key); String cmd = cmds.get(key); LOG.info("The " + key + "st Query:"); LOG.info("Command:\t " + cmd); LOG.info("OPtree:\t " + Operator.toString(pCtx.getTopOps().values())); } LOG.info("======Print Multi Query Optree After sem.analyzePhase1 ======="); //Set Explain //set the explain the query plan stmt variables boolean extended = false; boolean formatted = false; boolean dependency = false; boolean logical = false; if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMULTIEXPLAIN)) { ctx.setExplain(true); } ctx.setExplainLogical(logical); //merge multipctx MultiParseContext multipCtx; multipCtx = mergePctx(); //log print LOG.info("Optree After mergePctx:\n" + Operator.toString(multipCtx.getTopOps().values())); // do InterQueryFlow analysis // set InterQueryFlowCtx class's result to multipCtx InterQueryFlowCtx queryFlowCtx = new InterQueryFlowCtx(conf, multiPctx); queryFlowCtx.multiQueryFlowAnalysis(); multipCtx.setQueryFlowCtx(queryFlowCtx); // Common-sub-tree if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEOPTCSQ)) { LOG.info("======Print merge-common-sub-tree test======="); CommonSubtreeReuse reuseSubtree = new CommonSubtreeReuse(multiPctx, multipCtx); reuseSubtree.subtreeReuse(); } //sem2: do optimization and gen MR task SemanticAnalyzer sem2; sem2 = new SemanticAnalyzer(conf); sem2.analyzePhase2(multipCtx); sem2.validate(); if (ctx.getExplain()) { ctx.setResFile(new Path(ctx.getLocalTmpFileURI())); List<Task<? extends Serializable>> tasks = sem2.getRootTasks(); //Task<? extends Serializable> fetchTask = sem.getFetchTask(); HashMap<Integer, FetchTask> FetchTaskList = sem2.getfetchtasklist(); if (tasks == null) { if (FetchTaskList != null) { tasks = new ArrayList<Task<? extends Serializable>>(); for (int i = 0; i < multiPctx.size(); i++) { if (FetchTaskList.get(i) != null) { tasks.add(FetchTaskList.get(i)); } } } } else { if (FetchTaskList != null) { for (int i = 0; i < multiPctx.size(); i++) { if (FetchTaskList.get(i) != null) { tasks.add(FetchTaskList.get(i)); } } } } Task<? 
extends Serializable> explTask = TaskFactory .get(new ExplainWork(ctx.getResFile().toString(), multipCtx, tasks, // ((ASTNode) ast.getChild(0)).toStringTree(), null, sem2.getInputs(), extended, formatted, dependency, logical), conf); List<FieldSchema> lst = explTask.getResultSchema(); schema = new Schema(lst, null); // sem2.getRootTasks().clear(); sem2.getRootTasks().add(explTask); } plan = new MultiQueryPlan("Multiquery:" + cmds.get(0), sem2, perfLogger.getStartTime(PerfLogger.DRIVER_RUN)); // initialize Multi-FetchTask List right here if (((MultiQueryPlan) plan).getfetchtasklist() != null) { HashMap<Integer, FetchTask> FetchTaskList = ((MultiQueryPlan) plan).getfetchtasklist(); int i; for (i = 0; i < multiPctx.size(); i++) { if (FetchTaskList.get(i) != null) { FetchTaskList.get(i).initialize(conf, plan, null); } } } /* if(sem != null){ sem.analyzeMultiQuery(); sem.analyzePhase2(); } */ perfLogger.PerfLogBegin(LOG, PerfLogger.PREOPTTEST); //int ret=0; //try { //multipreoptimizetest is for testing //it read each pctx in the multipctx and execute one by one //TODO it should not be called when the multiquery execute normally // ret=multipreoptimizetest(); //} //catch (CommandNeedRetryException e) { // TODO Auto-generated catch block // e.printStackTrace(); //} perfLogger.PerfLogEnd(LOG, PerfLogger.PREOPTTEST); return 0; } catch (Exception e) { ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage()); errorMessage = "FAILED: " + e.getClass().getSimpleName(); if (error != ErrorMsg.GENERIC_ERROR) { errorMessage += " [Error " + error.getErrorCode() + "]:"; } // HIVE-4889 if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) { errorMessage += " " + e.getCause().getMessage(); } else { errorMessage += " " + e.getMessage(); } SQLState = error.getSQLState(); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return error.getErrorCode(); } finally { perfLogger.PerfLogEnd(LOG, PerfLogger.MULTICOMPILE); restoreSession(queryState); } } public static void addbottomfakeoperator(MultiParseContext multipCtx, HashSet<Integer> set) { // TODO Auto-generated method stub List<Operator<? extends OperatorDesc>> oplistFS = new ArrayList<Operator<? extends OperatorDesc>>(); List<Operator<? extends OperatorDesc>> oplistRS = new ArrayList<Operator<? extends OperatorDesc>>(); HashMap<Integer, ParseContext> multipctx = multipCtx.getmultipctx(); for (int i = 0; i < multipctx.size(); i++) { if (!set.contains(i)) { Iterator<String> it = multipctx.get(i).getTopOps().keySet().iterator(); if (it.hasNext()) { String key = it.next(); genbottomopFSlist(oplistFS, multipctx.get(i).getTopOps().get(key)); } } } for (int i = 0; i < oplistFS.size(); i++) { genbottomopRSlist(oplistRS, oplistFS.get(i)); } FakeDesc FD = new FakeDesc(); //RowResolver rs =new RowResolver(); ArrayList<ColumnInfo> vecCol = new ArrayList<ColumnInfo>(); RowSchema rs = new RowSchema(vecCol); FakeOperator FO = (FakeOperator) OperatorFactory.getAndMakeChild(FD, rs, oplistRS); // FakeOperator FO=(FakeOperator)OperatorFactory.getAndMakeChild(FD,rs, oplistFS); multipCtx.setFakeOperator(FO); } public static Operator<? extends OperatorDesc> getOneTSbyFS(Operator<? extends OperatorDesc> op) { if (op instanceof TableScanOperator) { return op; } if (op.getParentOperators() != null && op.getParentOperators().size() > 0) { return getOneTSbyFS(op.getParentOperators().get(0)); } return null; } private static void genbottomopRSlist(List<Operator<? 
extends OperatorDesc>> oplistRS, Operator<? extends OperatorDesc> opFS) { // TODO Auto-generated method stub //Operator<? extends OperatorDesc> fs =oplistFS.get(i); if (opFS.getParentOperators() != null) { /* if(opFS.getParentOperators().size()==1){ Operator<? extends OperatorDesc> current = opFS.getParentOperators().get(0); if(current instanceof ReduceSinkOperator){ oplistRS.add(current); return; }else{ genbottomopRSlist(oplistRS,current); } } */ for (Operator<? extends OperatorDesc> current : opFS.getParentOperators()) { if (current instanceof JoinOperator) { return; } if (current instanceof ReduceSinkOperator) { oplistRS.add(current); } else { genbottomopRSlist(oplistRS, current); } } } } private static void genbottomopFSlist(List<Operator<? extends OperatorDesc>> oplist, Operator<? extends OperatorDesc> topop) { if (topop.getChildOperators() == null || topop.getChildOperators().size() == 0) { if (topop instanceof TableScanOperator) { return; } oplist.add(topop); } else { genbottomopFSlist(oplist, topop.getChildOperators().get(0)); } } private MultiParseContext mergePctx() { MultiParseContext multipCtx = new MultiParseContext(new HiveConf(), //conf new QB(), //qb new ASTNode(), //ast new HashMap<TableScanOperator, ExprNodeDesc>(), //opToPartPruner new HashMap<TableScanOperator, PrunedPartitionList>(), //opToPartList new HashMap<String, Operator<? extends OperatorDesc>>(), //topOps new HashMap<String, Operator<? extends OperatorDesc>>(), //topSelOps new LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext>(), //opParseCtx new HashMap<JoinOperator, QBJoinTree>(), //joinContext new HashMap<SMBMapJoinOperator, QBJoinTree>(), //smbMapJoinContext new HashMap<TableScanOperator, Table>(), //topToTable new HashMap<TableScanOperator, Map<String, String>>(), //topToProps new HashMap<FileSinkOperator, Table>(), //fsopToTable new ArrayList<LoadTableDesc>(), //loadTableWork new ArrayList<LoadFileDesc>(), //loadFileWork null, //ctx new HashMap<String, String>(), //idToTableNameMap 0, //destTableId new UnionProcContext(), //uCtx new ArrayList<AbstractMapJoinOperator<? extends MapJoinDesc>>(), //listMapJoinOpsNoReducer new HashMap<GroupByOperator, Set<String>>(), //groupOpToInputTables new HashMap<String, PrunedPartitionList>(), //prunedPartitions new HashMap<TableScanOperator, sampleDesc>(), //opToSamplePruner new GlobalLimitCtx(), //globalLimitCtx new HashMap<String, SplitSample>(), //nameToSplitSample new HashSet<ReadEntity>(), //semanticInputs new ArrayList<Task<? extends Serializable>>(), //rootTasks new HashMap<TableScanOperator, Map<String, ExprNodeDesc>>(), //opToPartToSkewedPruner new HashMap<String, ReadEntity>(), //viewAliasToInput new ArrayList<ReduceSinkOperator>(), //reduceSinkOperatorsAddedByEnforceBucketingSorting new QueryProperties(), //queryProperties multiPctx //multipctx ); for (int i = 0; i < cmds.size(); i++) { ParseContext pCtx = multiPctx.get(i); int queryid = i; //1 conf // set the first query conf to the multipctx.hiveconf if (i == cmds.size() - 1) { multipCtx.setConf(pCtx.getConf()); } //2 qb // it's useless for qb field if (i == 0) { multipCtx.setQB(pCtx.getQB()); } //3 ast // TODO //4 opToPartPruner multipCtx.getOpToPartPruner().putAll(pCtx.getOpToPartPruner()); //5 opToPartList multipCtx.getOpToPartList().putAll(pCtx.getOpToPartList()); //6 topOps // add "query id" string in front of the hashmap key string Iterator iter = pCtx.getTopOps().entrySet().iterator(); while (iter.hasNext()) { Map.Entry<String, Operator<? 
extends OperatorDesc>> entry = (Map.Entry<String, Operator<? extends OperatorDesc>>) iter .next(); Object key = entry.getKey(); Operator<? extends OperatorDesc> value = entry.getValue(); multipCtx.getTopOps().put("query" + queryid + ":" + key, value); } //7 topSelOps iter = pCtx.getTopSelOps().entrySet().iterator(); while (iter.hasNext()) { Map.Entry<String, Operator<? extends OperatorDesc>> entry = (Map.Entry<String, Operator<? extends OperatorDesc>>) iter .next(); Object key = entry.getKey(); Operator<? extends OperatorDesc> value = entry.getValue(); multipCtx.getTopSelOps().put("query" + queryid + ":" + key, value); } //8 opParseCtx //private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx; multipCtx.getOpParseCtx().putAll(pCtx.getOpParseCtx()); //9 joinContext multipCtx.getJoinContext().putAll(pCtx.getJoinContext()); // TODO //10 smbMapJoinContext //multipCtx.getSmbMapJoinContext().putAll(pCtx.getSmbMapJoinContext()); //11 topToTable multipCtx.getTopToTable().putAll(pCtx.getTopToTable()); //12 topToProps multipCtx.getTopToProps().putAll(pCtx.getTopToProps()); //13 fsopToTable multipCtx.getFsopToTable().putAll(pCtx.getFsopToTable()); //14 loadTableWork //15 loadFileWork //16 ctx if (i == cmds.size() - 1) { multipCtx.setContext(pCtx.getContext()); } //TODO //17 idToTableNameMap //18 destTableId //19 uCtx //20 listMapJoinOpsNoReducer //21 groupOpToInputTables //22 prunedPartitions iter = pCtx.getPrunedPartitions().entrySet().iterator(); while (iter.hasNext()) { Map.Entry<String, PrunedPartitionList> entry = (Map.Entry<String, PrunedPartitionList>) iter.next(); Object key = entry.getKey(); PrunedPartitionList value = entry.getValue(); multipCtx.getPrunedPartitions().put("query" + queryid + ":" + key, value); } //23 opToSamplePruner multipCtx.getOpToSamplePruner().putAll(pCtx.getOpToSamplePruner()); //24 globalLimitCtx // set globallimitctx disable GlobalLimitCtx globalLimitCtx = new GlobalLimitCtx(); globalLimitCtx.disableOpt(); multipCtx.setGlobalLimitCtx(globalLimitCtx); //TODO //25 nameToSplitSample //26 semanticInputs //27 rootTasks //28 opToPartToSkewedPruner multipCtx.getOpToPartToSkewedPruner().putAll(pCtx.getOpToPartToSkewedPruner()); //29 viewAliasToInput iter = pCtx.getViewAliasToInput().entrySet().iterator(); while (iter.hasNext()) { Map.Entry<String, ReadEntity> entry = (Map.Entry<String, ReadEntity>) iter.next(); Object key = entry.getKey(); ReadEntity value = entry.getValue(); multipCtx.getViewAliasToInput().put("query" + queryid + ":" + key, value); } //30 reduceSinkOperatorsAddedByEnforceBucketingSorting multipCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting() .addAll(multipCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting()); //31 queryProperties } return multipCtx; } private int multipreoptimizetest() throws CommandNeedRetryException { int i; PerfLogger perfLogger = PerfLogger.getPerfLogger(); for (i = 0; i < cmds.size(); i++) { TaskFactory.resetId(); ParseContext pCtx = multiPctx.get(i); // conf=(HiveConf)confs.get(i); conf = pCtx.getConf(); ctx = pCtx.getContext(); LOG.info("Before MultidoPhase2forTest Optree:\n" + Operator.toString(pCtx.getTopOps().values())); // do Optimizer gen MR task SemanticAnalyzer sem; try { sem = new SemanticAnalyzer(conf); sem.MultidoPhase2forTest(pCtx); sem.validate(); plan = new QueryPlan(cmds.get(i), sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN)); if (false) { String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml"; LOG.info("query plan = " + 
queryPlanFileName); queryPlanFileName = new Path(queryPlanFileName).toUri().getPath(); // serialize the queryPlan FileOutputStream fos = new FileOutputStream(queryPlanFileName); Utilities.serializeObject(plan, fos); fos.close(); } // initialize FetchTask right here if (plan.getFetchTask() != null) { plan.getFetchTask().initialize(conf, plan, null); } // get the output schema schema = schemas.get(i); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } boolean requireLock = false; boolean ckLock = checkLockManager(); if (ckLock) { boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY); if (lockOnlyMapred) { Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>(); taskQueue.addAll(plan.getRootTasks()); while (taskQueue.peek() != null) { Task<? extends Serializable> tsk = taskQueue.remove(); requireLock = requireLock || tsk.requireLock(); if (requireLock) { break; } if (tsk instanceof ConditionalTask) { taskQueue.addAll(((ConditionalTask) tsk).getListTasks()); } if (tsk.getChildTasks() != null) { taskQueue.addAll(tsk.getChildTasks()); } // does not add back up task here, because back up task should be the same // type of the original task. } } else { requireLock = true; } } int ret; if (requireLock) { ret = acquireReadWriteLocks(); if (ret != 0) { releaseLocks(ctx.getHiveLocks()); // return new CommandProcessorResponse(ret, errorMessage, SQLState); } } ret = execute(); if (ret != 0) { //if needRequireLock is false, the release here will do nothing because there is no lock releaseLocks(ctx.getHiveLocks()); // return new CommandProcessorResponse(ret, errorMessage, SQLState); } //if needRequireLock is false, the release here will do nothing because there is no lock releaseLocks(ctx.getHiveLocks()); //test output SessionState ss = SessionState.get(); PrintStream out = ss.out; ArrayList<String> res = new ArrayList<String>(); LOG.info("Output the result of query ID(" + i + "):"); printHeader(this, out); int counter = 0; try { while (this.getResults(res)) { for (String r : res) { out.println(r); } counter += res.size(); res.clear(); if (out.checkError()) { break; } } } catch (IOException e) { console.printError("Failed with exception " + e.getClass().getName() + ":" + e.getMessage(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); ret = 1; } } return 0; } private void printHeader(MultiDriver_BAK qp, PrintStream out) { List<FieldSchema> fieldSchemas = qp.getSchema().getFieldSchemas(); if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CLI_PRINT_HEADER) && fieldSchemas != null) { // Print the column names boolean first_col = true; for (FieldSchema fs : fieldSchemas) { if (!first_col) { out.print('\t'); } out.print(fs.getName()); first_col = false; } out.println(); } } private void doAuthorization(BaseSemanticAnalyzer sem) throws HiveException, AuthorizationException { HashSet<ReadEntity> inputs = sem.getInputs(); HashSet<WriteEntity> outputs = sem.getOutputs(); SessionState ss = SessionState.get(); HiveOperation op = ss.getHiveOperation(); Hive db = sem.getDb(); if (op != null) { if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) { ss.getAuthorizer().authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); } else { if (op.equals(HiveOperation.IMPORT)) { ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem; if (!isa.existsTable()) { 
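// An IMPORT into a table that does not exist yet creates that table, so it is
            // authorized against the database with the same output privileges as CREATE TABLE AS SELECT.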
ss.getAuthorizer().authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); } } } if (outputs != null && outputs.size() > 0) { for (WriteEntity write : outputs) { if (write.getType() == WriteEntity.Type.PARTITION) { Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false); if (part != null) { ss.getAuthorizer().authorize(write.getPartition(), null, op.getOutputRequiredPrivileges()); continue; } } if (write.getTable() != null) { ss.getAuthorizer().authorize(write.getTable(), null, op.getOutputRequiredPrivileges()); } } } } if (inputs != null && inputs.size() > 0) { Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>(); Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>(); Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>(); for (ReadEntity read : inputs) { Table tbl = read.getTable(); if ((read.getPartition() != null) || (tbl.isPartitioned())) { String tblName = tbl.getTableName(); if (tableUsePartLevelAuth.get(tblName) == null) { boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE".equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE")))); if (usePartLevelPriv) { tableUsePartLevelAuth.put(tblName, Boolean.TRUE); } else { tableUsePartLevelAuth.put(tblName, Boolean.FALSE); } } } } if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) { SemanticAnalyzer querySem = (SemanticAnalyzer) sem; ParseContext parseCtx = querySem.getParseContext(); Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable(); for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem.getParseContext() .getTopOps().entrySet()) { Operator<? 
extends OperatorDesc> topOp = topOpMap.getValue(); if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) { TableScanOperator tableScanOp = (TableScanOperator) topOp; Table tbl = tsoTopMap.get(tableScanOp); List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs(); List<FieldSchema> columns = tbl.getCols(); List<String> cols = new ArrayList<String>(); if (neededColumnIds != null && neededColumnIds.size() > 0) { for (int i = 0; i < neededColumnIds.size(); i++) { cols.add(columns.get(neededColumnIds.get(i)).getName()); } } else { for (int i = 0; i < columns.size(); i++) { cols.add(columns.get(i).getName()); } } // map may not contain all sources, since input list may have been optimized out // or non-existent tho such sources may still be referenced by the TableScanOperator // if it's null then the partition probably doesn't exist so let's use table permission if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) { String alias_id = topOpMap.getKey(); PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id); Set<Partition> parts = partsList.getPartitions(); for (Partition part : parts) { List<String> existingCols = part2Cols.get(part); if (existingCols == null) { existingCols = new ArrayList<String>(); } existingCols.addAll(cols); part2Cols.put(part, existingCols); } } else { List<String> existingCols = tab2Cols.get(tbl); if (existingCols == null) { existingCols = new ArrayList<String>(); } existingCols.addAll(cols); tab2Cols.put(tbl, existingCols); } } } } // cache the results for table authorization Set<String> tableAuthChecked = new HashSet<String>(); for (ReadEntity read : inputs) { Table tbl = read.getTable(); if (read.getPartition() != null) { Partition partition = read.getPartition(); tbl = partition.getTable(); // use partition level authorization if (tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) { List<String> cols = part2Cols.get(partition); if (cols != null && cols.size() > 0) { ss.getAuthorizer().authorize(partition.getTable(), partition, cols, op.getInputRequiredPrivileges(), null); } else { ss.getAuthorizer().authorize(partition, op.getInputRequiredPrivileges(), null); } continue; } } // if we reach here, it means it needs to do a table authorization // check, and the table authorization may already happened because of other // partitions if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) && !(tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE)) { List<String> cols = tab2Cols.get(tbl); if (cols != null && cols.size() > 0) { ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null); } else { ss.getAuthorizer().authorize(tbl, op.getInputRequiredPrivileges(), null); } tableAuthChecked.add(tbl.getTableName()); } } } } /** * @return The current query plan associated with this Driver, if any. */ public QueryPlan getPlan() { return plan; } /** * @param t * The table to be locked * @param p * The partition to be locked * @param mode * The mode of the lock (SHARED/EXCLUSIVE) Get the list of objects to be locked. If a * partition needs to be locked (in any mode), all its parents should also be locked in * SHARED mode. 
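* For example (illustrative): an EXCLUSIVE lock on partition tab@ds=1/hr=2 also adds SHARED locks
* on the partial partition tab@ds=1, on the table tab, and on the table's database.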
 */
  private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode)
      throws SemanticException {
    List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
    HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(),
        String.valueOf(System.currentTimeMillis()), "IMPLICIT", plan.getQueryStr());
    if (t != null) {
      locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
      mode = HiveLockMode.SHARED;
      locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
      return locks;
    }
    if (p != null) {
      if (!(p instanceof DummyPartition)) {
        locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
      }
      // All the parents are locked in shared mode
      mode = HiveLockMode.SHARED;
      // For dummy partitions, only the partition name is needed
      String name = p.getName();
      if (p instanceof DummyPartition) {
        name = p.getName().split("@")[2];
      }
      String partialName = "";
      String[] partns = name.split("/");
      int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
      Map<String, String> partialSpec = new LinkedHashMap<String, String>();
      for (int idx = 0; idx < len; idx++) {
        String partn = partns[idx];
        partialName += partn;
        String[] nameValue = partn.split("=");
        assert (nameValue.length == 2);
        partialSpec.put(nameValue[0], nameValue[1]);
        try {
          locks.add(new HiveLockObj(new HiveLockObject(
              new DummyPartition(p.getTable(), p.getTable().getDbName() + "/"
                  + p.getTable().getTableName() + "/" + partialName, partialSpec), lockData), mode));
          partialName += "/";
        } catch (HiveException e) {
          throw new SemanticException(e.getMessage());
        }
      }
      locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
      locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
    }
    return locks;
  }

  /**
   * Acquire read and write locks needed by the statement. The list of objects to be locked is
   * obtained from the inputs and outputs populated by the compiler. The lock acquisition scheme is
   * pretty simple: if all the locks cannot be obtained, error out. Deadlock is avoided by making
   * sure that the locks are lexicographically sorted.
   */
  public int acquireReadWriteLocks() {
    PerfLogger perfLogger = PerfLogger.getPerfLogger();
    perfLogger.PerfLogBegin(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
    try {
      boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
      if (!supportConcurrency) {
        return 0;
      }
      List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
      // Sort all the inputs and outputs.
      // If a lock needs to be acquired on any partition, a read lock needs to be acquired on all
      // its parents also.
      for (ReadEntity input : plan.getInputs()) {
        if (input.getType() == ReadEntity.Type.TABLE) {
          lockObjects.addAll(getLockObjects(input.getTable(), null, HiveLockMode.SHARED));
        } else {
          lockObjects.addAll(getLockObjects(null, input.getPartition(), HiveLockMode.SHARED));
        }
      }
      for (WriteEntity output : plan.getOutputs()) {
        List<HiveLockObj> lockObj = null;
        if (output.getTyp() == WriteEntity.Type.TABLE) {
          lockObj = getLockObjects(output.getTable(), null, output.isComplete() ?
HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED); } else if (output.getTyp() == WriteEntity.Type.PARTITION) { lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE); } // In case of dynamic queries, it is possible to have incomplete dummy partitions else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) { lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.SHARED); } if (lockObj != null) { lockObjects.addAll(lockObj); ctx.getOutputLockObjects().put(output, lockObj); } } if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) { return 0; } HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System.currentTimeMillis()), "IMPLICIT", plan.getQueryStr()); // Lock the database also String currentDb = SessionState.get().getCurrentDatabase(); lockObjects.add(new HiveLockObj(new HiveLockObject(currentDb, lockData), HiveLockMode.SHARED)); List<HiveLock> hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false); if (hiveLocks == null) { throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg()); } else { ctx.setHiveLocks(hiveLocks); } return (0); } catch (SemanticException e) { errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (10); } catch (LockException e) { errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (10); } finally { perfLogger.PerfLogEnd(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); } } /** * @param hiveLocks * list of hive locks to be released Release all the locks specified. If some of the * locks have already been released, ignore them **/ private void releaseLocks(List<HiveLock> hiveLocks) { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(LOG, PerfLogger.RELEASE_LOCKS); if (hiveLocks != null) { ctx.getHiveLockMgr().releaseLocks(hiveLocks); } ctx.setHiveLocks(null); perfLogger.PerfLogEnd(LOG, PerfLogger.RELEASE_LOCKS); } public CommandProcessorResponse multiRun(ArrayList<Pair<String, Configuration>> multiCmds) throws CommandNeedRetryException { CommandProcessorResponse cpr = runInternal(multiCmds); if (cpr.getResponseCode() == 0) { return cpr; } SessionState ss = SessionState.get(); if (ss == null) { return cpr; } MetaDataFormatter mdf = MetaDataFormatUtils.getFormatter(ss.getConf()); if (!(mdf instanceof JsonMetaDataFormatter)) { return cpr; } /* * Here we want to encode the error in machine readable way (e.g. JSON) * Ideally, errorCode would always be set to a canonical error defined in ErrorMsg. * In practice that is rarely the case, so the messy logic below tries to tease * out canonical error code if it can. Exclude stack trace from output when * the error is a specific/expected one. * It's written to stdout for backward compatibility (WebHCat consumes it). */ try { if (downstreamError == null) { mdf.error(ss.out, errorMessage, cpr.getResponseCode(), SQLState); return cpr; } ErrorMsg canonicalErr = ErrorMsg.getErrorMsg(cpr.getResponseCode()); if (canonicalErr != null && canonicalErr != ErrorMsg.GENERIC_ERROR) { /* * Some HiveExceptions (e.g. SemanticException) don't set * canonical ErrorMsg explicitly, but there is logic * (e.g. 
#compile()) to find an appropriate canonical error and * return its code as error code. In this case we want to * preserve it for downstream code to interpret */ mdf.error(ss.out, errorMessage, cpr.getResponseCode(), SQLState, null); return cpr; } if (downstreamError instanceof HiveException) { HiveException rc = (HiveException) downstreamError; mdf.error(ss.out, errorMessage, rc.getCanonicalErrorMsg().getErrorCode(), SQLState, rc.getCanonicalErrorMsg() == ErrorMsg.GENERIC_ERROR ? org.apache.hadoop.util.StringUtils.stringifyException(rc) : null); } else { ErrorMsg canonicalMsg = ErrorMsg.getErrorMsg(downstreamError.getMessage()); mdf.error(ss.out, errorMessage, canonicalMsg.getErrorCode(), SQLState, org.apache.hadoop.util.StringUtils.stringifyException(downstreamError)); } } catch (HiveException ex) { console.printError("Unable to JSON-encode the error", org.apache.hadoop.util.StringUtils.stringifyException(ex)); } return cpr; } private CommandProcessorResponse runInternal(ArrayList<Pair<String, Configuration>> multiCmds) throws CommandNeedRetryException { errorMessage = null; SQLState = null; downstreamError = null; if (!validateConfVariables()) { return new CommandProcessorResponse(12, errorMessage, SQLState); } // Reset the perf logger PerfLogger perfLogger = PerfLogger.getPerfLogger(true); perfLogger.PerfLogBegin(LOG, PerfLogger.MULTIDRIVER_RUN); perfLogger.PerfLogBegin(LOG, PerfLogger.TIME_TO_SUBMIT); int ret; synchronized (compileMonitor) { ret = multiCompile(multiCmds); } if (ret != 0) { for (int key = 0; key < multiPctx.size(); key++) { Context ctx = multiPctx.get(key).getContext(); releaseLocks(ctx.getHiveLocks()); } return new CommandProcessorResponse(ret, errorMessage, SQLState); } boolean requireLock = false; boolean ckLock = checkLockManager(); if (ckLock) { boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY); if (lockOnlyMapred) { Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>(); taskQueue.addAll(plan.getRootTasks()); while (taskQueue.peek() != null) { Task<? extends Serializable> tsk = taskQueue.remove(); requireLock = requireLock || tsk.requireLock(); if (requireLock) { break; } if (tsk instanceof ConditionalTask) { taskQueue.addAll(((ConditionalTask) tsk).getListTasks()); } if (tsk.getChildTasks() != null) { taskQueue.addAll(tsk.getChildTasks()); } // does not add back up task here, because back up task should be the same // type of the original task. 
} } else { requireLock = true; } } if (requireLock) { ret = acquireReadWriteLocks(); if (ret != 0) { releaseLocks(ctx.getHiveLocks()); // return new CommandProcessorResponse(ret, errorMessage, SQLState); } } boolean isexplain = ctx.getExplain(); // if(isexplain){ // multiOutputexplain(); // }else{ // reserved function ret = multiExecute(); if (ret != 0) { //if needRequireLock is false, the release here will do nothing because there is no lock releaseLocks(ctx.getHiveLocks()); return new CommandProcessorResponse(ret, errorMessage, SQLState); } multiOutputResult(); //if needRequireLock is false, the release here will do nothing because there is no lock releaseLocks(ctx.getHiveLocks()); for (int key = 0; key < multiPctx.size(); key++) { Context ctx = multiPctx.get(key).getContext(); releaseLocks(ctx.getHiveLocks()); } multiPctx.clear(); perfLogger.PerfLogEnd(LOG, PerfLogger.MULTIDRIVER_RUN); perfLogger.close(LOG, plan); return new CommandProcessorResponse(ret); } private void multiOutputResult() throws CommandNeedRetryException { //test output //output the result of the multiquery // set the var:plan schema ctx SessionState ss = SessionState.get(); PrintStream out = ss.out; ArrayList<String> res = new ArrayList<String>(); LOG.info("Output the result of Multi-query"); try { boolean isexplain = ctx.getExplain(); if (isexplain) { LOG.info("Output explain query plan:"); printHeader(this, out); int counter = 0; while (this.getResults(res)) { for (String r : res) { out.println(r); } counter += res.size(); res.clear(); if (out.checkError()) { break; } } return; } int i; for (i = 0; i < multiPctx.size(); i++) { System.out.println("Output Multi-query ID:" + i); int counter = 0; if (!isexplain) { schema = schemas.get(i); } else { //schema=null; } printHeader(this, out); if (plan != null && ((MultiQueryPlan) plan).getfetchtasklist() != null) { plan.setFetchTask(((MultiQueryPlan) plan).getfetchtasklist().get(i)); } this.ctx = multiPctx.get(i).getContext(); while (this.getResults(res)) { for (String r : res) { out.println(r); } counter += res.size(); res.clear(); if (out.checkError()) { break; } } } } catch (IOException e) { console.printError("Failed with exception " + e.getClass().getName() + ":" + e.getMessage(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); // ret = 1; } } private void multiOutputexplain() throws CommandNeedRetryException { //test output //output the result of the multiquery // set the var:plan schema ctx SessionState ss = SessionState.get(); PrintStream out = ss.out; ArrayList<String> res = new ArrayList<String>(); LOG.info("Output the result of Multi-query"); try { boolean isexplain = ctx.getExplain(); if (true) { LOG.info("Output explain query plan:"); printHeader(this, out); int counter = 0; while (this.getResults(res)) { for (String r : res) { out.println(r); } counter += res.size(); res.clear(); if (out.checkError()) { break; } } return; } int i; for (i = 0; i < multiPctx.size(); i++) { LOG.info("Output Multi-query ID:" + i); int counter = 0; if (!isexplain) { schema = schemas.get(i); } else { //schema=null; } printHeader(this, out); if (plan != null && ((MultiQueryPlan) plan).getfetchtasklist() != null) { plan.setFetchTask(((MultiQueryPlan) plan).getfetchtasklist().get(i)); } this.ctx = multiPctx.get(i).getContext(); while (this.getResults(res)) { for (String r : res) { out.println(r); } counter += res.size(); res.clear(); if (out.checkError()) { break; } } } } catch (IOException e) { console.printError("Failed with exception " + e.getClass().getName() + ":" 
+ e.getMessage(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); // ret = 1; } } /** * Validate configuration variables. * * @return */ private boolean validateConfVariables() { boolean valid = true; if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES)) && ((conf.getBoolVar(HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE)) || (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) || ((conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE))))) { errorMessage = "FAILED: Hive Internal Error: " + ErrorMsg.SUPPORT_DIR_MUST_TRUE_FOR_LIST_BUCKETING.getMsg(); SQLState = ErrorMsg.SUPPORT_DIR_MUST_TRUE_FOR_LIST_BUCKETING.getSQLState(); console.printError(errorMessage + "\n"); valid = false; } return valid; } /** * Returns a set of hooks specified in a configuration variable. * See getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz) */ private List<Hook> getHooks(HiveConf.ConfVars hookConfVar) throws Exception { return getHooks(hookConfVar, Hook.class); } /** * Returns the hooks specified in a configuration variable. * * @param hookConfVar * The configuration variable specifying a comma separated list of the hook * class names. * @param clazz * The super type of the hooks. * @return A list of the hooks cast as the type specified in clazz, in the order * they are listed in the value of hookConfVar * @throws Exception */ private <T extends Hook> List<T> getHooks(ConfVars hookConfVar, Class<T> clazz) throws Exception { try { return HookUtils.getHooks(conf, hookConfVar, clazz); } catch (ClassNotFoundException e) { console.printError(hookConfVar.varname + " Class not found:" + e.getMessage()); throw e; } } public int multiExecute() throws CommandNeedRetryException { return execute(); } public int execute() throws CommandNeedRetryException { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_EXECUTE); boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME)); int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); String queryId = plan.getQueryId(); String queryStr = plan.getQueryStr(); conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId); conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr); conf.set("mapreduce.workflow.id", "hive_" + queryId); conf.set("mapreduce.workflow.name", queryStr); maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER); try { LOG.info("Starting command: " + queryStr); plan.setStarted(); if (SessionState.get() != null) { SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID)); SessionState.get().getHiveHistory().logPlanProgress(plan); } resStream = null; HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS()); hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK); for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) { if (peh instanceof ExecuteWithHookContext) { perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((ExecuteWithHookContext) peh).run(hookContext); perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); } else if (peh instanceof PreExecute) { perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader.getHadoopShims().getUGIForConf(conf)); perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); } } int jobs = Utilities.getMRTasks(plan.getRootTasks()).size(); 
if (jobs > 0) { console.printInfo("Total MapReduce jobs = " + jobs); } if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs)); SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap()); } String jobname = Utilities.abbreviate(queryStr, maxlen - 6); // A runtime that launches runnable tasks as separate Threads through // TaskRunners // As soon as a task isRunnable, it is put in a queue // At any time, at most maxthreads tasks can be running // The main thread polls the TaskRunners to check if they have finished. Queue<Task<? extends Serializable>> runnable = new ConcurrentLinkedQueue<Task<? extends Serializable>>(); Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>(); DriverContext driverCxt = new DriverContext(runnable, ctx); ctx.setHDFSCleanup(true); SessionState.get().setLastMapRedStatsList(new ArrayList<MapRedStats>()); SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>()); SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>()); // Add root Tasks to runnable for (Task<? extends Serializable> tsk : plan.getRootTasks()) { // This should never happen, if it does, it's a bug with the potential to produce // incorrect results. assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty(); driverCxt.addToRunnable(tsk); } perfLogger.PerfLogEnd(LOG, PerfLogger.TIME_TO_SUBMIT); perfLogger.PerfLogBegin(LOG, PerfLogger.RUN_TASKS); // Loop while you either have tasks running, or tasks queued up while (running.size() != 0 || runnable.peek() != null) { // Launch upto maxthreads tasks while (runnable.peek() != null && running.size() < maxthreads) { Task<? extends Serializable> tsk = runnable.remove(); perfLogger.PerfLogBegin(LOG, PerfLogger.TASK + tsk.getName() + "." + tsk.getId()); launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt); } // poll the Tasks to see which one completed TaskResult tskRes = pollTasks(running.keySet()); TaskRunner tskRun = running.remove(tskRes); Task<? extends Serializable> tsk = tskRun.getTask(); perfLogger.PerfLogEnd(LOG, PerfLogger.TASK + tsk.getName() + "." + tsk.getId()); hookContext.addCompleteTask(tskRun); int exitVal = tskRes.getExitVal(); if (exitVal != 0) { if (tsk.ifRetryCmdWhenFail()) { if (!running.isEmpty()) { taskCleanup(running); } // in case we decided to run everything in local mode, restore the // the jobtracker setting to its initial value ctx.restoreOriginalTracker(); throw new CommandNeedRetryException(); } Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask(); if (backupTask != null) { setErrorMsgAndDetail(exitVal, tskRes.getTaskError(), tsk); console.printError(errorMessage); errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName(); console.printError(errorMessage); // add backup task to runnable if (DriverContext.isLaunchable(backupTask)) { driverCxt.addToRunnable(backupTask); } continue; } else { hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK); // Get all the failure execution hooks and execute them. 
            for (Hook ofh : getHooks(HiveConf.ConfVars.ONFAILUREHOOKS)) {
              perfLogger.PerfLogBegin(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());

              ((ExecuteWithHookContext) ofh).run(hookContext);

              perfLogger.PerfLogEnd(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
            }

            setErrorMsgAndDetail(exitVal, tskRes.getTaskError(), tsk);
            SQLState = "08S01";
            console.printError(errorMessage);
            if (!running.isEmpty()) {
              taskCleanup(running);
            }
            // in case we decided to run everything in local mode, restore the
            // jobtracker setting to its initial value
            ctx.restoreOriginalTracker();
            return exitVal;
          }
        }

        if (SessionState.get() != null) {
          SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(),
              Keys.TASK_RET_CODE, String.valueOf(exitVal));
          SessionState.get().getHiveHistory().endTask(queryId, tsk);
        }

        if (tsk.getChildTasks() != null) {
          for (Task<? extends Serializable> child : tsk.getChildTasks()) {
            if (DriverContext.isLaunchable(child)) {
              driverCxt.addToRunnable(child);
            }
          }
        }
      }
      perfLogger.PerfLogEnd(LOG, PerfLogger.RUN_TASKS);

      // in case we decided to run everything in local mode, restore the
      // jobtracker setting to its initial value
      ctx.restoreOriginalTracker();

      // remove incomplete outputs.
      // Some incomplete outputs may be added at the beginning, e.g. for dynamic partitions.
      // remove them
      HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
      for (WriteEntity output : plan.getOutputs()) {
        if (!output.isComplete()) {
          remOutputs.add(output);
        }
      }

      for (WriteEntity output : remOutputs) {
        plan.getOutputs().remove(output);
      }

      hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
      // Get all the post execution hooks and execute them.
      for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
        if (peh instanceof ExecuteWithHookContext) {
          perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());

          ((ExecuteWithHookContext) peh).run(hookContext);

          perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
        } else if (peh instanceof PostExecute) {
          perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());

          ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
              (SessionState.get() != null ?
                  SessionState.get().getLineageState().getLineageInfo() : null),
              ShimLoader.getHadoopShims().getUGIForConf(conf));

          perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
        }
      }

      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE,
            String.valueOf(0));
        SessionState.get().getHiveHistory().printRowCount(queryId);
      }
    } catch (CommandNeedRetryException e) {
      throw e;
    } catch (Exception e) {
      ctx.restoreOriginalTracker();
      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE,
            String.valueOf(12));
      }
      // TODO: do better with handling types of Exception here
      errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
      SQLState = "08S01";
      downstreamError = e;
      console.printError(errorMessage + "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return (12);
    } finally {
      if (SessionState.get() != null) {
        SessionState.get().getHiveHistory().endQuery(queryId);
      }
      if (noName) {
        conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
      }
      perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_EXECUTE);

      if (SessionState.get().getLastMapRedStatsList() != null
          && SessionState.get().getLastMapRedStatsList().size() > 0) {
        long totalCpu = 0;
        console.printInfo("MapReduce Jobs Launched: ");
        for (int i = 0; i < SessionState.get().getLastMapRedStatsList().size(); i++) {
          console.printInfo("Job " + i + ": " + SessionState.get().getLastMapRedStatsList().get(i));
          totalCpu += SessionState.get().getLastMapRedStatsList().get(i).getCpuMSec();
        }
        console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
      }
    }
    plan.setDone();

    if (SessionState.get() != null) {
      try {
        SessionState.get().getHiveHistory().logPlanProgress(plan);
      } catch (Exception e) {
        // ignore
      }
    }
    console.printInfo("OK");
    return (0);
  }

  private void setErrorMsgAndDetail(int exitVal, Throwable downstreamError, Task tsk) {
    this.downstreamError = downstreamError;
    errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
        + tsk.getClass().getName();
    if (downstreamError != null) {
      // here we assume that upstream code may have parametrized the msg from ErrorMsg
      // so we want to keep it
      errorMessage += ". " + downstreamError.getMessage();
    } else {
      ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
      if (em != null) {
        errorMessage += ". " + em.getMsg();
      }
    }
  }

  /**
   * Launches a new task
   *
   * @param tsk
   *          task being launched
   * @param queryId
   *          Id of the query containing the task
   * @param noName
   *          whether the job has no name set in the configuration
   * @param running
   *          map from taskresults to taskrunners
   * @param jobname
   *          name of the task, if it is a map-reduce job
   * @param jobs
   *          number of map-reduce jobs
   * @param cxt
   *          the driver context
   */
  public void launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
      Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) {

    if (SessionState.get() != null) {
      SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
    }

    if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) {
      if (noName) {
        conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")");
      }
      conf.set("mapreduce.workflow.node.name", tsk.getId());
      Utilities.setWorkflowAdjacencies(conf, plan);
      cxt.incCurJobNo(1);
      console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
    }
    tsk.initialize(conf, plan, cxt);
    TaskResult tskRes = new TaskResult();
    TaskRunner tskRun = new TaskRunner(tsk, tskRes);

    // Launch Task
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.isMapRedTask()) {
      // Launch it in the parallel mode, as a separate thread only for MR tasks
      tskRun.start();
    } else {
      tskRun.runSequential();
    }
    running.put(tskRes, tskRun);
    return;
  }

  /**
   * Cleans up remaining tasks in case of failure
   */
  public void taskCleanup(Map<TaskResult, TaskRunner> running) {
    for (Map.Entry<TaskResult, TaskRunner> entry : running.entrySet()) {
      if (entry.getKey().isRunning()) {
        Task<?> task = entry.getValue().getTask();
        try {
          task.shutdown();
        } catch (Exception e) {
          console.printError("Exception on shutting down task " + task.getId() + ": " + e);
        }
      }
    }
    running.clear();
  }

  /**
   * Polls running tasks to see if a task has ended.
   *
   * @param results
   *          Set of result objects for running tasks
   * @return The result object for any completed/failed task
   */
  public TaskResult pollTasks(Set<TaskResult> results) {
    Iterator<TaskResult> resultIterator = results.iterator();
    while (true) {
      while (resultIterator.hasNext()) {
        TaskResult tskRes = resultIterator.next();
        if (!tskRes.isRunning()) {
          return tskRes;
        }
      }

      // Nothing had finished in this pass; sleep for SLEEP_TIME and poll again
      try {
        Thread.sleep(SLEEP_TIME);
      } catch (InterruptedException ie) {
        // Do nothing
      }
      resultIterator = results.iterator();
    }
  }

  public boolean getResults(ArrayList<String> res) throws IOException, CommandNeedRetryException {
    if (plan != null && plan.getFetchTask() != null) {
      FetchTask ft = plan.getFetchTask();
      ft.setMaxRows(maxRows);
      return ft.fetch(res);
    }

    if (resStream == null) {
      resStream = ctx.getStream();
    }
    if (resStream == null) {
      return false;
    }

    int numRows = 0;
    String row = null;

    while (numRows < maxRows) {
      if (resStream == null) {
        if (numRows > 0) {
          return true;
        } else {
          return false;
        }
      }

      bos.reset();
      Utilities.StreamStatus ss;
      try {
        ss = Utilities.readColumn(resStream, bos);
        if (bos.getCount() > 0) {
          row = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
        } else if (ss == Utilities.StreamStatus.TERMINATED) {
          row = new String();
        }

        if (row != null) {
          numRows++;
          res.add(row);
        }
      } catch (IOException e) {
        console.printError("FAILED: Unexpected IO exception : " + e.getMessage());
        res = null;
        return false;
      }

      if (ss == Utilities.StreamStatus.EOF) {
        resStream = ctx.getStream();
      }
    }
    return true;
  }

  public int getTryCount() {
    return tryCount;
  }

  public void setTryCount(int tryCount) {
    this.tryCount = tryCount;
  }

  public int close() {
    try {
      if (plan != null) {
        FetchTask fetchTask = plan.getFetchTask();
        if (null != fetchTask) {
          try {
            fetchTask.clearFetch();
          } catch (Exception e) {
            LOG.debug(" Exception while clearing the Fetch task ", e);
          }
        }
      }
      if (ctx != null) {
        ctx.clear();
      }
      if (null != resStream) {
        try {
          ((FSDataInputStream) resStream).close();
        } catch (Exception e) {
          LOG.debug(" Exception while closing the resStream ", e);
        }
      }
    } catch (Exception e) {
      console.printError("FAILED: Hive Internal Error: " + Utilities.getNameMessage(e) + "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
      return 13;
    }
    return 0;
  }

  public void destroy() {
    if (ctx != null) {
      releaseLocks(ctx.getHiveLocks());
    }
    if (hiveLockMgr != null) {
      try {
        hiveLockMgr.close();
      } catch (LockException e) {
        LOG.warn("Exception in closing hive lock manager. "
            + org.apache.hadoop.util.StringUtils.stringifyException(e));
      }
    }
  }

  public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
    return plan.getQueryPlan();
  }

  @Override
  public CommandProcessorResponse run(String command) throws CommandNeedRetryException {
    // TODO Auto-generated method stub
    return null;
  }
}
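
The hook plumbing in execute() above resolves hook class names from the comma-separated values of hive.exec.pre.hooks, hive.exec.post.hooks and hive.exec.failure.hooks (the PREEXECHOOKS, POSTEXECHOOKS and ONFAILUREHOOKS configuration variables) via getHooks(), and then invokes each hook with the HookContext it built from the current plan and configuration. As a minimal sketch of how such a hook might look (the package and class names below are hypothetical and are not part of the Driver source):

package com.example.hive.hooks; // hypothetical package, for illustration only

import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;

/**
 * Hypothetical pre-execution hook: logs the query id and query string before
 * the Driver starts launching tasks. execute() calls run(HookContext) on every
 * configured hook that implements ExecuteWithHookContext.
 */
public class QueryAuditHook implements ExecuteWithHookContext {

  @Override
  public void run(HookContext hookContext) throws Exception {
    // The Driver constructs the HookContext from the current QueryPlan and conf.
    QueryPlan queryPlan = hookContext.getQueryPlan();
    System.out.println("About to execute query " + queryPlan.getQueryId()
        + ": " + queryPlan.getQueryStr());
  }
}

Assuming the class is compiled and placed on the session classpath (for example with ADD JAR), it could be wired in with set hive.exec.pre.hooks=com.example.hive.hooks.QueryAuditHook; after which getHooks(HiveConf.ConfVars.PREEXECHOOKS) would instantiate it and execute() would run it before the root tasks are launched.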