Java tutorial: reading Panthera's SkinDriver, a Hive Driver subclass that tries an SQL parser before falling back to HiveQL
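The listing that follows is the complete source of com.intel.ssg.dcst.panthera.parse.SkinDriver, Panthera's replacement for Hive's Driver. It compiles a command by trying the SQL parser (SqlParseDriver) first and falling back to the HiveQL parser, runs the usual semantic analysis, authorization, and locking steps, executes the resulting task graph, and exposes results through getResults(). Before reading the internals, it helps to see how a caller would drive the class. The snippet below is a minimal sketch, not taken from the Panthera sources: it assumes an embedded, CLI-style SessionState is sufficient, and the table name is made up.

import java.util.ArrayList;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;

import com.intel.ssg.dcst.panthera.parse.SkinDriver;

public class SkinDriverExample {
  public static void main(String[] args) throws java.io.IOException, CommandNeedRetryException {
    HiveConf conf = new HiveConf(SessionState.class);
    SessionState.start(new SessionState(conf));   // embedded session, as the CLI would set up

    SkinDriver driver = new SkinDriver(conf);
    CommandProcessorResponse resp = driver.run("SELECT id, name FROM example_table");  // hypothetical table
    if (resp.getResponseCode() != 0) {
      System.err.println("Query failed: " + resp.getErrorMessage());
      return;
    }

    ArrayList<String> rows = new ArrayList<String>();
    while (driver.getResults(rows)) {              // fills up to getMaxRows() rows per call
      for (String row : rows) {
        System.out.println(row);
      }
      rows.clear();
    }
    driver.close();
    driver.destroy();
  }
}

run() returns a CommandProcessorResponse whose response code mirrors the integer codes used throughout the class (0 for success, 403 for authorization failures, and so on). With that in mind, here is the SkinDriver source in full.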
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.intel.ssg.dcst.panthera.parse;

import java.io.DataInput;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Schema;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.HiveDriverRunHook;
import org.apache.hadoop.hive.ql.HiveDriverRunHookContext;
import org.apache.hadoop.hive.ql.HiveDriverRunHookContextImpl;
import org.apache.hadoop.hive.ql.MapRedStats;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.TaskResult;
import org.apache.hadoop.hive.ql.exec.TaskRunner;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.Hook;
import org.apache.hadoop.hive.ql.hooks.HookContext;
import org.apache.hadoop.hive.ql.hooks.HookUtils;
import org.apache.hadoop.hive.ql.hooks.PostExecute;
import org.apache.hadoop.hive.ql.hooks.PreExecute;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.formatting.JsonMetaDataFormatter;
import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils;
import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatter;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;

import com.intel.ssg.dcst.panthera.parse.sql.ExplainSession;
import com.intel.ssg.dcst.panthera.parse.sql.SqlParseDriver;
import com.intel.ssg.dcst.panthera.parse.sql.SqlParseException;
import com.intel.ssg.dcst.panthera.parse.sql.SqlTextSession;
import com.intel.ssg.dcst.panthera.parse.sql.SqlXlateException;

public class SkinDriver extends Driver {

  static final private Log LOG = LogFactory.getLog(SkinDriver.class.getName());
  static final private LogHelper console = new LogHelper(LOG);

  private static final Object compileMonitor = new Object();

  private int maxRows = 100;
  ByteStream.Output bos = new ByteStream.Output();

  private HiveConf conf;
  private DataInput resStream;
  private Context ctx;
  private QueryPlan plan;
  private Schema schema;
  private HiveLockManager hiveLockMgr;

  private String errorMessage;
  private String SQLState;
  private Throwable downstreamError;

  // A limit on the number of threads that can be launched
  private int maxthreads;
  private static final int SLEEP_TIME = 2000;

  protected int tryCount = Integer.MAX_VALUE;

  private boolean checkLockManager() {
    boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
    if (!supportConcurrency) {
      return false;
    }
    if ((hiveLockMgr == null))
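    // The lock manager is created lazily, on the first query that needs it, and is then reused
    // by every later query that runs through this same SkinDriver instance.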
{ try { setLockManager(); } catch (SemanticException e) { errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return false; } } // the reason that we set the lock manager for the cxt here is because each // query has its own ctx object. The hiveLockMgr is shared accross the // same instance of Driver, which can run multiple queries. ctx.setHiveLockMgr(hiveLockMgr); return hiveLockMgr != null; } private void setLockManager() throws SemanticException { boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY); if (supportConcurrency) { String lockMgr = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER); if ((lockMgr == null) || (lockMgr.isEmpty())) { throw new SemanticException(ErrorMsg.LOCKMGR_NOT_SPECIFIED.getMsg()); } try { hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf); hiveLockMgr.setContext(new HiveLockManagerCtx(conf)); } catch (Exception e) { // set hiveLockMgr to null just in case this invalid manager got set to // next query's ctx. if (hiveLockMgr != null) { try { hiveLockMgr.close(); } catch (LockException e1) { //nothing can do here } hiveLockMgr = null; } throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage()); } } } public void init() { Operator.resetId(); } /** * Return the status information about the Map-Reduce cluster */ public ClusterStatus getClusterStatus() throws Exception { ClusterStatus cs; try { JobConf job = new JobConf(conf); JobClient jc = new JobClient(job); cs = jc.getClusterStatus(); } catch (Exception e) { e.printStackTrace(); throw e; } LOG.info("Returning cluster status: " + cs.toString()); return cs; } public Schema getSchema() { return schema; } /** * Get a Schema with fields represented with native Hive types */ public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) { Schema schema = null; // If we have a plan, prefer its logical result schema if it's // available; otherwise, try digging out a fetch task; failing that, // give up. if (sem == null) { // can't get any info without a plan } else if (sem.getResultSchema() != null) { List<FieldSchema> lst = sem.getResultSchema(); schema = new Schema(lst, null); } else if (sem.getFetchTask() != null) { FetchTask ft = sem.getFetchTask(); TableDesc td = ft.getTblDesc(); // partitioned tables don't have tableDesc set on the FetchTask. Instead // they have a list of PartitionDesc objects, each with a table desc. // Let's // try to fetch the desc for the first partition and use it's // deserializer. 
if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) { if (ft.getWork().getPartDesc().size() > 0) { td = ft.getWork().getPartDesc().get(0).getTableDesc(); } } if (td == null) { LOG.info("No returning schema."); } else { String tableName = "result"; List<FieldSchema> lst = null; try { lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer()); } catch (Exception e) { LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e)); } if (lst != null) { schema = new Schema(lst, null); } } } if (schema == null) { schema = new Schema(); } LOG.info("Returning Hive schema: " + schema); return schema; } /** * Get a Schema with fields represented with Thrift DDL types */ public Schema getThriftSchema() throws Exception { Schema schema; try { schema = getSchema(); if (schema != null) { List<FieldSchema> lst = schema.getFieldSchemas(); // Go over the schema and convert type to thrift type if (lst != null) { for (FieldSchema f : lst) { f.setType(MetaStoreUtils.typeToThriftType(f.getType())); } } } } catch (Exception e) { e.printStackTrace(); throw e; } LOG.info("Returning Thrift schema: " + schema); return schema; } /** * Return the maximum number of rows returned by getResults */ public int getMaxRows() { return maxRows; } /** * Set the maximum number of rows returned by getResults */ public void setMaxRows(int maxRows) { this.maxRows = maxRows; } public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) { if (tasks == null) { return false; } boolean hasReduce = false; for (Task<? extends Serializable> task : tasks) { if (task.hasReduce()) { return true; } hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks())); } return hasReduce; } /** * for backwards compatibility with current tests */ public SkinDriver(HiveConf conf) { this.conf = conf; } public SkinDriver() { if (SessionState.get() != null) { conf = SessionState.get().getConf(); } } /** * Compile a new query. Any currently-planned query associated with this Driver is discarded. * * @param command * The SQL query to compile. */ public int compile(String command) { return compile(command, true); } /** * Hold state variables specific to each query being executed, that may not * be consistent in the overall SessionState */ private static class QueryState { private HiveOperation op; private String cmd; private boolean init = false; /** * Initialize the queryState with the query state variables */ public void init(HiveOperation op, String cmd) { this.op = op; this.cmd = cmd; this.init = true; } public boolean isInitialized() { return this.init; } public HiveOperation getOp() { return this.op; } public String getCmd() { return this.cmd; } } public void saveSession(QueryState qs) { SessionState oldss = SessionState.get(); if (oldss != null && oldss.getHiveOperation() != null) { qs.init(oldss.getHiveOperation(), oldss.getCmd()); } } public void restoreSession(QueryState qs) { SessionState ss = SessionState.get(); if (ss != null && qs != null && qs.isInitialized()) { ss.setCmd(qs.getCmd()); ss.setCommandType(qs.getOp()); } } /** * Compile a new query, but potentially reset taskID counter. Not resetting task counter * is useful for generating re-entrant QL queries. * @param command The HiveQL query to compile * @param resetTaskIds Resets taskID counter if true. 
* @return 0 for ok */ public int compile(String command, boolean resetTaskIds) { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(LOG, PerfLogger.COMPILE); //holder for parent command type/string when executing reentrant queries QueryState queryState = new QueryState(); if (plan != null) { close(); plan = null; } if (resetTaskIds) { TaskFactory.resetId(); } saveSession(queryState); try { try { command = new VariableSubstitution().substitute(conf, command); ctx = new Context(conf); ctx.setTryCount(getTryCount()); ctx.setCmd(command); ctx.setHDFSCleanup(true); perfLogger.PerfLogBegin(LOG, PerfLogger.PARSE); // use SqlParse directly, in which SQL Parser will be called first, then Hive Parser if SQL // Parser failed. SqlParseDriver pd = new SqlParseDriver(conf); ASTNode tree = pd.parse(command, ctx); tree = ParseUtils.findRootNonNullToken(tree); perfLogger.PerfLogEnd(LOG, PerfLogger.PARSE); perfLogger.PerfLogBegin(LOG, PerfLogger.ANALYZE); BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree); List<HiveSemanticAnalyzerHook> saHooks = getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class); // Do semantic analysis and plan generation if (saHooks != null) { HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl(); hookCtx.setConf(conf); for (HiveSemanticAnalyzerHook hook : saHooks) { tree = hook.preAnalyze(hookCtx, tree); } sem.analyze(tree, ctx); hookCtx.update(sem); for (HiveSemanticAnalyzerHook hook : saHooks) { hook.postAnalyze(hookCtx, sem.getRootTasks()); } } else { sem.analyze(tree, ctx); } LOG.info("Semantic Analysis Completed"); // validate the plan sem.validate(); perfLogger.PerfLogEnd(LOG, PerfLogger.ANALYZE); plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN)); // test Only - serialize the query plan and deserialize it if ("true".equalsIgnoreCase(System.getProperty("test.serialize.qplan"))) { String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml"; LOG.info("query plan = " + queryPlanFileName); queryPlanFileName = new Path(queryPlanFileName).toUri().getPath(); // serialize the queryPlan FileOutputStream fos = new FileOutputStream(queryPlanFileName); Utilities.serializeObject(plan, fos); fos.close(); // deserialize the queryPlan FileInputStream fis = new FileInputStream(queryPlanFileName); QueryPlan newPlan = Utilities.deserializeObject(fis); fis.close(); // Use the deserialized plan plan = newPlan; } // initialize FetchTask right here if (plan.getFetchTask() != null) { plan.getFetchTask().initialize(conf, plan, null); } // get the output schema schema = getSchema(sem, conf); // do the authorization check if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { try { perfLogger.PerfLogBegin(LOG, PerfLogger.DO_AUTHORIZATION); doAuthorization(sem); } catch (AuthorizationException authExp) { errorMessage = "Authorization failed:" + authExp.getMessage() + ". 
Use show grant to get more details."; console.printError(errorMessage); return 403; } finally { perfLogger.PerfLogEnd(LOG, PerfLogger.DO_AUTHORIZATION); } } // restore state after we're done executing a specific query return 0; } catch (SemanticException semanticException) { command = new VariableSubstitution().substitute(conf, command); ctx = new Context(conf); ctx.setTryCount(getTryCount()); ctx.setCmd(command); ctx.setHDFSCleanup(true); perfLogger.PerfLogBegin(LOG, PerfLogger.PARSE); // do again, may be semantic exception is triggered from SQL parser, then try Hive parser again. // for second time to execute these code, only use HIVE parser, since SQL parser must be tested. SqlParseDriver pd = new SqlParseDriver(conf); ASTNode tree = null; try { tree = pd.parse(command, ctx, true); } catch (ParseException parseException) { // // if reEntry the parse, and get ParseException, that means HIVE Paser can // not parse the query while SQL Parser can, then throw SemanticException // throw semanticException; } try { tree = ParseUtils.findRootNonNullToken(tree); perfLogger.PerfLogEnd(LOG, PerfLogger.PARSE); perfLogger.PerfLogBegin(LOG, PerfLogger.ANALYZE); BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree); List<HiveSemanticAnalyzerHook> saHooks = getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class); // Do semantic analysis and plan generation if (saHooks != null) { HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl(); hookCtx.setConf(conf); for (HiveSemanticAnalyzerHook hook : saHooks) { tree = hook.preAnalyze(hookCtx, tree); } sem.analyze(tree, ctx); hookCtx.update(sem); for (HiveSemanticAnalyzerHook hook : saHooks) { hook.postAnalyze(hookCtx, sem.getRootTasks()); } } else { sem.analyze(tree, ctx); } LOG.info("Semantic Analysis Completed"); // validate the plan sem.validate(); perfLogger.PerfLogEnd(LOG, PerfLogger.ANALYZE); plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN)); // test Only - serialize the query plan and deserialize it if ("true".equalsIgnoreCase(System.getProperty("test.serialize.qplan"))) { String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml"; LOG.info("query plan = " + queryPlanFileName); queryPlanFileName = new Path(queryPlanFileName).toUri().getPath(); // serialize the queryPlan FileOutputStream fos = new FileOutputStream(queryPlanFileName); Utilities.serializeObject(plan, fos); fos.close(); // deserialize the queryPlan FileInputStream fis = new FileInputStream(queryPlanFileName); QueryPlan newPlan = Utilities.deserializeObject(fis); fis.close(); // Use the deserialized plan plan = newPlan; } // initialize FetchTask right here if (plan.getFetchTask() != null) { plan.getFetchTask().initialize(conf, plan, null); } // get the output schema schema = getSchema(sem, conf); //do the authorization check if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { try { perfLogger.PerfLogBegin(LOG, PerfLogger.DO_AUTHORIZATION); doAuthorization(sem); } catch (AuthorizationException authExp) { errorMessage = "Authorization failed:" + authExp.getMessage() + ". 
Use show grant to get more details."; console.printError(errorMessage); return 403; } finally { perfLogger.PerfLogEnd(LOG, PerfLogger.DO_AUTHORIZATION); } } //restore state after we're done executing a specific query return 0; } catch (SemanticException hiveSemanticEx) { // // if use HIVE Parser still get SemanticException, then still throw SemanticException // which is throw from the first time of choosing Parser. Since that semanticeException // can either throw from SQL Parser or HIVE Parser while this hiveSemanticEx would only // thrown from HIVE Parser. // throw semanticException; } } } catch (SqlParseException e) { errorMessage = "FAILED: SQL Parse Error: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (15); } catch (SqlXlateException e) { errorMessage = "FAILED: SQL AST Translate Error: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (14); } catch (Exception e) { ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage()); errorMessage = "FAILED: " + e.getClass().getSimpleName(); if (error != ErrorMsg.GENERIC_ERROR) { errorMessage += " [Error " + error.getErrorCode() + "]:"; } // HIVE-4889 if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) { errorMessage += " " + e.getCause().getMessage(); } else { errorMessage += " " + e.getMessage(); } SQLState = error.getSQLState(); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return error.getErrorCode(); } finally { perfLogger.PerfLogEnd(LOG, PerfLogger.COMPILE); restoreSession(queryState); } } private void doAuthorization(BaseSemanticAnalyzer sem) throws HiveException, AuthorizationException { HashSet<ReadEntity> inputs = sem.getInputs(); HashSet<WriteEntity> outputs = sem.getOutputs(); SessionState ss = SessionState.get(); HiveOperation op = ss.getHiveOperation(); Hive db = sem.getDb(); if (op != null) { if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) { ss.getAuthorizer().authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); } else { if (op.equals(HiveOperation.IMPORT)) { ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem; if (!isa.existsTable()) { ss.getAuthorizer().authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); } } } if (outputs != null && outputs.size() > 0) { for (WriteEntity write : outputs) { if (write.getType() == WriteEntity.Type.PARTITION) { Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false); if (part != null) { ss.getAuthorizer().authorize(write.getPartition(), null, op.getOutputRequiredPrivileges()); continue; } } if (write.getTable() != null) { ss.getAuthorizer().authorize(write.getTable(), null, op.getOutputRequiredPrivileges()); } } } } if (inputs != null && inputs.size() > 0) { Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>(); Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>(); Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>(); for (ReadEntity read : inputs) { Table tbl = read.getTable(); if ((read.getPartition() != null) || 
(tbl.isPartitioned())) { String tblName = tbl.getTableName(); if (tableUsePartLevelAuth.get(tblName) == null) { boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE".equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE")))); if (usePartLevelPriv) { tableUsePartLevelAuth.put(tblName, Boolean.TRUE); } else { tableUsePartLevelAuth.put(tblName, Boolean.FALSE); } } } } if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) { SemanticAnalyzer querySem = (SemanticAnalyzer) sem; ParseContext parseCtx = querySem.getParseContext(); Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable(); for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem.getParseContext() .getTopOps().entrySet()) { Operator<? extends OperatorDesc> topOp = topOpMap.getValue(); if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) { TableScanOperator tableScanOp = (TableScanOperator) topOp; Table tbl = tsoTopMap.get(tableScanOp); List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs(); List<FieldSchema> columns = tbl.getCols(); List<String> cols = new ArrayList<String>(); if (neededColumnIds != null && neededColumnIds.size() > 0) { for (int i = 0; i < neededColumnIds.size(); i++) { cols.add(columns.get(neededColumnIds.get(i)).getName()); } } else { for (int i = 0; i < columns.size(); i++) { cols.add(columns.get(i).getName()); } } //map may not contain all sources, since input list may have been optimized out //or non-existent tho such sources may still be referenced by the TableScanOperator //if it's null then the partition probably doesn't exist so let's use table permission if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) { String alias_id = topOpMap.getKey(); PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id); Set<Partition> parts = partsList.getPartitions(); for (Partition part : parts) { List<String> existingCols = part2Cols.get(part); if (existingCols == null) { existingCols = new ArrayList<String>(); } existingCols.addAll(cols); part2Cols.put(part, existingCols); } } else { List<String> existingCols = tab2Cols.get(tbl); if (existingCols == null) { existingCols = new ArrayList<String>(); } existingCols.addAll(cols); tab2Cols.put(tbl, existingCols); } } } } // cache the results for table authorization Set<String> tableAuthChecked = new HashSet<String>(); for (ReadEntity read : inputs) { Table tbl = read.getTable(); if (read.getPartition() != null) { Partition partition = read.getPartition(); tbl = partition.getTable(); // use partition level authorization if (tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) { List<String> cols = part2Cols.get(partition); if (cols != null && cols.size() > 0) { ss.getAuthorizer().authorize(partition.getTable(), partition, cols, op.getInputRequiredPrivileges(), null); } else { ss.getAuthorizer().authorize(partition, op.getInputRequiredPrivileges(), null); } continue; } } // if we reach here, it means it needs to do a table authorization // check, and the table authorization may already happened because of other // partitions if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) && !(tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE)) { List<String> cols = tab2Cols.get(tbl); if (cols != null && cols.size() > 0) { ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null); } else { 
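// No per-column list was collected for this table, so fall back to a plain table-level privilege check.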
ss.getAuthorizer().authorize(tbl, op.getInputRequiredPrivileges(), null); } tableAuthChecked.add(tbl.getTableName()); } } } } /** * @return The current query plan associated with this Driver, if any. */ public QueryPlan getPlan() { return plan; } /** * @param t * The table to be locked * @param p * The partition to be locked * @param mode * The mode of the lock (SHARED/EXCLUSIVE) Get the list of objects to be locked. If a * partition needs to be locked (in any mode), all its parents should also be locked in * SHARED mode. **/ private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode) throws SemanticException { List<HiveLockObj> locks = new LinkedList<HiveLockObj>(); HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System.currentTimeMillis()), "IMPLICIT", plan.getQueryStr()); if (t != null) { locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode)); mode = HiveLockMode.SHARED; locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode)); return locks; } if (p != null) { if (!(p instanceof DummyPartition)) { locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode)); } // All the parents are locked in shared mode mode = HiveLockMode.SHARED; // For dummy partitions, only partition name is needed String name = p.getName(); if (p instanceof DummyPartition) { name = p.getName().split("@")[2]; } String partialName = ""; String[] partns = name.split("/"); int len = p instanceof DummyPartition ? partns.length : partns.length - 1; Map<String, String> partialSpec = new LinkedHashMap<String, String>(); for (int idx = 0; idx < len; idx++) { String partn = partns[idx]; partialName += partn; String[] nameValue = partn.split("="); assert (nameValue.length == 2); partialSpec.put(nameValue[0], nameValue[1]); try { locks.add( new HiveLockObj(new HiveLockObject( new DummyPartition(p.getTable(), p.getTable().getDbName() + "/" + p.getTable().getTableName() + "/" + partialName, partialSpec), lockData), mode)); partialName += "/"; } catch (HiveException e) { throw new SemanticException(e.getMessage()); } } locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode)); locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode)); } return locks; } /** * Acquire read and write locks needed by the statement. The list of objects to be locked are * obtained from he inputs and outputs populated by the compiler. The lock acuisition scheme is * pretty simple. If all the locks cannot be obtained, error out. Deadlock is avoided by making * sure that the locks are lexicographically sorted. **/ public int acquireReadWriteLocks() { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); try { boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY); if (!supportConcurrency) { return 0; } List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>(); // Sort all the inputs, outputs. 
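// Inputs are locked SHARED; outputs are locked EXCLUSIVE (incomplete outputs and dummy partitions
// fall back to SHARED), and every parent object up to the database is also locked SHARED.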
// If a lock needs to be acquired on any partition, a read lock needs to be acquired on all // its parents also for (ReadEntity input : plan.getInputs()) { if (input.getType() == ReadEntity.Type.TABLE) { lockObjects.addAll(getLockObjects(input.getTable(), null, HiveLockMode.SHARED)); } else { lockObjects.addAll(getLockObjects(null, input.getPartition(), HiveLockMode.SHARED)); } } for (WriteEntity output : plan.getOutputs()) { List<HiveLockObj> lockObj = null; if (output.getTyp() == WriteEntity.Type.TABLE) { lockObj = getLockObjects(output.getTable(), null, output.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED); } else if (output.getTyp() == WriteEntity.Type.PARTITION) { lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE); } // In case of dynamic queries, it is possible to have incomplete dummy partitions else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) { lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.SHARED); } if (lockObj != null) { lockObjects.addAll(lockObj); ctx.getOutputLockObjects().put(output, lockObj); } } if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) { return 0; } HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System.currentTimeMillis()), "IMPLICIT", plan.getQueryStr()); // Lock the database also String currentDb = SessionState.get().getCurrentDatabase(); lockObjects.add(new HiveLockObj(new HiveLockObject(currentDb, lockData), HiveLockMode.SHARED)); List<HiveLock> hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false); if (hiveLocks == null) { throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg()); } else { ctx.setHiveLocks(hiveLocks); } return (0); } catch (SemanticException e) { errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (10); } catch (LockException e) { errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (10); } finally { perfLogger.PerfLogEnd(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); } } /** * @param hiveLocks * list of hive locks to be released Release all the locks specified. If some of the * locks have already been released, ignore them **/ private void releaseLocks(List<HiveLock> hiveLocks) { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(LOG, PerfLogger.RELEASE_LOCKS); if (hiveLocks != null) { ctx.getHiveLockMgr().releaseLocks(hiveLocks); } ctx.setHiveLocks(null); perfLogger.PerfLogEnd(LOG, PerfLogger.RELEASE_LOCKS); } public CommandProcessorResponse run(String command) throws CommandNeedRetryException { CommandProcessorResponse cpr = runInternal(command); if (cpr.getResponseCode() == 0) { return cpr; } SessionState ss = SessionState.get(); if (ss == null) { return cpr; } MetaDataFormatter mdf = MetaDataFormatUtils.getFormatter(ss.getConf()); if (!(mdf instanceof JsonMetaDataFormatter)) { return cpr; } /*Here we want to encode the error in machine readable way (e.g. JSON) * Ideally, errorCode would always be set to a canonical error defined in ErrorMsg. * In practice that is rarely the case, so the messy logic below tries to tease * out canonical error code if it can. 
Exclude stack trace from output when * the error is a specific/expected one. * It's written to stdout for backward compatibility (WebHCat consumes it).*/ try { if (downstreamError == null) { mdf.error(ss.out, errorMessage, cpr.getResponseCode(), SQLState); return cpr; } ErrorMsg canonicalErr = ErrorMsg.getErrorMsg(cpr.getResponseCode()); if (canonicalErr != null && canonicalErr != ErrorMsg.GENERIC_ERROR) { /*Some HiveExceptions (e.g. SemanticException) don't set canonical ErrorMsg explicitly, but there is logic (e.g. #compile()) to find an appropriate canonical error and return its code as error code. In this case we want to preserve it for downstream code to interpret*/ mdf.error(ss.out, errorMessage, cpr.getResponseCode(), SQLState, null); return cpr; } if (downstreamError instanceof HiveException) { HiveException rc = (HiveException) downstreamError; mdf.error(ss.out, errorMessage, rc.getCanonicalErrorMsg().getErrorCode(), SQLState, rc.getCanonicalErrorMsg() == ErrorMsg.GENERIC_ERROR ? org.apache.hadoop.util.StringUtils.stringifyException(rc) : null); } else { ErrorMsg canonicalMsg = ErrorMsg.getErrorMsg(downstreamError.getMessage()); mdf.error(ss.out, errorMessage, canonicalMsg.getErrorCode(), SQLState, org.apache.hadoop.util.StringUtils.stringifyException(downstreamError)); } } catch (HiveException ex) { console.printError("Unable to JSON-encode the error", org.apache.hadoop.util.StringUtils.stringifyException(ex)); } return cpr; } private CommandProcessorResponse runInternal(String command) throws CommandNeedRetryException { errorMessage = null; SQLState = null; downstreamError = null; if (!validateConfVariables()) { return new CommandProcessorResponse(12, errorMessage, SQLState); } HiveDriverRunHookContext hookContext = new HiveDriverRunHookContextImpl(conf, command); // Get all the driver run hooks and pre-execute them. List<HiveDriverRunHook> driverRunHooks; try { driverRunHooks = getHooks(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, HiveDriverRunHook.class); for (HiveDriverRunHook driverRunHook : driverRunHooks) { driverRunHook.preDriverRun(hookContext); } } catch (Exception e) { errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return new CommandProcessorResponse(12, errorMessage, SQLState); } // Reset the perf logger PerfLogger perfLogger = PerfLogger.getPerfLogger(true); perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_RUN); perfLogger.PerfLogBegin(LOG, PerfLogger.TIME_TO_SUBMIT); int ret; synchronized (compileMonitor) { ret = compile(command); } if (ret != 0) { releaseLocks(ctx.getHiveLocks()); return new CommandProcessorResponse(ret, errorMessage, SQLState); } boolean requireLock = false; boolean ckLock = checkLockManager(); if (ckLock) { boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY); if (lockOnlyMapred) { Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>(); taskQueue.addAll(plan.getRootTasks()); while (taskQueue.peek() != null) { Task<? 
extends Serializable> tsk = taskQueue.remove(); requireLock = requireLock || tsk.requireLock(); if (requireLock) { break; } if (tsk instanceof ConditionalTask) { taskQueue.addAll(((ConditionalTask) tsk).getListTasks()); } if (tsk.getChildTasks() != null) { taskQueue.addAll(tsk.getChildTasks()); } // does not add back up task here, because back up task should be the same // type of the original task. } } else { requireLock = true; } } if (requireLock) { ret = acquireReadWriteLocks(); if (ret != 0) { releaseLocks(ctx.getHiveLocks()); return new CommandProcessorResponse(ret, errorMessage, SQLState); } } ret = execute(); if (ret != 0) { //if needRequireLock is false, the release here will do nothing because there is no lock releaseLocks(ctx.getHiveLocks()); return new CommandProcessorResponse(ret, errorMessage, SQLState); } //if needRequireLock is false, the release here will do nothing because there is no lock releaseLocks(ctx.getHiveLocks()); perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_RUN); perfLogger.close(LOG, plan); // Take all the driver run hooks and post-execute them. try { for (HiveDriverRunHook driverRunHook : driverRunHooks) { driverRunHook.postDriverRun(hookContext); } } catch (Exception e) { errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return new CommandProcessorResponse(12, errorMessage, SQLState); } return new CommandProcessorResponse(ret); } /** * Validate configuration variables. * * @return */ private boolean validateConfVariables() { boolean valid = true; if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES)) && ((conf.getBoolVar(HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE)) || (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) || ((conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE))))) { errorMessage = "FAILED: Hive Internal Error: " + ErrorMsg.SUPPORT_DIR_MUST_TRUE_FOR_LIST_BUCKETING.getMsg(); SQLState = ErrorMsg.SUPPORT_DIR_MUST_TRUE_FOR_LIST_BUCKETING.getSQLState(); console.printError(errorMessage + "\n"); valid = false; } return valid; } /** * Returns a set of hooks specified in a configuration variable. * See getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz) */ private List<Hook> getHooks(HiveConf.ConfVars hookConfVar) throws Exception { return getHooks(hookConfVar, Hook.class); } /** * Returns the hooks specified in a configuration variable. * * @param hookConfVar The configuration variable specifying a comma separated list of the hook * class names. * @param clazz The super type of the hooks. 
* @return A list of the hooks cast as the type specified in clazz, in the order * they are listed in the value of hookConfVar * @throws Exception */ private <T extends Hook> List<T> getHooks(ConfVars hookConfVar, Class<T> clazz) throws Exception { try { return HookUtils.getHooks(conf, hookConfVar, clazz); } catch (ClassNotFoundException e) { console.printError(hookConfVar.varname + " Class not found:" + e.getMessage()); throw e; } } public int execute() throws CommandNeedRetryException { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_EXECUTE); boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME)); int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); String queryId = plan.getQueryId(); String queryStr = plan.getQueryStr(); conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId); conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr); conf.set("mapreduce.workflow.id", "hive_" + queryId); conf.set("mapreduce.workflow.name", queryStr); maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER); try { LOG.info("Starting command: " + queryStr); plan.setStarted(); if (SessionState.get() != null) { SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID)); SessionState.get().getHiveHistory().logPlanProgress(plan); } resStream = null; HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS()); hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK); for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) { if (peh instanceof ExecuteWithHookContext) { perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((ExecuteWithHookContext) peh).run(hookContext); perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); } else if (peh instanceof PreExecute) { perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader.getHadoopShims().getUGIForConf(conf)); perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName()); } } int jobs = Utilities.getMRTasks(plan.getRootTasks()).size(); if (jobs > 0) { console.printInfo("Total MapReduce jobs = " + jobs); } if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs)); SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap()); } String jobname = Utilities.abbreviate(queryStr, maxlen - 6); // A runtime that launches runnable tasks as separate Threads through // TaskRunners // As soon as a task isRunnable, it is put in a queue // At any time, at most maxthreads tasks can be running // The main thread polls the TaskRunners to check if they have finished. Queue<Task<? extends Serializable>> runnable = new ConcurrentLinkedQueue<Task<? extends Serializable>>(); Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>(); DriverContext driverCxt = new DriverContext(runnable, ctx); ctx.setHDFSCleanup(true); SessionState.get().setLastMapRedStatsList(new ArrayList<MapRedStats>()); SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>()); SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>()); // Add root Tasks to runnable for (Task<? extends Serializable> tsk : plan.getRootTasks()) { // This should never happen, if it does, it's a bug with the potential to produce // incorrect results. 
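// Only tasks with no parents are added here; every other task becomes runnable later, in the
// scheduling loop below, once all of its parent tasks have finished.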
assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty(); driverCxt.addToRunnable(tsk); } perfLogger.PerfLogEnd(LOG, PerfLogger.TIME_TO_SUBMIT); perfLogger.PerfLogBegin(LOG, PerfLogger.RUN_TASKS); // Loop while you either have tasks running, or tasks queued up while (running.size() != 0 || runnable.peek() != null) { // Launch upto maxthreads tasks while (runnable.peek() != null && running.size() < maxthreads) { Task<? extends Serializable> tsk = runnable.remove(); perfLogger.PerfLogBegin(LOG, PerfLogger.TASK + tsk.getName() + "." + tsk.getId()); launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt); } // poll the Tasks to see which one completed TaskResult tskRes = pollTasks(running.keySet()); TaskRunner tskRun = running.remove(tskRes); Task<? extends Serializable> tsk = tskRun.getTask(); perfLogger.PerfLogEnd(LOG, PerfLogger.TASK + tsk.getName() + "." + tsk.getId()); hookContext.addCompleteTask(tskRun); int exitVal = tskRes.getExitVal(); if (exitVal != 0) { if (tsk.ifRetryCmdWhenFail()) { if (!running.isEmpty()) { taskCleanup(running); } // in case we decided to run everything in local mode, restore the // the jobtracker setting to its initial value ctx.restoreOriginalTracker(); throw new CommandNeedRetryException(); } Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask(); if (backupTask != null) { setErrorMsgAndDetail(exitVal, tskRes.getTaskError(), tsk); console.printError(errorMessage); errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName(); console.printError(errorMessage); // add backup task to runnable if (DriverContext.isLaunchable(backupTask)) { driverCxt.addToRunnable(backupTask); } continue; } else { hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK); // Get all the failure execution hooks and execute them. for (Hook ofh : getHooks(HiveConf.ConfVars.ONFAILUREHOOKS)) { perfLogger.PerfLogBegin(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName()); ((ExecuteWithHookContext) ofh).run(hookContext); perfLogger.PerfLogEnd(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName()); } setErrorMsgAndDetail(exitVal, tskRes.getTaskError(), tsk); SQLState = "08S01"; console.printError(errorMessage); if (!running.isEmpty()) { taskCleanup(running); } // in case we decided to run everything in local mode, restore the // the jobtracker setting to its initial value ctx.restoreOriginalTracker(); return exitVal; } } if (SessionState.get() != null) { SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal)); SessionState.get().getHiveHistory().endTask(queryId, tsk); } if (tsk.getChildTasks() != null) { for (Task<? extends Serializable> child : tsk.getChildTasks()) { if (DriverContext.isLaunchable(child)) { driverCxt.addToRunnable(child); } } } } perfLogger.PerfLogEnd(LOG, PerfLogger.RUN_TASKS); // in case we decided to run everything in local mode, restore the // the jobtracker setting to its initial value ctx.restoreOriginalTracker(); // remove incomplete outputs. // Some incomplete outputs may be added at the beginning, for eg: for dynamic partitions. // remove them HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>(); for (WriteEntity output : plan.getOutputs()) { if (!output.isComplete()) { remOutputs.add(output); } } for (WriteEntity output : remOutputs) { plan.getOutputs().remove(output); } hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK); // Get all the post execution hooks and execute them. 
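// A post hook may implement either ExecuteWithHookContext or the older PostExecute interface;
// the loop below handles both flavours.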
for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) { if (peh instanceof ExecuteWithHookContext) { perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName()); ((ExecuteWithHookContext) peh).run(hookContext); perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName()); } else if (peh instanceof PostExecute) { perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName()); ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo() : null), ShimLoader.getHadoopShims().getUGIForConf(conf)); perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName()); } } if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0)); SessionState.get().getHiveHistory().printRowCount(queryId); } } catch (CommandNeedRetryException e) { throw e; } catch (Exception e) { ctx.restoreOriginalTracker(); if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12)); } // TODO: do better with handling types of Exception here errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); SQLState = "08S01"; downstreamError = e; console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (12); } finally { if (SessionState.get() != null) { SessionState.get().getHiveHistory().endQuery(queryId); } if (noName) { conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, ""); } perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_EXECUTE); if (SessionState.get().getLastMapRedStatsList() != null && SessionState.get().getLastMapRedStatsList().size() > 0) { long totalCpu = 0; console.printInfo("MapReduce Jobs Launched: "); for (int i = 0; i < SessionState.get().getLastMapRedStatsList().size(); i++) { console.printInfo("Job " + i + ": " + SessionState.get().getLastMapRedStatsList().get(i)); totalCpu += SessionState.get().getLastMapRedStatsList().get(i).getCpuMSec(); } console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu)); } } plan.setDone(); if (SessionState.get() != null) { try { SessionState.get().getHiveHistory().logPlanProgress(plan); } catch (Exception e) { } } console.printInfo("OK"); return (0); } private void setErrorMsgAndDetail(int exitVal, Throwable downstreamError, Task tsk) { this.downstreamError = downstreamError; errorMessage = "FAILED: Execution Error, return code " + exitVal + " from " + tsk.getClass().getName(); if (downstreamError != null) { //here we assume that upstream code may have parametrized the msg from ErrorMsg //so we want to keep it errorMessage += ". " + downstreamError.getMessage(); } else { ErrorMsg em = ErrorMsg.getErrorMsg(exitVal); if (em != null) { errorMessage += ". " + em.getMsg(); } } } /** * Launches a new task * * @param tsk * task being launched * @param queryId * Id of the query containing the task * @param noName * whether the task has a name set * @param running * map from taskresults to taskrunners * @param jobname * name of the task, if it is a map-reduce job * @param jobs * number of map-reduce jobs * @param cxt * the driver context */ public void launchTask(Task<? 
extends Serializable> tsk, String queryId, boolean noName, Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) { if (SessionState.get() != null) { SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName()); } if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) { if (noName) { conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")"); } conf.set("mapreduce.workflow.node.name", tsk.getId()); Utilities.setWorkflowAdjacencies(conf, plan); cxt.incCurJobNo(1); console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs); } tsk.initialize(conf, plan, cxt); TaskResult tskRes = new TaskResult(); TaskRunner tskRun = new TaskRunner(tsk, tskRes); // Launch Task if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.isMapRedTask()) { // Launch it in the parallel mode, as a separate thread only for MR tasks tskRun.start(); } else { tskRun.runSequential(); } running.put(tskRes, tskRun); return; } /** * Cleans up remaining tasks in case of failure */ public void taskCleanup(Map<TaskResult, TaskRunner> running) { for (Map.Entry<TaskResult, TaskRunner> entry : running.entrySet()) { if (entry.getKey().isRunning()) { Task<?> task = entry.getValue().getTask(); try { task.shutdown(); } catch (Exception e) { console.printError("Exception on shutting down task " + task.getId() + ": " + e); } } } running.clear(); } /** * Polls running tasks to see if a task has ended. * * @param results * Set of result objects for running tasks * @return The result object for any completed/failed task */ public TaskResult pollTasks(Set<TaskResult> results) { Iterator<TaskResult> resultIterator = results.iterator(); while (true) { while (resultIterator.hasNext()) { TaskResult tskRes = resultIterator.next(); if (!tskRes.isRunning()) { return tskRes; } } // In this loop, nothing was found // Sleep 10 seconds and restart try { Thread.sleep(SLEEP_TIME); } catch (InterruptedException ie) { // Do Nothing ; } resultIterator = results.iterator(); } } public boolean getResults(ArrayList<String> res) throws IOException, CommandNeedRetryException { // If SQL is explain plan for ..., output transformed SQL if (ExplainSession.isExplain()) { res.add("TRANSFORMED SQL:"); res.add(" " + SqlTextSession.get()); res.add(""); SqlTextSession.clean(); ExplainSession.clean(); } if (plan != null && plan.getFetchTask() != null) { FetchTask ft = plan.getFetchTask(); ft.setMaxRows(maxRows); return ft.fetch(res); } if (resStream == null) { resStream = ctx.getStream(); } if (resStream == null) { return false; } int numRows = 0; String row = null; while (numRows < maxRows) { if (resStream == null) { if (numRows > 0) { return true; } else { return false; } } bos.reset(); Utilities.StreamStatus ss; try { ss = Utilities.readColumn(resStream, bos); if (bos.getCount() > 0) { row = new String(bos.getData(), 0, bos.getCount(), "UTF-8"); } else if (ss == Utilities.StreamStatus.TERMINATED) { row = new String(); } if (row != null) { numRows++; res.add(row); } } catch (IOException e) { console.printError("FAILED: Unexpected IO exception : " + e.getMessage()); res = null; return false; } if (ss == Utilities.StreamStatus.EOF) { resStream = ctx.getStream(); } } return true; } public int getTryCount() { return tryCount; } public void setTryCount(int tryCount) { this.tryCount = tryCount; } public int close() { try { if (plan != null) { FetchTask fetchTask = plan.getFetchTask(); if (null != fetchTask) { try { fetchTask.clearFetch(); } catch (Exception e) { 
LOG.debug(" Exception while clearing the Fetch task ", e); } } } if (ctx != null) { ctx.clear(); } if (null != resStream) { try { ((FSDataInputStream) resStream).close(); } catch (Exception e) { LOG.debug(" Exception while closing the resStream ", e); } } } catch (Exception e) { console.printError("FAILED: Hive Internal Error: " + Utilities.getNameMessage(e) + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 13; } return 0; } public void destroy() { if (ctx != null) { releaseLocks(ctx.getHiveLocks()); } if (hiveLockMgr != null) { try { hiveLockMgr.close(); } catch (LockException e) { LOG.warn("Exception in closing hive lock manager. " + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } } public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException { return plan.getQueryPlan(); } }