Java tutorial: inside Apache Hive's query Driver (org.apache.hadoop.hive.ql.Driver)
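The source that follows is the Driver class from Apache Hive's ql module. Before reading it, it helps to see how the class is typically driven from the outside. The snippet below is a minimal sketch, not part of the file: it assumes hive-exec and a usable hive-site.xml (metastore and warehouse settings) are on the classpath, and it uses SessionState.start(...), close() and CommandProcessorResponse.getErrorMessage() from the wider Hive API, which are not defined in this excerpt; Driver(HiveConf), run(String), getResponseCode() and getSchema() do appear below. The class name DriverUsageSketch and the SHOW TABLES statement are placeholders.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;

public class DriverUsageSketch {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();        // picks up hive-site.xml and built-in defaults
    SessionState.start(conf);              // the Driver code below relies on SessionState.get()
    Driver driver = new Driver(conf);      // Driver(HiveConf) is one of the constructors defined below
    try {
      // run() compiles the statement, acquires any needed locks and executes the plan,
      // returning a CommandProcessorResponse whose code mirrors the error paths in compile()/execute().
      CommandProcessorResponse rsp = driver.run("SHOW TABLES");
      if (rsp.getResponseCode() != 0) {
        System.err.println("Query failed: " + rsp.getErrorMessage());
        return;
      }
      // getSchema() reports the result schema derived during compilation.
      System.out.println("Result schema: " + driver.getSchema());
    } finally {
      driver.close();                      // releases plan, context and lock resources (close() is
                                           // referenced by the DriverState comments but not shown here)
    }
  }
}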
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql; import java.io.ByteArrayOutputStream; import java.io.DataInput; import java.io.IOException; import java.io.PrintStream; import java.io.Serializable; import java.net.InetAddress; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReentrantLock; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.metrics.common.Metrics; import org.apache.hadoop.hive.common.metrics.common.MetricsConstant; import org.apache.hadoop.hive.common.metrics.common.MetricsFactory; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.conf.HiveVariableSource; import org.apache.hadoop.hive.conf.VariableSubstitution; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Schema; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.ExplainTask; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.TaskResult; import org.apache.hadoop.hive.ql.exec.TaskRunner; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.history.HiveHistory.Keys; import org.apache.hadoop.hive.ql.hooks.Entity; import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; import org.apache.hadoop.hive.ql.hooks.Hook; import org.apache.hadoop.hive.ql.hooks.HookContext; import org.apache.hadoop.hive.ql.hooks.HookUtils; import org.apache.hadoop.hive.ql.hooks.HooksLoader; import org.apache.hadoop.hive.ql.hooks.PostExecute; import org.apache.hadoop.hive.ql.hooks.PreExecute; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.lockmgr.HiveLock; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.lockmgr.LockException; import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.AuthorizationException; import 
org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.formatting.JsonMetaDataFormatter; import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils; import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatter; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl; import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.ParseException; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils; import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivObjectActionType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.shims.Utils; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hive.common.util.ShutdownHookManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; public class Driver implements CommandProcessor { static final private String CLASS_NAME = Driver.class.getName(); private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); static final private LogHelper console = new LogHelper(LOG); static final int SHUTDOWN_HOOK_PRIORITY = 0; private final QueryInfo queryInfo; private Runnable shutdownRunner = null; private int maxRows = 100; ByteStream.Output bos = new ByteStream.Output(); private final HiveConf conf; private final boolean isParallelEnabled; private DataInput resStream; private Context ctx; private DriverContext driverCxt; private QueryPlan plan; private Schema schema; private String errorMessage; 
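// errorMessage, SQLState and downstreamError record why the current command failed;
// createProcessorResponse() packages them into the CommandProcessorResponse handed back to callers,
// and run() maps downstreamError to a canonical ErrorMsg when emitting JSON-formatted errors.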
private String SQLState; private Throwable downstreamError; private FetchTask fetchTask; List<HiveLock> hiveLocks = new ArrayList<HiveLock>(); // A limit on the number of threads that can be launched private int maxthreads; private int tryCount = Integer.MAX_VALUE; private String userName; // HS2 operation handle guid string private String operationId; // For WebUI. Kept alive after queryPlan is freed. private final QueryDisplay queryDisplay = new QueryDisplay(); private LockedDriverState lDrvState = new LockedDriverState(); // Query specific info private QueryState queryState; // Query hooks that execute before compilation and after execution private QueryLifeTimeHookRunner queryLifeTimeHookRunner; private final HooksLoader hooksLoader; public enum DriverState { INITIALIZED, COMPILING, COMPILED, EXECUTING, EXECUTED, // a state that the driver enters after close() has been called to interrupt its running // query in the query cancellation INTERRUPT, // a state that the driver enters after close() has been called to clean the query results // and release the resources after the query has been executed CLOSED, // a state that the driver enters after destroy() is called and it is the end of driver life cycle DESTROYED, ERROR } public static class LockedDriverState { // a lock is used for synchronizing the state transition and its associated // resource releases public final ReentrantLock stateLock = new ReentrantLock(); public DriverState driverState = DriverState.INITIALIZED; private static ThreadLocal<LockedDriverState> lds = new ThreadLocal<LockedDriverState>() { @Override protected LockedDriverState initialValue() { return new LockedDriverState(); } }; public static void setLockedDriverState(LockedDriverState lDrv) { lds.set(lDrv); } public static LockedDriverState getLockedDriverState() { return lds.get(); } public static void removeLockedDriverState() { if (lds != null) lds.remove(); } } private boolean checkConcurrency() { boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY); if (!supportConcurrency) { LOG.info("Concurrency mode is disabled, not creating a lock manager"); return false; } return true; } @Override public void init() { // Nothing for now. } /** * Return the status information about the Map-Reduce cluster */ public ClusterStatus getClusterStatus() throws Exception { ClusterStatus cs; try { JobConf job = new JobConf(conf); JobClient jc = new JobClient(job); cs = jc.getClusterStatus(); } catch (Exception e) { e.printStackTrace(); throw e; } LOG.info("Returning cluster status: " + cs.toString()); return cs; } public Schema getSchema() { return schema; } /** * Get a Schema with fields represented with native Hive types */ private static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) { Schema schema = null; // If we have a plan, prefer its logical result schema if it's // available; otherwise, try digging out a fetch task; failing that, // give up. if (sem == null) { // can't get any info without a plan } else if (sem.getResultSchema() != null) { List<FieldSchema> lst = sem.getResultSchema(); schema = new Schema(lst, null); } else if (sem.getFetchTask() != null) { FetchTask ft = sem.getFetchTask(); TableDesc td = ft.getTblDesc(); // partitioned tables don't have tableDesc set on the FetchTask. Instead // they have a list of PartitionDesc objects, each with a table desc. // Let's // try to fetch the desc for the first partition and use it's // deserializer. 
if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) { if (ft.getWork().getPartDesc().size() > 0) { td = ft.getWork().getPartDesc().get(0).getTableDesc(); } } if (td == null) { LOG.info("No returning schema."); } else { String tableName = "result"; List<FieldSchema> lst = null; try { lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer(conf)); } catch (Exception e) { LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e)); } if (lst != null) { schema = new Schema(lst, null); } } } if (schema == null) { schema = new Schema(); } LOG.info("Returning Hive schema: " + schema); return schema; } /** * Get a Schema with fields represented with Thrift DDL types */ public Schema getThriftSchema() throws Exception { Schema schema; try { schema = getSchema(); if (schema != null) { List<FieldSchema> lst = schema.getFieldSchemas(); // Go over the schema and convert type to thrift type if (lst != null) { for (FieldSchema f : lst) { f.setType(MetaStoreUtils.typeToThriftType(f.getType())); } } } } catch (Exception e) { e.printStackTrace(); throw e; } LOG.info("Returning Thrift schema: " + schema); return schema; } /** * Return the maximum number of rows returned by getResults */ public int getMaxRows() { return maxRows; } /** * Set the maximum number of rows returned by getResults */ public void setMaxRows(int maxRows) { this.maxRows = maxRows; } public Driver() { this(getNewQueryState((SessionState.get() != null) ? SessionState.get().getConf() : new HiveConf()), null); } public Driver(HiveConf conf) { this(getNewQueryState(conf), null); } public Driver(HiveConf conf, Context ctx) { this(getNewQueryState(conf), null); this.ctx = ctx; } public Driver(HiveConf conf, String userName) { this(getNewQueryState(conf), userName, null); } public Driver(QueryState queryState, String userName) { this(queryState, userName, new HooksLoader(queryState.getConf()), null); } public Driver(HiveConf conf, HooksLoader hooksLoader) { this(getNewQueryState(conf), null, hooksLoader, null); } public Driver(QueryState queryState, String userName, QueryInfo queryInfo) { this(queryState, userName, new HooksLoader(queryState.getConf()), queryInfo); } public Driver(QueryState queryState, String userName, HooksLoader hooksLoader, QueryInfo queryInfo) { this.queryState = queryState; this.conf = queryState.getConf(); isParallelEnabled = (conf != null) && HiveConf.getBoolVar(conf, ConfVars.HIVE_SERVER2_PARALLEL_COMPILATION); this.userName = userName; this.hooksLoader = hooksLoader; this.queryLifeTimeHookRunner = new QueryLifeTimeHookRunner(conf, hooksLoader, console); this.queryInfo = queryInfo; } /** * Generating the new QueryState object. Making sure, that the new queryId is generated. * @param conf The HiveConf which should be used * @return The new QueryState object */ private static QueryState getNewQueryState(HiveConf conf) { return new QueryState.Builder().withGenerateNewQueryId(true).withHiveConf(conf).build(); } /** * Compile a new query. Any currently-planned query associated with this Driver is discarded. * Do not reset id for inner queries(index, etc). Task ids are used for task identity check. * * @param command * The SQL query to compile. */ public int compile(String command) { return compile(command, true); } /** * Compile a new query, but potentially reset taskID counter. Not resetting task counter * is useful for generating re-entrant QL queries. 
* @param command The HiveQL query to compile * @param resetTaskIds Resets taskID counter if true. * @return 0 for ok */ public int compile(String command, boolean resetTaskIds) { return compile(command, resetTaskIds, false); } // deferClose indicates if the close/destroy should be deferred when the process has been // interrupted, it should be set to true if the compile is called within another method like // runInternal, which defers the close to the called in that method. private int compile(String command, boolean resetTaskIds, boolean deferClose) { PerfLogger perfLogger = SessionState.getPerfLogger(true); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE); lDrvState.stateLock.lock(); try { lDrvState.driverState = DriverState.COMPILING; } finally { lDrvState.stateLock.unlock(); } command = new VariableSubstitution(new HiveVariableSource() { @Override public Map<String, String> getHiveVariable() { return SessionState.get().getHiveVariables(); } }).substitute(conf, command); String queryStr = command; try { // command should be redacted to avoid to logging sensitive data queryStr = HookUtils.redactLogString(conf, command); } catch (Exception e) { LOG.warn("WARNING! Query command could not be redacted." + e); } if (isInterrupted()) { return handleInterruption("at beginning of compilation."); //indicate if need clean resource } if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) { // close the existing ctx etc before compiling a new query, but does not destroy driver closeInProcess(false); } if (resetTaskIds) { TaskFactory.resetId(); } LockedDriverState.setLockedDriverState(lDrvState); String queryId = queryState.getQueryId(); //save some info for webUI for use after plan is freed this.queryDisplay.setQueryStr(queryStr); this.queryDisplay.setQueryId(queryId); LOG.info("Compiling command(queryId=" + queryId + "): " + queryStr); SessionState.get().setupQueryCurrentTimestamp(); // Whether any error occurred during query compilation. Used for query lifetime hook. boolean compileError = false; boolean parseError = false; try { // Initialize the transaction manager. This must be done before analyze is called. 
final HiveTxnManager txnManager = SessionState.get().initTxnMgr(conf); // In case when user Ctrl-C twice to kill Hive CLI JVM, we want to release locks // if compile is being called multiple times, clear the old shutdownhook ShutdownHookManager.removeShutdownHook(shutdownRunner); shutdownRunner = new Runnable() { @Override public void run() { try { releaseLocksAndCommitOrRollback(false, txnManager); } catch (LockException e) { LOG.warn("Exception when releasing locks in ShutdownHook for Driver: " + e.getMessage()); } } }; ShutdownHookManager.addShutdownHook(shutdownRunner, SHUTDOWN_HOOK_PRIORITY); if (isInterrupted()) { return handleInterruption("before parsing and analysing the query"); } if (ctx == null) { ctx = new Context(conf); } ctx.setTryCount(getTryCount()); ctx.setCmd(command); ctx.setHDFSCleanup(true); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE); // Trigger query hook before compilation queryLifeTimeHookRunner.runBeforeParseHook(command); ASTNode tree; try { tree = ParseUtils.parse(command, ctx); } catch (ParseException e) { parseError = true; throw e; } finally { queryLifeTimeHookRunner.runAfterParseHook(command, parseError); } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE); queryLifeTimeHookRunner.runBeforeCompileHook(command); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE); BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree); List<HiveSemanticAnalyzerHook> saHooks = hooksLoader.getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, console); // Flush the metastore cache. This assures that we don't pick up objects from a previous // query running in this same thread. This has to be done after we get our semantic // analyzer (this is when the connection to the metastore is made) but before we analyze, // because at that point we need access to the objects. 
Hive.get().getMSC().flushCache(); if (checkConcurrency() && startImplicitTxn(txnManager)) { String userFromUGI = getUserFromUGI(); if (!txnManager.isTxnOpen()) { if (userFromUGI == null) { return 10; } long txnid = txnManager.openTxn(ctx, userFromUGI); } } // Do semantic analysis and plan generation if (saHooks != null && !saHooks.isEmpty()) { HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl(); hookCtx.setConf(conf); hookCtx.setUserName(userName); hookCtx.setIpAddress(SessionState.get().getUserIpAddress()); hookCtx.setCommand(command); hookCtx.setHiveOperation(queryState.getHiveOperation()); for (HiveSemanticAnalyzerHook hook : saHooks) { tree = hook.preAnalyze(hookCtx, tree); } sem.analyze(tree, ctx); hookCtx.update(sem); for (HiveSemanticAnalyzerHook hook : saHooks) { hook.postAnalyze(hookCtx, sem.getAllRootTasks()); } } else { sem.analyze(tree, ctx); } LOG.info("Semantic Analysis Completed"); // validate the plan sem.validate(); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE); if (isInterrupted()) { return handleInterruption("after analyzing query."); } // get the output schema schema = getSchema(sem, conf); plan = new QueryPlan(queryStr, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId, queryState.getHiveOperation(), schema); conf.setQueryString(queryStr); conf.set("mapreduce.workflow.id", "hive_" + queryId); conf.set("mapreduce.workflow.name", queryStr); // initialize FetchTask right here if (plan.getFetchTask() != null) { plan.getFetchTask().initialize(queryState, plan, null, ctx.getOpContext()); } //do the authorization check if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { try { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION); doAuthorization(queryState.getHiveOperation(), sem, command); } catch (AuthorizationException authExp) { console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details."); errorMessage = authExp.getMessage(); SQLState = "42000"; return 403; } finally { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION); } } if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) { String explainOutput = getExplainOutput(sem, plan, tree); if (explainOutput != null) { LOG.info("EXPLAIN output for queryid " + queryId + " : " + explainOutput); if (conf.isWebUiQueryInfoCacheEnabled()) { queryDisplay.setExplainPlan(explainOutput); } } } return 0; } catch (Exception e) { if (isInterrupted()) { return handleInterruption("during query compilation: " + e.getMessage()); } compileError = true; ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage()); errorMessage = "FAILED: " + e.getClass().getSimpleName(); if (error != ErrorMsg.GENERIC_ERROR) { errorMessage += " [Error " + error.getErrorCode() + "]:"; } // HIVE-4889 if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) { errorMessage += " " + e.getCause().getMessage(); } else { errorMessage += " " + e.getMessage(); } if (error == ErrorMsg.TXNMGR_NOT_ACID) { errorMessage += ". Failed command: " + queryStr; } SQLState = error.getSQLState(); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return error.getErrorCode();//todo: this is bad if returned as cmd shell exit // since it exceeds valid range of shell return values } finally { // Trigger post compilation hook. 
Note that if the compilation fails here then // before/after execution hook will never be executed. if (!parseError) { try { queryLifeTimeHookRunner.runAfterCompilationHook(command, compileError); } catch (Exception e) { LOG.warn("Failed when invoking query after-compilation hook.", e); } } double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00; ImmutableMap<String, Long> compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation"); queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings); boolean isInterrupted = isInterrupted(); if (isInterrupted && !deferClose) { closeInProcess(true); } lDrvState.stateLock.lock(); try { if (isInterrupted) { lDrvState.driverState = deferClose ? DriverState.EXECUTING : DriverState.ERROR; } else { lDrvState.driverState = compileError ? DriverState.ERROR : DriverState.COMPILED; } } finally { lDrvState.stateLock.unlock(); } if (isInterrupted) { LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds"); } else { LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds"); } } } private boolean startImplicitTxn(HiveTxnManager txnManager) throws LockException { boolean shouldOpenImplicitTxn = !ctx.isExplainPlan(); //this is dumb. HiveOperation is not always set. see HIVE-16447/HIVE-16443 switch (queryState.getHiveOperation() == null ? HiveOperation.QUERY : queryState.getHiveOperation()) { case COMMIT: case ROLLBACK: if (!txnManager.isTxnOpen()) { throw new LockException(null, ErrorMsg.OP_NOT_ALLOWED_WITHOUT_TXN, queryState.getHiveOperation().getOperationName()); } case SWITCHDATABASE: case SET_AUTOCOMMIT: /** * autocommit is here for completeness. TM doesn't use it. If we want to support JDBC * semantics (or any other definition of autocommit) it should be done at session level. */ case SHOWDATABASES: case SHOWTABLES: case SHOWCOLUMNS: case SHOWFUNCTIONS: case SHOWINDEXES: case SHOWPARTITIONS: case SHOWLOCKS: case SHOWVIEWS: case SHOW_ROLES: case SHOW_ROLE_PRINCIPALS: case SHOW_COMPACTIONS: case SHOW_TRANSACTIONS: case ABORT_TRANSACTIONS: shouldOpenImplicitTxn = false; //this implies that no locks are needed for such a command } return shouldOpenImplicitTxn; } private int handleInterruption(String msg) { return handleInterruptionWithHook(msg, null, null); } private int handleInterruptionWithHook(String msg, HookContext hookContext, PerfLogger perfLogger) { SQLState = "HY008"; //SQLState for cancel operation errorMessage = "FAILED: command has been interrupted: " + msg; console.printError(errorMessage); if (hookContext != null) { try { invokeFailureHooks(perfLogger, hookContext, errorMessage, null); } catch (Exception e) { LOG.warn("Caught exception attempting to invoke Failure Hooks", e); } } return 1000; } private boolean isInterrupted() { lDrvState.stateLock.lock(); try { if (lDrvState.driverState == DriverState.INTERRUPT) { return true; } else { return false; } } finally { lDrvState.stateLock.unlock(); } } private ImmutableMap<String, Long> dumpMetaCallTimingWithoutEx(String phase) { try { return Hive.get().dumpAndClearMetaCallTiming(phase); } catch (HiveException he) { LOG.warn("Caught exception attempting to write metadata call information " + he, he); } return null; } /** * Returns EXPLAIN EXTENDED output for a semantically * analyzed query. 
* * @param sem semantic analyzer for analyzed query * @param plan query plan * @param astTree AST tree dump * @throws java.io.IOException */ private String getExplainOutput(BaseSemanticAnalyzer sem, QueryPlan plan, ASTNode astTree) throws IOException { String ret = null; ExplainTask task = new ExplainTask(); task.initialize(queryState, plan, null, ctx.getOpContext()); ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream ps = new PrintStream(baos); try { List<Task<?>> rootTasks = sem.getAllRootTasks(); task.getJSONPlan(ps, rootTasks, sem.getFetchTask(), false, true, true); ret = baos.toString(); } catch (Exception e) { LOG.warn("Exception generating explain output: " + e, e); } return ret; } /** * Do authorization using post semantic analysis information in the semantic analyzer * The original command is also passed so that authorization interface can provide * more useful information in logs. * @param sem SemanticAnalyzer used to parse input query * @param command input query * @throws HiveException * @throws AuthorizationException */ public static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, String command) throws HiveException, AuthorizationException { SessionState ss = SessionState.get(); Hive db = sem.getDb(); Set<ReadEntity> additionalInputs = new HashSet<ReadEntity>(); for (Entity e : sem.getInputs()) { if (e.getType() == Entity.Type.PARTITION) { additionalInputs.add(new ReadEntity(e.getTable())); } } Set<WriteEntity> additionalOutputs = new HashSet<WriteEntity>(); for (WriteEntity e : sem.getOutputs()) { if (e.getType() == Entity.Type.PARTITION) { additionalOutputs.add(new WriteEntity(e.getTable(), e.getWriteType())); } } // The following union operation returns a union, which traverses over the // first set once and then then over each element of second set, in order, // that is not contained in first. This means it doesn't replace anything // in first set, and would preserve the WriteType in WriteEntity in first // set in case of outputs list. Set<ReadEntity> inputs = Sets.union(sem.getInputs(), additionalInputs); Set<WriteEntity> outputs = Sets.union(sem.getOutputs(), additionalOutputs); if (ss.isAuthorizationModeV2()) { // get mapping of tables to columns used ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo(); // colAccessInfo is set only in case of SemanticAnalyzer Map<String, List<String>> selectTab2Cols = colAccessInfo != null ? colAccessInfo.getTableToColumnAccessMap() : null; Map<String, List<String>> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ? 
sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null; doAuthorizationV2(ss, op, inputs, outputs, command, selectTab2Cols, updateTab2Cols); return; } if (op == null) { throw new HiveException("Operation should not be null"); } HiveAuthorizationProvider authorizer = ss.getAuthorizer(); if (op.equals(HiveOperation.CREATEDATABASE)) { authorizer.authorize(op.getInputRequiredPrivileges(), op.getOutputRequiredPrivileges()); } else if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) { authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); } else { if (op.equals(HiveOperation.IMPORT)) { ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem; if (!isa.existsTable()) { authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges()); } } } if (outputs != null && outputs.size() > 0) { for (WriteEntity write : outputs) { if (write.isDummy() || write.isPathType()) { continue; } if (write.getType() == Entity.Type.DATABASE) { if (!op.equals(HiveOperation.IMPORT)) { // We skip DB check for import here because we already handle it above // as a CTAS check. authorizer.authorize(write.getDatabase(), null, op.getOutputRequiredPrivileges()); } continue; } if (write.getType() == WriteEntity.Type.PARTITION) { Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false); if (part != null) { authorizer.authorize(write.getPartition(), null, op.getOutputRequiredPrivileges()); continue; } } if (write.getTable() != null) { authorizer.authorize(write.getTable(), null, op.getOutputRequiredPrivileges()); } } } if (inputs != null && inputs.size() > 0) { Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>(); Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>(); //determine if partition level privileges should be checked for input tables Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>(); for (ReadEntity read : inputs) { if (read.isDummy() || read.isPathType() || read.getType() == Entity.Type.DATABASE) { continue; } Table tbl = read.getTable(); if ((read.getPartition() != null) || (tbl != null && tbl.isPartitioned())) { String tblName = tbl.getTableName(); if (tableUsePartLevelAuth.get(tblName) == null) { boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE".equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE")))); if (usePartLevelPriv) { tableUsePartLevelAuth.put(tblName, Boolean.TRUE); } else { tableUsePartLevelAuth.put(tblName, Boolean.FALSE); } } } } // column authorization is checked through table scan operators. getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth); // cache the results for table authorization Set<String> tableAuthChecked = new HashSet<String>(); for (ReadEntity read : inputs) { // if read is not direct, we do not need to check its autho. 
if (read.isDummy() || read.isPathType() || !read.isDirect()) { continue; } if (read.getType() == Entity.Type.DATABASE) { authorizer.authorize(read.getDatabase(), op.getInputRequiredPrivileges(), null); continue; } Table tbl = read.getTable(); if (tbl.isView() && sem instanceof SemanticAnalyzer) { tab2Cols.put(tbl, sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName())); } if (read.getPartition() != null) { Partition partition = read.getPartition(); tbl = partition.getTable(); // use partition level authorization if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) { List<String> cols = part2Cols.get(partition); if (cols != null && cols.size() > 0) { authorizer.authorize(partition.getTable(), partition, cols, op.getInputRequiredPrivileges(), null); } else { authorizer.authorize(partition, op.getInputRequiredPrivileges(), null); } continue; } } // if we reach here, it means it needs to do a table authorization // check, and the table authorization may already happened because of other // partitions if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) && !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) { List<String> cols = tab2Cols.get(tbl); if (cols != null && cols.size() > 0) { authorizer.authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null); } else { authorizer.authorize(tbl, op.getInputRequiredPrivileges(), null); } tableAuthChecked.add(tbl.getTableName()); } } } } private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem, Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols, Map<String, Boolean> tableUsePartLevelAuth) throws HiveException { // for a select or create-as-select query, populate the partition to column // (par2Cols) or // table to columns mapping (tab2Cols) if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) { SemanticAnalyzer querySem = (SemanticAnalyzer) sem; ParseContext parseCtx = querySem.getParseContext(); for (Map.Entry<String, TableScanOperator> topOpMap : querySem.getParseContext().getTopOps() .entrySet()) { TableScanOperator tableScanOp = topOpMap.getValue(); if (!tableScanOp.isInsideView()) { Table tbl = tableScanOp.getConf().getTableMetadata(); List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs(); List<FieldSchema> columns = tbl.getCols(); List<String> cols = new ArrayList<String>(); for (int i = 0; i < neededColumnIds.size(); i++) { cols.add(columns.get(neededColumnIds.get(i)).getName()); } // map may not contain all sources, since input list may have been // optimized out // or non-existent tho such sources may still be referenced by the // TableScanOperator // if it's null then the partition probably doesn't exist so let's use // table permission if (tbl.isPartitioned() && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) { String alias_id = topOpMap.getKey(); PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id); Set<Partition> parts = partsList.getPartitions(); for (Partition part : parts) { List<String> existingCols = part2Cols.get(part); if (existingCols == null) { existingCols = new ArrayList<String>(); } existingCols.addAll(cols); part2Cols.put(part, existingCols); } } else { List<String> existingCols = tab2Cols.get(tbl); if (existingCols == null) { existingCols = new ArrayList<String>(); } existingCols.addAll(cols); tab2Cols.put(tbl, existingCols); } } } } } private static void doAuthorizationV2(SessionState ss, 
HiveOperation op, Set<ReadEntity> inputs, Set<WriteEntity> outputs, String command, Map<String, List<String>> tab2cols, Map<String, List<String>> updateTab2Cols) throws HiveException { /* comment for reviewers -> updateTab2Cols needed to be separate from tab2cols because if I pass tab2cols to getHivePrivObjects for the output case it will trip up insert/selects, since the insert will get passed the columns from the select. */ HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder(); authzContextBuilder.setUserIpAddress(ss.getUserIpAddress()); authzContextBuilder.setForwardedAddresses(ss.getForwardedAddresses()); authzContextBuilder.setCommandString(command); HiveOperationType hiveOpType = getHiveOperationType(op); List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputs, tab2cols); List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputs, updateTab2Cols); ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build()); } private static List<HivePrivilegeObject> getHivePrivObjects(Set<? extends Entity> privObjects, Map<String, List<String>> tableName2Cols) { List<HivePrivilegeObject> hivePrivobjs = new ArrayList<HivePrivilegeObject>(); if (privObjects == null) { return hivePrivobjs; } for (Entity privObject : privObjects) { HivePrivilegeObjectType privObjType = AuthorizationUtils .getHivePrivilegeObjectType(privObject.getType()); if (privObject.isDummy()) { //do not authorize dummy readEntity or writeEntity continue; } if (privObject instanceof ReadEntity && !((ReadEntity) privObject).isDirect()) { // In case of views, the underlying views or tables are not direct dependencies // and are not used for authorization checks. // This ReadEntity represents one of the underlying tables/views, so skip it. // See description of the isDirect in ReadEntity continue; } if (privObject instanceof WriteEntity && ((WriteEntity) privObject).isTempURI()) { //do not authorize temporary uris continue; } //support for authorization on partitions needs to be added String dbname = null; String objName = null; List<String> partKeys = null; List<String> columns = null; switch (privObject.getType()) { case DATABASE: dbname = privObject.getDatabase().getName(); break; case TABLE: dbname = privObject.getTable().getDbName(); objName = privObject.getTable().getTableName(); columns = tableName2Cols == null ? null : tableName2Cols.get(Table.getCompleteName(dbname, objName)); break; case DFS_DIR: case LOCAL_DIR: objName = privObject.getD().toString(); break; case FUNCTION: if (privObject.getDatabase() != null) { dbname = privObject.getDatabase().getName(); } objName = privObject.getFunctionName(); break; case DUMMYPARTITION: case PARTITION: // not currently handled continue; default: throw new AssertionError("Unexpected object type"); } HivePrivObjectActionType actionType = AuthorizationUtils.getActionType(privObject); HivePrivilegeObject hPrivObject = new HivePrivilegeObject(privObjType, dbname, objName, partKeys, columns, actionType, null); hivePrivobjs.add(hPrivObject); } return hivePrivobjs; } private static HiveOperationType getHiveOperationType(HiveOperation op) { return HiveOperationType.valueOf(op.name()); } /** * @return The current query plan associated with this Driver, if any. */ public QueryPlan getPlan() { return plan; } /** * @return The current FetchTask associated with the Driver's plan, if any. 
*/ public FetchTask getFetchTask() { return fetchTask; } // Write the current set of valid transactions into the conf file so that it can be read by // the input format. private void recordValidTxns() throws LockException { ValidTxnList oldList = null; String s = conf.get(ValidTxnList.VALID_TXNS_KEY); if (s != null && s.length() > 0) { oldList = new ValidReadTxnList(s); } HiveTxnManager txnMgr = SessionState.get().getTxnMgr(); ValidTxnList txns = txnMgr.getValidTxns(); if (oldList != null) { throw new IllegalStateException("calling recordValidTxn() more than once in the same " + JavaUtils.txnIdToString(txnMgr.getCurrentTxnId())); } String txnStr = txns.toString(); conf.set(ValidTxnList.VALID_TXNS_KEY, txnStr); if (plan.getFetchTask() != null) { /** * This is needed for {@link HiveConf.ConfVars.HIVEFETCHTASKCONVERSION} optimization which * initializes JobConf in FetchOperator before recordValidTxns() but this has to be done * after locks are acquired to avoid race conditions in ACID. */ plan.getFetchTask().setValidTxnList(txnStr); } LOG.debug("Encoding valid txns info " + txnStr + " txnid:" + txnMgr.getCurrentTxnId()); } private String getUserFromUGI() { // Don't use the userName member, as it may or may not have been set. Get the value from // conf, which calls into getUGI to figure out who the process is running as. try { return conf.getUser(); } catch (IOException e) { errorMessage = "FAILED: Error in determining user while acquiring locks: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); } return null; } /** * Acquire read and write locks needed by the statement. The list of objects to be locked are * obtained from the inputs and outputs populated by the compiler. Locking strategy depends on * HiveTxnManager and HiveLockManager configured * * This method also records the list of valid transactions. This must be done after any * transactions have been opened. **/ private int acquireLocks() { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); SessionState ss = SessionState.get(); HiveTxnManager txnMgr = ss.getTxnMgr(); if (!txnMgr.isTxnOpen() && txnMgr.supportsAcid()) { /*non acid txn managers don't support txns but fwd lock requests to lock managers acid txn manager requires all locks to be associated with a txn so if we end up here w/o an open txn it's because we are processing something like "use <database> which by definition needs no locks*/ return 0; } try { String userFromUGI = getUserFromUGI(); if (userFromUGI == null) { return 10; } // Set the transaction id in all of the acid file sinks if (haveAcidWrite()) { for (FileSinkDesc desc : plan.getAcidSinks()) { desc.setTransactionId(txnMgr.getCurrentTxnId()); //it's possible to have > 1 FileSink writing to the same table/partition //e.g. 
Merge stmt, multi-insert stmt when mixing DP and SP writes desc.setStatementId(txnMgr.getWriteIdAndIncrement()); } } /*It's imperative that {@code acquireLocks()} is called for all commands so that HiveTxnManager can transition its state machine correctly*/ txnMgr.acquireLocks(plan, ctx, userFromUGI, lDrvState); if (txnMgr.recordSnapshot(plan)) { recordValidTxns(); } return 0; } catch (Exception e) { errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage(); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 10; } finally { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ACQUIRE_READ_WRITE_LOCKS); } } private boolean haveAcidWrite() { return !plan.getAcidSinks().isEmpty(); } /** * @param commit if there is an open transaction and if true, commit, * if false rollback. If there is no open transaction this parameter is ignored. * @param txnManager an optional existing transaction manager retrieved earlier from the session * **/ @VisibleForTesting public void releaseLocksAndCommitOrRollback(boolean commit, HiveTxnManager txnManager) throws LockException { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS); HiveTxnManager txnMgr; if (txnManager == null) { SessionState ss = SessionState.get(); txnMgr = ss.getTxnMgr(); } else { txnMgr = txnManager; } // If we've opened a transaction we need to commit or rollback rather than explicitly // releasing the locks. conf.unset(ValidTxnList.VALID_TXNS_KEY); if (txnMgr.isTxnOpen()) { if (commit) { if (conf.getBoolVar(ConfVars.HIVE_IN_TEST) && conf.getBoolVar(ConfVars.HIVETESTMODEROLLBACKTXN)) { txnMgr.rollbackTxn(); } else { txnMgr.commitTxn();//both commit & rollback clear ALL locks for this tx } } else { txnMgr.rollbackTxn(); } } else { //since there is no tx, we only have locks for current query (if any) if (ctx != null && ctx.getHiveLocks() != null) { hiveLocks.addAll(ctx.getHiveLocks()); } txnMgr.releaseLocks(hiveLocks); } hiveLocks.clear(); if (ctx != null) { ctx.setHiveLocks(null); } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RELEASE_LOCKS); } /** * Release some resources after a query is executed * while keeping the result around. */ private void releaseResources() { releasePlan(); releaseDriverContext(); if (SessionState.get() != null) { SessionState.get().getLineageState().clear(); } } @Override public CommandProcessorResponse run(String command) throws CommandNeedRetryException { return run(command, false); } public CommandProcessorResponse run() throws CommandNeedRetryException { return run(null, true); } public CommandProcessorResponse run(String command, boolean alreadyCompiled) throws CommandNeedRetryException { CommandProcessorResponse cpr = runInternal(command, alreadyCompiled); if (cpr.getResponseCode() == 0) { return cpr; } SessionState ss = SessionState.get(); if (ss == null) { return cpr; } MetaDataFormatter mdf = MetaDataFormatUtils.getFormatter(ss.getConf()); if (!(mdf instanceof JsonMetaDataFormatter)) { return cpr; } /*Here we want to encode the error in machine readable way (e.g. JSON) * Ideally, errorCode would always be set to a canonical error defined in ErrorMsg. * In practice that is rarely the case, so the messy logic below tries to tease * out canonical error code if it can. Exclude stack trace from output when * the error is a specific/expected one. 
* It's written to stdout for backward compatibility (WebHCat consumes it).*/ try { if (downstreamError == null) { mdf.error(ss.out, errorMessage, cpr.getResponseCode(), SQLState); return cpr; } ErrorMsg canonicalErr = ErrorMsg.getErrorMsg(cpr.getResponseCode()); if (canonicalErr != null && canonicalErr != ErrorMsg.GENERIC_ERROR) { /*Some HiveExceptions (e.g. SemanticException) don't set canonical ErrorMsg explicitly, but there is logic (e.g. #compile()) to find an appropriate canonical error and return its code as error code. In this case we want to preserve it for downstream code to interpret*/ mdf.error(ss.out, errorMessage, cpr.getResponseCode(), SQLState, null); return cpr; } if (downstreamError instanceof HiveException) { HiveException rc = (HiveException) downstreamError; mdf.error(ss.out, errorMessage, rc.getCanonicalErrorMsg().getErrorCode(), SQLState, rc.getCanonicalErrorMsg() == ErrorMsg.GENERIC_ERROR ? org.apache.hadoop.util.StringUtils.stringifyException(rc) : null); } else { ErrorMsg canonicalMsg = ErrorMsg.getErrorMsg(downstreamError.getMessage()); mdf.error(ss.out, errorMessage, canonicalMsg.getErrorCode(), SQLState, org.apache.hadoop.util.StringUtils.stringifyException(downstreamError)); } } catch (HiveException ex) { console.printError("Unable to JSON-encode the error", org.apache.hadoop.util.StringUtils.stringifyException(ex)); } return cpr; } public CommandProcessorResponse compileAndRespond(String command) { return createProcessorResponse(compileInternal(command, false)); } private static final ReentrantLock globalCompileLock = new ReentrantLock(); private int compileInternal(String command, boolean deferClose) { int ret; Metrics metrics = MetricsFactory.getInstance(); if (metrics != null) { metrics.incrementCounter(MetricsConstant.WAITING_COMPILE_OPS, 1); } PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.WAIT_COMPILE); final ReentrantLock compileLock = tryAcquireCompileLock(isParallelEnabled, command); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.WAIT_COMPILE); if (metrics != null) { metrics.decrementCounter(MetricsConstant.WAITING_COMPILE_OPS, 1); } if (compileLock == null) { return ErrorMsg.COMPILE_LOCK_TIMED_OUT.getErrorCode(); } try { ret = compile(command, true, deferClose); } finally { compileLock.unlock(); } if (ret != 0) { try { releaseLocksAndCommitOrRollback(false, null); } catch (LockException e) { LOG.warn("Exception in releasing locks. " + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } //Save compile-time PerfLogging for WebUI. //Execution-time Perf logs are done by either another thread's PerfLogger //or a reset PerfLogger. queryDisplay.setPerfLogStarts(QueryDisplay.Phase.COMPILATION, perfLogger.getStartTimes()); queryDisplay.setPerfLogEnds(QueryDisplay.Phase.COMPILATION, perfLogger.getEndTimes()); return ret; } /** * Acquires the compile lock. If the compile lock wait timeout is configured, * it will acquire the lock if it is not held by another thread within the given * waiting time. * @return the ReentrantLock object if the lock was successfully acquired, * or {@code null} if compile lock wait timeout is configured and * either the waiting time elapsed before the lock could be acquired * or if the current thread is interrupted. */ private ReentrantLock tryAcquireCompileLock(boolean isParallelEnabled, String command) { final ReentrantLock compileLock = isParallelEnabled ? 
SessionState.get().getCompileLock() : globalCompileLock; long maxCompileLockWaitTime = HiveConf.getTimeVar(this.conf, ConfVars.HIVE_SERVER2_COMPILE_LOCK_TIMEOUT, TimeUnit.SECONDS); final String lockAcquiredMsg = "Acquired the compile lock."; // First shot without waiting. try { if (compileLock.tryLock(0, TimeUnit.SECONDS)) { LOG.debug(lockAcquiredMsg); return compileLock; } } catch (InterruptedException e) { Thread.currentThread().interrupt(); if (LOG.isDebugEnabled()) { LOG.debug("Interrupted Exception ignored", e); } return null; } // If the first shot fails, then we log the waiting messages. if (LOG.isDebugEnabled()) { LOG.debug("Waiting to acquire compile lock: " + command); } if (maxCompileLockWaitTime > 0) { try { if (!compileLock.tryLock(maxCompileLockWaitTime, TimeUnit.SECONDS)) { errorMessage = ErrorMsg.COMPILE_LOCK_TIMED_OUT.getErrorCodedMsg(); LOG.error(errorMessage + ": " + command); return null; } } catch (InterruptedException e) { Thread.currentThread().interrupt(); if (LOG.isDebugEnabled()) { LOG.debug("Interrupted Exception ignored", e); } return null; } } else { compileLock.lock(); } LOG.debug(lockAcquiredMsg); return compileLock; } private CommandProcessorResponse runInternal(String command, boolean alreadyCompiled) throws CommandNeedRetryException { errorMessage = null; SQLState = null; downstreamError = null; LockedDriverState.setLockedDriverState(lDrvState); lDrvState.stateLock.lock(); try { if (alreadyCompiled) { if (lDrvState.driverState == DriverState.COMPILED) { lDrvState.driverState = DriverState.EXECUTING; } else { errorMessage = "FAILED: Precompiled query has been cancelled or closed."; console.printError(errorMessage); return createProcessorResponse(12); } } else { lDrvState.driverState = DriverState.COMPILING; } } finally { lDrvState.stateLock.unlock(); } // a flag that helps to set the correct driver state in finally block by tracking if // the method has been returned by an error or not. boolean isFinishedWithError = true; try { HiveDriverRunHookContext hookContext = new HiveDriverRunHookContextImpl(conf, alreadyCompiled ? ctx.getCmd() : command); // Get all the driver run hooks and pre-execute them. List<HiveDriverRunHook> driverRunHooks; try { driverRunHooks = hooksLoader.getHooks(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, console); for (HiveDriverRunHook driverRunHook : driverRunHooks) { driverRunHook.preDriverRun(hookContext); } } catch (Exception e) { errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return createProcessorResponse(12); } PerfLogger perfLogger = null; int ret; if (!alreadyCompiled) { // compile internal will automatically reset the perf logger ret = compileInternal(command, true); // then we continue to use this perf logger perfLogger = SessionState.getPerfLogger(); if (ret != 0) { return createProcessorResponse(ret); } } else { // reuse existing perf logger. perfLogger = SessionState.getPerfLogger(); // Since we're reusing the compiled plan, we need to update its start time for current run plan.setQueryStartTime(perfLogger.getStartTime(PerfLogger.DRIVER_RUN)); } // the reason that we set the txn manager for the cxt here is because each // query has its own ctx object. The txn mgr is shared across the // same instance of Driver, which can run multiple queries. 
HiveTxnManager txnManager = SessionState.get().getTxnMgr(); ctx.setHiveTxnManager(txnManager); if (requiresLock()) { // a checkpoint to see if the thread is interrupted or not before an expensive operation if (isInterrupted()) { ret = handleInterruption("at acquiring the lock."); } else { ret = acquireLocks(); } if (ret != 0) { return rollback(createProcessorResponse(ret)); } } ret = execute(true); if (ret != 0) { //if needRequireLock is false, the release here will do nothing because there is no lock return rollback(createProcessorResponse(ret)); } //if needRequireLock is false, the release here will do nothing because there is no lock try { //since set autocommit starts an implicit txn, close it if (txnManager.isImplicitTransactionOpen() || plan.getOperation() == HiveOperation.COMMIT) { releaseLocksAndCommitOrRollback(true, null); } else if (plan.getOperation() == HiveOperation.ROLLBACK) { releaseLocksAndCommitOrRollback(false, null); } else { //txn (if there is one started) is not finished } } catch (LockException e) { return handleHiveException(e, 12); } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_RUN); queryDisplay.setPerfLogStarts(QueryDisplay.Phase.EXECUTION, perfLogger.getStartTimes()); queryDisplay.setPerfLogEnds(QueryDisplay.Phase.EXECUTION, perfLogger.getEndTimes()); // Take all the driver run hooks and post-execute them. try { for (HiveDriverRunHook driverRunHook : driverRunHooks) { driverRunHook.postDriverRun(hookContext); } } catch (Exception e) { errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); SQLState = ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return createProcessorResponse(12); } isFinishedWithError = false; return createProcessorResponse(ret); } finally { if (isInterrupted()) { closeInProcess(true); } else { // only release the related resources ctx, driverContext as normal releaseResources(); } lDrvState.stateLock.lock(); try { if (lDrvState.driverState == DriverState.INTERRUPT) { lDrvState.driverState = DriverState.ERROR; } else { lDrvState.driverState = isFinishedWithError ? DriverState.ERROR : DriverState.EXECUTED; } } finally { lDrvState.stateLock.unlock(); } } } private CommandProcessorResponse rollback(CommandProcessorResponse cpr) { //console.printError(cpr.toString()); try { releaseLocksAndCommitOrRollback(false, null); } catch (LockException e) { LOG.error("rollback() FAILED: " + cpr);//make sure not to loose handleHiveException(e, 12, "Additional info in hive.log at \"rollback() FAILED\""); } return cpr; } private CommandProcessorResponse handleHiveException(HiveException e, int ret) { return handleHiveException(e, ret, null); } private CommandProcessorResponse handleHiveException(HiveException e, int ret, String rootMsg) { errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); if (rootMsg != null) { errorMessage += "\n" + rootMsg; } SQLState = e.getCanonicalErrorMsg() != null ? e.getCanonicalErrorMsg().getSQLState() : ErrorMsg.findSQLState(e.getMessage()); downstreamError = e; console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return createProcessorResponse(ret); } private boolean requiresLock() { if (!checkConcurrency()) { return false; } // Lock operations themselves don't require the lock. if (isExplicitLockOperation()) { return false; } if (!HiveConf.getBoolVar(conf, ConfVars.HIVE_LOCK_MAPRED_ONLY)) { return true; } Queue<Task<? 
extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>(); taskQueue.addAll(plan.getRootTasks()); while (taskQueue.peek() != null) { Task<? extends Serializable> tsk = taskQueue.remove(); if (tsk.requireLock()) { return true; } if (tsk instanceof ConditionalTask) { taskQueue.addAll(((ConditionalTask) tsk).getListTasks()); } if (tsk.getChildTasks() != null) { taskQueue.addAll(tsk.getChildTasks()); } // does not add back up task here, because back up task should be the same // type of the original task. } return false; } private boolean isExplicitLockOperation() { HiveOperation currentOpt = plan.getOperation(); if (currentOpt != null) { switch (currentOpt) { case LOCKDB: case UNLOCKDB: case LOCKTABLE: case UNLOCKTABLE: return true; default: return false; } } return false; } private CommandProcessorResponse createProcessorResponse(int ret) { SessionState.getPerfLogger().cleanupPerfLogMetrics(); queryDisplay.setErrorMessage(errorMessage); if (downstreamError != null && downstreamError instanceof HiveException) { ErrorMsg em = ((HiveException) downstreamError).getCanonicalErrorMsg(); if (em != null) { return new CommandProcessorResponse(ret, errorMessage, SQLState, schema, downstreamError, em.getErrorCode(), null); } } return new CommandProcessorResponse(ret, errorMessage, SQLState, downstreamError); } public int execute() throws CommandNeedRetryException { return execute(false); } public int execute(boolean deferClose) throws CommandNeedRetryException { PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE); boolean noName = StringUtils.isEmpty(conf.get(MRJobConfig.JOB_NAME)); int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); Metrics metrics = MetricsFactory.getInstance(); String queryId = queryState.getQueryId(); // Get the query string from the conf file as the compileInternal() method might // hide sensitive information during query redaction. String queryStr = conf.getQueryString(); lDrvState.stateLock.lock(); try { // if query is not in compiled state, or executing state which is carried over from // a combined compile/execute in runInternal, throws the error if (lDrvState.driverState != DriverState.COMPILED && lDrvState.driverState != DriverState.EXECUTING) { SQLState = "HY008"; errorMessage = "FAILED: query " + queryStr + " has " + (lDrvState.driverState == DriverState.INTERRUPT ? "been cancelled" : "not been compiled."); console.printError(errorMessage); return 1000; } else { lDrvState.driverState = DriverState.EXECUTING; } } finally { lDrvState.stateLock.unlock(); } maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER); HookContext hookContext = null; // Whether there's any error occurred during query execution. Used for query lifetime hook. boolean executionError = false; try { LOG.info("Executing command(queryId=" + queryId + "): " + queryStr); // compile and execute can get called from different threads in case of HS2 // so clear timing in this thread's Hive object before proceeding. 
Hive.get().clearMetaCallTiming(); plan.setStarted(); if (SessionState.get() != null) { SessionState.get().getHiveHistory().startQuery(queryStr, queryId); SessionState.get().getHiveHistory().logPlanProgress(plan); } resStream = null; SessionState ss = SessionState.get(); hookContext = new HookContext(plan, queryState, ctx.getPathToCS(), SessionState.get().getUserName(), ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId, ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger, queryInfo); hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK); for (Hook peh : hooksLoader.getHooks(HiveConf.ConfVars.PREEXECHOOKS, console)) { if (peh instanceof ExecuteWithHookContext) { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((ExecuteWithHookContext) peh).run(hookContext); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); } else if (peh instanceof PreExecute) { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), Utils.getUGI()); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName()); } } // Trigger query hooks before query execution. queryLifeTimeHookRunner.runBeforeExecutionHook(queryStr, hookContext); setQueryDisplays(plan.getRootTasks()); int mrJobs = Utilities.getMRTasks(plan.getRootTasks()).size(); int jobs = mrJobs + Utilities.getTezTasks(plan.getRootTasks()).size() + Utilities.getSparkTasks(plan.getRootTasks()).size(); if (jobs > 0) { logMrWarning(mrJobs); console.printInfo("Query ID = " + queryId); console.printInfo("Total jobs = " + jobs); } if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs)); SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap()); } String jobname = Utilities.abbreviate(queryStr, maxlen - 6); // A runtime that launches runnable tasks as separate Threads through // TaskRunners // As soon as a task isRunnable, it is put in a queue // At any time, at most maxthreads tasks can be running // The main thread polls the TaskRunners to check if they have finished. if (isInterrupted()) { return handleInterruptionWithHook("before running tasks.", hookContext, perfLogger); } DriverContext driverCxt = new DriverContext(ctx); driverCxt.prepare(plan); ctx.setHDFSCleanup(true); this.driverCxt = driverCxt; // for canceling the query (should be bound to session?) SessionState.get().setMapRedStats(new LinkedHashMap<String, MapRedStats>()); SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>()); SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>()); // Add root Tasks to runnable for (Task<? extends Serializable> tsk : plan.getRootTasks()) { // This should never happen, if it does, it's a bug with the potential to produce // incorrect results. assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty(); driverCxt.addToRunnable(tsk); if (metrics != null) { tsk.updateTaskMetrics(metrics); } } perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS); // Loop while you either have tasks running, or tasks queued up while (driverCxt.isRunning()) { // Launch upto maxthreads tasks Task<? 
extends Serializable> task; while ((task = driverCxt.getRunnable(maxthreads)) != null) { TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt); if (!runner.isRunning()) { break; } } // poll the Tasks to see which one completed TaskRunner tskRun = driverCxt.pollFinished(); if (tskRun == null) { continue; } hookContext.addCompleteTask(tskRun); queryDisplay.setTaskResult(tskRun.getTask().getId(), tskRun.getTaskResult()); Task<? extends Serializable> tsk = tskRun.getTask(); TaskResult result = tskRun.getTaskResult(); int exitVal = result.getExitVal(); if (isInterrupted()) { return handleInterruptionWithHook("when checking the execution result.", hookContext, perfLogger); } if (exitVal != 0) { if (tsk.ifRetryCmdWhenFail()) { driverCxt.shutdown(); // in case we decided to run everything in local mode, restore the // the jobtracker setting to its initial value ctx.restoreOriginalTracker(); throw new CommandNeedRetryException(); } Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask(); if (backupTask != null) { setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk); console.printError(errorMessage); errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName(); console.printError(errorMessage); // add backup task to runnable if (DriverContext.isLaunchable(backupTask)) { driverCxt.addToRunnable(backupTask); } continue; } else { setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk); if (driverCxt.isShutdown()) { errorMessage = "FAILED: Operation cancelled. " + errorMessage; } invokeFailureHooks(perfLogger, hookContext, errorMessage + Strings.nullToEmpty(tsk.getDiagnosticsMessage()), result.getTaskError()); SQLState = "08S01"; // 08S01 (Communication error) is the default sql state. Override the sqlstate // based on the ErrorMsg set in HiveException. if (result.getTaskError() instanceof HiveException) { ErrorMsg errorMsg = ((HiveException) result.getTaskError()).getCanonicalErrorMsg(); if (errorMsg != ErrorMsg.GENERIC_ERROR) { SQLState = errorMsg.getSQLState(); } } console.printError(errorMessage); driverCxt.shutdown(); // in case we decided to run everything in local mode, restore the // the jobtracker setting to its initial value ctx.restoreOriginalTracker(); return exitVal; } } driverCxt.finished(tskRun); if (SessionState.get() != null) { SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal)); SessionState.get().getHiveHistory().endTask(queryId, tsk); } if (tsk.getChildTasks() != null) { for (Task<? extends Serializable> child : tsk.getChildTasks()) { if (DriverContext.isLaunchable(child)) { driverCxt.addToRunnable(child); } } } } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS); // in case we decided to run everything in local mode, restore the // the jobtracker setting to its initial value ctx.restoreOriginalTracker(); if (driverCxt.isShutdown()) { SQLState = "HY008"; errorMessage = "FAILED: Operation cancelled"; invokeFailureHooks(perfLogger, hookContext, errorMessage, null); console.printError(errorMessage); return 1000; } // remove incomplete outputs. // Some incomplete outputs may be added at the beginning, for eg: for dynamic partitions. 
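
The main execution loop above launches runnable tasks until maxthreads are in flight, then polls the TaskRunners; when a task finishes it either schedules a backup task on failure or marks the task done and releases its launchable children. The sketch below reproduces just that launch-then-poll shape with standard java.util.concurrent classes. The Job class is hypothetical, it assumes a simple task tree (no child waiting on several parents), and it has none of the retry, backup-task, or error-reporting handling seen above.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Illustrative sketch only: a bounded launch-then-poll loop, not Hive's DriverContext/TaskRunner.
final class LaunchPollSketch {
  static final class Job {
    final String name;
    final List<Job> children = new ArrayList<>();
    Job(String name) { this.name = name; }
    int run() { return 0; }   // 0 == success, by analogy with a task's exitVal
  }

  // Launch up to maxThreads jobs, wait for completions, and release children of finished jobs.
  static int runAll(List<Job> roots, int maxThreads)
      throws InterruptedException, ExecutionException {
    ExecutorService pool = Executors.newFixedThreadPool(maxThreads);
    ExecutorCompletionService<Job> done = new ExecutorCompletionService<>(pool);
    Queue<Job> runnable = new ArrayDeque<>(roots);
    int inFlight = 0;
    try {
      while (!runnable.isEmpty() || inFlight > 0) {
        while (!runnable.isEmpty() && inFlight < maxThreads) {  // launch phase
          Job job = runnable.remove();
          done.submit(() -> { job.run(); return job; });
          inFlight++;
        }
        Job finished = done.take().get();                       // completion phase (blocks here)
        inFlight--;
        runnable.addAll(finished.children);                     // children become runnable
      }
      return 0;
    } finally {
      pool.shutdown();
    }
  }
}
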
// remove them HashSet<WriteEntity> remOutputs = new LinkedHashSet<WriteEntity>(); for (WriteEntity output : plan.getOutputs()) { if (!output.isComplete()) { remOutputs.add(output); } } for (WriteEntity output : remOutputs) { plan.getOutputs().remove(output); } hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK); // Get all the post execution hooks and execute them. for (Hook peh : hooksLoader.getHooks(HiveConf.ConfVars.POSTEXECHOOKS, console)) { if (peh instanceof ExecuteWithHookContext) { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName()); ((ExecuteWithHookContext) peh).run(hookContext); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName()); } else if (peh instanceof PostExecute) { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName()); ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo() : null), Utils.getUGI()); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName()); } } if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0)); SessionState.get().getHiveHistory().printRowCount(queryId); } releasePlan(plan); } catch (CommandNeedRetryException e) { executionError = true; throw e; } catch (Throwable e) { executionError = true; if (isInterrupted()) { return handleInterruptionWithHook("during query execution: \n" + e.getMessage(), hookContext, perfLogger); } ctx.restoreOriginalTracker(); if (SessionState.get() != null) { SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12)); } // TODO: do better with handling types of Exception here errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e); if (hookContext != null) { try { invokeFailureHooks(perfLogger, hookContext, errorMessage, e); } catch (Exception t) { LOG.warn("Failed to invoke failure hook", t); } } SQLState = "08S01"; downstreamError = e; console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return (12); } finally { // Trigger query hooks after query completes its execution. 
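
Pruning the incomplete outputs above follows a two-pass pattern: collect the WriteEntity objects to drop while iterating the plan's output set, then remove them afterwards, so the set is never modified during iteration. A minimal illustration of the same pattern with plain strings (on a modern JDK, Collection.removeIf would do the same in one call):

import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;

// Illustrative sketch only: two-phase removal avoids modifying a set while iterating it.
final class TwoPhaseRemovalSketch {
  public static void main(String[] args) {
    Set<String> outputs = new LinkedHashSet<>();
    outputs.add("db.t1@part=1");       // pretend this output is complete
    outputs.add("db.t1@part=2#temp");  // pretend this output is incomplete

    // Pass 1: collect; never remove from the set we are currently reading.
    Set<String> incomplete = new HashSet<>();
    for (String out : outputs) {
      if (out.endsWith("#temp")) {
        incomplete.add(out);
      }
    }
    // Pass 2: remove what was collected.
    outputs.removeAll(incomplete);

    System.out.println(outputs);       // [db.t1@part=1]
  }
}
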
try { queryLifeTimeHookRunner.runAfterExecutionHook(queryStr, hookContext, executionError); } catch (Exception e) { LOG.warn("Failed when invoking query after execution hook", e); } if (SessionState.get() != null) { SessionState.get().getHiveHistory().endQuery(queryId); } if (noName) { conf.set(MRJobConfig.JOB_NAME, ""); } double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE) / 1000.00; ImmutableMap<String, Long> executionHMSTimings = dumpMetaCallTimingWithoutEx("execution"); queryDisplay.setHmsTimings(QueryDisplay.Phase.EXECUTION, executionHMSTimings); Map<String, MapRedStats> stats = SessionState.get().getMapRedStats(); if (stats != null && !stats.isEmpty()) { long totalCpu = 0; console.printInfo("MapReduce Jobs Launched: "); for (Map.Entry<String, MapRedStats> entry : stats.entrySet()) { console.printInfo("Stage-" + entry.getKey() + ": " + entry.getValue()); totalCpu += entry.getValue().getCpuMSec(); } console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu)); } boolean isInterrupted = isInterrupted(); if (isInterrupted && !deferClose) { closeInProcess(true); } lDrvState.stateLock.lock(); try { if (isInterrupted) { if (!deferClose) { lDrvState.driverState = DriverState.ERROR; } } else { lDrvState.driverState = executionError ? DriverState.ERROR : DriverState.EXECUTED; } } finally { lDrvState.stateLock.unlock(); } if (isInterrupted) { LOG.info("Executing command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds"); } else { LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds"); } } if (console != null) { console.printInfo("OK"); } return (0); } private void releasePlan(QueryPlan plan) { // Plan maybe null if Driver.close is called in another thread for the same Driver object lDrvState.stateLock.lock(); try { if (plan != null) { plan.setDone(); if (SessionState.get() != null) { try { SessionState.get().getHiveHistory().logPlanProgress(plan); } catch (Exception e) { // Log and ignore LOG.warn("Could not log query plan progress", e); } } } } finally { lDrvState.stateLock.unlock(); } } private void setQueryDisplays(List<Task<? extends Serializable>> tasks) { if (tasks != null) { for (Task<? extends Serializable> task : tasks) { task.setQueryDisplay(queryDisplay); setQueryDisplays(task.getDependentTasks()); } } } private void logMrWarning(int mrJobs) { if (mrJobs <= 0 || !("mr".equals(HiveConf.getVar(conf, ConfVars.HIVE_EXECUTION_ENGINE)))) { return; } String warning = HiveConf.generateMrDeprecationWarning(); LOG.warn(warning); } private void setErrorMsgAndDetail(int exitVal, Throwable downstreamError, Task tsk) { this.downstreamError = downstreamError; errorMessage = "FAILED: Execution Error, return code " + exitVal + " from " + tsk.getClass().getName(); if (downstreamError != null) { //here we assume that upstream code may have parametrized the msg from ErrorMsg //so we want to keep it errorMessage += ". " + downstreamError.getMessage(); } else { ErrorMsg em = ErrorMsg.getErrorMsg(exitVal); if (em != null) { errorMessage += ". " + em.getMsg(); } } } private void invokeFailureHooks(PerfLogger perfLogger, HookContext hookContext, String errorMessage, Throwable exception) throws Exception { hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK); hookContext.setErrorMessage(errorMessage); hookContext.setException(exception); // Get all the failure execution hooks and execute them. 
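
Every state transition in this class, including the one in the finally block above, happens while holding lDrvState.stateLock, and the update sits inside try/finally so the lock is always released. The fragment below isolates that pattern; the State enum and the finishExecution() method are hypothetical stand-ins for DriverState and the driver's own bookkeeping.

import java.util.concurrent.locks.ReentrantLock;

// Illustrative sketch only: a state field guarded by an explicit lock, as the driver does with
// lDrvState.stateLock. The State enum is a hypothetical stand-in for DriverState.
final class GuardedStateSketch {
  enum State { COMPILED, EXECUTING, EXECUTED, ERROR, INTERRUPT }

  private final ReentrantLock stateLock = new ReentrantLock();
  private State state = State.COMPILED;

  // Mirrors the end-of-execution transition: an interrupted driver ends in ERROR,
  // otherwise the outcome flag decides between ERROR and EXECUTED.
  void finishExecution(boolean executionError) {
    stateLock.lock();
    try {
      if (state == State.INTERRUPT) {
        state = State.ERROR;
      } else {
        state = executionError ? State.ERROR : State.EXECUTED;
      }
    } finally {
      stateLock.unlock();   // always released, even if the body above throws
    }
  }

  State current() {
    stateLock.lock();
    try {
      return state;
    } finally {
      stateLock.unlock();
    }
  }
}
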
for (Hook ofh : hooksLoader.getHooks(HiveConf.ConfVars.ONFAILUREHOOKS, console)) { perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.FAILURE_HOOK + ofh.getClass().getName()); ((ExecuteWithHookContext) ofh).run(hookContext); perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.FAILURE_HOOK + ofh.getClass().getName()); } } /** * Launches a new task * * @param tsk * task being launched * @param queryId * Id of the query containing the task * @param noName * whether the task has a name set * @param jobname * name of the task, if it is a map-reduce job * @param jobs * number of map-reduce jobs * @param cxt * the driver context */ private TaskRunner launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName, String jobname, int jobs, DriverContext cxt) throws HiveException { if (SessionState.get() != null) { SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName()); } if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) { if (noName) { conf.set(MRJobConfig.JOB_NAME, jobname + "(" + tsk.getId() + ")"); } conf.set("mapreduce.workflow.node.name", tsk.getId()); Utilities.setWorkflowAdjacencies(conf, plan); cxt.incCurJobNo(1); console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs); } tsk.initialize(queryState, plan, cxt, ctx.getOpContext()); TaskResult tskRes = new TaskResult(); TaskRunner tskRun = new TaskRunner(tsk, tskRes); cxt.launching(tskRun); // Launch Task if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.isMapRedTask()) { // Launch it in the parallel mode, as a separate thread only for MR tasks if (LOG.isInfoEnabled()) { LOG.info("Starting task [" + tsk + "] in parallel"); } tskRun.start(); } else { if (LOG.isInfoEnabled()) { LOG.info("Starting task [" + tsk + "] in serial mode"); } tskRun.runSequential(); } return tskRun; } public boolean isFetchingTable() { return fetchTask != null; } @SuppressWarnings("unchecked") public boolean getResults(List res) throws IOException, CommandNeedRetryException { if (lDrvState.driverState == DriverState.DESTROYED || lDrvState.driverState == DriverState.CLOSED) { throw new IOException("FAILED: query has been cancelled, closed, or destroyed."); } if (isFetchingTable()) { /** * If resultset serialization to thrift object is enabled, and if the destination table is * indeed written using ThriftJDBCBinarySerDe, read one row from the output sequence file, * since it is a blob of row batches. 
*/ if (fetchTask.getWork().isUsingThriftJDBCBinarySerDe()) { maxRows = 1; } fetchTask.setMaxRows(maxRows); return fetchTask.fetch(res); } if (resStream == null) { resStream = ctx.getStream(); } if (resStream == null) { return false; } int numRows = 0; String row = null; while (numRows < maxRows) { if (resStream == null) { if (numRows > 0) { return true; } else { return false; } } bos.reset(); Utilities.StreamStatus ss; try { ss = Utilities.readColumn(resStream, bos); if (bos.getLength() > 0) { row = new String(bos.getData(), 0, bos.getLength(), "UTF-8"); } else if (ss == Utilities.StreamStatus.TERMINATED) { row = new String(); } if (row != null) { numRows++; res.add(row); } row = null; } catch (IOException e) { console.printError("FAILED: Unexpected IO exception : " + e.getMessage()); return false; } if (ss == Utilities.StreamStatus.EOF) { resStream = ctx.getStream(); } } return true; } public void resetFetch() throws IOException { if (lDrvState.driverState == DriverState.DESTROYED || lDrvState.driverState == DriverState.CLOSED) { throw new IOException("FAILED: driver has been cancelled, closed or destroyed."); } if (isFetchingTable()) { try { fetchTask.clearFetch(); } catch (Exception e) { throw new IOException("Error closing the current fetch task", e); } // FetchTask should not depend on the plan. fetchTask.initialize(queryState, null, null, ctx.getOpContext()); } else { ctx.resetStream(); resStream = null; } } public int getTryCount() { return tryCount; } public void setTryCount(int tryCount) { this.tryCount = tryCount; } // DriverContext could be released in the query and close processes at same // time, which needs to be thread protected. private void releaseDriverContext() { lDrvState.stateLock.lock(); try { if (driverCxt != null) { driverCxt.shutdown(); driverCxt = null; } } catch (Exception e) { LOG.debug("Exception while shutting down the task runner", e); } finally { lDrvState.stateLock.unlock(); } } private void releasePlan() { try { if (plan != null) { fetchTask = plan.getFetchTask(); if (fetchTask != null) { fetchTask.setDriverContext(null); fetchTask.setQueryPlan(null); } } plan = null; } catch (Exception e) { LOG.debug("Exception while clearing the Fetch task", e); } } private void releaseContext() { try { if (ctx != null) { ctx.clear(); if (ctx.getHiveLocks() != null) { hiveLocks.addAll(ctx.getHiveLocks()); ctx.setHiveLocks(null); } ctx = null; } } catch (Exception e) { LOG.debug("Exception while clearing the context ", e); } } private void releaseResStream() { try { if (resStream != null) { ((FSDataInputStream) resStream).close(); resStream = null; } } catch (Exception e) { LOG.debug(" Exception while closing the resStream ", e); } } private void releaseFetchTask() { try { if (fetchTask != null) { fetchTask.clearFetch(); fetchTask = null; } } catch (Exception e) { LOG.debug(" Exception while clearing the FetchTask ", e); } } // Close and release resources within a running query process. Since it runs under // driver state COMPILING, EXECUTING or INTERRUPT, it would not have race condition // with the releases probably running in the other closing thread. 
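
getResults() above is deliberately resumable: each call appends at most maxRows rows to the caller's list and returns whether anything was read, so clients call it in a loop until it returns false. The sketch below shows that bounded-fetch contract over an ordinary BufferedReader; it is only an analogy for the ByteStream/Utilities.readColumn machinery the driver actually uses, and the BoundedFetchSketch class is hypothetical.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

// Illustrative sketch only: fetch at most maxRows rows per call and report whether any were read.
final class BoundedFetchSketch {
  private final BufferedReader reader;
  private int maxRows = 100;

  BoundedFetchSketch(BufferedReader reader) { this.reader = reader; }

  // Returns true if at least one row was added; callers keep calling until this returns false.
  boolean getResults(List<String> res) throws IOException {
    int numRows = 0;
    String row;
    while (numRows < maxRows && (row = reader.readLine()) != null) {
      res.add(row);
      numRows++;
    }
    return numRows > 0;
  }

  public static void main(String[] args) throws IOException {
    BoundedFetchSketch fetch =
        new BoundedFetchSketch(new BufferedReader(new StringReader("a\nb\nc")));
    List<String> rows = new ArrayList<>();
    while (fetch.getResults(rows)) { /* keep draining */ }
    System.out.println(rows);   // [a, b, c]
  }
}
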
private int closeInProcess(boolean destroyed) { releaseDriverContext(); releasePlan(); releaseFetchTask(); releaseResStream(); releaseContext(); if (SessionState.get() != null) { SessionState.get().getLineageState().clear(); } if (destroyed) { if (!hiveLocks.isEmpty()) { try { releaseLocksAndCommitOrRollback(false, null); } catch (LockException e) { LOG.warn("Exception when releasing locking in destroy: " + e.getMessage()); } } ShutdownHookManager.removeShutdownHook(shutdownRunner); } return 0; } // is called to stop the query if it is running, clean query results, and release resources. public int close() { lDrvState.stateLock.lock(); try { releaseDriverContext(); if (lDrvState.driverState == DriverState.COMPILING || lDrvState.driverState == DriverState.EXECUTING || lDrvState.driverState == DriverState.INTERRUPT) { lDrvState.driverState = DriverState.INTERRUPT; return 0; } releasePlan(); releaseFetchTask(); releaseResStream(); releaseContext(); lDrvState.driverState = DriverState.CLOSED; } finally { lDrvState.stateLock.unlock(); LockedDriverState.removeLockedDriverState(); } if (SessionState.get() != null) { SessionState.get().getLineageState().clear(); } return 0; } // is usually called after close() to commit or rollback a query and end the driver life cycle. // do not understand why it is needed and wonder if it could be combined with close. public void destroy() { lDrvState.stateLock.lock(); try { // in the cancel case where the driver state is INTERRUPTED, destroy will be deferred to // the query process if (lDrvState.driverState == DriverState.DESTROYED || lDrvState.driverState == DriverState.INTERRUPT) { return; } else { lDrvState.driverState = DriverState.DESTROYED; } } finally { lDrvState.stateLock.unlock(); } if (!hiveLocks.isEmpty()) { try { releaseLocksAndCommitOrRollback(false, null); } catch (LockException e) { LOG.warn("Exception when releasing locking in destroy: " + e.getMessage()); } } ShutdownHookManager.removeShutdownHook(shutdownRunner); } public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException { return plan.getQueryPlan(); } public String getErrorMsg() { return errorMessage; } public QueryDisplay getQueryDisplay() { return queryDisplay; } /** * Set the HS2 operation handle's guid string * @param opId base64 encoded guid string */ public void setOperationId(String opId) { this.operationId = opId; } /** * Resets QueryState to get new queryId on Driver reuse. */ public void resetQueryState() { // Note: Driver cleanup for reuse at this point is not very clear. The assumption here is that // repeated compile/execute calls create new contexts, plan, etc., so we don't need to worry // propagating queryState into those existing fields, or resetting them. releaseResources(); this.queryState = getNewQueryState(queryState.getConf()); } }
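
destroy() above releases any remaining locks and then removes the driver's shutdown hook, so the same cleanup does not run a second time at JVM exit. The sketch below shows that register-then-deregister idea using only the JDK Runtime API instead of Hadoop's ShutdownHookManager; the ShutdownHookSketch class and its releaseLocks() method are hypothetical placeholders.

// Illustrative sketch only: register a cleanup hook as a safety net for abrupt shutdown and
// deregister it once the normal teardown path has already done the cleanup, as Driver.destroy()
// does with ShutdownHookManager.removeShutdownHook. releaseLocks() is a placeholder.
final class ShutdownHookSketch {
  private final Thread hook = new Thread(this::releaseLocks, "cleanup-hook");

  ShutdownHookSketch() {
    Runtime.getRuntime().addShutdownHook(hook);    // safety net for abrupt JVM exit
  }

  void destroy() {
    releaseLocks();                                // normal, orderly cleanup
    Runtime.getRuntime().removeShutdownHook(hook); // avoid running the same cleanup twice
  }

  private void releaseLocks() {
    System.out.println("releasing locks");         // placeholder for real cleanup work
  }
}
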