org.apache.hive.hcatalog.templeton.LauncherDelegator.java Source code

Introduction

Here is the source code for org.apache.hive.hcatalog.templeton.LauncherDelegator.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.templeton;

import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.shims.HadoopShimsSecure;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hive.hcatalog.templeton.tool.JobState;
import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
import org.apache.hive.hcatalog.templeton.tool.TempletonStorage;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
import org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage;

/**
 * The helper class for all the Templeton delegator classes that
 * launch child jobs.
 */
public class LauncherDelegator extends TempletonDelegator {
    private static final Logger LOG = LoggerFactory.getLogger(LauncherDelegator.class);
    protected String runAs = null;

    public enum JobType {
        JAR, STREAMING, PIG, HIVE, SQOOP
    }

    private boolean secureMetastoreAccess = false;
    private final String HIVE_SHIMS_FILENAME_PATTERN = ".*hive-shims.*";
    private final String JOB_SUBMIT_EXECUTE_THREAD_PREFIX = "JobSubmitExecute";
    private final int jobTimeoutTaskRetryCount;
    private final int jobTimeoutTaskRetryIntervalInSec;

    /**
     * Name of the thread that created this delegator, used when naming the
     * job-submit execution threads.
     */
    private final String submitThreadId = Thread.currentThread().getName();

    /**
     * Job request executor to submit job requests.
     */
    private static JobRequestExecutor<EnqueueBean> jobRequest = new JobRequestExecutor<EnqueueBean>(
            JobRequestExecutor.JobRequestType.Submit, AppConfig.JOB_SUBMIT_MAX_THREADS,
            AppConfig.JOB_SUBMIT_TIMEOUT, false);

    public LauncherDelegator(AppConfig appConf) {
        super(appConf);
        jobTimeoutTaskRetryCount = appConf.getInt(AppConfig.JOB_TIMEOUT_TASK_RETRY_COUNT, 0);
        jobTimeoutTaskRetryIntervalInSec = appConf.getInt(AppConfig.JOB_TIMEOUT_TASK_RETRY_INTERVAL, 0);
    }

    public void registerJob(String id, String user, String callback, Map<String, Object> userArgs)
            throws IOException {
        JobState state = null;
        try {
            state = new JobState(id, Main.getAppConfigInstance());
            state.setUser(user);
            state.setCallback(callback);
            state.setUserArgs(userArgs);
        } finally {
            if (state != null)
                state.close();
        }
    }
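
    /*
     * Illustrative note, not part of the upstream source: each JobState setter is
     * understood to write through to the configured TempletonStorage implementation
     * (for example ZooKeeperStorage), which is why registerJob() only needs to set
     * the fields and close the state; nothing else has to be flushed explicitly.
     */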

    /*
     * Submits a job request. If a maximum number of concurrent job submit requests is
     * configured, the request is executed on a thread from the thread pool. If a job
     * submit timeout is configured, the execution thread is interrupted when the
     * request times out. A best effort is then made to determine whether the job was
     * already submitted and, if so, to kill it quietly.
     */
    public EnqueueBean enqueueController(final String user, final Map<String, Object> userArgs,
            final String callback, final List<String> args)
            throws NotAuthorizedException, BusyException, IOException, QueueException, TooManyRequestsException {

        EnqueueBean bean = null;
        final TempletonControllerJob controllerJob = getTempletonController();

        if (jobRequest.isThreadPoolEnabled()) {
            JobCallable<EnqueueBean> jobExecuteCallable = getJobSubmitTask(user, userArgs, callback, args,
                    controllerJob);
            try {
                bean = jobRequest.execute(jobExecuteCallable);
            } catch (TimeoutException ex) {
                /*
                 * The job request timed out and the job kill should have started.
                 * Return a QueueException to the client.
                 */
                throw new QueueException(ex.getMessage());
            } catch (InterruptedException ex) {
                /*
                 * The job request was interrupted and the job kill should have started.
                 * Return a QueueException to the client.
                 */
                throw new QueueException(ex.getMessage());
            } catch (ExecutionException ex) {
                /*
                 * ExecutionException is raised if the job execution throws an exception.
                 * Return it to the client as a QueueException.
                 */
                throw new QueueException(ex.getMessage());
            }
        } else {
            LOG.info("No thread pool configured for submit job request. Executing "
                    + "the job request in current thread.");

            bean = enqueueJob(user, userArgs, callback, args, controllerJob);
        }

        return bean;
    }
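
    /*
     * Illustrative sketch, not part of the upstream source: a typical caller (for
     * example a REST resource method in this package's Server class) drives
     * enqueueController() roughly as follows. The public 'id' field of EnqueueBean
     * is assumed here; check the bean class for the exact accessor.
     *
     *   LauncherDelegator delegator = new HiveDelegator(appConf);
     *   EnqueueBean bean = delegator.enqueueController(user, userArgs, callbackUrl, launcherArgs);
     *   String jobId = bean.id;
     *
     * Whether the request runs inline or on the shared pool depends on
     * AppConfig.JOB_SUBMIT_MAX_THREADS and AppConfig.JOB_SUBMIT_TIMEOUT.
     */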

    /*
     * Creates the job callable task for the job submit operation. Overrides execute()
     * to submit the job, and overrides cleanup() to kill the job if the submission
     * request times out or is interrupted.
     */
    private JobCallable<EnqueueBean> getJobSubmitTask(final String user, final Map<String, Object> userArgs,
            final String callback, final List<String> args, final TempletonControllerJob controllerJob) {
        return new JobCallable<EnqueueBean>() {
            @Override
            public EnqueueBean execute() throws NotAuthorizedException, BusyException, IOException, QueueException {
                /*
                 * Rename the current thread to include the parent thread's name when
                 * running in the thread pool. This makes it easy to extract the logs
                 * for a specific job request when debugging.
                 */
                Thread.currentThread().setName(String.format("%s-%s-%s", JOB_SUBMIT_EXECUTE_THREAD_PREFIX,
                        submitThreadId, Thread.currentThread().getId()));

                return enqueueJob(user, userArgs, callback, args, controllerJob);
            }

            @Override
            public void cleanup() {
                /*
                 * The job status could not be set to COMPLETED, which means the main
                 * thread has already exited and is no longer waiting for the result.
                 * Kill the submitted job.
                 */
                LOG.info("Job kill not done by main thread. Trying to kill now.");
                killTempletonJobWithRetry(user, controllerJob.getSubmittedId());
            }
        };
    }

    /**
     * Enqueue the TempletonControllerJob, directly calling doAs.
     */
    public EnqueueBean enqueueJob(String user, Map<String, Object> userArgs, String callback, List<String> args,
            TempletonControllerJob controllerJob)
            throws NotAuthorizedException, BusyException, IOException, QueueException {
        UserGroupInformation ugi = null;
        try {
            ugi = UgiFactory.getUgi(user);

            final long startTime = System.nanoTime();

            String id = queueAsUser(ugi, args, controllerJob);

            long elapsed = ((System.nanoTime() - startTime) / ((int) 1e6));
            LOG.debug("queued job " + id + " in " + elapsed + " ms");

            if (id == null) {
                throw new QueueException("Unable to get job id");
            }

            registerJob(id, user, callback, userArgs);

            return new EnqueueBean(id);
        } catch (InterruptedException e) {
            throw new QueueException("Unable to launch job " + e);
        } finally {
            if (ugi != null) {
                FileSystem.closeAllForUGI(ugi);
            }
        }
    }

    private String queueAsUser(UserGroupInformation ugi, final List<String> args,
            final TempletonControllerJob controllerJob) throws IOException, InterruptedException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Launching job: " + args);
        }
        return ugi.doAs(new PrivilegedExceptionAction<String>() {
            public String run() throws Exception {
                runTempletonControllerJob(controllerJob, args);
                return controllerJob.getSubmittedId();
            }
        });
    }

    /*
     * Kills the templeton job, retrying multiple times if necessary, provided the job
     * exists. Returns true if a kill attempt succeeds; otherwise returns false.
     */
    private boolean killTempletonJobWithRetry(String user, String jobId) {
        /*
         * Null-safe check that the job submission went through and that the job id
         * is valid.
         */
        if (StringUtils.startsWith(jobId, "job_")) {
            LOG.info("Started killing the job " + jobId);

            boolean success = false;
            int count = 0;
            do {
                try {
                    count++;
                    killJob(user, jobId);
                    success = true;
                    LOG.info("Kill job attempt succeeded.");
                } catch (Exception e) {
                    LOG.info("Failed to kill the job due to exception: " + e.getMessage());
                    LOG.info("Waiting for " + jobTimeoutTaskRetryIntervalInSec + "s before retrying "
                            + "the operation. Iteration: " + count);
                    try {
                        Thread.sleep(jobTimeoutTaskRetryIntervalInSec * 1000);
                    } catch (InterruptedException ex) {
                        LOG.info("Got interrupted while waiting for next retry.");
                    }
                }
            } while (!success && count < jobTimeoutTaskRetryCount);

            return success;
        } else {
            LOG.info("Couldn't find a valid job id after job request is timed out.");
            return false;
        }
    }
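
    /*
     * Illustrative note, not part of the upstream source: with
     * AppConfig.JOB_TIMEOUT_TASK_RETRY_COUNT resolved to 3 and
     * AppConfig.JOB_TIMEOUT_TASK_RETRY_INTERVAL resolved to 10, the loop above makes
     * up to three kill attempts, sleeping ten seconds after each failure, for a worst
     * case of roughly 30 seconds. Because it is a do-while, at least one attempt is
     * made even when the retry count is configured as 0.
     */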

    /*
     * Gets a new templeton controller job.
     */
    protected TempletonControllerJob getTempletonController() {
        return new TempletonControllerJob(secureMetastoreAccess, appConf);
    }

    /*
     * Runs the templeton controller job with 'args', using ToolRunner to run
     * the actual job.
     */
    protected int runTempletonControllerJob(TempletonControllerJob controllerJob, List<String> args)
            throws IOException, InterruptedException, TimeoutException, Exception {
        String[] array = new String[args.size()];
        return ToolRunner.run(controllerJob, args.toArray(array));
    }

    /*
     * Uses DeleteDelegator to kill a job. Exceptions are propagated to the caller.
     */
    protected void killJob(String user, String jobId)
            throws NotAuthorizedException, BadParam, IOException, InterruptedException {
        DeleteDelegator d = new DeleteDelegator(appConf);
        d.run(user, jobId);
    }

    public List<String> makeLauncherArgs(AppConfig appConf, String statusdir, String completedUrl,
            List<String> copyFiles, boolean enablelog, Boolean enableJobReconnect, JobType jobType) {
        ArrayList<String> args = new ArrayList<String>();

        //note that in ToolRunner this is expected to be a local FS path
        //see GenericOptionsParser.getLibJars()
        args.add("-libjars");

        // Include shim and admin specified libjars
        String libJars = String.format("%s,%s", getShimLibjars(), appConf.libJars());
        args.add(libJars);

        addCacheFiles(args, appConf);

        // Hadoop vars
        addDef(args, "user.name", runAs);
        addDef(args, AppConfig.HADOOP_SPECULATIVE_NAME, "false");
        addDef(args, AppConfig.HADOOP_CHILD_JAVA_OPTS, appConf.controllerMRChildOpts());

        // Internal vars
        addDef(args, TempletonControllerJob.STATUSDIR_NAME, statusdir);
        //Use of ToolRunner "-files" option could be considered here
        addDef(args, TempletonControllerJob.COPY_NAME, TempletonUtils.encodeArray(copyFiles));
        addDef(args, TempletonControllerJob.OVERRIDE_CLASSPATH, makeOverrideClasspath(appConf));
        addDef(args, TempletonControllerJob.ENABLE_LOG, Boolean.toString(enablelog));
        addDef(args, TempletonControllerJob.JOB_TYPE, jobType.toString());
        addDef(args, TempletonControllerJob.TEMPLETON_JOB_LAUNCH_TIME_NAME,
                Long.toString(System.currentTimeMillis()));

        if (enableJobReconnect == null) {
            // If the enablejobreconnect param was not passed by the user, use the
            // cluster-wide default
            if (appConf.enableJobReconnectDefault() != null) {
                enableJobReconnect = Boolean.parseBoolean(appConf.enableJobReconnectDefault());
            } else {
                // default is false
                enableJobReconnect = false;
            }
        }
        addDef(args, TempletonControllerJob.ENABLE_JOB_RECONNECT, Boolean.toString(enableJobReconnect));

        // Hadoop queue information
        addDef(args, "mapred.job.queue.name", appConf.hadoopQueueName());

        // Job vars
        addStorageVars(args);
        addCompletionVars(args, completedUrl);

        return args;
    }
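
    /*
     * Illustrative sketch, not part of the upstream source: the returned list has
     * roughly this shape (property keys abbreviated to their constant names, values
     * invented):
     *
     *   -libjars <shim jars>,<admin libjars>
     *   -D user.name=<runAs>
     *   -D <HADOOP_SPECULATIVE_NAME>=false
     *   -D <STATUSDIR_NAME>=/tmp/status
     *   -D <ENABLE_JOB_RECONNECT>=false
     *   ...
     *
     * ToolRunner's GenericOptionsParser consumes the -libjars and -D pairs in
     * runTempletonControllerJob() before the controller job itself runs.
     */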

    /**
     * Dynamically determine the list of hive shim jars that need to be added
     * to the Templeton launcher job classpath.
     */
    private String getShimLibjars() {
        WebHCatJTShim shim = null;
        UserGroupInformation ugi = null;
        try {
            ugi = UserGroupInformation.getCurrentUser();
            shim = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi);

            // Besides the HiveShims jar, which is Hadoop version dependent, we also
            // always need to include the hive shims common jars.
            Path shimCommonJar = new Path(
                    TempletonUtils.findContainingJar(ShimLoader.class, HIVE_SHIMS_FILENAME_PATTERN));
            Path shimCommonSecureJar = new Path(
                    TempletonUtils.findContainingJar(HadoopShimsSecure.class, HIVE_SHIMS_FILENAME_PATTERN));
            Path shimJar = new Path(TempletonUtils.findContainingJar(shim.getClass(), HIVE_SHIMS_FILENAME_PATTERN));

            return String.format("%s,%s,%s", shimCommonJar.toString(), shimCommonSecureJar.toString(),
                    shimJar.toString());
        } catch (IOException e) {
            throw new RuntimeException("Failed to get shimLibJars", e);
        } finally {
            try {
                if (ugi != null) {
                    FileSystem.closeAllForUGI(ugi);
                }
            } catch (IOException e) {
                throw new RuntimeException("Failed to closeAllForUGI", e);
            }
        }

    }

    // Storage vars
    private void addStorageVars(List<String> args) {
        addDef(args, TempletonStorage.STORAGE_CLASS, appConf.get(TempletonStorage.STORAGE_CLASS));
        addDef(args, TempletonStorage.STORAGE_ROOT, appConf.get(TempletonStorage.STORAGE_ROOT));
        addDef(args, ZooKeeperStorage.ZK_HOSTS, appConf.get(ZooKeeperStorage.ZK_HOSTS));
        addDef(args, ZooKeeperStorage.ZK_SESSION_TIMEOUT, appConf.get(ZooKeeperStorage.ZK_SESSION_TIMEOUT));
    }

    // Completion notifier vars
    private void addCompletionVars(List<String> args, String completedUrl) {
        addDef(args, AppConfig.HADOOP_END_RETRY_NAME, appConf.get(AppConfig.CALLBACK_RETRY_NAME));
        addDef(args, AppConfig.HADOOP_END_INTERVAL_NAME, appConf.get(AppConfig.CALLBACK_INTERVAL_NAME));
        addDef(args, AppConfig.HADOOP_END_URL_NAME, completedUrl);
    }

    /**
     * Add files to the Distributed Cache for the controller job.
     */
    public static void addCacheFiles(List<String> args, AppConfig appConf) {
        String overrides = appConf.overrideJarsString();
        if (overrides != null) {
            args.add("-files");
            args.add(overrides);
        }
    }

    /**
     * Create the override classpath, which will be added to
     * HADOOP_CLASSPATH at runtime by the controller job.
     */
    public static String makeOverrideClasspath(AppConfig appConf) {
        String[] overrides = appConf.overrideJars();
        if (overrides == null) {
            return null;
        }

        ArrayList<String> cp = new ArrayList<String>();
        for (String fname : overrides) {
            Path p = new Path(fname);
            cp.add(p.getName());
        }
        return StringUtils.join(cp, ":");
    }
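
    /*
     * Illustrative sketch, not part of the upstream source: demonstrates what
     * makeOverrideClasspath produces. Only the jar file names survive, joined with
     * ':' so the controller job can prepend them to HADOOP_CLASSPATH at runtime.
     * The method is unused by the delegator itself and exists only as documentation.
     */
    @SuppressWarnings("unused")
    private static void makeOverrideClasspathExample() {
        ArrayList<String> cp = new ArrayList<String>();
        cp.add(new Path("/opt/hive/lib/a.jar").getName()); // "a.jar"
        cp.add(new Path("/opt/hive/lib/b.jar").getName()); // "b.jar"
        String classpath = StringUtils.join(cp, ":");      // "a.jar:b.jar"
    }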

    /**
     * Add a Hadoop command line definition to args if the value is
     * not null.
     */
    public static void addDef(List<String> args, String name, String val) {
        if (val != null) {
            args.add("-D");
            args.add(name + "=" + val);
        }
    }
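
    /*
     * Illustrative sketch, not part of the upstream source: shows the exact argument
     * shape addDef emits, including the null-skipping behavior. The method is unused
     * by the delegator itself and exists only as documentation.
     */
    @SuppressWarnings("unused")
    private static void addDefExample() {
        List<String> args = new ArrayList<String>();
        addDef(args, "user.name", "alice");          // appends "-D" then "user.name=alice"
        addDef(args, "mapred.job.queue.name", null); // null value: nothing appended
        // args is now ["-D", "user.name=alice"]
    }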

    /**
     * This is called by subclasses when they determine that the submitted job requires
     * metastore access (e.g. a Pig job that uses HCatalog).  It then determines whether
     * secure access is required and, if so, causes TempletonControllerJob to set up a
     * delegation token.
     * @see TempletonControllerJob
     */
    void addHiveMetaStoreTokenArg() {
        //in order for this to work hive-site.xml must be on the classpath
        HiveConf hiveConf = new HiveConf();
        if (!hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL)) {
            return;
        }
        secureMetastoreAccess = true;
    }
}
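
Example

A minimal sketch, not part of the listing above, showing how a delegator could be driven from inside the same package. It assumes a running WebHCat server context, so that Main.getAppConfigInstance() returns a populated AppConfig; the user name, URLs, and status directory are invented for illustration, and EnqueueBean is assumed to expose the job id as a public 'id' field.

package org.apache.hive.hcatalog.templeton;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class LauncherDelegatorExample {
    public static void main(String[] cmdline) throws Exception {
        AppConfig appConf = Main.getAppConfigInstance();

        // HiveDelegator is a concrete LauncherDelegator subclass in this package.
        HiveDelegator delegator = new HiveDelegator(appConf);

        // Build the launcher arguments: no extra files to copy (null is assumed to
        // be accepted here), logging enabled, and job reconnect left to the
        // cluster-wide default by passing null.
        List<String> launcherArgs = delegator.makeLauncherArgs(appConf,
                "/tmp/status",                     // statusdir
                "http://client.example.com/done",  // completedUrl
                null,                              // copyFiles
                true,                              // enablelog
                null,                              // enableJobReconnect
                LauncherDelegator.JobType.HIVE);

        // Submit through the controller job; depending on configuration this runs
        // inline or on the shared job-submit thread pool.
        Map<String, Object> userArgs = new HashMap<String, Object>();
        EnqueueBean bean = delegator.enqueueController("alice", userArgs,
                "http://client.example.com/done", launcherArgs);
        System.out.println("Submitted job " + bean.id);
    }
}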