gobblin.yarn.GobblinYarnAppLauncher.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.yarn.GobblinYarnAppLauncher.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.yarn;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.mail.EmailException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.Records;

import org.apache.helix.Criteria;
import org.apache.helix.HelixManager;
import org.apache.helix.HelixManagerFactory;
import org.apache.helix.InstanceType;
import org.apache.helix.model.Message;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.eventbus.EventBus;
import com.google.common.eventbus.Subscribe;
import com.google.common.io.Closer;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.Service;
import com.google.common.util.concurrent.ServiceManager;

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import gobblin.admin.AdminWebServer;
import gobblin.cluster.GobblinClusterConfigurationKeys;
import gobblin.cluster.GobblinClusterUtils;
import gobblin.cluster.GobblinHelixConstants;
import gobblin.cluster.HelixUtils;
import gobblin.configuration.ConfigurationKeys;
import gobblin.rest.JobExecutionInfoServer;
import gobblin.util.ConfigUtils;
import gobblin.util.EmailUtils;
import gobblin.util.ExecutorsUtils;
import gobblin.util.io.StreamUtils;
import gobblin.util.JvmUtils;
import gobblin.util.logs.LogCopier;
import gobblin.yarn.event.ApplicationReportArrivalEvent;
import gobblin.yarn.event.GetApplicationReportFailureEvent;

/**
 * A client driver to launch Gobblin as a Yarn application.
 *
 * <p>
 *   This class, upon starting, will check if there's a Yarn application that it has previously submitted and
 *   it is able to reconnect to. More specifically, it checks if an application with the same application name
 *   exists and can be reconnected to, i.e., if the application has not completed yet. If so, it simply starts
 *   monitoring that application.
 * </p>
 *
 * <p>
 *   On the other hand, if there's no such reconnectable Yarn application, this class will launch a new Yarn
 *   application and start the {@link GobblinApplicationMaster}. It also persists the new application ID so it
 *   is able to reconnect to the Yarn application if it is restarted for some reason. Once the application is
 *   launched, this class starts to monitor the application by periodically polling the status of the application
 *   through a {@link ListeningExecutorService}.
 * </p>
 *
 * <p>
 *   If a shutdown signal is received, it sends a Helix
 *   {@link org.apache.helix.model.Message.MessageType#SCHEDULER_MSG} to the {@link GobblinApplicationMaster}
 *   asking it to shutdown and release all the allocated containers. It also sends an email notification for
 *   the shutdown if {@link GobblinYarnConfigurationKeys#EMAIL_NOTIFICATION_ON_SHUTDOWN_KEY} is {@code true}.
 * </p>
 *
 * <p>
 *   This class has a scheduled task to get the {@link ApplicationReport} of the Yarn application periodically.
 *   Since it may fail to get the {@link ApplicationReport} for reasons such as the Yarn cluster being down for
 *   maintenance, it keeps track of the count of consecutive failures to get the {@link ApplicationReport}. If
 *   this count exceeds the maximum number allowed, it will initiate a shutdown.
 * </p>
 *
 * @author Yinan Li
 */
public class GobblinYarnAppLauncher {

    // Logger used by this launcher.
    private static final Logger LOGGER = LoggerFactory.getLogger(GobblinYarnAppLauncher.class);

    // Splits comma-separated configuration values, trimming each entry and dropping empty ones.
    private static final Splitter SPLITTER = Splitter.on(',').omitEmptyStrings().trimResults();

    // Application type set on the submission context of every application this class submits.
    private static final String GOBBLIN_YARN_APPLICATION_TYPE = "GOBBLIN_YARN";

    // The set of Yarn application types this class is interested in. This is used to
    // lookup the application this class has launched previously upon restarting.
    private static final Set<String> APPLICATION_TYPES = ImmutableSet.of(GOBBLIN_YARN_APPLICATION_TYPE);

    // The set of Yarn application states under which the driver can reconnect to the Yarn application after restart
    private static final EnumSet<YarnApplicationState> RECONNECTABLE_APPLICATION_STATES = EnumSet.of(
            YarnApplicationState.NEW, YarnApplicationState.NEW_SAVING, YarnApplicationState.SUBMITTED,
            YarnApplicationState.ACCEPTED, YarnApplicationState.RUNNING);

    // Name of the Yarn application; also used to match a previously submitted application on restart.
    private final String applicationName;
    // Yarn queue the application is submitted to.
    private final String appQueueName;

    // The configuration this launcher was constructed with.
    private final Config config;

    // Helix manager created as a SPECTATOR instance of the configured Helix cluster.
    private final HelixManager helixManager;

    // Yarn configuration used to initialize the Yarn client and to obtain file systems.
    private final Configuration yarnConfiguration;
    private final YarnClient yarnClient;
    // File system that jars and files are copied to; registered with the closer below.
    private final FileSystem fs;

    // Event bus on which application-report arrival/failure events are posted; this class subscribes to it.
    private final EventBus eventBus = new EventBus(GobblinYarnAppLauncher.class.getSimpleName());

    // Single-threaded scheduler that periodically fetches the ApplicationReport.
    private final ScheduledExecutorService applicationStatusMonitor;
    // Interval, in minutes, between two consecutive ApplicationReport fetches.
    private final long appReportIntervalMinutes;

    // Optional JVM arguments included in the ApplicationMaster launch command.
    private final Optional<String> appMasterJvmArgs;

    // Root directory under which the sink directory for copied logs is created.
    private final Path sinkLogRootDir;

    // Collects closeable resources (e.g. file systems) that are closed when this launcher stops.
    private final Closer closer = Closer.create();

    // Yarn application ID; absent until launch() has found or submitted an application
    private volatile Optional<ApplicationId> applicationId = Optional.absent();

    // Manager of the services started by launch(); absent until launch() has created it.
    private volatile Optional<ServiceManager> serviceManager = Optional.absent();

    // Maximum number of consecutive failures allowed to get the ApplicationReport
    private final int maxGetApplicationReportFailures;

    // A count on the number of consecutive failures on getting the ApplicationReport
    private final AtomicInteger getApplicationReportFailureCount = new AtomicInteger();

    // This flag tells if the Yarn application has already completed. This is used to
    // tell if it is necessary to send a shutdown message to the ApplicationMaster.
    private volatile boolean applicationCompleted = false;

    // Set once stop() has run to completion so that later calls to stop() return immediately.
    private volatile boolean stopped = false;

    // Whether an email notification is sent when this launcher shuts down.
    private final boolean emailNotificationOnShutdown;

    /**
     * Creates a launcher from the given Gobblin and Yarn configurations.
     *
     * <p>
     *   The constructor only reads configuration and creates (but does not connect or start) the Helix
     *   manager, the Yarn client and the status monitor. Note that it sets {@code fs.automatic.close} to
     *   {@code false} on the supplied {@link YarnConfiguration}.
     * </p>
     *
     * @param config the application configuration
     * @param yarnConfiguration the Yarn configuration used for the Yarn client and the file system
     * @throws IOException if the {@link FileSystem} cannot be obtained
     */
    public GobblinYarnAppLauncher(Config config, YarnConfiguration yarnConfiguration) throws IOException {
        this.config = config;

        this.applicationName = config.getString(GobblinYarnConfigurationKeys.APPLICATION_NAME_KEY);
        this.appQueueName = config.getString(GobblinYarnConfigurationKeys.APP_QUEUE_KEY);

        // Helix manager, created as a spectator of the configured cluster
        String zkQuorum = config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
        LOGGER.info("Using ZooKeeper connection string: " + zkQuorum);
        String helixClusterName = config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
        this.helixManager = HelixManagerFactory.getZKHelixManager(helixClusterName,
                GobblinClusterUtils.getHostname(), InstanceType.SPECTATOR, zkQuorum);

        // Yarn client
        this.yarnConfiguration = yarnConfiguration;
        this.yarnConfiguration.set("fs.automatic.close", "false");
        this.yarnClient = YarnClient.createYarnClient();
        this.yarnClient.init(this.yarnConfiguration);

        // File system: use the explicitly configured URI if there is one, the default file system otherwise
        if (config.hasPath(ConfigurationKeys.FS_URI_KEY)) {
            URI fsUri = URI.create(config.getString(ConfigurationKeys.FS_URI_KEY));
            this.fs = FileSystem.get(fsUri, this.yarnConfiguration);
        } else {
            this.fs = FileSystem.get(this.yarnConfiguration);
        }
        this.closer.register(this.fs);

        // Application status monitoring
        this.applicationStatusMonitor = Executors.newSingleThreadScheduledExecutor(
                ExecutorsUtils.newThreadFactory(Optional.of(LOGGER), Optional.of("GobblinYarnAppStatusMonitor")));
        this.appReportIntervalMinutes =
                config.getLong(GobblinYarnConfigurationKeys.APP_REPORT_INTERVAL_MINUTES_KEY);

        if (config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_JVM_ARGS_KEY)) {
            this.appMasterJvmArgs =
                    Optional.of(config.getString(GobblinYarnConfigurationKeys.APP_MASTER_JVM_ARGS_KEY));
        } else {
            this.appMasterJvmArgs = Optional.<String>absent();
        }

        this.sinkLogRootDir = new Path(config.getString(GobblinYarnConfigurationKeys.LOGS_SINK_ROOT_DIR_KEY));

        this.maxGetApplicationReportFailures =
                config.getInt(GobblinYarnConfigurationKeys.MAX_GET_APP_REPORT_FAILURES_KEY);

        this.emailNotificationOnShutdown =
                config.getBoolean(GobblinYarnConfigurationKeys.EMAIL_NOTIFICATION_ON_SHUTDOWN_KEY);
    }

    /**
     * Launch Gobblin on Yarn, or reconnect to a matching application that is still reconnectable.
     *
     * <p>
     *   This creates the Helix cluster, connects the Helix manager, starts the Yarn client, resolves the
     *   application ID, schedules the periodic application report polling and finally starts the
     *   driver-side services.
     * </p>
     *
     * @throws IOException if there's something wrong launching the application
     * @throws YarnException if there's something wrong launching the application
     */
    public void launch() throws IOException, YarnException {
        this.eventBus.register(this);

        String helixClusterName = this.config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
        String zkConnectionString =
                this.config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY);
        HelixUtils.createGobblinHelixCluster(zkConnectionString, helixClusterName);
        LOGGER.info("Created Helix cluster " + helixClusterName);

        connectHelixManager();

        startYarnClient();

        this.applicationId = getApplicationId();

        // Periodically fetch the application report and publish the outcome on the event bus
        Runnable applicationReportPoller = new Runnable() {
            @Override
            public void run() {
                try {
                    ApplicationReport report = yarnClient.getApplicationReport(applicationId.get());
                    eventBus.post(new ApplicationReportArrivalEvent(report));
                } catch (YarnException | IOException e) {
                    LOGGER.error(
                            "Failed to get application report for Gobblin Yarn application " + applicationId.get(),
                            e);
                    eventBus.post(new GetApplicationReportFailureEvent(e));
                }
            }
        };
        this.applicationStatusMonitor.scheduleAtFixedRate(applicationReportPoller, 0,
                this.appReportIntervalMinutes, TimeUnit.MINUTES);

        List<Service> services = Lists.newArrayList();

        if (this.config.hasPath(GobblinYarnConfigurationKeys.KEYTAB_FILE_PATH)) {
            LOGGER.info("Adding YarnAppSecurityManager since login is keytab based");
            services.add(buildYarnAppSecurityManager());
        }

        // The driver copies logs unless that has been explicitly disabled
        boolean driverLogCopyDisabled =
                this.config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY)
                        && this.config.getBoolean(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY);
        if (!driverLogCopyDisabled) {
            String appIdString = this.applicationId.get().toString();
            Path sinkLogDir = new Path(this.sinkLogRootDir, this.applicationName + Path.SEPARATOR + appIdString);
            Path appWorkDir = GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, appIdString);
            services.add(buildLogCopier(this.config, sinkLogDir, appWorkDir));
        }

        // The admin UI needs the job execution info server, so it is only started together with it
        boolean jobExecInfoServerEnabled = config.getBoolean(ConfigurationKeys.JOB_EXECINFO_SERVER_ENABLED_KEY);
        if (jobExecInfoServerEnabled) {
            LOGGER.info("Starting the job execution info server since it is enabled");
            Properties serverProperties = ConfigUtils.configToProperties(config);
            JobExecutionInfoServer jobExecInfoServer = new JobExecutionInfoServer(serverProperties);
            services.add(jobExecInfoServer);
            if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
                LOGGER.info("Starting the admin UI server since it is enabled");
                services.add(new AdminWebServer(serverProperties, jobExecInfoServer.getAdvertisedServerUri()));
            }
        } else if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
            LOGGER.warn("NOT starting the admin UI because the job execution info server is NOT enabled");
        }

        // Start all the services collected above
        ServiceManager manager = new ServiceManager(services);
        this.serviceManager = Optional.of(manager);
        manager.startAsync();
    }

    /**
     * Stop this {@link GobblinYarnAppLauncher} instance. Calls made after a successful stop return immediately.
     *
     * <p>
     *   The application work directory is cleaned up and the registered resources are closed even if one
     *   of the earlier shutdown steps fails.
     * </p>
     *
     * @throws IOException if this {@link GobblinYarnAppLauncher} instance fails to clean up its working directory.
     * @throws TimeoutException if the services do not stop within the allowed time
     */
    public synchronized void stop() throws IOException, TimeoutException {
        if (this.stopped) {
            return;
        }

        LOGGER.info("Stopping the " + GobblinYarnAppLauncher.class.getSimpleName());

        try {
            // Only send the shutdown message if the application has been successfully submitted and is still running
            boolean applicationStillRunning = this.applicationId.isPresent() && !this.applicationCompleted;
            if (applicationStillRunning) {
                sendShutdownRequest();
            }

            if (this.serviceManager.isPresent()) {
                ServiceManager manager = this.serviceManager.get();
                manager.stopAsync().awaitStopped(5, TimeUnit.MINUTES);
            }

            ExecutorsUtils.shutdownExecutorService(this.applicationStatusMonitor, Optional.of(LOGGER), 5,
                    TimeUnit.MINUTES);

            stopYarnClient();

            disconnectHelixManager();
        } finally {
            // Clean up first, then close the registered resources no matter how the cleanup went
            try {
                if (this.applicationId.isPresent()) {
                    cleanUpAppWorkDirectory(this.applicationId.get());
                }
            } finally {
                this.closer.close();
            }
        }

        this.stopped = true;
    }

    /**
     * Handles a newly arrived {@link ApplicationReport}: resets the consecutive failure count and, if the
     * application has reached a terminal state, stops this launcher and optionally sends the shutdown email.
     *
     * @param applicationReportArrivalEvent the event carrying the {@link ApplicationReport}
     */
    @Subscribe
    public void handleApplicationReportArrivalEvent(ApplicationReportArrivalEvent applicationReportArrivalEvent) {
        ApplicationReport report = applicationReportArrivalEvent.getApplicationReport();

        YarnApplicationState state = report.getYarnApplicationState();
        LOGGER.info("Gobblin Yarn application state: " + state.toString());

        // Reset the count on failures to get the ApplicationReport when there's one success
        this.getApplicationReportFailureCount.set(0);

        switch (state) {
        case FINISHED:
        case FAILED:
        case KILLED:
            break;
        default:
            // The application has not completed yet, nothing more to do
            return;
        }

        this.applicationCompleted = true;

        FinalApplicationStatus finalStatus = report.getFinalApplicationStatus();
        LOGGER.info("Gobblin Yarn application finished with final status: " + finalStatus.toString());
        if (finalStatus == FinalApplicationStatus.FAILED) {
            LOGGER.error("Gobblin Yarn application failed for the following reason: " + report.getDiagnostics());
        }

        try {
            stop();
        } catch (IOException ioe) {
            LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe);
        } catch (TimeoutException te) {
            LOGGER.error("Timeout in stopping the service manager", te);
        } finally {
            if (this.emailNotificationOnShutdown) {
                sendEmailOnShutdown(Optional.of(report));
            }
        }
    }

    /**
     * Handles a failure to get the {@link ApplicationReport}: counts it and, once the number of consecutive
     * failures exceeds the configured maximum, stops this launcher and optionally sends the shutdown email.
     *
     * @param getApplicationReportFailureEvent the failure event (its content is not used)
     */
    @Subscribe
    public void handleGetApplicationReportFailureEvent(
            @SuppressWarnings("unused") GetApplicationReportFailureEvent getApplicationReportFailureEvent) {
        int failuresInARow = this.getApplicationReportFailureCount.incrementAndGet();
        if (failuresInARow <= this.maxGetApplicationReportFailures) {
            // Still within the tolerated number of consecutive failures
            return;
        }

        LOGGER.warn(String.format(
                "Number of consecutive failures to get the ApplicationReport %d exceeds the threshold %d",
                failuresInARow, this.maxGetApplicationReportFailures));

        try {
            stop();
        } catch (IOException ioe) {
            LOGGER.error("Failed to close the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe);
        } catch (TimeoutException te) {
            LOGGER.error("Timeout in stopping the service manager", te);
        } finally {
            if (this.emailNotificationOnShutdown) {
                sendEmailOnShutdown(Optional.<ApplicationReport>absent());
            }
        }
    }

    /**
     * Connects the {@link HelixManager}; any failure is logged and then propagated to the caller.
     */
    @VisibleForTesting
    void connectHelixManager() {
        try {
            helixManager.connect();
        } catch (Exception connectFailure) {
            LOGGER.error("HelixManager failed to connect", connectFailure);
            throw Throwables.propagate(connectFailure);
        }
    }

    /**
     * Disconnects the {@link HelixManager} if it is currently connected.
     */
    @VisibleForTesting
    void disconnectHelixManager() {
        if (!helixManager.isConnected()) {
            return;
        }
        helixManager.disconnect();
    }

    /**
     * Starts the {@link YarnClient}.
     */
    @VisibleForTesting
    void startYarnClient() {
        yarnClient.start();
    }

    /**
     * Stops the {@link YarnClient}.
     */
    @VisibleForTesting
    void stopYarnClient() {
        yarnClient.stop();
    }

    /**
     * Gets the ID of the application to monitor: the reconnectable one if there is any, otherwise the ID
     * of a newly submitted application.
     *
     * @return the application ID
     * @throws YarnException if looking up or submitting the application fails
     * @throws IOException if looking up or submitting the application fails
     */
    private Optional<ApplicationId> getApplicationId() throws YarnException, IOException {
        Optional<ApplicationId> existingApplicationId = getReconnectableApplicationId();
        if (!existingApplicationId.isPresent()) {
            LOGGER.info("No reconnectable application found so submitting a new application");
            return Optional.of(setupAndSubmitApplication());
        }

        LOGGER.info("Found reconnectable application with application ID: " + existingApplicationId.get());
        return existingApplicationId;
    }

    /**
     * Looks for an application of the Gobblin Yarn application type that is in a reconnectable state and
     * has the same name as this launcher's application.
     *
     * @return the ID of the first matching application, or absent if there is none
     * @throws YarnException if the applications cannot be listed
     * @throws IOException if the applications cannot be listed
     */
    @VisibleForTesting
    Optional<ApplicationId> getReconnectableApplicationId() throws YarnException, IOException {
        List<ApplicationReport> candidates =
                this.yarnClient.getApplications(APPLICATION_TYPES, RECONNECTABLE_APPLICATION_STATES);

        // A null or empty list simply yields no match
        if (candidates != null) {
            for (ApplicationReport candidate : candidates) {
                if (this.applicationName.equals(candidate.getName())) {
                    return Optional.of(candidate.getApplicationId());
                }
            }
        }

        return Optional.absent();
    }

    /**
     * Set up a new Gobblin Yarn application and submit it: prepares the ApplicationMaster resource
     * requirements, local resources, environment, launch command and (if security is enabled) tokens, copies
     * the container resources, and submits the application through the {@link YarnClient}.
     *
     * @return the {@link ApplicationId} of the submitted application
     * @throws IOException if there's anything wrong setting up and submitting the Yarn application
     * @throws YarnException if there's anything wrong setting up and submitting the Yarn application
     */
    @VisibleForTesting
    ApplicationId setupAndSubmitApplication() throws IOException, YarnException {
        YarnClientApplication yarnApp = this.yarnClient.createApplication();
        ApplicationSubmissionContext submissionContext = yarnApp.getApplicationSubmissionContext();
        submissionContext.setApplicationType(GOBBLIN_YARN_APPLICATION_TYPE);
        ApplicationId newApplicationId = submissionContext.getApplicationId();

        // Resource requirements of the ApplicationMaster, capped by what the cluster reports as maximum
        Resource amResource = prepareContainerResource(yarnApp.getNewApplicationResponse());

        // Lib jars, and jars and files that the ApplicationMaster needs as LocalResources
        Map<String, LocalResource> amLocalResources = addAppMasterLocalResources(newApplicationId);

        ContainerLaunchContext amLaunchContext = Records.newRecord(ContainerLaunchContext.class);
        amLaunchContext.setLocalResources(amLocalResources);
        amLaunchContext.setEnvironment(YarnHelixUtils.getEnvironmentVariables(this.yarnConfiguration));
        String amCommand = buildApplicationMasterCommand(amResource.getMemory());
        amLaunchContext.setCommands(Lists.newArrayList(amCommand));
        if (UserGroupInformation.isSecurityEnabled()) {
            setupSecurityTokens(amLaunchContext);
        }

        // Fill in the application submission context
        submissionContext.setApplicationName(this.applicationName);
        submissionContext.setResource(amResource);
        submissionContext.setQueue(this.appQueueName);
        submissionContext.setPriority(Priority.newInstance(0));
        submissionContext.setAMContainerSpec(amLaunchContext);

        // Copy the local jars and files the containers need to the file system as well
        addContainerLocalResources(newApplicationId);

        LOGGER.info("Submitting application " + newApplicationId);
        this.yarnClient.submitApplication(submissionContext);

        LOGGER.info("Application successfully submitted and accepted");
        ApplicationReport submittedReport = this.yarnClient.getApplicationReport(newApplicationId);
        LOGGER.info("Application Name: " + submittedReport.getName());
        LOGGER.info("Application Tracking URL: " + submittedReport.getTrackingUrl());
        LOGGER.info("Application User: " + submittedReport.getUser() + " Queue: " + submittedReport.getQueue());

        return newApplicationId;
    }

    /**
     * Builds the {@link Resource} requested for the ApplicationMaster container.
     *
     * <p>
     *   The memory and vcores are read from the configuration and each is capped at the maximum resource
     *   capability reported in the given {@link GetNewApplicationResponse}.
     * </p>
     *
     * @param newApplicationResponse the response carrying the maximum resource capability
     * @return the {@link Resource} to request for the ApplicationMaster
     */
    private Resource prepareContainerResource(GetNewApplicationResponse newApplicationResponse) {
        int memoryMbs = this.config.getInt(GobblinYarnConfigurationKeys.APP_MASTER_MEMORY_MBS_KEY);
        int maximumMemoryCapacity = newApplicationResponse.getMaximumResourceCapability().getMemory();
        if (memoryMbs > maximumMemoryCapacity) {
            LOGGER.info(String.format(
                    "Specified AM memory [%d] is above the maximum memory capacity [%d] of the "
                            + "cluster, using the maximum memory capacity instead.",
                    memoryMbs, maximumMemoryCapacity));
            memoryMbs = maximumMemoryCapacity;
        }

        int vCores = this.config.getInt(GobblinYarnConfigurationKeys.APP_MASTER_CORES_KEY);
        int maximumVirtualCoreCapacity = newApplicationResponse.getMaximumResourceCapability().getVirtualCores();
        if (vCores > maximumVirtualCoreCapacity) {
            // Report the vcore values here; the memory values belong to the message above
            LOGGER.info(String.format(
                    "Specified AM vcores [%d] is above the maximum vcore capacity [%d] of the "
                            + "cluster, using the maximum vcore capacity instead.",
                    vCores, maximumVirtualCoreCapacity));
            vCores = maximumVirtualCoreCapacity;
        }

        // Set up resource type requirements for ApplicationMaster
        return Resource.newInstance(memoryMbs, vCores);
    }

    /**
     * Collects the {@link LocalResource}s of the ApplicationMaster: lib jars, ApplicationMaster jars, local
     * and remote files, and the job configuration package, each only if the corresponding configuration key
     * is set.
     *
     * @param applicationId the ID of the application, used to derive the application work directory
     * @return a map of the ApplicationMaster's local resources
     * @throws IOException if adding any of the resources fails
     */
    private Map<String, LocalResource> addAppMasterLocalResources(ApplicationId applicationId) throws IOException {
        final Path appWorkDir =
                GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, applicationId.toString());
        final Path appMasterWorkDir = new Path(appWorkDir, GobblinYarnConfigurationKeys.APP_MASTER_WORK_DIR_NAME);

        final Map<String, LocalResource> appMasterResources = Maps.newHashMap();
        final Optional<Map<String, LocalResource>> resources = Optional.of(appMasterResources);

        if (this.config.hasPath(GobblinYarnConfigurationKeys.LIB_JARS_DIR_KEY)) {
            Path srcLibJarsDir = new Path(this.config.getString(GobblinYarnConfigurationKeys.LIB_JARS_DIR_KEY));
            addLibJars(srcLibJarsDir, resources,
                    new Path(appWorkDir, GobblinYarnConfigurationKeys.LIB_JARS_DIR_NAME));
        }

        if (this.config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_JARS_KEY)) {
            String appMasterJars = this.config.getString(GobblinYarnConfigurationKeys.APP_MASTER_JARS_KEY);
            addAppJars(appMasterJars, resources,
                    new Path(appMasterWorkDir, GobblinYarnConfigurationKeys.APP_JARS_DIR_NAME));
        }

        if (this.config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_FILES_LOCAL_KEY)) {
            String localFiles = this.config.getString(GobblinYarnConfigurationKeys.APP_MASTER_FILES_LOCAL_KEY);
            addAppLocalFiles(localFiles, resources,
                    new Path(appMasterWorkDir, GobblinYarnConfigurationKeys.APP_FILES_DIR_NAME));
        }

        if (this.config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_FILES_REMOTE_KEY)) {
            String remoteFiles = this.config.getString(GobblinYarnConfigurationKeys.APP_MASTER_FILES_REMOTE_KEY);
            addAppRemoteFiles(remoteFiles, appMasterResources);
        }

        if (this.config.hasPath(GobblinClusterConfigurationKeys.JOB_CONF_PATH_KEY)) {
            String jobConfPath = this.config.getString(GobblinClusterConfigurationKeys.JOB_CONF_PATH_KEY);
            addJobConfPackage(jobConfPath,
                    new Path(appMasterWorkDir, GobblinYarnConfigurationKeys.APP_FILES_DIR_NAME),
                    appMasterResources);
        }

        return appMasterResources;
    }

    /**
     * Copies the jars and local files configured for the containers into the container work directory of
     * the application. Nothing is registered as a {@link LocalResource} here.
     *
     * @param applicationId the ID of the application, used to derive the application work directory
     * @throws IOException if copying any of the files fails
     */
    private void addContainerLocalResources(ApplicationId applicationId) throws IOException {
        final Path containerWorkDir = new Path(
                GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, applicationId.toString()),
                GobblinYarnConfigurationKeys.CONTAINER_WORK_DIR_NAME);
        // Files are only copied, so there is no resource map to fill in
        final Optional<Map<String, LocalResource>> noResourceMap = Optional.<Map<String, LocalResource>>absent();

        if (this.config.hasPath(GobblinYarnConfigurationKeys.CONTAINER_JARS_KEY)) {
            String containerJars = this.config.getString(GobblinYarnConfigurationKeys.CONTAINER_JARS_KEY);
            addAppJars(containerJars, noResourceMap,
                    new Path(containerWorkDir, GobblinYarnConfigurationKeys.APP_JARS_DIR_NAME));
        }

        if (this.config.hasPath(GobblinYarnConfigurationKeys.CONTAINER_FILES_LOCAL_KEY)) {
            String containerFiles = this.config.getString(GobblinYarnConfigurationKeys.CONTAINER_FILES_LOCAL_KEY);
            addAppLocalFiles(containerFiles, noResourceMap,
                    new Path(containerWorkDir, GobblinYarnConfigurationKeys.APP_FILES_DIR_NAME));
        }
    }

    /**
     * Copies every file found in a local lib jar directory to the destination directory and, if a resource
     * map is given, registers each copied file as a {@link LocalResource}.
     *
     * @param srcLibJarDir the local directory containing the lib jars
     * @param resourceMap an optional map to add the copied files to as local resources
     * @param destDir the destination directory
     * @throws IOException if listing or copying the files fails
     */
    private void addLibJars(Path srcLibJarDir, Optional<Map<String, LocalResource>> resourceMap, Path destDir)
            throws IOException {
        FileStatus[] jarStatuses = FileSystem.getLocal(this.yarnConfiguration).listStatus(srcLibJarDir);
        if (jarStatuses == null) {
            return;
        }

        // An empty listing falls straight through the loop
        for (FileStatus jarStatus : jarStatuses) {
            Path srcJarPath = jarStatus.getPath();
            Path destJarPath = new Path(destDir, srcJarPath.getName());
            this.fs.copyFromLocalFile(jarStatus.getPath(), destJarPath);
            if (!resourceMap.isPresent()) {
                continue;
            }
            YarnHelixUtils.addFileAsLocalResource(this.fs, destJarPath, LocalResourceType.FILE,
                    resourceMap.get());
        }
    }

    /**
     * Copies each jar of a comma-separated list of local jar paths to the destination directory and, if a
     * resource map is given, registers each copied jar as a {@link LocalResource}.
     *
     * @param jarFilePathList a comma-separated list of local jar file paths
     * @param resourceMap an optional map to add the copied jars to as local resources
     * @param destDir the destination directory
     * @throws IOException if copying any of the jars fails
     */
    private void addAppJars(String jarFilePathList, Optional<Map<String, LocalResource>> resourceMap, Path destDir)
            throws IOException {
        for (String jarPathString : SPLITTER.split(jarFilePathList)) {
            Path localJar = new Path(jarPathString);
            Path copiedJar = new Path(destDir, localJar.getName());
            this.fs.copyFromLocalFile(localJar, copiedJar);
            if (!resourceMap.isPresent()) {
                continue;
            }
            YarnHelixUtils.addFileAsLocalResource(this.fs, copiedJar, LocalResourceType.FILE, resourceMap.get());
        }
    }

    /**
     * Copies each file of a comma-separated list of local file paths to the destination directory and, if
     * a resource map is given, registers each copied file as a {@link LocalResource}.
     *
     * @param localFilePathList a comma-separated list of local file paths
     * @param resourceMap an optional map to add the copied files to as local resources
     * @param destDir the destination directory
     * @throws IOException if copying any of the files fails
     */
    private void addAppLocalFiles(String localFilePathList, Optional<Map<String, LocalResource>> resourceMap,
            Path destDir) throws IOException {
        final boolean registerAsResource = resourceMap.isPresent();
        for (String pathString : SPLITTER.split(localFilePathList)) {
            Path localFile = new Path(pathString);
            Path copiedFile = new Path(destDir, localFile.getName());
            this.fs.copyFromLocalFile(localFile, copiedFile);
            if (registerAsResource) {
                YarnHelixUtils.addFileAsLocalResource(this.fs, copiedFile, LocalResourceType.FILE,
                        resourceMap.get());
            }
        }
    }

    /**
     * Registers each file of a comma-separated list of file paths as a {@link LocalResource}, without
     * copying anything.
     *
     * @param hdfsFileList a comma-separated list of file paths
     * @param resourceMap the map to add the files to as local resources
     * @throws IOException if adding any of the files fails
     */
    private void addAppRemoteFiles(String hdfsFileList, Map<String, LocalResource> resourceMap) throws IOException {
        for (String remotePathString : SPLITTER.split(hdfsFileList)) {
            Path remoteFile = new Path(remotePathString);
            YarnHelixUtils.addFileAsLocalResource(this.fs, remoteFile, LocalResourceType.FILE, resourceMap);
        }
    }

    /**
     * Tars the local job configuration package into the destination directory and registers the resulting
     * archive as a {@link LocalResource} of type {@link LocalResourceType#ARCHIVE}.
     *
     * @param jobConfPackagePath the local path of the job configuration package
     * @param destDir the destination directory of the archive
     * @param resourceMap the map to add the archive to as a local resource
     * @throws IOException if creating or adding the archive fails
     */
    private void addJobConfPackage(String jobConfPackagePath, Path destDir, Map<String, LocalResource> resourceMap)
            throws IOException {
        Path localPackage = new Path(jobConfPackagePath);
        String archiveName = localPackage.getName() + GobblinClusterConfigurationKeys.TAR_GZ_FILE_SUFFIX;
        Path archivePath = new Path(destDir, archiveName);

        FileSystem localFs = FileSystem.getLocal(this.yarnConfiguration);
        StreamUtils.tar(localFs, this.fs, localPackage, archivePath);
        YarnHelixUtils.addFileAsLocalResource(this.fs, archivePath, LocalResourceType.ARCHIVE, resourceMap);
    }

    /**
     * Builds the shell command that launches the {@link GobblinApplicationMaster}: the java binary with the
     * maximum heap size and the configured JVM arguments, the main class with the application name option,
     * and the redirections of stdout and stderr into the container log directory.
     *
     * @param memoryMbs the maximum heap size, in megabytes, passed via {@code -Xmx}
     * @return the launch command
     */
    private String buildApplicationMasterCommand(int memoryMbs) {
        Class<?> appMasterClass = GobblinApplicationMaster.class;

        // Both log files are named after the simple class name and live in the container log directory
        String logFilePrefix = ApplicationConstants.LOG_DIR_EXPANSION_VAR + File.separator
                + appMasterClass.getSimpleName() + ".";

        String javaInvocation = ApplicationConstants.Environment.JAVA_HOME.$() + "/bin/java"
                + " -Xmx" + memoryMbs + "M";
        String applicationNameOption = " --" + GobblinClusterConfigurationKeys.APPLICATION_NAME_OPTION_NAME
                + " " + this.applicationName;

        return javaInvocation
                + " " + JvmUtils.formatJvmArguments(this.appMasterJvmArgs)
                + " " + appMasterClass.getName() + applicationNameOption
                + " 1>" + logFilePrefix + ApplicationConstants.STDOUT
                + " 2>" + logFilePrefix + ApplicationConstants.STDERR;
    }

    /**
     * Obtains delegation tokens for this launcher's file system and attaches the current user's serialized
     * credentials to the given container launch context.
     *
     * <p>The value of {@code YarnConfiguration.RM_PRINCIPAL} is used as the token renewer. Tokens returned by
     * {@code addDelegationTokens} are logged; the credentials object is then written to an in-memory buffer
     * and set on the launch context as a {@link ByteBuffer}.</p>
     *
     * @param containerLaunchContext launch context that receives the serialized tokens
     * @throws IOException if the RM principal is not configured (null or empty), or if fetching or
     *                     serializing the tokens fails
     */
    private void setupSecurityTokens(ContainerLaunchContext containerLaunchContext) throws IOException {
        Credentials userCredentials = UserGroupInformation.getCurrentUser().getCredentials();

        String renewer = this.yarnConfiguration.get(YarnConfiguration.RM_PRINCIPAL);
        if (renewer == null || renewer.isEmpty()) {
            throw new IOException("Failed to get master Kerberos principal for the RM to use as renewer");
        }

        // For now, only getting tokens for the default file-system.
        Token<?>[] delegationTokens = this.fs.addDelegationTokens(renewer, userCredentials);
        if (delegationTokens != null) {
            for (int i = 0; i < delegationTokens.length; i++) {
                LOGGER.info("Got delegation token for " + this.fs.getUri() + "; " + delegationTokens[i]);
            }
        }

        // A method-local Closer (distinct from this.closer) guarantees the buffer is closed on every path.
        Closer bufferCloser = Closer.create();
        try {
            DataOutputBuffer serialized = bufferCloser.register(new DataOutputBuffer());
            userCredentials.writeTokenStorageToStream(serialized);
            // Wrap only the written portion of the backing array.
            containerLaunchContext.setTokens(ByteBuffer.wrap(serialized.getData(), 0, serialized.getLength()));
        } catch (Throwable t) {
            throw bufferCloser.rethrow(t);
        } finally {
            bufferCloser.close();
        }
    }

    /**
     * Builds a {@code LogCopier} that reads from the application's log directory on this launcher's file
     * system and writes to {@code sinkLogDir} on a raw local file system.
     *
     * <p>The raw local file system is registered with {@code this.closer} and initialized with
     * {@code ConfigurationKeys.LOCAL_FS_URI} and a fresh {@link Configuration}. Only files with the
     * {@code ApplicationConstants.STDOUT} and {@code ApplicationConstants.STDERR} extensions are accepted.
     * The maximum bytes per log file and the scheduler are applied only when the corresponding keys are
     * present in {@code config}.</p>
     *
     * @param config configuration consulted for the optional log copier settings
     * @param sinkLogDir destination directory for the copied logs
     * @param appWorkDir application working directory under which the source log directory lives
     * @return the configured {@code LogCopier}
     * @throws IOException if initializing the local file system or preparing the source log directory fails
     */
    private LogCopier buildLogCopier(Config config, Path sinkLogDir, Path appWorkDir) throws IOException {
        FileSystem localSinkFs = this.closer.register(new RawLocalFileSystem());
        localSinkFs.initialize(URI.create(ConfigurationKeys.LOCAL_FS_URI), new Configuration());

        ImmutableSet<String> acceptedExtensions =
                ImmutableSet.of(ApplicationConstants.STDOUT, ApplicationConstants.STDERR);

        LogCopier.Builder copierBuilder = LogCopier.newBuilder()
                .useSrcFileSystem(this.fs)
                .useDestFileSystem(localSinkFs)
                .readFrom(getHdfsLogDir(appWorkDir))
                .writeTo(sinkLogDir)
                .acceptsLogFileExtensions(acceptedExtensions);

        // Optional settings: applied only when explicitly configured.
        if (config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_MAX_FILE_SIZE)) {
            copierBuilder.useMaxBytesPerLogFile(
                    config.getBytes(GobblinYarnConfigurationKeys.LOG_COPIER_MAX_FILE_SIZE));
        }
        if (config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_SCHEDULER)) {
            copierBuilder.useScheduler(config.getString(GobblinYarnConfigurationKeys.LOG_COPIER_SCHEDULER));
        }

        return copierBuilder.build();
    }

    /**
     * Returns the log directory under the given application working directory, creating it on this
     * launcher's file system if it does not exist yet.
     *
     * @param appWorkDir application working directory
     * @return {@code appWorkDir/GobblinYarnConfigurationKeys.APP_LOGS_DIR_NAME}
     * @throws IOException if checking for or creating the directory fails
     */
    private Path getHdfsLogDir(Path appWorkDir) throws IOException {
        Path appLogsDir = new Path(appWorkDir, GobblinYarnConfigurationKeys.APP_LOGS_DIR_NAME);
        if (this.fs.exists(appLogsDir)) {
            return appLogsDir;
        }

        this.fs.mkdirs(appLogsDir);
        return appLogsDir;
    }

    /**
     * Creates the {@code YarnAppSecurityManager} for this application.
     *
     * <p>The token file path is {@code <home directory>/<application name>/<TOKEN_FILE_NAME>}, where the home
     * directory is the one reported by this launcher's file system.</p>
     *
     * @return a new {@code YarnAppSecurityManager} built from this launcher's config, Helix manager,
     *         file system, and the token file path
     * @throws IOException declared by the signature; presumably raised by the manager's constructor
     */
    private YarnAppSecurityManager buildYarnAppSecurityManager() throws IOException {
        String relativeTokenPath =
                this.applicationName + Path.SEPARATOR + GobblinYarnConfigurationKeys.TOKEN_FILE_NAME;
        Path tokenFile = new Path(this.fs.getHomeDirectory(), relativeTokenPath);
        return new YarnAppSecurityManager(this.config, this.helixManager, this.fs, tokenFile);
    }

    /**
     * Sends an application master shutdown message to the Helix controller.
     *
     * <p>The recipient criteria use {@code "%"} for instance name, resource, partition, and partition state
     * (presumably Helix's match-all wildcard; confirm against the Helix messaging documentation), target
     * the {@code CONTROLLER} instance type, and are session specific. The message id is the lower-cased
     * shutdown sub-type followed by a random UUID. If no message was sent, an error is logged; nothing is
     * thrown.</p>
     */
    @VisibleForTesting
    void sendShutdownRequest() {
        Criteria criteria = new Criteria();
        criteria.setInstanceName("%");
        criteria.setResource("%");
        criteria.setPartition("%");
        criteria.setPartitionState("%");
        criteria.setRecipientInstanceType(InstanceType.CONTROLLER);
        criteria.setSessionSpecific(true);

        String shutdownSubType = HelixMessageSubTypes.APPLICATION_MASTER_SHUTDOWN.toString();
        // Lower-case with Locale.ROOT so the id does not depend on the JVM's default locale; e.g. under a
        // Turkish locale the default toLowerCase() maps 'I' to the non-ASCII dotless 'ı'.
        String messageId = shutdownSubType.toLowerCase(java.util.Locale.ROOT) + UUID.randomUUID().toString();

        Message shutdownRequest = new Message(GobblinHelixConstants.SHUTDOWN_MESSAGE_TYPE, messageId);
        shutdownRequest.setMsgSubType(shutdownSubType);
        shutdownRequest.setMsgState(Message.MessageState.NEW);
        shutdownRequest.setTgtSessionId("*");

        int messagesSent = this.helixManager.getMessagingService().send(criteria, shutdownRequest);
        if (messagesSent == 0) {
            LOGGER.error(String.format("Failed to send the %s message to the controller",
                    shutdownRequest.getMsgSubType()));
        }
    }

    /**
     * Recursively deletes the working directory of the given application, if it exists.
     *
     * <p>The directory is resolved by {@code GobblinClusterUtils.getAppWorkDirPath} from this launcher's
     * file system, the application name, and the string form of {@code applicationId}.</p>
     *
     * @param applicationId id of the application whose working directory should be removed
     * @throws IOException if checking for or deleting the directory fails
     */
    @VisibleForTesting
    void cleanUpAppWorkDirectory(ApplicationId applicationId) throws IOException {
        String appIdString = applicationId.toString();
        Path workDir = GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName, appIdString);
        if (!this.fs.exists(workDir)) {
            return;
        }

        LOGGER.info("Deleting application working directory " + workDir);
        this.fs.delete(workDir, true);
    }

    /**
     * Sends an email announcing that the Gobblin Yarn application has completed.
     *
     * <p>When a report is present the body lists the application id, current attempt id, final status,
     * start and finish times, diagnostics (if non-empty), and, when available, the number of used
     * containers and the used memory and vcores. When the report is absent the body says
     * "Not available". A failure to send the email is logged and not propagated.</p>
     *
     * @param applicationReport the final application report, or absent if none could be obtained
     */
    private void sendEmailOnShutdown(Optional<ApplicationReport> applicationReport) {
        String subject = String.format("Gobblin Yarn application %s completed", this.applicationName);

        StringBuilder body = new StringBuilder("Gobblin Yarn ApplicationReport:");
        if (!applicationReport.isPresent()) {
            body.append(' ').append("Not available");
        } else {
            ApplicationReport report = applicationReport.get();

            body.append("\n");
            body.append("\tApplication ID: ").append(report.getApplicationId()).append("\n");
            body.append("\tApplication attempt ID: ").append(report.getCurrentApplicationAttemptId())
                    .append("\n");
            body.append("\tFinal application status: ").append(report.getFinalApplicationStatus())
                    .append("\n");
            body.append("\tStart time: ").append(report.getStartTime()).append("\n");
            body.append("\tFinish time: ").append(report.getFinishTime()).append("\n");

            String diagnostics = report.getDiagnostics();
            if (!Strings.isNullOrEmpty(diagnostics)) {
                body.append("\tDiagnostics: ").append(diagnostics).append("\n");
            }

            // Resource usage, and the used-resource record inside it, may each be missing.
            ApplicationResourceUsageReport usage = report.getApplicationResourceUsageReport();
            if (usage != null) {
                body.append("\tUsed containers: ").append(usage.getNumUsedContainers()).append("\n");
                Resource used = usage.getUsedResources();
                if (used != null) {
                    body.append("\tUsed memory (MBs): ").append(used.getMemory()).append("\n");
                    body.append("\tUsed vcores: ").append(used.getVirtualCores()).append("\n");
                }
            }
        }

        try {
            EmailUtils.sendEmail(ConfigUtils.configToState(this.config), subject, body.toString());
        } catch (EmailException ee) {
            LOGGER.error("Failed to send email notification on shutdown", ee);
        }
    }

    /**
     * Command-line entry point: creates a launcher from the default loaded configuration and a new
     * {@code YarnConfiguration}, installs a JVM shutdown hook, and launches the application.
     *
     * <p>The shutdown hook stops the launcher, logging (rather than propagating) any
     * {@link IOException} or {@link TimeoutException}. Afterwards, if the launcher's
     * {@code emailNotificationOnShutdown} flag is set, it sends the shutdown email with an absent
     * application report.</p>
     *
     * @param args command-line arguments (not used)
     * @throws Exception if constructing or launching the launcher fails
     */
    public static void main(String[] args) throws Exception {
        final GobblinYarnAppLauncher launcher =
                new GobblinYarnAppLauncher(ConfigFactory.load(), new YarnConfiguration());

        Thread shutdownHook = new Thread() {

            @Override
            public void run() {
                try {
                    launcher.stop();
                } catch (IOException ioe) {
                    LOGGER.error("Failed to shutdown the " + GobblinYarnAppLauncher.class.getSimpleName(), ioe);
                } catch (TimeoutException te) {
                    LOGGER.error("Timeout in stopping the service manager", te);
                } finally {
                    // The notification goes out whether or not stop() succeeded.
                    if (launcher.emailNotificationOnShutdown) {
                        launcher.sendEmailOnShutdown(Optional.<ApplicationReport>absent());
                    }
                }
            }
        };
        Runtime.getRuntime().addShutdownHook(shutdownHook);

        launcher.launch();
    }
}