com.streamsets.datacollector.cluster.ClusterProviderImpl.java Source code

Introduction

Here is the source code for com.streamsets.datacollector.cluster.ClusterProviderImpl.java, from the StreamSets Data Collector project.
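
ClusterProviderImpl is the ClusterProvider implementation that StreamSets Data Collector uses to submit, monitor, and kill cluster-mode pipelines. It shells out to a cluster manager script: a MapReduce job for CLUSTER_BATCH pipelines, Spark on YARN for CLUSTER_YARN_STREAMING, and Spark on Mesos for CLUSTER_MESOS_STREAMING. To start a pipeline it stages libs.tar.gz, etc.tar.gz, and resources.tar.gz archives, rewrites sdc.properties for the slave instances, builds the submit command line, and then scans the submit process output for a YARN application id or Mesos driver id.

As a minimal, self-contained sketch of that last step, the snippet below applies the regex the class declares as YARN_APPLICATION_ID_REGEX to a hypothetical line of submit output (the class name and sample line here are illustrative, not part of the source):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class YarnAppIdScan {
    // Same pattern as ClusterProviderImpl.YARN_APPLICATION_ID_REGEX in the listing below.
    static final Pattern YARN_APP_ID = Pattern.compile("\\s(application_[0-9]+_[0-9]+)(\\s|$)");

    public static void main(String[] args) {
        // Hypothetical line of submit-command output.
        String line = "INFO Client: Submitted application application_1457932353210_0002";
        Matcher m = YARN_APP_ID.matcher(line);
        if (m.find()) {
            System.out.println("Found application id: " + m.group(1));
        }
    }
}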

Source

/**
 * Copyright 2015 StreamSets Inc.
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.streamsets.datacollector.cluster;

import static com.streamsets.datacollector.definition.StageLibraryDefinitionExtractor.DATA_COLLECTOR_LIBRARY_PROPERTIES;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.streamsets.datacollector.config.PipelineConfiguration;
import com.streamsets.datacollector.config.RuleDefinitions;
import com.streamsets.datacollector.config.StageConfiguration;
import com.streamsets.datacollector.config.StageDefinition;
import com.streamsets.datacollector.creation.PipelineBean;
import com.streamsets.datacollector.creation.PipelineBeanCreator;
import com.streamsets.datacollector.creation.PipelineConfigBean;
import com.streamsets.datacollector.creation.StageBean;
import com.streamsets.datacollector.execution.runner.common.Constants;
import com.streamsets.datacollector.http.WebServerTask;
import com.streamsets.datacollector.json.ObjectMapperFactory;
import com.streamsets.datacollector.main.RuntimeInfo;
import com.streamsets.datacollector.main.RuntimeModule;
import com.streamsets.datacollector.restapi.bean.BeanHelper;
import com.streamsets.datacollector.security.SecurityConfiguration;
import com.streamsets.datacollector.stagelibrary.StageLibraryTask;
import com.streamsets.datacollector.stagelibrary.StageLibraryUtils;
import com.streamsets.datacollector.store.PipelineInfo;
import com.streamsets.datacollector.store.impl.FilePipelineStoreTask;
import com.streamsets.datacollector.util.Configuration;
import com.streamsets.datacollector.util.PipelineDirectoryUtil;
import com.streamsets.datacollector.util.SystemProcessFactory;
import com.streamsets.datacollector.validation.Issue;
import com.streamsets.pipeline.api.Config;
import com.streamsets.pipeline.api.ExecutionMode;
import com.streamsets.pipeline.api.impl.PipelineUtils;
import com.streamsets.pipeline.api.impl.Utils;
import com.streamsets.pipeline.lib.util.ThreadUtil;
import com.streamsets.pipeline.util.SystemProcess;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLClassLoader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ClusterProviderImpl implements ClusterProvider {
    static final Pattern YARN_APPLICATION_ID_REGEX = Pattern.compile("\\s(application_[0-9]+_[0-9]+)(\\s|$)");
    static final Pattern MESOS_DRIVER_ID_REGEX = Pattern.compile("\\s(driver-[0-9]+-[0-9]+)(\\s|$)");
    static final Pattern NO_VALID_CREDENTIALS = Pattern.compile("(No valid credentials provided.*)");
    public static final String CLUSTER_TYPE = "CLUSTER_TYPE";
    public static final String CLUSTER_TYPE_MESOS = "mesos";
    public static final String CLUSTER_TYPE_MAPREDUCE = "mr";
    public static final String CLUSTER_TYPE_YARN = "yarn";
    private static final String STAGING_DIR = "STAGING_DIR";
    private static final String MESOS_UBER_JAR_PATH = "MESOS_UBER_JAR_PATH";
    private static final String MESOS_UBER_JAR = "MESOS_UBER_JAR";
    private static final String ETC_TAR_ARCHIVE = "ETC_TAR_ARCHIVE";
    private static final String LIBS_TAR_ARCHIVE = "LIBS_TAR_ARCHIVE";
    private static final String RESOURCES_TAR_ARCHIVE = "RESOURCES_TAR_ARCHIVE";
    private static final String MESOS_HOSTING_JAR_DIR = "MESOS_HOSTING_JAR_DIR";
    private static final String KERBEROS_AUTH = "KERBEROS_AUTH";
    private static final String KERBEROS_KEYTAB = "KERBEROS_KEYTAB";
    private static final String KERBEROS_PRINCIPAL = "KERBEROS_PRINCIPAL";
    private static final String CLUSTER_MODE_JAR_BLACKLIST = "cluster.jar.blacklist.regex_";
    private static final String ALL_STAGES = "*";
    private static final String TOPIC = "topic";
    private static final String MESOS_HOSTING_DIR_PARENT = "mesos";
    private final RuntimeInfo runtimeInfo;
    private final YARNStatusParser yarnStatusParser;
    private final MesosStatusParser mesosStatusParser;
    /**
     * Only null in the case of tests
     */
    @Nullable
    private final SecurityConfiguration securityConfiguration;

    private static final Logger LOG = LoggerFactory.getLogger(ClusterProviderImpl.class);
    private static final boolean IS_TRACE_ENABLED = LOG.isTraceEnabled();

    @VisibleForTesting
    ClusterProviderImpl() {
        this(null, null);
    }

    public ClusterProviderImpl(RuntimeInfo runtimeInfo, @Nullable SecurityConfiguration securityConfiguration) {
        this.runtimeInfo = runtimeInfo;
        this.securityConfiguration = securityConfiguration;
        this.yarnStatusParser = new YARNStatusParser();
        this.mesosStatusParser = new MesosStatusParser();
    }

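    /**
     * Invokes the cluster manager script with "kill <appId>" to stop a running
     * cluster pipeline. Kerberos settings are passed through the environment
     * and, for Mesos pipelines, the dispatcher URL is added via --master.
     * Times out after 30 seconds.
     */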
    @Override
    public void killPipeline(SystemProcessFactory systemProcessFactory, File sparkManager, File tempDir,
            String appId, PipelineConfiguration pipelineConfiguration) throws TimeoutException, IOException {
        Map<String, String> environment = new HashMap<>();
        environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
        addKerberosConfiguration(environment);
        ImmutableList.Builder<String> args = ImmutableList.builder();
        args.add(sparkManager.getAbsolutePath());
        args.add("kill");
        args.add(appId);
        ExecutionMode executionMode = PipelineBeanCreator.get().getExecutionMode(pipelineConfiguration,
                new ArrayList<Issue>());
        if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
            addMesosArgs(pipelineConfiguration, environment, args);
        }
        SystemProcess process = systemProcessFactory.create(ClusterProviderImpl.class.getSimpleName(), tempDir,
                args.build());
        try {
            process.start(environment);
            if (!process.waitFor(30, TimeUnit.SECONDS)) {
                logOutput(appId, process);
                throw new TimeoutException(errorString("Kill command for {} timed out.", appId));
            }
        } finally {
            process.cleanup();
        }
    }

    private static String errorString(String template, Object... args) {
        return Utils.format("ERROR: " + template, args);
    }

    private static void logOutput(String appId, SystemProcess process) {
        try {
            LOG.info("Status command standard error: {} ", Joiner.on("\n").join(process.getAllError()));
            LOG.info("Status command standard output: {} ", Joiner.on("\n").join(process.getAllOutput()));
        } catch (Exception e) {
            String msg = errorString("Could not read output of command '{}' for app {}: {}", process.getCommand(),
                    appId, e);
            LOG.error(msg, e);
        }
    }

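    /**
     * Runs "<clusterManager> status <appId>" and parses the output with the
     * YARN or Mesos status parser, depending on the pipeline's execution mode.
     */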
    @Override
    public ClusterPipelineStatus getStatus(SystemProcessFactory systemProcessFactory, File sparkManager,
            File tempDir, String appId, PipelineConfiguration pipelineConfiguration)
            throws TimeoutException, IOException {

        Map<String, String> environment = new HashMap<>();
        environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
        addKerberosConfiguration(environment);
        ImmutableList.Builder<String> args = ImmutableList.builder();
        args.add(sparkManager.getAbsolutePath());
        args.add("status");
        args.add(appId);
        ExecutionMode executionMode = PipelineBeanCreator.get().getExecutionMode(pipelineConfiguration,
                new ArrayList<Issue>());
        if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
            addMesosArgs(pipelineConfiguration, environment, args);
        }
        SystemProcess process = systemProcessFactory.create(ClusterProviderImpl.class.getSimpleName(), tempDir,
                args.build());
        try {
            process.start(environment);
            if (!process.waitFor(30, TimeUnit.SECONDS)) {
                logOutput(appId, process);
                throw new TimeoutException(errorString("YARN status command for {} timed out.", appId));
            }
            if (process.exitValue() != 0) {
                logOutput(appId, process);
                throw new IllegalStateException(
                        errorString("Status command for {} failed with exit code {}.", appId, process.exitValue()));
            }
            logOutput(appId, process);
            String status;
            if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
                status = mesosStatusParser.parseStatus(process.getAllOutput());
            } else {
                status = yarnStatusParser.parseStatus(process.getAllOutput());
            }
            return ClusterPipelineStatus.valueOf(status);
        } finally {
            process.cleanup();
        }
    }

    private void addMesosArgs(PipelineConfiguration pipelineConfiguration, Map<String, String> environment,
            ImmutableList.Builder<String> args) {
        String mesosDispatcherURL = Utils.checkNotNull(
                PipelineBeanCreator.get().getMesosDispatcherURL(pipelineConfiguration), "mesosDispatcherURL");
        environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MESOS);
        args.add("--master");
        args.add(mesosDispatcherURL);
    }

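    /**
     * Rewrites sdc.properties for the slave data collectors: switches the
     * execution mode to SLAVE, randomizes the HTTP port and disables HTTPS,
     * and injects the SDC id, cluster token, callback URL, source configs,
     * and (when present) the URL of the Mesos uber jar.
     */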
    private void rewriteProperties(File sdcPropertiesFile, Map<String, String> sourceConfigs,
            Map<String, String> sourceInfo, String clusterToken, Optional<String> mesosURL) throws IOException {
        InputStream sdcInStream = null;
        OutputStream sdcOutStream = null;
        Properties sdcProperties = new Properties();
        try {
            sdcInStream = new FileInputStream(sdcPropertiesFile);
            sdcProperties.load(sdcInStream);
            sdcProperties.remove(Configuration.CONFIG_INCLUDES);
            sdcProperties.setProperty(WebServerTask.HTTP_PORT_KEY, "0");
            sdcProperties.setProperty(WebServerTask.HTTPS_PORT_KEY, "-1");
            sdcProperties.setProperty(RuntimeModule.PIPELINE_EXECUTION_MODE_KEY, ExecutionMode.SLAVE.name());
            sdcProperties.setProperty(WebServerTask.REALM_FILE_PERMISSION_CHECK, "false");
            sdcProperties.remove(RuntimeModule.DATA_COLLECTOR_BASE_HTTP_URL);
            if (runtimeInfo != null) {
                String id = String.valueOf(runtimeInfo.getId());
                sdcProperties.setProperty(Constants.SDC_ID, id);
                sdcProperties.setProperty(Constants.PIPELINE_CLUSTER_TOKEN_KEY, clusterToken);
                sdcProperties.setProperty(Constants.CALLBACK_SERVER_URL_KEY, runtimeInfo.getClusterCallbackURL());
            }

            if (mesosURL.isPresent()) {
                sdcProperties.setProperty(Constants.MESOS_JAR_URL, mesosURL.get());
            }
            addClusterConfigs(sourceConfigs, sdcProperties);
            addClusterConfigs(sourceInfo, sdcProperties);

            sdcOutStream = new FileOutputStream(sdcPropertiesFile);
            sdcProperties.store(sdcOutStream, null);
            LOG.debug("sourceConfigs = {}", sourceConfigs);
            LOG.debug("sourceInfo = {}", sourceInfo);
            LOG.debug("sdcProperties = {}", sdcProperties);
            sdcOutStream.flush();
            sdcOutStream.close();
        } finally {
            // closeQuietly is a no-op on null, so explicit null checks are unnecessary
            IOUtils.closeQuietly(sdcInStream);
            IOUtils.closeQuietly(sdcOutStream);
        }
    }

    private void addClusterConfigs(Map<String, String> configs, Properties properties) {
        for (Map.Entry<String, String> entry : configs.entrySet()) {
            properties.setProperty(entry.getKey(), entry.getValue());
        }
    }

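    /**
     * Locates the single jar in bootstrapDir whose file name starts with the
     * given prefix; fails if zero or more than one candidate is found.
     */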
    private static File getBootstrapJar(File bootstrapDir, final String name) {
        Utils.checkState(bootstrapDir.isDirectory(),
                Utils.format("SDC bootstrap lib does not exist: {}", bootstrapDir));
        File[] candidates = bootstrapDir.listFiles(new FileFilter() {
            @Override
            public boolean accept(File candidate) {
                final String filename = candidate.getName();
                return filename.startsWith(name) && filename.endsWith(".jar");
            }
        });
        Utils.checkState(candidates != null,
                Utils.format("Did not find jar matching {} in {}", name, bootstrapDir));
        Utils.checkState(candidates.length == 1,
                Utils.format("Did not find exactly one bootstrap jar: {}", Arrays.toString(candidates)));
        return candidates[0];
    }

    private void addKerberosConfiguration(Map<String, String> environment) {
        if (securityConfiguration != null) {
            environment.put(KERBEROS_AUTH, String.valueOf(securityConfiguration.isKerberosEnabled()));
            if (securityConfiguration.isKerberosEnabled()) {
                environment.put(KERBEROS_PRINCIPAL, securityConfiguration.getKerberosPrincipal());
                environment.put(KERBEROS_KEYTAB, securityConfiguration.getKerberosKeytab());
            }
        }
    }

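    /**
     * Deletes any previous clone, then copies srcDir into tempDir/dirName
     * (skipping unreadable files) so it can be modified and archived without
     * touching the original.
     */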
    static File createDirectoryClone(File srcDir, String dirName, File tempDir) throws IOException {
        File tempSrcDir = new File(tempDir, dirName);
        FileUtils.deleteQuietly(tempSrcDir);
        Utils.checkState(tempSrcDir.mkdir(), Utils.formatL("Could not create {}", tempSrcDir));
        doCopyDirectory(srcDir, tempSrcDir);
        return tempSrcDir;
    }

    private static void doCopyDirectory(File srcDir, File destDir) throws IOException {
        // code copied from commons-io FileUtils to work around files which cannot be read
        // recurse
        final File[] srcFiles = srcDir.listFiles();
        if (srcFiles == null) { // null if abstract pathname does not denote a directory, or if an I/O error occurs
            throw new IOException("Failed to list contents of " + srcDir);
        }
        if (destDir.exists()) {
            if (!destDir.isDirectory()) {
                throw new IOException("Destination '" + destDir + "' exists but is not a directory");
            }
        } else {
            if (!destDir.mkdirs() && !destDir.isDirectory()) {
                throw new IOException("Destination '" + destDir + "' directory cannot be created");
            }
        }
        if (!destDir.canWrite()) {
            throw new IOException("Destination '" + destDir + "' cannot be written to");
        }
        for (final File srcFile : srcFiles) {
            final File dstFile = new File(destDir, srcFile.getName());
            if (srcFile.canRead()) { // ignore files which cannot be read
                if (srcFile.isDirectory()) {
                    doCopyDirectory(srcFile, dstFile);
                } else {
                    try (InputStream in = new FileInputStream(srcFile)) {
                        try (OutputStream out = new FileOutputStream(dstFile)) {
                            IOUtils.copy(in, out);
                        }
                    }
                }
            }
        }
    }

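    /**
     * Returns true if the file name matches any regex on the blacklist.
     */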
    static boolean exclude(List<String> blacklist, String name) {
        for (String pattern : blacklist) {
            if (Pattern.compile(pattern).matcher(name).find()) {
                return true;
            } else if (IS_TRACE_ENABLED) {
                LOG.trace("Pattern '{}' does not match '{}'", pattern, name);
            }
        }
        return false;
    }

    private static Properties readDataCollectorProperties(ClassLoader cl) throws IOException {
        Properties properties = new Properties();
        while (cl != null) {
            Enumeration<URL> urls = cl.getResources(DATA_COLLECTOR_LIBRARY_PROPERTIES);
            if (urls != null) {
                while (urls.hasMoreElements()) {
                    URL url = urls.nextElement();
                    LOG.trace("Loading data collector library properties: {}", url);
                    try (InputStream inputStream = url.openStream()) {
                        properties.load(inputStream);
                    }
                }
            }
            cl = cl.getParent();
        }
        LOG.trace("Final properties: {} ", properties);
        return properties;
    }

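    /**
     * Returns the classloader's jar URLs, minus any jar whose file name matches
     * a blacklist regex from the stage library properties, keyed either to the
     * specific stage class (cluster.jar.blacklist.regex_<stageClass>) or to all
     * stages ('*').
     */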
    private static List<URL> findJars(String name, URLClassLoader cl, @Nullable String stageClazzName)
            throws IOException {
        Properties properties = readDataCollectorProperties(cl);
        List<String> blacklist = new ArrayList<>();
        for (Map.Entry<Object, Object> entry : properties.entrySet()) {
            String key = (String) entry.getKey();
            if (stageClazzName != null && key.equals(CLUSTER_MODE_JAR_BLACKLIST + stageClazzName)) {
                String value = (String) entry.getValue();
                blacklist.addAll(Splitter.on(",").trimResults().omitEmptyStrings().splitToList(value));
            } else if (key.equals(CLUSTER_MODE_JAR_BLACKLIST + ALL_STAGES)) {
                String value = (String) entry.getValue();
                blacklist.addAll(Splitter.on(",").trimResults().omitEmptyStrings().splitToList(value));
            }
        }
        if (IS_TRACE_ENABLED) {
            LOG.trace("Blacklist for '{}': '{}'", name, blacklist);
        }
        List<URL> urls = new ArrayList<>();
        for (URL url : cl.getURLs()) {
            if (blacklist.isEmpty()) {
                urls.add(url);
            } else {
                if (exclude(blacklist, FilenameUtils.getName(url.getPath()))) {
                    LOG.trace("Skipping '{}' for '{}' due to '{}'", url, name, blacklist);
                } else {
                    urls.add(url);
                }
            }
        }
        return urls;
    }

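    /**
     * Creates the staging directory, delegates to startPipelineInternal, and
     * cleans the staging directory up afterwards (unless sdc.testing-mode is
     * set, since YARN tasks may still be using it).
     */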
    @Override
    public ApplicationState startPipeline(SystemProcessFactory systemProcessFactory, File clusterManager,
            File outputDir, Map<String, String> environment, Map<String, String> sourceInfo,
            PipelineConfiguration pipelineConfiguration, StageLibraryTask stageLibrary, File etcDir,
            File resourcesDir, File staticWebDir, File bootstrapDir, URLClassLoader apiCL,
            URLClassLoader containerCL, long timeToWaitForFailure, RuleDefinitions ruleDefinitions)
            throws IOException, TimeoutException {
        File stagingDir = new File(outputDir, "staging");
        if (!stagingDir.mkdirs() || !stagingDir.isDirectory()) {
            String msg = Utils.format("Could not create staging directory: {}", stagingDir);
            throw new IllegalStateException(msg);
        }
        try {
            return startPipelineInternal(systemProcessFactory, clusterManager, outputDir, environment, sourceInfo,
                    pipelineConfiguration, stageLibrary, etcDir, resourcesDir, staticWebDir, bootstrapDir, apiCL,
                    containerCL, timeToWaitForFailure, stagingDir, ruleDefinitions);
        } finally {
            // in testing mode the staging dir is used by yarn
            // tasks and thus cannot be deleted
            if (!Boolean.getBoolean("sdc.testing-mode") && !FileUtils.deleteQuietly(stagingDir)) {
                LOG.warn("Unable to cleanup: {}", stagingDir);
            }
        }
    }

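    /**
     * Does the heavy lifting of a cluster submit: resolves the pipeline's stage
     * jars, packages the libs/etc/resources tarballs into the staging directory,
     * rewrites sdc.properties, builds the MapReduce, Spark-on-YARN, or
     * Spark-on-Mesos command line, launches it, and polls the process output
     * once per second until an application id appears or timeToWaitForFailure
     * elapses.
     */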
    private ApplicationState startPipelineInternal(SystemProcessFactory systemProcessFactory, File clusterManager,
            File outputDir, Map<String, String> environment, Map<String, String> sourceInfo,
            PipelineConfiguration pipelineConfiguration, StageLibraryTask stageLibrary, File etcDir,
            File resourcesDir, File staticWebDir, File bootstrapDir, URLClassLoader apiCL,
            URLClassLoader containerCL, long timeToWaitForFailure, File stagingDir, RuleDefinitions ruleDefinitions)
            throws IOException, TimeoutException {
        environment = Maps.newHashMap(environment);
        // create libs.tar.gz file for pipeline
        Map<String, List<URL>> streamsetsLibsCl = new HashMap<>();
        Map<String, List<URL>> userLibsCL = new HashMap<>();
        Map<String, String> sourceConfigs = new HashMap<>();
        ImmutableList.Builder<StageConfiguration> pipelineConfigurations = ImmutableList.builder();
        // order is important here as we don't want error stage
        // configs overriding source stage configs
        String clusterToken = UUID.randomUUID().toString();
        List<String> jarsToShip = new ArrayList<String>();
        List<Issue> errors = new ArrayList<>();
        PipelineBean pipelineBean = PipelineBeanCreator.get().create(false, stageLibrary, pipelineConfiguration,
                errors);
        if (!errors.isEmpty()) {
            String msg = Utils.format("Found '{}' configuration errors: {}", errors.size(), errors);
            throw new IllegalStateException(msg);
        }
        pipelineConfigurations.add(pipelineBean.getErrorStage().getConfiguration());
        StageBean statsStage = pipelineBean.getStatsAggregatorStage();
        // statsStage is null for pre 1.3 pipelines
        if (statsStage != null) {
            pipelineConfigurations.add(statsStage.getConfiguration());
        }
        for (StageBean stageBean : pipelineBean.getStages()) {
            pipelineConfigurations.add(stageBean.getConfiguration());
        }
        ExecutionMode executionMode = ExecutionMode.STANDALONE;
        for (StageConfiguration stageConf : pipelineConfigurations.build()) {
            StageDefinition stageDef = stageLibrary.getStage(stageConf.getLibrary(), stageConf.getStageName(),
                    false);
            if (stageConf.getInputLanes().isEmpty()) {
                for (Config conf : stageConf.getConfiguration()) {
                    if (conf.getValue() != null) {
                        Object value = conf.getValue();
                        if (value instanceof List) {
                            List<?> values = (List<?>) value;
                            if (values.isEmpty()) {
                                LOG.debug("Conf value for " + conf.getName() + " is empty");
                            } else {
                                Object first = values.get(0);
                                if (canCastToString(first)) {
                                    sourceConfigs.put(conf.getName(), Joiner.on(",").join(values));
                                } else if (first instanceof Map) {
                                    addToSourceConfigs(sourceConfigs, (List<Map<String, Object>>) values);
                                } else {
                                    LOG.info("List is of type '{}' which cannot be converted to property value.",
                                            first.getClass().getName());
                                }
                            }
                        } else if (canCastToString(conf.getValue())) {
                            LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
                            sourceConfigs.put(conf.getName(), String.valueOf(value));
                        } else if (value instanceof Enum) {
                            value = ((Enum) value).name();
                            LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
                            sourceConfigs.put(conf.getName(), String.valueOf(value));
                        } else {
                            LOG.warn("Conf value is of unknown type " + conf.getValue());
                        }
                    }
                }
                executionMode = PipelineBeanCreator.get().getExecutionMode(pipelineConfiguration,
                        new ArrayList<Issue>());

                List<String> libJarsRegex = stageDef.getLibJarsRegex();
                if (!libJarsRegex.isEmpty()) {
                    for (URL jarUrl : ((URLClassLoader) stageDef.getStageClassLoader()).getURLs()) {
                        File jarFile = new File(jarUrl.getPath());
                        for (String libJar : libJarsRegex) {
                            Pattern pattern = Pattern.compile(libJar);
                            Matcher matcher = pattern.matcher(jarFile.getName());
                            if (matcher.matches()) {
                                jarsToShip.add(jarFile.getAbsolutePath());
                            }
                        }
                    }
                }
            }
            String type = StageLibraryUtils.getLibraryType(stageDef.getStageClassLoader());
            String name = StageLibraryUtils.getLibraryName(stageDef.getStageClassLoader());
            if (ClusterModeConstants.STREAMSETS_LIBS.equals(type)) {
                streamsetsLibsCl.put(name,
                        findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName()));
            } else if (ClusterModeConstants.USER_LIBS.equals(type)) {
                userLibsCL.put(name,
                        findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName()));
            } else {
                throw new IllegalStateException(Utils.format("Error unknown stage library type: '{}'", type));
            }
        }
        LOG.info("stagingDir = '{}'", stagingDir);
        LOG.info("bootstrapDir = '{}'", bootstrapDir);
        LOG.info("etcDir = '{}'", etcDir);
        LOG.info("resourcesDir = '{}'", resourcesDir);
        LOG.info("staticWebDir = '{}'", staticWebDir);

        Utils.checkState(staticWebDir.isDirectory(), Utils.format("Expected '{}' to be a directory", staticWebDir));
        File libsTarGz = new File(stagingDir, "libs.tar.gz");
        try {
            TarFileCreator.createLibsTarGz(findJars("api", apiCL, null), findJars("container", containerCL, null),
                    streamsetsLibsCl, userLibsCL, staticWebDir, libsTarGz);
        } catch (Exception ex) {
            String msg = errorString("Serializing classpath: '{}'", ex);
            throw new RuntimeException(msg, ex);
        }
        File resourcesTarGz = new File(stagingDir, "resources.tar.gz");
        try {
            resourcesDir = createDirectoryClone(resourcesDir, "resources", stagingDir);
            TarFileCreator.createTarGz(resourcesDir, resourcesTarGz);
        } catch (Exception ex) {
            String msg = errorString("Serializing resources directory: '{}': {}", resourcesDir.getName(), ex);
            throw new RuntimeException(msg, ex);
        }
        File etcTarGz = new File(stagingDir, "etc.tar.gz");
        File sdcPropertiesFile;
        File bootstrapJar = getBootstrapJar(new File(bootstrapDir, "main"), "streamsets-datacollector-bootstrap");
        File clusterBootstrapJar;
        String mesosHostingJarDir = null;
        String mesosURL = null;
        if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
            clusterBootstrapJar = getBootstrapJar(new File(bootstrapDir, "mesos"),
                    "streamsets-datacollector-mesos-bootstrap");
            String topic = sourceConfigs.get(TOPIC);
            String pipelineName = sourceInfo.get(ClusterModeConstants.CLUSTER_PIPELINE_NAME);
            mesosHostingJarDir = MESOS_HOSTING_DIR_PARENT + File.separatorChar
                    + getSha256(getMesosHostingDir(topic, pipelineName));
            mesosURL = runtimeInfo.getBaseHttpUrl() + File.separatorChar + mesosHostingJarDir + File.separatorChar
                    + clusterBootstrapJar.getName();
        } else {
            clusterBootstrapJar = getBootstrapJar(new File(bootstrapDir, "spark"),
                    "streamsets-datacollector-spark-bootstrap");
        }
        try {
            etcDir = createDirectoryClone(etcDir, "etc", stagingDir);
            InputStream clusterLog4jProperties;
            if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
                clusterLog4jProperties = Utils.checkNotNull(
                        getClass().getResourceAsStream("/cluster-spark-log4j.properties"),
                        "Cluster Log4J Properties");
                File log4jProperty = new File(etcDir, runtimeInfo.getLog4jPropertiesFileName());
                if (!log4jProperty.isFile()) {
                    throw new IllegalStateException(
                            Utils.format("Log4j config file doesn't exist: '{}'", log4jProperty.getAbsolutePath()));
                }
                LOG.info("Copying log4j properties for mesos cluster mode");
                FileUtils.copyInputStreamToFile(clusterLog4jProperties, log4jProperty);
            }
            PipelineInfo pipelineInfo = Utils.checkNotNull(pipelineConfiguration.getInfo(), "Pipeline Info");
            String pipelineName = pipelineInfo.getName();
            File rootDataDir = new File(etcDir, "data");
            File pipelineBaseDir = new File(rootDataDir, PipelineDirectoryUtil.PIPELINE_INFO_BASE_DIR);
            File pipelineDir = new File(pipelineBaseDir, PipelineUtils.escapedPipelineName(pipelineName));
            if (!pipelineDir.exists()) {
                if (!pipelineDir.mkdirs()) {
                    throw new RuntimeException("Failed to create pipeline directory " + pipelineDir.getPath());
                }
            }
            File pipelineFile = new File(pipelineDir, FilePipelineStoreTask.PIPELINE_FILE);
            ObjectMapperFactory.getOneLine().writeValue(pipelineFile,
                    BeanHelper.wrapPipelineConfiguration(pipelineConfiguration));
            File infoFile = new File(pipelineDir, FilePipelineStoreTask.INFO_FILE);
            ObjectMapperFactory.getOneLine().writeValue(infoFile, BeanHelper.wrapPipelineInfo(pipelineInfo));
            Utils.checkNotNull(ruleDefinitions, "ruleDefinitions");
            File rulesFile = new File(pipelineDir, FilePipelineStoreTask.RULES_FILE);
            ObjectMapperFactory.getOneLine().writeValue(rulesFile, BeanHelper.wrapRuleDefinitions(ruleDefinitions));
            sdcPropertiesFile = new File(etcDir, "sdc.properties");
            if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
                String hdfsS3ConfDirValue = PipelineBeanCreator.get().getHdfsS3ConfDirectory(pipelineConfiguration);
                if (hdfsS3ConfDirValue != null && !hdfsS3ConfDirValue.isEmpty()) {
                    File hdfsS3ConfDir = new File(resourcesDir, hdfsS3ConfDirValue).getAbsoluteFile();
                    if (!hdfsS3ConfDir.exists()) {
                        String msg = Utils.format("HDFS/S3 Checkpoint Configuration Directory '{}' doesn't exist",
                                hdfsS3ConfDir.getPath());
                        throw new IllegalArgumentException(msg);
                    } else {
                        File coreSite = new File(hdfsS3ConfDir, "core-site.xml");
                        if (!coreSite.exists()) {
                            String msg = Utils.format(
                                    "HDFS/S3 Checkpoint Configuration file core-site.xml '{}' doesn't exist",
                                    coreSite.getPath());
                            throw new IllegalStateException(msg);
                        }
                        sourceConfigs.put("hdfsS3ConfDir", hdfsS3ConfDirValue);
                    }
                } else {
                    throw new IllegalStateException("HDFS/S3 Checkpoint configuration directory is required");
                }
            }
            rewriteProperties(sdcPropertiesFile, sourceConfigs, sourceInfo, clusterToken,
                    Optional.fromNullable(mesosURL));
            TarFileCreator.createTarGz(etcDir, etcTarGz);
        } catch (RuntimeException ex) {
            String msg = errorString("serializing etc directory: {}", ex);
            throw new RuntimeException(msg, ex);
        }
        File log4jProperties = new File(stagingDir, "log4j.properties");
        InputStream clusterLog4jProperties = null;
        try {
            if (executionMode == ExecutionMode.CLUSTER_BATCH) {
                clusterLog4jProperties = Utils.checkNotNull(
                        getClass().getResourceAsStream("/cluster-mr-log4j.properties"), "Cluster Log4J Properties");
            } else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
                clusterLog4jProperties = Utils.checkNotNull(
                        getClass().getResourceAsStream("/cluster-spark-log4j.properties"),
                        "Cluster Log4J Properties");
            }
            if (clusterLog4jProperties != null) {
                FileUtils.copyInputStreamToFile(clusterLog4jProperties, log4jProperties);
            }
        } catch (IOException ex) {
            String msg = errorString("copying log4j configuration: {}", ex);
            throw new RuntimeException(msg, ex);
        } finally {
            if (clusterLog4jProperties != null) {
                IOUtils.closeQuietly(clusterLog4jProperties);
            }
        }
        addKerberosConfiguration(environment);
        errors.clear();
        PipelineConfigBean config = PipelineBeanCreator.get().create(pipelineConfiguration, errors);
        Utils.checkArgument(config != null, Utils.formatL("Invalid pipeline configuration: {}", errors));
        String numExecutors = sourceInfo.get(ClusterModeConstants.NUM_EXECUTORS_KEY);
        List<String> args;
        File hostingDir = null;
        if (executionMode == ExecutionMode.CLUSTER_BATCH) {
            LOG.info("Submitting MapReduce Job");
            environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MAPREDUCE);
            args = generateMRArgs(clusterManager.getAbsolutePath(), String.valueOf(config.clusterSlaveMemory),
                    config.clusterSlaveJavaOpts, libsTarGz.getAbsolutePath(), etcTarGz.getAbsolutePath(),
                    resourcesTarGz.getAbsolutePath(), log4jProperties.getAbsolutePath(),
                    bootstrapJar.getAbsolutePath(), sdcPropertiesFile.getAbsolutePath(),
                    clusterBootstrapJar.getAbsolutePath(), jarsToShip);
        } else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
            LOG.info("Submitting Spark Job on Yarn");
            environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
            args = generateSparkArgs(clusterManager.getAbsolutePath(), String.valueOf(config.clusterSlaveMemory),
                    config.clusterSlaveJavaOpts, numExecutors, libsTarGz.getAbsolutePath(),
                    etcTarGz.getAbsolutePath(), resourcesTarGz.getAbsolutePath(), log4jProperties.getAbsolutePath(),
                    bootstrapJar.getAbsolutePath(), jarsToShip, clusterBootstrapJar.getAbsolutePath());
        } else if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
            LOG.info("Submitting Spark Job on Mesos");
            environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MESOS);
            environment.put(STAGING_DIR, stagingDir.getAbsolutePath());
            environment.put(MESOS_UBER_JAR_PATH, clusterBootstrapJar.getAbsolutePath());
            environment.put(MESOS_UBER_JAR, clusterBootstrapJar.getName());
            environment.put(ETC_TAR_ARCHIVE, "etc.tar.gz");
            environment.put(LIBS_TAR_ARCHIVE, "libs.tar.gz");
            environment.put(RESOURCES_TAR_ARCHIVE, "resources.tar.gz");
            hostingDir = new File(runtimeInfo.getDataDir(),
                    Utils.checkNotNull(mesosHostingJarDir, "mesos jar dir cannot be null"));
            if (!hostingDir.mkdirs()) {
                throw new RuntimeException("Couldn't create hosting dir: " + hostingDir.toString());
            }
            environment.put(MESOS_HOSTING_JAR_DIR, hostingDir.getAbsolutePath());
            args = generateMesosArgs(clusterManager.getAbsolutePath(), config.mesosDispatcherURL,
                    Utils.checkNotNull(mesosURL, "mesos jar url cannot be null"));
        } else {
            throw new IllegalStateException(Utils.format("Incorrect execution mode: {}", executionMode));
        }
        SystemProcess process = systemProcessFactory.create(ClusterProviderImpl.class.getSimpleName(), outputDir,
                args);
        LOG.info("Starting: " + process);
        try {
            process.start(environment);
            long start = System.currentTimeMillis();
            Set<String> applicationIds = new HashSet<>();
            while (true) {
                long elapsedSeconds = TimeUnit.SECONDS.convert(System.currentTimeMillis() - start,
                        TimeUnit.MILLISECONDS);
                LOG.debug("Waiting for application id, elapsed seconds: " + elapsedSeconds);
                if (applicationIds.size() > 1) {
                    logOutput("unknown", process);
                    throw new IllegalStateException(
                            errorString("Found more than one application id: {}", applicationIds));
                } else if (!applicationIds.isEmpty()) {
                    String appId = applicationIds.iterator().next();
                    logOutput(appId, process);
                    ApplicationState applicationState = new ApplicationState();
                    applicationState.setId(appId);
                    applicationState.setSdcToken(clusterToken);
                    if (mesosHostingJarDir != null) {
                        applicationState.setDirId(mesosHostingJarDir);
                    }
                    return applicationState;
                }
                if (!ThreadUtil.sleep(1000)) {
                    if (hostingDir != null) {
                        FileUtils.deleteQuietly(hostingDir);
                    }
                    throw new IllegalStateException("Interrupted while waiting for pipeline to start");
                }
                List<String> lines = new ArrayList<>();
                lines.addAll(process.getOutput());
                lines.addAll(process.getError());
                Matcher m;
                for (String line : lines) {
                    if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
                        m = MESOS_DRIVER_ID_REGEX.matcher(line);
                    } else {
                        m = YARN_APPLICATION_ID_REGEX.matcher(line);
                    }
                    if (m.find()) {
                        LOG.info("Found application id " + m.group(1));
                        applicationIds.add(m.group(1));
                    }
                    m = NO_VALID_CREDENTIALS.matcher(line);
                    if (m.find()) {
                        LOG.info("Kerberos Error found on line: " + line);
                        String msg = "Kerberos Error: " + m.group(1);
                        throw new IOException(msg);
                    }
                }
                if (elapsedSeconds > timeToWaitForFailure) {
                    logOutput("unknown", process);
                    String msg = Utils.format(
                            "Timed out after waiting {} seconds for cluster application to start. "
                                    + "Submit command {} alive.",
                            elapsedSeconds, (process.isAlive() ? "is" : "is not"));
                    if (hostingDir != null) {
                        FileUtils.deleteQuietly(hostingDir);
                    }
                    throw new IllegalStateException(msg);
                }
            }
        } finally {
            process.cleanup();
        }
    }

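    /**
     * Builds the "<clusterManager> start" argument list for submitting the
     * BootstrapMesosDriver class to a Mesos dispatcher in cluster deploy mode,
     * pinned to a single executor core.
     */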
    private List<String> generateMesosArgs(String clusterManager, String mesosDispatcherURL, String mesosJar) {
        List<String> args = new ArrayList<>();
        args.add(clusterManager);
        args.add("start");
        args.add("--deploy-mode");
        args.add("cluster");
        // total executor cores option currently doesn't work for spark on mesos
        args.add("--total-executor-cores");
        args.add("1");
        args.add("--master");
        args.add(mesosDispatcherURL);
        args.add("--class");
        args.add("com.streamsets.pipeline.mesos.BootstrapMesosDriver");
        args.add(mesosJar);
        return args;
    }

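    /**
     * Builds the argument list for a MapReduce submit: ships the tarballs via
     * -archives, the bootstrap jars via -libjars, and passes the slave memory
     * and java opts (including the -javaagent flag) to the job.
     */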
    private List<String> generateMRArgs(String clusterManager, String slaveMemory, String javaOpts,
            String libsTarGz, String etcTarGz, String resourcesTarGz, String log4jProperties, String bootstrapJar,
            String sdcPropertiesFile, String clusterBootstrapJar, List<String> jarsToShip) {
        List<String> args = new ArrayList<>();
        args.add(clusterManager);
        args.add("start");
        args.add("jar");
        args.add(clusterBootstrapJar);
        args.add("com.streamsets.pipeline.BootstrapClusterBatch");
        args.add("-archives");
        args.add(Joiner.on(",").join(libsTarGz, etcTarGz, resourcesTarGz));
        args.add("-D");
        args.add("mapreduce.job.log4j-properties-file=" + log4jProperties);
        args.add("-libjars");
        StringBuilder libJarString = new StringBuilder(bootstrapJar);
        for (String jarToShip : jarsToShip) {
            libJarString.append(",").append(jarToShip);
        }
        args.add(libJarString.toString());
        args.add(sdcPropertiesFile);
        args.add(Joiner.on(" ").join(String.format("-Xmx%sm", slaveMemory), javaOpts,
                "-javaagent:./" + (new File(bootstrapJar)).getName()));
        return args;
    }

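    /**
     * Builds the spark-submit style argument list for yarn-cluster mode: one
     * core per executor, tarballs shipped via --archives, log4j config via
     * --files, bootstrap jars via --jars, and the -javaagent flag plus java
     * opts in spark.executor.extraJavaOptions.
     */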
    private List<String> generateSparkArgs(String clusterManager, String slaveMemory, String javaOpts,
            String numExecutors, String libsTarGz, String etcTarGz, String resourcesTarGz, String log4jProperties,
            String bootstrapJar, List<String> jarsToShip, String clusterBootstrapJar) {
        List<String> args = new ArrayList<>();
        args.add(clusterManager);
        args.add("start");
        // we only support yarn-cluster mode
        args.add("--master");
        args.add("yarn-cluster");
        args.add("--executor-memory");
        args.add(slaveMemory + "m");
        // one single sdc per executor
        args.add("--executor-cores");
        args.add("1");

        // Number of Executors based on the origin parallelism
        checkNumExecutors(numExecutors);
        args.add("--num-executors");
        args.add(numExecutors);

        // ship our stage libs and etc directory
        args.add("--archives");
        args.add(Joiner.on(",").join(libsTarGz, etcTarGz, resourcesTarGz));
        // required or else we won't be able to log on cluster
        args.add("--files");
        args.add(log4jProperties);
        args.add("--jars");
        StringBuilder libJarString = new StringBuilder(bootstrapJar);
        for (String jarToShip : jarsToShip) {
            libJarString.append(",").append(jarToShip);
        }
        args.add(libJarString.toString());
        // use our javaagent and java opt configs
        args.add("--conf");
        args.add("spark.executor.extraJavaOptions="
                + Joiner.on(" ").join("-javaagent:./" + (new File(bootstrapJar)).getName(), javaOpts));
        // main class
        args.add("--class");
        args.add("com.streamsets.pipeline.BootstrapClusterStreaming");
        args.add(clusterBootstrapJar);
        return args;
    }

    private void addToSourceConfigs(Map<String, String> sourceConfigs, List<Map<String, Object>> arrayListValues) {
        for (Map<String, Object> map : arrayListValues) {
            String confKey = null;
            String confValue = null;
            for (Map.Entry<String, Object> mapEntry : map.entrySet()) {
                String mapKey = mapEntry.getKey();
                Object mapValue = mapEntry.getValue();
                switch (mapKey) {
                case "key":
                    // Assuming the key is always string
                    confKey = String.valueOf(mapValue);
                    break;
                case "value":
                    confValue = canCastToString(mapValue) ? String.valueOf(mapValue) : null;
                    break;
                default:
                    confKey = mapKey;
                    confValue = canCastToString(mapValue) ? String.valueOf(mapValue) : null;
                    break;
                }
                if (confKey != null && confValue != null) {
                    LOG.debug("Adding to source configs " + confKey + "=" + confValue);
                    sourceConfigs.put(confKey, confValue);
                }
            }
        }
    }

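    /**
     * True for values that serialize cleanly into a property string. Note that
     * getClass().isPrimitive() is always false for an Object reference
     * (primitives arrive boxed), so the instanceof checks do the real work.
     */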
    private boolean canCastToString(Object value) {
        return value instanceof String || value instanceof Number || value.getClass().isPrimitive()
                || value instanceof Boolean;
    }

    private void checkNumExecutors(String numExecutorsString) {
        Utils.checkNotNull(numExecutorsString, "Number of executors not found");
        int numExecutors;
        try {
            numExecutors = Integer.parseInt(numExecutorsString);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Number of executors is not a valid integer");
        }
        Utils.checkArgument(numExecutors > 0, "Number of executors cannot be less than 1");
    }

    private enum ClusterOrigin {
        HDFS, KAFKA;
    }

    private String getMesosHostingDir(String topic, String pipelineName) {
        String sdcId = String.valueOf(runtimeInfo.getId());
        String mesosHostingDir = sdcId + File.separatorChar + topic + File.separatorChar + pipelineName;
        return mesosHostingDir;
    }

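    /**
     * Returns a URL-safe Base64 encoding of the SHA-256 digest of the hosting
     * dir path, used as a stable directory name for hosting the Mesos uber jar.
     */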
    private String getSha256(String mesosHostingDir) throws UnsupportedEncodingException {
        MessageDigest md;
        try {
            md = MessageDigest.getInstance("SHA-256");
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException(e);
        }
        md.update(mesosHostingDir.getBytes("UTF-8"));
        return Base64.encodeBase64URLSafeString(md.digest());
    }
}