Example usage for org.apache.spark.launcher SparkLauncher SparkLauncher(Map<String, String> env)

Introduction

This page collects example usages of the org.apache.spark.launcher.SparkLauncher constructor that takes a map of environment variables.

Prototype

public SparkLauncher(Map<String, String> env) 

Document

Creates a launcher that will set the given environment variables in the child.
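For orientation before the project examples below, here is a minimal, self-contained sketch of this constructor in use. The Spark home, application jar, main class, and arguments are placeholders for illustration, not values taken from any of the quoted projects.

import java.util.HashMap;
import java.util.Map;

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class SparkLauncherEnvExample {

    public static void main(String[] args) throws Exception {
        // Entries in this map are set as environment variables of the spark-submit child process.
        Map<String, String> env = new HashMap<>();
        env.put("SPARK_PRINT_LAUNCH_COMMAND", "1");

        SparkLauncher launcher = new SparkLauncher(env)
                .setSparkHome("/opt/spark")            // placeholder Spark installation
                .setAppResource("/tmp/my-app.jar")     // placeholder application jar
                .setMainClass("com.example.MyApp")     // placeholder main class
                .setMaster("local[*]")
                .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
                .addAppArgs("--input", "/tmp/data");   // placeholder application arguments

        // startApplication() launches spark-submit as a child process and returns a handle
        // that can be polled for the application id and state.
        SparkAppHandle handle = launcher.startApplication();
        while (!handle.getState().isFinal()) {
            Thread.sleep(1000);
        }
        System.out.println("Application " + handle.getAppId() + " finished in state " + handle.getState());
    }
}

The alternative launch() method, used in some of the examples below, starts the same child process but returns the raw java.lang.Process instead of a SparkAppHandle.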

Usage

From source file:com.ebay.logstream.runner.spark.SparkPipelineRunner.java

License:Apache License

@Override
public Map<String, Object> run(Pipeline pipeline) {
    Map<String, Object> result = new HashMap<>();
    Map<String, String> env = Maps.newHashMap();
    env.put("SPARK_PRINT_LAUNCH_COMMAND", "1");
    SparkLauncher launcher = new SparkLauncher(env);
    launcher.setAppResource(pipeline.getContext().getPipelineJarPath());
    launcher.setAppName(pipeline.getContext().getPipelineName());
    launcher.setMainClass(SparkPipelineRunner.class.getCanonicalName());
    launcher.setSparkHome(pipeline.getContext().getConfig().getString(SPARK_HOME_KEY));
    launcher.setJavaHome(pipeline.getContext().getConfig().getString(JAVA_HOME));
    //set app args
    launcher.addAppArgs(pipeline.getContext().getPipeline());
    launcher.addAppArgs(pipeline.getContext().getPipelineName());
    launcher.addAppArgs(pipeline.getContext().getDeployMode().toString());
    launcher.addAppArgs(pipeline.getContext().getInputParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getFilterParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getOutputParallelism() + "");
    //work around(for get driver pid)
    String uuid = UUID.randomUUID().toString();
    launcher.addAppArgs(uuid);
    launcher.setVerbose(true);
    launcher.addSparkArg("--verbose");
    if (pipeline.getContext().getDeployMode() == LogStormConstants.DeployMode.LOCAL) {
        launcher.setMaster("local[*]");
    } else {
        launcher.setMaster(pipeline.getContext().getConfig().getString(SPARK_MASTER_KEY));
    }

    try {
        SparkAppHandle handle = launcher.startApplication();
        while (handle.getAppId() == null) {
            Thread.sleep(1000);
        }
        result.put("applicationId", handle.getAppId());
        LOG.info("generate spark applicationId " + handle.getAppId());
        //get driver pid
        String cmd = "ps -ef | grep " + uuid + " | grep -v grep | awk '{print $2}'";
        LOG.info("cmd {}", cmd);
        Process process = Runtime.getRuntime().exec(new String[] { "/bin/sh", "-c", cmd });
        try {
            // wait for the shell command to finish before reading its output
            process.waitFor();
        } catch (Exception e) {
            LOG.warn("failed to wait for driver pid command: ", e);
        }
        InputStream inputStream = process.getInputStream();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
        String pid;
        while ((pid = bufferedReader.readLine()) != null) {
            result.put("driverPid", pid);
            System.out.println(pid);
        }
        bufferedReader.close();
    } catch (Exception e) {
        LOG.error("failed to start as a spark application, ", e);
    }

    return result;
}

From source file:com.streamsets.datacollector.pipeline.executor.spark.yarn.YarnAppLauncher.java

License:Apache License

@VisibleForTesting
protected SparkLauncher getLauncher() {
    return new SparkLauncher(yarnConfigs.env);
}

From source file:com.thinkbiganalytics.nifi.v2.spark.ExecuteSparkJob.java

License:Apache License

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String PROVENANCE_JOB_STATUS_KEY = "Job Status";
    String PROVENANCE_SPARK_EXIT_CODE_KEY = "Spark Exit Code";

    try {

        PROVENANCE_JOB_STATUS_KEY = context.getName() + " Job Status";
        PROVENANCE_SPARK_EXIT_CODE_KEY = context.getName() + " Spark Exit Code";

        /* Configuration parameters for spark launcher */
        String appJar = getApplicationJar(context, flowFile);
        String mainClass = getMainClass(context, flowFile);
        String[] appArgs = getMainArgs(context, flowFile);
        String extraJars = getExtraJars(context, flowFile);
        String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue()
                .trim();
        String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE)
                .evaluateAttributeExpressions(flowFile).getValue();
        String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile)
                .getValue();
        String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile)
                .getValue();
        String numberOfExecutors = context.getProperty(NUMBER_EXECUTORS).evaluateAttributeExpressions(flowFile)
                .getValue();
        String sparkApplicationName = context.getProperty(SPARK_APPLICATION_NAME)
                .evaluateAttributeExpressions(flowFile).getValue();
        String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile)
                .getValue();
        String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile)
                .getValue();
        String principal = context.getProperty(kerberosPrincipal).getValue();
        String keyTab = context.getProperty(kerberosKeyTab).getValue();
        String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        String sparkConfs = context.getProperty(SPARK_CONFS).evaluateAttributeExpressions(flowFile).getValue();
        String extraFiles = context.getProperty(EXTRA_SPARK_FILES).evaluateAttributeExpressions(flowFile)
                .getValue();
        Integer sparkProcessTimeout = context.getProperty(PROCESS_TIMEOUT)
                .evaluateAttributeExpressions(flowFile).asTimePeriod(TimeUnit.SECONDS).intValue();
        String datasourceIds = context.getProperty(DATASOURCES).evaluateAttributeExpressions(flowFile)
                .getValue();
        String catalogDataSourceIds = context.getProperty(CATALOG_DATASOURCES)
                .evaluateAttributeExpressions(flowFile).getValue();
        String dataSetIds = context.getProperty(DATASETS).evaluateAttributeExpressions(flowFile).getValue();
        MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE)
                .asControllerService(MetadataProviderService.class);

        final List<String> extraJarPaths = getExtraJarPaths(extraJars);

        // If all 3 fields are filled out then assume kerberos is enabled, and user should be authenticated
        boolean isAuthenticated = !StringUtils.isEmpty(principal) && !StringUtils.isEmpty(keyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources);
        try {
            if (isAuthenticated && isSecurityEnabled(hadoopConfigurationResources)) {
                logger.info("Security is enabled");

                if (principal.equals("") && keyTab.equals("")) {
                    logger.error(
                            "Kerberos Principal and Kerberos KeyTab information missing in Kerboeros enabled cluster. {} ",
                            new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }

                logger.info("User authentication initiated");

                boolean authenticationStatus = new ApplySecurityPolicy().validateUserWithKerberos(logger,
                        hadoopConfigurationResources, principal, keyTab);
                if (authenticationStatus) {
                    logger.info("User authenticated successfully.");
                } else {
                    logger.error("User authentication failed.  {} ", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
            }
        } catch (IOException e1) {
            logger.error("Unknown exception occurred while authenticating user : {} and flow file: {}",
                    new Object[] { e1.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;

        } catch (Exception unknownException) {
            logger.error("Unknown exception occurred while validating user : {}.  {} ",
                    new Object[] { unknownException.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();

        // Build environment
        final Map<String, String> env = getDatasources(session, flowFile, PROVENANCE_JOB_STATUS_KEY,
                datasourceIds, dataSetIds, catalogDataSourceIds, metadataService, extraJarPaths);
        if (env != null) {
            StringBuilder datasourceSummary = new StringBuilder();

            if (env.containsKey("DATASETS")) {
                final int count = StringUtils.countMatches("DATASETS", ',') + 1;
                datasourceSummary.append(count).append(" datasets");
            }
            if (env.containsKey("DATASOURCES")) {
                final int count = StringUtils.countMatches("DATASOURCES", ',') + 1;
                (datasourceSummary.length() > 0 ? datasourceSummary.append("; ") : datasourceSummary)
                        .append(count).append(" legacy datasources");
            }
            if (env.containsKey("CATALOG_DATASOURCES")) {
                final int count = StringUtils.countMatches("CATALOG_DATASOURCES", ',') + 1;
                (datasourceSummary.length() > 0 ? datasourceSummary.append("; ") : datasourceSummary)
                        .append(count).append(" catalog datasources");
            }

            String summaryString = datasourceSummary.toString();
            if (StringUtils.isNotBlank(summaryString)) {
                flowFile = session.putAttribute(flowFile, "Data source usage", summaryString);
            }
        } else {
            return;
        }

        addEncryptionSettings(env);

        /* Launch the spark job as a child process */
        SparkLauncher launcher = new SparkLauncher(env).setAppResource(appJar).setMainClass(mainClass)
                .setMaster(sparkMaster).setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SPARK_NUM_EXECUTORS, numberOfExecutors)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(SPARK_NETWORK_TIMEOUT_CONFIG_NAME, networkTimeout).setSparkHome(sparkHome)
                .setAppName(sparkApplicationName);

        OptionalSparkConfigurator optionalSparkConf = new OptionalSparkConfigurator(launcher)
                .setDeployMode(sparkMaster, sparkYarnDeployMode)
                .setAuthentication(isAuthenticated, keyTab, principal).addAppArgs(appArgs)
                .addSparkArg(sparkConfs).addExtraJars(extraJarPaths).setYarnQueue(yarnQueue)
                .setExtraFiles(extraFiles);

        Process spark = optionalSparkConf.getLaucnher().launch();

        /* Read/clear the process input stream */
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, spark.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        /* Read/clear the process error stream */
        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, spark.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for Spark job to complete");

        /* Wait for job completion */
        boolean completed = spark.waitFor(sparkProcessTimeout, TimeUnit.SECONDS);
        if (!completed) {
            spark.destroyForcibly();
            getLog().error("Spark process timed out after {} seconds using flow file: {}  ",
                    new Object[] { sparkProcessTimeout, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        int exitCode = spark.exitValue();

        flowFile = session.putAttribute(flowFile, PROVENANCE_SPARK_EXIT_CODE_KEY, Integer.toString(exitCode));
        if (exitCode != 0) {
            logger.error("ExecuteSparkJob for {} and flowfile: {} completed with failed status {} ",
                    new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed");
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("ExecuteSparkJob for {} and flowfile: {} completed with success status {} ",
                    new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Success");
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute Spark job {},{}", new Object[] { flowFile, e.getMessage() }, e);
        flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed With Exception");
        flowFile = session.putAttribute(flowFile, "Spark Exception:", e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
    }
}

From source file:com.thinkbiganalytics.spark.shell.SparkShellProcessBuilder.java

License:Apache License

/**
 * Constructs a {@code SparkShellProcessBuilder}.
 */
public SparkShellProcessBuilder() {
    // Generate client id and secret
    clientId = UUID.randomUUID().toString();
    clientSecret = UUID.randomUUID().toString();

    // Create Spark Launcher
    final Map<String, String> env = ImmutableMap.<String, String>builder().put(CLIENT_ID, clientId)
            .put(CLIENT_SECRET, clientSecret).build();
    launcher = new SparkLauncher(env).setConf("spark.driver.userClassPathFirst", "true")
            .setConf("spark.yarn.appMasterEnv." + CLIENT_ID, clientId)
            .setConf("spark.yarn.appMasterEnv." + CLIENT_SECRET, clientSecret)
            .setMainClass("com.thinkbiganalytics.spark.SparkShellApp");
}

From source file:org.datacleaner.spark.ApplicationDriver.java

License:Open Source License

public SparkLauncher createSparkLauncher(File hadoopConfDir, String configurationHdfsPath, String jobHdfsPath)
        throws Exception {
    // mimic env. variables
    final Map<String, String> env = new HashMap<>();
    env.put("YARN_CONF_DIR", hadoopConfDir.getAbsolutePath());

    final SparkLauncher sparkLauncher = new SparkLauncher(env);

    sparkLauncher.setSparkHome(_sparkHome);
    sparkLauncher.setMaster("yarn-cluster");
    sparkLauncher.setAppName("DataCleaner");

    final MutableRef<String> primaryJar = new MutableRef<>();
    final List<String> jars = buildJarFiles(primaryJar);
    logger.info("Using JAR files: {}", jars);

    for (final String jar : jars) {
        sparkLauncher.addJar(jar);
    }
    sparkLauncher.setMainClass(Main.class.getName());

    // the primary jar is always the first argument
    sparkLauncher.addAppArgs(primaryJar.get());

    sparkLauncher.addAppArgs(toHdfsPath(configurationHdfsPath));
    sparkLauncher.addAppArgs(toHdfsPath(jobHdfsPath));

    return sparkLauncher;
}