List of usage examples for the org.apache.spark.launcher.SparkLauncher constructor
public SparkLauncher(Map<String, String> env)
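Before the project examples below, here is a minimal, self-contained sketch of the constructor in isolation. The map passed to new SparkLauncher(env) adds environment variables to the spark-submit child process that the launcher starts. The jar path, main class, and master URL are placeholders, not taken from any of the examples.

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class MinimalLauncherExample {
    public static void main(String[] args) throws Exception {
        Map<String, String> env = new HashMap<>();
        env.put("SPARK_PRINT_LAUNCH_COMMAND", "1"); // have spark-submit log the final launch command

        SparkAppHandle handle = new SparkLauncher(env)
                .setAppResource("/path/to/app.jar")   // placeholder jar
                .setMainClass("com.example.MyApp")    // placeholder main class
                .setMaster("local[*]")
                .startApplication();

        // Poll until the application reaches a terminal state.
        while (!handle.getState().isFinal()) {
            Thread.sleep(1000);
        }
        System.out.println("final state: " + handle.getState());
    }
}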
From source file: com.ebay.logstream.runner.spark.SparkPipelineRunner.java
License: Apache License
@Override
public Map<String, Object> run(Pipeline pipeline) {
    Map<String, Object> result = new HashMap<>();
    Map<String, String> env = Maps.newHashMap();
    env.put("SPARK_PRINT_LAUNCH_COMMAND", "1");
    SparkLauncher launcher = new SparkLauncher(env);
    launcher.setAppResource(pipeline.getContext().getPipelineJarPath());
    launcher.setAppName(pipeline.getContext().getPipelineName());
    launcher.setMainClass(SparkPipelineRunner.class.getCanonicalName());
    launcher.setSparkHome(pipeline.getContext().getConfig().getString(SPARK_HOME_KEY));
    launcher.setJavaHome(pipeline.getContext().getConfig().getString(JAVA_HOME));

    // set app args
    launcher.addAppArgs(pipeline.getContext().getPipeline());
    launcher.addAppArgs(pipeline.getContext().getPipelineName());
    launcher.addAppArgs(pipeline.getContext().getDeployMode().toString());
    launcher.addAppArgs(pipeline.getContext().getInputParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getFilterParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getOutputParallelism() + "");

    // workaround (for getting the driver pid): pass a UUID as an app arg and grep for it later
    String uuid = UUID.randomUUID().toString();
    launcher.addAppArgs(uuid);
    launcher.addAppArgs();

    launcher.setVerbose(true);
    launcher.addSparkArg("--verbose");
    if (pipeline.getContext().getDeployMode() == LogStormConstants.DeployMode.LOCAL) {
        launcher.setMaster("local[*]");
    } else {
        launcher.setMaster(pipeline.getContext().getConfig().getString(SPARK_MASTER_KEY));
    }

    try {
        SparkAppHandle handle = launcher.startApplication();
        while (handle.getAppId() == null) {
            Thread.sleep(1000);
        }
        result.put("applicationId", handle.getAppId());
        LOG.info("generate spark applicationId " + handle.getAppId());

        // get driver pid
        String cmd = "ps -ef | grep " + uuid + " | grep -v grep | awk '{print $2}'";
        LOG.info("cmd {}", cmd);
        Process process = Runtime.getRuntime().exec(new String[] { "/bin/sh", "-c", cmd });
        synchronized (process) {
            try {
                process.wait();
            } catch (Exception e) {
                LOG.warn("failed to wait driver pid: ", e);
            }
        }
        InputStream inputStream = process.getInputStream();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
        String pid;
        while ((pid = bufferedReader.readLine()) != null) {
            result.put("driverPid", pid);
            System.out.println(pid);
        }
        bufferedReader.close();
    } catch (Exception e) {
        LOG.error("failed to start as a spark application, ", e);
    }
    return result;
}
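The example above polls handle.getAppId() and then shells out to ps to recover the driver pid. As an aside, the launcher API can also push state changes to the caller; the following is an illustrative helper (not from the eBay source) that blocks until the application reaches a final state using a SparkAppHandle.Listener instead of polling.

import java.util.concurrent.CountDownLatch;
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public final class LauncherWaiter {
    // Block until the launched application reaches a terminal state.
    public static SparkAppHandle.State runAndWait(SparkLauncher launcher) throws Exception {
        final CountDownLatch done = new CountDownLatch(1);
        SparkAppHandle handle = launcher.startApplication(new SparkAppHandle.Listener() {
            @Override
            public void stateChanged(SparkAppHandle h) {
                if (h.getState().isFinal()) {
                    done.countDown();
                }
            }

            @Override
            public void infoChanged(SparkAppHandle h) {
                // called when application info such as the app id becomes available
            }
        });
        done.await();
        return handle.getState();
    }
}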
From source file: com.streamsets.datacollector.pipeline.executor.spark.yarn.YarnAppLauncher.java
License: Apache License
@VisibleForTesting
protected SparkLauncher getLauncher() {
    return new SparkLauncher(yarnConfigs.env);
}
From source file: com.thinkbiganalytics.nifi.v2.spark.ExecuteSparkJob.java
License: Apache License
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    String PROVENANCE_JOB_STATUS_KEY = "Job Status";
    String PROVENANCE_SPARK_EXIT_CODE_KEY = "Spark Exit Code";

    try {
        PROVENANCE_JOB_STATUS_KEY = context.getName() + " Job Status";
        PROVENANCE_SPARK_EXIT_CODE_KEY = context.getName() + " Spark Exit Code";

        /* Configuration parameters for spark launcher */
        String appJar = getApplicationJar(context, flowFile);
        String mainClass = getMainClass(context, flowFile);
        String[] appArgs = getMainArgs(context, flowFile);
        String extraJars = getExtraJars(context, flowFile);
        String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim();
        String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE).evaluateAttributeExpressions(flowFile).getValue();
        String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String numberOfExecutors = context.getProperty(NUMBER_EXECUTORS).evaluateAttributeExpressions(flowFile).getValue();
        String sparkApplicationName = context.getProperty(SPARK_APPLICATION_NAME).evaluateAttributeExpressions(flowFile).getValue();
        String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
        String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
        String principal = context.getProperty(kerberosPrincipal).getValue();
        String keyTab = context.getProperty(kerberosKeyTab).getValue();
        String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        String sparkConfs = context.getProperty(SPARK_CONFS).evaluateAttributeExpressions(flowFile).getValue();
        String extraFiles = context.getProperty(EXTRA_SPARK_FILES).evaluateAttributeExpressions(flowFile).getValue();
        Integer sparkProcessTimeout = context.getProperty(PROCESS_TIMEOUT).evaluateAttributeExpressions(flowFile)
                .asTimePeriod(TimeUnit.SECONDS).intValue();
        String datasourceIds = context.getProperty(DATASOURCES).evaluateAttributeExpressions(flowFile).getValue();
        String catalogDataSourceIds = context.getProperty(CATALOG_DATASOURCES).evaluateAttributeExpressions(flowFile).getValue();
        String dataSetIds = context.getProperty(DATASETS).evaluateAttributeExpressions(flowFile).getValue();
        MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE)
                .asControllerService(MetadataProviderService.class);

        final List<String> extraJarPaths = getExtraJarPaths(extraJars);

        // If all 3 fields are filled out then assume Kerberos is enabled and the user should be authenticated
        boolean isAuthenticated = !StringUtils.isEmpty(principal) && !StringUtils.isEmpty(keyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources);
        try {
            if (isAuthenticated && isSecurityEnabled(hadoopConfigurationResources)) {
                logger.info("Security is enabled");

                if (principal.equals("") && keyTab.equals("")) {
                    logger.error("Kerberos Principal and Kerberos KeyTab information missing in Kerberos enabled cluster. {} ",
                            new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }

                logger.info("User authentication initiated");

                boolean authenticationStatus = new ApplySecurityPolicy().validateUserWithKerberos(logger,
                        hadoopConfigurationResources, principal, keyTab);
                if (authenticationStatus) {
                    logger.info("User authenticated successfully.");
                } else {
                    logger.error("User authentication failed. {} ", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
            }
        } catch (IOException e1) {
            logger.error("Unknown exception occurred while authenticating user : {} and flow file: {}",
                    new Object[] { e1.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        } catch (Exception unknownException) {
            logger.error("Unknown exception occurred while validating user : {}. {} ",
                    new Object[] { unknownException.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();

        // Build environment
        final Map<String, String> env = getDatasources(session, flowFile, PROVENANCE_JOB_STATUS_KEY, datasourceIds,
                dataSetIds, catalogDataSourceIds, metadataService, extraJarPaths);
        if (env != null) {
            StringBuilder datasourceSummary = new StringBuilder();
            if (env.containsKey("DATASETS")) {
                final int count = StringUtils.countMatches("DATASETS", ',') + 1;
                datasourceSummary.append(count).append(" datasets");
            }
            if (env.containsKey("DATASOURCES")) {
                final int count = StringUtils.countMatches("DATASOURCES", ',') + 1;
                (datasourceSummary.length() > 0 ? datasourceSummary.append("; ") : datasourceSummary)
                        .append(count).append(" legacy datasources");
            }
            if (env.containsKey("CATALOG_DATASOURCES")) {
                final int count = StringUtils.countMatches("CATALOG_DATASOURCES", ',') + 1;
                (datasourceSummary.length() > 0 ? datasourceSummary.append("; ") : datasourceSummary)
                        .append(count).append(" catalog datasources");
            }
            String summaryString = datasourceSummary.toString();
            if (StringUtils.isNotBlank(summaryString)) {
                flowFile = session.putAttribute(flowFile, "Data source usage", summaryString);
            }
        } else {
            return;
        }

        addEncryptionSettings(env);

        /* Launch the spark job as a child process */
        SparkLauncher launcher = new SparkLauncher(env)
                .setAppResource(appJar)
                .setMainClass(mainClass)
                .setMaster(sparkMaster)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SPARK_NUM_EXECUTORS, numberOfExecutors)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(SPARK_NETWORK_TIMEOUT_CONFIG_NAME, networkTimeout)
                .setSparkHome(sparkHome)
                .setAppName(sparkApplicationName);

        OptionalSparkConfigurator optionalSparkConf = new OptionalSparkConfigurator(launcher)
                .setDeployMode(sparkMaster, sparkYarnDeployMode)
                .setAuthentication(isAuthenticated, keyTab, principal)
                .addAppArgs(appArgs)
                .addSparkArg(sparkConfs)
                .addExtraJars(extraJarPaths)
                .setYarnQueue(yarnQueue)
                .setExtraFiles(extraFiles);

        // note: getLaucnher() is spelled this way in the source API
        Process spark = optionalSparkConf.getLaucnher().launch();

        /* Read/clear the process input stream */
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger,
                spark.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        /* Read/clear the process error stream */
        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger,
                spark.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for Spark job to complete");

        /* Wait for job completion */
        boolean completed = spark.waitFor(sparkProcessTimeout, TimeUnit.SECONDS);
        if (!completed) {
            spark.destroyForcibly();
            getLog().error("Spark process timed out after {} seconds using flow file: {} ",
                    new Object[] { sparkProcessTimeout, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        int exitCode = spark.exitValue();

        flowFile = session.putAttribute(flowFile, PROVENANCE_SPARK_EXIT_CODE_KEY, Integer.toString(exitCode));

        if (exitCode != 0) {
            logger.error("ExecuteSparkJob for {} and flowfile: {} completed with failed status {} ",
                    new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed");
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("ExecuteSparkJob for {} and flowfile: {} completed with success status {} ",
                    new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Success");
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute Spark job {},{}", new Object[] { flowFile, e.getMessage() }, e);
        flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed With Exception");
        flowFile = session.putAttribute(flowFile, "Spark Exception:", e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
    }
}
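The InputStreamReaderRunnable used above is a Kylo helper class whose implementation is not shown here. A minimal illustration of what such a stream drainer typically does (this is a stand-in sketch, not the Kylo class) follows; it reads a child process stream continuously so spark-submit cannot block on a full output buffer.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

class StreamDrainer implements Runnable {
    private final InputStream stream;

    StreamDrainer(InputStream stream) {
        this.stream = stream;
    }

    @Override
    public void run() {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line); // the Kylo helper forwards lines to the processor log instead
            }
        } catch (IOException e) {
            // stream closes when the child process exits
        }
    }
}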
From source file: com.thinkbiganalytics.spark.shell.SparkShellProcessBuilder.java
License: Apache License
/**
 * Constructs a {@code SparkShellProcessBuilder}.
 */
public SparkShellProcessBuilder() {
    // Generate client id and secret
    clientId = UUID.randomUUID().toString();
    clientSecret = UUID.randomUUID().toString();

    // Create Spark Launcher
    final Map<String, String> env = ImmutableMap.<String, String>builder()
            .put(CLIENT_ID, clientId)
            .put(CLIENT_SECRET, clientSecret)
            .build();
    launcher = new SparkLauncher(env)
            .setConf("spark.driver.userClassPathFirst", "true")
            .setConf("spark.yarn.appMasterEnv." + CLIENT_ID, clientId)
            .setConf("spark.yarn.appMasterEnv." + CLIENT_SECRET, clientSecret)
            .setMainClass("com.thinkbiganalytics.spark.SparkShellApp");
}
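Passing the credentials twice is deliberate: the constructor env map only reaches the local spark-submit child process, while the spark.yarn.appMasterEnv.* settings propagate them to the driver when it runs inside the YARN ApplicationMaster in cluster mode. A sketch of the consuming side is below; the actual keys behind the CLIENT_ID and CLIENT_SECRET constants are not shown in this snippet, so the names used here are placeholders.

// Placeholder env var names; the real keys are the values of the CLIENT_ID and
// CLIENT_SECRET constants in SparkShellProcessBuilder.
String clientId = System.getenv("CLIENT_ID");
String clientSecret = System.getenv("CLIENT_SECRET");
if (clientId == null || clientSecret == null) {
    throw new IllegalStateException("Spark Shell credentials were not passed in the environment");
}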
From source file: org.datacleaner.spark.ApplicationDriver.java
License: Open Source License
public SparkLauncher createSparkLauncher(File hadoopConfDir, String configurationHdfsPath, String jobHdfsPath)
        throws Exception {
    // mimic environment variables
    final Map<String, String> env = new HashMap<>();
    env.put("YARN_CONF_DIR", hadoopConfDir.getAbsolutePath());

    final SparkLauncher sparkLauncher = new SparkLauncher(env);
    sparkLauncher.setSparkHome(_sparkHome);
    sparkLauncher.setMaster("yarn-cluster");
    sparkLauncher.setAppName("DataCleaner");

    final MutableRef<String> primaryJar = new MutableRef<>();
    final List<String> jars = buildJarFiles(primaryJar);
    logger.info("Using JAR files: {}", jars);

    for (final String jar : jars) {
        sparkLauncher.addJar(jar);
    }

    sparkLauncher.setMainClass(Main.class.getName());

    // the primary jar is always the first argument
    sparkLauncher.addAppArgs(primaryJar.get());
    sparkLauncher.addAppArgs(toHdfsPath(configurationHdfsPath));
    sparkLauncher.addAppArgs(toHdfsPath(jobHdfsPath));

    return sparkLauncher;
}
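This method returns a configured launcher rather than starting it, so the caller decides when to launch. A minimal sketch of such a caller (not part of the DataCleaner source) is shown below; it simply drains spark-submit output and returns its exit code.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.spark.launcher.SparkLauncher;

// Run a pre-configured launcher (such as the one built above) as a child process
// and wait for spark-submit to finish.
static int launchAndWait(SparkLauncher sparkLauncher) throws Exception {
    Process process = sparkLauncher.launch();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line); // spark-submit output; stderr should be drained similarly
        }
    }
    return process.waitFor();
}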