List of usage examples for org.apache.spark.launcher.SparkLauncher#setAppName
@Override
public SparkLauncher setAppName(String appName)
From source file:com.ebay.logstream.runner.spark.SparkPipelineRunner.java
License:Apache License
@Override public Map<String, Object> run(Pipeline pipeline) { Map<String, Object> result = new HashMap<>(); Map<String, String> env = Maps.newHashMap(); env.put("SPARK_PRINT_LAUNCH_COMMAND", "1"); SparkLauncher launcher = new SparkLauncher(env); launcher.setAppResource(pipeline.getContext().getPipelineJarPath()); launcher.setAppName(pipeline.getContext().getPipelineName()); launcher.setMainClass(SparkPipelineRunner.class.getCanonicalName()); launcher.setSparkHome(pipeline.getContext().getConfig().getString(SPARK_HOME_KEY)); launcher.setJavaHome(pipeline.getContext().getConfig().getString(JAVA_HOME)); //set app args launcher.addAppArgs(pipeline.getContext().getPipeline()); launcher.addAppArgs(pipeline.getContext().getPipelineName()); launcher.addAppArgs(pipeline.getContext().getDeployMode().toString()); launcher.addAppArgs(pipeline.getContext().getInputParallelism() + ""); launcher.addAppArgs(pipeline.getContext().getFilterParallelism() + ""); launcher.addAppArgs(pipeline.getContext().getOutputParallelism() + ""); //work around(for get driver pid) String uuid = UUID.randomUUID().toString(); launcher.addAppArgs(uuid);//from www . j a v a 2s . 
c o m launcher.addAppArgs(); launcher.setVerbose(true); launcher.addSparkArg("--verbose"); if (pipeline.getContext().getDeployMode() == LogStormConstants.DeployMode.LOCAL) { launcher.setMaster("local[*]"); } else { launcher.setMaster(pipeline.getContext().getConfig().getString(SPARK_MASTER_KEY)); } try { SparkAppHandle handle = launcher.startApplication(); while (handle.getAppId() == null) { Thread.sleep(1000); } result.put("applicationId", handle.getAppId()); LOG.info("generate spark applicationId " + handle.getAppId()); //get driver pid String cmd = "ps -ef | grep " + uuid + " | grep -v grep | awk '{print $2}'"; LOG.info("cmd {}", cmd); Process process = Runtime.getRuntime().exec(new String[] { "/bin/sh", "-c", cmd }); synchronized (process) { try { process.wait(); } catch (Exception e) { LOG.warn("failed to wait driver pid: ", e); } } InputStream inputStream = process.getInputStream(); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); String pid; while ((pid = bufferedReader.readLine()) != null) { result.put("driverPid", pid); System.out.println(pid); } bufferedReader.close(); } catch (Exception e) { LOG.error("failed to start as a spark application, ", e); } return result; }
From source file:com.thinkbiganalytics.nifi.pyspark.core.ExecutePySpark.java
License:Apache License
/**
 * Configures a PySpark job from the processor properties, launches it through
 * {@link SparkLauncher}, waits for it to finish and routes the flow file accordingly.
 *
 * <p>If no flow file is queued a new one is created. When a Kerberos principal, keytab and
 * Hadoop configuration resources are all provided, the user is authenticated before the job
 * is built. The flow file goes to {@code REL_SUCCESS} when the job exits with status 0 and
 * to {@code REL_FAILURE} on a non-zero status, a failed authentication, or any exception.
 *
 * @param context provides the processor property values
 * @param session provides and routes the flow file
 * @throws ProcessException declared by the processor contract; failures seen here are
 *                          logged and routed to {@code REL_FAILURE} instead
 */
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();

    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }

    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES)
                .getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile)
                .getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS)
                .evaluateAttributeExpressions(flowFile).getValue();

        PySparkUtils pySparkUtils = new PySparkUtils();

        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }

        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }

        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }

        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;

            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);

                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");

                    // No empty-value check is needed here: kerberosEnabled is only true when
                    // both the principal and the keytab are non-empty.
                    try {
                        logger.info("User authentication initiated");

                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger,
                                hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);

                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }
                    } catch (Exception unknownException) {
                        // Pass the exception itself so the stack trace is not lost.
                        logger.error("Unknown exception occurred while validating user :"
                                + unknownException.getMessage(), unknownException);
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user :" + e1.getMessage(), e1);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }

        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });

        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);
        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });

        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }

        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }

        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);

        logger.info("Spark home set to: {} ", new Object[] { sparkHome });
        logger.info("Driver memory set to: {} ", new Object[] { driverMemory });
        logger.info("Executor memory set to: {} ", new Object[] { executorMemory });
        logger.info("Executor instances set to: {} ", new Object[] { executorInstances });
        logger.info("Executor cores set to: {} ", new Object[] { executorCores });
        logger.info("Network timeout set to: {} ", new Object[] { networkTimeout });

        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {} ", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {} ", new Object[] { kerberosKeyTab });
        }

        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {} ", new Object[] { yarnQueue });
        }

        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                // Split on the first '=' only, so values that themselves contain '='
                // (for example JVM options such as -Dkey=value) are kept intact.
                String[] confKeyValue = additionalSparkConfigOption.split("=", 2);
                if (confKeyValue.length == 2 && !confKeyValue[1].isEmpty()) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}",
                            new Object[] { confKeyValue[0], confKeyValue[1] });
                } else {
                    logger.warn("Ignoring spark config option that is not in key=value form: {}",
                            new Object[] { additionalSparkConfigOption });
                }
            }
        }

        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();

        // Consume both output streams on their own threads while the job runs.
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for PySpark job to complete");

        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        if (e instanceof InterruptedException) {
            // Restore the flag so the framework can see that this thread was interrupted.
            Thread.currentThread().interrupt();
        }
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
From source file:io.zz.Launcher.java
public static void main(String[] args) throws IOException, InterruptedException { SparkLauncher sparkLauncher = new SparkLauncher(); Process spark = sparkLauncher.setAppName("APP NAME").setSparkHome("/tmp") .setAppResource(SparkContext.jarOfClass(Launcher.class).get()) .setMaster("spark://192.168.100.105:7077").setMainClass("io.zz.TestSaveToCassandra").launch(); spark.waitFor();//from w w w. ja va 2 s. co m }
From source file:org.apache.eagle.app.environment.impl.SparkExecutionRuntime.java
License:Apache License
/**
 * Builds a {@link SparkLauncher} from the topology configuration.
 *
 * <p>Launcher settings (master, main class, Spark home, deploy mode, verbosity, application
 * resource and name) and a set of {@code spark.*} properties are read from {@code config}.
 * The master falls back to {@code local[*]} when {@code TOPOLOGY_MASTER} is absent. An
 * optional properties file is applied when its path is not blank. The remaining settings
 * are passed to the application as positional arguments.
 *
 * @param config the topology configuration to read from
 * @return the configured launcher, not yet started
 */
private SparkLauncher prepareSparkConfig(Config config) {
    String master = config.hasPath(TOPOLOGY_MASTER) ? config.getString(TOPOLOGY_MASTER) : "local[*]";
    String sparkExecutorCores = config.getString(SPARK_EXECUTOR_CORES);
    String sparkExecutorMemory = config.getString(SPARK_EXECUTOR_MEMORY);
    String driverMemory = config.getString(DRIVER_MEMORY);
    String driverCore = config.getString(DRIVER_CORES);
    String deployMode = config.getString(DEPLOY_MODE);
    String enable = config.getString(TOPOLOGY_DYNAMICALLOCATION);
    boolean verbose = config.getBoolean(TOPOLOGY_VERBOSE);
    String mainClass = config.getString(TOPOLOGY_MAINCLASS);
    String sparkHome = config.getString(TOPOLOGY_SPARKHOME);
    String uiport = config.getString(TOPOLOGY_SPARKUIPORT);
    String appResource = config.getString(TOPOLOGY_APPRESOURCE);
    String yarnqueue = config.getString(TOPOLOGY_YARNQUEUE);

    SparkLauncher sparkLauncher = new SparkLauncher();
    sparkLauncher.setMaster(master);
    sparkLauncher.setMainClass(mainClass);
    sparkLauncher.setSparkHome(sparkHome);
    sparkLauncher.setDeployMode(deployMode);
    sparkLauncher.setVerbose(verbose);
    sparkLauncher.setAppResource(appResource);
    sparkLauncher.setAppName(config.getString(TOPOLOGY_NAME));
    sparkLauncher.setConf("spark.yarn.queue", yarnqueue);
    sparkLauncher.setConf("spark.executor.cores", sparkExecutorCores);
    sparkLauncher.setConf("spark.executor.memory", sparkExecutorMemory);
    sparkLauncher.setConf("spark.driver.memory", driverMemory);
    sparkLauncher.setConf("spark.driver.cores", driverCore);
    // Spark Streaming reads the key ending in "enabled"; the previous spelling ("enable")
    // is not a key Spark looks up, so the configured value never took effect.
    sparkLauncher.setConf("spark.streaming.dynamicAllocation.enabled", enable);
    sparkLauncher.setConf("spark.ui.port", uiport);

    String path = config.getString(TOPOLOGY_SPARKCONFFILEPATH);
    if (StringUtil.isNotBlank(path)) {
        sparkLauncher.setPropertiesFile(path);
    }

    String batchDuration = config.getString(BATCH_DURATION);
    String routerTasknum = config.getString(ROUTER_TASK_NUM);
    String alertTasknum = config.getString(ALERT_TASK_NUM);
    String publishTasknum = config.getString(PUBLISH_TASK_NUM);
    String slideDurationsecond = config.getString(SLIDE_DURATION_SECOND);
    String windowDurationssecond = config.getString(WINDOW_DURATIONS_SECOND);
    String checkpointPath = config.getString(CHECKPOINT_PATH);
    String topologyGroupid = config.getString(TOPOLOGY_GROUPID);
    String autoOffsetReset = config.getString(AUTO_OFFSET_RESET);
    String restApihost = config.getString(EAGLE_CORRELATION_SERVICE_HOST);
    String restApiport = config.getString(EAGLE_CORRELATION_SERVICE_PORT);
    String restApicontext = config.getString(EAGLE_CORRELATION_CONTEXT);
    String useMultiKafka = config.getString(TOPOLOGY_MULTIKAFKA);
    String kafkaBrokerZkQuorum = config.getString(SPOUT_KAFKABROKERZKQUORUM);
    String zkConfigzkQuorum = config.getString(ZKCONFIG_ZKQUORUM);

    // Positional arguments: the order below is kept exactly as it was (note that the REST
    // settings go context, port, host). NOTE(review): presumably the launched main class
    // parses them by index - confirm before reordering.
    sparkLauncher.addAppArgs(batchDuration, routerTasknum, alertTasknum, publishTasknum,
            slideDurationsecond, windowDurationssecond, checkpointPath, topologyGroupid,
            autoOffsetReset, restApicontext, restApiport, restApihost, useMultiKafka,
            kafkaBrokerZkQuorum, zkConfigzkQuorum);
    return sparkLauncher;
}
From source file:org.datacleaner.spark.ApplicationDriver.java
License:Open Source License
public SparkLauncher createSparkLauncher(File hadoopConfDir, String configurationHdfsPath, String jobHdfsPath) throws Exception { // mimic env. variables final Map<String, String> env = new HashMap<>(); env.put("YARN_CONF_DIR", hadoopConfDir.getAbsolutePath()); final SparkLauncher sparkLauncher = new SparkLauncher(env); sparkLauncher.setSparkHome(_sparkHome); sparkLauncher.setMaster("yarn-cluster"); sparkLauncher.setAppName("DataCleaner"); final MutableRef<String> primaryJar = new MutableRef<>(); final List<String> jars = buildJarFiles(primaryJar); logger.info("Using JAR files: {}", jars); for (final String jar : jars) { sparkLauncher.addJar(jar);/*from www. j a v a 2 s. co m*/ } sparkLauncher.setMainClass(Main.class.getName()); // the primary jar is always the first argument sparkLauncher.addAppArgs(primaryJar.get()); sparkLauncher.addAppArgs(toHdfsPath(configurationHdfsPath)); sparkLauncher.addAppArgs(toHdfsPath(jobHdfsPath)); return sparkLauncher; }