Example usage for org.apache.spark.launcher SparkLauncher setAppName

Introduction

In this page you can find the example usage for org.apache.spark.launcher SparkLauncher setAppName.

Prototype

@Override
    public SparkLauncher setAppName(String appName)

Source Link

Usage

From source file:com.ebay.logstream.runner.spark.SparkPipelineRunner.java

License:Apache License

@Override
public Map<String, Object> run(Pipeline pipeline) {
    Map<String, Object> result = new HashMap<>();
    Map<String, String> env = Maps.newHashMap();
    env.put("SPARK_PRINT_LAUNCH_COMMAND", "1");
    SparkLauncher launcher = new SparkLauncher(env);
    launcher.setAppResource(pipeline.getContext().getPipelineJarPath());
    launcher.setAppName(pipeline.getContext().getPipelineName());
    launcher.setMainClass(SparkPipelineRunner.class.getCanonicalName());
    launcher.setSparkHome(pipeline.getContext().getConfig().getString(SPARK_HOME_KEY));
    launcher.setJavaHome(pipeline.getContext().getConfig().getString(JAVA_HOME));
    //set app args
    launcher.addAppArgs(pipeline.getContext().getPipeline());
    launcher.addAppArgs(pipeline.getContext().getPipelineName());
    launcher.addAppArgs(pipeline.getContext().getDeployMode().toString());
    launcher.addAppArgs(pipeline.getContext().getInputParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getFilterParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getOutputParallelism() + "");
    //work around(for get driver pid)
    String uuid = UUID.randomUUID().toString();
    launcher.addAppArgs(uuid);//from  www  .  j  a  v a  2s  .  c o m
    launcher.addAppArgs();
    launcher.setVerbose(true);
    launcher.addSparkArg("--verbose");
    if (pipeline.getContext().getDeployMode() == LogStormConstants.DeployMode.LOCAL) {
        launcher.setMaster("local[*]");
    } else {
        launcher.setMaster(pipeline.getContext().getConfig().getString(SPARK_MASTER_KEY));
    }

    try {
        SparkAppHandle handle = launcher.startApplication();
        while (handle.getAppId() == null) {
            Thread.sleep(1000);
        }
        result.put("applicationId", handle.getAppId());
        LOG.info("generate spark applicationId " + handle.getAppId());
        //get driver pid
        String cmd = "ps -ef | grep " + uuid + " | grep -v grep | awk '{print $2}'";
        LOG.info("cmd {}", cmd);
        Process process = Runtime.getRuntime().exec(new String[] { "/bin/sh", "-c", cmd });
        synchronized (process) {
            try {
                process.wait();
            } catch (Exception e) {
                LOG.warn("failed to wait driver pid: ", e);
            }
        }
        InputStream inputStream = process.getInputStream();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
        String pid;
        while ((pid = bufferedReader.readLine()) != null) {
            result.put("driverPid", pid);
            System.out.println(pid);
        }
        bufferedReader.close();
    } catch (Exception e) {
        LOG.error("failed to start as a spark application, ", e);
    }

    return result;
}

From source file:com.thinkbiganalytics.nifi.pyspark.core.ExecutePySpark.java

License:Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();/*from w ww .  jav a 2 s  .c o m*/

    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES)
                .getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile)
                .getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS)
                .evaluateAttributeExpressions(flowFile).getValue();

        PySparkUtils pySparkUtils = new PySparkUtils();

        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }

        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }

        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }

        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;
            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);

                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");

                    if (kerberosPrincipal.equals("") && kerberosKeyTab.equals("")) {
                        logger.error(
                                "Kerberos Principal and Keytab provided with empty values for a Kerberized cluster.");
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }

                    try {
                        logger.info("User authentication initiated");

                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger,
                                hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);
                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }

                    } catch (Exception unknownException) {
                        logger.error("Unknown exception occurred while validating user :"
                                + unknownException.getMessage());
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user :" + e1.getMessage());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }

        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });

        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);

        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });

        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }

        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }

        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);

        logger.info("Spark home set to: {} ", new Object[] { sparkHome });
        logger.info("Driver memory set to: {} ", new Object[] { driverMemory });
        logger.info("Executor memory set to: {} ", new Object[] { executorMemory });
        logger.info("Executor instances set to: {} ", new Object[] { executorInstances });
        logger.info("Executor cores set to: {} ", new Object[] { executorCores });
        logger.info("Network timeout set to: {} ", new Object[] { networkTimeout });

        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {} ", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {} ", new Object[] { kerberosKeyTab });
        }

        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {} ", new Object[] { yarnQueue });
        }

        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                String[] confKeyValue = additionalSparkConfigOption.split("=");
                if (confKeyValue.length == 2) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}",
                            new Object[] { confKeyValue[0], confKeyValue[1] });
                }
            }
        }

        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();

        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for PySpark job to complete");

        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}

From source file:io.zz.Launcher.java

public static void main(String[] args) throws IOException, InterruptedException {
    SparkLauncher sparkLauncher = new SparkLauncher();
    Process spark = sparkLauncher.setAppName("APP NAME").setSparkHome("/tmp")
            .setAppResource(SparkContext.jarOfClass(Launcher.class).get())
            .setMaster("spark://192.168.100.105:7077").setMainClass("io.zz.TestSaveToCassandra").launch();

    spark.waitFor();//from  w w  w.  ja va 2 s. co m
}

From source file:org.apache.eagle.app.environment.impl.SparkExecutionRuntime.java

License:Apache License

private SparkLauncher prepareSparkConfig(Config config) {
    String master = config.hasPath(TOPOLOGY_MASTER) ? config.getString(TOPOLOGY_MASTER) : "local[*]";
    String sparkExecutorCores = config.getString(SPARK_EXECUTOR_CORES);
    String sparkExecutorMemory = config.getString(SPARK_EXECUTOR_MEMORY);
    String driverMemory = config.getString(DRIVER_MEMORY);
    String driverCore = config.getString(DRIVER_CORES);
    String deployMode = config.getString(DEPLOY_MODE);
    String enable = config.getString(TOPOLOGY_DYNAMICALLOCATION);
    boolean verbose = config.getBoolean(TOPOLOGY_VERBOSE);
    String mainClass = config.getString(TOPOLOGY_MAINCLASS);
    String sparkHome = config.getString(TOPOLOGY_SPARKHOME);
    String uiport = config.getString(TOPOLOGY_SPARKUIPORT);
    String appResource = config.getString(TOPOLOGY_APPRESOURCE);
    String yarnqueue = config.getString(TOPOLOGY_YARNQUEUE);

    SparkLauncher sparkLauncher = new SparkLauncher();
    sparkLauncher.setMaster(master);/* ww  w  .  ja v a 2s  .com*/
    sparkLauncher.setMainClass(mainClass);
    sparkLauncher.setSparkHome(sparkHome);
    //sparkLauncher.setJavaHome(TOPOLOGY_JAVAHOME);
    sparkLauncher.setDeployMode(deployMode);
    sparkLauncher.setVerbose(verbose);
    sparkLauncher.setAppResource(appResource);
    sparkLauncher.setAppName(config.getString(TOPOLOGY_NAME));
    sparkLauncher.setConf("spark.yarn.queue", yarnqueue);
    sparkLauncher.setConf("spark.executor.cores", sparkExecutorCores);
    sparkLauncher.setConf("spark.executor.memory", sparkExecutorMemory);
    sparkLauncher.setConf("spark.driver.memory", driverMemory);
    sparkLauncher.setConf("spark.driver.cores", driverCore);
    sparkLauncher.setConf("spark.streaming.dynamicAllocation.enable", enable);
    sparkLauncher.setConf("spark.ui.port", uiport);
    String path = config.getString(TOPOLOGY_SPARKCONFFILEPATH);
    if (StringUtil.isNotBlank(path)) {
        sparkLauncher.setPropertiesFile(path);
    }

    String batchDuration = config.getString(BATCH_DURATION);
    String routerTasknum = config.getString(ROUTER_TASK_NUM);
    String alertTasknum = config.getString(ALERT_TASK_NUM);
    String publishTasknum = config.getString(PUBLISH_TASK_NUM);
    String slideDurationsecond = config.getString(SLIDE_DURATION_SECOND);
    String windowDurationssecond = config.getString(WINDOW_DURATIONS_SECOND);
    String checkpointPath = config.getString(CHECKPOINT_PATH);
    String topologyGroupid = config.getString(TOPOLOGY_GROUPID);
    String autoOffsetReset = config.getString(AUTO_OFFSET_RESET);
    String restApihost = config.getString(EAGLE_CORRELATION_SERVICE_HOST);
    String restApiport = config.getString(EAGLE_CORRELATION_SERVICE_PORT);
    String restApicontext = config.getString(EAGLE_CORRELATION_CONTEXT);
    String useMultiKafka = config.getString(TOPOLOGY_MULTIKAFKA);
    String kafkaBrokerZkQuorum = config.getString(SPOUT_KAFKABROKERZKQUORUM);
    String zkConfigzkQuorum = config.getString(ZKCONFIG_ZKQUORUM);

    sparkLauncher.addAppArgs(batchDuration, routerTasknum, alertTasknum, publishTasknum, slideDurationsecond,
            windowDurationssecond, checkpointPath, topologyGroupid, autoOffsetReset, restApicontext,
            restApiport, restApihost, useMultiKafka, kafkaBrokerZkQuorum, zkConfigzkQuorum);
    return sparkLauncher;
}

From source file:org.datacleaner.spark.ApplicationDriver.java

License:Open Source License

public SparkLauncher createSparkLauncher(File hadoopConfDir, String configurationHdfsPath, String jobHdfsPath)
        throws Exception {
    // mimic env. variables
    final Map<String, String> env = new HashMap<>();
    env.put("YARN_CONF_DIR", hadoopConfDir.getAbsolutePath());

    final SparkLauncher sparkLauncher = new SparkLauncher(env);

    sparkLauncher.setSparkHome(_sparkHome);
    sparkLauncher.setMaster("yarn-cluster");
    sparkLauncher.setAppName("DataCleaner");

    final MutableRef<String> primaryJar = new MutableRef<>();
    final List<String> jars = buildJarFiles(primaryJar);
    logger.info("Using JAR files: {}", jars);

    for (final String jar : jars) {
        sparkLauncher.addJar(jar);/*from www. j a  v a 2 s.  co  m*/
    }
    sparkLauncher.setMainClass(Main.class.getName());

    // the primary jar is always the first argument
    sparkLauncher.addAppArgs(primaryJar.get());

    sparkLauncher.addAppArgs(toHdfsPath(configurationHdfsPath));
    sparkLauncher.addAppArgs(toHdfsPath(jobHdfsPath));

    return sparkLauncher;
}