Example usage for org.apache.spark.launcher SparkLauncher addSparkArg

Introduction

On this page you can find example usages of org.apache.spark.launcher.SparkLauncher#addSparkArg, drawn from open-source projects.

Prototype

@Override
public SparkLauncher addSparkArg(String name, String value)
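
A minimal sketch of calling this method. The app resource, main class, master, and memory value below are placeholders for illustration (not taken from the examples on this page), and the sketch assumes SPARK_HOME is set in the environment:

import org.apache.spark.launcher.SparkLauncher;

public class AddSparkArgExample {
    public static void main(String[] args) throws Exception {
        // All values here are hypothetical placeholders.
        Process spark = new SparkLauncher()
                .setAppResource("/path/to/my-app.jar")
                .setMainClass("com.example.MyApp")
                .setMaster("local[2]")
                // addSparkArg(name, value) forwards a spark-submit option,
                // which is useful for options without a dedicated setter.
                .addSparkArg("--driver-memory", "1g")
                .launch();
        spark.waitFor();
    }
}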

Usage

From source file: com.cloudera.livy.client.local.ContextLauncher.java

License: Apache License

private static ChildProcess startDriver(final RpcServer rpcServer, final LocalConf conf, final String clientId,
        final String secret, final String className) throws IOException {
    final String serverAddress = rpcServer.getAddress();
    final String serverPort = String.valueOf(rpcServer.getPort());
    if (conf.get(CLIENT_IN_PROCESS) != null) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                List<String> args = new ArrayList<>();
                args.add("--remote-host");
                args.add(serverAddress);
                args.add("--remote-port");
                args.add(serverPort);
                args.add("--client-id");
                args.add(clientId);
                args.add("--secret");
                args.add(secret);

                for (Map.Entry<String, String> e : conf) {
                    args.add("--conf");
                    args.add(String.format("%s=%s", e.getKey(), e.getValue()));
                }
                try {
                    RemoteDriver.main(args.toArray(new String[args.size()]));
                } catch (Exception e) {
                    LOG.error("Error running driver.", e);
                }
            }
        };
        return new ChildProcess(conf, child);
    } else {
        // If a Spark installation is provided, use the spark-submit script. Otherwise, call the
        // SparkSubmit class directly, which has some caveats (like having to provide a proper
        // version of Guava on the classpath depending on the deploy mode).
        final SparkLauncher launcher = new SparkLauncher();
        String sparkHome = conf.get(SPARK_HOME_KEY);
        if (sparkHome == null) {
            sparkHome = System.getenv(SPARK_HOME_ENV);
        }
        if (sparkHome == null) {
            sparkHome = System.getProperty(SPARK_HOME_KEY);
        }
        launcher.setSparkHome(sparkHome);

        conf.set(CLIENT_ID, clientId);
        conf.set(CLIENT_SECRET, secret);

        launcher.setAppResource("spark-internal");

        String livyJars = conf.get(LIVY_JARS);
        if (livyJars == null) {
            String livyHome = System.getenv("LIVY_HOME");
            Preconditions.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
            File clientJars = new File(livyHome, "client-jars");
            Preconditions.checkState(clientJars.isDirectory(),
                    "Cannot find 'client-jars' directory under LIVY_HOME.");
            List<String> jars = new ArrayList<>();
            for (File f : clientJars.listFiles()) {
                jars.add(f.getAbsolutePath());
            }
            livyJars = Joiner.on(",").join(jars);
        }

        String userJars = conf.get(SPARK_JARS_KEY);
        if (userJars != null) {
            String allJars = Joiner.on(",").join(livyJars, userJars);
            conf.set(SPARK_JARS_KEY, allJars);
        } else {
            conf.set(SPARK_JARS_KEY, livyJars);
        }

        // Disable multiple attempts since the RPC server doesn't yet support multiple
        // connections for the same registered app.
        conf.set("spark.yarn.maxAppAttempts", "1");

        File confFile = writeConfToFile(conf);

        // Define how to pass options to the child process. If launching in client (or local)
        // mode, the driver options need to be passed directly on the command line. Otherwise,
        // SparkSubmit will take care of that for us.
        String master = conf.get("spark.master");
        Preconditions.checkArgument(master != null, "spark.master is not defined.");
        launcher.setMaster(master);
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(className);
        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }
        launcher.addAppArgs("--remote-host", serverAddress);
        launcher.addAppArgs("--remote-port", serverPort);
        return new ChildProcess(conf, launcher.launch());
    }
}
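
The detail worth noting above is that spark-submit options without a dedicated setter, such as --proxy-user, go through addSparkArg, while everything else uses the typed setters. A condensed sketch of that launch path, where sparkHome, master, confFile, className, proxyUser, serverAddress, and serverPort stand in for the values the method computes:

// Condensed sketch of the launch path above; variables are stand-ins.
SparkLauncher launcher = new SparkLauncher()
        .setSparkHome(sparkHome)                        // resolved from conf, env, or system property
        .setMaster(master)                              // spark.master must be defined
        .setPropertiesFile(confFile.getAbsolutePath())  // the serialized conf
        .setMainClass(className)
        .setAppResource("spark-internal");              // no user jar; run the main class only
if (proxyUser != null) {
    launcher.addSparkArg("--proxy-user", proxyUser);    // no dedicated setter for this option
}
launcher.addAppArgs("--remote-host", serverAddress, "--remote-port", serverPort);
Process driver = launcher.launch();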

From source file: com.cloudera.livy.rsc.ContextLauncher.java

License: Apache License

private static ChildProcess startDriver(final RSCConf conf, Promise<?> promise) throws IOException {
    String livyJars = conf.get(LIVY_JARS);
    if (livyJars == null) {
        String livyHome = System.getenv("LIVY_HOME");
        Utils.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
        File rscJars = new File(livyHome, "rsc-jars");
        if (!rscJars.isDirectory()) {
            rscJars = new File(livyHome, "rsc/target/jars");
        }
        Utils.checkState(rscJars.isDirectory(), "Cannot find 'rsc-jars' directory under LIVY_HOME.");
        List<String> jars = new ArrayList<>();
        for (File f : rscJars.listFiles()) {
            jars.add(f.getAbsolutePath());
        }
        livyJars = Utils.join(jars, ",");
    }
    merge(conf, SPARK_JARS_KEY, livyJars, ",");

    String kind = conf.get(SESSION_KIND);
    if ("sparkr".equals(kind)) {
        merge(conf, SPARK_ARCHIVES_KEY, conf.get(RSCConf.Entry.SPARKR_PACKAGE), ",");
    } else if ("pyspark".equals(kind)) {
        merge(conf, "spark.submit.pyFiles", conf.get(RSCConf.Entry.PYSPARK_ARCHIVES), ",");
    }

    // Disable multiple attempts since the RPC server doesn't yet support multiple
    // connections for the same registered app.
    conf.set("spark.yarn.maxAppAttempts", "1");

    // Let the launcher go away when launching in yarn-cluster mode. This avoids keeping lots
    // of "small" Java processes lingering on the Livy server node.
    conf.set("spark.yarn.submit.waitAppCompletion", "false");

    // For testing; propagate jacoco settings so that we also do coverage analysis
    // on the launched driver. We replace the name of the main file ("main.exec")
    // so that we don't end up fighting with the main test launcher.
    String jacocoArgs = System.getProperty("jacoco.args");
    if (jacocoArgs != null) {
        jacocoArgs = jacocoArgs.replace("main.exec", "child.exec");
        merge(conf, SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, jacocoArgs, " ");
    }

    final File confFile = writeConfToFile(conf);

    if (conf.getBoolean(CLIENT_IN_PROCESS)) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                try {
                    RSCDriverBootstrapper.main(new String[] { confFile.getAbsolutePath() });
                } catch (Exception e) {
                    throw Utils.propagate(e);
                }
            }
        };
        return new ChildProcess(conf, promise, child, confFile);
    } else {
        final SparkLauncher launcher = new SparkLauncher();
        launcher.setSparkHome(System.getenv(SPARK_HOME_ENV));
        launcher.setAppResource("spark-internal");
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(RSCDriverBootstrapper.class.getName());

        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }

        return new ChildProcess(conf, promise, launcher.launch(), confFile);
    }
}
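
The merge helper used above is a private method of ContextLauncher and is not shown in this excerpt. A plausible reconstruction, assuming it appends a value onto an existing separated conf entry, with RSCConf swapped for a plain Map so the sketch is self-contained:

import java.util.HashMap;
import java.util.Map;

public class MergeSketch {
    // Hypothetical reconstruction of merge(conf, key, value, separator):
    // append `value` to an existing entry, or set it if absent.
    static void merge(Map<String, String> conf, String key, String value, String sep) {
        if (value == null || value.isEmpty()) {
            return; // nothing to merge
        }
        String existing = conf.get(key);
        conf.put(key, existing == null ? value : existing + sep + value);
    }

    public static void main(String[] args) {
        Map<String, String> conf = new HashMap<>();
        merge(conf, "spark.jars", "/opt/livy/rsc-jars/rsc.jar", ",");
        merge(conf, "spark.jars", "/tmp/user.jar", ",");
        System.out.println(conf.get("spark.jars")); // both jars, comma-separated
    }
}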

From source file: com.streamsets.datacollector.pipeline.executor.spark.yarn.YarnAppLauncher.java

License: Apache License

@Override
public Optional<String> launchApp(Record record) throws ApplicationLaunchFailureException, ELEvalException {

    SparkLauncher launcher = getLauncher();

    if (yarnConfigs.language == Language.JVM) {
        launcher.setMainClass(yarnConfigs.mainClass);
    }

    launcher.setAppResource(yarnConfigs.appResource).setAppName(yarnConfigs.appName).setMaster(YARN)
            .setDeployMode(yarnConfigs.deployMode.getLabel().toLowerCase()).setVerbose(yarnConfigs.verbose);

    if (yarnConfigs.dynamicAllocation) {
        launcher.setConf("spark.dynamicAllocation.enabled", "true");
        launcher.setConf("spark.shuffle.service.enabled", "true");
        launcher.setConf("spark.dynamicAllocation.minExecutors", String.valueOf(yarnConfigs.minExecutors));
        launcher.setConf("spark.dynamicAllocation.maxExecutors", String.valueOf(yarnConfigs.maxExecutors));
    } else {
        launcher.setConf("spark.dynamicAllocation.enabled", "false");
        launcher.addSparkArg("--num-executors", String.valueOf(yarnConfigs.numExecutors));
    }

    launcher.addSparkArg("--executor-memory", yarnConfigs.executorMemory);
    launcher.addSparkArg("--driver-memory", yarnConfigs.driverMemory);

    if (yarnConfigs.deployMode == DeployMode.CLUSTER && yarnConfigs.waitForCompletion) {
        launcher.setConf("spark.yarn.submit.waitAppCompletion", "true");
    }

    // Default is empty string, so pass only non-empty ones.
    yarnConfigs.noValueArgs.forEach((String arg) -> applyConfIfPresent(arg, launcher::addSparkArg));
    yarnConfigs.args.forEach((String k, String v) -> applyConfIfPresent(k, v, launcher::addSparkArg));

    // For files, there is no need to remove empty strings, since init already verifies that each file exists.
    yarnConfigs.additionalFiles.forEach(launcher::addFile);
    yarnConfigs.additionalJars.forEach(launcher::addJar);
    yarnConfigs.pyFiles.forEach(launcher::addPyFile);

    launcher.addAppArgs(getNonEmptyArgs(yarnConfigs.evaluateArgsELs(record)));

    applyConfIfPresent(configs.javaHome, launcher::setJavaHome);
    applyConfIfPresent("spark.yarn.principal", configs.credentialsConfigBean.principal, launcher::setConf);
    applyConfIfPresent("spark.yarn.keytab", configs.credentialsConfigBean.keytab, launcher::setConf);
    applyConfIfPresent("--proxy-user", yarnConfigs.proxyUser, launcher::addSparkArg);
    applyConfIfPresent(configs.sparkHome, launcher::setSparkHome);

    timeout = yarnConfigs.waitTimeout;

    try {
        final SparkAppHandle handle = launcher.startApplication(new AppListener());
        return Optional.ofNullable(handle.getAppId());
    } catch (IOException ex) {
        latch.countDown();
        throw new ApplicationLaunchFailureException(ex);
    } catch (Throwable ex) { // NOSONAR
        latch.countDown();
        throw ex;
    }
}
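
applyConfIfPresent is a helper of the surrounding class, not part of the SparkLauncher API. A minimal sketch of the two overloads the example relies on, with names and signatures inferred from the call sites above:

import java.util.function.BiConsumer;
import java.util.function.Consumer;

public final class LauncherHelpers {
    // Inferred single-value overload: apply the setter only for non-empty values,
    // e.g. applyConfIfPresent(configs.sparkHome, launcher::setSparkHome).
    static void applyConfIfPresent(String value, Consumer<String> setter) {
        if (value != null && !value.isEmpty()) {
            setter.accept(value);
        }
    }

    // Inferred key/value overload: apply only when the value is non-empty,
    // e.g. applyConfIfPresent("--proxy-user", proxyUser, launcher::addSparkArg).
    static void applyConfIfPresent(String name, String value, BiConsumer<String, String> setter) {
        if (value != null && !value.isEmpty()) {
            setter.accept(name, value);
        }
    }
}

This pattern keeps the launch method free of repetitive null/empty checks while still letting addSparkArg, setConf, and the other setters be passed around as method references.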