Example usage for org.apache.spark.launcher SparkLauncher setMaster

List of usage examples for org.apache.spark.launcher SparkLauncher setMaster

Introduction

On this page you can find example usages of org.apache.spark.launcher SparkLauncher setMaster.

Prototype

@Override
public SparkLauncher setMaster(String master)
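
setMaster sets the Spark master the launched application will connect to, for example local[*], yarn, or a spark://host:port URL. As a quick orientation before the examples below, here is a minimal, self-contained sketch; the Spark home, jar path, and main class are placeholders, not values from the examples on this page:

import org.apache.spark.launcher.SparkLauncher;

public class LaunchExample {
    public static void main(String[] args) throws Exception {
        // All paths and class names below are placeholders; adjust for your environment.
        Process spark = new SparkLauncher()
                .setSparkHome("/opt/spark")            // Spark installation directory
                .setAppResource("/path/to/my-app.jar") // application jar
                .setMainClass("com.example.MyApp")     // application entry point
                .setMaster("local[*]")                 // or "yarn", or a spark://host:port URL
                .launch();                             // spawns spark-submit as a child process
        spark.waitFor();
    }
}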

Usage

From source file: com.cloudera.livy.client.local.ContextLauncher.java

License: Apache License

private static ChildProcess startDriver(final RpcServer rpcServer, final LocalConf conf, final String clientId,
        final String secret, final String className) throws IOException {
    final String serverAddress = rpcServer.getAddress();
    final String serverPort = String.valueOf(rpcServer.getPort());
    if (conf.get(CLIENT_IN_PROCESS) != null) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                List<String> args = new ArrayList<>();
                args.add("--remote-host");
                args.add(serverAddress);
                args.add("--remote-port");
                args.add(serverPort);
                args.add("--client-id");
                args.add(clientId);
                args.add("--secret");
                args.add(secret);

                for (Map.Entry<String, String> e : conf) {
                    args.add("--conf");
                    args.add(String.format("%s=%s", e.getKey(), e.getValue()));
                }
                try {
                    RemoteDriver.main(args.toArray(new String[args.size()]));
                } catch (Exception e) {
                    LOG.error("Error running driver.", e);
                }
            }
        };
        return new ChildProcess(conf, child);
    } else {
        // If a Spark installation is provided, use the spark-submit script. Otherwise, call the
        // SparkSubmit class directly, which has some caveats (like having to provide a proper
        // version of Guava on the classpath depending on the deploy mode).
        final SparkLauncher launcher = new SparkLauncher();
        String sparkHome = conf.get(SPARK_HOME_KEY);
        if (sparkHome == null) {
            sparkHome = System.getenv(SPARK_HOME_ENV);
        }
        if (sparkHome == null) {
            sparkHome = System.getProperty(SPARK_HOME_KEY);
        }
        launcher.setSparkHome(sparkHome);

        conf.set(CLIENT_ID, clientId);
        conf.set(CLIENT_SECRET, secret);

        launcher.setAppResource("spark-internal");

        String livyJars = conf.get(LIVY_JARS);
        if (livyJars == null) {
            String livyHome = System.getenv("LIVY_HOME");
            Preconditions.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
            File clientJars = new File(livyHome, "client-jars");
            Preconditions.checkState(clientJars.isDirectory(),
                    "Cannot find 'client-jars' directory under LIVY_HOME.");
            List<String> jars = new ArrayList<>();
            for (File f : clientJars.listFiles()) {
                jars.add(f.getAbsolutePath());
            }
            livyJars = Joiner.on(",").join(jars);
        }

        String userJars = conf.get(SPARK_JARS_KEY);
        if (userJars != null) {
            String allJars = Joiner.on(",").join(livyJars, userJars);
            conf.set(SPARK_JARS_KEY, allJars);
        } else {
            conf.set(SPARK_JARS_KEY, livyJars);
        }

        // Disable multiple attempts since the RPC server doesn't yet support multiple
        // connections for the same registered app.
        conf.set("spark.yarn.maxAppAttempts", "1");

        File confFile = writeConfToFile(conf);

        // Define how to pass options to the child process. If launching in client (or local)
        // mode, the driver options need to be passed directly on the command line. Otherwise,
        // SparkSubmit will take care of that for us.
        String master = conf.get("spark.master");
        Preconditions.checkArgument(master != null, "spark.master is not defined.");
        launcher.setMaster(master);
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(className);
        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }
        launcher.addAppArgs("--remote-host", serverAddress);
        launcher.addAppArgs("--remote-port", serverPort);
        return new ChildProcess(conf, launcher.launch());
    }
}
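
Note that launcher.launch(), used above, returns a plain java.lang.Process; the caller (here wrapped in ChildProcess) has to supervise that process itself, typically including consuming its output streams. The next example uses startApplication() instead, which returns a SparkAppHandle exposing the application's id and state.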

From source file: com.ebay.logstream.runner.spark.SparkPipelineRunner.java

License: Apache License

@Override
public Map<String, Object> run(Pipeline pipeline) {
    Map<String, Object> result = new HashMap<>();
    Map<String, String> env = Maps.newHashMap();
    env.put("SPARK_PRINT_LAUNCH_COMMAND", "1");
    SparkLauncher launcher = new SparkLauncher(env);
    launcher.setAppResource(pipeline.getContext().getPipelineJarPath());
    launcher.setAppName(pipeline.getContext().getPipelineName());
    launcher.setMainClass(SparkPipelineRunner.class.getCanonicalName());
    launcher.setSparkHome(pipeline.getContext().getConfig().getString(SPARK_HOME_KEY));
    launcher.setJavaHome(pipeline.getContext().getConfig().getString(JAVA_HOME));
    //set app args
    launcher.addAppArgs(pipeline.getContext().getPipeline());
    launcher.addAppArgs(pipeline.getContext().getPipelineName());
    launcher.addAppArgs(pipeline.getContext().getDeployMode().toString());
    launcher.addAppArgs(pipeline.getContext().getInputParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getFilterParallelism() + "");
    launcher.addAppArgs(pipeline.getContext().getOutputParallelism() + "");
    // workaround: pass a random UUID as an app arg so the driver PID can be located via ps/grep below
    String uuid = UUID.randomUUID().toString();
    launcher.addAppArgs(uuid);
    launcher.addAppArgs();
    launcher.setVerbose(true);
    launcher.addSparkArg("--verbose");
    if (pipeline.getContext().getDeployMode() == LogStormConstants.DeployMode.LOCAL) {
        launcher.setMaster("local[*]");
    } else {
        launcher.setMaster(pipeline.getContext().getConfig().getString(SPARK_MASTER_KEY));
    }

    try {
        SparkAppHandle handle = launcher.startApplication();
        while (handle.getAppId() == null) {
            Thread.sleep(1000);
        }
        result.put("applicationId", handle.getAppId());
        LOG.info("generate spark applicationId " + handle.getAppId());
        //get driver pid
        String cmd = "ps -ef | grep " + uuid + " | grep -v grep | awk '{print $2}'";
        LOG.info("cmd {}", cmd);
        Process process = Runtime.getRuntime().exec(new String[] { "/bin/sh", "-c", cmd });
        try {
            // wait for the ps command to complete before reading its output
            process.waitFor();
        } catch (Exception e) {
            LOG.warn("failed to wait for driver pid: ", e);
        }
        InputStream inputStream = process.getInputStream();
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
        String pid;
        while ((pid = bufferedReader.readLine()) != null) {
            result.put("driverPid", pid);
            System.out.println(pid);
        }
        bufferedReader.close();
    } catch (Exception e) {
        LOG.error("failed to start as a spark application, ", e);
    }

    return result;
}
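
The busy-wait on handle.getAppId() above works, but startApplication() also accepts SparkAppHandle.Listener callbacks, which avoids polling. A minimal sketch of that alternative, reusing launcher and LOG from the example above (the callback bodies are illustrative only):

SparkAppHandle handle = launcher.startApplication(new SparkAppHandle.Listener() {
    @Override
    public void stateChanged(SparkAppHandle h) {
        LOG.info("spark application state changed: {}", h.getState());
    }

    @Override
    public void infoChanged(SparkAppHandle h) {
        if (h.getAppId() != null) {
            LOG.info("spark applicationId: {}", h.getAppId());
        }
    }
});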

From source file: org.apache.eagle.app.environment.impl.SparkExecutionRuntime.java

License: Apache License

private SparkLauncher prepareSparkConfig(Config config) {
    String master = config.hasPath(TOPOLOGY_MASTER) ? config.getString(TOPOLOGY_MASTER) : "local[*]";
    String sparkExecutorCores = config.getString(SPARK_EXECUTOR_CORES);
    String sparkExecutorMemory = config.getString(SPARK_EXECUTOR_MEMORY);
    String driverMemory = config.getString(DRIVER_MEMORY);
    String driverCore = config.getString(DRIVER_CORES);
    String deployMode = config.getString(DEPLOY_MODE);
    String enable = config.getString(TOPOLOGY_DYNAMICALLOCATION);
    boolean verbose = config.getBoolean(TOPOLOGY_VERBOSE);
    String mainClass = config.getString(TOPOLOGY_MAINCLASS);
    String sparkHome = config.getString(TOPOLOGY_SPARKHOME);
    String uiport = config.getString(TOPOLOGY_SPARKUIPORT);
    String appResource = config.getString(TOPOLOGY_APPRESOURCE);
    String yarnqueue = config.getString(TOPOLOGY_YARNQUEUE);

    SparkLauncher sparkLauncher = new SparkLauncher();
    sparkLauncher.setMaster(master);
    sparkLauncher.setMainClass(mainClass);
    sparkLauncher.setSparkHome(sparkHome);
    //sparkLauncher.setJavaHome(TOPOLOGY_JAVAHOME);
    sparkLauncher.setDeployMode(deployMode);
    sparkLauncher.setVerbose(verbose);
    sparkLauncher.setAppResource(appResource);
    sparkLauncher.setAppName(config.getString(TOPOLOGY_NAME));
    sparkLauncher.setConf("spark.yarn.queue", yarnqueue);
    sparkLauncher.setConf("spark.executor.cores", sparkExecutorCores);
    sparkLauncher.setConf("spark.executor.memory", sparkExecutorMemory);
    sparkLauncher.setConf("spark.driver.memory", driverMemory);
    sparkLauncher.setConf("spark.driver.cores", driverCore);
    sparkLauncher.setConf("spark.streaming.dynamicAllocation.enable", enable);
    sparkLauncher.setConf("spark.ui.port", uiport);
    String path = config.getString(TOPOLOGY_SPARKCONFFILEPATH);
    if (StringUtil.isNotBlank(path)) {
        sparkLauncher.setPropertiesFile(path);
    }

    String batchDuration = config.getString(BATCH_DURATION);
    String routerTasknum = config.getString(ROUTER_TASK_NUM);
    String alertTasknum = config.getString(ALERT_TASK_NUM);
    String publishTasknum = config.getString(PUBLISH_TASK_NUM);
    String slideDurationsecond = config.getString(SLIDE_DURATION_SECOND);
    String windowDurationssecond = config.getString(WINDOW_DURATIONS_SECOND);
    String checkpointPath = config.getString(CHECKPOINT_PATH);
    String topologyGroupid = config.getString(TOPOLOGY_GROUPID);
    String autoOffsetReset = config.getString(AUTO_OFFSET_RESET);
    String restApihost = config.getString(EAGLE_CORRELATION_SERVICE_HOST);
    String restApiport = config.getString(EAGLE_CORRELATION_SERVICE_PORT);
    String restApicontext = config.getString(EAGLE_CORRELATION_CONTEXT);
    String useMultiKafka = config.getString(TOPOLOGY_MULTIKAFKA);
    String kafkaBrokerZkQuorum = config.getString(SPOUT_KAFKABROKERZKQUORUM);
    String zkConfigzkQuorum = config.getString(ZKCONFIG_ZKQUORUM);

    sparkLauncher.addAppArgs(batchDuration, routerTasknum, alertTasknum, publishTasknum, slideDurationsecond,
            windowDurationssecond, checkpointPath, topologyGroupid, autoOffsetReset, restApicontext,
            restApiport, restApihost, useMultiKafka, kafkaBrokerZkQuorum, zkConfigzkQuorum);
    return sparkLauncher;
}
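
Several of the configuration keys passed to setConf above also exist as named constants on SparkLauncher; for example, the executor and driver settings from this example could equivalently be written as:

sparkLauncher.setConf(SparkLauncher.EXECUTOR_CORES, sparkExecutorCores);
sparkLauncher.setConf(SparkLauncher.EXECUTOR_MEMORY, sparkExecutorMemory);
sparkLauncher.setConf(SparkLauncher.DRIVER_MEMORY, driverMemory);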

From source file: org.datacleaner.spark.ApplicationDriver.java

License: Open Source License

public SparkLauncher createSparkLauncher(File hadoopConfDir, String configurationHdfsPath, String jobHdfsPath)
        throws Exception {
    // mimic env. variables
    final Map<String, String> env = new HashMap<>();
    env.put("YARN_CONF_DIR", hadoopConfDir.getAbsolutePath());

    final SparkLauncher sparkLauncher = new SparkLauncher(env);

    sparkLauncher.setSparkHome(_sparkHome);
    sparkLauncher.setMaster("yarn-cluster");
    sparkLauncher.setAppName("DataCleaner");

    final MutableRef<String> primaryJar = new MutableRef<>();
    final List<String> jars = buildJarFiles(primaryJar);
    logger.info("Using JAR files: {}", jars);

    for (final String jar : jars) {
        sparkLauncher.addJar(jar);
    }
    sparkLauncher.setMainClass(Main.class.getName());

    // the primary jar is always the first argument
    sparkLauncher.addAppArgs(primaryJar.get());

    sparkLauncher.addAppArgs(toHdfsPath(configurationHdfsPath));
    sparkLauncher.addAppArgs(toHdfsPath(jobHdfsPath));

    return sparkLauncher;
}
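
The "yarn-cluster" master string used above is the pre-Spark-2.0 spelling; on Spark 2.x and later the equivalent is a plain "yarn" master combined with the cluster deploy mode:

sparkLauncher.setMaster("yarn");
sparkLauncher.setDeployMode("cluster");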