Example usage for org.apache.spark.launcher SparkLauncher launch

Introduction

On this page you can find example usages of org.apache.spark.launcher.SparkLauncher.launch().

Prototype

public Process launch() throws IOException 

Document

Launches a sub-process that will start the configured Spark application.
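
Before the project-specific examples, here is a minimal, self-contained sketch of the typical launch() pattern: configure a SparkLauncher, call launch(), drain the sub-process's output streams, and wait for the exit code. The application jar path, main class, master URL, and arguments below are placeholder values, not taken from any of the examples on this page.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.spark.launcher.SparkLauncher;

public class SparkLauncherLaunchSketch {

    public static void main(String[] args) throws IOException, InterruptedException {
        // Placeholder values: point these at your own application jar, main class and master.
        SparkLauncher launcher = new SparkLauncher()
                .setAppResource("/path/to/my-spark-app.jar")
                .setMainClass("com.example.MySparkApp")
                .setMaster("local[*]")
                .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
                .addAppArgs("firstArg", "secondArg");

        // launch() forks a spark-submit sub-process; the caller must consume its
        // stdout and stderr, otherwise the child can block on a full pipe buffer.
        Process process = launcher.launch();
        drain(process.getInputStream());
        drain(process.getErrorStream());

        int exitCode = process.waitFor();
        System.out.println("Spark application exited with code " + exitCode);
    }

    // Echoes a stream line by line on a daemon thread.
    private static void drain(InputStream in) {
        Thread reader = new Thread(() -> {
            try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        });
        reader.setDaemon(true);
        reader.start();
    }
}

If you only need lifecycle events and the final state rather than the raw process output, the same launcher also offers startApplication(), which returns a SparkAppHandle and spares you from managing the Process streams yourself.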

Usage

From source file: com.uber.hoodie.cli.commands.HDFSParquetImportCommand.java

License: Apache License

@CliCommand(value = "hdfsparquetimport", help = "Imports Parquet dataset to a hoodie dataset")
public String convert(
        @CliOption(key = "upsert", mandatory = false, unspecifiedDefaultValue = "false", help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert,
        @CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
        @CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") final String targetPath,
        @CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
        @CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
        @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
        @CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") final String partitionPathField,
        @CliOption(key = {
                "parallelism" }, mandatory = true, help = "Parallelism for hoodie insert") final String parallelism,
        @CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") final String schemaFilePath,
        @CliOption(key = "format", mandatory = true, help = "Format for the input data") final String format,
        @CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory,
        @CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry)
        throws Exception {

    (new HDFSParquetImporter.FormatValidator()).validate("format", format);

    boolean initialized = HoodieCLI.initConf();
    HoodieCLI.initFS(initialized);
    String sparkPropertiesPath = Utils
            .getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());

    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);

    String cmd = SparkCommand.IMPORT.toString();
    if (useUpsert) {
        cmd = SparkCommand.UPSERT.toString();
    }

    sparkLauncher.addAppArgs(cmd, srcPath, targetPath, tableName, tableType, rowKeyField, partitionPathField,
            parallelism, schemaFilePath, sparkMemory, retry);
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    if (exitCode != 0) {
        return "Failed to import dataset to hoodie format";
    }
    return "Dataset imported to hoodie format";
}

From source file: com.uber.hoodie.cli.commands.RecordsCommand.java

License: Apache License

@CliCommand(value = "records deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
public String deduplicate(@CliOption(key = {
        "duplicatedPartitionPath" }, help = "Partition Path containing the duplicates") final String duplicatedPartitionPath,
        @CliOption(key = {
                "repairedOutputPath" }, help = "Location to place the repaired files") final String repairedOutputPath,
        @CliOption(key = {
                "sparkProperties" }, help = "Spark Properites File Path") final String sparkPropertiesPath)
        throws Exception {
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
            repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();

    if (exitCode != 0) {
        return "Deduplication failed";
    }
    return "Deduplicated files placed in: " + repairedOutputPath;
}

From source file: com.uber.hoodie.cli.commands.RepairsCommand.java

License: Apache License

@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce "
        + "repaired files to replace with")
public String deduplicate(@CliOption(key = {
        "duplicatedPartitionPath" }, help = "Partition Path containing the duplicates", mandatory = true) final String duplicatedPartitionPath,
        @CliOption(key = {
                "repairedOutputPath" }, help = "Location to place the repaired files", mandatory = true) final String repairedOutputPath,
        @CliOption(key = {
                "sparkProperties" }, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath)
        throws Exception {
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
            repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();

    if (exitCode != 0) {
        return "Deduplication failed";
    }
    return "Deduplicated files placed in: " + repairedOutputPath;
}

From source file: com.uber.hoodie.cli.commands.SavepointsCommand.java

License: Apache License

@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
public String rollbackToSavepoint(
        @CliOption(key = { "savepoint" }, help = "Savepoint to rollback") final String commitTime,
        @CliOption(key = {
                "sparkProperties" }, help = "Spark Properites File Path") final String sparkPropertiesPath)
        throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);

    if (!timeline.containsInstant(commitInstant)) {
        return "Commit " + commitTime + " not found in Commits " + timeline;
    }

    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime,
            HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // Refresh the current meta client
    refreshMetaClient();
    if (exitCode != 0) {
        return "Savepoint " + commitTime + " failed to roll back";
    }
    return "Savepoint " + commitTime + " rolled back";
}

From source file: org.cripac.isee.vpe.ctrl.MainController.java

License: Open Source License

public static void main(String[] args) throws URISyntaxException, IOException, ParserConfigurationException,
        SAXException, UnimplementedException {
    // Analyze the command line and store the options into a system property
    // center.
    SystemPropertyCenter propCenter = new SystemPropertyCenter(args);

    final AtomicReference<Boolean> running = new AtomicReference<>();
    running.set(true);

    // Prepare system configuration.
    if (propCenter.sparkMaster.toLowerCase().contains("yarn")) {
        System.setProperty("SPARK_YARN_MODE", "true");

        // Create a thread to listen to reports.
        Thread listener = new Thread(() -> {
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(
                    propCenter.getKafkaConsumerProp(UUID.randomUUID().toString(), true));
            ArrayList<String> topicList = new ArrayList<>();
            for (String appName : propCenter.appsToStart) {
                topicList.add(appName + "_report");
            }
            consumer.subscribe(topicList);
            while (running.get()) {
                ConsumerRecords<String, String> records = consumer.poll(propCenter.batchDuration);
                records.forEach(rec -> System.out.println(rec.value()));
                consumer.commitSync();
            }
        });
        listener.start();

        final class ProcessWithName {
            private Process process;
            private String name;

            private ProcessWithName(Process process, String name) {
                this.process = process;
                this.name = name;
            }
        }

        List<ProcessWithName> processesWithNames = new LinkedList<>();
        for (String appName : propCenter.appsToStart) {
            try {
                SparkLauncher launcher = propCenter.GetSparkLauncher(appName);

                Process launcherProcess = launcher.launch();
                processesWithNames.add(new ProcessWithName(launcherProcess, appName));

                // Create threads listening to output of the launcher process.
                Thread infoThread = new Thread(
                        new InputStreamReaderRunnable(launcherProcess.getInputStream(), "INFO", running),
                        "LogStreamReader info");
                Thread errorThread = new Thread(
                        new InputStreamReaderRunnable(launcherProcess.getErrorStream(), "ERROR", running),
                        "LogStreamReader error");
                infoThread.start();
                errorThread.start();
            } catch (NoAppSpecifiedException e) {
                e.printStackTrace();
            }
        }

        while (!processesWithNames.isEmpty()) {
            for (ProcessWithName processWithName : processesWithNames) {
                try {
                    boolean exited = processWithName.process.waitFor(100, TimeUnit.MILLISECONDS);
                    if (exited) {
                        System.out.println("[INFO]Process " + processWithName.name + "finished! Exit code: "
                                + processWithName.process.exitValue());
                        processWithName.process.getInputStream().close();
                        processWithName.process.getErrorStream().close();
                        processesWithNames.remove(processWithName);
                        break;
                    }
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    } else {
        // TODO Complete code for running locally.
        throw new UnimplementedException();
    }

    running.set(false);
}

From source file: org.datacleaner.spark.ApplicationDriver.java

License: Open Source License

public int launch(SparkLauncher sparkLauncher) throws Exception {
    final Process process = sparkLauncher.launch();

    final InputStream errorStream = process.getErrorStream();
    startLogger(errorStream);

    final InputStream inputStream = process.getInputStream();
    startLogger(inputStream);

    return process.waitFor();
}

From source file: org.kaaproject.examples.spark.KaaSparkLauncher.java

License: Apache License

public static void main(String[] args) throws Exception {
    SparkLauncher launcher = new SparkLauncher().setMaster(SPARK_MASTER_URL).setSparkHome(SPARK_HOME)
            .setAppResource(SPARK_APP_JAR).setMainClass(KaaSparkExample.class.getName())
            .setAppName(KAA_SPARK_EXAMPLE_JOB_NAME).addAppArgs(FLUME_BIND_HOST, FLUME_BIND_PORT);

    final Process spark = launcher.launch();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            LOG.warn("Spark job interrupted!");
            spark.destroy();
        }
    });

    Thread isReader = startReader(spark.getInputStream());
    Thread esReader = startReader(spark.getErrorStream());

    int resultCode = spark.waitFor();

    isReader.join();
    esReader.join();

    if (resultCode != 0) {
        LOG.warn("Spark job result code: {}", resultCode);
    }
}