Example usage for org.apache.spark.launcher SparkLauncher launch

Introduction

On this page you can find example usages of org.apache.spark.launcher.SparkLauncher.launch().

Prototype

public Process launch() throws IOException 

Document

Launches a sub-process that will start the configured Spark application.
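
Before the project-specific examples, here is a minimal, self-contained sketch of the typical launch() pattern: configure a SparkLauncher, call launch(), drain the sub-process's output streams, and wait for the exit code. The application jar path, main class, master URL, and arguments below are placeholder values, not taken from any of the examples on this page.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.spark.launcher.SparkLauncher;

public class SparkLauncherLaunchSketch {

    public static void main(String[] args) throws IOException, InterruptedException {
        // Placeholder values: point these at your own application jar, main class and master.
        SparkLauncher launcher = new SparkLauncher()
                .setAppResource("/path/to/my-spark-app.jar")
                .setMainClass("com.example.MySparkApp")
                .setMaster("local[*]")
                .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
                .addAppArgs("firstArg", "secondArg");

        // launch() forks a spark-submit sub-process; the caller must consume its
        // stdout and stderr, otherwise the child can block on a full pipe buffer.
        Process process = launcher.launch();
        drain(process.getInputStream());
        drain(process.getErrorStream());

        int exitCode = process.waitFor();
        System.out.println("Spark application exited with code " + exitCode);
    }

    // Echoes a stream line by line on a daemon thread.
    private static void drain(InputStream in) {
        Thread reader = new Thread(() -> {
            try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        });
        reader.setDaemon(true);
        reader.start();
    }
}

If you only need lifecycle events and the final state rather than the raw process output, the same launcher also offers startApplication(), which returns a SparkAppHandle and spares you from managing the Process streams yourself.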

Usage

From source file: com.uber.hoodie.cli.commands.HDFSParquetImportCommand.java

License: Apache License

@CliCommand(value = "hdfsparquetimport", help = "Imports Parquet dataset to a hoodie dataset")
public String convert(
        @CliOption(key = "upsert", mandatory = false, unspecifiedDefaultValue = "false", help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert,
        @CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
        @CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") final String targetPath,
        @CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
        @CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
        @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
        @CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") final String partitionPathField,
        @CliOption(key = {
                "parallelism" }, mandatory = true, help = "Parallelism for hoodie insert") final String parallelism,
        @CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") final String schemaFilePath,
        @CliOption(key = "format", mandatory = true, help = "Format for the input data") final String format,
        @CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory,
        @CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry)
        throws Exception {

    (new HDFSParquetImporter.FormatValidator()).validate("format", format);

    boolean initialized = HoodieCLI.initConf();
    HoodieCLI.initFS(initialized);
    String sparkPropertiesPath = Utils
            .getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());

    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);

    String cmd = SparkCommand.IMPORT.toString();
    if (useUpsert) {
        cmd = SparkCommand.UPSERT.toString();
    }

    sparkLauncher.addAppArgs(cmd, srcPath, targetPath, tableName, tableType, rowKeyField, partitionPathField,
            parallelism, schemaFilePath, sparkMemory, retry);
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    if (exitCode != 0) {
        return "Failed to import dataset to hoodie format";
    }
    return "Dataset imported to hoodie format";
}

From source file: com.uber.hoodie.cli.commands.RecordsCommand.java

License: Apache License

@CliCommand(value = "records deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with")
public String deduplicate(@CliOption(key = {
        "duplicatedPartitionPath" }, help = "Partition Path containing the duplicates") final String duplicatedPartitionPath,
        @CliOption(key = {
                "repairedOutputPath" }, help = "Location to place the repaired files") final String repairedOutputPath,
        @CliOption(key = {
                "sparkProperties" }, help = "Spark Properites File Path") final String sparkPropertiesPath)
        throws Exception {
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
            repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();

    if (exitCode != 0) {
        return "Deduplication failed";
    }
    return "Deduplicated files placed in: " + repairedOutputPath;
}

From source file: com.uber.hoodie.cli.commands.RepairsCommand.java

License: Apache License

@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce "
        + "repaired files to replace with")
public String deduplicate(@CliOption(key = {
        "duplicatedPartitionPath" }, help = "Partition Path containing the duplicates", mandatory = true) final String duplicatedPartitionPath,
        @CliOption(key = {
                "repairedOutputPath" }, help = "Location to place the repaired files", mandatory = true) final String repairedOutputPath,
        @CliOption(key = {
                "sparkProperties" }, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath)
        throws Exception {
    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath,
            repairedOutputPath, HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();

    if (exitCode != 0) {
        return "Deduplication failed";
    }
    return "Deduplicated files placed in: " + repairedOutputPath;
}

From source file: com.uber.hoodie.cli.commands.SavepointsCommand.java

License: Apache License

@CliCommand(value = "savepoint rollback", help = "Savepoint a commit")
public String rollbackToSavepoint(
        @CliOption(key = { "savepoint" }, help = "Savepoint to rollback") final String commitTime,
        @CliOption(key = {
                "sparkProperties" }, help = "Spark Properites File Path") final String sparkPropertiesPath)
        throws Exception {
    HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
    HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);

    if (!timeline.containsInstant(commitInstant)) {
        return "Commit " + commitTime + " not found in Commits " + timeline;
    }

    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime,
            HoodieCLI.tableMetadata.getBasePath());
    Process process = sparkLauncher.launch();
    InputStreamConsumer.captureOutput(process);
    int exitCode = process.waitFor();
    // Refresh the current meta client
    refreshMetaClient();
    if (exitCode != 0) {
        return "Savepoint " + commitTime + " failed to roll back";
    }
    return "Savepoint " + commitTime + " rolled back";
}

From source file: org.cripac.isee.vpe.ctrl.MainController.java

License: Open Source License

public static void main(String[] args) throws URISyntaxException, IOException, ParserConfigurationException,
        SAXException, UnimplementedException {
    // Analyze the command line and store the options into a system property
    // center.
    SystemPropertyCenter propCenter = new SystemPropertyCenter(args);

    final AtomicReference<Boolean> running = new AtomicReference<>();
    running.set(true);

    // Prepare system configuration.
    if (propCenter.sparkMaster.toLowerCase().contains("yarn")) {
        System.setProperty("SPARK_YARN_MODE", "true");

        // Create a thread to listen to reports.
        Thread listener = new Thread(() -> {
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(
                    propCenter.getKafkaConsumerProp(UUID.randomUUID().toString(), true));
            ArrayList<String> topicList = new ArrayList<>();
            for (String appName : propCenter.appsToStart) {
                topicList.add(appName + "_report");
            }
            consumer.subscribe(topicList);
            while (running.get()) {
                ConsumerRecords<String, String> records = consumer.poll(propCenter.batchDuration);
                records.forEach(rec -> System.out.println(rec.value()));
                consumer.commitSync();
            }
        });
        listener.start();

        final class ProcessWithName {
            private Process process;
            private String name;

            private ProcessWithName(Process process, String name) {
                this.process = process;
                this.name = name;
            }
        }

        List<ProcessWithName> processesWithNames = new LinkedList<>();
        for (String appName : propCenter.appsToStart) {
            try {
                SparkLauncher launcher = propCenter.GetSparkLauncher(appName);

                Process launcherProcess = launcher.launch();
                processesWithNames.add(new ProcessWithName(launcherProcess, appName));

                // Create threads listening to output of the launcher process.
                Thread infoThread = new Thread(
                        new InputStreamReaderRunnable(launcherProcess.getInputStream(), "INFO", running),
                        "LogStreamReader info");
                Thread errorThread = new Thread(
                        new InputStreamReaderRunnable(launcherProcess.getErrorStream(), "ERROR", running),
                        "LogStreamReader error");
                infoThread.start();
                errorThread.start();
            } catch (NoAppSpecifiedException e) {
                e.printStackTrace();
            }
        }

        while (!processesWithNames.isEmpty()) {
            for (ProcessWithName processWithName : processesWithNames) {
                try {
                    boolean exited = processWithName.process.waitFor(100, TimeUnit.MILLISECONDS);
                    if (exited) {
                        System.out.println("[INFO]Process " + processWithName.name + "finished! Exit code: "
                                + processWithName.process.exitValue());
                        processWithName.process.getInputStream().close();
                        processWithName.process.getErrorStream().close();
                        processesWithNames.remove(processWithName);
                        break;
                    }
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    } else {
        // TODO Complete code for running locally.
        throw new UnimplementedException();
    }

    running.set(false);
}

From source file: org.datacleaner.spark.ApplicationDriver.java

License: Open Source License

public int launch(SparkLauncher sparkLauncher) throws Exception {
    final Process process = sparkLauncher.launch();

    final InputStream errorStream = process.getErrorStream();
    startLogger(errorStream);

    final InputStream inputStream = process.getInputStream();
    startLogger(inputStream);

    return process.waitFor();
}

From source file: org.kaaproject.examples.spark.KaaSparkLauncher.java

License: Apache License

public static void main(String[] args) throws Exception {
    SparkLauncher launcher = new SparkLauncher().setMaster(SPARK_MASTER_URL).setSparkHome(SPARK_HOME)
            .setAppResource(SPARK_APP_JAR).setMainClass(KaaSparkExample.class.getName())
            .setAppName(KAA_SPARK_EXAMPLE_JOB_NAME).addAppArgs(FLUME_BIND_HOST, FLUME_BIND_PORT);

    final Process spark = launcher.launch();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            LOG.warn("Spark job interrupted!");
            spark.destroy();
        }
    });

    Thread isReader = startReader(spark.getInputStream());
    Thread esReader = startReader(spark.getErrorStream());

    int resultCode = spark.waitFor();

    isReader.join();
    esReader.join();

    if (resultCode != 0) {
        LOG.warn("Spark job result code: {}", resultCode);
    }
}