List of usage examples for org.apache.spark.launcher.SparkLauncher.launch()
public Process launch() throws IOException
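Before the project-specific examples, here is a minimal, self-contained sketch of the usual launch-and-wait pattern. The jar path, main class, and master URL are placeholders rather than values taken from any project on this page; the redirectOutput/redirectError helpers exist in recent Spark releases, while older releases must read the process streams manually as the examples below do.

import org.apache.spark.launcher.SparkLauncher;

public class LaunchExample {
  public static void main(String[] args) throws Exception {
    // Configure the launcher; all values below are placeholders.
    Process process = new SparkLauncher()
        .setAppResource("/path/to/app.jar")   // placeholder application jar
        .setMainClass("com.example.MyApp")    // placeholder main class
        .setMaster("local[*]")                // placeholder master URL
        .redirectOutput(ProcessBuilder.Redirect.INHERIT)
        .redirectError(ProcessBuilder.Redirect.INHERIT)
        .launch();

    // launch() returns the spark-submit child process; wait for it to exit.
    int exitCode = process.waitFor();
    System.out.println("Spark application exited with code " + exitCode);
  }
}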
From source file: com.uber.hoodie.cli.commands.HDFSParquetImportCommand.java
License: Apache License
@CliCommand(value = "hdfsparquetimport", help = "Imports Parquet dataset to a hoodie dataset") public String convert( @CliOption(key = "upsert", mandatory = false, unspecifiedDefaultValue = "false", help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert, @CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath, @CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") final String targetPath, @CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName, @CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType, @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField, @CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") final String partitionPathField, @CliOption(key = {//from ww w .j ava2 s . com "parallelism" }, mandatory = true, help = "Parallelism for hoodie insert") final String parallelism, @CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") final String schemaFilePath, @CliOption(key = "format", mandatory = true, help = "Format for the input data") final String format, @CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory, @CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry) throws Exception { (new HDFSParquetImporter.FormatValidator()).validate("format", format); boolean initialized = HoodieCLI.initConf(); HoodieCLI.initFS(initialized); String sparkPropertiesPath = Utils .getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala()); SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); String cmd = SparkCommand.IMPORT.toString(); if (useUpsert) { cmd = SparkCommand.UPSERT.toString(); } sparkLauncher.addAppArgs(cmd, srcPath, targetPath, tableName, tableType, rowKeyField, partitionPathField, parallelism, schemaFilePath, sparkMemory, retry); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Failed to import dataset to hoodie format"; } return "Dataset imported to hoodie format"; }
From source file: com.uber.hoodie.cli.commands.RecordsCommand.java
License: Apache License
@CliCommand(value = "records deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with") public String deduplicate(@CliOption(key = { "duplicatedPartitionPath" }, help = "Partition Path containing the duplicates") final String duplicatedPartitionPath, @CliOption(key = {/*from w ww. j a va 2 s . c om*/ "repairedOutputPath" }, help = "Location to place the repaired files") final String repairedOutputPath, @CliOption(key = { "sparkProperties" }, help = "Spark Properites File Path") final String sparkPropertiesPath) throws Exception { SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath, HoodieCLI.tableMetadata.getBasePath()); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Deduplicated files placed in: " + repairedOutputPath; } return "Deduplication failed "; }
From source file: com.uber.hoodie.cli.commands.RepairsCommand.java
License: Apache License
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce " + "repaired files to replace with") public String deduplicate(@CliOption(key = { "duplicatedPartitionPath" }, help = "Partition Path containing the duplicates", mandatory = true) final String duplicatedPartitionPath, @CliOption(key = {/*www. j av a2s . c o m*/ "repairedOutputPath" }, help = "Location to place the repaired files", mandatory = true) final String repairedOutputPath, @CliOption(key = { "sparkProperties" }, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath) throws Exception { SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath, HoodieCLI.tableMetadata.getBasePath()); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Deduplicated files placed in: " + repairedOutputPath; } return "Deduplication failed "; }
From source file: com.uber.hoodie.cli.commands.SavepointsCommand.java
License: Apache License
@CliCommand(value = "savepoint rollback", help = "Savepoint a commit") public String rollbackToSavepoint( @CliOption(key = { "savepoint" }, help = "Savepoint to rollback") final String commitTime, @CliOption(key = {/* ww w . j a va2s .co m*/ "sparkProperties" }, help = "Spark Properites File Path") final String sparkPropertiesPath) throws Exception { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); if (!timeline.containsInstant(commitInstant)) { return "Commit " + commitTime + " not found in Commits " + timeline; } SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), commitTime, HoodieCLI.tableMetadata.getBasePath()); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); // Refresh the current refreshMetaClient(); if (exitCode != 0) { return "Savepoint " + commitTime + " failed to roll back"; } return "Savepoint " + commitTime + " rolled back"; }
From source file: org.cripac.isee.vpe.ctrl.MainController.java
License: Open Source License
public static void main(String[] args)
    throws URISyntaxException, IOException, ParserConfigurationException, SAXException, UnimplementedException {
  // Analyze the command line and store the options into a system property center.
  SystemPropertyCenter propCenter = new SystemPropertyCenter(args);

  final AtomicReference<Boolean> running = new AtomicReference<>();
  running.set(true);

  // Prepare system configuration.
  if (propCenter.sparkMaster.toLowerCase().contains("yarn")) {
    System.setProperty("SPARK_YARN_MODE", "true");

    // Create a thread to listen to reports.
    Thread listener = new Thread(() -> {
      KafkaConsumer<String, String> consumer = new KafkaConsumer<>(
          propCenter.getKafkaConsumerProp(UUID.randomUUID().toString(), true));
      ArrayList<String> topicList = new ArrayList<>();
      for (String appName : propCenter.appsToStart) {
        topicList.add(appName + "_report");
      }
      consumer.subscribe(topicList);
      while (running.get()) {
        ConsumerRecords<String, String> records = consumer.poll(propCenter.batchDuration);
        records.forEach(rec -> System.out.println(rec.value()));
        consumer.commitSync();
      }
    });
    listener.start();

    final class ProcessWithName {
      private Process process;
      private String name;

      private ProcessWithName(Process process, String name) {
        this.process = process;
        this.name = name;
      }
    }

    List<ProcessWithName> processesWithNames = new LinkedList<>();
    for (String appName : propCenter.appsToStart) {
      try {
        SparkLauncher launcher = propCenter.GetSparkLauncher(appName);

        Process launcherProcess = launcher.launch();
        processesWithNames.add(new ProcessWithName(launcherProcess, appName));

        // Create threads listening to output of the launcher process.
        Thread infoThread = new Thread(
            new InputStreamReaderRunnable(launcherProcess.getInputStream(), "INFO", running),
            "LogStreamReader info");
        Thread errorThread = new Thread(
            new InputStreamReaderRunnable(launcherProcess.getErrorStream(), "ERROR", running),
            "LogStreamReader error");
        infoThread.start();
        errorThread.start();
      } catch (NoAppSpecifiedException e) {
        e.printStackTrace();
      }
    }

    while (!processesWithNames.isEmpty()) {
      for (ProcessWithName processWithName : processesWithNames) {
        try {
          boolean exited = processWithName.process.waitFor(100, TimeUnit.MILLISECONDS);
          if (exited) {
            System.out.println("[INFO]Process " + processWithName.name
                + "finished! Exit code: " + processWithName.process.exitValue());
            processWithName.process.getInputStream().close();
            processWithName.process.getErrorStream().close();
            processesWithNames.remove(processWithName);
            break;
          }
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }
    }
  } else {
    // TODO Complete code for running locally.
    throw new UnimplementedException();
  }
  running.set(false);
}
From source file: org.datacleaner.spark.ApplicationDriver.java
License: Open Source License
public int launch(SparkLauncher sparkLauncher) throws Exception {
  final Process process = sparkLauncher.launch();

  final InputStream errorStream = process.getErrorStream();
  startLogger(errorStream);

  final InputStream inputStream = process.getInputStream();
  startLogger(inputStream);

  return process.waitFor();
}
From source file: org.kaaproject.examples.spark.KaaSparkLauncher.java
License: Apache License
public static void main(String[] args) throws Exception {
  SparkLauncher launcher = new SparkLauncher().setMaster(SPARK_MASTER_URL).setSparkHome(SPARK_HOME)
      .setAppResource(SPARK_APP_JAR).setMainClass(KaaSparkExample.class.getName())
      .setAppName(KAA_SPARK_EXAMPLE_JOB_NAME).addAppArgs(FLUME_BIND_HOST, FLUME_BIND_PORT);

  final Process spark = launcher.launch();

  Runtime.getRuntime().addShutdownHook(new Thread() {
    @Override
    public void run() {
      LOG.warn("Spark job interrupted!");
      spark.destroy();
    }
  });

  Thread isReader = startReader(spark.getInputStream());
  Thread esReader = startReader(spark.getErrorStream());

  int resultCode = spark.waitFor();

  isReader.join();
  esReader.join();

  if (resultCode != 0) {
    LOG.warn("Spark job result code: {}", resultCode);
  }
}
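All of the examples above manage the raw Process returned by launch(). As a point of comparison, Spark 1.6 and later also provide SparkLauncher.startApplication(), which returns a SparkAppHandle and reports state changes through a listener instead of requiring the caller to consume stdout/stderr. A minimal sketch, again with placeholder jar, class, and master values:

import java.util.concurrent.CountDownLatch;
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class StartApplicationExample {
  public static void main(String[] args) throws Exception {
    CountDownLatch done = new CountDownLatch(1);

    // startApplication() monitors the application through a handle
    // rather than handing back the spark-submit child process.
    SparkAppHandle handle = new SparkLauncher()
        .setAppResource("/path/to/app.jar")   // placeholder application jar
        .setMainClass("com.example.MyApp")    // placeholder main class
        .setMaster("local[*]")                // placeholder master URL
        .startApplication(new SparkAppHandle.Listener() {
          @Override
          public void stateChanged(SparkAppHandle h) {
            // Release the latch once the application reaches a terminal state.
            if (h.getState().isFinal()) {
              done.countDown();
            }
          }

          @Override
          public void infoChanged(SparkAppHandle h) {
            // Application ID and other info updates arrive here.
          }
        });

    done.await();
    System.out.println("Final state: " + handle.getState());
  }
}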