List of usage examples for org.apache.spark.launcher.SparkLauncher (constructor)
public SparkLauncher()
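All of the examples below follow the same basic pattern: construct a SparkLauncher with the no-argument constructor, configure the application through its fluent setters, and then either launch() a spark-submit child process or startApplication() for a monitored handle. Before the full examples, here is a minimal sketch of that pattern; the Spark home, jar path, main class, and master URL are placeholders, not values taken from any of the source files below.

import org.apache.spark.launcher.SparkLauncher;

// Minimal sketch (placeholder paths, class names, and master URL; adjust for your environment).
public class LaunchExample {
    public static void main(String[] args) throws Exception {
        Process spark = new SparkLauncher()
                .setSparkHome("/opt/spark")                 // placeholder Spark installation directory
                .setAppResource("/path/to/my-app.jar")      // placeholder application jar
                .setMainClass("com.example.MyApp")          // placeholder main class
                .setMaster("local[*]")                      // placeholder master URL
                .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
                .launch();                                  // starts a spark-submit child process
        int exitCode = spark.waitFor();                     // block until the job finishes
        System.out.println("Spark job exited with code " + exitCode);
    }
}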
From source file: com.cloudera.livy.client.local.ContextLauncher.java
License: Apache License

private static ChildProcess startDriver(final RpcServer rpcServer, final LocalConf conf, final String clientId,
        final String secret, final String className) throws IOException {
    final String serverAddress = rpcServer.getAddress();
    final String serverPort = String.valueOf(rpcServer.getPort());
    if (conf.get(CLIENT_IN_PROCESS) != null) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                List<String> args = new ArrayList<>();
                args.add("--remote-host");
                args.add(serverAddress);
                args.add("--remote-port");
                args.add(serverPort);
                args.add("--client-id");
                args.add(clientId);
                args.add("--secret");
                args.add(secret);
                for (Map.Entry<String, String> e : conf) {
                    args.add("--conf");
                    args.add(String.format("%s=%s", e.getKey(), e.getValue()));
                }
                try {
                    RemoteDriver.main(args.toArray(new String[args.size()]));
                } catch (Exception e) {
                    LOG.error("Error running driver.", e);
                }
            }
        };
        return new ChildProcess(conf, child);
    } else {
        // If a Spark installation is provided, use the spark-submit script. Otherwise, call the
        // SparkSubmit class directly, which has some caveats (like having to provide a proper
        // version of Guava on the classpath depending on the deploy mode).
        final SparkLauncher launcher = new SparkLauncher();
        String sparkHome = conf.get(SPARK_HOME_KEY);
        if (sparkHome == null) {
            sparkHome = System.getenv(SPARK_HOME_ENV);
        }
        if (sparkHome == null) {
            sparkHome = System.getProperty(SPARK_HOME_KEY);
        }
        launcher.setSparkHome(sparkHome);

        conf.set(CLIENT_ID, clientId);
        conf.set(CLIENT_SECRET, secret);

        launcher.setAppResource("spark-internal");

        String livyJars = conf.get(LIVY_JARS);
        if (livyJars == null) {
            String livyHome = System.getenv("LIVY_HOME");
            Preconditions.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
            File clientJars = new File(livyHome, "client-jars");
            Preconditions.checkState(clientJars.isDirectory(),
                    "Cannot find 'client-jars' directory under LIVY_HOME.");
            List<String> jars = new ArrayList<>();
            for (File f : clientJars.listFiles()) {
                jars.add(f.getAbsolutePath());
            }
            livyJars = Joiner.on(",").join(jars);
        }

        String userJars = conf.get(SPARK_JARS_KEY);
        if (userJars != null) {
            String allJars = Joiner.on(",").join(livyJars, userJars);
            conf.set(SPARK_JARS_KEY, allJars);
        } else {
            conf.set(SPARK_JARS_KEY, livyJars);
        }

        // Disable multiple attempts since the RPC server doesn't yet support multiple
        // connections for the same registered app.
        conf.set("spark.yarn.maxAppAttempts", "1");

        File confFile = writeConfToFile(conf);

        // Define how to pass options to the child process. If launching in client (or local)
        // mode, the driver options need to be passed directly on the command line. Otherwise,
        // SparkSubmit will take care of that for us.
        String master = conf.get("spark.master");
        Preconditions.checkArgument(master != null, "spark.master is not defined.");

        launcher.setMaster(master);
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(className);

        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }

        launcher.addAppArgs("--remote-host", serverAddress);
        launcher.addAppArgs("--remote-port", serverPort);

        return new ChildProcess(conf, launcher.launch());
    }
}
From source file: com.cloudera.livy.rsc.ContextLauncher.java
License: Apache License

private static ChildProcess startDriver(final RSCConf conf, Promise<?> promise) throws IOException {
    String livyJars = conf.get(LIVY_JARS);
    if (livyJars == null) {
        String livyHome = System.getenv("LIVY_HOME");
        Utils.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
        File rscJars = new File(livyHome, "rsc-jars");
        if (!rscJars.isDirectory()) {
            rscJars = new File(livyHome, "rsc/target/jars");
        }
        Utils.checkState(rscJars.isDirectory(), "Cannot find 'client-jars' directory under LIVY_HOME.");
        List<String> jars = new ArrayList<>();
        for (File f : rscJars.listFiles()) {
            jars.add(f.getAbsolutePath());
        }
        livyJars = Utils.join(jars, ",");
    }
    merge(conf, SPARK_JARS_KEY, livyJars, ",");

    String kind = conf.get(SESSION_KIND);
    if ("sparkr".equals(kind)) {
        merge(conf, SPARK_ARCHIVES_KEY, conf.get(RSCConf.Entry.SPARKR_PACKAGE), ",");
    } else if ("pyspark".equals(kind)) {
        merge(conf, "spark.submit.pyFiles", conf.get(RSCConf.Entry.PYSPARK_ARCHIVES), ",");
    }

    // Disable multiple attempts since the RPC server doesn't yet support multiple
    // connections for the same registered app.
    conf.set("spark.yarn.maxAppAttempts", "1");

    // Let the launcher go away when launching in yarn cluster mode. This avoids keeping lots
    // of "small" Java processes lingering on the Livy server node.
    conf.set("spark.yarn.submit.waitAppCompletion", "false");

    // For testing; propagate jacoco settings so that we also do coverage analysis
    // on the launched driver. We replace the name of the main file ("main.exec")
    // so that we don't end up fighting with the main test launcher.
    String jacocoArgs = System.getProperty("jacoco.args");
    if (jacocoArgs != null) {
        jacocoArgs = jacocoArgs.replace("main.exec", "child.exec");
        merge(conf, SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, jacocoArgs, " ");
    }

    final File confFile = writeConfToFile(conf);

    if (conf.getBoolean(CLIENT_IN_PROCESS)) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                try {
                    RSCDriverBootstrapper.main(new String[] { confFile.getAbsolutePath() });
                } catch (Exception e) {
                    throw Utils.propagate(e);
                }
            }
        };
        return new ChildProcess(conf, promise, child, confFile);
    } else {
        final SparkLauncher launcher = new SparkLauncher();
        launcher.setSparkHome(System.getenv(SPARK_HOME_ENV));
        launcher.setAppResource("spark-internal");
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(RSCDriverBootstrapper.class.getName());

        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }

        return new ChildProcess(conf, promise, launcher.launch(), confFile);
    }
}
From source file: com.thinkbiganalytics.nifi.pyspark.core.ExecutePySpark.java
License: Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES)
                .getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile)
                .getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS)
                .evaluateAttributeExpressions(flowFile).getValue();

        PySparkUtils pySparkUtils = new PySparkUtils();

        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }

        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }

        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }

        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;
            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);
                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");
                    if (kerberosPrincipal.equals("") && kerberosKeyTab.equals("")) {
                        logger.error(
                                "Kerberos Principal and Keytab provided with empty values for a Kerberized cluster.");
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                    try {
                        logger.info("User authentication initiated");
                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger,
                                hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);
                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }
                    } catch (Exception unknownException) {
                        logger.error("Unknown exception occurred while validating user: "
                                + unknownException.getMessage());
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user: " + e1.getMessage());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }

        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });

        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);
        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });

        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }

        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }

        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);
        logger.info("Spark home set to: {}", new Object[] { sparkHome });
        logger.info("Driver memory set to: {}", new Object[] { driverMemory });
        logger.info("Executor memory set to: {}", new Object[] { executorMemory });
        logger.info("Executor instances set to: {}", new Object[] { executorInstances });
        logger.info("Executor cores set to: {}", new Object[] { executorCores });
        logger.info("Network timeout set to: {}", new Object[] { networkTimeout });

        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {}", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {}", new Object[] { kerberosKeyTab });
        }

        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {}", new Object[] { yarnQueue });
        }

        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                String[] confKeyValue = additionalSparkConfigOption.split("=");
                if (confKeyValue.length == 2) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}",
                            new Object[] { confKeyValue[0], confKeyValue[1] });
                }
            }
        }

        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();

        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for PySpark job to complete");
        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
From source file: com.uber.hoodie.cli.utils.SparkUtil.java
License: Apache License

/**
 * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro
 */
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
    String currentJar = new File(
            SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
                    .getAbsolutePath();
    SparkLauncher sparkLauncher = new SparkLauncher().setAppResource(currentJar)
            .setMainClass(SparkMain.class.getName());
    if (StringUtils.isNotEmpty(propertiesFile)) {
        sparkLauncher.setPropertiesFile(propertiesFile);
    }
    File libDirectory = new File(new File(currentJar).getParent(), "lib");
    for (String library : libDirectory.list()) {
        sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
    }
    return sparkLauncher;
}
From source file: io.zz.Launcher.java

public static void main(String[] args) throws IOException, InterruptedException {
    SparkLauncher sparkLauncher = new SparkLauncher();
    Process spark = sparkLauncher.setAppName("APP NAME")
            .setSparkHome("/tmp")
            .setAppResource(SparkContext.jarOfClass(Launcher.class).get())
            .setMaster("spark://192.168.100.105:7077")
            .setMainClass("io.zz.TestSaveToCassandra")
            .launch();
    spark.waitFor();
}
From source file: org.apache.eagle.app.environment.impl.SparkExecutionRuntime.java
License: Apache License

private SparkLauncher prepareSparkConfig(Config config) {
    String master = config.hasPath(TOPOLOGY_MASTER) ? config.getString(TOPOLOGY_MASTER) : "local[*]";
    String sparkExecutorCores = config.getString(SPARK_EXECUTOR_CORES);
    String sparkExecutorMemory = config.getString(SPARK_EXECUTOR_MEMORY);
    String driverMemory = config.getString(DRIVER_MEMORY);
    String driverCore = config.getString(DRIVER_CORES);
    String deployMode = config.getString(DEPLOY_MODE);
    String enable = config.getString(TOPOLOGY_DYNAMICALLOCATION);
    boolean verbose = config.getBoolean(TOPOLOGY_VERBOSE);
    String mainClass = config.getString(TOPOLOGY_MAINCLASS);
    String sparkHome = config.getString(TOPOLOGY_SPARKHOME);
    String uiport = config.getString(TOPOLOGY_SPARKUIPORT);
    String appResource = config.getString(TOPOLOGY_APPRESOURCE);
    String yarnqueue = config.getString(TOPOLOGY_YARNQUEUE);

    SparkLauncher sparkLauncher = new SparkLauncher();
    sparkLauncher.setMaster(master);
    sparkLauncher.setMainClass(mainClass);
    sparkLauncher.setSparkHome(sparkHome);
    //sparkLauncher.setJavaHome(TOPOLOGY_JAVAHOME);
    sparkLauncher.setDeployMode(deployMode);
    sparkLauncher.setVerbose(verbose);
    sparkLauncher.setAppResource(appResource);
    sparkLauncher.setAppName(config.getString(TOPOLOGY_NAME));
    sparkLauncher.setConf("spark.yarn.queue", yarnqueue);
    sparkLauncher.setConf("spark.executor.cores", sparkExecutorCores);
    sparkLauncher.setConf("spark.executor.memory", sparkExecutorMemory);
    sparkLauncher.setConf("spark.driver.memory", driverMemory);
    sparkLauncher.setConf("spark.driver.cores", driverCore);
    sparkLauncher.setConf("spark.streaming.dynamicAllocation.enable", enable);
    sparkLauncher.setConf("spark.ui.port", uiport);
    String path = config.getString(TOPOLOGY_SPARKCONFFILEPATH);
    if (StringUtil.isNotBlank(path)) {
        sparkLauncher.setPropertiesFile(path);
    }

    String batchDuration = config.getString(BATCH_DURATION);
    String routerTasknum = config.getString(ROUTER_TASK_NUM);
    String alertTasknum = config.getString(ALERT_TASK_NUM);
    String publishTasknum = config.getString(PUBLISH_TASK_NUM);
    String slideDurationsecond = config.getString(SLIDE_DURATION_SECOND);
    String windowDurationssecond = config.getString(WINDOW_DURATIONS_SECOND);
    String checkpointPath = config.getString(CHECKPOINT_PATH);
    String topologyGroupid = config.getString(TOPOLOGY_GROUPID);
    String autoOffsetReset = config.getString(AUTO_OFFSET_RESET);
    String restApihost = config.getString(EAGLE_CORRELATION_SERVICE_HOST);
    String restApiport = config.getString(EAGLE_CORRELATION_SERVICE_PORT);
    String restApicontext = config.getString(EAGLE_CORRELATION_CONTEXT);
    String useMultiKafka = config.getString(TOPOLOGY_MULTIKAFKA);
    String kafkaBrokerZkQuorum = config.getString(SPOUT_KAFKABROKERZKQUORUM);
    String zkConfigzkQuorum = config.getString(ZKCONFIG_ZKQUORUM);

    sparkLauncher.addAppArgs(batchDuration, routerTasknum, alertTasknum, publishTasknum, slideDurationsecond,
            windowDurationssecond, checkpointPath, topologyGroupid, autoOffsetReset, restApicontext,
            restApiport, restApihost, useMultiKafka, kafkaBrokerZkQuorum, zkConfigzkQuorum);
    return sparkLauncher;
}
From source file: org.apache.pirk.test.distributed.testsuite.DistTestSuite.java
License: Apache License

@SuppressWarnings("unused")
public static List<QueryResponseJSON> performQuery(String queryType, ArrayList<String> selectors, FileSystem fs,
        boolean isSpark, int numThreads, boolean isStreaming) throws Exception {
    logger.info("performQuery: ");

    String queryInputDir = SystemConfiguration.getProperty(DistributedTestDriver.PIR_QUERY_INPUT_DIR);
    String outputFile = SystemConfiguration.getProperty(DistributedTestDriver.OUTPUT_DIRECTORY_PROPERTY);
    fs.delete(new Path(outputFile), true); // Ensure old output does not exist.

    SystemConfiguration.setProperty("pir.queryInput", queryInputDir);
    SystemConfiguration.setProperty("pir.outputFile", outputFile);
    SystemConfiguration.setProperty("pir.numReduceTasks", "1");
    SystemConfiguration.setProperty("pir.stopListFile",
            SystemConfiguration.getProperty(DistributedTestDriver.PIR_STOPLIST_FILE));

    // Create the temp result file
    File fileFinalResults = File.createTempFile("finalResultsFile", ".txt");
    fileFinalResults.deleteOnExit();
    logger.info("fileFinalResults = " + fileFinalResults.getAbsolutePath());

    boolean embedSelector = SystemConfiguration.getBooleanProperty("pirTest.embedSelector", false);
    boolean useExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useExpLookupTable", false);
    boolean useHDFSExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useHDFSExpLookupTable",
            false);

    // Set the necessary objects
    QueryInfo queryInfo = new QueryInfo(BaseTests.queryIdentifier, selectors.size(), BaseTests.hashBitSize,
            BaseTests.hashKey, BaseTests.dataPartitionBitSize, queryType, useExpLookupTable, embedSelector,
            useHDFSExpLookupTable);

    Paillier paillier = new Paillier(BaseTests.paillierBitSize, BaseTests.certainty);

    // Perform the encryption
    logger.info("Performing encryption of the selectors - forming encrypted query vectors:");
    EncryptQuery encryptQuery = new EncryptQuery(queryInfo, selectors, paillier);
    Querier querier = encryptQuery.encrypt(numThreads);
    logger.info("Completed encryption of the selectors - completed formation of the encrypted query vectors:");

    // Write the Query object to a file
    Path queryInputDirPath = new Path(queryInputDir);
    new HadoopFileSystemStore(fs).store(queryInputDirPath, querier.getQuery());
    fs.deleteOnExit(queryInputDirPath);

    // Grab the original data and query schema properties to reset upon completion
    String dataSchemaProp = SystemConfiguration.getProperty("data.schemas");
    String querySchemaProp = SystemConfiguration.getProperty("query.schemas");

    // Get the correct input format class name
    JSONInputFormatBase jFormat = new JSONInputFormatBase();
    String jsonBaseInputFormatString = jFormat.getClass().getName();
    SystemConfiguration.setProperty("pir.baseInputFormat", jsonBaseInputFormatString);

    // Submitting the tool for encrypted query
    logger.info("Performing encrypted query:");
    if (isSpark) {
        logger.info("spark.home = " + SystemConfiguration.getProperty("spark.home"));

        // Build args
        String inputFormat = SystemConfiguration.getProperty("pir.dataInputFormat");
        logger.info("inputFormat = " + inputFormat);

        ArrayList<String> args = new ArrayList<>();
        if (isStreaming) {
            logger.info("platform = sparkstreaming");
            args.add("-" + ResponderProps.PLATFORM + "=sparkstreaming");
            args.add("-" + ResponderProps.BATCHSECONDS + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.batchSeconds", "30"));
            args.add("-" + ResponderProps.WINDOWLENGTH + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.windowLength", "60"));
            args.add("-" + ResponderProps.MAXBATCHES + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.maxBatches", "-1"));
            args.add("-" + ResponderProps.STOPGRACEFULLY + "="
                    + SystemConfiguration.getProperty("spark.streaming.stopGracefullyOnShutdown", "false"));
            args.add("-" + ResponderProps.NUMDATAPARTITIONS + "="
                    + SystemConfiguration.getProperty("pir.numDataPartitions", "3"));
            args.add("-" + ResponderProps.USEQUEUESTREAM + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.useQueueStream", "false"));
        } else {
            logger.info("platform = spark");
            args.add("-" + ResponderProps.PLATFORM + "=spark");
        }
        args.add("-" + ResponderProps.DATAINPUTFORMAT + "=" + inputFormat);
        args.add("-" + ResponderProps.QUERYINPUT + "=" + SystemConfiguration.getProperty("pir.queryInput"));
        args.add("-" + ResponderProps.OUTPUTFILE + "=" + SystemConfiguration.getProperty("pir.outputFile"));
        args.add("-" + ResponderProps.STOPLISTFILE + "=" + SystemConfiguration.getProperty("pir.stopListFile"));
        args.add("-" + ResponderProps.USELOCALCACHE + "="
                + SystemConfiguration.getProperty("pir.useLocalCache", "true"));
        args.add("-" + ResponderProps.LIMITHITSPERSELECTOR + "="
                + SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false"));
        args.add("-" + ResponderProps.MAXHITSPERSELECTOR + "="
                + SystemConfiguration.getProperty("pir.maxHitsPerSelector", "1000"));
        args.add("-" + ResponderProps.QUERYSCHEMAS + "=" + Inputs.HDFS_QUERY_FILES);
        args.add("-" + ResponderProps.DATASCHEMAS + "=" + Inputs.DATA_SCHEMA_FILE_HDFS);
        args.add("-" + ResponderProps.NUMEXPLOOKUPPARTS + "="
                + SystemConfiguration.getProperty("pir.numExpLookupPartitions", "100"));
        args.add("-" + ResponderProps.USEMODEXPJOIN + "="
                + SystemConfiguration.getProperty("pir.useModExpJoin", "false"));
        args.add("-" + ResponderProps.NUMCOLMULTPARTITIONS + "="
                + SystemConfiguration.getProperty("pir.numColMultPartitions", "20"));
        args.add("-" + ResponderProps.COLMULTREDUCEBYKEY + "="
                + SystemConfiguration.getProperty("pir.colMultReduceByKey", "false"));
        if (inputFormat.equals(InputFormatConst.BASE_FORMAT)) {
            args.add("-" + ResponderProps.INPUTDATA + "=" + SystemConfiguration.getProperty("pir.inputData"));
            args.add("-" + ResponderProps.BASEQUERY + "=" + SystemConfiguration.getProperty("pir.baseQuery"));
            args.add("-" + ResponderProps.BASEINPUTFORMAT + "="
                    + SystemConfiguration.getProperty("pir.baseInputFormat"));
        } else if (inputFormat.equals(InputFormatConst.ES)) {
            args.add("-" + ResponderProps.ESQUERY + "=" + SystemConfiguration.getProperty("pir.esQuery"));
            args.add("-" + ResponderProps.ESRESOURCE + "=" + SystemConfiguration.getProperty("pir.esResource"));
            args.add("-" + ResponderProps.ESNODES + "="
                    + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_NODES_PROPERTY));
            args.add("-" + ResponderProps.ESPORT + "="
                    + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_PORT_PROPERTY));
        }

        for (String arg : args) {
            logger.info("arg = " + arg);
        }

        // Run spark application
        Process sLauncher = new SparkLauncher().setAppResource(SystemConfiguration.getProperty("jarFile"))
                .setSparkHome(SystemConfiguration.getProperty("spark.home"))
                .setMainClass("org.apache.pirk.responder.wideskies.ResponderDriver")
                .addAppArgs(args.toArray(new String[args.size()])).setMaster("yarn-cluster")
                .setConf(SparkLauncher.EXECUTOR_MEMORY, "2g").setConf(SparkLauncher.DRIVER_MEMORY, "2g")
                .setConf(SparkLauncher.EXECUTOR_CORES, "1").launch();
        sLauncher.waitFor();
    } else {
        SystemConfiguration.setProperty("data.schemas", Inputs.DATA_SCHEMA_FILE_HDFS);
        SystemConfiguration.setProperty("query.schemas", Inputs.HDFS_QUERY_FILES);

        ComputeResponseTool responseTool = new ComputeResponseTool();
        ToolRunner.run(responseTool, new String[] {});
    }
    logger.info("Completed encrypted query");

    // Perform decryption
    // Reconstruct the necessary objects from the files
    logger.info("Performing decryption; writing final results file");
    if (isStreaming) {
        outputFile = outputFile + "_0"; // currently only processing one batch for testing
    }
    logger.info("Pulling results from outputFile = " + outputFile);
    Response response = new HadoopFileSystemStore(fs).recall(outputFile, Response.class);

    // Perform decryption and output the result file
    DecryptResponse decryptResponse = new DecryptResponse(response, querier);
    QueryResultsWriter.writeResultFile(fileFinalResults, decryptResponse.decrypt(numThreads));
    logger.info("Completed performing decryption and writing final results file");

    // Read in results
    logger.info("Reading in and checking results");
    List<QueryResponseJSON> results = TestUtils.readResultsFile(fileFinalResults);

    // Reset data and query schema properties
    SystemConfiguration.setProperty("data.schemas", dataSchemaProp);
    SystemConfiguration.setProperty("query.schemas", querySchemaProp);

    // Clean up output dir in hdfs
    fs.delete(new Path(outputFile), true);

    return results;
}
From source file: org.cripac.isee.vpe.ctrl.SystemPropertyCenter.java
License: Open Source License

SparkLauncher GetSparkLauncher(String appName) throws IOException, NoAppSpecifiedException {
    SparkLauncher launcher = new SparkLauncher().setAppResource(jarPath)
            .setMainClass(AppManager.getMainClassName(appName))
            .setMaster(sparkMaster)
            .setAppName(appName)
            .setVerbose(verbose)
            .addFile(ConfManager.getConcatCfgFilePathList(","))
            .setConf(SparkLauncher.DRIVER_MEMORY, driverMem)
            .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMem)
            .setConf(SparkLauncher.CHILD_PROCESS_LOGGER_NAME, appName)
            .setConf(SparkLauncher.EXECUTOR_CORES, "" + executorCores)
            .setConf("spark.driver.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.executor.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.yarn.am.nodeLabelExpression", yarnAmNodeLabelExpression)
            .addSparkArg("--driver-cores", "" + driverCores)
            .addSparkArg("--num-executors", "" + numExecutors)
            .addSparkArg("--total-executor-cores", "" + totalExecutorCores)
            .addSparkArg("--queue", hadoopQueue)
            .addAppArgs(getArgs());
    if (sparkConfFilePath != null) {
        if (new File(sparkConfFilePath).exists()) {
            launcher = launcher.setPropertiesFile(sparkConfFilePath);
        } else {
            logger.warn("Spark configuration file " + sparkConfFilePath + " does not exist!");
        }
    }
    if (log4jPropFilePath != null) {
        if (new File(log4jPropFilePath).exists()) {
            launcher = launcher.addFile(log4jPropFilePath);
        } else {
            logger.warn("Log4j configuration file " + log4jPropFilePath + " does not exist!");
        }
    }
    if (sysPropFilePath != null) {
        if (new File(sysPropFilePath).exists()) {
            launcher = launcher.addFile(sysPropFilePath);
        } else {
            logger.warn("System configuration file " + sysPropFilePath + " does not exist!");
        }
    }
    if (appPropFilePath != null) {
        if (new File(appPropFilePath).exists()) {
            launcher = launcher.addFile(appPropFilePath);
        } else {
            logger.warn("App configuration file " + appPropFilePath + " does not exist!");
        }
    }
    return launcher;
}
From source file: org.flowable.decision.DecisionAnalysisService.java
License: Apache License

private void submitSparkAppsForTasks(ProcessDefinition processDefinition, Map<String, List<String>> outcomesMap,
        List<UserTask> matchingUserTasks, Map<String, Map<String, List<String>>> possibleValueCounts) {
    for (UserTask matchingUserTask : matchingUserTasks) {
        LOGGER.info("Submitting Spark ML app for task " + matchingUserTask.getId() + "...");
        try {
            // Not so pretty: generating a long argument string to pass info to spark job.
            // Should be handled with a persistent store really.
            /*
             * Format (separated by # character):
             *
             * - processDefinitionId
             * - taskKey
             * - outcome variable
             * - outcome variable possibilities
             * - variable names
             * - variable possibilities
             */
            StringBuilder argumentBuilder = new StringBuilder();
            argumentBuilder.append(processDefinition.getId()).append("#") // process definition id
                    .append(matchingUserTask.getId()).append("#") // task key
                    .append("form_" + matchingUserTask.getFormKey() + "_outcome").append("#"); // outcome variable

            List<String> outcomes = outcomesMap.get(matchingUserTask.getId());
            for (int i = 0; i < outcomes.size(); i++) {
                argumentBuilder.append(outcomes.get(i)); // outcome variable output possibilities
                if (i != outcomes.size() - 1) {
                    argumentBuilder.append(";");
                }
            }
            argumentBuilder.append("#");

            Map<String, List<String>> variableToPotentialValues = possibleValueCounts
                    .get(matchingUserTask.getId());
            List<String> variableNames = new ArrayList<>(variableToPotentialValues.keySet());
            for (int i = 0; i < variableNames.size(); i++) {
                argumentBuilder.append(variableNames.get(i)); // variable names
                if (i != variableNames.size() - 1) {
                    argumentBuilder.append(";");
                }
            }
            argumentBuilder.append("#");

            for (int i = 0; i < variableNames.size(); i++) {
                List<String> possibleValues = variableToPotentialValues.get(variableNames.get(i));
                for (int j = 0; j < possibleValues.size(); j++) {
                    argumentBuilder.append(possibleValues.get(j)); // variable possibilities
                    if (j != possibleValues.size() - 1) {
                        argumentBuilder.append("&");
                    }
                }
                if (i != variableNames.size() - 1) {
                    argumentBuilder.append(";");
                }
            }

            LOGGER.info("Arguments for Spark app: " + argumentBuilder.toString());

            SparkAppHandle sparkAppHandle = new SparkLauncher().setSparkHome(System.getProperty("sparkHome"))
                    .setAppResource(System.getProperty("appResource"))
                    .setMainClass("org.flowable.AnalyseDecisions")
                    .setMaster("local[4]")
                    // .setVerbose(true)
                    .addAppArgs(argumentBuilder.toString())
                    .redirectOutput(Redirect.INHERIT)
                    .startApplication(new SparkAppHandle.Listener() {

                        @Override
                        public void stateChanged(SparkAppHandle handle) {
                            LOGGER.info(handle.getState() + " new state");
                        }

                        @Override
                        public void infoChanged(SparkAppHandle handle) {
                            LOGGER.info(handle.getState() + " new state");
                        }
                    });

            // For demo: make sure the tasks are processed sequentially to not have the console output
            // mixed for all tasks
            while (!sparkAppHandle.getState().equals(State.FINISHED)
                    && !sparkAppHandle.getState().equals(State.FAILED)) {
                Thread.sleep(5000L);
            }

        } catch (IOException e) {
            LOGGER.error("Could not submit app to Spark", e);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
From source file: org.kaaproject.examples.spark.KaaSparkLauncher.java
License: Apache License

public static void main(String[] args) throws Exception {
    SparkLauncher launcher = new SparkLauncher().setMaster(SPARK_MASTER_URL).setSparkHome(SPARK_HOME)
            .setAppResource(SPARK_APP_JAR).setMainClass(KaaSparkExample.class.getName())
            .setAppName(KAA_SPARK_EXAMPLE_JOB_NAME).addAppArgs(FLUME_BIND_HOST, FLUME_BIND_PORT);

    final Process spark = launcher.launch();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            LOG.warn("Spark job interrupted!");
            spark.destroy();
        }
    });

    Thread isReader = startReader(spark.getInputStream());
    Thread esReader = startReader(spark.getErrorStream());

    int resultCode = spark.waitFor();

    isReader.join();
    esReader.join();

    if (resultCode != 0) {
        LOG.warn("Spark job result code: {}", resultCode);
    }
}
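Most of the examples above block on Process.waitFor() after launch(); the org.flowable.decision example instead uses startApplication() to get a SparkAppHandle that reports state changes. Below is a minimal sketch of that non-blocking variant, reduced to its essentials; the jar path, main class, and master URL are placeholders and not taken from any of the source files above.

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

// Minimal sketch of launching with a monitored handle (placeholder paths and class names).
public class MonitoredLaunchExample {
    public static void main(String[] args) throws Exception {
        SparkAppHandle handle = new SparkLauncher()
                .setAppResource("/path/to/my-app.jar")      // placeholder application jar
                .setMainClass("com.example.MyApp")          // placeholder main class
                .setMaster("local[*]")                      // placeholder master URL
                .startApplication(new SparkAppHandle.Listener() {
                    @Override
                    public void stateChanged(SparkAppHandle h) {
                        System.out.println("State changed: " + h.getState());
                    }

                    @Override
                    public void infoChanged(SparkAppHandle h) {
                        System.out.println("App id: " + h.getAppId());
                    }
                });
        // Poll until the application reaches a terminal state, as the flowable example does.
        while (!handle.getState().isFinal()) {
            Thread.sleep(1000L);
        }
    }
}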