List of usage examples for org.apache.spark.launcher.SparkLauncher.EXECUTOR_CORES
Field: String EXECUTOR_CORES
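All of the examples below follow the same core pattern: EXECUTOR_CORES is the String key "spark.executor.cores", and it is passed to SparkLauncher.setConf(...) before the child Spark process is launched. Here is a minimal self-contained sketch of that pattern; the jar path, main class, master URL, and core count are illustrative placeholders rather than values taken from any of the examples.

import org.apache.spark.launcher.SparkLauncher;

public class ExecutorCoresExample {
    public static void main(String[] args) throws Exception {
        // SparkLauncher.EXECUTOR_CORES is the config key "spark.executor.cores".
        // The app resource, main class, master, and values below are placeholders.
        Process spark = new SparkLauncher()
                .setAppResource("/path/to/my-spark-app.jar")   // placeholder jar
                .setMainClass("com.example.MySparkApp")        // placeholder main class
                .setMaster("yarn")                             // placeholder master URL
                .setConf(SparkLauncher.EXECUTOR_CORES, "2")    // cores per executor
                .setConf(SparkLauncher.EXECUTOR_MEMORY, "4g")  // memory per executor
                .launch();

        int exitCode = spark.waitFor();                        // block until the job finishes
        System.out.println("Spark job exited with code " + exitCode);
    }
}

The examples that follow build on this same pattern, substituting processor or test configuration values for the hard-coded strings and reading the child process output streams in separate threads.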
From source file:com.thinkbiganalytics.nifi.pyspark.core.ExecutePySpark.java
License:Apache License
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();

    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }

    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME).evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES).evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE).evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES).evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS).evaluateAttributeExpressions(flowFile).getValue();

        PySparkUtils pySparkUtils = new PySparkUtils();

        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }

        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }

        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }

        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;
            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);

                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");

                    if (kerberosPrincipal.equals("") && kerberosKeyTab.equals("")) {
                        logger.error("Kerberos Principal and Keytab provided with empty values for a Kerberized cluster.");
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }

                    try {
                        logger.info("User authentication initiated");
                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger,
                                hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);
                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }
                    } catch (Exception unknownException) {
                        logger.error("Unknown exception occurred while validating user :" + unknownException.getMessage());
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user :" + e1.getMessage());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }

        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });

        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);
        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });

        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }

        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }

        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);

        logger.info("Spark home set to: {} ", new Object[] { sparkHome });
        logger.info("Driver memory set to: {} ", new Object[] { driverMemory });
        logger.info("Executor memory set to: {} ", new Object[] { executorMemory });
        logger.info("Executor instances set to: {} ", new Object[] { executorInstances });
        logger.info("Executor cores set to: {} ", new Object[] { executorCores });
        logger.info("Network timeout set to: {} ", new Object[] { networkTimeout });

        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {} ", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {} ", new Object[] { kerberosKeyTab });
        }

        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {} ", new Object[] { yarnQueue });
        }

        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                String[] confKeyValue = additionalSparkConfigOption.split("=");
                if (confKeyValue.length == 2) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}",
                            new Object[] { confKeyValue[0], confKeyValue[1] });
                }
            }
        }

        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();

        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger,
                pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger,
                pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for PySpark job to complete");
        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]", new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]", new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
From source file:com.thinkbiganalytics.nifi.v2.spark.ExecuteSparkJob.java
License:Apache License
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    String PROVENANCE_JOB_STATUS_KEY = "Job Status";
    String PROVENANCE_SPARK_EXIT_CODE_KEY = "Spark Exit Code";

    try {
        PROVENANCE_JOB_STATUS_KEY = context.getName() + " Job Status";
        PROVENANCE_SPARK_EXIT_CODE_KEY = context.getName() + " Spark Exit Code";

        /* Configuration parameters for spark launcher */
        String appJar = getApplicationJar(context, flowFile);
        String mainClass = getMainClass(context, flowFile);
        String[] appArgs = getMainArgs(context, flowFile);
        String extraJars = getExtraJars(context, flowFile);
        String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile).getValue();
        String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile).getValue().trim();
        String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE).evaluateAttributeExpressions(flowFile).getValue();
        String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String executorMemory = context.getProperty(EXECUTOR_MEMORY).evaluateAttributeExpressions(flowFile).getValue();
        String numberOfExecutors = context.getProperty(NUMBER_EXECUTORS).evaluateAttributeExpressions(flowFile).getValue();
        String sparkApplicationName = context.getProperty(SPARK_APPLICATION_NAME).evaluateAttributeExpressions(flowFile).getValue();
        String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile).getValue();
        String networkTimeout = context.getProperty(NETWORK_TIMEOUT).evaluateAttributeExpressions(flowFile).getValue();
        String principal = context.getProperty(kerberosPrincipal).getValue();
        String keyTab = context.getProperty(kerberosKeyTab).getValue();
        String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES).getValue();
        String sparkConfs = context.getProperty(SPARK_CONFS).evaluateAttributeExpressions(flowFile).getValue();
        String extraFiles = context.getProperty(EXTRA_SPARK_FILES).evaluateAttributeExpressions(flowFile).getValue();
        Integer sparkProcessTimeout = context.getProperty(PROCESS_TIMEOUT).evaluateAttributeExpressions(flowFile)
                .asTimePeriod(TimeUnit.SECONDS).intValue();
        String datasourceIds = context.getProperty(DATASOURCES).evaluateAttributeExpressions(flowFile).getValue();
        String catalogDataSourceIds = context.getProperty(CATALOG_DATASOURCES).evaluateAttributeExpressions(flowFile).getValue();
        String dataSetIds = context.getProperty(DATASETS).evaluateAttributeExpressions(flowFile).getValue();
        MetadataProviderService metadataService = context.getProperty(METADATA_SERVICE)
                .asControllerService(MetadataProviderService.class);

        final List<String> extraJarPaths = getExtraJarPaths(extraJars);

        // If all 3 fields are filled out then assume kerberos is enabled, and user should be authenticated
        boolean isAuthenticated = !StringUtils.isEmpty(principal) && !StringUtils.isEmpty(keyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources);
        try {
            if (isAuthenticated && isSecurityEnabled(hadoopConfigurationResources)) {
                logger.info("Security is enabled");

                if (principal.equals("") && keyTab.equals("")) {
                    logger.error("Kerberos Principal and Kerberos KeyTab information missing in Kerboeros enabled cluster. {} ",
                            new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }

                logger.info("User authentication initiated");

                boolean authenticationStatus = new ApplySecurityPolicy().validateUserWithKerberos(logger,
                        hadoopConfigurationResources, principal, keyTab);
                if (authenticationStatus) {
                    logger.info("User authenticated successfully.");
                } else {
                    logger.error("User authentication failed. {} ", new Object[] { flowFile });
                    session.transfer(flowFile, REL_FAILURE);
                    return;
                }
            }
        } catch (IOException e1) {
            logger.error("Unknown exception occurred while authenticating user : {} and flow file: {}",
                    new Object[] { e1.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        } catch (Exception unknownException) {
            logger.error("Unknown exception occurred while validating user : {}. {} ",
                    new Object[] { unknownException.getMessage(), flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile).getValue();

        // Build environment
        final Map<String, String> env = getDatasources(session, flowFile, PROVENANCE_JOB_STATUS_KEY, datasourceIds,
                dataSetIds, catalogDataSourceIds, metadataService, extraJarPaths);
        if (env != null) {
            StringBuilder datasourceSummary = new StringBuilder();
            if (env.containsKey("DATASETS")) {
                final int count = StringUtils.countMatches("DATASETS", ',') + 1;
                datasourceSummary.append(count).append(" datasets");
            }
            if (env.containsKey("DATASOURCES")) {
                final int count = StringUtils.countMatches("DATASOURCES", ',') + 1;
                (datasourceSummary.length() > 0 ? datasourceSummary.append("; ") : datasourceSummary)
                        .append(count).append(" legacy datasources");
            }
            if (env.containsKey("CATALOG_DATASOURCES")) {
                final int count = StringUtils.countMatches("CATALOG_DATASOURCES", ',') + 1;
                (datasourceSummary.length() > 0 ? datasourceSummary.append("; ") : datasourceSummary)
                        .append(count).append(" catalog datasources");
            }
            String summaryString = datasourceSummary.toString();
            if (StringUtils.isNotBlank(summaryString)) {
                flowFile = session.putAttribute(flowFile, "Data source usage", summaryString);
            }
        } else {
            return;
        }

        addEncryptionSettings(env);

        /* Launch the spark job as a child process */
        SparkLauncher launcher = new SparkLauncher(env).setAppResource(appJar).setMainClass(mainClass)
                .setMaster(sparkMaster).setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SPARK_NUM_EXECUTORS, numberOfExecutors)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(SPARK_NETWORK_TIMEOUT_CONFIG_NAME, networkTimeout).setSparkHome(sparkHome)
                .setAppName(sparkApplicationName);

        OptionalSparkConfigurator optionalSparkConf = new OptionalSparkConfigurator(launcher)
                .setDeployMode(sparkMaster, sparkYarnDeployMode)
                .setAuthentication(isAuthenticated, keyTab, principal).addAppArgs(appArgs)
                .addSparkArg(sparkConfs).addExtraJars(extraJarPaths).setYarnQueue(yarnQueue)
                .setExtraFiles(extraFiles);

        Process spark = optionalSparkConf.getLaucnher().launch();

        /* Read/clear the process input stream */
        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger,
                spark.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        /* Read/clear the process error stream */
        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO, logger,
                spark.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for Spark job to complete");

        /* Wait for job completion */
        boolean completed = spark.waitFor(sparkProcessTimeout, TimeUnit.SECONDS);
        if (!completed) {
            spark.destroyForcibly();
            getLog().error("Spark process timed out after {} seconds using flow file: {} ",
                    new Object[] { sparkProcessTimeout, flowFile });
            session.transfer(flowFile, REL_FAILURE);
            return;
        }

        int exitCode = spark.exitValue();
        flowFile = session.putAttribute(flowFile, PROVENANCE_SPARK_EXIT_CODE_KEY, Integer.toString(exitCode));

        if (exitCode != 0) {
            logger.error("ExecuteSparkJob for {} and flowfile: {} completed with failed status {} ",
                    new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed");
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("ExecuteSparkJob for {} and flowfile: {} completed with success status {} ",
                    new Object[] { context.getName(), flowFile, exitCode });
            flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Success");
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute Spark job {},{}", new Object[] { flowFile, e.getMessage() }, e);
        flowFile = session.putAttribute(flowFile, PROVENANCE_JOB_STATUS_KEY, "Failed With Exception");
        flowFile = session.putAttribute(flowFile, "Spark Exception:", e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
    }
}
From source file:org.apache.pirk.test.distributed.testsuite.DistTestSuite.java
License:Apache License
@SuppressWarnings("unused") public static List<QueryResponseJSON> performQuery(String queryType, ArrayList<String> selectors, FileSystem fs, boolean isSpark, int numThreads, boolean isStreaming) throws Exception { logger.info("performQuery: "); String queryInputDir = SystemConfiguration.getProperty(DistributedTestDriver.PIR_QUERY_INPUT_DIR); String outputFile = SystemConfiguration.getProperty(DistributedTestDriver.OUTPUT_DIRECTORY_PROPERTY); fs.delete(new Path(outputFile), true); // Ensure old output does not exist. SystemConfiguration.setProperty("pir.queryInput", queryInputDir); SystemConfiguration.setProperty("pir.outputFile", outputFile); SystemConfiguration.setProperty("pir.numReduceTasks", "1"); SystemConfiguration.setProperty("pir.stopListFile", SystemConfiguration.getProperty(DistributedTestDriver.PIR_STOPLIST_FILE)); // Create the temp result file File fileFinalResults = File.createTempFile("finalResultsFile", ".txt"); fileFinalResults.deleteOnExit();// w w w . ja va 2 s . c o m logger.info("fileFinalResults = " + fileFinalResults.getAbsolutePath()); boolean embedSelector = SystemConfiguration.getBooleanProperty("pirTest.embedSelector", false); boolean useExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useExpLookupTable", false); boolean useHDFSExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useHDFSExpLookupTable", false); // Set the necessary objects QueryInfo queryInfo = new QueryInfo(BaseTests.queryIdentifier, selectors.size(), BaseTests.hashBitSize, BaseTests.hashKey, BaseTests.dataPartitionBitSize, queryType, useExpLookupTable, embedSelector, useHDFSExpLookupTable); Paillier paillier = new Paillier(BaseTests.paillierBitSize, BaseTests.certainty); // Perform the encryption logger.info("Performing encryption of the selectors - forming encrypted query vectors:"); EncryptQuery encryptQuery = new EncryptQuery(queryInfo, selectors, paillier); Querier querier = encryptQuery.encrypt(numThreads); logger.info("Completed encryption of the selectors - completed formation of the encrypted query vectors:"); // Write the Query object to a file Path queryInputDirPath = new Path(queryInputDir); new HadoopFileSystemStore(fs).store(queryInputDirPath, querier.getQuery()); fs.deleteOnExit(queryInputDirPath); // Grab the original data and query schema properties to reset upon completion String dataSchemaProp = SystemConfiguration.getProperty("data.schemas"); String querySchemaProp = SystemConfiguration.getProperty("query.schemas"); // Get the correct input format class name JSONInputFormatBase jFormat = new JSONInputFormatBase(); String jsonBaseInputFormatString = jFormat.getClass().getName(); SystemConfiguration.setProperty("pir.baseInputFormat", jsonBaseInputFormatString); // Submitting the tool for encrypted query logger.info("Performing encrypted query:"); if (isSpark) { logger.info("spark.home = " + SystemConfiguration.getProperty("spark.home")); // Build args String inputFormat = SystemConfiguration.getProperty("pir.dataInputFormat"); logger.info("inputFormat = " + inputFormat); ArrayList<String> args = new ArrayList<>(); if (isStreaming) { logger.info("platform = sparkstreaming"); args.add("-" + ResponderProps.PLATFORM + "=sparkstreaming"); args.add("-" + ResponderProps.BATCHSECONDS + "=" + SystemConfiguration.getProperty("pir.sparkstreaming.batchSeconds", "30")); args.add("-" + ResponderProps.WINDOWLENGTH + "=" + SystemConfiguration.getProperty("pir.sparkstreaming.windowLength", "60")); args.add("-" + ResponderProps.MAXBATCHES + "=" + 
SystemConfiguration.getProperty("pir.sparkstreaming.maxBatches", "-1")); args.add("-" + ResponderProps.STOPGRACEFULLY + "=" + SystemConfiguration.getProperty("spark.streaming.stopGracefullyOnShutdown", "false")); args.add("-" + ResponderProps.NUMDATAPARTITIONS + "=" + SystemConfiguration.getProperty("pir.numDataPartitions", "3")); args.add("-" + ResponderProps.USEQUEUESTREAM + "=" + SystemConfiguration.getProperty("pir.sparkstreaming.useQueueStream", "false")); } else { logger.info("platform = spark"); args.add("-" + ResponderProps.PLATFORM + "=spark"); } args.add("-" + ResponderProps.DATAINPUTFORMAT + "=" + inputFormat); args.add("-" + ResponderProps.QUERYINPUT + "=" + SystemConfiguration.getProperty("pir.queryInput")); args.add("-" + ResponderProps.OUTPUTFILE + "=" + SystemConfiguration.getProperty("pir.outputFile")); args.add("-" + ResponderProps.STOPLISTFILE + "=" + SystemConfiguration.getProperty("pir.stopListFile")); args.add("-" + ResponderProps.USELOCALCACHE + "=" + SystemConfiguration.getProperty("pir.useLocalCache", "true")); args.add("-" + ResponderProps.LIMITHITSPERSELECTOR + "=" + SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false")); args.add("-" + ResponderProps.MAXHITSPERSELECTOR + "=" + SystemConfiguration.getProperty("pir.maxHitsPerSelector", "1000")); args.add("-" + ResponderProps.QUERYSCHEMAS + "=" + Inputs.HDFS_QUERY_FILES); args.add("-" + ResponderProps.DATASCHEMAS + "=" + Inputs.DATA_SCHEMA_FILE_HDFS); args.add("-" + ResponderProps.NUMEXPLOOKUPPARTS + "=" + SystemConfiguration.getProperty("pir.numExpLookupPartitions", "100")); args.add("-" + ResponderProps.USEMODEXPJOIN + "=" + SystemConfiguration.getProperty("pir.useModExpJoin", "false")); args.add("-" + ResponderProps.NUMCOLMULTPARTITIONS + "=" + SystemConfiguration.getProperty("pir.numColMultPartitions", "20")); args.add("-" + ResponderProps.COLMULTREDUCEBYKEY + "=" + SystemConfiguration.getProperty("pir.colMultReduceByKey", "false")); if (inputFormat.equals(InputFormatConst.BASE_FORMAT)) { args.add("-" + ResponderProps.INPUTDATA + "=" + SystemConfiguration.getProperty("pir.inputData")); args.add("-" + ResponderProps.BASEQUERY + "=" + SystemConfiguration.getProperty("pir.baseQuery")); args.add("-" + ResponderProps.BASEINPUTFORMAT + "=" + SystemConfiguration.getProperty("pir.baseInputFormat")); } else if (inputFormat.equals(InputFormatConst.ES)) { args.add("-" + ResponderProps.ESQUERY + "=" + SystemConfiguration.getProperty("pir.esQuery")); args.add("-" + ResponderProps.ESRESOURCE + "=" + SystemConfiguration.getProperty("pir.esResource")); args.add("-" + ResponderProps.ESNODES + "=" + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_NODES_PROPERTY)); args.add("-" + ResponderProps.ESPORT + "=" + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_PORT_PROPERTY)); } for (String arg : args) { logger.info("arg = " + arg); } // Run spark application Process sLauncher = new SparkLauncher().setAppResource(SystemConfiguration.getProperty("jarFile")) .setSparkHome(SystemConfiguration.getProperty("spark.home")) .setMainClass("org.apache.pirk.responder.wideskies.ResponderDriver") .addAppArgs(args.toArray(new String[args.size()])).setMaster("yarn-cluster") .setConf(SparkLauncher.EXECUTOR_MEMORY, "2g").setConf(SparkLauncher.DRIVER_MEMORY, "2g") .setConf(SparkLauncher.EXECUTOR_CORES, "1").launch(); sLauncher.waitFor(); } else { SystemConfiguration.setProperty("data.schemas", Inputs.DATA_SCHEMA_FILE_HDFS); SystemConfiguration.setProperty("query.schemas", Inputs.HDFS_QUERY_FILES); 
ComputeResponseTool responseTool = new ComputeResponseTool(); ToolRunner.run(responseTool, new String[] {}); } logger.info("Completed encrypted query"); // Perform decryption // Reconstruct the necessary objects from the files logger.info("Performing decryption; writing final results file"); if (isStreaming) { outputFile = outputFile + "_0"; // currently only processing one batch for testing } logger.info("Pulling results from outputFile = " + outputFile); Response response = new HadoopFileSystemStore(fs).recall(outputFile, Response.class); // Perform decryption and output the result file DecryptResponse decryptResponse = new DecryptResponse(response, querier); QueryResultsWriter.writeResultFile(fileFinalResults, decryptResponse.decrypt(numThreads)); logger.info("Completed performing decryption and writing final results file"); // Read in results logger.info("Reading in and checking results"); List<QueryResponseJSON> results = TestUtils.readResultsFile(fileFinalResults); // Reset data and query schema properties SystemConfiguration.setProperty("data.schemas", dataSchemaProp); SystemConfiguration.setProperty("query.schemas", querySchemaProp); // Clean up output dir in hdfs fs.delete(new Path(outputFile), true); return results; }
From source file:org.cripac.isee.vpe.ctrl.SystemPropertyCenter.java
License:Open Source License
SparkLauncher GetSparkLauncher(String appName) throws IOException, NoAppSpecifiedException {
    SparkLauncher launcher = new SparkLauncher().setAppResource(jarPath)
            .setMainClass(AppManager.getMainClassName(appName)).setMaster(sparkMaster).setAppName(appName)
            .setVerbose(verbose).addFile(ConfManager.getConcatCfgFilePathList(","))
            .setConf(SparkLauncher.DRIVER_MEMORY, driverMem)
            .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMem)
            .setConf(SparkLauncher.CHILD_PROCESS_LOGGER_NAME, appName)
            .setConf(SparkLauncher.EXECUTOR_CORES, "" + executorCores)
            .setConf("spark.driver.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.executor.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.yarn.am.nodeLabelExpression", yarnAmNodeLabelExpression)
            .addSparkArg("--driver-cores", "" + driverCores)
            .addSparkArg("--num-executors", "" + numExecutors)
            .addSparkArg("--total-executor-cores", "" + totalExecutorCores)
            .addSparkArg("--queue", hadoopQueue)
            .addAppArgs(getArgs());
    if (sparkConfFilePath != null) {
        if (new File(sparkConfFilePath).exists()) {
            launcher = launcher.setPropertiesFile(sparkConfFilePath);
        } else {
            logger.warn("Spark configuration file " + sparkConfFilePath + " does not exist!");
        }
    }
    if (log4jPropFilePath != null) {
        if (new File(log4jPropFilePath).exists()) {
            launcher = launcher.addFile(log4jPropFilePath);
        } else {
            logger.warn("Loj4j configuration file " + log4jPropFilePath + " does not exist!");
        }
    }
    if (sysPropFilePath != null) {
        if (new File(sysPropFilePath).exists()) {
            launcher = launcher.addFile(sysPropFilePath);
        } else {
            logger.warn("System configuration file " + sysPropFilePath + " does not exist!");
        }
        launcher = launcher.addFile(sysPropFilePath);
    }
    if (appPropFilePath != null) {
        if (new File(appPropFilePath).exists()) {
            launcher = launcher.addFile(appPropFilePath);
        } else {
            logger.warn("App configuration file " + appPropFilePath + " does not exist!");
        }
    }
    return launcher;
}