List of usage examples for org.apache.spark.launcher.SparkLauncher (constructor)
public SparkLauncher()
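All of the examples below follow the same basic pattern: construct a SparkLauncher with the no-argument constructor, configure the application through its fluent setters, and then either launch() a spark-submit child process or startApplication() for a monitored handle. Before the full examples, here is a minimal sketch of that pattern; the Spark home, jar path, main class, and master URL are placeholders, not values taken from any of the source files below.

import org.apache.spark.launcher.SparkLauncher;

// Minimal sketch (placeholder paths, class names, and master URL; adjust for your environment).
public class LaunchExample {
    public static void main(String[] args) throws Exception {
        Process spark = new SparkLauncher()
                .setSparkHome("/opt/spark")                 // placeholder Spark installation directory
                .setAppResource("/path/to/my-app.jar")      // placeholder application jar
                .setMainClass("com.example.MyApp")          // placeholder main class
                .setMaster("local[*]")                      // placeholder master URL
                .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
                .launch();                                  // starts a spark-submit child process
        int exitCode = spark.waitFor();                     // block until the job finishes
        System.out.println("Spark job exited with code " + exitCode);
    }
}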
From source file: com.cloudera.livy.client.local.ContextLauncher.java
License: Apache License

private static ChildProcess startDriver(final RpcServer rpcServer, final LocalConf conf, final String clientId,
        final String secret, final String className) throws IOException {
    final String serverAddress = rpcServer.getAddress();
    final String serverPort = String.valueOf(rpcServer.getPort());
    if (conf.get(CLIENT_IN_PROCESS) != null) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                List<String> args = new ArrayList<>();
                args.add("--remote-host");
                args.add(serverAddress);
                args.add("--remote-port");
                args.add(serverPort);
                args.add("--client-id");
                args.add(clientId);
                args.add("--secret");
                args.add(secret);
                for (Map.Entry<String, String> e : conf) {
                    args.add("--conf");
                    args.add(String.format("%s=%s", e.getKey(), e.getValue()));
                }
                try {
                    RemoteDriver.main(args.toArray(new String[args.size()]));
                } catch (Exception e) {
                    LOG.error("Error running driver.", e);
                }
            }
        };
        return new ChildProcess(conf, child);
    } else {
        // If a Spark installation is provided, use the spark-submit script. Otherwise, call the
        // SparkSubmit class directly, which has some caveats (like having to provide a proper
        // version of Guava on the classpath depending on the deploy mode).
        final SparkLauncher launcher = new SparkLauncher();
        String sparkHome = conf.get(SPARK_HOME_KEY);
        if (sparkHome == null) {
            sparkHome = System.getenv(SPARK_HOME_ENV);
        }
        if (sparkHome == null) {
            sparkHome = System.getProperty(SPARK_HOME_KEY);
        }
        launcher.setSparkHome(sparkHome);

        conf.set(CLIENT_ID, clientId);
        conf.set(CLIENT_SECRET, secret);

        launcher.setAppResource("spark-internal");

        String livyJars = conf.get(LIVY_JARS);
        if (livyJars == null) {
            String livyHome = System.getenv("LIVY_HOME");
            Preconditions.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
            File clientJars = new File(livyHome, "client-jars");
            Preconditions.checkState(clientJars.isDirectory(),
                    "Cannot find 'client-jars' directory under LIVY_HOME.");
            List<String> jars = new ArrayList<>();
            for (File f : clientJars.listFiles()) {
                jars.add(f.getAbsolutePath());
            }
            livyJars = Joiner.on(",").join(jars);
        }

        String userJars = conf.get(SPARK_JARS_KEY);
        if (userJars != null) {
            String allJars = Joiner.on(",").join(livyJars, userJars);
            conf.set(SPARK_JARS_KEY, allJars);
        } else {
            conf.set(SPARK_JARS_KEY, livyJars);
        }

        // Disable multiple attempts since the RPC server doesn't yet support multiple
        // connections for the same registered app.
        conf.set("spark.yarn.maxAppAttempts", "1");

        File confFile = writeConfToFile(conf);

        // Define how to pass options to the child process. If launching in client (or local)
        // mode, the driver options need to be passed directly on the command line. Otherwise,
        // SparkSubmit will take care of that for us.
        String master = conf.get("spark.master");
        Preconditions.checkArgument(master != null, "spark.master is not defined.");

        launcher.setMaster(master);
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(className);

        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }

        launcher.addAppArgs("--remote-host", serverAddress);
        launcher.addAppArgs("--remote-port", serverPort);

        return new ChildProcess(conf, launcher.launch());
    }
}
From source file: com.cloudera.livy.rsc.ContextLauncher.java
License: Apache License

private static ChildProcess startDriver(final RSCConf conf, Promise<?> promise) throws IOException {
    String livyJars = conf.get(LIVY_JARS);
    if (livyJars == null) {
        String livyHome = System.getenv("LIVY_HOME");
        Utils.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
        File rscJars = new File(livyHome, "rsc-jars");
        if (!rscJars.isDirectory()) {
            rscJars = new File(livyHome, "rsc/target/jars");
        }
        Utils.checkState(rscJars.isDirectory(), "Cannot find 'client-jars' directory under LIVY_HOME.");
        List<String> jars = new ArrayList<>();
        for (File f : rscJars.listFiles()) {
            jars.add(f.getAbsolutePath());
        }
        livyJars = Utils.join(jars, ",");
    }
    merge(conf, SPARK_JARS_KEY, livyJars, ",");

    String kind = conf.get(SESSION_KIND);
    if ("sparkr".equals(kind)) {
        merge(conf, SPARK_ARCHIVES_KEY, conf.get(RSCConf.Entry.SPARKR_PACKAGE), ",");
    } else if ("pyspark".equals(kind)) {
        merge(conf, "spark.submit.pyFiles", conf.get(RSCConf.Entry.PYSPARK_ARCHIVES), ",");
    }

    // Disable multiple attempts since the RPC server doesn't yet support multiple
    // connections for the same registered app.
    conf.set("spark.yarn.maxAppAttempts", "1");

    // Let the launcher go away when launching in yarn cluster mode. This avoids keeping lots
    // of "small" Java processes lingering on the Livy server node.
    conf.set("spark.yarn.submit.waitAppCompletion", "false");

    // For testing; propagate jacoco settings so that we also do coverage analysis
    // on the launched driver. We replace the name of the main file ("main.exec")
    // so that we don't end up fighting with the main test launcher.
    String jacocoArgs = System.getProperty("jacoco.args");
    if (jacocoArgs != null) {
        jacocoArgs = jacocoArgs.replace("main.exec", "child.exec");
        merge(conf, SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, jacocoArgs, " ");
    }

    final File confFile = writeConfToFile(conf);

    if (conf.getBoolean(CLIENT_IN_PROCESS)) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                try {
                    RSCDriverBootstrapper.main(new String[] { confFile.getAbsolutePath() });
                } catch (Exception e) {
                    throw Utils.propagate(e);
                }
            }
        };
        return new ChildProcess(conf, promise, child, confFile);
    } else {
        final SparkLauncher launcher = new SparkLauncher();
        launcher.setSparkHome(System.getenv(SPARK_HOME_ENV));
        launcher.setAppResource("spark-internal");
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(RSCDriverBootstrapper.class.getName());

        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }

        return new ChildProcess(conf, promise, launcher.launch(), confFile);
    }
}
From source file: com.thinkbiganalytics.nifi.pyspark.core.ExecutePySpark.java
License: Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES)
                .getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile)
                .getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS)
                .evaluateAttributeExpressions(flowFile).getValue();

        PySparkUtils pySparkUtils = new PySparkUtils();

        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }

        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }

        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }

        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;
            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);
                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");
                    if (kerberosPrincipal.equals("") && kerberosKeyTab.equals("")) {
                        logger.error(
                                "Kerberos Principal and Keytab provided with empty values for a Kerberized cluster.");
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                    try {
                        logger.info("User authentication initiated");
                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger,
                                hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);
                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }
                    } catch (Exception unknownException) {
                        logger.error("Unknown exception occurred while validating user: "
                                + unknownException.getMessage());
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user: " + e1.getMessage());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }

        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });

        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);
        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });

        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }

        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }

        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);
        logger.info("Spark home set to: {}", new Object[] { sparkHome });
        logger.info("Driver memory set to: {}", new Object[] { driverMemory });
        logger.info("Executor memory set to: {}", new Object[] { executorMemory });
        logger.info("Executor instances set to: {}", new Object[] { executorInstances });
        logger.info("Executor cores set to: {}", new Object[] { executorCores });
        logger.info("Network timeout set to: {}", new Object[] { networkTimeout });

        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {}", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {}", new Object[] { kerberosKeyTab });
        }

        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {}", new Object[] { yarnQueue });
        }

        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                String[] confKeyValue = additionalSparkConfigOption.split("=");
                if (confKeyValue.length == 2) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}",
                            new Object[] { confKeyValue[0], confKeyValue[1] });
                }
            }
        }

        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();

        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for PySpark job to complete");
        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}
From source file: com.uber.hoodie.cli.utils.SparkUtil.java
License: Apache License

/**
 * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro
 */
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
    String currentJar = new File(
            SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
                    .getAbsolutePath();
    SparkLauncher sparkLauncher = new SparkLauncher().setAppResource(currentJar)
            .setMainClass(SparkMain.class.getName());
    if (StringUtils.isNotEmpty(propertiesFile)) {
        sparkLauncher.setPropertiesFile(propertiesFile);
    }
    File libDirectory = new File(new File(currentJar).getParent(), "lib");
    for (String library : libDirectory.list()) {
        sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
    }
    return sparkLauncher;
}
From source file: io.zz.Launcher.java

public static void main(String[] args) throws IOException, InterruptedException {
    SparkLauncher sparkLauncher = new SparkLauncher();
    Process spark = sparkLauncher.setAppName("APP NAME")
            .setSparkHome("/tmp")
            .setAppResource(SparkContext.jarOfClass(Launcher.class).get())
            .setMaster("spark://192.168.100.105:7077")
            .setMainClass("io.zz.TestSaveToCassandra")
            .launch();
    spark.waitFor();
}
From source file: org.apache.eagle.app.environment.impl.SparkExecutionRuntime.java
License: Apache License

private SparkLauncher prepareSparkConfig(Config config) {
    String master = config.hasPath(TOPOLOGY_MASTER) ? config.getString(TOPOLOGY_MASTER) : "local[*]";
    String sparkExecutorCores = config.getString(SPARK_EXECUTOR_CORES);
    String sparkExecutorMemory = config.getString(SPARK_EXECUTOR_MEMORY);
    String driverMemory = config.getString(DRIVER_MEMORY);
    String driverCore = config.getString(DRIVER_CORES);
    String deployMode = config.getString(DEPLOY_MODE);
    String enable = config.getString(TOPOLOGY_DYNAMICALLOCATION);
    boolean verbose = config.getBoolean(TOPOLOGY_VERBOSE);
    String mainClass = config.getString(TOPOLOGY_MAINCLASS);
    String sparkHome = config.getString(TOPOLOGY_SPARKHOME);
    String uiport = config.getString(TOPOLOGY_SPARKUIPORT);
    String appResource = config.getString(TOPOLOGY_APPRESOURCE);
    String yarnqueue = config.getString(TOPOLOGY_YARNQUEUE);

    SparkLauncher sparkLauncher = new SparkLauncher();
    sparkLauncher.setMaster(master);
    sparkLauncher.setMainClass(mainClass);
    sparkLauncher.setSparkHome(sparkHome);
    //sparkLauncher.setJavaHome(TOPOLOGY_JAVAHOME);
    sparkLauncher.setDeployMode(deployMode);
    sparkLauncher.setVerbose(verbose);
    sparkLauncher.setAppResource(appResource);
    sparkLauncher.setAppName(config.getString(TOPOLOGY_NAME));
    sparkLauncher.setConf("spark.yarn.queue", yarnqueue);
    sparkLauncher.setConf("spark.executor.cores", sparkExecutorCores);
    sparkLauncher.setConf("spark.executor.memory", sparkExecutorMemory);
    sparkLauncher.setConf("spark.driver.memory", driverMemory);
    sparkLauncher.setConf("spark.driver.cores", driverCore);
    sparkLauncher.setConf("spark.streaming.dynamicAllocation.enable", enable);
    sparkLauncher.setConf("spark.ui.port", uiport);
    String path = config.getString(TOPOLOGY_SPARKCONFFILEPATH);
    if (StringUtil.isNotBlank(path)) {
        sparkLauncher.setPropertiesFile(path);
    }

    String batchDuration = config.getString(BATCH_DURATION);
    String routerTasknum = config.getString(ROUTER_TASK_NUM);
    String alertTasknum = config.getString(ALERT_TASK_NUM);
    String publishTasknum = config.getString(PUBLISH_TASK_NUM);
    String slideDurationsecond = config.getString(SLIDE_DURATION_SECOND);
    String windowDurationssecond = config.getString(WINDOW_DURATIONS_SECOND);
    String checkpointPath = config.getString(CHECKPOINT_PATH);
    String topologyGroupid = config.getString(TOPOLOGY_GROUPID);
    String autoOffsetReset = config.getString(AUTO_OFFSET_RESET);
    String restApihost = config.getString(EAGLE_CORRELATION_SERVICE_HOST);
    String restApiport = config.getString(EAGLE_CORRELATION_SERVICE_PORT);
    String restApicontext = config.getString(EAGLE_CORRELATION_CONTEXT);
    String useMultiKafka = config.getString(TOPOLOGY_MULTIKAFKA);
    String kafkaBrokerZkQuorum = config.getString(SPOUT_KAFKABROKERZKQUORUM);
    String zkConfigzkQuorum = config.getString(ZKCONFIG_ZKQUORUM);

    sparkLauncher.addAppArgs(batchDuration, routerTasknum, alertTasknum, publishTasknum, slideDurationsecond,
            windowDurationssecond, checkpointPath, topologyGroupid, autoOffsetReset, restApicontext,
            restApiport, restApihost, useMultiKafka, kafkaBrokerZkQuorum, zkConfigzkQuorum);
    return sparkLauncher;
}
From source file: org.apache.pirk.test.distributed.testsuite.DistTestSuite.java
License: Apache License

@SuppressWarnings("unused")
public static List<QueryResponseJSON> performQuery(String queryType, ArrayList<String> selectors, FileSystem fs,
        boolean isSpark, int numThreads, boolean isStreaming) throws Exception {
    logger.info("performQuery: ");

    String queryInputDir = SystemConfiguration.getProperty(DistributedTestDriver.PIR_QUERY_INPUT_DIR);
    String outputFile = SystemConfiguration.getProperty(DistributedTestDriver.OUTPUT_DIRECTORY_PROPERTY);
    fs.delete(new Path(outputFile), true); // Ensure old output does not exist.

    SystemConfiguration.setProperty("pir.queryInput", queryInputDir);
    SystemConfiguration.setProperty("pir.outputFile", outputFile);
    SystemConfiguration.setProperty("pir.numReduceTasks", "1");
    SystemConfiguration.setProperty("pir.stopListFile",
            SystemConfiguration.getProperty(DistributedTestDriver.PIR_STOPLIST_FILE));

    // Create the temp result file
    File fileFinalResults = File.createTempFile("finalResultsFile", ".txt");
    fileFinalResults.deleteOnExit();
    logger.info("fileFinalResults = " + fileFinalResults.getAbsolutePath());

    boolean embedSelector = SystemConfiguration.getBooleanProperty("pirTest.embedSelector", false);
    boolean useExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useExpLookupTable", false);
    boolean useHDFSExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useHDFSExpLookupTable",
            false);

    // Set the necessary objects
    QueryInfo queryInfo = new QueryInfo(BaseTests.queryIdentifier, selectors.size(), BaseTests.hashBitSize,
            BaseTests.hashKey, BaseTests.dataPartitionBitSize, queryType, useExpLookupTable, embedSelector,
            useHDFSExpLookupTable);

    Paillier paillier = new Paillier(BaseTests.paillierBitSize, BaseTests.certainty);

    // Perform the encryption
    logger.info("Performing encryption of the selectors - forming encrypted query vectors:");
    EncryptQuery encryptQuery = new EncryptQuery(queryInfo, selectors, paillier);
    Querier querier = encryptQuery.encrypt(numThreads);
    logger.info("Completed encryption of the selectors - completed formation of the encrypted query vectors:");

    // Write the Query object to a file
    Path queryInputDirPath = new Path(queryInputDir);
    new HadoopFileSystemStore(fs).store(queryInputDirPath, querier.getQuery());
    fs.deleteOnExit(queryInputDirPath);

    // Grab the original data and query schema properties to reset upon completion
    String dataSchemaProp = SystemConfiguration.getProperty("data.schemas");
    String querySchemaProp = SystemConfiguration.getProperty("query.schemas");

    // Get the correct input format class name
    JSONInputFormatBase jFormat = new JSONInputFormatBase();
    String jsonBaseInputFormatString = jFormat.getClass().getName();
    SystemConfiguration.setProperty("pir.baseInputFormat", jsonBaseInputFormatString);

    // Submitting the tool for encrypted query
    logger.info("Performing encrypted query:");
    if (isSpark) {
        logger.info("spark.home = " + SystemConfiguration.getProperty("spark.home"));

        // Build args
        String inputFormat = SystemConfiguration.getProperty("pir.dataInputFormat");
        logger.info("inputFormat = " + inputFormat);

        ArrayList<String> args = new ArrayList<>();
        if (isStreaming) {
            logger.info("platform = sparkstreaming");
            args.add("-" + ResponderProps.PLATFORM + "=sparkstreaming");
            args.add("-" + ResponderProps.BATCHSECONDS + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.batchSeconds", "30"));
            args.add("-" + ResponderProps.WINDOWLENGTH + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.windowLength", "60"));
            args.add("-" + ResponderProps.MAXBATCHES + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.maxBatches", "-1"));
            args.add("-" + ResponderProps.STOPGRACEFULLY + "="
                    + SystemConfiguration.getProperty("spark.streaming.stopGracefullyOnShutdown", "false"));
            args.add("-" + ResponderProps.NUMDATAPARTITIONS + "="
                    + SystemConfiguration.getProperty("pir.numDataPartitions", "3"));
            args.add("-" + ResponderProps.USEQUEUESTREAM + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.useQueueStream", "false"));
        } else {
            logger.info("platform = spark");
            args.add("-" + ResponderProps.PLATFORM + "=spark");
        }
        args.add("-" + ResponderProps.DATAINPUTFORMAT + "=" + inputFormat);
        args.add("-" + ResponderProps.QUERYINPUT + "=" + SystemConfiguration.getProperty("pir.queryInput"));
        args.add("-" + ResponderProps.OUTPUTFILE + "=" + SystemConfiguration.getProperty("pir.outputFile"));
        args.add("-" + ResponderProps.STOPLISTFILE + "=" + SystemConfiguration.getProperty("pir.stopListFile"));
        args.add("-" + ResponderProps.USELOCALCACHE + "="
                + SystemConfiguration.getProperty("pir.useLocalCache", "true"));
        args.add("-" + ResponderProps.LIMITHITSPERSELECTOR + "="
                + SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false"));
        args.add("-" + ResponderProps.MAXHITSPERSELECTOR + "="
                + SystemConfiguration.getProperty("pir.maxHitsPerSelector", "1000"));
        args.add("-" + ResponderProps.QUERYSCHEMAS + "=" + Inputs.HDFS_QUERY_FILES);
        args.add("-" + ResponderProps.DATASCHEMAS + "=" + Inputs.DATA_SCHEMA_FILE_HDFS);
        args.add("-" + ResponderProps.NUMEXPLOOKUPPARTS + "="
                + SystemConfiguration.getProperty("pir.numExpLookupPartitions", "100"));
        args.add("-" + ResponderProps.USEMODEXPJOIN + "="
                + SystemConfiguration.getProperty("pir.useModExpJoin", "false"));
        args.add("-" + ResponderProps.NUMCOLMULTPARTITIONS + "="
                + SystemConfiguration.getProperty("pir.numColMultPartitions", "20"));
        args.add("-" + ResponderProps.COLMULTREDUCEBYKEY + "="
                + SystemConfiguration.getProperty("pir.colMultReduceByKey", "false"));
        if (inputFormat.equals(InputFormatConst.BASE_FORMAT)) {
            args.add("-" + ResponderProps.INPUTDATA + "=" + SystemConfiguration.getProperty("pir.inputData"));
            args.add("-" + ResponderProps.BASEQUERY + "=" + SystemConfiguration.getProperty("pir.baseQuery"));
            args.add("-" + ResponderProps.BASEINPUTFORMAT + "="
                    + SystemConfiguration.getProperty("pir.baseInputFormat"));
        } else if (inputFormat.equals(InputFormatConst.ES)) {
            args.add("-" + ResponderProps.ESQUERY + "=" + SystemConfiguration.getProperty("pir.esQuery"));
            args.add("-" + ResponderProps.ESRESOURCE + "=" + SystemConfiguration.getProperty("pir.esResource"));
            args.add("-" + ResponderProps.ESNODES + "="
                    + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_NODES_PROPERTY));
            args.add("-" + ResponderProps.ESPORT + "="
                    + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_PORT_PROPERTY));
        }

        for (String arg : args) {
            logger.info("arg = " + arg);
        }

        // Run spark application
        Process sLauncher = new SparkLauncher().setAppResource(SystemConfiguration.getProperty("jarFile"))
                .setSparkHome(SystemConfiguration.getProperty("spark.home"))
                .setMainClass("org.apache.pirk.responder.wideskies.ResponderDriver")
                .addAppArgs(args.toArray(new String[args.size()])).setMaster("yarn-cluster")
                .setConf(SparkLauncher.EXECUTOR_MEMORY, "2g").setConf(SparkLauncher.DRIVER_MEMORY, "2g")
                .setConf(SparkLauncher.EXECUTOR_CORES, "1").launch();
        sLauncher.waitFor();
    } else {
        SystemConfiguration.setProperty("data.schemas", Inputs.DATA_SCHEMA_FILE_HDFS);
        SystemConfiguration.setProperty("query.schemas", Inputs.HDFS_QUERY_FILES);

        ComputeResponseTool responseTool = new ComputeResponseTool();
        ToolRunner.run(responseTool, new String[] {});
    }
    logger.info("Completed encrypted query");

    // Perform decryption
    // Reconstruct the necessary objects from the files
    logger.info("Performing decryption; writing final results file");
    if (isStreaming) {
        outputFile = outputFile + "_0"; // currently only processing one batch for testing
    }
    logger.info("Pulling results from outputFile = " + outputFile);
    Response response = new HadoopFileSystemStore(fs).recall(outputFile, Response.class);

    // Perform decryption and output the result file
    DecryptResponse decryptResponse = new DecryptResponse(response, querier);
    QueryResultsWriter.writeResultFile(fileFinalResults, decryptResponse.decrypt(numThreads));
    logger.info("Completed performing decryption and writing final results file");

    // Read in results
    logger.info("Reading in and checking results");
    List<QueryResponseJSON> results = TestUtils.readResultsFile(fileFinalResults);

    // Reset data and query schema properties
    SystemConfiguration.setProperty("data.schemas", dataSchemaProp);
    SystemConfiguration.setProperty("query.schemas", querySchemaProp);

    // Clean up output dir in hdfs
    fs.delete(new Path(outputFile), true);

    return results;
}
From source file: org.cripac.isee.vpe.ctrl.SystemPropertyCenter.java
License: Open Source License

SparkLauncher GetSparkLauncher(String appName) throws IOException, NoAppSpecifiedException {
    SparkLauncher launcher = new SparkLauncher().setAppResource(jarPath)
            .setMainClass(AppManager.getMainClassName(appName))
            .setMaster(sparkMaster)
            .setAppName(appName)
            .setVerbose(verbose)
            .addFile(ConfManager.getConcatCfgFilePathList(","))
            .setConf(SparkLauncher.DRIVER_MEMORY, driverMem)
            .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMem)
            .setConf(SparkLauncher.CHILD_PROCESS_LOGGER_NAME, appName)
            .setConf(SparkLauncher.EXECUTOR_CORES, "" + executorCores)
            .setConf("spark.driver.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.executor.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.yarn.am.nodeLabelExpression", yarnAmNodeLabelExpression)
            .addSparkArg("--driver-cores", "" + driverCores)
            .addSparkArg("--num-executors", "" + numExecutors)
            .addSparkArg("--total-executor-cores", "" + totalExecutorCores)
            .addSparkArg("--queue", hadoopQueue)
            .addAppArgs(getArgs());
    if (sparkConfFilePath != null) {
        if (new File(sparkConfFilePath).exists()) {
            launcher = launcher.setPropertiesFile(sparkConfFilePath);
        } else {
            logger.warn("Spark configuration file " + sparkConfFilePath + " does not exist!");
        }
    }
    if (log4jPropFilePath != null) {
        if (new File(log4jPropFilePath).exists()) {
            launcher = launcher.addFile(log4jPropFilePath);
        } else {
            logger.warn("Log4j configuration file " + log4jPropFilePath + " does not exist!");
        }
    }
    if (sysPropFilePath != null) {
        if (new File(sysPropFilePath).exists()) {
            launcher = launcher.addFile(sysPropFilePath);
        } else {
            logger.warn("System configuration file " + sysPropFilePath + " does not exist!");
        }
    }
    if (appPropFilePath != null) {
        if (new File(appPropFilePath).exists()) {
            launcher = launcher.addFile(appPropFilePath);
        } else {
            logger.warn("App configuration file " + appPropFilePath + " does not exist!");
        }
    }
    return launcher;
}
From source file: org.flowable.decision.DecisionAnalysisService.java
License: Apache License

private void submitSparkAppsForTasks(ProcessDefinition processDefinition, Map<String, List<String>> outcomesMap,
        List<UserTask> matchingUserTasks, Map<String, Map<String, List<String>>> possibleValueCounts) {
    for (UserTask matchingUserTask : matchingUserTasks) {
        LOGGER.info("Submitting Spark ML app for task " + matchingUserTask.getId() + "...");
        try {
            // Not so pretty: generating a long argument string to pass info to spark job.
            // Should be handled with a persistent store really.
            /*
             * Format (separated by # character):
             *
             * - processDefinitionId
             * - taskKey
             * - outcome variable
             * - outcome variable possibilities
             * - variable names
             * - variable possibilities
             */
            StringBuilder argumentBuilder = new StringBuilder();
            argumentBuilder.append(processDefinition.getId()).append("#") // process definition id
                    .append(matchingUserTask.getId()).append("#") // task key
                    .append("form_" + matchingUserTask.getFormKey() + "_outcome").append("#"); // outcome variable

            List<String> outcomes = outcomesMap.get(matchingUserTask.getId());
            for (int i = 0; i < outcomes.size(); i++) {
                argumentBuilder.append(outcomes.get(i)); // outcome variable output possibilities
                if (i != outcomes.size() - 1) {
                    argumentBuilder.append(";");
                }
            }
            argumentBuilder.append("#");

            Map<String, List<String>> variableToPotentialValues = possibleValueCounts
                    .get(matchingUserTask.getId());
            List<String> variableNames = new ArrayList<>(variableToPotentialValues.keySet());
            for (int i = 0; i < variableNames.size(); i++) {
                argumentBuilder.append(variableNames.get(i)); // variable names
                if (i != variableNames.size() - 1) {
                    argumentBuilder.append(";");
                }
            }
            argumentBuilder.append("#");

            for (int i = 0; i < variableNames.size(); i++) {
                List<String> possibleValues = variableToPotentialValues.get(variableNames.get(i));
                for (int j = 0; j < possibleValues.size(); j++) {
                    argumentBuilder.append(possibleValues.get(j)); // variable possibilities
                    if (j != possibleValues.size() - 1) {
                        argumentBuilder.append("&");
                    }
                }
                if (i != variableNames.size() - 1) {
                    argumentBuilder.append(";");
                }
            }

            LOGGER.info("Arguments for Spark app: " + argumentBuilder.toString());

            SparkAppHandle sparkAppHandle = new SparkLauncher().setSparkHome(System.getProperty("sparkHome"))
                    .setAppResource(System.getProperty("appResource"))
                    .setMainClass("org.flowable.AnalyseDecisions")
                    .setMaster("local[4]")
                    // .setVerbose(true)
                    .addAppArgs(argumentBuilder.toString())
                    .redirectOutput(Redirect.INHERIT)
                    .startApplication(new SparkAppHandle.Listener() {

                        @Override
                        public void stateChanged(SparkAppHandle handle) {
                            LOGGER.info(handle.getState() + " new state");
                        }

                        @Override
                        public void infoChanged(SparkAppHandle handle) {
                            LOGGER.info(handle.getState() + " new state");
                        }
                    });

            // For demo: make sure the tasks are processed sequentially to not have the console output
            // mixed for all tasks
            while (!sparkAppHandle.getState().equals(State.FINISHED)
                    && !sparkAppHandle.getState().equals(State.FAILED)) {
                Thread.sleep(5000L);
            }

        } catch (IOException e) {
            LOGGER.error("Could not submit app to Spark", e);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
From source file: org.kaaproject.examples.spark.KaaSparkLauncher.java
License: Apache License

public static void main(String[] args) throws Exception {
    SparkLauncher launcher = new SparkLauncher().setMaster(SPARK_MASTER_URL).setSparkHome(SPARK_HOME)
            .setAppResource(SPARK_APP_JAR).setMainClass(KaaSparkExample.class.getName())
            .setAppName(KAA_SPARK_EXAMPLE_JOB_NAME).addAppArgs(FLUME_BIND_HOST, FLUME_BIND_PORT);

    final Process spark = launcher.launch();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            LOG.warn("Spark job interrupted!");
            spark.destroy();
        }
    });

    Thread isReader = startReader(spark.getInputStream());
    Thread esReader = startReader(spark.getErrorStream());

    int resultCode = spark.waitFor();

    isReader.join();
    esReader.join();

    if (resultCode != 0) {
        LOG.warn("Spark job result code: {}", resultCode);
    }
}
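Most of the examples above block on Process.waitFor() after launch(); the org.flowable.decision example instead uses startApplication() to get a SparkAppHandle that reports state changes. Below is a minimal sketch of that non-blocking variant, reduced to its essentials; the jar path, main class, and master URL are placeholders and not taken from any of the source files above.

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

// Minimal sketch of launching with a monitored handle (placeholder paths and class names).
public class MonitoredLaunchExample {
    public static void main(String[] args) throws Exception {
        SparkAppHandle handle = new SparkLauncher()
                .setAppResource("/path/to/my-app.jar")      // placeholder application jar
                .setMainClass("com.example.MyApp")          // placeholder main class
                .setMaster("local[*]")                      // placeholder master URL
                .startApplication(new SparkAppHandle.Listener() {
                    @Override
                    public void stateChanged(SparkAppHandle h) {
                        System.out.println("State changed: " + h.getState());
                    }

                    @Override
                    public void infoChanged(SparkAppHandle h) {
                        System.out.println("App id: " + h.getAppId());
                    }
                });
        // Poll until the application reaches a terminal state, as the flowable example does.
        while (!handle.getState().isFinal()) {
            Thread.sleep(1000L);
        }
    }
}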