Example usage for org.apache.spark.launcher SparkLauncher SparkLauncher

Introduction

This page lists example usages of the org.apache.spark.launcher.SparkLauncher constructor, SparkLauncher().

Prototype

public SparkLauncher() 
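
As a quick orientation before the full examples, here is a minimal sketch of the no-argument constructor in use. The Spark home, application jar, main class, and master URL below are placeholder values, not settings taken from any of the sources on this page.

import org.apache.spark.launcher.SparkLauncher;

public class SparkLauncherSketch {
    public static void main(String[] args) throws Exception {
        // Build a launcher with the no-argument constructor and configure it fluently.
        Process spark = new SparkLauncher()
                .setSparkHome("/opt/spark")               // assumed Spark installation directory
                .setAppResource("/path/to/my-app.jar")    // placeholder application jar
                .setMainClass("com.example.MyApp")        // placeholder main class
                .setMaster("local[*]")
                .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
                .launch();

        // launch() returns the spark-submit child process; wait for it and report the exit code.
        int exitCode = spark.waitFor();
        System.out.println("spark-submit exited with code " + exitCode);
    }
}

The examples below apply the same pattern with additional configuration, or use startApplication(...) to obtain a SparkAppHandle instead of a raw Process.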

Usage

From source file: com.cloudera.livy.client.local.ContextLauncher.java

License: Apache License

private static ChildProcess startDriver(final RpcServer rpcServer, final LocalConf conf, final String clientId,
        final String secret, final String className) throws IOException {
    final String serverAddress = rpcServer.getAddress();
    final String serverPort = String.valueOf(rpcServer.getPort());
    if (conf.get(CLIENT_IN_PROCESS) != null) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                List<String> args = new ArrayList<>();
                args.add("--remote-host");
                args.add(serverAddress);
                args.add("--remote-port");
                args.add(serverPort);
                args.add("--client-id");
                args.add(clientId);
                args.add("--secret");
                args.add(secret);

                for (Map.Entry<String, String> e : conf) {
                    args.add("--conf");
                    args.add(String.format("%s=%s", e.getKey(), e.getValue()));
                }
                try {
                    RemoteDriver.main(args.toArray(new String[args.size()]));
                } catch (Exception e) {
                    LOG.error("Error running driver.", e);
                }
            }
        };
        return new ChildProcess(conf, child);
    } else {
        // If a Spark installation is provided, use the spark-submit script. Otherwise, call the
        // SparkSubmit class directly, which has some caveats (like having to provide a proper
        // version of Guava on the classpath depending on the deploy mode).
        final SparkLauncher launcher = new SparkLauncher();
        String sparkHome = conf.get(SPARK_HOME_KEY);
        if (sparkHome == null) {
            sparkHome = System.getenv(SPARK_HOME_ENV);
        }
        if (sparkHome == null) {
            sparkHome = System.getProperty(SPARK_HOME_KEY);
        }
        launcher.setSparkHome(sparkHome);

        conf.set(CLIENT_ID, clientId);
        conf.set(CLIENT_SECRET, secret);

        launcher.setAppResource("spark-internal");

        String livyJars = conf.get(LIVY_JARS);
        if (livyJars == null) {
            String livyHome = System.getenv("LIVY_HOME");
            Preconditions.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
            File clientJars = new File(livyHome, "client-jars");
            Preconditions.checkState(clientJars.isDirectory(),
                    "Cannot find 'client-jars' directory under LIVY_HOME.");
            List<String> jars = new ArrayList<>();
            for (File f : clientJars.listFiles()) {
                jars.add(f.getAbsolutePath());
            }
            livyJars = Joiner.on(",").join(jars);
        }

        String userJars = conf.get(SPARK_JARS_KEY);
        if (userJars != null) {
            String allJars = Joiner.on(",").join(livyJars, userJars);
            conf.set(SPARK_JARS_KEY, allJars);
        } else {
            conf.set(SPARK_JARS_KEY, livyJars);
        }

        // Disable multiple attempts since the RPC server doesn't yet support multiple
        // connections for the same registered app.
        conf.set("spark.yarn.maxAppAttempts", "1");

        File confFile = writeConfToFile(conf);

        // Define how to pass options to the child process. If launching in client (or local)
        // mode, the driver options need to be passed directly on the command line. Otherwise,
        // SparkSubmit will take care of that for us.
        String master = conf.get("spark.master");
        Preconditions.checkArgument(master != null, "spark.master is not defined.");
        launcher.setMaster(master);
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(className);
        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }
        launcher.addAppArgs("--remote-host", serverAddress);
        launcher.addAppArgs("--remote-port", serverPort);
        return new ChildProcess(conf, launcher.launch());
    }
}

From source file: com.cloudera.livy.rsc.ContextLauncher.java

License: Apache License

private static ChildProcess startDriver(final RSCConf conf, Promise<?> promise) throws IOException {
    String livyJars = conf.get(LIVY_JARS);
    if (livyJars == null) {
        String livyHome = System.getenv("LIVY_HOME");
        Utils.checkState(livyHome != null, "Need one of LIVY_HOME or %s set.", LIVY_JARS.key());
        File rscJars = new File(livyHome, "rsc-jars");
        if (!rscJars.isDirectory()) {
            rscJars = new File(livyHome, "rsc/target/jars");
        }
        Utils.checkState(rscJars.isDirectory(), "Cannot find the RSC jars directory under LIVY_HOME.");
        List<String> jars = new ArrayList<>();
        for (File f : rscJars.listFiles()) {
            jars.add(f.getAbsolutePath());
        }
        livyJars = Utils.join(jars, ",");
    }
    merge(conf, SPARK_JARS_KEY, livyJars, ",");

    String kind = conf.get(SESSION_KIND);
    if ("sparkr".equals(kind)) {
        merge(conf, SPARK_ARCHIVES_KEY, conf.get(RSCConf.Entry.SPARKR_PACKAGE), ",");
    } else if ("pyspark".equals(kind)) {
        merge(conf, "spark.submit.pyFiles", conf.get(RSCConf.Entry.PYSPARK_ARCHIVES), ",");
    }

    // Disable multiple attempts since the RPC server doesn't yet support multiple
    // connections for the same registered app.
    conf.set("spark.yarn.maxAppAttempts", "1");

    // Let the launcher go away when launching in yarn cluster mode. This avoids keeping lots
    // of "small" Java processes lingering on the Livy server node.
    conf.set("spark.yarn.submit.waitAppCompletion", "false");

    // For testing; propagate jacoco settings so that we also do coverage analysis
    // on the launched driver. We replace the name of the main file ("main.exec")
    // so that we don't end up fighting with the main test launcher.
    String jacocoArgs = System.getProperty("jacoco.args");
    if (jacocoArgs != null) {
        jacocoArgs = jacocoArgs.replace("main.exec", "child.exec");
        merge(conf, SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, jacocoArgs, " ");
    }

    final File confFile = writeConfToFile(conf);

    if (conf.getBoolean(CLIENT_IN_PROCESS)) {
        // Mostly for testing things quickly. Do not do this in production.
        LOG.warn("!!!! Running remote driver in-process. !!!!");
        Runnable child = new Runnable() {
            @Override
            public void run() {
                try {
                    RSCDriverBootstrapper.main(new String[] { confFile.getAbsolutePath() });
                } catch (Exception e) {
                    throw Utils.propagate(e);
                }
            }
        };
        return new ChildProcess(conf, promise, child, confFile);
    } else {
        final SparkLauncher launcher = new SparkLauncher();
        launcher.setSparkHome(System.getenv(SPARK_HOME_ENV));
        launcher.setAppResource("spark-internal");
        launcher.setPropertiesFile(confFile.getAbsolutePath());
        launcher.setMainClass(RSCDriverBootstrapper.class.getName());

        if (conf.get(PROXY_USER) != null) {
            launcher.addSparkArg("--proxy-user", conf.get(PROXY_USER));
        }

        return new ChildProcess(conf, promise, launcher.launch(), confFile);
    }
}

From source file: com.thinkbiganalytics.nifi.pyspark.core.ExecutePySpark.java

License: Apache License

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final ComponentLog logger = getLog();
    FlowFile flowFile = session.get();

    if (flowFile == null) {
        flowFile = session.create();
        logger.info("Created a flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    } else {
        logger.info("Using an existing flow file having uuid: {}",
                new Object[] { flowFile.getAttribute(CoreAttributes.UUID.key()) });
    }
    try {
        final String kerberosPrincipal = context.getProperty(KERBEROS_PRINCIPAL).getValue();
        final String kerberosKeyTab = context.getProperty(KERBEROS_KEYTAB).getValue();
        final String hadoopConfigurationResources = context.getProperty(HADOOP_CONFIGURATION_RESOURCES)
                .getValue();
        final String pySparkAppFile = context.getProperty(PYSPARK_APP_FILE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppArgs = context.getProperty(PYSPARK_APP_ARGS)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAppName = context.getProperty(PYSPARK_APP_NAME)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String pySparkAdditionalFiles = context.getProperty(PYSPARK_ADDITIONAL_FILES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String sparkMaster = context.getProperty(SPARK_MASTER).evaluateAttributeExpressions(flowFile)
                .getValue().trim().toLowerCase();
        final String sparkYarnDeployMode = context.getProperty(SPARK_YARN_DEPLOY_MODE)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String yarnQueue = context.getProperty(YARN_QUEUE).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String sparkHome = context.getProperty(SPARK_HOME).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String driverMemory = context.getProperty(DRIVER_MEMORY).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String executorMemory = context.getProperty(EXECUTOR_MEMORY)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorInstances = context.getProperty(EXECUTOR_INSTANCES)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String executorCores = context.getProperty(EXECUTOR_CORES).evaluateAttributeExpressions(flowFile)
                .getValue();
        final String networkTimeout = context.getProperty(NETWORK_TIMEOUT)
                .evaluateAttributeExpressions(flowFile).getValue();
        final String additionalSparkConfigOptions = context.getProperty(ADDITIONAL_SPARK_CONFIG_OPTIONS)
                .evaluateAttributeExpressions(flowFile).getValue();

        PySparkUtils pySparkUtils = new PySparkUtils();

        /* Get app arguments */
        String[] pySparkAppArgsArray = null;
        if (!StringUtils.isEmpty(pySparkAppArgs)) {
            pySparkAppArgsArray = pySparkUtils.getCsvValuesAsArray(pySparkAppArgs);
            logger.info("Provided application arguments: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        /* Get additional python files */
        String[] pySparkAdditionalFilesArray = null;
        if (!StringUtils.isEmpty(pySparkAdditionalFiles)) {
            pySparkAdditionalFilesArray = pySparkUtils.getCsvValuesAsArray(pySparkAdditionalFiles);
            logger.info("Provided python files: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAdditionalFilesArray) });
        }

        /* Get additional config key-value pairs */
        String[] additionalSparkConfigOptionsArray = null;
        if (!StringUtils.isEmpty(additionalSparkConfigOptions)) {
            additionalSparkConfigOptionsArray = pySparkUtils.getCsvValuesAsArray(additionalSparkConfigOptions);
            logger.info("Provided spark config options: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(additionalSparkConfigOptionsArray) });
        }

        /* Determine if Kerberos is enabled */
        boolean kerberosEnabled = false;
        if (!StringUtils.isEmpty(kerberosPrincipal) && !StringUtils.isEmpty(kerberosKeyTab)
                && !StringUtils.isEmpty(hadoopConfigurationResources)) {
            kerberosEnabled = true;
            logger.info("Kerberos is enabled");
        }

        /* For Kerberized cluster, attempt user authentication */
        if (kerberosEnabled) {
            logger.info("Attempting user authentication for Kerberos");
            ApplySecurityPolicy applySecurityObject = new ApplySecurityPolicy();
            Configuration configuration;
            try {
                logger.info("Getting Hadoop configuration from " + hadoopConfigurationResources);
                configuration = ApplySecurityPolicy.getConfigurationFromResources(hadoopConfigurationResources);

                if (SecurityUtil.isSecurityEnabled(configuration)) {
                    logger.info("Security is enabled");

                    if (kerberosPrincipal.equals("") && kerberosKeyTab.equals("")) {
                        logger.error(
                                "Kerberos Principal and Keytab provided with empty values for a Kerberized cluster.");
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }

                    try {
                        logger.info("User authentication initiated");

                        boolean authenticationStatus = applySecurityObject.validateUserWithKerberos(logger,
                                hadoopConfigurationResources, kerberosPrincipal, kerberosKeyTab);
                        if (authenticationStatus) {
                            logger.info("User authenticated successfully.");
                        } else {
                            logger.error("User authentication failed.");
                            session.transfer(flowFile, REL_FAILURE);
                            return;
                        }

                    } catch (Exception unknownException) {
                        logger.error("Unknown exception occurred while validating user :"
                                + unknownException.getMessage());
                        session.transfer(flowFile, REL_FAILURE);
                        return;
                    }
                }
            } catch (IOException e1) {
                logger.error("Unknown exception occurred while authenticating user :" + e1.getMessage());
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }

        /* Build and launch PySpark Job */
        logger.info("Configuring PySpark job for execution");
        SparkLauncher pySparkLauncher = new SparkLauncher().setAppResource(pySparkAppFile);
        logger.info("PySpark app file set to: {}", new Object[] { pySparkAppFile });

        if (pySparkAppArgsArray != null && pySparkAppArgsArray.length > 0) {
            pySparkLauncher = pySparkLauncher.addAppArgs(pySparkAppArgsArray);
            logger.info("App arguments set to: {}",
                    new Object[] { pySparkUtils.getCsvStringFromArray(pySparkAppArgsArray) });
        }

        pySparkLauncher = pySparkLauncher.setAppName(pySparkAppName).setMaster(sparkMaster);

        logger.info("App name set to: {}", new Object[] { pySparkAppName });
        logger.info("Spark master set to: {}", new Object[] { sparkMaster });

        if (pySparkAdditionalFilesArray != null && pySparkAdditionalFilesArray.length > 0) {
            for (String pySparkAdditionalFile : pySparkAdditionalFilesArray) {
                pySparkLauncher = pySparkLauncher.addPyFile(pySparkAdditionalFile);
                logger.info("Additional python file set to: {}", new Object[] { pySparkAdditionalFile });
            }
        }

        if (sparkMaster.equals("yarn")) {
            pySparkLauncher = pySparkLauncher.setDeployMode(sparkYarnDeployMode);
            logger.info("YARN deploy mode set to: {}", new Object[] { sparkYarnDeployMode });
        }

        pySparkLauncher = pySparkLauncher.setSparkHome(sparkHome)
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMemory)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMemory)
                .setConf(CONFIG_PROP_SPARK_EXECUTOR_INSTANCES, executorInstances)
                .setConf(SparkLauncher.EXECUTOR_CORES, executorCores)
                .setConf(CONFIG_PROP_SPARK_NETWORK_TIMEOUT, networkTimeout);

        logger.info("Spark home set to: {} ", new Object[] { sparkHome });
        logger.info("Driver memory set to: {} ", new Object[] { driverMemory });
        logger.info("Executor memory set to: {} ", new Object[] { executorMemory });
        logger.info("Executor instances set to: {} ", new Object[] { executorInstances });
        logger.info("Executor cores set to: {} ", new Object[] { executorCores });
        logger.info("Network timeout set to: {} ", new Object[] { networkTimeout });

        if (kerberosEnabled) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_PRINCIPAL, kerberosPrincipal);
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_KEYTAB, kerberosKeyTab);
            logger.info("Kerberos principal set to: {} ", new Object[] { kerberosPrincipal });
            logger.info("Kerberos keytab set to: {} ", new Object[] { kerberosKeyTab });
        }

        if (!StringUtils.isEmpty(yarnQueue)) {
            pySparkLauncher = pySparkLauncher.setConf(CONFIG_PROP_SPARK_YARN_QUEUE, yarnQueue);
            logger.info("YARN queue set to: {} ", new Object[] { yarnQueue });
        }

        if (additionalSparkConfigOptionsArray != null && additionalSparkConfigOptionsArray.length > 0) {
            for (String additionalSparkConfigOption : additionalSparkConfigOptionsArray) {
                String[] confKeyValue = additionalSparkConfigOption.split("=");
                if (confKeyValue.length == 2) {
                    pySparkLauncher = pySparkLauncher.setConf(confKeyValue[0], confKeyValue[1]);
                    logger.info("Spark additional config option set to: {}={}",
                            new Object[] { confKeyValue[0], confKeyValue[1] });
                }
            }
        }

        logger.info("Starting execution of PySpark job");
        Process pySparkProcess = pySparkLauncher.launch();

        InputStreamReaderRunnable inputStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getInputStream());
        Thread inputThread = new Thread(inputStreamReaderRunnable, "stream input");
        inputThread.start();

        InputStreamReaderRunnable errorStreamReaderRunnable = new InputStreamReaderRunnable(LogLevel.INFO,
                logger, pySparkProcess.getErrorStream());
        Thread errorThread = new Thread(errorStreamReaderRunnable, "stream error");
        errorThread.start();

        logger.info("Waiting for PySpark job to complete");

        int exitCode = pySparkProcess.waitFor();
        if (exitCode != 0) {
            logger.info("Finished execution of PySpark job [FAILURE] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_FAILURE);
        } else {
            logger.info("Finished execution of PySpark job [SUCCESS] [Status code: {}]",
                    new Object[] { exitCode });
            session.transfer(flowFile, REL_SUCCESS);
        }
    } catch (final Exception e) {
        logger.error("Unable to execute PySpark job [FAILURE]", new Object[] { flowFile, e });
        session.transfer(flowFile, REL_FAILURE);
    }
}

From source file: com.uber.hoodie.cli.utils.SparkUtil.java

License: Apache License

/**
 * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro
 */
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
    String currentJar = new File(
            SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
                    .getAbsolutePath();
    SparkLauncher sparkLauncher = new SparkLauncher().setAppResource(currentJar)
            .setMainClass(SparkMain.class.getName());

    if (StringUtils.isNotEmpty(propertiesFile)) {
        sparkLauncher.setPropertiesFile(propertiesFile);
    }

    File libDirectory = new File(new File(currentJar).getParent(), "lib");
    for (String library : libDirectory.list()) {
        sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath());
    }
    return sparkLauncher;
}

From source file: io.zz.Launcher.java

public static void main(String[] args) throws IOException, InterruptedException {
    SparkLauncher sparkLauncher = new SparkLauncher();
    Process spark = sparkLauncher.setAppName("APP NAME").setSparkHome("/tmp")
            .setAppResource(SparkContext.jarOfClass(Launcher.class).get())
            .setMaster("spark://192.168.100.105:7077").setMainClass("io.zz.TestSaveToCassandra").launch();

    spark.waitFor();
}

From source file: org.apache.eagle.app.environment.impl.SparkExecutionRuntime.java

License: Apache License

private SparkLauncher prepareSparkConfig(Config config) {
    String master = config.hasPath(TOPOLOGY_MASTER) ? config.getString(TOPOLOGY_MASTER) : "local[*]";
    String sparkExecutorCores = config.getString(SPARK_EXECUTOR_CORES);
    String sparkExecutorMemory = config.getString(SPARK_EXECUTOR_MEMORY);
    String driverMemory = config.getString(DRIVER_MEMORY);
    String driverCore = config.getString(DRIVER_CORES);
    String deployMode = config.getString(DEPLOY_MODE);
    String enable = config.getString(TOPOLOGY_DYNAMICALLOCATION);
    boolean verbose = config.getBoolean(TOPOLOGY_VERBOSE);
    String mainClass = config.getString(TOPOLOGY_MAINCLASS);
    String sparkHome = config.getString(TOPOLOGY_SPARKHOME);
    String uiport = config.getString(TOPOLOGY_SPARKUIPORT);
    String appResource = config.getString(TOPOLOGY_APPRESOURCE);
    String yarnqueue = config.getString(TOPOLOGY_YARNQUEUE);

    SparkLauncher sparkLauncher = new SparkLauncher();
    sparkLauncher.setMaster(master);
    sparkLauncher.setMainClass(mainClass);
    sparkLauncher.setSparkHome(sparkHome);
    //sparkLauncher.setJavaHome(TOPOLOGY_JAVAHOME);
    sparkLauncher.setDeployMode(deployMode);
    sparkLauncher.setVerbose(verbose);
    sparkLauncher.setAppResource(appResource);
    sparkLauncher.setAppName(config.getString(TOPOLOGY_NAME));
    sparkLauncher.setConf("spark.yarn.queue", yarnqueue);
    sparkLauncher.setConf("spark.executor.cores", sparkExecutorCores);
    sparkLauncher.setConf("spark.executor.memory", sparkExecutorMemory);
    sparkLauncher.setConf("spark.driver.memory", driverMemory);
    sparkLauncher.setConf("spark.driver.cores", driverCore);
    sparkLauncher.setConf("spark.streaming.dynamicAllocation.enable", enable);
    sparkLauncher.setConf("spark.ui.port", uiport);
    String path = config.getString(TOPOLOGY_SPARKCONFFILEPATH);
    if (StringUtil.isNotBlank(path)) {
        sparkLauncher.setPropertiesFile(path);
    }

    String batchDuration = config.getString(BATCH_DURATION);
    String routerTasknum = config.getString(ROUTER_TASK_NUM);
    String alertTasknum = config.getString(ALERT_TASK_NUM);
    String publishTasknum = config.getString(PUBLISH_TASK_NUM);
    String slideDurationsecond = config.getString(SLIDE_DURATION_SECOND);
    String windowDurationssecond = config.getString(WINDOW_DURATIONS_SECOND);
    String checkpointPath = config.getString(CHECKPOINT_PATH);
    String topologyGroupid = config.getString(TOPOLOGY_GROUPID);
    String autoOffsetReset = config.getString(AUTO_OFFSET_RESET);
    String restApihost = config.getString(EAGLE_CORRELATION_SERVICE_HOST);
    String restApiport = config.getString(EAGLE_CORRELATION_SERVICE_PORT);
    String restApicontext = config.getString(EAGLE_CORRELATION_CONTEXT);
    String useMultiKafka = config.getString(TOPOLOGY_MULTIKAFKA);
    String kafkaBrokerZkQuorum = config.getString(SPOUT_KAFKABROKERZKQUORUM);
    String zkConfigzkQuorum = config.getString(ZKCONFIG_ZKQUORUM);

    sparkLauncher.addAppArgs(batchDuration, routerTasknum, alertTasknum, publishTasknum, slideDurationsecond,
            windowDurationssecond, checkpointPath, topologyGroupid, autoOffsetReset, restApicontext,
            restApiport, restApihost, useMultiKafka, kafkaBrokerZkQuorum, zkConfigzkQuorum);
    return sparkLauncher;
}

From source file: org.apache.pirk.test.distributed.testsuite.DistTestSuite.java

License: Apache License

@SuppressWarnings("unused")
public static List<QueryResponseJSON> performQuery(String queryType, ArrayList<String> selectors, FileSystem fs,
        boolean isSpark, int numThreads, boolean isStreaming) throws Exception {
    logger.info("performQuery: ");

    String queryInputDir = SystemConfiguration.getProperty(DistributedTestDriver.PIR_QUERY_INPUT_DIR);
    String outputFile = SystemConfiguration.getProperty(DistributedTestDriver.OUTPUT_DIRECTORY_PROPERTY);
    fs.delete(new Path(outputFile), true); // Ensure old output does not exist.

    SystemConfiguration.setProperty("pir.queryInput", queryInputDir);
    SystemConfiguration.setProperty("pir.outputFile", outputFile);
    SystemConfiguration.setProperty("pir.numReduceTasks", "1");
    SystemConfiguration.setProperty("pir.stopListFile",
            SystemConfiguration.getProperty(DistributedTestDriver.PIR_STOPLIST_FILE));

    // Create the temp result file
    File fileFinalResults = File.createTempFile("finalResultsFile", ".txt");
    fileFinalResults.deleteOnExit();
    logger.info("fileFinalResults = " + fileFinalResults.getAbsolutePath());

    boolean embedSelector = SystemConfiguration.getBooleanProperty("pirTest.embedSelector", false);
    boolean useExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useExpLookupTable", false);
    boolean useHDFSExpLookupTable = SystemConfiguration.getBooleanProperty("pirTest.useHDFSExpLookupTable",
            false);

    // Set the necessary objects
    QueryInfo queryInfo = new QueryInfo(BaseTests.queryIdentifier, selectors.size(), BaseTests.hashBitSize,
            BaseTests.hashKey, BaseTests.dataPartitionBitSize, queryType, useExpLookupTable, embedSelector,
            useHDFSExpLookupTable);

    Paillier paillier = new Paillier(BaseTests.paillierBitSize, BaseTests.certainty);

    // Perform the encryption
    logger.info("Performing encryption of the selectors - forming encrypted query vectors:");
    EncryptQuery encryptQuery = new EncryptQuery(queryInfo, selectors, paillier);
    Querier querier = encryptQuery.encrypt(numThreads);
    logger.info("Completed encryption of the selectors - completed formation of the encrypted query vectors:");

    // Write the Query object to a file
    Path queryInputDirPath = new Path(queryInputDir);
    new HadoopFileSystemStore(fs).store(queryInputDirPath, querier.getQuery());
    fs.deleteOnExit(queryInputDirPath);

    // Grab the original data and query schema properties to reset upon completion
    String dataSchemaProp = SystemConfiguration.getProperty("data.schemas");
    String querySchemaProp = SystemConfiguration.getProperty("query.schemas");

    // Get the correct input format class name
    JSONInputFormatBase jFormat = new JSONInputFormatBase();
    String jsonBaseInputFormatString = jFormat.getClass().getName();
    SystemConfiguration.setProperty("pir.baseInputFormat", jsonBaseInputFormatString);

    // Submitting the tool for encrypted query
    logger.info("Performing encrypted query:");
    if (isSpark) {
        logger.info("spark.home = " + SystemConfiguration.getProperty("spark.home"));

        // Build args
        String inputFormat = SystemConfiguration.getProperty("pir.dataInputFormat");
        logger.info("inputFormat = " + inputFormat);
        ArrayList<String> args = new ArrayList<>();
        if (isStreaming) {
            logger.info("platform = sparkstreaming");
            args.add("-" + ResponderProps.PLATFORM + "=sparkstreaming");
            args.add("-" + ResponderProps.BATCHSECONDS + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.batchSeconds", "30"));
            args.add("-" + ResponderProps.WINDOWLENGTH + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.windowLength", "60"));
            args.add("-" + ResponderProps.MAXBATCHES + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.maxBatches", "-1"));
            args.add("-" + ResponderProps.STOPGRACEFULLY + "="
                    + SystemConfiguration.getProperty("spark.streaming.stopGracefullyOnShutdown", "false"));
            args.add("-" + ResponderProps.NUMDATAPARTITIONS + "="
                    + SystemConfiguration.getProperty("pir.numDataPartitions", "3"));
            args.add("-" + ResponderProps.USEQUEUESTREAM + "="
                    + SystemConfiguration.getProperty("pir.sparkstreaming.useQueueStream", "false"));
        } else {
            logger.info("platform = spark");
            args.add("-" + ResponderProps.PLATFORM + "=spark");
        }
        args.add("-" + ResponderProps.DATAINPUTFORMAT + "=" + inputFormat);
        args.add("-" + ResponderProps.QUERYINPUT + "=" + SystemConfiguration.getProperty("pir.queryInput"));
        args.add("-" + ResponderProps.OUTPUTFILE + "=" + SystemConfiguration.getProperty("pir.outputFile"));
        args.add("-" + ResponderProps.STOPLISTFILE + "=" + SystemConfiguration.getProperty("pir.stopListFile"));
        args.add("-" + ResponderProps.USELOCALCACHE + "="
                + SystemConfiguration.getProperty("pir.useLocalCache", "true"));
        args.add("-" + ResponderProps.LIMITHITSPERSELECTOR + "="
                + SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false"));
        args.add("-" + ResponderProps.MAXHITSPERSELECTOR + "="
                + SystemConfiguration.getProperty("pir.maxHitsPerSelector", "1000"));
        args.add("-" + ResponderProps.QUERYSCHEMAS + "=" + Inputs.HDFS_QUERY_FILES);
        args.add("-" + ResponderProps.DATASCHEMAS + "=" + Inputs.DATA_SCHEMA_FILE_HDFS);
        args.add("-" + ResponderProps.NUMEXPLOOKUPPARTS + "="
                + SystemConfiguration.getProperty("pir.numExpLookupPartitions", "100"));
        args.add("-" + ResponderProps.USEMODEXPJOIN + "="
                + SystemConfiguration.getProperty("pir.useModExpJoin", "false"));
        args.add("-" + ResponderProps.NUMCOLMULTPARTITIONS + "="
                + SystemConfiguration.getProperty("pir.numColMultPartitions", "20"));
        args.add("-" + ResponderProps.COLMULTREDUCEBYKEY + "="
                + SystemConfiguration.getProperty("pir.colMultReduceByKey", "false"));
        if (inputFormat.equals(InputFormatConst.BASE_FORMAT)) {
            args.add("-" + ResponderProps.INPUTDATA + "=" + SystemConfiguration.getProperty("pir.inputData"));
            args.add("-" + ResponderProps.BASEQUERY + "=" + SystemConfiguration.getProperty("pir.baseQuery"));
            args.add("-" + ResponderProps.BASEINPUTFORMAT + "="
                    + SystemConfiguration.getProperty("pir.baseInputFormat"));
        } else if (inputFormat.equals(InputFormatConst.ES)) {
            args.add("-" + ResponderProps.ESQUERY + "=" + SystemConfiguration.getProperty("pir.esQuery"));
            args.add("-" + ResponderProps.ESRESOURCE + "=" + SystemConfiguration.getProperty("pir.esResource"));
            args.add("-" + ResponderProps.ESNODES + "="
                    + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_NODES_PROPERTY));
            args.add("-" + ResponderProps.ESPORT + "="
                    + SystemConfiguration.getProperty(DistributedTestDriver.ES_INPUT_PORT_PROPERTY));
        }

        for (String arg : args) {
            logger.info("arg = " + arg);
        }

        // Run spark application
        Process sLauncher = new SparkLauncher().setAppResource(SystemConfiguration.getProperty("jarFile"))
                .setSparkHome(SystemConfiguration.getProperty("spark.home"))
                .setMainClass("org.apache.pirk.responder.wideskies.ResponderDriver")
                .addAppArgs(args.toArray(new String[args.size()])).setMaster("yarn-cluster")
                .setConf(SparkLauncher.EXECUTOR_MEMORY, "2g").setConf(SparkLauncher.DRIVER_MEMORY, "2g")
                .setConf(SparkLauncher.EXECUTOR_CORES, "1").launch();
        sLauncher.waitFor();
    } else {
        SystemConfiguration.setProperty("data.schemas", Inputs.DATA_SCHEMA_FILE_HDFS);
        SystemConfiguration.setProperty("query.schemas", Inputs.HDFS_QUERY_FILES);

        ComputeResponseTool responseTool = new ComputeResponseTool();
        ToolRunner.run(responseTool, new String[] {});
    }
    logger.info("Completed encrypted query");

    // Perform decryption
    // Reconstruct the necessary objects from the files
    logger.info("Performing decryption; writing final results file");
    if (isStreaming) {
        outputFile = outputFile + "_0"; // currently only processing one batch for testing
    }
    logger.info("Pulling results from outputFile = " + outputFile);
    Response response = new HadoopFileSystemStore(fs).recall(outputFile, Response.class);

    // Perform decryption and output the result file
    DecryptResponse decryptResponse = new DecryptResponse(response, querier);
    QueryResultsWriter.writeResultFile(fileFinalResults, decryptResponse.decrypt(numThreads));
    logger.info("Completed performing decryption and writing final results file");

    // Read in results
    logger.info("Reading in and checking results");
    List<QueryResponseJSON> results = TestUtils.readResultsFile(fileFinalResults);

    // Reset data and query schema properties
    SystemConfiguration.setProperty("data.schemas", dataSchemaProp);
    SystemConfiguration.setProperty("query.schemas", querySchemaProp);

    // Clean up output dir in hdfs
    fs.delete(new Path(outputFile), true);

    return results;
}

From source file: org.cripac.isee.vpe.ctrl.SystemPropertyCenter.java

License: Open Source License

SparkLauncher GetSparkLauncher(String appName) throws IOException, NoAppSpecifiedException {
    SparkLauncher launcher = new SparkLauncher().setAppResource(jarPath)
            .setMainClass(AppManager.getMainClassName(appName)).setMaster(sparkMaster).setAppName(appName)
            .setVerbose(verbose).addFile(ConfManager.getConcatCfgFilePathList(","))
            .setConf(SparkLauncher.DRIVER_MEMORY, driverMem).setConf(SparkLauncher.EXECUTOR_MEMORY, executorMem)
            .setConf(SparkLauncher.CHILD_PROCESS_LOGGER_NAME, appName)
            .setConf(SparkLauncher.EXECUTOR_CORES, "" + executorCores)
            .setConf("spark.driver.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.executor.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
            .setConf("spark.yarn.am.nodeLabelExpression", yarnAmNodeLabelExpression)
            .addSparkArg("--driver-cores", "" + driverCores).addSparkArg("--num-executors", "" + numExecutors)
            .addSparkArg("--total-executor-cores", "" + totalExecutorCores).addSparkArg("--queue", hadoopQueue)
            .addAppArgs(getArgs());
    if (sparkConfFilePath != null) {
        if (new File(sparkConfFilePath).exists()) {
            launcher = launcher.setPropertiesFile(sparkConfFilePath);
        } else {
            logger.warn("Spark configuration file " + sparkConfFilePath + " does not exist!");
        }
    }
    if (log4jPropFilePath != null) {
        if (new File(log4jPropFilePath).exists()) {
            launcher = launcher.addFile(log4jPropFilePath);
        } else {
            logger.warn("Loj4j configuration file " + log4jPropFilePath + " does not exist!");
        }
    }
    if (sysPropFilePath != null) {
        if (new File(sysPropFilePath).exists()) {
            launcher = launcher.addFile(sysPropFilePath);
        } else {
            logger.warn("System configuration file " + sysPropFilePath + " does not exist!");
        }
    }
    if (appPropFilePath != null) {
        if (new File(appPropFilePath).exists()) {
            launcher = launcher.addFile(appPropFilePath);
        } else {
            logger.warn("App configuration file " + appPropFilePath + " does not exist!");
        }
    }
    return launcher;
}

From source file: org.flowable.decision.DecisionAnalysisService.java

License: Apache License

private void submitSparkAppsForTasks(ProcessDefinition processDefinition, Map<String, List<String>> outcomesMap,
        List<UserTask> matchingUserTasks, Map<String, Map<String, List<String>>> possibleValueCounts) {
    for (UserTask matchingUserTask : matchingUserTasks) {
        LOGGER.info("Submitting Spark ML app for task " + matchingUserTask.getId() + "...");
        try {

            // Not so pretty: generating a long argument string to pass info to spark job. Should be handled with a persistent store really.

            /*
             * Format (separated by # character):
             * 
             * - processDefinitionId
             * - taskKey
             * - outcome variable
             * - outcome variable possibilities
             * - variable names
             * - variable possibilities
             */

            StringBuilder argumentBuilder = new StringBuilder();
            argumentBuilder.append(processDefinition.getId()).append("#") // process definition id
                    .append(matchingUserTask.getId()).append("#") // task key
                    .append("form_" + matchingUserTask.getFormKey() + "_outcome").append("#"); // outcome variable

            List<String> outcomes = outcomesMap.get(matchingUserTask.getId());
            for (int i = 0; i < outcomes.size(); i++) {
                argumentBuilder.append(outcomes.get(i)); // outcome variable output possibilities
                if (i != outcomes.size() - 1) {
                    argumentBuilder.append(";");
                }
            }
            argumentBuilder.append("#");

            Map<String, List<String>> variableToPotentialValues = possibleValueCounts
                    .get(matchingUserTask.getId());
            List<String> variableNames = new ArrayList<>(variableToPotentialValues.keySet());
            for (int i = 0; i < variableNames.size(); i++) {
                argumentBuilder.append(variableNames.get(i)); // variable names
                if (i != variableNames.size() - 1) {
                    argumentBuilder.append(";");
                }
            }
            argumentBuilder.append("#");
            for (int i = 0; i < variableNames.size(); i++) {
                List<String> possibleValues = variableToPotentialValues.get(variableNames.get(i));
                for (int j = 0; j < possibleValues.size(); j++) {
                    argumentBuilder.append(possibleValues.get(j)); // variable possibilities
                    if (j != possibleValues.size() - 1) {
                        argumentBuilder.append("&");
                    }
                }
                if (i != variableNames.size() - 1) {
                    argumentBuilder.append(";");
                }
            }

            LOGGER.info("Arguments for Spark app: " + argumentBuilder.toString());

            SparkAppHandle sparkAppHandle = new SparkLauncher().setSparkHome(System.getProperty("sparkHome"))
                    .setAppResource(System.getProperty("appResource"))
                    .setMainClass("org.flowable.AnalyseDecisions").setMaster("local[4]")
                    //                        .setVerbose(true)
                    .addAppArgs(argumentBuilder.toString()).redirectOutput(Redirect.INHERIT)
                    .startApplication(new SparkAppHandle.Listener() {

                        @Override
                        public void stateChanged(SparkAppHandle handle) {
                            LOGGER.info("State changed: " + handle.getState());
                        }

                        @Override
                        public void infoChanged(SparkAppHandle handle) {
                            LOGGER.info("Info changed, current state: " + handle.getState());
                        }
                    });

            // For demo: make sure the tasks are processed sequentially to not have the console output mixed for all tasks 
            while (!sparkAppHandle.getState().equals(State.FINISHED)
                    && !sparkAppHandle.getState().equals(State.FAILED)) {
                Thread.sleep(5000L);
            }

        } catch (IOException e) {
            LOGGER.error("Could not submit app to Spark", e);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }
}

From source file: org.kaaproject.examples.spark.KaaSparkLauncher.java

License: Apache License

public static void main(String[] args) throws Exception {
    SparkLauncher launcher = new SparkLauncher().setMaster(SPARK_MASTER_URL).setSparkHome(SPARK_HOME)
            .setAppResource(SPARK_APP_JAR).setMainClass(KaaSparkExample.class.getName())
            .setAppName(KAA_SPARK_EXAMPLE_JOB_NAME).addAppArgs(FLUME_BIND_HOST, FLUME_BIND_PORT);

    final Process spark = launcher.launch();

    Runtime.getRuntime().addShutdownHook(new Thread() {
        @Override
        public void run() {
            LOG.warn("Spark job interrupted!");
            spark.destroy();
        }
    });

    Thread isReader = startReader(spark.getInputStream());
    Thread esReader = startReader(spark.getErrorStream());

    int resultCode = spark.waitFor();

    isReader.join();
    esReader.join();

    if (resultCode != 0) {
        LOG.warn("Spark job result code: {}", resultCode);
    }
}