Java tutorial
/*
 * This file is part of LaS-VPE Platform.
 *
 * LaS-VPE Platform is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * LaS-VPE Platform is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LaS-VPE Platform. If not, see <http://www.gnu.org/licenses/>.
 */

package org.cripac.isee.vpe.ctrl;

import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import org.apache.commons.cli.*;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.log4j.Level;
import org.apache.spark.launcher.SparkLauncher;
import org.cripac.isee.vpe.util.hdfs.HadoopHelper;
import org.cripac.isee.vpe.util.logging.ConsoleLogger;
import org.cripac.isee.vpe.util.logging.Logger;
import org.xml.sax.SAXException;

import javax.annotation.Nonnull;
import javax.xml.parsers.ParserConfigurationException;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;

/**
 * The SystemPropertyCenter class is responsible for managing the properties of
 * the system. Some properties are predefined, and they can be overwritten by
 * command-line options or an external property file. It can also generate
 * command-line options back for uses like SparkSubmit.
 *
 * @author Ken Yu, CRIPAC, 2016
 */
public class SystemPropertyCenter implements Serializable {

    private static final long serialVersionUID = -6642856932636724919L;

    // Logger for parsing.
    private transient Logger logger = new ConsoleLogger(Level.INFO);

    // Zookeeper properties
    public String zkConn = "localhost:2181";
    public int sessionTimeoutMs = 10 * 10000;

    // Kafka properties
    public String kafkaBootstrapServers = "localhost:9092";
    public int kafkaNumPartitions = 1;
    public int kafkaReplFactor = 1;
    private int kafkaMsgMaxBytes = 100000000;
    private int kafkaSendMaxSize = 100000000;
    private int kafkaRequestTimeoutMs = 60000;
    private int kafkaFetchTimeoutMs = 60000;
    public String kafkaLocationStrategy = "PreferBrokers";

    // Spark properties
    public String checkpointRootDir = "checkpoint";
    public String metadataDir = "metadata";
    public String sparkMaster = "local[*]";
    public String sparkDeployMode = "client";
    String[] appsToStart = null;

    // Caffe properties
    public int caffeGPU = -1;

    /**
     * Memory per executor (e.g. 1000M, 2G) (Default: 1G).
     */
    private String executorMem = "1G";

    /**
     * Number of executors to run (Default: 2).
     */
    private int numExecutors = 2;

    /**
     * Number of cores per executor (Default: 1).
     */
    private int executorCores = 1;

    /**
     * Total cores for all executors (Spark standalone and Mesos only).
     */
    private int totalExecutorCores = 1;

    /**
     * Memory for the driver (e.g. 1000M, 2G) (Default: 1G).
     */
    private String driverMem = "1G";

    /**
     * Number of cores used by the driver (Default: 1).
     */
    private int driverCores = 1;

    /**
     * A YARN node label expression that restricts the set of nodes the AM will
     * be scheduled on. Only versions of YARN greater than or equal to 2.6
     * support node label expressions, so when running against earlier
     * versions, this property will be ignored.
     * <p>
     * To enable label-based scheduling, see
     * https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/NodeLabel.html
     */
    private String yarnAmNodeLabelExpression = "";

    /**
     * The Hadoop queue to use for allocation requests (Default: 'default').
     */
    private String hadoopQueue = "default";

    private String sysPropFilePath = "conf/system.properties";

    /**
     * Application-specific property file. Properties loaded from it
     * will override those loaded from the system property file.
     * Leaving it as null will let the system automatically find
     * it in default places according to the application specified.
     */
    private String appPropFilePath = null;

    private String sparkConfFilePath = ConfManager.CONF_DIR + "/spark-defaults.conf";
    private String log4jPropFilePath = ConfManager.CONF_DIR + "/log4j.properties";
    private String hdfsDefaultName = "localhost:9000";
    private String jarPath = "bin/vpe-platform.jar";

    /**
     * Duration for buffering results.
     */
    public int bufDuration = 600000;

    /**
     * Duration of Spark batches.
     */
    public int batchDuration = 2000;

    /**
     * Whether to print verbose running information.
     */
    public boolean verbose = false;

    /**
     * Subclasses can continue to analyze this property storage.
     */
    protected Properties sysProps = new Properties();
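
    // For reference, a minimal conf/system.properties that this class can
    // digest might look like the following. The keys are the ones handled in
    // the constructor's switch statement below; the values are illustrative
    // assumptions, not shipped defaults:
    //
    //   zookeeper.connect=zk-host:2181
    //   kafka.bootstrap.servers=kafka-host:9092
    //   kafka.partitions=4
    //   kafka.replication.factor=2
    //   spark.master=yarn
    //   spark.deploy.mode=cluster
    //   executor.num=4
    //   executor.memory=2G
    //   driver.memory=1G
    //   hadoop.queue=default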
    /**
     * Constructor supporting allocating a SystemPropertyCenter and then
     * filling in the properties manually.
     */
    public SystemPropertyCenter()
            throws SAXException, ParserConfigurationException, URISyntaxException {
        this(new String[0]);
    }

    public SystemPropertyCenter(@Nonnull String[] args)
            throws URISyntaxException, ParserConfigurationException, SAXException {
        CommandLineParser parser = new BasicParser();
        Options options = new Options();
        options.addOption("h", "help", false, "Print this help message.");
        options.addOption("v", "verbose", false, "Display debug information.");
        options.addOption("a", "application", true, "Application specified to run.");
        options.addOption(null, "spark-property-file", true, "Path of the Spark property file.");
        options.addOption(null, "system-property-file", true, "Path of the system property file.");
        options.addOption(null, "app-property-file", true, "Path of the application-specific system property file.");
        options.addOption(null, "log4j-property-file", true, "Path of the log4j property file.");
        options.addOption(null, "report-listening-addr", true, "Address of the runtime report listener.");
        options.addOption(null, "report-listening-topic", true, "Topic of the runtime report listener.");
        CommandLine commandLine;

        try {
            commandLine = parser.parse(options, args);
        } catch (ParseException e) {
            e.printStackTrace();
            logger.debug("Try using '-h' for more information.");
            System.exit(0);
            return;
        }

        if (commandLine.hasOption('h')) {
            final HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("LaS-VPE Platform", options);
            System.exit(0);
            return;
        }

        verbose = commandLine.hasOption('v');
        if (verbose) {
            logger.setLevel(Level.DEBUG);
        }

        if (commandLine.hasOption('a')) {
            appsToStart = commandLine.getOptionValues('a');
            logger.debug("To run application:");
            for (String app : appsToStart) {
                logger.debug("\t\t" + app);
            }
        }

        if (commandLine.hasOption("system-property-file")) {
            sysPropFilePath = commandLine.getOptionValue("system-property-file");
        }
        if (commandLine.hasOption("log4j-property-file")) {
            log4jPropFilePath = commandLine.getOptionValue("log4j-property-file");
        }
        if (commandLine.hasOption("spark-property-file")) {
            sparkConfFilePath = commandLine.getOptionValue("spark-property-file");
        }
        if (commandLine.hasOption("app-property-file")) {
            appPropFilePath = commandLine.getOptionValue("app-property-file");
        }

        // Load properties from file.
        BufferedInputStream propInputStream;
        try {
            if (sysPropFilePath.contains("hdfs:/")) {
                // TODO: Check if can load property file from HDFS.
                logger.debug("Loading system-wise default properties using HDFS platform from "
                        + sysPropFilePath + "...");
                final FileSystem hdfs = FileSystem.get(new URI(sysPropFilePath), HadoopHelper.getDefaultConf());
                final FSDataInputStream hdfsInputStream = hdfs.open(new Path(sysPropFilePath));
                propInputStream = new BufferedInputStream(hdfsInputStream);
            } else {
                final File propFile = new File(sysPropFilePath);
                logger.debug("Loading system-wise default properties locally from "
                        + propFile.getAbsolutePath() + "...");
                propInputStream = new BufferedInputStream(new FileInputStream(propFile));
            }
            sysProps.load(propInputStream);
            propInputStream.close();
        } catch (IOException e) {
            e.printStackTrace();
            logger.error("Couldn't find system-wise default property file at specified path: \""
                    + sysPropFilePath + "\"!\n");
            logger.error("Try using '-h' for more information.");
            System.exit(0);
            return;
        }

        if (appPropFilePath != null) {
            try {
                if (appPropFilePath.contains("hdfs:/")) {
                    // TODO: Check if can load property file from HDFS.
                    logger.debug("Loading application-specific properties using HDFS platform from "
                            + appPropFilePath + "...");
                    final FileSystem hdfs = FileSystem.get(new URI(appPropFilePath), HadoopHelper.getDefaultConf());
                    final FSDataInputStream hdfsInputStream = hdfs.open(new Path(appPropFilePath));
                    propInputStream = new BufferedInputStream(hdfsInputStream);
                } else {
                    final File propFile = new File(appPropFilePath);
                    logger.debug("Loading application-specific properties locally from "
                            + propFile.getAbsolutePath() + "...");
                    propInputStream = new BufferedInputStream(new FileInputStream(propFile));
                }
                sysProps.load(propInputStream);
                propInputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
                logger.error("Couldn't find application-specific property file at specified path: \""
                        + appPropFilePath + "\"!\n");
                logger.error("Try using '-h' for more information.");
                System.exit(0);
                return;
            }
        }

        // Digest the settings.
        for (Entry<Object, Object> entry : sysProps.entrySet()) {
            if (verbose) {
                logger.debug("Read from property file: " + entry.getKey() + "=" + entry.getValue());
            }
            switch ((String) entry.getKey()) {
                case "zookeeper.connect":
                    zkConn = (String) entry.getValue();
                    break;
                case "kafka.bootstrap.servers":
                    kafkaBootstrapServers = (String) entry.getValue();
                    break;
                case "kafka.partitions":
                    kafkaNumPartitions = Integer.parseInt((String) entry.getValue());
                    break;
                case "kafka.replication.factor":
                    kafkaReplFactor = Integer.parseInt((String) entry.getValue());
                    break;
                case "kafka.fetch.max.size":
                    kafkaMsgMaxBytes = Integer.parseInt((String) entry.getValue());
                    break;
                case "kafka.location.strategy":
                    kafkaLocationStrategy = (String) entry.getValue();
                    break;
                case "spark.checkpoint.dir":
                    checkpointRootDir = (String) entry.getValue();
                    break;
                case "vpe.metadata.dir":
                    metadataDir = (String) entry.getValue();
                    break;
                case "spark.master":
                    sparkMaster = (String) entry.getValue();
                    break;
                case "spark.deploy.mode":
                    sparkDeployMode = (String) entry.getValue();
                    break;
                case "vpe.platform.jar":
                    jarPath = (String) entry.getValue();
                    break;
                case "spark.yarn.am.nodeLabelExpression":
                    yarnAmNodeLabelExpression = (String) entry.getValue();
                    break;
                case "hdfs.default.name":
                    hdfsDefaultName = (String) entry.getValue();
                    break;
                case "executor.num":
                    numExecutors = Integer.parseInt((String) entry.getValue());
                    break;
                case "executor.memory":
                    executorMem = (String) entry.getValue();
                    break;
                case "executor.cores":
                    executorCores = Integer.parseInt((String) entry.getValue());
                    break;
                case "total.executor.cores":
                    totalExecutorCores = Integer.parseInt((String) entry.getValue());
                    break;
                case "driver.memory":
                    driverMem = (String) entry.getValue();
                    break;
                case "driver.cores":
                    driverCores = Integer.parseInt((String) entry.getValue());
                    break;
                case "hadoop.queue":
                    hadoopQueue = (String) entry.getValue();
                    break;
                case "vpe.recv.parallel":
                    break;
                case "vpe.buf.duration":
                    bufDuration = Integer.parseInt((String) entry.getValue());
                    break;
                case "vpe.batch.duration":
                    batchDuration = Integer.parseInt((String) entry.getValue());
                    break;
                case "kafka.send.max.size":
                    kafkaSendMaxSize = Integer.parseInt((String) entry.getValue());
                    break;
                case "kafka.request.timeout.ms":
                    kafkaRequestTimeoutMs = Integer.parseInt((String) entry.getValue());
                    break;
                case "kafka.fetch.timeout.ms":
                    kafkaFetchTimeoutMs = Integer.parseInt((String) entry.getValue());
                    break;
                case "caffe.gpu":
                    caffeGPU = Integer.parseInt((String) entry.getValue());
                    break;
            }
        }
    }
    /**
     * Generate command-line options for the SparkSubmit client, according to
     * the stored properties.
     *
     * @return An array of strings in the format required by the SparkSubmit
     *         client.
     */
    private String[] getArgs() {
        ArrayList<String> optList = new ArrayList<>();

        if (verbose) {
            optList.add("-v");
        }

        if (appPropFilePath != null && new File(appPropFilePath).exists()) {
            optList.add("--app-property-file");
            optList.add(new File(appPropFilePath).getName());
        }

        optList.add("--system-property-file");
        optList.add(new File(sysPropFilePath).getName());

        optList.add("--log4j-property-file");
        if (sparkMaster.toLowerCase().contains("yarn")) {
            optList.add("log4j.properties");
        } else if (sparkMaster.toLowerCase().contains("local")) {
            optList.add(log4jPropFilePath);
        } else {
            throw new NotImplementedException(
                    "System currently does not support Spark master: " + sparkMaster);
        }

        return Arrays.copyOf(optList.toArray(), optList.size(), String[].class);
    }

    SparkLauncher GetSparkLauncher(String appName) throws IOException, NoAppSpecifiedException {
        SparkLauncher launcher = new SparkLauncher()
                .setAppResource(jarPath)
                .setMainClass(AppManager.getMainClassName(appName))
                .setMaster(sparkMaster)
                .setAppName(appName)
                .setVerbose(verbose)
                .addFile(ConfManager.getConcatCfgFilePathList(","))
                .setConf(SparkLauncher.DRIVER_MEMORY, driverMem)
                .setConf(SparkLauncher.EXECUTOR_MEMORY, executorMem)
                .setConf(SparkLauncher.CHILD_PROCESS_LOGGER_NAME, appName)
                .setConf(SparkLauncher.EXECUTOR_CORES, "" + executorCores)
                .setConf("spark.driver.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
                .setConf("spark.executor.extraJavaOptions", "-Dlog4j.configuration=log4j.properties")
                .setConf("spark.yarn.am.nodeLabelExpression", yarnAmNodeLabelExpression)
                .addSparkArg("--driver-cores", "" + driverCores)
                .addSparkArg("--num-executors", "" + numExecutors)
                .addSparkArg("--total-executor-cores", "" + totalExecutorCores)
                .addSparkArg("--queue", hadoopQueue)
                .addAppArgs(getArgs());
        if (sparkConfFilePath != null) {
            if (new File(sparkConfFilePath).exists()) {
                launcher = launcher.setPropertiesFile(sparkConfFilePath);
            } else {
                logger.warn("Spark configuration file " + sparkConfFilePath + " does not exist!");
            }
        }
        if (log4jPropFilePath != null) {
            if (new File(log4jPropFilePath).exists()) {
                launcher = launcher.addFile(log4jPropFilePath);
            } else {
                logger.warn("Log4j configuration file " + log4jPropFilePath + " does not exist!");
            }
        }
        if (sysPropFilePath != null) {
            if (new File(sysPropFilePath).exists()) {
                launcher = launcher.addFile(sysPropFilePath);
            } else {
                logger.warn("System configuration file " + sysPropFilePath + " does not exist!");
            }
        }
        if (appPropFilePath != null) {
            if (new File(appPropFilePath).exists()) {
                launcher = launcher.addFile(appPropFilePath);
            } else {
                logger.warn("App configuration file " + appPropFilePath + " does not exist!");
            }
        }
        return launcher;
    }

    /**
     * Thrown when no application is specified in any possible property sources.
     *
     * @author Ken Yu, CRIPAC, 2016
     */
    public static class NoAppSpecifiedException extends Exception {
        private static final long serialVersionUID = -8356206863229009557L;

        public NoAppSpecifiedException(String message) {
            super(message);
        }
    }
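
    // For reference, a launch sequence built on GetSparkLauncher might look
    // like this sketch ("SampleApp" is a hypothetical application name):
    //
    //   SparkLauncher launcher = propCenter.GetSparkLauncher("SampleApp");
    //   Process sparkProcess = launcher.launch();
    //   sparkProcess.waitFor();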
    public Properties getKafkaProducerProp(boolean isStringValue) {
        Properties producerProp = new Properties();
        producerProp.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaBootstrapServers);
        producerProp.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, kafkaSendMaxSize);
        producerProp.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        producerProp.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                isStringValue ? StringSerializer.class : ByteArraySerializer.class);
        producerProp.put(ProducerConfig.BUFFER_MEMORY_CONFIG, kafkaSendMaxSize);
        producerProp.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, kafkaRequestTimeoutMs);
        producerProp.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "lz4");
        return producerProp;
    }

    public Properties getKafkaConsumerProp(String group, boolean isStringValue) {
        Properties consumerProp = new Properties();
        consumerProp.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaBootstrapServers);
        consumerProp.put(ConsumerConfig.GROUP_ID_CONFIG, group);
        consumerProp.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        consumerProp.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        consumerProp.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                isStringValue ? StringDeserializer.class : ByteArrayDeserializer.class);
        consumerProp.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, kafkaFetchTimeoutMs);
        return consumerProp;
    }

    public Map<String, Object> getKafkaParams(String group) {
        Map<String, Object> kafkaParams = new Object2ObjectOpenHashMap<>();
        kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaBootstrapServers);
        kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, group);
        kafkaParams.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        kafkaParams.put(ConsumerConfig.FETCH_MAX_BYTES_CONFIG, kafkaMsgMaxBytes);
        kafkaParams.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, kafkaMsgMaxBytes);
        kafkaParams.put("fetch.message.max.bytes", kafkaMsgMaxBytes);
        kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class);
        kafkaParams.put(ConsumerConfig.RECEIVE_BUFFER_CONFIG, kafkaMsgMaxBytes);
        kafkaParams.put(ConsumerConfig.SEND_BUFFER_CONFIG, kafkaMsgMaxBytes);
        kafkaParams.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, kafkaFetchTimeoutMs);
        kafkaParams.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        return kafkaParams;
    }
}
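
As a usage sketch (not part of the platform source): the following hypothetical driver shows how a caller might construct a SystemPropertyCenter from command-line arguments and feed its digested Kafka properties into a producer. The class name PropCenterDemo and the topic name "demo-topic" are illustrative assumptions.

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.cripac.isee.vpe.ctrl.SystemPropertyCenter;

public class PropCenterDemo {
    public static void main(String[] args) throws Exception {
        // Parse predefined defaults, property files, and command-line options.
        SystemPropertyCenter propCenter = new SystemPropertyCenter(args);

        // Build a byte-array-valued producer from the digested properties.
        try (KafkaProducer<String, byte[]> producer =
                     new KafkaProducer<>(propCenter.getKafkaProducerProp(false))) {
            // "demo-topic" is a hypothetical topic name.
            producer.send(new ProducerRecord<>("demo-topic", "key", new byte[0]));
        }
    }
}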