org.apache.hive.hcatalog.templeton.AppConfig.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hive.hcatalog.templeton.AppConfig.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.templeton;

import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.VersionInfo;
import org.apache.hive.hcatalog.templeton.tool.JobState;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
import org.apache.hive.hcatalog.templeton.tool.ZooKeeperCleanup;
import org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage;

/**
 * The configuration for Templeton.  This merges the normal Hadoop
 * configuration with the Templeton specific variables.
 *
 * The Templeton configuration variables are described in
 * webhcat-default.xml
 *
 * The Templeton specific configuration is split into two layers
 *
 * 1. webhcat-default.xml - All the configuration variables that
 *    Templeton needs.  These are the defaults that ship with the app
 *    and should only be changed by the app developers.
 *
 * 2. webhcat-site.xml - The (possibly empty) configuration that the
 *    system administrator can set variables for their Hadoop cluster.
 *
 * The configuration files are loaded in this order with later files
 * overriding earlier ones.
 *
 * To find the configuration files, we first attempt to load a file
 * from the CLASSPATH and then look in the directory specified in the
 * TEMPLETON_HOME environment variable.
 *
 * In addition, the configuration files may reference any environment
 * variable through the special env prefix (env.NAME).  For example, the
 * hadoop executable could be specified using:
 *<pre>
 *      ${env.HADOOP_PREFIX}/bin/hadoop
 *</pre>
 */
public class AppConfig extends Configuration {
    /**
     * Hadoop configuration file names that {@code init()} looks for, in this order, inside
     * the directory returned by {@link #getHadoopConfDir()}.
     */
    public static final String[] HADOOP_CONF_FILENAMES = { "core-default.xml", "core-site.xml",
            "mapred-default.xml", "mapred-site.xml", "hdfs-site.xml" };

    /**
     * Environment variable names related to the Hadoop install location.
     * NOTE(review): not referenced anywhere else in this class; presumably used by callers.
     */
    public static final String[] HADOOP_PREFIX_VARS = { "HADOOP_PREFIX", "HADOOP_HOME" };

    /** Name of the environment variable read by {@link #getTempletonDir()}. */
    public static final String TEMPLETON_HOME_VAR = "TEMPLETON_HOME";
    /** Name of the environment variable read by {@link #getWebhcatConfDir()}. */
    public static final String WEBHCAT_CONF_DIR = "WEBHCAT_CONF_DIR";

    /**
     * WebHCat configuration file names that {@code init()} loads, in this order, first from
     * the classpath and otherwise from the directory returned by {@link #getTempletonDir()}.
     */
    public static final String[] TEMPLETON_CONF_FILENAMES = { "webhcat-default.xml", "webhcat-site.xml" };

    // Names of the WebHCat ("templeton.*") configuration properties.
    public static final String PORT = "templeton.port";
    public static final String EXEC_ENCODING_NAME = "templeton.exec.encoding";
    public static final String EXEC_ENVS_NAME = "templeton.exec.envs";
    public static final String EXEC_MAX_BYTES_NAME = "templeton.exec.max-output-bytes";
    public static final String EXEC_MAX_PROCS_NAME = "templeton.exec.max-procs";
    public static final String EXEC_TIMEOUT_NAME = "templeton.exec.timeout";
    public static final String HADOOP_QUEUE_NAME = "templeton.hadoop.queue.name";
    public static final String ENABLE_JOB_RECONNECT_DEFAULT = "templeton.enable.job.reconnect.default";
    public static final String HADOOP_NAME = "templeton.hadoop";
    public static final String HADOOP_CONF_DIR = "templeton.hadoop.conf.dir";
    public static final String HCAT_NAME = "templeton.hcat";
    public static final String PYTHON_NAME = "templeton.python";
    public static final String HIVE_ARCHIVE_NAME = "templeton.hive.archive";
    public static final String HIVE_PATH_NAME = "templeton.hive.path";
    public static final String MAPPER_MEMORY_MB = "templeton.mapper.memory.mb";
    public static final String MR_AM_MEMORY_MB = "templeton.mr.am.memory.mb";

    /**
     * see webhcat-default.xml
     */
    public static final String HIVE_HOME_PATH = "templeton.hive.home";
    /**
     * see webhcat-default.xml
     */
    public static final String HCAT_HOME_PATH = "templeton.hcat.home";
    /**
     * The value is a comma-separated list of name=value pairs.
     * In case some value is itself a comma-separated list, the comma needs to
     * be escaped with {@link org.apache.hadoop.util.StringUtils#ESCAPE_CHAR}.  See other usage
     * of escape/unescape methods in {@link org.apache.hadoop.util.StringUtils} in webhcat.
     */
    public static final String HIVE_PROPS_NAME = "templeton.hive.properties";
    public static final String SQOOP_ARCHIVE_NAME = "templeton.sqoop.archive";
    public static final String SQOOP_PATH_NAME = "templeton.sqoop.path";
    public static final String SQOOP_HOME_PATH = "templeton.sqoop.home";
    public static final String LIB_JARS_NAME = "templeton.libjars";
    public static final String PIG_ARCHIVE_NAME = "templeton.pig.archive";
    public static final String PIG_PATH_NAME = "templeton.pig.path";
    public static final String STREAMING_JAR_NAME = "templeton.streaming.jar";
    public static final String OVERRIDE_JARS_NAME = "templeton.override.jars";
    public static final String OVERRIDE_JARS_ENABLED = "templeton.override.enabled";
    public static final String TEMPLETON_CONTROLLER_MR_CHILD_OPTS = "templeton.controller.mr.child.opts";
    public static final String TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS = "templeton.controller.mr.am.java.opts";

    // Kerberos-related property names.
    public static final String KERBEROS_SECRET = "templeton.kerberos.secret";
    public static final String KERBEROS_PRINCIPAL = "templeton.kerberos.principal";
    public static final String KERBEROS_KEYTAB = "templeton.kerberos.keytab";

    // Callback retry property names.
    public static final String CALLBACK_INTERVAL_NAME = "templeton.callback.retry.interval";
    public static final String CALLBACK_RETRY_NAME = "templeton.callback.retry.attempts";

    //Hadoop property names (set by templeton logic)
    public static final String HADOOP_END_INTERVAL_NAME = "job.end.retry.interval";
    public static final String HADOOP_END_RETRY_NAME = "job.end.retry.attempts";
    public static final String HADOOP_END_URL_NAME = "job.end.notification.url";
    public static final String HADOOP_SPECULATIVE_NAME = "mapred.map.tasks.speculative.execution";
    public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts";
    public static final String HADOOP_MAP_MEMORY_MB = "mapreduce.map.memory.mb";
    public static final String HADOOP_MR_AM_JAVA_OPTS = "yarn.app.mapreduce.am.command-opts";
    public static final String HADOOP_MR_AM_MEMORY_MB = "yarn.app.mapreduce.am.resource.mb";
    public static final String UNIT_TEST_MODE = "templeton.unit.test.mode";
    /**
     * comma-separated list of artifacts to add to HADOOP_CLASSPATH env var in
     * LaunchMapper before launching Hive command
     */
    public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files";

    /** Logger shared by all instances of this class. */
    private static final Log LOG = LogFactory.getLog(AppConfig.class);

    /**
     * Builds the configuration by running {@code init()} and then logs the Hadoop version
     * reported by {@code VersionInfo.getVersion()}.
     */
    public AppConfig() {
        init();
        LOG.info("Using Hadoop version " + VersionInfo.getVersion());
    }

    /**
     * Populates this configuration.  The steps, in order, are:
     * <ol>
     *   <li>copy every environment variable into a property named {@code env.<NAME>};</li>
     *   <li>load each of {@link #TEMPLETON_CONF_FILENAMES}, from the classpath if present there,
     *       otherwise from the directory returned by {@link #getTempletonDir()};</li>
     *   <li>load each of {@link #HADOOP_CONF_FILENAMES} from the directory returned by
     *       {@link #getHadoopConfDir()};</li>
     *   <li>process the proxy-user configuration and the Hive properties;</li>
     *   <li>log a dump of the resulting environment and configuration.</li>
     * </ol>
     */
    private void init() {
        // Expose the process environment under the "env." prefix.
        for (Map.Entry<String, String> envVar : System.getenv().entrySet()) {
            set("env." + envVar.getKey(), envVar.getValue());
        }

        final String templetonHome = getTempletonDir();
        for (String confFile : TEMPLETON_CONF_FILENAMES) {
            logConfigLoadAttempt(templetonHome + File.separator + confFile);
            // The classpath copy wins; the file system is only consulted when it is absent.
            boolean loadedFromClasspath = loadOneClasspathConfig(confFile);
            if (!loadedFromClasspath) {
                loadOneFileConfig(templetonHome, confFile);
            }
        }

        final String hadoopConf = getHadoopConfDir();
        for (String confFile : HADOOP_CONF_FILENAMES) {
            logConfigLoadAttempt(hadoopConf + File.separator + confFile);
            loadOneFileConfig(hadoopConf, confFile);
        }

        ProxyUserSupport.processProxyuserConfig(this);
        handleHiveProperties();

        String environmentDump = dumpEnvironent();
        LOG.info(environmentDump);
    }

    /**
     * Merges a fixed set of Hive settings into {@link #HIVE_PROPS_NAME}.
     *
     * When the Hive tar is auto-shipped (for example when a Hive query or Pig script is
     * submitted via WebHCat), the Hive client is launched on a remote node where Hive has not
     * been installed, so it has to be handed the properties that make it connect to the right
     * Metastore, configure Tez, etc.  Those properties are looked up in the Hive configuration
     * and appended, as {@code key=value} items, to the list already stored under
     * {@link #HIVE_PROPS_NAME}.  A key that the user already set there directly is left
     * untouched, i.e. the user's value takes precedence.
     */
    private void handleHiveProperties() {
        HiveConf hiveSite = new HiveConf();//load hive-site.xml from classpath
        final String[] forwardedKeys = { "hive.metastore.uris", "hive.metastore.sasl.enabled",
                "hive.metastore.execute.setugi", "hive.execution.engine" };

        // Every element has the form "key=value".
        List<String> merged = new ArrayList<String>(hiveProps());
        for (String key : forwardedKeys) {
            String hiveValue = hiveSite.get(key);
            if (hiveValue == null) {
                continue;
            }
            if (!definesHiveProp(merged, key)) {
                merged.add(key + "=" + hiveValue);
            }
        }

        StringBuilder joined = new StringBuilder();
        for (String keyValue : merged) {
            if (joined.length() > 0) {
                joined.append(",");
            }
            // Escape the separator char so values that contain commas survive the later split.
            joined.append(StringUtils.escapeString(keyValue));
        }
        set(HIVE_PROPS_NAME, joined.toString());
    }

    /**
     * Tells whether {@code keyValues} already holds an item that starts with {@code key=}.
     */
    private static boolean definesHiveProp(List<String> keyValues, String key) {
        final String prefix = key + "=";
        for (String keyValue : keyValues) {
            if (keyValue.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Logs, at INFO level, that a configuration file is about to be looked up.
     *
     * @param path the file path to mention in the log message; it is only logged, not opened
     */
    private static void logConfigLoadAttempt(String path) {
        LOG.info("Attempting to load config file: " + path);
    }

    /**
     * Builds a textual snapshot of the process environment, this configuration, the JVM system
     * properties and a freshly created {@code HiveConf}.  Intended to be called once on WebHCat
     * start up to facilitate support/debugging.  Later it may be worth adding a REST call which
     * will return this data.
     *
     * @return the complete dump as a single string
     */
    private String dumpEnvironent() {
        StringBuilder report = TempletonUtils.dumpPropMap("========WebHCat System.getenv()========", System.getenv());

        report.append("START========WebHCat AppConfig.iterator()========: \n");
        dumpConfig(this, report);
        report.append("END========WebHCat AppConfig.iterator()========: \n");

        report.append(TempletonUtils.dumpPropMap("========WebHCat System.getProperties()========",
                System.getProperties()));

        report.append("START========\"new HiveConf()\"========\n");
        HiveConf hiveConf = new HiveConf();
        appendLocationLine(report, "hiveDefaultUrl=", hiveConf.getHiveDefaultLocation());
        appendLocationLine(report, "hiveSiteURL=", HiveConf.getHiveSiteLocation());
        appendLocationLine(report, "hiveServer2SiteUrl=", HiveConf.getHiveServer2SiteLocation());
        appendLocationLine(report, "hivemetastoreSiteUrl=", HiveConf.getMetastoreSiteLocation());
        dumpConfig(hiveConf, report);
        report.append("END========\"new HiveConf()\"========\n");

        return report.toString();
    }

    /**
     * Appends {@code prefix}, then {@code location}, then a newline to {@code out}.
     */
    private static void appendLocationLine(StringBuilder out, String prefix, Object location) {
        out.append(prefix);
        out.append(location);
        out.append('\n');
    }

    /**
     * Appends every entry of {@code conf} to {@code sb}, sorted by key.
     *
     * <ul>
     *   <li>Keys that look sensitive (see {@link #isSensitiveKey(String)}) are written as
     *       {@code key=<redacted>} so that secrets do not end up in the start-up log.</li>
     *   <li>Keys whose lower-cased name contains {@code "path"} are written as {@code key=}
     *       followed by one indented line per {@link File#pathSeparator}-separated element.</li>
     *   <li>Every other key is written as {@code key=value}.</li>
     * </ul>
     *
     * Values are read through {@code conf.get()} rather than taken from the iterator entry so
     * that variable substitution is applied.
     *
     * @param conf the configuration to dump
     * @param sb   the builder the dump is appended to
     */
    private static void dumpConfig(Configuration conf, StringBuilder sb) {
        Iterator<Map.Entry<String, String>> configIter = conf.iterator();
        List<Map.Entry<String, String>> configVals = new ArrayList<>();
        while (configIter.hasNext()) {
            configVals.add(configIter.next());
        }
        Collections.sort(configVals, new Comparator<Map.Entry<String, String>>() {
            @Override
            public int compare(Map.Entry<String, String> ent, Map.Entry<String, String> ent2) {
                return ent.getKey().compareTo(ent2.getKey());
            }
        });
        for (Map.Entry<String, String> entry : configVals) {
            String key = entry.getKey();
            String lowerKey = key.toLowerCase();
            if (isSensitiveKey(lowerKey)) {
                // Never write secret material (e.g. templeton.kerberos.secret) to the log.
                sb.append(key).append('=').append("<redacted>").append('\n');
                continue;
            }
            //use get() to make sure variable substitution works
            String value = conf.get(key);
            // A null value cannot be tokenized (StringTokenizer would throw), so it falls
            // through to the plain "key=null" form.
            if (value != null && lowerKey.contains("path")) {
                StringTokenizer st = new StringTokenizer(value, File.pathSeparator);
                sb.append(key).append("=\n");
                while (st.hasMoreTokens()) {
                    sb.append("    ").append(st.nextToken()).append(File.pathSeparator).append('\n');
                }
            } else {
                sb.append(key).append('=').append(value).append('\n');
            }
        }
    }

    /**
     * Tells whether a (lower-cased) property name suggests that its value is a secret.
     *
     * @param lowerCaseKey the property name, already converted to lower case
     * @return {@code true} if the name contains {@code "password"} or {@code "secret"}
     */
    private static boolean isSensitiveKey(String lowerCaseKey) {
        return lowerCaseKey.contains("password") || lowerCaseKey.contains("secret");
    }

    /**
     * Obtains the storage instance for this configuration via
     * {@code JobState.getStorageInstance(this)} and calls its {@code startCleanup} with this
     * configuration.
     */
    public void startCleanup() {
        JobState.getStorageInstance(this).startCleanup(this);
    }

    /**
     * Returns the value of the {@link #HADOOP_CONF_DIR} property ("templeton.hadoop.conf.dir").
     *
     * @return the configured value as returned by {@code get(HADOOP_CONF_DIR)}
     */
    public String getHadoopConfDir() {
        return get(HADOOP_CONF_DIR);
    }

    /**
     * Returns the value of the environment variable named by {@link #TEMPLETON_HOME_VAR}
     * ("TEMPLETON_HOME").
     *
     * @return the result of {@code System.getenv(TEMPLETON_HOME_VAR)}
     */
    public static String getTempletonDir() {
        return System.getenv(TEMPLETON_HOME_VAR);
    }

    /**
     * Returns the value of the environment variable named by {@link #WEBHCAT_CONF_DIR}
     * ("WEBHCAT_CONF_DIR").
     *
     * @return the result of {@code System.getenv(WEBHCAT_CONF_DIR)}
     */
    public static String getWebhcatConfDir() {
        return System.getenv(WEBHCAT_CONF_DIR);
    }

    /**
     * Adds the file {@code dir/fname} as a configuration resource if it exists.
     *
     * @param dir   the directory to look in; {@code null} means there is nothing to load
     * @param fname the file name inside {@code dir}
     * @return {@code true} if the file existed and was added, {@code false} otherwise
     */
    private boolean loadOneFileConfig(String dir, String fname) {
        if (dir == null) {
            return false;
        }
        File candidate = new File(dir, fname);
        if (!candidate.exists()) {
            return false;
        }
        addResource(new Path(candidate.getAbsolutePath()));
        LOG.info("loaded config file " + candidate.getAbsolutePath());
        return true;
    }

    /**
     * Adds the resource named {@code fname} as a configuration resource if
     * {@code getResource(fname)} can locate it.
     *
     * @param fname the resource name to look up
     * @return {@code true} if the resource was found and added, {@code false} otherwise
     */
    private boolean loadOneClasspathConfig(String fname) {
        final URL resource = getResource(fname);
        if (resource == null) {
            return false;
        }
        addResource(resource);
        LOG.info("loaded config from classpath " + resource);
        return true;
    }

    /**
     * Returns the value of the {@link #LIB_JARS_NAME} property ("templeton.libjars").
     *
     * @return the configured value as returned by {@code get(LIB_JARS_NAME)}
     */
    public String libJars() {
        return get(LIB_JARS_NAME);
    }

    /**
     * Returns the value of the {@link #HADOOP_QUEUE_NAME} property
     * ("templeton.hadoop.queue.name").
     *
     * @return the configured value as returned by {@code get(HADOOP_QUEUE_NAME)}
     */
    public String hadoopQueueName() {
        return get(HADOOP_QUEUE_NAME);
    }

    /**
     * Returns the value of the {@link #ENABLE_JOB_RECONNECT_DEFAULT} property
     * ("templeton.enable.job.reconnect.default").  The value is returned as the raw string;
     * no boolean parsing happens here.
     *
     * @return the configured value as returned by {@code get(ENABLE_JOB_RECONNECT_DEFAULT)}
     */
    public String enableJobReconnectDefault() {
        return get(ENABLE_JOB_RECONNECT_DEFAULT);
    }

    /**
     * Returns the value of the {@link #HADOOP_NAME} property ("templeton.hadoop").
     *
     * @return the configured value as returned by {@code get(HADOOP_NAME)}
     */
    public String clusterHadoop() {
        return get(HADOOP_NAME);
    }

    /**
     * Returns the value of the {@link #HCAT_NAME} property ("templeton.hcat").
     *
     * @return the configured value as returned by {@code get(HCAT_NAME)}
     */
    public String clusterHcat() {
        return get(HCAT_NAME);
    }

    /**
     * Returns the value of the {@link #PYTHON_NAME} property ("templeton.python").
     *
     * @return the configured value as returned by {@code get(PYTHON_NAME)}
     */
    public String clusterPython() {
        return get(PYTHON_NAME);
    }

    /**
     * Returns the value of the {@link #PIG_PATH_NAME} property ("templeton.pig.path").
     *
     * @return the configured value as returned by {@code get(PIG_PATH_NAME)}
     */
    public String pigPath() {
        return get(PIG_PATH_NAME);
    }

    /**
     * Returns the value of the {@link #PIG_ARCHIVE_NAME} property ("templeton.pig.archive").
     *
     * @return the configured value as returned by {@code get(PIG_ARCHIVE_NAME)}
     */
    public String pigArchive() {
        return get(PIG_ARCHIVE_NAME);
    }

    /**
     * Returns the value of the {@link #HIVE_PATH_NAME} property ("templeton.hive.path").
     *
     * @return the configured value as returned by {@code get(HIVE_PATH_NAME)}
     */
    public String hivePath() {
        return get(HIVE_PATH_NAME);
    }

    /**
     * Returns the value of the {@link #HIVE_ARCHIVE_NAME} property ("templeton.hive.archive").
     *
     * @return the configured value as returned by {@code get(HIVE_ARCHIVE_NAME)}
     */
    public String hiveArchive() {
        return get(HIVE_ARCHIVE_NAME);
    }

    /**
     * Returns the value of the {@link #SQOOP_PATH_NAME} property ("templeton.sqoop.path").
     *
     * @return the configured value as returned by {@code get(SQOOP_PATH_NAME)}
     */
    public String sqoopPath() {
        return get(SQOOP_PATH_NAME);
    }

    /**
     * Returns the value of the {@link #SQOOP_ARCHIVE_NAME} property ("templeton.sqoop.archive").
     *
     * @return the configured value as returned by {@code get(SQOOP_ARCHIVE_NAME)}
     */
    public String sqoopArchive() {
        return get(SQOOP_ARCHIVE_NAME);
    }

    /**
     * Returns the value of the {@link #SQOOP_HOME_PATH} property ("templeton.sqoop.home").
     *
     * @return the configured value as returned by {@code get(SQOOP_HOME_PATH)}
     */
    public String sqoopHome() {
        return get(SQOOP_HOME_PATH);
    }

    /**
     * Returns the value of the {@link #STREAMING_JAR_NAME} property ("templeton.streaming.jar").
     *
     * @return the configured value as returned by {@code get(STREAMING_JAR_NAME)}
     */
    public String streamingJar() {
        return get(STREAMING_JAR_NAME);
    }

    /**
     * Returns the value of the {@link #KERBEROS_SECRET} property ("templeton.kerberos.secret").
     *
     * @return the configured value as returned by {@code get(KERBEROS_SECRET)}
     */
    public String kerberosSecret() {
        return get(KERBEROS_SECRET);
    }

    /**
     * Returns the value of the {@link #KERBEROS_PRINCIPAL} property
     * ("templeton.kerberos.principal").
     *
     * @return the configured value as returned by {@code get(KERBEROS_PRINCIPAL)}
     */
    public String kerberosPrincipal() {
        return get(KERBEROS_PRINCIPAL);
    }

    /**
     * Returns the value of the {@link #KERBEROS_KEYTAB} property ("templeton.kerberos.keytab").
     *
     * @return the configured value as returned by {@code get(KERBEROS_KEYTAB)}
     */
    public String kerberosKeytab() {
        return get(KERBEROS_KEYTAB);
    }

    /**
     * Returns the value of the {@link #TEMPLETON_CONTROLLER_MR_CHILD_OPTS} property
     * ("templeton.controller.mr.child.opts").
     *
     * @return the configured value as returned by {@code get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS)}
     */
    public String controllerMRChildOpts() {
        return get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS);
    }

    /**
     * Returns the value of the {@link #TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS} property
     * ("templeton.controller.mr.am.java.opts").
     *
     * @return the configured value as returned by
     *         {@code get(TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS)}
     */
    public String controllerAMChildOpts() {
        return get(TEMPLETON_CONTROLLER_MR_AM_JAVA_OPTS);
    }

    /**
     * Returns the value of the {@link #MAPPER_MEMORY_MB} property ("templeton.mapper.memory.mb").
     * The value is returned as the raw string; no numeric parsing happens here.
     *
     * @return the configured value as returned by {@code get(MAPPER_MEMORY_MB)}
     */
    public String mapperMemoryMb() {
        return get(MAPPER_MEMORY_MB);
    }

    /**
     * Returns the value of the {@link #MR_AM_MEMORY_MB} property ("templeton.mr.am.memory.mb").
     * The value is returned as the raw string; no numeric parsing happens here.
     *
     * @return the configured value as returned by {@code get(MR_AM_MEMORY_MB)}
     */
    public String amMemoryMb() {
        return get(MR_AM_MEMORY_MB);
    }

    /**
     * Returns the items stored under {@link #HIVE_PROPS_NAME}, split into individual strings
     * with their escape characters removed.
     *
     * @return an empty list when the split yields {@code null}; otherwise a fixed-size list
     *         holding the unescaped items in their original order
     * @see  #HIVE_PROPS_NAME
     */
    public Collection<String> hiveProps() {
        String[] escapedItems = StringUtils.split(get(HIVE_PROPS_NAME));
        if (escapedItems == null) {
            return Collections.emptyList();
        }
        // The raw data was (possibly) escaped to make the split work; strip the escape chars
        // now so they don't interfere with downstream processing.
        String[] plainItems = new String[escapedItems.length];
        int next = 0;
        for (String escapedItem : escapedItems) {
            plainItems[next++] = TempletonUtils.unEscapeString(escapedItem);
        }
        return Arrays.asList(plainItems);
    }

    /**
     * Returns the {@link #OVERRIDE_JARS_NAME} property as an array, unless overriding has been
     * switched off through {@link #OVERRIDE_JARS_ENABLED} (which defaults to {@code true}).
     *
     * @return {@code getStrings(OVERRIDE_JARS_NAME)} when overriding is enabled,
     *         {@code null} when it is disabled
     */
    public String[] overrideJars() {
        boolean overrideEnabled = getBoolean(OVERRIDE_JARS_ENABLED, true);
        return overrideEnabled ? getStrings(OVERRIDE_JARS_NAME) : null;
    }

    /**
     * Returns the raw {@link #OVERRIDE_JARS_NAME} property, unless overriding has been switched
     * off through {@link #OVERRIDE_JARS_ENABLED} (which defaults to {@code true}).
     *
     * @return {@code get(OVERRIDE_JARS_NAME)} when overriding is enabled,
     *         {@code null} when it is disabled
     */
    public String overrideJarsString() {
        boolean overrideEnabled = getBoolean(OVERRIDE_JARS_ENABLED, true);
        return overrideEnabled ? get(OVERRIDE_JARS_NAME) : null;
    }

    /**
     * Returns the {@code ZooKeeperCleanup.ZK_CLEANUP_INTERVAL} setting as a {@code long}.
     *
     * @return the configured value, or 43,200,000 when the property is not set
     */
    public long zkCleanupInterval() {
        // 12 * 60 * 60 * 1000, i.e. twelve hours if the unit is milliseconds
        // (presumably; confirm against ZooKeeperCleanup).
        final long defaultInterval = 12L * 60L * 60L * 1000L;
        return getLong(ZooKeeperCleanup.ZK_CLEANUP_INTERVAL, defaultInterval);
    }

    /**
     * Returns the {@code ZooKeeperCleanup.ZK_CLEANUP_MAX_AGE} setting as a {@code long}.
     *
     * @return the configured value, or 604,800,000 when the property is not set
     */
    public long zkMaxAge() {
        // 7 * 24 * 60 * 60 * 1000, i.e. seven days if the unit is milliseconds
        // (presumably; confirm against ZooKeeperCleanup).
        final long defaultMaxAge = 7L * 24L * 60L * 60L * 1000L;
        return getLong(ZooKeeperCleanup.ZK_CLEANUP_MAX_AGE, defaultMaxAge);
    }

    /**
     * Returns the value of the property named by {@code ZooKeeperStorage.ZK_HOSTS}.
     *
     * @return the configured value as returned by {@code get(ZooKeeperStorage.ZK_HOSTS)}
     */
    public String zkHosts() {
        return get(ZooKeeperStorage.ZK_HOSTS);
    }

    /**
     * Returns the {@code ZooKeeperStorage.ZK_SESSION_TIMEOUT} setting as an {@code int}.
     *
     * @return the configured value, or 30000 when the property is not set
     */
    public int zkSessionTimeout() {
        final int defaultTimeout = 30000;
        return getInt(ZooKeeperStorage.ZK_SESSION_TIMEOUT, defaultTimeout);
    }
}