org.elasticsearch.hadoop.integration.hive.HiveEmbeddedServer2.java Source code

Java tutorial

Introduction

Here is the source code for org.elasticsearch.hadoop.integration.hive.HiveEmbeddedServer2.java

Source

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.integration.hive;

import java.io.File;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
import org.apache.hive.service.Service;
import org.apache.hive.service.cli.CLIService;
import org.apache.hive.service.cli.OperationHandle;
import org.apache.hive.service.cli.RowSet;
import org.apache.hive.service.cli.SessionHandle;
import org.apache.hive.service.server.HiveServer2;
import org.elasticsearch.hadoop.HdpBootstrap;
import org.elasticsearch.hadoop.mr.NTFSLocalFileSystem;
import org.elasticsearch.hadoop.util.Assert;
import org.elasticsearch.hadoop.util.ReflectionUtils;
import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.TestUtils;

/**
 * Utility starting a local/embedded Hive server for testing purposes.
 * Uses sensible defaults to properly clean between reruns.
 *
 * Additionally, it wrangles the Hive internals so that jobs are executed locally rather than
 * within a child JVM (which is what Hive calls "local") or externally.
 */
class HiveEmbeddedServer2 implements HiveInstance {
    private static final Log log = LogFactory.getLog(HiveEmbeddedServer2.class);

    // the running server; null while stopped
    private HiveServer2 hiveServer;

    // settings supplied by the test; copied into the Hive configuration on startup
    private final Properties testSettings;
    // configuration the server was initialized with; null while stopped
    private HiveConf config;
    // Thrift port picked for the current server instance
    private int port;

    /**
     * Creates a (not yet started) embedded server.
     *
     * @param settings test settings copied into the Hive configuration when the server is configured
     */
    public HiveEmbeddedServer2(Properties settings) {
        this.testSettings = settings;
    }

    /**
     * Configures and starts the embedded HiveServer2 on a free port, then blocks until it accepts sessions.
     * Does nothing (besides logging) if the server is already running.
     *
     * @throws Exception if the configuration cannot be built or the server does not come up in time
     */
    @Override
    public void start() throws Exception {
        log.info("Starting Hive Local/Embedded Server...");
        if (hiveServer == null) {
            config = configure();
            hiveServer = new HiveServer2();
            port = MetaStoreUtils.findFreePort();
            config.setIntVar(ConfVars.HIVE_SERVER2_THRIFT_PORT, port);
            hiveServer.init(config);
            hiveServer.start();
            waitForStartup();
        }
    }

    /**
     * Polls the server once per second, for up to one minute, until a session can be opened.
     *
     * @throws TimeoutException if no session could be opened within the allotted time; the last
     *         connection failure (if any) is attached as the cause
     * @throws Exception if the thread is interrupted while waiting or the probe session cannot be closed
     */
    private void waitForStartup() throws Exception {
        long timeout = TimeUnit.MINUTES.toMillis(1);
        long unitOfWait = TimeUnit.SECONDS.toMillis(1);

        CLIService hs2Client = getServiceClientInternal();
        Exception lastFailure = null;

        for (int interval = 0; interval < timeout / unitOfWait; interval++) {
            Thread.sleep(unitOfWait);

            SessionHandle sessionHandle;
            try {
                Map<String, String> sessionConf = new HashMap<String, String>();
                sessionHandle = hs2Client.openSession("foo", "bar", sessionConf);
            } catch (Exception e) {
                // service not started yet - remember why and retry
                lastFailure = e;
                continue;
            }

            // only close a session that was actually opened; closing a null handle from a finally
            // block would throw and abort the retry loop on the very first failed attempt
            hs2Client.closeSession(sessionHandle);
            return;
        }

        TimeoutException timeoutException = new TimeoutException("Couldn't get a hold of HiveServer2...");
        if (lastFailure != null) {
            timeoutException.initCause(lastFailure);
        }
        throw timeoutException;
    }

    /**
     * Looks up the {@link CLIService} among the services registered on the running server.
     *
     * @return the server's CLI service
     * @throws IllegalStateException if the server exposes no {@link CLIService}
     */
    private CLIService getServiceClientInternal() {
        for (Service service : hiveServer.getServices()) {
            if (service instanceof CLIService) {
                return (CLIService) service;
            }
        }
        throw new IllegalStateException("Cannot find CLIService");
    }

    // Hive adds automatically the Hive builtin jars - this thread-local cleans that up
    // used in Hive up to 1.2
    // NOTE(review): the line installing this thread-local in configure() is commented out,
    // so within this file the class is currently not wired in.
    private static class InterceptingThreadLocal extends InheritableThreadLocal<SessionState> {

        // changed from Hive 1.2 because ... why not
        //private static class InterceptingThreadLocal extends InheritableThreadLocal<SessionStates> {

        /**
         * Strips the JAR resources from the given session state before storing it.
         */
        @Override
        public void set(SessionState value) {
            deleteResource(value, ResourceType.JAR);
            super.set(value);
        }
    }

    /**
     * Minimal authentication provider reporting the current OS user (the {@code user.name} system property)
     * and a single group named {@code "0"}. Registered as Hive's authenticator manager when running on Windows.
     */
    private static class DummyHiveAuthenticationProvider implements HiveAuthenticationProvider {

        private Configuration conf;

        @Override
        public void setConf(Configuration conf) {
            this.conf = conf;
        }

        @Override
        public Configuration getConf() {
            return conf;
        }

        @Override
        public String getUserName() {
            return System.getProperty("user.name");
        }

        @Override
        public List<String> getGroupNames() {
            return Collections.singletonList("0");
        }

        @Override
        public void destroy() throws HiveException {
            // nothing to release
        }

        // introduced in Hive 0.13
        @Override
        public void setSessionState(SessionState ss) {
            // the session state is not needed by this provider
        }
    }

    /**
     * Builds the Hive configuration used by the embedded server: wipes the scratch directory, applies the
     * test settings, points warehouse/metastore at randomized locations under the scratch directory and
     * forces the local file system.
     *
     * @return a fresh {@link HiveConf} copy of the assembled configuration
     * @throws Exception if the reflective access to Hadoop/Hive internals fails
     */
    private HiveConf configure() throws Exception {
        String scratchDir = NTFSLocalFileSystem.SCRATCH_DIR;

        // start from a clean slate so reruns do not see leftovers
        File scratchDirFile = new File(scratchDir);
        TestUtils.delete(scratchDirFile);

        Configuration cfg = new Configuration();
        HiveConf conf = new HiveConf(cfg, HiveConf.class);
        conf.addToRestrictList("columns.comments");
        refreshConfig(conf);

        HdpBootstrap.hackHadoopStagingOnWin();

        // work-around for NTFS FS
        // set permissive permissions since otherwise, on some OS it fails
        if (TestUtils.isWindows()) {
            conf.set("fs.file.impl", NTFSLocalFileSystem.class.getName());
            conf.set("hive.scratch.dir.permission", "650");
            conf.setVar(ConfVars.SCRATCHDIRPERMISSION, "650");
            conf.set("hive.server2.enable.doAs", "false");
            conf.set("hive.execution.engine", "mr");
            //conf.set("hadoop.bin.path", getClass().getClassLoader().getResource("hadoop.cmd").getPath());
            System.setProperty("path.separator", ";");
            conf.setVar(HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER,
                    DummyHiveAuthenticationProvider.class.getName());
        } else {
            conf.set("hive.scratch.dir.permission", "777");
            conf.setVar(ConfVars.SCRATCHDIRPERMISSION, "777");
            scratchDirFile.mkdirs();
            // also set the permissions manually since Hive doesn't do it...
            scratchDirFile.setWritable(true, false);
        }

        // randomize the warehouse/metastore locations so each run gets its own
        int random = new Random().nextInt();

        conf.set("hive.metastore.warehouse.dir", scratchDir + "/warehouse" + random);
        conf.set("hive.metastore.metadb.dir", scratchDir + "/metastore_db" + random);
        conf.set("hive.exec.scratchdir", scratchDir);
        conf.set("fs.permissions.umask-mode", "022");
        conf.set("javax.jdo.option.ConnectionURL",
                "jdbc:derby:;databaseName=" + scratchDir + "/metastore_db" + random + ";create=true");
        conf.set("hive.metastore.local", "true");
        conf.set("hive.aux.jars.path", "");
        conf.set("hive.added.jars.path", "");
        conf.set("hive.added.files.path", "");
        conf.set("hive.added.archives.path", "");
        conf.set("fs.default.name", "file:///");

        // clear mapred.job.tracker - Hadoop defaults to 'local' if not defined. Hive however expects this to be set to 'local' - if it's not, it does a remote execution (i.e. no child JVM)
        // the backing Properties are reached reflectively so the keys are removed from the underlying store
        Field field = Configuration.class.getDeclaredField("properties");
        field.setAccessible(true);
        Properties props = (Properties) field.get(conf);
        props.remove("mapred.job.tracker");
        props.remove("mapreduce.framework.name");
        props.setProperty("fs.default.name", "file:///");

        // intercept SessionState to clean the threadlocal
        // NOTE(review): the interception itself is disabled below, so this lookup currently only
        // verifies that the 'tss' field exists (and fails if it does not) - confirm whether it is still needed
        Field tss = SessionState.class.getDeclaredField("tss");
        tss.setAccessible(true);
        //tss.set(null, new InterceptingThreadLocal());

        return new HiveConf(conf);
    }

    /**
     * Unsets every {@code es.*} property of the given configuration that is not part of the test settings.
     *
     * @param conf configuration to clean
     */
    private void removeESSettings(HiveConf conf) {
        //delete all "es" properties
        Set<String> props = testSettings.stringPropertyNames();
        Iterator<Map.Entry<String, String>> iter = conf.iterator();
        while (iter.hasNext()) {
            Entry<String, String> entry = iter.next();
            String key = entry.getKey();
            // remove transient settings only to avoid reloading the configuration (which might override some manual settings)
            if (key.startsWith("es.") && !props.contains(key)) {
                // NB: don't use remove since the iterator works on a copy not on the real thing
                conf.unset(key);
            }
        }
    }

    /**
     * Drops the transient {@code es.*} settings from the given configuration and (re)applies all test settings.
     *
     * @param conf configuration to refresh
     */
    private void refreshConfig(HiveConf conf) {
        removeESSettings(conf);
        // copy test settings
        Enumeration<?> names = testSettings.propertyNames();

        while (names.hasMoreElements()) {
            String key = names.nextElement().toString();
            String value = testSettings.getProperty(key);
            conf.set(key, value);
        }
    }

    /**
     * Removes the transient {@code es.*} settings from the configuration of the running server.
     * Operates on the configuration created by {@link #start()}.
     */
    public void removeESSettings() {
        removeESSettings(config);

        // clear session state
        //        SessionState sessionState = SessionState.get();
        //        if (sessionState != null) {
        //            cleanConfig(sessionState.getConf());
        //        }
    }

    /**
     * Executes the given command in a new, short-lived session against the embedded server.
     * {@code ADD JAR} commands are skipped.
     *
     * @param cmd Hive command to run
     * @return one entry per result row, with the row's columns joined by {@code ","};
     *         an empty list when the command is skipped or yields no result set
     * @throws Exception if the session cannot be opened or the statement fails
     */
    @Override
    public List<String> execute(String cmd) throws Exception {
        if (cmd.toUpperCase().startsWith("ADD JAR")) {
            // skip the jar since we're running in local mode
            System.out.println("Skipping ADD JAR in local/embedded mode");
            return Collections.emptyList();
        }
        // remove bogus configuration
        config.set("columns.comments", "");
        CLIService client = getServiceClientInternal();
        SessionHandle sh = null;
        try {
            Map<String, String> opConf = new HashMap<String, String>();
            sh = client.openSession("anonymous", "anonymous", opConf);
            OperationHandle oh = client.executeStatement(sh, cmd, opConf);

            if (oh.hasResultSet()) {
                RowSet rows = client.fetchResults(oh);
                List<String> result = new ArrayList<String>(rows.numRows());
                for (Object[] objects : rows) {
                    result.add(StringUtils.concatenate(objects, ","));
                }
                return result;
            }
            return Collections.emptyList();

        } finally {
            // the session may never have been opened if openSession itself failed
            if (sh != null) {
                client.closeSession(sh);
            }
        }
    }

    /**
     * Stops the embedded server (if running) and discards its configuration.
     */
    @Override
    public void stop() {
        if (hiveServer != null) {
            log.info("Stopping Hive Local/Embedded Server...");
            hiveServer.stop();
            hiveServer = null;
            config = null;
        }
    }

    /**
     * Reflectively invokes the resource-deletion method of {@link SessionState}, trying the
     * {@code delete_resource} name first and falling back to {@code delete_resources}.
     *
     * @param value session state to clean
     * @param type kind of resource to delete
     */
    private static void deleteResource(SessionState value, ResourceType type) {
        // Hive < 0.14
        Method method = ReflectionUtils.findMethod(SessionState.class, "delete_resource", ResourceType.class);
        if (method == null) {
            method = ReflectionUtils.findMethod(SessionState.class, "delete_resources", ResourceType.class);
        }

        Assert.notNull(method, "Cannot detect delete resource(s) method on SessionState");
        ReflectionUtils.invoke(method, value, type);
    }
}