gobblin.hive.HiveMetastoreClientPool.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.hive.HiveMetastoreClientPool.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.hive;

import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import org.apache.commons.pool2.impl.GenericObjectPool;
import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;

import com.google.common.base.Optional;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.google.common.collect.Maps;
import com.google.common.io.Closer;

import gobblin.configuration.State;
import gobblin.util.AutoReturnableObject;

import lombok.Getter;

/**
 * A pool of {@link IMetaStoreClient} for querying the Hive metastore.
 *
 * <p>Pools obtained through {@link #get} are shared through a static cache keyed by metastore URI. Cache entries
 * expire {@code DEFAULT_POOL_CACHE_TTL_MINUTES} minutes after their last access, and a pool is closed when its
 * entry is removed from the cache.
 *
 * <p>NOTE(review): because removal closes the pool, a caller that holds on to a pool returned by {@link #get}
 * without calling {@link #get} again may end up with a closed pool once the entry has expired -- confirm that
 * callers re-fetch the pool instead of storing it long term.
 */
public class HiveMetastoreClientPool {

    /** Underlying commons-pool2 pool that hands out the metastore clients. */
    private final GenericObjectPool<IMetaStoreClient> pool;
    /** Factory used by {@link #pool} to create clients; also the source of {@link #hiveConf}. */
    private final HiveMetaStoreClientFactory factory;
    @Getter
    private final HiveConf hiveConf;
    @Getter
    private final HiveRegProps hiveRegProps;

    private static final long DEFAULT_POOL_CACHE_TTL_MINUTES = 30;

    /** Shared pools, keyed by metastore URI (absent key = default metastore). Removed pools are closed. */
    private static final Cache<Optional<String>, HiveMetastoreClientPool> poolCache = CacheBuilder.newBuilder()
            .expireAfterAccess(DEFAULT_POOL_CACHE_TTL_MINUTES, TimeUnit.MINUTES)
            .removalListener(new RemovalListener<Optional<String>, HiveMetastoreClientPool>() {
                @Override
                public void onRemoval(RemovalNotification<Optional<String>, HiveMetastoreClientPool> notification) {
                    HiveMetastoreClientPool removedPool = notification.getValue();
                    if (removedPool != null) {
                        removedPool.close();
                    }
                }
            }).build();

    /**
     * Get a {@link HiveMetastoreClientPool} for the requested metastore URI. Useful for using the same pools across
     * different classes in the code base. Note that if a pool already exists for that metastore, the max number of
     * objects available will be unchanged, and it might be lower than requested by this method.
     *
     * @param properties {@link Properties} used to generate the pool. Only used when no pool is cached yet for
     *                   {@code metastoreURI}.
     * @param metastoreURI URI of the Hive metastore. If absent, use default metastore.
     * @return a {@link HiveMetastoreClientPool}.
     * @throws IOException if loading the pool into the cache fails with an {@link ExecutionException}; the
     *                     original failure is attached as the cause.
     */
    public static HiveMetastoreClientPool get(final Properties properties, final Optional<String> metastoreURI)
            throws IOException {
        try {
            return poolCache.get(metastoreURI, new Callable<HiveMetastoreClientPool>() {
                @Override
                public HiveMetastoreClientPool call() throws Exception {
                    return new HiveMetastoreClientPool(properties, metastoreURI);
                }
            });
        } catch (ExecutionException ee) {
            throw new IOException("Failed to get " + HiveMetastoreClientPool.class.getSimpleName(), ee.getCause());
        }
    }

    /**
     * Constructor for {@link HiveMetastoreClientPool}.
     *
     * <p>Both the maximum total and the maximum idle number of pooled clients are set to
     * {@link HiveRegProps#getNumThreads()} as derived from {@code properties}.
     *
     * @param properties {@link Properties} used to build the {@link HiveRegProps} of this pool.
     * @param metastoreURI URI of the Hive metastore, passed to the client factory.
     * @deprecated It is recommended to use the static {@link #get} method instead. Use this constructor only if
     *             different pool configurations are required.
     */
    @Deprecated
    public HiveMetastoreClientPool(Properties properties, Optional<String> metastoreURI) {
        this.hiveRegProps = new HiveRegProps(new State(properties));
        GenericObjectPoolConfig config = new GenericObjectPoolConfig();
        config.setMaxTotal(this.hiveRegProps.getNumThreads());
        config.setMaxIdle(this.hiveRegProps.getNumThreads());

        this.factory = new HiveMetaStoreClientFactory(metastoreURI);
        this.pool = new GenericObjectPool<>(this.factory, config);
        this.hiveConf = this.factory.getHiveConf();
    }

    /**
     * Close the underlying object pool.
     */
    public void close() {
        this.pool.close();
    }

    /**
     * @return an auto returnable wrapper around a {@link IMetaStoreClient}.
     * @throws IOException
     * Note: if you must acquire multiple clients, please use {@link #safeGetClients} instead, as this call may
     * deadlock.
     */
    public AutoReturnableObject<IMetaStoreClient> getClient() throws IOException {
        return new AutoReturnableObject<>(this.pool);
    }

    /**
     * A class wrapping multiple named {@link IMetaStoreClient}s. Closing it closes every wrapped client handle.
     */
    public static class MultiClient implements AutoCloseable {
        private final Map<String, AutoReturnableObject<IMetaStoreClient>> clients;
        private final Closer closer;

        /**
         * Acquire one client per entry of {@code namedPools}.
         *
         * @param namedPools map from client name to the pool the client must come from. The same pool may appear
         *                   under several names.
         * @throws IOException if a bounded pool can never provide as many simultaneous clients as requested from
         *                     it, or if acquiring a client fails. In the latter case every client acquired so far
         *                     is released before the exception propagates.
         */
        private MultiClient(Map<String, HiveMetastoreClientPool> namedPools) throws IOException {
            this.clients = Maps.newHashMap();
            this.closer = Closer.create();

            // Count how many clients each distinct pool has to hand out at the same time.
            Map<HiveMetastoreClientPool, Integer> requiredClientsPerPool = Maps.newHashMap();
            for (HiveMetastoreClientPool requestedPool : namedPools.values()) {
                Integer alreadyRequired = requiredClientsPerPool.get(requestedPool);
                requiredClientsPerPool.put(requestedPool, alreadyRequired == null ? 1 : alreadyRequired + 1);
            }

            // Fail fast when a pool could never satisfy the request, instead of blocking on it forever.
            for (Map.Entry<HiveMetastoreClientPool, Integer> entry : requiredClientsPerPool.entrySet()) {
                int maxTotal = entry.getKey().pool.getMaxTotal();
                // commons-pool2 treats a negative maxTotal as "no limit", so only bounded pools are checked.
                if (maxTotal >= 0 && maxTotal < entry.getValue()) {
                    throw new IOException(String.format(
                            "Not enough clients available in the pool. Required %d, max available %d.",
                            entry.getValue(), maxTotal));
                }
            }

            try {
                for (Map.Entry<String, HiveMetastoreClientPool> entry : namedPools.entrySet()) {
                    this.clients.put(entry.getKey(), this.closer.register(entry.getValue().getClient()));
                }
            } catch (IOException | RuntimeException acquisitionFailure) {
                // The caller never receives this instance when the constructor fails, so the clients acquired
                // so far must be released here or they would stay checked out of their pools.
                try {
                    this.closer.close();
                } catch (IOException | RuntimeException releaseFailure) {
                    acquisitionFailure.addSuppressed(releaseFailure);
                }
                throw acquisitionFailure;
            }
        }

        /**
         * Get the {@link IMetaStoreClient} with the provided name.
         *
         * @param name name of the client, as used in the map passed to {@link #safeGetClients}.
         * @return the client registered under {@code name}.
         * @throws IOException if there is no client with that name.
         */
        public IMetaStoreClient getClient(String name) throws IOException {
            AutoReturnableObject<IMetaStoreClient> client = this.clients.get(name);
            if (client == null) {
                throw new IOException("There is no client with name " + name);
            }
            return client.get();
        }

        /**
         * Close every client handle acquired by this {@link MultiClient}.
         *
         * @throws IOException if closing any of the handles fails.
         */
        @Override
        public void close() throws IOException {
            this.closer.close();
        }
    }

    /**
     * A method to get multiple {@link IMetaStoreClient}s while preventing deadlocks. The method is synchronized
     * on the class, so only one thread at a time acquires a batch of clients through it.
     *
     * @param namedPools A map from String to {@link HiveMetastoreClientPool}.
     * @return a {@link MultiClient} with a {@link IMetaStoreClient} for each entry in the input map. The client can
     *          be retrieved by its name in the input map.
     * @throws IOException if a pool cannot provide the number of clients requested from it, or if acquiring a
     *                     client fails.
     */
    public static synchronized MultiClient safeGetClients(Map<String, HiveMetastoreClientPool> namedPools)
            throws IOException {
        return new MultiClient(namedPools);
    }

}