gobblin.hive.metastore.HiveMetaStoreBasedRegister.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.hive.metastore.HiveMetaStoreBasedRegister.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.hive.metastore;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import lombok.extern.slf4j.Slf4j;

import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.thrift.TException;
import org.joda.time.DateTime;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.primitives.Ints;

import gobblin.annotation.Alpha;
import gobblin.configuration.State;
import gobblin.hive.HiveLock;
import gobblin.hive.HiveMetastoreClientPool;
import gobblin.hive.HivePartition;
import gobblin.hive.HiveRegProps;
import gobblin.hive.HiveRegister;
import gobblin.hive.HiveRegistrationUnit.Column;
import gobblin.hive.HiveTable;
import gobblin.hive.spec.HiveSpec;
import gobblin.metrics.GobblinMetrics;
import gobblin.metrics.GobblinMetricsRegistry;
import gobblin.metrics.MetricContext;
import gobblin.metrics.event.EventSubmitter;
import gobblin.util.AutoCloseableLock;
import gobblin.util.AutoReturnableObject;

/**
 * An implementation of {@link HiveRegister} that uses {@link IMetaStoreClient} for Hive registration.
 *
 * <p>
 *   An instance of this class is constructed with a {@link State} object or obtained via
 *   {@link HiveRegister#get(State)}. Property {@link HiveRegProps#HIVE_DB_ROOT_DIR} is required for registering
 *   a table or a partition if the database does not exist.
 * </p>
 *
 * <p>
 *   The {@link #register(HiveSpec)} method is asynchronous and returns immediately. Registration is performed in a
 *   thread pool whose size is controlled by {@link HiveRegProps#HIVE_REGISTER_THREADS}.
 * </p>
 *
 * @author Ziyang Liu
 */
@Slf4j
@Alpha
public class HiveMetaStoreBasedRegister extends HiveRegister {

    private final HiveMetastoreClientPool clientPool;
    private final HiveLock locks = new HiveLock();
    private final EventSubmitter eventSubmitter;

    /**
     * Creates a register backed by a pooled {@link IMetaStoreClient}.
     *
     * @param state configuration state; see {@link HiveRegProps} for recognized properties
     * @param metastoreURI optional Hive metastore URI; absent means the pool uses its default
     * @throws IOException if the metastore client pool cannot be obtained
     */
    public HiveMetaStoreBasedRegister(State state, Optional<String> metastoreURI) throws IOException {
        super(state);

        // NOTE(review): a GenericObjectPoolConfig used to be built here (sized by
        // HiveRegProps.getNumThreads()) but was never passed to the pool, so it had no
        // effect; the dead configuration has been removed. Pool sizing is determined by
        // HiveMetastoreClientPool itself — confirm it honors the same properties.
        this.clientPool = HiveMetastoreClientPool.get(this.props.getProperties(), metastoreURI);

        MetricContext metricContext = GobblinMetricsRegistry.getInstance().getMetricContext(state,
                HiveMetaStoreBasedRegister.class, GobblinMetrics.getCustomTagsFromState(state));

        this.eventSubmitter = new EventSubmitter.Builder(metricContext, "gobblin.hive.HiveMetaStoreBasedRegister")
                .build();
    }

    /**
     * Registers the path described by {@code spec}: ensures the database exists, creates or
     * alters the table, and, when the spec carries a partition, adds or alters that partition.
     * Submits a success or failure event either way.
     *
     * @throws IOException wrapping any Thrift failure from the metastore
     */
    @Override
    protected void registerPath(HiveSpec spec) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
            Table table = HiveMetaStoreUtils.getTable(spec.getTable());

            createDbIfNotExists(client.get(), table.getDbName());
            createOrAlterTable(client.get(), table, spec);

            Optional<HivePartition> partitionOpt = spec.getPartition();
            if (partitionOpt.isPresent()) {
                Partition nativePartition = HiveMetaStoreUtils.getPartition(partitionOpt.get());
                addOrAlterPartition(client.get(), table, nativePartition, spec);
            }
            HiveMetaStoreEventHelper.submitSuccessfulPathRegistration(this.eventSubmitter, spec);
        } catch (TException e) {
            HiveMetaStoreEventHelper.submitFailedPathRegistration(this.eventSubmitter, spec, e);
            throw new IOException(e);
        }
    }

    /**
     * Ensures the named Hive database exists, creating it when absent.
     *
     * @return {@code true} if this call created the database, {@code false} if it already existed
     * @throws IOException on any metastore failure
     */
    @Override
    public boolean createDbIfNotExists(String dbName) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            return createDbIfNotExists(borrowedClient.get(), dbName);
        }
    }

    /**
     * Ensures {@code dbName} exists using the supplied client, creating it when absent.
     * Creation requires {@link HiveRegProps#HIVE_DB_ROOT_DIR} so a location URI can be derived.
     *
     * @return {@code true} only when this call actually created the database
     * @throws IOException on any metastore failure other than a benign creation race
     */
    private boolean createDbIfNotExists(IMetaStoreClient client, String dbName) throws IOException {
        Database database = new Database();
        database.setName(dbName);

        try (AutoCloseableLock dbLock = this.locks.getDbLock(dbName)) {
            boolean found = true;
            try {
                client.getDatabase(database.getName());
            } catch (NoSuchObjectException nsoe) {
                found = false; // absent — fall through and create it
            } catch (TException te) {
                throw new IOException(te);
            }
            if (found) {
                return false;
            }

            Preconditions.checkState(this.hiveDbRootDir.isPresent(),
                    "Missing required property " + HiveRegProps.HIVE_DB_ROOT_DIR);
            database.setLocationUri(new Path(this.hiveDbRootDir.get(), dbName + HIVE_DB_EXTENSION).toString());

            try {
                client.createDatabase(database);
                log.info("Created database " + dbName);
                HiveMetaStoreEventHelper.submitSuccessfulDBCreation(this.eventSubmitter, dbName);
                return true;
            } catch (AlreadyExistsException e) {
                // Another registrar won the race; treat as "already existed".
                return false;
            } catch (TException e) {
                HiveMetaStoreEventHelper.submitFailedDBCreation(this.eventSubmitter, dbName, e);
                throw new IOException("Unable to create Hive database " + dbName, e);
            }
        }
    }

    /**
     * Creates the given table in the metastore if it is not already registered.
     *
     * @return {@code true} if this call created the table, {@code false} if it already existed
     * @throws IOException on any metastore failure
     */
    @Override
    public boolean createTableIfNotExists(HiveTable table) throws IOException {
        String dbName = table.getDbName();
        String tableName = table.getTableName();
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient();
                AutoCloseableLock tableLock = this.locks.getTableLock(dbName, tableName)) {
            return createTableIfNotExists(borrowedClient.get(), HiveMetaStoreUtils.getTable(table), table);
        }
    }

    /**
     * Adds {@code partition} to {@code table} if it is not already registered, stamping a
     * create time on the new partition.
     *
     * @return {@code true} if this call added the partition, {@code false} if it already existed
     * @throws IOException on any metastore failure
     */
    @Override
    public boolean addPartitionIfNotExists(HiveTable table, HivePartition partition) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient();
                AutoCloseableLock lock = this.locks.getTableLock(table.getDbName(), table.getTableName())) {
            try {
                client.get().getPartition(table.getDbName(), table.getTableName(), partition.getValues());
                return false;
            } catch (NoSuchObjectException e) {
                // BUG FIX: this used to call alter_partition, which cannot succeed for a
                // partition that does not exist yet — a missing partition must be added.
                client.get().add_partition(
                        getPartitionWithCreateTimeNow(HiveMetaStoreUtils.getPartition(partition)));
                HiveMetaStoreEventHelper.submitSuccessfulPartitionAdd(this.eventSubmitter, table, partition);
                return true;
            }
        } catch (TException e) {
            HiveMetaStoreEventHelper.submitFailedPartitionAdd(this.eventSubmitter, table, partition, e);
            throw new IOException(String.format("Unable to add partition %s in table %s in db %s",
                    partition.getValues(), table.getTableName(), table.getDbName()), e);
        }
    }

    /**
     * Creates the given table if it does not exist yet, stamping a create time on it.
     *
     * @param client    metastore client to use
     * @param table     the table in thrift form (what actually gets created)
     * @param hiveTable the same table in Gobblin form (used for event reporting)
     * @return {@code true} if this call created the table, {@code false} if it already existed
     * @throws IOException on any metastore failure
     */
    private boolean createTableIfNotExists(IMetaStoreClient client, Table table, HiveTable hiveTable)
            throws IOException {
        String dbName = table.getDbName();
        String tableName = table.getTableName();

        // NOTE(review): the public createTableIfNotExists(HiveTable) already holds this
        // table lock when it calls in here; assumes HiveLock is safe to acquire twice for
        // the same key — confirm against HiveLock's implementation.
        try (AutoCloseableLock lock = this.locks.getTableLock(dbName, tableName)) {
            if (client.tableExists(table.getDbName(), table.getTableName())) {
                return false;
            }
            client.createTable(getTableWithCreateTimeNow(table));
            log.info(String.format("Created Hive table %s in db %s", tableName, dbName));
            HiveMetaStoreEventHelper.submitSuccessfulTableCreation(this.eventSubmitter, hiveTable);
            return true;
        } catch (TException e) {
            HiveMetaStoreEventHelper.submitFailedTableCreation(eventSubmitter, hiveTable, e);
            // Message fixed: this method only creates; it never alters.
            throw new IOException(String.format("Error in creating Hive table %s in db %s",
                    table.getTableName(), table.getDbName()), e);
        }
    }

    /**
     * Creates the Hive table, or, when it already exists, alters it if the desired spec
     * differs from what is registered.
     *
     * <p>Create is attempted optimistically first; {@link AlreadyExistsException} signals an
     * existing table and triggers the compare-and-alter path, avoiding a separate existence
     * check round trip.</p>
     *
     * @param client metastore client to use
     * @param table  the desired table in thrift form
     * @param spec   registration spec; {@code spec.getTable()} is compared against the
     *               registered table to decide whether an alter is needed
     * @throws TException on any metastore failure (create or alter)
     */
    private void createOrAlterTable(IMetaStoreClient client, Table table, HiveSpec spec) throws TException {

        String dbName = table.getDbName();
        String tableName = table.getTableName();
        try (AutoCloseableLock lock = this.locks.getTableLock(dbName, tableName)) {
            try {
                // Stamp a create time (if unset) and try to create.
                client.createTable(getTableWithCreateTimeNow(table));
                log.info(String.format("Created Hive table %s in db %s", tableName, dbName));
            } catch (AlreadyExistsException e) {
                log.info("Table {} already exists in db {}.", tableName, dbName);
                try {
                    HiveTable existingTable = HiveMetaStoreUtils.getHiveTable(client.getTable(dbName, tableName));
                    // Only issue an alter when the spec actually differs from the registered table.
                    if (needToUpdateTable(existingTable, spec.getTable())) {
                        // Carry the existing table's create time across the alter.
                        client.alter_table(dbName, tableName, getTableWithCreateTime(table, existingTable));
                        log.info(String.format("updated Hive table %s in db %s", tableName, dbName));
                    }
                } catch (TException e2) {
                    log.error(String.format("Unable to create or alter Hive table %s in db %s: " + e2.getMessage(),
                            tableName, dbName), e2);
                    throw e2;
                }
            } catch (TException e) {
                log.error(String.format("Unable to create Hive table %s in db %s: " + e.getMessage(), tableName,
                        dbName), e);
                throw e;
            }
        }
    }

    /**
     * Checks whether the given table is registered in the metastore.
     *
     * @throws IOException on any metastore failure
     */
    @Override
    public boolean existsTable(String dbName, String tableName) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            return borrowedClient.get().tableExists(dbName, tableName);
        } catch (TException e) {
            String message = String.format("Unable to check existence of table %s in db %s", tableName, dbName);
            throw new IOException(message, e);
        }
    }

    /**
     * Checks whether the partition identified by {@code partitionValues} is registered.
     * {@code partitionKeys} is accepted for interface compatibility but not consulted here.
     *
     * @throws IOException on any metastore failure other than "not found"
     */
    @Override
    public boolean existsPartition(String dbName, String tableName, List<Column> partitionKeys,
            List<String> partitionValues) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            // getPartition throws NoSuchObjectException when absent; returning normally means it exists.
            borrowedClient.get().getPartition(dbName, tableName, partitionValues);
            return true;
        } catch (NoSuchObjectException absent) {
            return false;
        } catch (TException e) {
            throw new IOException(String.format("Unable to check existence of partition %s in table %s in db %s",
                    partitionValues, tableName, dbName), e);
        }
    }

    /**
     * Drops the given table from the metastore if it is registered; no-op otherwise.
     * Submits a drop event on success and a failure event on error.
     *
     * @throws IOException on any metastore failure
     */
    @Override
    public void dropTableIfExists(String dbName, String tableName) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            if (!borrowedClient.get().tableExists(dbName, tableName)) {
                return; // nothing registered, nothing to drop
            }
            borrowedClient.get().dropTable(dbName, tableName);
            HiveMetaStoreEventHelper.submitSuccessfulTableDrop(eventSubmitter, dbName, tableName);
            log.info("Dropped table " + tableName + " in db " + dbName);
        } catch (TException e) {
            HiveMetaStoreEventHelper.submitFailedTableDrop(eventSubmitter, dbName, tableName, e);
            throw new IOException(String.format("Unable to deregister table %s in db %s", tableName, dbName), e);
        }
    }

    /**
     * Drops the partition identified by {@code partitionValues} if it is registered; a missing
     * partition is silently ignored. The partition's data is NOT deleted ({@code deleteData=false}).
     * {@code partitionKeys} is accepted for interface compatibility but not consulted here.
     *
     * @throws IOException on any metastore failure other than "not found"
     */
    @Override
    public void dropPartitionIfExists(String dbName, String tableName, List<Column> partitionKeys,
            List<String> partitionValues) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> client = this.clientPool.getClient()) {
            client.get().dropPartition(dbName, tableName, partitionValues, false);
            HiveMetaStoreEventHelper.submitSuccessfulPartitionDrop(eventSubmitter, dbName, tableName,
                    partitionValues);
            log.info("Dropped partition " + partitionValues + " in table " + tableName + " in db " + dbName);
        } catch (NoSuchObjectException e) {
            // Partition does not exist. Nothing to do
        } catch (TException e) {
            HiveMetaStoreEventHelper.submitFailedPartitionDrop(eventSubmitter, dbName, tableName, partitionValues,
                    e);
            // Message fixed: this method drops a partition; it does not check existence.
            throw new IOException(
                    String.format("Unable to drop Hive partition %s in table %s in db %s",
                            partitionValues, tableName, dbName),
                    e);
        }
    }

    /**
     * Adds the partition to the table, or, when the add fails because it already exists,
     * alters it if the spec's partition differs from the registered one.
     *
     * <p>The add is attempted optimistically; any {@link TException} from it triggers the
     * lookup-compare-alter fallback. NOTE(review): the fallback runs for every add failure,
     * not just "already exists" — presumably intentional, but confirm.</p>
     *
     * @param client    metastore client to use
     * @param table     the owning table (thrift form); partition key count must match the
     *                  partition's value count
     * @param partition the desired partition (thrift form)
     * @param spec      registration spec; {@code spec.getPartition()} must be present and is
     *                  compared against the registered partition to decide on an alter
     * @throws TException on any metastore failure from the fallback path
     */
    private void addOrAlterPartition(IMetaStoreClient client, Table table, Partition partition, HiveSpec spec)
            throws TException {
        Preconditions.checkArgument(table.getPartitionKeysSize() == partition.getValues().size(),
                String.format("Partition key size is %s but partition value size is %s",
                        table.getPartitionKeys().size(), partition.getValues().size()));

        try (AutoCloseableLock lock = this.locks.getPartitionLock(table.getDbName(), table.getTableName(),
                partition.getValues())) {

            try {
                // Optimistic add with a create time stamped on (if unset).
                client.add_partition(getPartitionWithCreateTimeNow(partition));
                log.info(String.format("Added partition %s to table %s with location %s",
                        stringifyPartition(partition), table.getTableName(), partition.getSd().getLocation()));
            } catch (TException e) {
                try {
                    // Fetch what is registered and decide whether an alter is needed.
                    HivePartition existingPartition = HiveMetaStoreUtils.getHivePartition(
                            client.getPartition(table.getDbName(), table.getTableName(), partition.getValues()));

                    if (needToUpdatePartition(existingPartition, spec.getPartition().get())) {
                        log.info(String.format("Partition update required. ExistingPartition %s, newPartition %s",
                                stringifyPartition(existingPartition),
                                stringifyPartition(spec.getPartition().get())));
                        // Carry the existing partition's create time across the alter.
                        Partition newPartition = getPartitionWithCreateTime(partition, existingPartition);
                        log.info(String.format("Altering partition %s", newPartition));
                        client.alter_partition(table.getDbName(), table.getTableName(), newPartition);
                        log.info(String.format("Updated partition %s in table %s with location %s",
                                stringifyPartition(newPartition), table.getTableName(),
                                partition.getSd().getLocation()));
                    } else {
                        log.info(String.format(
                                "Partition %s in table %s with location %s already exists and no need to update",
                                stringifyPartition(partition), table.getTableName(),
                                partition.getSd().getLocation()));
                    }
                } catch (Throwable e2) {
                    log.error(
                            String.format(
                                    "Unable to add or alter partition %s in table %s with location %s: "
                                            + e2.getMessage(),
                                    stringifyPartitionVerbose(partition), table.getTableName(),
                                    partition.getSd().getLocation()),
                            e2);
                    throw e2;
                }
            }
        }
    }

    /**
     * Renders a partition for logging: the full thrift dump when debug logging is on,
     * otherwise just the partition values.
     */
    private static String stringifyPartition(Partition partition) {
        return log.isDebugEnabled()
                ? stringifyPartitionVerbose(partition)
                : Arrays.toString(partition.getValues().toArray());
    }

    /** Renders a Gobblin {@link HivePartition} for logging via its {@code toString}. */
    private static String stringifyPartition(HivePartition gobblinPartition) {
        return gobblinPartition.toString();
    }

    /** Full thrift {@code toString} dump of the partition, used for verbose/debug logging. */
    private static String stringifyPartitionVerbose(Partition thriftPartition) {
        return thriftPartition.toString();
    }

    /**
     * Looks up a table by name.
     *
     * @return the table wrapped in {@link Optional}, or absent when it is not registered
     * @throws IOException on any metastore failure other than "not found"
     */
    @Override
    public Optional<HiveTable> getTable(String dbName, String tableName) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            Table thriftTable = borrowedClient.get().getTable(dbName, tableName);
            return Optional.of(HiveMetaStoreUtils.getHiveTable(thriftTable));
        } catch (NoSuchObjectException absent) {
            return Optional.<HiveTable>absent();
        } catch (TException e) {
            throw new IOException("Unable to get table " + tableName + " in db " + dbName, e);
        }
    }

    /**
     * Looks up a partition by its values. {@code partitionKeys} is accepted for interface
     * compatibility but not consulted here.
     *
     * @return the partition wrapped in {@link Optional}, or absent when it is not registered
     * @throws IOException on any metastore failure other than "not found"
     */
    @Override
    public Optional<HivePartition> getPartition(String dbName, String tableName, List<Column> partitionKeys,
            List<String> partitionValues) throws IOException {

        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            Partition thriftPartition = borrowedClient.get().getPartition(dbName, tableName, partitionValues);
            return Optional.of(HiveMetaStoreUtils.getHivePartition(thriftPartition));
        } catch (NoSuchObjectException absent) {
            return Optional.<HivePartition>absent();
        } catch (TException e) {
            throw new IOException(
                    "Unable to get partition " + partitionValues + " from table " + tableName + " in db " + dbName,
                    e);
        }
    }

    /**
     * Alters an existing table; fails if the table is not registered.
     * Submits an alter event on success and a failure event on Thrift error.
     *
     * @throws IOException when the table does not exist or on any metastore failure
     */
    @Override
    public void alterTable(HiveTable table) throws IOException {
        String dbName = table.getDbName();
        String tableName = table.getTableName();
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            if (!borrowedClient.get().tableExists(dbName, tableName)) {
                // Thrown from inside the try, but only TException is caught below, so it propagates.
                throw new IOException(
                        "Table " + tableName + " in db " + dbName + " does not exist");
            }
            Table thriftTable = HiveMetaStoreUtils.getTable(table);
            borrowedClient.get().alter_table(dbName, tableName, getTableWithCreateTimeNow(thriftTable));
            HiveMetaStoreEventHelper.submitSuccessfulTableAlter(eventSubmitter, table);
        } catch (TException e) {
            HiveMetaStoreEventHelper.submitFailedTableAlter(eventSubmitter, table, e);
            throw new IOException("Unable to alter table " + tableName + " in db " + dbName,
                    e);
        }
    }

    /**
     * Alters an existing partition, stamping a create time on it if unset.
     * Submits an alter event on success and a failure event on error.
     *
     * @throws IOException on any metastore failure
     */
    @Override
    public void alterPartition(HiveTable table, HivePartition partition) throws IOException {
        try (AutoReturnableObject<IMetaStoreClient> borrowedClient = this.clientPool.getClient()) {
            Partition thriftPartition = HiveMetaStoreUtils.getPartition(partition);
            borrowedClient.get().alter_partition(table.getDbName(), table.getTableName(),
                    getPartitionWithCreateTimeNow(thriftPartition));
            HiveMetaStoreEventHelper.submitSuccessfulPartitionAlter(eventSubmitter, table, partition);
        } catch (TException e) {
            HiveMetaStoreEventHelper.submitFailedPartitionAlter(eventSubmitter, table, partition, e);
            throw new IOException(String.format("Unable to alter partition %s in table %s in db %s",
                    partition.getValues(), table.getTableName(), table.getDbName()), e);
        }
    }

    /** Returns the partition with its create time stamped to "now" (epoch seconds) if unset. */
    private Partition getPartitionWithCreateTimeNow(Partition partition) {
        long nowSeconds = DateTime.now().getMillis() / 1000;
        return getPartitionWithCreateTime(partition, Ints.checkedCast(nowSeconds));
    }

    /**
     * Stamps the reference partition's create time (or "now" when the reference has none)
     * onto the partition, if the partition does not already carry a create time.
     */
    private Partition getPartitionWithCreateTime(Partition partition, HivePartition referencePartition) {
        long fallbackSeconds = DateTime.now().getMillis() / 1000;
        long createTimeSeconds = referencePartition.getCreateTime().or(fallbackSeconds);
        return getPartitionWithCreateTime(partition, Ints.checkedCast(createTimeSeconds));
    }

    /**
     * Sets create time if not already set; returns the original partition untouched when it
     * already has a positive create time, otherwise a deep copy with {@code createTime} set.
     */
    private Partition getPartitionWithCreateTime(Partition partition, int createTime) {
        boolean hasCreateTime = partition.isSetCreateTime() && partition.getCreateTime() > 0;
        if (hasCreateTime) {
            return partition;
        }
        Partition stampedPartition = partition.deepCopy();
        stampedPartition.setCreateTime(createTime);
        return stampedPartition;
    }

    /** Returns the table with its create time stamped to "now" (epoch seconds) if unset. */
    private Table getTableWithCreateTimeNow(Table table) {
        return getTableWithCreateTime(table, Ints.checkedCast(DateTime.now().getMillis() / 1000));
    }

    /**
     * Stamps the reference table's create time (or "now" when the reference has none) onto
     * the table, if the table does not already carry a create time.
     */
    private Table getTableWithCreateTime(Table table, HiveTable referenceTable) {
        return getTableWithCreateTime(table,
                Ints.checkedCast(referenceTable.getCreateTime().or(DateTime.now().getMillis() / 1000)));
    }

    /**
     * Sets create time if not already set; returns the original table untouched when it
     * already has a positive create time, otherwise a deep copy with {@code createTime} set.
     * (Renamed from the non-camelCase {@code gettableWithCreateTime}; both callers are the
     * two overloads above, so no external interface changes.)
     */
    private Table getTableWithCreateTime(Table table, int createTime) {
        if (table.isSetCreateTime() && table.getCreateTime() > 0) {
            return table;
        }
        Table actualTable = table.deepCopy();
        actualTable.setCreateTime(createTime);
        return actualTable;
    }

}