org.apache.falcon.catalog.HiveCatalogService.java Source code

Introduction

Here is the source code for org.apache.falcon.catalog.HiveCatalogService.java, Apache Falcon's catalog service implementation backed by the Hive metastore (HCatalog).

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.falcon.catalog;

import org.apache.commons.lang3.StringUtils;
import org.apache.falcon.FalconException;
import org.apache.falcon.security.CurrentUser;
import org.apache.falcon.security.SecurityUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hive.hcatalog.api.HCatClient;
import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * An implementation of CatalogService that uses Hive Meta Store (HCatalog)
 * as the backing Catalog registry.
 */
public class HiveCatalogService extends AbstractCatalogService {

    private static final Logger LOG = LoggerFactory.getLogger(HiveCatalogService.class);
    public static final String CREATE_TIME = "falcon.create_time";
    public static final String UPDATE_TIME = "falcon.update_time";
    public static final String PARTITION_DOES_NOT_EXIST = "Partition does not exist";

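    /**
     * Builds a client-side HiveConf for the given metastore URI: remote
     * metastore, three thrift connection retries, the HCatalog semantic
     * analyzer hook, and concurrency support and exec hooks disabled.
     */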
    public static HiveConf createHiveConf(Configuration conf, String metastoreUrl) throws IOException {
        HiveConf hcatConf = new HiveConf(conf, HiveConf.class);

        hcatConf.set("hive.metastore.local", "false");
        hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUrl);
        hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
        hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
        hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");

        hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
        hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
        return hcatConf;
    }

    /**
     * Creates a metastore client for use from within an Oozie job,
     * picking up any delegation tokens the launcher has provided.
     *
     * @param conf conf object
     * @param metastoreUrl metastore uri
     * @return hive metastore client handle
     * @throws FalconException if the client cannot be created
     */
    private static HiveMetaStoreClient createClient(Configuration conf, String metastoreUrl)
            throws FalconException {
        try {
            LOG.info("Creating HCatalog client object for metastore {} using conf {}", metastoreUrl,
                    conf.toString());
            final Credentials credentials = getCredentials(conf);
            Configuration jobConf = credentials != null ? copyCredentialsToConf(conf, credentials) : conf;
            HiveConf hcatConf = createHiveConf(jobConf, metastoreUrl);

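            // In secure mode, pass the metastore principal through, enable
            // SASL on the thrift connection, and attach the delegation
            // tokens read from the token file to the current user.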
            if (UserGroupInformation.isSecurityEnabled()) {
                hcatConf.set(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname,
                        conf.get(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname));
                hcatConf.set(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.varname, "true");

                UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
                ugi.addCredentials(credentials); // cannot be null: in secure mode the launcher provides a token file
            }

            return new HiveMetaStoreClient(hcatConf);
        } catch (Exception e) {
            throw new FalconException("Exception creating HiveMetaStoreClient: " + e.getMessage(), e);
        }
    }

    private static JobConf copyCredentialsToConf(Configuration conf, Credentials credentials) {
        JobConf jobConf = new JobConf(conf);
        jobConf.setCredentials(credentials);
        return jobConf;
    }

    private static Credentials getCredentials(Configuration conf) throws IOException {
        final String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
        if (tokenFile == null) {
            return null;
        }

        try {
            LOG.info("Adding credentials/delegation tokens from token file={} to conf", tokenFile);
            Credentials credentials = Credentials.readTokenStorageFile(new File(tokenFile), conf);
            LOG.info("credentials numberOfTokens={}, numberOfSecretKeys={}", credentials.numberOfTokens(),
                    credentials.numberOfSecretKeys());
            return credentials;
        } catch (IOException e) {
            LOG.warn("error while fetching credentials from {}", tokenFile);
        }

        return null;
    }

    /**
     * Creates a metastore client for use from within the Falcon server,
     * executing the call as a proxy of the current user.
     *
     * @param conf                      conf
     * @param catalogUrl                metastore uri
     * @return hive metastore client handle
     * @throws FalconException if the client cannot be created
     */
    private static HiveMetaStoreClient createProxiedClient(Configuration conf, String catalogUrl)
            throws FalconException {

        try {
            final HiveConf hcatConf = createHiveConf(conf, catalogUrl);
            UserGroupInformation proxyUGI = CurrentUser.getProxyUGI();
            addSecureCredentialsAndToken(conf, hcatConf, proxyUGI);

            LOG.info("Creating HCatalog client object for {}", catalogUrl);
            return proxyUGI.doAs(new PrivilegedExceptionAction<HiveMetaStoreClient>() {
                public HiveMetaStoreClient run() throws Exception {
                    return new HiveMetaStoreClient(hcatConf);
                }
            });
        } catch (Exception e) {
            throw new FalconException("Exception creating Proxied HiveMetaStoreClient: " + e.getMessage(), e);
        }
    }

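    /**
     * In secure mode, configures SASL against the metastore principal and
     * adds a metastore delegation token to the proxy user's UGI.
     */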
    private static void addSecureCredentialsAndToken(Configuration conf, HiveConf hcatConf,
            UserGroupInformation proxyUGI) throws IOException {
        if (UserGroupInformation.isSecurityEnabled()) {
            String metaStoreServicePrincipal = conf.get(SecurityUtil.HIVE_METASTORE_KERBEROS_PRINCIPAL);
            hcatConf.set(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname, metaStoreServicePrincipal);
            hcatConf.set(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL.varname, "true");

            Token<DelegationTokenIdentifier> delegationTokenId = getDelegationToken(hcatConf,
                    metaStoreServicePrincipal);
            proxyUGI.addToken(delegationTokenId);
        }
    }

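    /**
     * Obtains a metastore delegation token for the current user and tags it
     * with the "FalconService" token signature/service name so the client
     * can locate it later.
     */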
    private static Token<DelegationTokenIdentifier> getDelegationToken(HiveConf hcatConf,
            String metaStoreServicePrincipal) throws IOException {

        LOG.debug("Creating delegation tokens for principal={}", metaStoreServicePrincipal);
        HCatClient hcatClient = HCatClient.create(hcatConf);
        String delegationToken = hcatClient.getDelegationToken(CurrentUser.getUser(), metaStoreServicePrincipal);
        hcatConf.set("hive.metastore.token.signature", "FalconService");

        Token<DelegationTokenIdentifier> delegationTokenId = new Token<DelegationTokenIdentifier>();
        delegationTokenId.decodeFromUrlString(delegationToken);
        delegationTokenId.setService(new Text("FalconService"));
        LOG.info("Created delegation token={}", delegationToken);
        return delegationTokenId;
    }

    @Override
    public boolean isAlive(Configuration conf, final String catalogUrl) throws FalconException {
        LOG.info("Checking if the service is alive for: {}", catalogUrl);

        try {
            HiveMetaStoreClient client = createProxiedClient(conf, catalogUrl);
            Database database = client.getDatabase("default");
            return database != null;
        } catch (Exception e) {
            throw new FalconException("Exception checking if the service is alive:" + e.getMessage(), e);
        }
    }

    @Override
    public boolean dbExists(Configuration conf, final String catalogUrl, final String databaseName)
            throws FalconException {
        LOG.info("Checking if the db exists: {}", databaseName);

        try {
            HiveMetaStoreClient client = createProxiedClient(conf, catalogUrl);
            Database db = client.getDatabase(databaseName);
            return db != null;
        } catch (NoSuchObjectException e) {
            return false;
        } catch (Exception e) {
            throw new FalconException("Exception checking if the db exists:" + e.getMessage(), e);
        }
    }

    @Override
    public boolean tableExists(Configuration conf, final String catalogUrl, final String database,
            final String tableName) throws FalconException {
        LOG.info("Checking if the table exists: {}", tableName);

        try {
            HiveMetaStoreClient client = createProxiedClient(conf, catalogUrl);
            Table table = client.getTable(database, tableName);
            return table != null;
        } catch (NoSuchObjectException e) {
            return false;
        } catch (Exception e) {
            throw new FalconException("Exception checking if the table exists:" + e.getMessage(), e);
        }
    }

    @Override
    public boolean isTableExternal(Configuration conf, String catalogUrl, String database, String tableName)
            throws FalconException {
        LOG.info("Checking if the table is external: {}", tableName);

        try {
            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            Table table = client.getTable(database, tableName);
            return table.getTableType().equals(TableType.EXTERNAL_TABLE.name());
        } catch (Exception e) {
            throw new FalconException("Exception checking if the table is external:" + e.getMessage(), e);
        }
    }

    @Override
    public List<CatalogPartition> listPartitions(Configuration conf, String catalogUrl, String database,
            String tableName, List<String> values) throws FalconException {
        LOG.info("List partitions for: {}, partition filter: {}", tableName, values);

        try {
            List<CatalogPartition> catalogPartitionList = new ArrayList<CatalogPartition>();

            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            List<Partition> hCatPartitions = client.listPartitions(database, tableName, values, (short) -1);
            for (Partition hCatPartition : hCatPartitions) {
                LOG.debug("Partition: " + hCatPartition.getValues());
                CatalogPartition partition = createCatalogPartition(hCatPartition);
                catalogPartitionList.add(partition);
            }

            return catalogPartitionList;
        } catch (Exception e) {
            throw new FalconException("Exception listing partitions:" + e.getMessage(), e);
        }
    }

    @Override
    public List<CatalogPartition> listPartitionsByFilter(Configuration conf, String catalogUrl, String database,
            String tableName, String filter) throws FalconException {
        LOG.info("List partitions for: {}, partition filter: {}", tableName, filter);

        try {
            List<CatalogPartition> catalogPartitionList = new ArrayList<CatalogPartition>();

            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            List<Partition> hCatPartitions = client.listPartitionsByFilter(database, tableName, filter, (short) -1);
            for (Partition hCatPartition : hCatPartitions) {
                LOG.info("Partition: " + hCatPartition.getValues());
                CatalogPartition partition = createCatalogPartition(hCatPartition);
                catalogPartitionList.add(partition);
            }

            return catalogPartitionList;
        } catch (Exception e) {
            throw new FalconException("Exception listing partitions:" + e.getMessage(), e);
        }
    }

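    /**
     * Maps a metastore Partition onto Falcon's CatalogPartition, copying the
     * identity, storage descriptor fields, timestamps and, when present, the
     * totalSize parameter.
     */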
    private CatalogPartition createCatalogPartition(Partition hCatPartition) {
        final CatalogPartition catalogPartition = new CatalogPartition();
        catalogPartition.setDatabaseName(hCatPartition.getDbName());
        catalogPartition.setTableName(hCatPartition.getTableName());
        catalogPartition.setValues(hCatPartition.getValues());
        catalogPartition.setInputFormat(hCatPartition.getSd().getInputFormat());
        catalogPartition.setOutputFormat(hCatPartition.getSd().getOutputFormat());
        catalogPartition.setLocation(hCatPartition.getSd().getLocation());
        catalogPartition.setSerdeInfo(hCatPartition.getSd().getSerdeInfo().getSerializationLib());
        catalogPartition.setCreateTime(hCatPartition.getCreateTime());
        catalogPartition.setLastAccessTime(hCatPartition.getLastAccessTime());
        Map<String, String> params = hCatPartition.getParameters();
        if (params != null) {
            String size = params.get("totalSize");
            if (StringUtils.isNotBlank(size)) {
                catalogPartition.setSize(Long.parseLong(size));
            }
        }
        return catalogPartition;
    }

    // Drops a single partition.
    @Override
    public boolean dropPartition(Configuration conf, String catalogUrl, String database, String tableName,
            List<String> partitionValues, boolean deleteData) throws FalconException {
        LOG.info("Dropping partition for: {}, partition: {}", tableName, partitionValues);

        try {
            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            return client.dropPartition(database, tableName, partitionValues, deleteData);
        } catch (Exception e) {
            throw new FalconException("Exception dropping partitions:" + e.getMessage(), e);
        }
    }

    @Override
    public void dropPartitions(Configuration conf, String catalogUrl, String database, String tableName,
            List<String> partitionValues, boolean deleteData) throws FalconException {
        LOG.info("Dropping partitions for: {}, partitions: {}", tableName, partitionValues);

        try {
            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            List<Partition> partitions = client.listPartitions(database, tableName, partitionValues, (short) -1);
            for (Partition part : partitions) {
                LOG.info("Dropping partition for: {}, partition: {}", tableName, part.getValues());
                client.dropPartition(database, tableName, part.getValues(), deleteData);
            }
        } catch (Exception e) {
            throw new FalconException("Exception dropping partitions:" + e.getMessage(), e);
        }
    }

    @Override
    public CatalogPartition getPartition(Configuration conf, String catalogUrl, String database, String tableName,
            List<String> partitionValues) throws FalconException {
        LOG.info("Fetch partition for: {}, partition spec: {}", tableName, partitionValues);

        try {
            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            Partition hCatPartition = client.getPartition(database, tableName, partitionValues);
            return createCatalogPartition(hCatPartition);
        } catch (NoSuchObjectException nsoe) {
            throw new FalconException(PARTITION_DOES_NOT_EXIST + ":" + nsoe.getMessage(), nsoe);
        } catch (Exception e) {
            throw new FalconException("Exception fetching partition:" + e.getMessage(), e);
        }
    }

    @Override
    public List<String> getPartitionColumns(Configuration conf, String catalogUrl, String database,
            String tableName) throws FalconException {
        LOG.info("Fetching partition columns of table: " + tableName);

        try {
            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            Table table = client.getTable(database, tableName);
            List<String> partCols = new ArrayList<String>();
            for (FieldSchema part : table.getPartitionKeys()) {
                partCols.add(part.getName());
            }
            return partCols;
        } catch (Exception e) {
            throw new FalconException("Exception fetching partition columns: " + e.getMessage(), e);
        }
    }

    @Override
    public void addPartition(Configuration conf, String catalogUrl, String database, String tableName,
            List<String> partValues, String location) throws FalconException {
        LOG.info("Adding partition {} for {}.{} with location {}", partValues, database, tableName, location);

        try {
            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            Table table = client.getTable(database, tableName);
            Partition part = new Partition();
            part.setDbName(database);
            part.setTableName(tableName);
            part.setValues(partValues);
            part.setSd(table.getSd());
            part.getSd().setLocation(location);
            part.setParameters(table.getParameters());
            if (part.getParameters() == null) {
                part.setParameters(new HashMap<String, String>());
            }
            part.getParameters().put(CREATE_TIME, String.valueOf(System.currentTimeMillis()));
            client.add_partition(part);
        } catch (Exception e) {
            throw new FalconException("Exception adding partition: " + e.getMessage(), e);
        }
    }

    @Override
    public void updatePartition(Configuration conf, String catalogUrl, String database, String tableName,
            List<String> partValues, String location) throws FalconException {
        LOG.info("Updating partition {} of {}.{} with location {}", partValues, database, tableName, location);

        try {
            HiveMetaStoreClient client = createClient(conf, catalogUrl);
            Table table = client.getTable(database, tableName);
            Partition part = new Partition();
            part.setDbName(database);
            part.setTableName(tableName);
            part.setValues(partValues);
            part.setSd(table.getSd());
            part.getSd().setLocation(location);
            part.setParameters(table.getParameters());
            if (part.getParameters() == null) {
                part.setParameters(new HashMap<String, String>());
            }
            part.getParameters().put(UPDATE_TIME, String.valueOf(System.currentTimeMillis()));
            client.alter_partition(database, tableName, part);
        } catch (Exception e) {
            throw new FalconException("Exception updating partition: " + e.getMessage(), e);
        }
    }
}
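
Example

The snippet below is a minimal, hypothetical sketch of how a caller might exercise this service; it is not part of the source above. The metastore URI (thrift://localhost:9083), database, table, and partition filter are placeholder values, and it assumes the Falcon jars and a reachable metastore are available.

import java.util.List;

import org.apache.falcon.catalog.AbstractCatalogService;
import org.apache.falcon.catalog.CatalogPartition;
import org.apache.falcon.catalog.HiveCatalogService;
import org.apache.hadoop.conf.Configuration;

public class HiveCatalogServiceExample {
    public static void main(String[] args) throws Exception {
        AbstractCatalogService catalog = new HiveCatalogService();
        Configuration conf = new Configuration();
        String metastoreUri = "thrift://localhost:9083"; // placeholder URI

        // Liveness probe: fetches the "default" database via a proxied client.
        System.out.println("alive: " + catalog.isAlive(conf, metastoreUri));

        // List partitions matching a filter (placeholder db/table/filter).
        List<CatalogPartition> parts = catalog.listPartitionsByFilter(
                conf, metastoreUri, "mydb", "mytable", "ds = '2024-01-01'");
        for (CatalogPartition partition : parts) {
            System.out.println(partition.getLocation());
        }
    }
}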