hydrograph.server.metadata.strategy.HiveMetadataStrategy.java Source code


Introduction

Here is the source code for hydrograph.server.metadata.strategy.HiveMetadataStrategy.java, a MetadataStrategyTemplate implementation that authenticates against a Kerberos-secured cluster and reads table metadata from the Hive metastore.

Source

/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package hydrograph.server.metadata.strategy;

import hydrograph.server.metadata.entity.TableEntity;
import hydrograph.server.metadata.entity.TableSchemaFieldEntity;
import hydrograph.server.metadata.exception.ParamsCannotBeNullOrEmpty;
import hydrograph.server.metadata.strategy.base.MetadataStrategyTemplate;
import hydrograph.server.utilities.Constants;
import hydrograph.server.utilities.ServiceUtilities;
import hydrograph.server.utilities.kerberos.KerberosUtilities;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.*;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.security.auth.login.LoginException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * Concrete implementation to retrieve Hive metastore details.
 * </p>
 * <p>
 * This class requires a Kerberos token for authentication.
 * </p>
 */
public class HiveMetadataStrategy extends MetadataStrategyTemplate {

    /**
     * Establishes the connection to the Hive metastore and loads the table
     * definition for the requested database and table.
     *
     * @param connectionProperties - contains request parameter details
     */
    @SuppressWarnings("unchecked")
    @Override
    public void setConnection(Map connectionProperties) {
        String userId = getRequiredParam(connectionProperties, Constants.USERNAME);
        String service_pwd = getRequiredParam(connectionProperties, Constants.SERVICE_PWD);
        String databaseName = getRequiredParam(connectionProperties, Constants.DATABASE_NAME);
        String tableName = getRequiredParam(connectionProperties, Constants.TABLENAME);

        KerberosUtilities kerberosUtilities = new KerberosUtilities();
        Configuration conf = new Configuration();

        // load hdfs-site.xml and core-site.xml
        String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle()
                .getString(Constants.HDFS_SITE_CONFIG_PATH);
        String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle()
                .getString(Constants.CORE_SITE_CONFIG_PATH);
        LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath);
        conf.addResource(new Path(hdfsConfigPath));
        LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath);
        conf.addResource(new Path(coreSiteConfigPath));

        try {
            kerberosUtilities.applyKerberosToken(userId, service_pwd, conf);
        } catch (LoginException e1) {
            throw new RuntimeException("Unable to login: " + e1.getMessage(), e1);
        } catch (IOException e1) {
            throw new RuntimeException("Login failed: " + e1.getMessage(), e1);
        }
        this.hiveConf = new HiveConf();
        String pathToHiveSiteXml = ServiceUtilities.getServiceConfigResourceBundle()
                .getString(Constants.HIVE_SITE_CONFIG_PATH);

        if (pathToHiveSiteXml == null || pathToHiveSiteXml.isEmpty()) {
            LOG.error("Error loading hive-site.xml: Path to hive-site.xml should not be null or empty.");
            throw new RuntimeException(
                    "Error loading hive-site.xml: Path to hive-site.xml should not be null or empty.");
        }
        LOG.debug("Loading hive-site.xml: " + pathToHiveSiteXml);
        hiveConf.addResource(new Path(pathToHiveSiteXml));

        HiveMetaStoreClient client = null;
        try {
            client = new HiveMetaStoreClient(hiveConf);
            this.table = client.getTable(databaseName, tableName);
            this.storageDescriptor = table.getSd();
        } catch (TException e) {
            // MetaException and NoSuchObjectException are subtypes of TException,
            // so a single catch covers all three; the cause is preserved.
            throw new RuntimeException(e.getMessage(), e);
        } finally {
            if (client != null) {
                client.close();
            }
        }

    }
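
    /*
     * Returns the request parameter for the given key, failing fast when it is
     * missing or blank. ParamsCannotBeNullOrEmpty (assumed throwable) is wrapped
     * in a RuntimeException so the check works whether or not it is a checked
     * exception.
     *
     * Illustrative input (hypothetical values): setConnection expects a map such as
     *   { USERNAME: "etl_user", SERVICE_PWD: "...", DATABASE_NAME: "sales", TABLENAME: "orders" }
     * after which 'table' and 'storageDescriptor' describe sales.orders.
     */
    private String getRequiredParam(Map connectionProperties, String key) {
        Object value = connectionProperties.get(key);
        if (value == null || value.toString().trim().isEmpty()) {
            throw new RuntimeException(new ParamsCannotBeNullOrEmpty(key + " not found in request parameter"));
        }
        return value.toString();
    }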

    /**
     * @param componentSchemaProperties - contains request parameter details
     * @return {@link TableEntity}
     */
    @SuppressWarnings("unchecked")
    @Override
    public TableEntity fillComponentSchema(Map componentSchemaProperties) {
        hiveTableEntity = new TableEntity();
        hiveTableEntity.setDatabaseName(getRequiredParam(componentSchemaProperties, Constants.DATABASE_NAME));
        hiveTableEntity.setTableName(getRequiredParam(componentSchemaProperties, Constants.TABLENAME));
        fillHiveTableSchema();
        return getHiveTableSchema();
    }

    private enum InputOutputFormat {
        PARQUET("parquet"), TEXTDELIMITED("textdelimited"), SEQUENCE("sequence");

        private final String name;

        InputOutputFormat(String name) {
            this.name = name;
        }

        public String getName() {
            return name;
        }
    }

    private static final Logger LOG = LoggerFactory.getLogger(HiveMetadataStrategy.class);
    private HiveConf hiveConf;
    private TableEntity hiveTableEntity;
    private StorageDescriptor storageDescriptor;
    private Table table;
    private boolean isTableExternal;

    private void fillHiveTableSchema() {
        setTableLocation();
        setExternalTable();
        setInputOutputFormat();
        setPartitionKeys();
        setFieldDelimiter();
        setOwner();
        fillFieldSchema();
    }

    private void setExternalTable() {
        if (checkIfHiveTableIsExternal())
            this.hiveTableEntity.setExternalTableLocation(storageDescriptor.getLocation());
    }

    private boolean checkIfHiveTableIsExternal() {
        // A table whose location lies outside the configured warehouse
        // directory is treated as external.
        String hiveWarehouseDir = hiveConf.get("hive.metastore.warehouse.dir");
        if (hiveWarehouseDir != null && !storageDescriptor.getLocation().contains(hiveWarehouseDir))
            isTableExternal = true;
        return isTableExternal;
    }
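
    // Example: with hive.metastore.warehouse.dir set to /user/hive/warehouse, a
    // table stored at hdfs://nn/data/ext/orders (a hypothetical path) falls
    // outside the warehouse directory and is therefore flagged as external.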

    private void fillFieldSchema() {

        List<FieldSchema> columns = storageDescriptor.getCols();
        List<FieldSchema> partitionKeys = table.getPartitionKeys();
        List<TableSchemaFieldEntity> listOfHiveTableSchemaFieldEntity = new ArrayList<TableSchemaFieldEntity>();
        fillHiveTableSchemaFields(columns, listOfHiveTableSchemaFieldEntity);
        fillHiveTableSchemaFields(partitionKeys, listOfHiveTableSchemaFieldEntity);
        this.hiveTableEntity.setSchemaFields(listOfHiveTableSchemaFieldEntity);
    }

    private void fillHiveTableSchemaFields(List<FieldSchema> columns,
            List<TableSchemaFieldEntity> listOfHiveTableSchemaFieldEntity) {
        for (FieldSchema fieldSchema : columns) {
            listOfHiveTableSchemaFieldEntity.add(fillHiveTableSchemaField(fieldSchema));
        }
    }

    private TableSchemaFieldEntity fillHiveTableSchemaField(FieldSchema fieldSchema) {

        TableSchemaFieldEntity hiveSchemaField = new TableSchemaFieldEntity();
        hiveSchemaField.setFieldName(fieldSchema.getName());
        String type = fieldSchema.getType();
        if (type.contains("decimal")) {
            // Hive reports decimals as e.g. "decimal(10,2)", so they are matched
            // by substring rather than by exact name.
            hiveSchemaField.setFieldType("java.math.BigDecimal");
            hiveSchemaField.setScale(getScale(type));
            hiveSchemaField.setPrecision(getPrecision(type));
        } else {
            switch (type) {
            case "string":
                hiveSchemaField.setFieldType("java.lang.String");
                break;
            case "int":
                hiveSchemaField.setFieldType("java.lang.Integer");
                break;
            case "bigint":
                hiveSchemaField.setFieldType("java.lang.Long");
                break;
            case "smallint":
                hiveSchemaField.setFieldType("java.lang.Short");
                break;
            case "date":
            case "timestamp":
                hiveSchemaField.setFieldType("java.util.Date");
                break;
            case "double":
                hiveSchemaField.setFieldType("java.lang.Double");
                break;
            case "boolean":
                hiveSchemaField.setFieldType("java.lang.Boolean");
                break;
            case "float":
                hiveSchemaField.setFieldType("java.lang.Float");
                break;
            // Types without a mapping leave fieldType unset.
            }
        }
        return hiveSchemaField;
    }
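
    // Example: a Hive column declared as (price, "decimal(10,2)") maps to a
    // TableSchemaFieldEntity with fieldType java.math.BigDecimal, precision "10"
    // and scale "2"; a column (age, "int") maps to java.lang.Integer.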

    private String getScalePrecision(String typeWithScale, int index) {
        // decimal(p,s): group 1 captures the precision, group 2 the scale.
        Pattern r = Pattern.compile("decimal\\((\\d+),(\\d+)\\)");
        Matcher m = r.matcher(typeWithScale);
        if (m.find()) {
            return m.group(index);
        } else {
            // Sentinel returned when the type string is not a well-formed decimal.
            return "-999";
        }
    }

    private String getScale(String typeWithScale) {
        return getScalePrecision(typeWithScale, 2);
    }

    private String getPrecision(String typeWithPrecision) {
        return getScalePrecision(typeWithPrecision, 1);
    }
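
    // Example: getPrecision("decimal(10,2)") returns "10" and
    // getScale("decimal(10,2)") returns "2"; getScale("decimal") returns "-999".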

    private void setPartitionKeys() {

        List<String> listOfPartitionKeys = new ArrayList<String>();
        for (FieldSchema fieldSchema : table.getPartitionKeys()) {
            listOfPartitionKeys.add(fieldSchema.getName());
        }
        this.hiveTableEntity.setPartitionKeys(String.join(", ", listOfPartitionKeys));
    }

    private void setInputOutputFormat() {
        // Input format class names (e.g. MapredParquetInputFormat,
        // SequenceFileInputFormat) are matched case-insensitively.
        String inputFormat = storageDescriptor.getInputFormat().toLowerCase();
        if (inputFormat.contains("parquet")) {
            this.hiveTableEntity.setInputOutputFormat(InputOutputFormat.PARQUET.getName());
        } else if (inputFormat.contains("sequence")) {
            this.hiveTableEntity.setInputOutputFormat(InputOutputFormat.SEQUENCE.getName());
        } else {
            this.hiveTableEntity.setInputOutputFormat(InputOutputFormat.TEXTDELIMITED.getName());
        }
    }

    private void setTableLocation() {

        this.hiveTableEntity.setLocation(storageDescriptor.getLocation());
    }

    private void setFieldDelimiter() {
        SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo();
        // field.delim is only present for delimited text tables; for other
        // storage formats this sets the delimiter to null.
        this.hiveTableEntity.setFieldDelimiter(serDeInfo.getParameters().get("field.delim"));
    }

    private void setOwner() {

        this.hiveTableEntity.setOwner(table.getOwner());
    }

    public TableEntity getHiveTableSchema() {
        return hiveTableEntity;
    }

}
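
Example

Below is a minimal usage sketch, not part of the original class. The parameter values are hypothetical, and it assumes valid Kerberos credentials plus reachable hdfs-site.xml, core-site.xml and hive-site.xml paths in the service configuration, as the code above requires.

import hydrograph.server.metadata.entity.TableEntity;
import hydrograph.server.metadata.strategy.HiveMetadataStrategy;
import hydrograph.server.utilities.Constants;

import java.util.HashMap;
import java.util.Map;

public class HiveMetadataStrategyExample {
    public static void main(String[] args) {
        // Hypothetical request parameters keyed by the Constants shown above.
        Map<String, String> params = new HashMap<>();
        params.put(Constants.USERNAME, "etl_user");
        params.put(Constants.SERVICE_PWD, "secret");
        params.put(Constants.DATABASE_NAME, "sales");
        params.put(Constants.TABLENAME, "orders");

        HiveMetadataStrategy strategy = new HiveMetadataStrategy();
        strategy.setConnection(params);        // Kerberos login + metastore lookup
        TableEntity entity = strategy.fillComponentSchema(params);
        // entity now carries the table's location, input/output format,
        // partition keys, field delimiter, owner and mapped schema fields.
    }
}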