Java tutorial
/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package hydrograph.server.metadata.strategy; import hydrograph.server.metadata.entity.TableEntity; import hydrograph.server.metadata.entity.TableSchemaFieldEntity; import hydrograph.server.metadata.exception.ParamsCannotBeNullOrEmpty; import hydrograph.server.metadata.strategy.base.MetadataStrategyTemplate; import hydrograph.server.utilities.Constants; import hydrograph.server.utilities.ServiceUtilities; import hydrograph.server.utilities.kerberos.KerberosUtilities; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.*; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.security.auth.login.LoginException; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * <p> * Concrete implementation for to retrieve hive meta store details. * </p> * <p> * This class requires kerberos token for security purpose authentication. * </p> */ public class HiveMetadataStrategy extends MetadataStrategyTemplate { /** * Used to set the connection for RedShift * * @param connectionProperties - contain request params details * @throws ClassNotFoundException * @throws SQLException */ @SuppressWarnings("unchecked") @Override public void setConnection(Map connectionProperties) { String userId = connectionProperties .getOrDefault(Constants.USERNAME, new ParamsCannotBeNullOrEmpty(Constants.USERNAME + " not found in request parameter")) .toString(); String service_pwd = connectionProperties .getOrDefault(Constants.SERVICE_PWD, new ParamsCannotBeNullOrEmpty(Constants.SERVICE_PWD + " not found in request parameter")) .toString(); String databaseName = connectionProperties.getOrDefault(Constants.DATABASE_NAME, new ParamsCannotBeNullOrEmpty(Constants.DATABASE_NAME + " name not found in request parameter")) .toString(); String tableName = connectionProperties .getOrDefault(Constants.TABLENAME, new ParamsCannotBeNullOrEmpty(Constants.TABLENAME + " not found in request parameter")) .toString(); KerberosUtilities kerberosUtilities = new KerberosUtilities(); Configuration conf = new Configuration(); // load hdfs-site.xml and core-site.xml String hdfsConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.HDFS_SITE_CONFIG_PATH); String coreSiteConfigPath = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.CORE_SITE_CONFIG_PATH); LOG.debug("Loading hdfs-site.xml:" + hdfsConfigPath); conf.addResource(new Path(hdfsConfigPath)); LOG.debug("Loading hdfs-site.xml:" + coreSiteConfigPath); conf.addResource(new Path(coreSiteConfigPath)); try { kerberosUtilities.applyKerberosToken(userId, service_pwd, conf); } catch (LoginException e1) { throw new RuntimeException("Unable to login " + e1.getMessage()); } catch (IOException e1) { throw new RuntimeException("Login failed : " + e1.getMessage()); } this.hiveConf = new HiveConf(); String pathToHiveSiteXml = ServiceUtilities.getServiceConfigResourceBundle() .getString(Constants.HIVE_SITE_CONFIG_PATH); if (pathToHiveSiteXml.equals(null) || pathToHiveSiteXml.equals("")) { LOG.error("Error loading hive-site.xml: Path to hive-site.xml should not be null or empty."); throw new RuntimeException( "Error loading hive-site.xml: Path to hive-site.xml should not be null or empty."); } LOG.debug("Loading hive-site.xml: " + pathToHiveSiteXml); hiveConf.addResource(new Path(pathToHiveSiteXml)); HiveMetaStoreClient client; try { client = new HiveMetaStoreClient(hiveConf); this.table = client.getTable(databaseName, tableName); this.storageDescriptor = table.getSd(); } catch (MetaException e) { throw new RuntimeException(e.getMessage()); } catch (NoSuchObjectException e) { throw new RuntimeException(e.getMessage()); } catch (TException e) { throw new RuntimeException(e.getMessage()); } } /** * @param componentSchemaProperties - Contain request parameter details * @return {@link TableEntity} */ @SuppressWarnings("unchecked") @Override public TableEntity fillComponentSchema(Map componentSchemaProperties) { // hiveTableEntity = new HiveTableSchema(database, tableName); hiveTableEntity = new TableEntity(); hiveTableEntity .setDatabaseName( componentSchemaProperties .getOrDefault(Constants.DATABASE_NAME, new ParamsCannotBeNullOrEmpty( Constants.DATABASE_NAME + " not found in request parameter")) .toString()); hiveTableEntity .setTableName( componentSchemaProperties .getOrDefault(Constants.TABLENAME, new ParamsCannotBeNullOrEmpty( Constants.TABLENAME + " not found in request parameter")) .toString()); fillHiveTableSchema(); hiveTableEntity = getHiveTableSchema(); return hiveTableEntity; } private enum InputOutputFormat { PARQUET("parquet"), TEXTDELIMITED("textdelimited"), SEQUENCE("sequence"); private String name; InputOutputFormat(String name) { this.name = name; } public String getName() { return name; } } private static final Logger LOG = LoggerFactory.getLogger(HiveMetadataStrategy.class); HiveConf hiveConf = null; TableEntity hiveTableEntity = null; StorageDescriptor storageDescriptor = null; Table table; boolean isTableExternal; private void fillHiveTableSchema() { setTableLocation(); setExternalTable(); setInputOutputFormat(); setPartitionKeys(); setFieldDelimiter(); setOwner(); fillFieldSchema(); } private void setExternalTable() { if (checkIfhiveTableIsExternal()) this.hiveTableEntity.setExternalTableLocation(storageDescriptor.getLocation()); } private boolean checkIfhiveTableIsExternal() { String hiveWarehouseDir = hiveConf.get("hive.metastore.warehouse.dir"); if (!storageDescriptor.getLocation().contains(hiveWarehouseDir)) isTableExternal = true; return isTableExternal; } private void fillFieldSchema() { List<FieldSchema> columns = storageDescriptor.getCols(); List<FieldSchema> partitionKeys = table.getPartitionKeys(); List<TableSchemaFieldEntity> listOfHiveTableSchemaFieldEntity = new ArrayList<TableSchemaFieldEntity>(); fillHiveTableSchemaFields(columns, listOfHiveTableSchemaFieldEntity); fillHiveTableSchemaFields(partitionKeys, listOfHiveTableSchemaFieldEntity); this.hiveTableEntity.setSchemaFields(listOfHiveTableSchemaFieldEntity); } private void fillHiveTableSchemaFields(List<FieldSchema> columns, List<TableSchemaFieldEntity> listOfHiveTableSchemaFieldEntity) { for (FieldSchema fieldSchema : columns) { TableSchemaFieldEntity hiveSchemaField = new TableSchemaFieldEntity(); hiveSchemaField = fillHiveTableSchemaField(fieldSchema); listOfHiveTableSchemaFieldEntity.add(hiveSchemaField); } } private TableSchemaFieldEntity fillHiveTableSchemaField(FieldSchema fieldSchema) { TableSchemaFieldEntity hiveSchemaField = new TableSchemaFieldEntity(); hiveSchemaField.setFieldName(fieldSchema.getName()); if (fieldSchema.getType().equals("string")) { hiveSchemaField.setFieldType("java.lang.String"); } else if (fieldSchema.getType().equals("int")) { hiveSchemaField.setFieldType("java.lang.Integer"); } else if (fieldSchema.getType().equals("bigint")) { hiveSchemaField.setFieldType("java.lang.Long"); } else if (fieldSchema.getType().equals("smallint")) { hiveSchemaField.setFieldType("java.lang.Short"); } else if (fieldSchema.getType().equals("date")) { hiveSchemaField.setFieldType("java.util.Date"); } else if (fieldSchema.getType().equals("timestamp")) { hiveSchemaField.setFieldType("java.util.Date"); } else if (fieldSchema.getType().equals("double")) { hiveSchemaField.setFieldType("java.lang.Double"); } else if (fieldSchema.getType().equals("boolean")) { hiveSchemaField.setFieldType("java.lang.Boolean"); } else if (fieldSchema.getType().equals("float")) { hiveSchemaField.setFieldType("java.lang.Float"); } else if (fieldSchema.getType().contains("decimal")) { hiveSchemaField.setFieldType("java.math.BigDecimal"); hiveSchemaField.setScale(getScale(fieldSchema.getType())); hiveSchemaField.setPrecision(getPrecision(fieldSchema.getType())); } return hiveSchemaField; } private String getScalePrecision(String typeWithScale, int index) { String pattern = "decimal\\((\\d+),(\\d+)\\)"; Pattern r = Pattern.compile(pattern); Matcher m = r.matcher(typeWithScale); if (m.find()) { return m.group(index); } else { return "-999"; } } private String getScale(String typeWithScale) { return getScalePrecision(typeWithScale, 2); } private String getPrecision(String typeWithPrecision) { return getScalePrecision(typeWithPrecision, 1); } private void setPartitionKeys() { List<String> listOfPartitionKeys = new ArrayList<String>(); for (FieldSchema fieldSchema : table.getPartitionKeys()) { listOfPartitionKeys.add(fieldSchema.getName()); } this.hiveTableEntity.setPartitionKeys(listOfPartitionKeys.toString().replace("[", "").replace("]", "")); } private void setInputOutputFormat() { if (storageDescriptor.getInputFormat().contains("parquet")) { this.hiveTableEntity.setInputOutputFormat(InputOutputFormat.PARQUET.getName()); } else if (storageDescriptor.getInputFormat().contains("sequence")) { this.hiveTableEntity.setInputOutputFormat(InputOutputFormat.SEQUENCE.getName()); } else { this.hiveTableEntity.setInputOutputFormat(InputOutputFormat.TEXTDELIMITED.getName()); } } private void setTableLocation() { this.hiveTableEntity.setLocation(storageDescriptor.getLocation()); } private void setFieldDelimiter() { SerDeInfo serDeInfo = storageDescriptor.getSerdeInfo(); this.hiveTableEntity.setFieldDelimiter(serDeInfo.getParameters().get("field.delim")); } private void setOwner() { this.hiveTableEntity.setOwner(table.getOwner()); } public TableEntity getHiveTableSchema() { return hiveTableEntity; } }