// Java tutorial
/** * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file * except in compliance with the License. A copy of the License is located at * * http://aws.amazon.com/apache2.0/ * * or in the "LICENSE.TXT" file accompanying this file. This file is distributed on an "AS IS" * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under the License. */ package org.apache.hadoop.hive.dynamodb; import com.google.common.base.Strings; import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; import com.amazonaws.services.dynamodbv2.model.TableDescription; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.dynamodb.DynamoDBClient; import org.apache.hadoop.dynamodb.DynamoDBConstants; import org.apache.hadoop.dynamodb.DynamoDBItemWritable; import org.apache.hadoop.dynamodb.DynamoDBUtil; import org.apache.hadoop.hive.dynamodb.filter.DynamoDBFilterPushdown; import org.apache.hadoop.hive.dynamodb.read.HiveDynamoDBInputFormat; import org.apache.hadoop.hive.dynamodb.type.HiveDynamoDBType; import org.apache.hadoop.hive.dynamodb.type.HiveDynamoDBTypeFactory; import org.apache.hadoop.hive.dynamodb.util.HiveDynamoDBUtil; import org.apache.hadoop.hive.dynamodb.write.HiveDynamoDBOutputFormat; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler; import 
org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputFormat; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; public class DynamoDBStorageHandler implements HiveMetaHook, HiveStoragePredicateHandler, HiveStorageHandler { private static final Log log = LogFactory.getLog(DynamoDBStorageHandler.class); private Configuration conf; @Override public void commitCreateTable(Table table) throws MetaException { } @Override public void commitDropTable(Table table, boolean deleteData) throws MetaException { } @Override public void preCreateTable(Table table) throws MetaException { DynamoDBClient client = createDynamoDBClient(table); try { boolean isExternal = MetaStoreUtils.isExternalTable(table); if (!isExternal) { throw new MetaException("Only EXTERNAL tables are supported for DynamoDB."); } String tableName = HiveDynamoDBUtil.getDynamoDBTableName( table.getParameters().get(DynamoDBConstants.TABLE_NAME), table.getTableName()); TableDescription tableDescription = client.describeTable(tableName); checkTableStatus(tableDescription); checkTableSchemaMapping(tableDescription, table); checkTableSchemaType(tableDescription, table); } finally { client.close(); } } @Override public void preDropTable(Table table) throws MetaException { } @Override public void rollbackCreateTable(Table table) throws MetaException { } @Override public void rollbackDropTable(Table table) throws MetaException { } @Override public DecomposedPredicate decomposePredicate(JobConf jobConf, Deserializer 
deserializer, ExprNodeDesc predicate) { if (jobConf.getBoolean(DynamoDBConstants.DYNAMODB_FILTER_PUSHDOWN, true)) { return new DynamoDBFilterPushdown().pushPredicate(HiveDynamoDBUtil.extractHiveTypeMapping(jobConf), predicate); } else { return null; } } @Override public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) { DynamoDBClient client = new DynamoDBClient(conf, tableDesc.getProperties().getProperty(DynamoDBConstants.REGION)); try { String tableName = HiveDynamoDBUtil.getDynamoDBTableName( tableDesc.getProperties().getProperty(DynamoDBConstants.TABLE_NAME), tableDesc.getTableName()); TableDescription description = client.describeTable(tableName); Double averageItemSize = DynamoDBUtil.calculateAverageItemSize(description); log.info("Average item size: " + averageItemSize); String endpoint = conf.get(DynamoDBConstants.ENDPOINT); if (!Strings.isNullOrEmpty(tableDesc.getProperties().getProperty(DynamoDBConstants.ENDPOINT))) { endpoint = tableDesc.getProperties().getProperty(DynamoDBConstants.ENDPOINT); } if (!Strings.isNullOrEmpty(endpoint)) { jobProperties.put(DynamoDBConstants.ENDPOINT, endpoint); } if (!Strings.isNullOrEmpty(tableDesc.getProperties().getProperty(DynamoDBConstants.REGION))) { jobProperties.put(DynamoDBConstants.REGION, tableDesc.getProperties().getProperty(DynamoDBConstants.REGION)); } jobProperties.put(DynamoDBConstants.OUTPUT_TABLE_NAME, tableName); jobProperties.put(DynamoDBConstants.INPUT_TABLE_NAME, tableName); jobProperties.put(DynamoDBConstants.TABLE_NAME, tableName); Map<String, String> hiveToDynamoDBSchemaMapping = HiveDynamoDBUtil.getHiveToDynamoDBSchemaMapping( tableDesc.getProperties().getProperty(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING)); // Column map can be null if only full backup is being used if (hiveToDynamoDBSchemaMapping != null) { jobProperties.put(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING, HiveDynamoDBUtil.toJsonString(hiveToDynamoDBSchemaMapping)); } if 
(tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_READ_PERCENT) != null) { jobProperties.put(DynamoDBConstants.THROUGHPUT_READ_PERCENT, tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_READ_PERCENT)); } if (tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT) != null) { jobProperties.put(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT, tableDesc.getProperties().getProperty(DynamoDBConstants.THROUGHPUT_WRITE_PERCENT)); } String readThroughput = description.getProvisionedThroughput().getReadCapacityUnits().toString(); String writeThroughput = description.getProvisionedThroughput().getWriteCapacityUnits().toString(); jobProperties.put(DynamoDBConstants.READ_THROUGHPUT, readThroughput); jobProperties.put(DynamoDBConstants.WRITE_THROUGHPUT, writeThroughput); jobProperties.put(DynamoDBConstants.ITEM_COUNT, description.getItemCount().toString()); jobProperties.put(DynamoDBConstants.TABLE_SIZE_BYTES, description.getTableSizeBytes().toString()); jobProperties.put(DynamoDBConstants.AVG_ITEM_SIZE, averageItemSize.toString()); log.info("Average item size: " + averageItemSize); log.info("Item count: " + description.getItemCount()); log.info("Table size: " + description.getTableSizeBytes()); log.info("Read throughput: " + readThroughput); log.info("Write throughput: " + writeThroughput); } finally { client.close(); } } @Override public Class<? extends InputFormat<Text, DynamoDBItemWritable>> getInputFormatClass() { return HiveDynamoDBInputFormat.class; } @Override public HiveMetaHook getMetaHook() { return this; } @Override public Class<? extends OutputFormat<Text, DynamoDBItemWritable>> getOutputFormatClass() { return HiveDynamoDBOutputFormat.class; } @Override public Class<? 
extends SerDe> getSerDeClass() { return DynamoDBSerDe.class; } @Override public Configuration getConf() { return conf; } @Override public void setConf(Configuration conf) { this.conf = conf; } @Override public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException { return new DefaultHiveAuthorizationProvider(); } @Override public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) { configureTableJobProperties(tableDesc, jobProperties); } @Override public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) { configureTableJobProperties(tableDesc, jobProperties); } void checkTableSchemaMapping(TableDescription tableDescription, Table table) throws MetaException { String mapping = table.getParameters().get(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING); Map<String, String> columnMapping = HiveDynamoDBUtil.getHiveToDynamoDBSchemaMapping(mapping); List<FieldSchema> tableSchema = table.getSd().getCols(); for (FieldSchema fieldSchema : tableSchema) { if (HiveDynamoDBTypeFactory.isHiveDynamoDBItemMapType(fieldSchema.getType())) { // We don't need column mapping as this column contains full // DynamoDB row continue; } String fieldSchemaName = fieldSchema.getName().toLowerCase(); if (columnMapping.containsKey(fieldSchemaName)) { if (columnMapping.get(fieldSchemaName).isEmpty()) { throw new MetaException("Invalid column mapping for column: " + fieldSchemaName); } } else { throw new MetaException("Could not find column mapping for column: " + fieldSchemaName); } } } void checkTableSchemaType(TableDescription tableDescription, Table table) throws MetaException { List<FieldSchema> tableSchema = table.getSd().getCols(); for (FieldSchema fieldSchema : tableSchema) { for (AttributeDefinition definition : tableDescription.getAttributeDefinitions()) { validateKeySchema(definition.getAttributeName(), definition.getAttributeType(), fieldSchema); } // Check for each field type if 
(HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(fieldSchema.getType()) == null) { throw new MetaException( "The hive type " + fieldSchema.getType() + " is not supported in " + "DynamoDB"); } } } private DynamoDBClient createDynamoDBClient(Table table) { String region = table.getParameters().get(DynamoDBConstants.REGION); return new DynamoDBClient(conf, region); } private void validateKeySchema(String attributeName, String attributeType, FieldSchema fieldSchema) throws MetaException { if (fieldSchema.getName().equalsIgnoreCase(attributeName)) { HiveDynamoDBType ddType = HiveDynamoDBTypeFactory.getTypeObjectFromHiveType(fieldSchema.getType()); if ((ddType == null) || (ddType.equals(HiveDynamoDBTypeFactory.DYNAMODB_ITEM_TYPE)) || (!ddType.getDynamoDBType().equals(attributeType))) { throw new MetaException("The key element " + fieldSchema.getName() + " does not match " + "type. DynamoDB Type: " + attributeType + " Hive type: " + fieldSchema.getType()); } } } private void checkTableStatus(TableDescription tableDescription) throws MetaException { String status = tableDescription.getTableStatus(); if ("CREATING".equals(status) || "DELETING".equals(status)) { throw new MetaException("Table " + tableDescription.getTableName() + " is in state " + status); } } @Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { Map<String, String> jobProperties = new HashMap<>(); configureTableJobProperties(tableDesc, jobProperties); for (Entry<String, String> entry : jobProperties.entrySet()) { jobConf.set(entry.getKey(), entry.getValue()); } } }