org.apache.hadoop.dynamodb.DynamoDBClient.java Source code

Java tutorial

Introduction

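Here is the source code for org.apache.hadoop.dynamodb.DynamoDBClient.java, a wrapper around the AWS SDK's AmazonDynamoDBClient that adds retried describe/scan/query calls and buffered batch writes.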

Source

/**
 * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file
 * except in compliance with the License. A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0/
 *
 * or in the "LICENSE.TXT" file accompanying this file. This file is distributed on an "AS IS"
 * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under the License.
 */

package org.apache.hadoop.dynamodb;

import static org.apache.hadoop.dynamodb.DynamoDBConstants.DEFAULT_MAX_BATCH_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.DEFAULT_MAX_ITEMS_PER_BATCH;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.DEFAULT_MAX_ITEM_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.MAX_BATCH_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.MAX_ITEMS_PER_BATCH;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.MAX_ITEM_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBUtil.getDynamoDBEndpoint;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.primitives.Ints;

import com.amazonaws.AmazonClientException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSCredentialsProviderChain;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.BatchWriteItemRequest;
import com.amazonaws.services.dynamodbv2.model.BatchWriteItemResult;
import com.amazonaws.services.dynamodbv2.model.Condition;
import com.amazonaws.services.dynamodbv2.model.ConsumedCapacity;
import com.amazonaws.services.dynamodbv2.model.DescribeTableRequest;
import com.amazonaws.services.dynamodbv2.model.DescribeTableResult;
import com.amazonaws.services.dynamodbv2.model.PutRequest;
import com.amazonaws.services.dynamodbv2.model.QueryRequest;
import com.amazonaws.services.dynamodbv2.model.QueryResult;
import com.amazonaws.services.dynamodbv2.model.ReturnConsumedCapacity;
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
import com.amazonaws.services.dynamodbv2.model.ScanResult;
import com.amazonaws.services.dynamodbv2.model.TableDescription;
import com.amazonaws.services.dynamodbv2.model.WriteRequest;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.dynamodb.DynamoDBFibonacciRetryer.RetryResult;
import org.apache.hadoop.dynamodb.filter.DynamoDBQueryFilter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;
import org.joda.time.Duration;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.Callable;

/**
 * Wrapper around {@link AmazonDynamoDBClient} that runs describe/scan/query calls through a
 * {@link DynamoDBFibonacciRetryer} and buffers put requests so they can be sent with
 * {@code BatchWriteItem}.
 *
 * <p>Instances keep unsynchronized mutable state (the pending write buffer, its byte count and
 * the batch-write retry counter); no locking is performed in this class, so callers that share
 * an instance across threads must coordinate externally.
 */
public class DynamoDBClient {

    private static final Log log = LogFactory.getLog(DynamoDBClient.class);

    /** Duration, in minutes, handed to every {@link DynamoDBFibonacciRetryer} created here. */
    private static final int DEFAULT_RETRY_DURATION = 10;
    /** Upper bound for the pause inserted between batch-write attempts. */
    private static final long MAX_BACKOFF_IN_MILLISECONDS = 1000 * 3;
    /** Shared source of jitter for {@link #pauseExponentially(int)}. */
    private static final Random BACKOFF_JITTER = new Random();
    private static final CredentialPairName DYNAMODB_CREDENTIAL_PAIR_NAME = new CredentialPairName(
            DynamoDBConstants.DYNAMODB_ACCESS_KEY_CONF, DynamoDBConstants.DYNAMODB_SECRET_KEY_CONF);
    private static final CredentialPairName DEFAULT_CREDENTIAL_PAIR_NAME = new CredentialPairName(
            DynamoDBConstants.DEFAULT_ACCESS_KEY_CONF, DynamoDBConstants.DEFAULT_SECRET_KEY_CONF);

    /** Pending write requests keyed by table name; flushed by {@link #writeBatch}. */
    private final Map<String, List<WriteRequest>> writeBatchMap = new HashMap<>();
    private final AmazonDynamoDBClient dynamoDB;
    /** Sum of the item sizes (in bytes) currently held in {@link #writeBatchMap}. */
    private int writeBatchMapSizeBytes;
    /** Consecutive batch writes that came back with unprocessed items; drives the backoff. */
    private int batchWriteRetries;
    private final Configuration config;
    private final long maxBatchSize;
    private final long maxItemByteSize;

    // For unit testing only
    public DynamoDBClient() {
        dynamoDB = null;
        config = null;
        maxBatchSize = DEFAULT_MAX_BATCH_SIZE;
        maxItemByteSize = DEFAULT_MAX_ITEM_SIZE;
    }

    /**
     * Creates a client from the given configuration without an explicit region.
     *
     * @param conf Hadoop configuration; must not be null
     */
    public DynamoDBClient(Configuration conf) {
        this(conf, null);
    }

    /**
     * Creates a client whose endpoint is resolved from the configuration and region.
     *
     * @param conf   Hadoop configuration; must not be null
     * @param region region passed to {@code DynamoDBUtil.getDynamoDBEndpoint}; may be null
     * @throws NullPointerException if {@code conf} is null
     */
    public DynamoDBClient(Configuration conf, String region) {
        Preconditions.checkNotNull(conf, "conf cannot be null.");
        config = conf;
        dynamoDB = getDynamoDBClient(conf);
        dynamoDB.setEndpoint(getDynamoDBEndpoint(conf, region));
        maxBatchSize = config.getLong(MAX_BATCH_SIZE, DEFAULT_MAX_BATCH_SIZE);
        maxItemByteSize = config.getLong(MAX_ITEM_SIZE, DEFAULT_MAX_ITEM_SIZE);
    }

    /**
     * Looks up the description of a table, retrying through the retry driver.
     *
     * @param tableName name of the table to describe
     * @return the table description returned by DynamoDB
     * @throws RuntimeException wrapping any failure raised while describing the table
     */
    public TableDescription describeTable(String tableName) {
        final DescribeTableRequest describeTablesRequest = new DescribeTableRequest().withTableName(tableName);
        try {
            RetryResult<DescribeTableResult> describeResult = getRetryDriver()
                    .runWithRetry(new Callable<DescribeTableResult>() {
                        @Override
                        public DescribeTableResult call() {
                            DescribeTableResult result = dynamoDB.describeTable(describeTablesRequest);
                            log.info("Describe table output: " + result);
                            return result;
                        }
                    }, null, null);
            return describeResult.result.getTable();
        } catch (Exception e) {
            throw new RuntimeException("Could not lookup table " + tableName + " in DynamoDB.", e);
        }
    }

    /**
     * Runs one (optionally segmented) scan request against a table.
     *
     * @param tableName           table to scan
     * @param dynamoDBQueryFilter optional filter; its scan filter is applied when non-empty
     * @param segment             segment index for a parallel scan
     * @param totalSegments       total number of segments for a parallel scan
     * @param exclusiveStartKey   key to resume from, or null to start at the beginning
     * @param limit               maximum number of items to evaluate; must fit in an int
     * @param reporter            reporter handed to the retry driver
     * @return the scan result together with the retry information
     * @throws IllegalArgumentException if {@code limit} does not fit in an int
     */
    public RetryResult<ScanResult> scanTable(String tableName, DynamoDBQueryFilter dynamoDBQueryFilter,
            Integer segment, Integer totalSegments, Map<String, AttributeValue> exclusiveStartKey, long limit,
            Reporter reporter) {
        final ScanRequest scanRequest = new ScanRequest(tableName).withExclusiveStartKey(exclusiveStartKey)
                .withLimit(Ints.checkedCast(limit)).withSegment(segment).withTotalSegments(totalSegments)
                .withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL);

        if (dynamoDBQueryFilter != null) {
            Map<String, Condition> scanFilter = dynamoDBQueryFilter.getScanFilter();
            // Guard against a filter that reports no scan conditions as null rather than empty.
            if (scanFilter != null && !scanFilter.isEmpty()) {
                scanRequest.setScanFilter(scanFilter);
            }
        }

        return getRetryDriver().runWithRetry(new Callable<ScanResult>() {
            @Override
            public ScanResult call() {
                log.debug("Executing DynamoDB scan: " + scanRequest);
                return dynamoDB.scan(scanRequest);
            }
        }, reporter, PrintCounter.DynamoDBReadThrottle);
    }

    /**
     * Runs one query request against a table using the filter's key conditions.
     *
     * @param tableName           table to query
     * @param dynamoDBQueryFilter filter supplying the key conditions; must not be null
     * @param exclusiveStartKey   key to resume from, or null to start at the beginning
     * @param limit               maximum number of items to evaluate; must fit in an int
     * @param reporter            reporter handed to the retry driver
     * @return the query result together with the retry information
     * @throws NullPointerException     if {@code dynamoDBQueryFilter} is null
     * @throws IllegalArgumentException if {@code limit} does not fit in an int
     */
    public RetryResult<QueryResult> queryTable(String tableName, DynamoDBQueryFilter dynamoDBQueryFilter,
            Map<String, AttributeValue> exclusiveStartKey, long limit, Reporter reporter) {
        // Same exception type as the implicit dereference below, but with a usable message.
        Preconditions.checkNotNull(dynamoDBQueryFilter, "dynamoDBQueryFilter cannot be null.");
        final QueryRequest queryRequest = new QueryRequest().withTableName(tableName)
                .withExclusiveStartKey(exclusiveStartKey).withKeyConditions(dynamoDBQueryFilter.getKeyConditions())
                .withLimit(Ints.checkedCast(limit)).withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL);

        return getRetryDriver().runWithRetry(new Callable<QueryResult>() {
            @Override
            public QueryResult call() {
                log.debug("Executing DynamoDB query: " + queryRequest);
                return dynamoDB.query(queryRequest);
            }
        }, reporter, PrintCounter.DynamoDBReadThrottle);
    }

    /**
     * Buffers a put request for the given table, flushing the buffer first when the table's
     * pending list has reached the per-batch item limit or adding the item would push the
     * buffered bytes over the batch size limit.
     *
     * @param tableName        destination table
     * @param item             item to put
     * @param maxItemsPerBatch requested per-batch item limit; bounded via
     *                         {@code DynamoDBUtil.getBoundedBatchLimit}
     * @param reporter         reporter handed to the retry driver if a flush happens
     * @return the result of the flush triggered by this call, or null if nothing was flushed
     * @throws RuntimeException if the item is larger than the configured maximum item size
     */
    public BatchWriteItemResult putBatch(String tableName, Map<String, AttributeValue> item, long maxItemsPerBatch,
            Reporter reporter) throws UnsupportedEncodingException {
        int itemSizeBytes = DynamoDBUtil.getItemSizeBytes(item);
        if (itemSizeBytes > maxItemByteSize) {
            throw new RuntimeException("Cannot pass items with size greater than " + maxItemByteSize
                    + ". Item with size of " + itemSizeBytes + " was given.");
        }
        maxItemsPerBatch = DynamoDBUtil.getBoundedBatchLimit(config, maxItemsPerBatch);

        BatchWriteItemResult result = null;
        List<WriteRequest> pending = writeBatchMap.get(tableName);
        if (pending != null) {
            boolean writeRequestsForTableAtLimit = pending.size() >= maxItemsPerBatch;
            boolean totalSizeOfWriteBatchesOverLimit = writeBatchMapSizeBytes + itemSizeBytes > maxBatchSize;
            if (writeRequestsForTableAtLimit || totalSizeOfWriteBatchesOverLimit) {
                result = writeBatch(reporter, itemSizeBytes);
            }
        }

        // writeBatch() rebuilds writeBatchMap, so the table's list must be looked up again.
        pending = writeBatchMap.get(tableName);
        if (pending == null) {
            pending = new ArrayList<>((int) maxItemsPerBatch);
            writeBatchMap.put(tableName, pending);
        }
        pending.add(new WriteRequest().withPutRequest(new PutRequest().withItem(item)));
        writeBatchMapSizeBytes += itemSizeBytes;

        return result;
    }

    /**
     * Flushes all buffered writes and shuts the underlying client down. The client is shut
     * down even when flushing fails; the flush failure is still propagated to the caller.
     */
    public void close() {
        try {
            while (!writeBatchMap.isEmpty()) {
                writeBatch(Reporter.NULL, 0);
            }
        } finally {
            if (dynamoDB != null) {
                dynamoDB.shutdown();
            }
        }
    }

    /**
     * Sends everything in {@link #writeBatchMap} as one {@code BatchWriteItem} call, then
     * replaces the buffer's contents with whatever the service reported as unprocessed.
     *
     * @param reporter   reporter handed to the retry driver
     * @param roomNeeded number of bytes that writeBatch MUST make room for
     * @return the result of the last batch-write attempt
     */
    private BatchWriteItemResult writeBatch(Reporter reporter, final int roomNeeded) {
        final BatchWriteItemRequest batchWriteItemRequest = new BatchWriteItemRequest()
                .withRequestItems(writeBatchMap).withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL);

        RetryResult<BatchWriteItemResult> retryResult = getRetryDriver()
                .runWithRetry(new Callable<BatchWriteItemResult>() {
                    @Override
                    public BatchWriteItemResult call() throws UnsupportedEncodingException, InterruptedException {
                        pauseExponentially(batchWriteRetries);
                        BatchWriteItemResult result = dynamoDB.batchWriteItem(batchWriteItemRequest);

                        Map<String, List<WriteRequest>> unprocessedItems = result.getUnprocessedItems();
                        if (unprocessedItems == null || unprocessedItems.isEmpty()) {
                            batchWriteRetries = 0;
                            return result;
                        }
                        batchWriteRetries++;

                        // Loop-invariant, so read once instead of once per table.
                        long maxItemsPerBatch = config.getLong(MAX_ITEMS_PER_BATCH, DEFAULT_MAX_ITEMS_PER_BATCH);

                        int unprocessedItemCount = 0;
                        for (List<WriteRequest> unprocessedWriteRequests : unprocessedItems.values()) {
                            unprocessedItemCount += unprocessedWriteRequests.size();

                            int batchSizeBytes = 0;
                            for (WriteRequest request : unprocessedWriteRequests) {
                                batchSizeBytes += DynamoDBUtil.getItemSizeBytes(request.getPutRequest().getItem());
                            }

                            // Throwing hands the attempt back to the retry driver: the leftovers
                            // for this table would not leave the room the caller asked for.
                            if (unprocessedWriteRequests.size() >= maxItemsPerBatch
                                    || (maxBatchSize - batchSizeBytes) < roomNeeded) {
                                throw new AmazonClientException("Full list of write requests not processed");
                            }
                        }

                        if (log.isDebugEnabled()) {
                            // Sum across all tables; entries or unit values may be absent.
                            double consumed = 0.0;
                            List<ConsumedCapacity> capacities = result.getConsumedCapacity();
                            if (capacities != null) {
                                for (ConsumedCapacity consumedCapacity : capacities) {
                                    Double units = consumedCapacity.getCapacityUnits();
                                    if (units != null) {
                                        consumed += units;
                                    }
                                }
                            }

                            int batchSize = 0;
                            for (List<WriteRequest> writeRequests : batchWriteItemRequest.getRequestItems()
                                    .values()) {
                                batchSize += writeRequests.size();
                            }

                            log.debug("BatchWriteItem attempted " + batchSize + " items, consumed " + consumed
                                    + " wcu, left unprocessed " + unprocessedItemCount + " items, now at "
                                    + batchWriteRetries + " retries");
                        }
                        return result;
                    }
                }, reporter, PrintCounter.DynamoDBWriteThrottle);

        writeBatchMap.clear();
        writeBatchMapSizeBytes = 0;

        // If some items failed to go through, add them back to the writeBatchMap
        Map<String, List<WriteRequest>> unprocessedItems = retryResult.result.getUnprocessedItems();
        if (unprocessedItems != null) {
            for (Entry<String, List<WriteRequest>> entry : unprocessedItems.entrySet()) {
                List<WriteRequest> requests = entry.getValue();
                for (WriteRequest request : requests) {
                    writeBatchMapSizeBytes += DynamoDBUtil.getItemSizeBytes(request.getPutRequest().getItem());
                }
                // Copy so later putBatch() calls append to a list this class owns.
                writeBatchMap.put(entry.getKey(), new ArrayList<WriteRequest>(requests));
            }
        }
        return retryResult.result;
    }

    /** Creates a fresh retry driver configured with {@link #DEFAULT_RETRY_DURATION} minutes. */
    private DynamoDBFibonacciRetryer getRetryDriver() {
        return new DynamoDBFibonacciRetryer(Duration.standardMinutes(DEFAULT_RETRY_DURATION));
    }

    /**
     * Sleeps for a jittered, exponentially growing delay capped at
     * {@link #MAX_BACKOFF_IN_MILLISECONDS}; returns immediately when {@code retries} is zero.
     *
     * @param retries number of consecutive batch writes that left items unprocessed
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    private void pauseExponentially(int retries) throws InterruptedException {
        if (retries == 0) {
            return;
        }
        long scaleFactor = 500 + BACKOFF_JITTER.nextInt(100);
        long delay = (long) (Math.pow(2, retries) * scaleFactor) / 4;
        delay = Math.min(delay, MAX_BACKOFF_IN_MILLISECONDS);
        log.info("Pausing " + delay + " ms at retry " + retries);
        Thread.sleep(delay);
    }

    /** Builds the SDK client with a single SDK-level error retry and any configured proxy. */
    private AmazonDynamoDBClient getDynamoDBClient(Configuration conf) {
        ClientConfiguration clientConfig = new ClientConfiguration().withMaxErrorRetry(1);
        applyProxyConfiguration(clientConfig, conf);
        return new AmazonDynamoDBClient(getAWSCredentialsProvider(conf), clientConfig);
    }

    /**
     * Copies proxy settings from the Hadoop configuration onto the SDK client configuration.
     * Host and port must be given together, username and password must be given together,
     * and credentials are only accepted when host and port are present.
     *
     * @param clientConfig SDK client configuration to update
     * @param conf         Hadoop configuration to read proxy settings from
     * @throws RuntimeException if the proxy settings are only partially specified
     */
    @VisibleForTesting
    void applyProxyConfiguration(ClientConfiguration clientConfig, Configuration conf) {
        final String proxyHost = conf.get(DynamoDBConstants.PROXY_HOST);
        final int proxyPort = conf.getInt(DynamoDBConstants.PROXY_PORT, 0);
        final String proxyUsername = conf.get(DynamoDBConstants.PROXY_USERNAME);
        final String proxyPassword = conf.get(DynamoDBConstants.PROXY_PASSWORD);

        final boolean hostMissing = Strings.isNullOrEmpty(proxyHost);
        final boolean portMissing = proxyPort <= 0;
        boolean proxyHostAndPortPresent = false;
        if (!hostMissing && !portMissing) {
            clientConfig.setProxyHost(proxyHost);
            clientConfig.setProxyPort(proxyPort);
            proxyHostAndPortPresent = true;
        } else if (hostMissing ^ portMissing) {
            throw new RuntimeException("Only one of proxy host and port are set, when both are required");
        }

        final boolean usernameMissing = Strings.isNullOrEmpty(proxyUsername);
        final boolean passwordMissing = Strings.isNullOrEmpty(proxyPassword);
        if (!usernameMissing && !passwordMissing) {
            if (!proxyHostAndPortPresent) {
                throw new RuntimeException(
                        "Proxy host and port must be supplied if proxy username and " + "password are present");
            }
            clientConfig.setProxyUsername(proxyUsername);
            clientConfig.setProxyPassword(proxyPassword);
        } else if (usernameMissing ^ passwordMissing) {
            throw new RuntimeException(
                    "Only one of proxy username and password are set, when both are " + "required");
        }
    }

    /**
     * Assembles the credentials provider chain: an optional custom provider named in the
     * configuration, followed by either static keys from the configuration (DynamoDB-specific
     * keys first, then the default keys) or the instance profile provider when no complete
     * key pair is configured.
     *
     * @param conf Hadoop configuration to read credential settings from
     * @return a provider chain that reuses the last provider that succeeded
     * @throws RuntimeException if a custom provider class is configured but cannot be found
     */
    protected AWSCredentialsProvider getAWSCredentialsProvider(Configuration conf) {
        List<AWSCredentialsProvider> providersList = new ArrayList<>();

        // try to load custom credential provider, fail if a provider is specified but cannot be
        // initialized
        String providerClass = conf.get(DynamoDBConstants.CUSTOM_CREDENTIALS_PROVIDER_CONF);
        if (!Strings.isNullOrEmpty(providerClass)) {
            try {
                providersList.add(
                        (AWSCredentialsProvider) ReflectionUtils.newInstance(Class.forName(providerClass), conf));
            } catch (ClassNotFoundException e) {
                throw new RuntimeException("Custom AWSCredentialsProvider not found: " + providerClass, e);
            }
        }

        // try to fetch credentials from core-site
        String accessKey = conf.get(DYNAMODB_CREDENTIAL_PAIR_NAME.getAccessKeyName());
        String secretKey;
        if (Strings.isNullOrEmpty(accessKey)) {
            accessKey = conf.get(DEFAULT_CREDENTIAL_PAIR_NAME.getAccessKeyName());
            secretKey = conf.get(DEFAULT_CREDENTIAL_PAIR_NAME.getSecretKeyName());
        } else {
            secretKey = conf.get(DYNAMODB_CREDENTIAL_PAIR_NAME.getSecretKeyName());
        }

        if (Strings.isNullOrEmpty(accessKey) || Strings.isNullOrEmpty(secretKey)) {
            providersList.add(new InstanceProfileCredentialsProvider());
        } else {
            final AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
            providersList.add(new AWSCredentialsProvider() {
                @Override
                public AWSCredentials getCredentials() {
                    return credentials;
                }

                @Override
                public void refresh() {
                    // Static credentials: nothing to refresh.
                }
            });
        }

        AWSCredentialsProvider[] providerArray = providersList
                .toArray(new AWSCredentialsProvider[providersList.size()]);

        AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providerArray);
        providerChain.setReuseLastProvider(true);
        return providerChain;
    }

}