AmazonKinesisFirehoseToRedshiftSample.java Source code

Introduction

Here is the source code for AmazonKinesisFirehoseToRedshiftSample.java.

Source

/*
 * Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.kinesisfirehose.model.BufferingHints;
import com.amazonaws.services.kinesisfirehose.model.CopyCommand;
import com.amazonaws.services.kinesisfirehose.model.CreateDeliveryStreamRequest;
import com.amazonaws.services.kinesisfirehose.model.DeliveryStreamDescription;
import com.amazonaws.services.kinesisfirehose.model.RedshiftDestinationConfiguration;
import com.amazonaws.services.kinesisfirehose.model.RedshiftDestinationUpdate;
import com.amazonaws.services.kinesisfirehose.model.S3DestinationConfiguration;
import com.amazonaws.services.kinesisfirehose.model.UpdateDestinationRequest;
import com.amazonaws.util.StringUtils;

/**
 * Amazon Kinesis Firehose is a fully managed service for real-time streaming data delivery
 * to destinations such as Amazon S3 and Amazon Redshift. Firehose is part of the Amazon Kinesis
 * streaming data family, along with Amazon Kinesis Streams. With Firehose, you do not need to
 * write any applications or manage any resources. You configure your data producers to send data
 * to Firehose and it automatically delivers the data to the destination that you specified.
 *
 * Detailed Amazon Kinesis Firehose documentation can be found here:
 * https://aws.amazon.com/documentation/firehose/
 *
 * This is a sample Java application that delivers data to an Amazon Redshift destination.
 */
public class AmazonKinesisFirehoseToRedshiftSample extends AbstractAmazonKinesisFirehoseDelivery {

    /*
     * Before running the code:
     *
     * Step 1: Verify that you have AWS access credentials set up under
     * ~/.aws/credentials. If not, fill in your AWS access credentials in the
     * provided credentials file template, and be sure to move the file to the
     * default location (~/.aws/credentials), where the sample code will load
     * the credentials from.
     * https://console.aws.amazon.com/iam/home?#security_credential
     *
     * WARNING: To avoid accidental leakage of your credentials, DO NOT keep the
     * credentials file in your source directory.
     *
     * Step 2: Update the firehosetoredshiftsample.properties file with required parameters.
     */
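
    /*
     * The default credentials file (~/.aws/credentials) uses the standard AWS
     * shared-credentials format. The values below are placeholders for your
     * own keys, not real credentials:
     *
     * [default]
     * aws_access_key_id=YOUR_ACCESS_KEY_ID
     * aws_secret_access_key=YOUR_SECRET_ACCESS_KEY
     */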

    // Redshift properties
    private static String clusterJDBCUrl;
    private static String username;
    private static String password;
    private static String dataTableName;
    private static String copyOptions;
    private static String updatedCopyOptions;

    // Properties file
    private static final String CONFIG_FILE = "firehosetoredshiftsample.properties";

    // Logger
    private static final Log LOG = LogFactory.getLog(AmazonKinesisFirehoseToRedshiftSample.class);

    /**
     * Initialize the parameters.
     *
     * @throws Exception
     */
    private static void init() throws Exception {
        // Load the parameters from properties file
        loadConfig();

        // Initialize the clients
        initClients();

        // Validate AccountId parameter is set
        if (StringUtils.isNullOrEmpty(accountId)) {
            throw new IllegalArgumentException(
                    "AccountId is empty. Please set the customerAccountId property in the " + CONFIG_FILE + " file");
        }
    }

    /**
     * Load the input parameters from the properties file.
     *
     * @throws FileNotFoundException
     * @throws IOException
     */
    private static void loadConfig() throws FileNotFoundException, IOException {
        try (InputStream configStream = Thread.currentThread().getContextClassLoader()
                .getResourceAsStream(CONFIG_FILE)) {
            if (configStream == null) {
                throw new FileNotFoundException(
                        "Unable to find the configuration file " + CONFIG_FILE + " on the classpath");
            }

            properties = new Properties();
            properties.load(configStream);
        }

        // Read properties
        accountId = properties.getProperty("customerAccountId");
        createS3Bucket = Boolean.valueOf(properties.getProperty("createS3Bucket"));
        s3RegionName = properties.getProperty("s3RegionName");
        s3BucketName = properties.getProperty("s3BucketName").trim();
        s3BucketARN = getBucketARN(s3BucketName);
        s3ObjectPrefix = properties.getProperty("s3ObjectPrefix").trim();

        String sizeInMBsProperty = properties.getProperty("destinationSizeInMBs");
        s3DestinationSizeInMBs = StringUtils.isNullOrEmpty(sizeInMBsProperty) ? null
                : Integer.parseInt(sizeInMBsProperty.trim());
        String intervalInSecondsProperty = properties.getProperty("destinationIntervalInSeconds");
        s3DestinationIntervalInSeconds = StringUtils.isNullOrEmpty(intervalInSecondsProperty) ? null
                : Integer.parseInt(intervalInSecondsProperty.trim());

        clusterJDBCUrl = properties.getProperty("clusterJDBCUrl");
        username = properties.getProperty("username");
        password = properties.getProperty("password");
        dataTableName = properties.getProperty("dataTableName");
        copyOptions = properties.getProperty("copyOptions");

        deliveryStreamName = properties.getProperty("deliveryStreamName");
        firehoseRegion = properties.getProperty("firehoseRegion");
        iamRoleName = properties.getProperty("iamRoleName");
        iamRegion = properties.getProperty("iamRegion");

        // Update Delivery Stream Destination related properties
        enableUpdateDestination = Boolean.valueOf(properties.getProperty("updateDestination"));
        updatedCopyOptions = properties.getProperty("updatedCopyOptions");
    }
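
    /*
     * For reference, the keys read above imply a firehosetoredshiftsample.properties
     * file along these lines. The values are illustrative placeholders, not
     * defaults shipped with this sample:
     *
     * customerAccountId=123456789012
     * createS3Bucket=true
     * s3RegionName=us-east-1
     * s3BucketName=my-firehose-bucket
     * s3ObjectPrefix=firehose/
     * destinationSizeInMBs=5
     * destinationIntervalInSeconds=300
     * clusterJDBCUrl=jdbc:redshift://my-cluster.abc123.us-east-1.redshift.amazonaws.com:5439/mydb
     * username=masteruser
     * password=YOUR_PASSWORD
     * dataTableName=firehose_test_table
     * copyOptions=delimiter '|'
     * deliveryStreamName=MyDeliveryStream
     * firehoseRegion=us-east-1
     * iamRoleName=FirehoseToRedshiftRole
     * iamRegion=us-east-1
     * updateDestination=true
     * updatedCopyOptions=delimiter ','
     */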

    public static void main(String[] args) throws Exception {
        init();

        try {
            // Create S3 bucket for DeliveryStream to deliver data
            createS3Bucket();

            // Create the DeliveryStream
            createDeliveryStream();

            // Print the list of delivery streams
            printDeliveryStreams();

            // Put records into DeliveryStream
            LOG.info("Putting records in DeliveryStream : " + deliveryStreamName + " via Put Record method.");
            putRecordIntoDeliveryStream();

            // Batch Put records into DeliveryStream
            LOG.info("Putting records in DeliveryStream : " + deliveryStreamName
                    + " via Put Record Batch method. Now you can check your S3 bucket " + s3BucketName
                    + " for the data delivered by DeliveryStream.");
            putRecordBatchIntoDeliveryStream();

            // Wait long enough for Firehose to write the data to the Redshift destination
            int waitTimeSecs = s3DestinationIntervalInSeconds == null ? DEFAULT_WAIT_INTERVAL_FOR_DATA_DELIVERY_SECS
                    : s3DestinationIntervalInSeconds;
            waitForDataDelivery(waitTimeSecs);

            // Update the DeliveryStream and Put records into updated DeliveryStream, only if the flag is set
            if (enableUpdateDestination) {
                // Update the DeliveryStream
                updateDeliveryStream();

                // Wait for the updated configuration options to propagate before ingesting data
                LOG.info("Waiting 60 seconds for the updated configuration options to propagate.");
                TimeUnit.SECONDS.sleep(60);

                // Put records into updated DeliveryStream.
                LOG.info("Putting records in updated DeliveryStream : " + deliveryStreamName
                        + " via Put Record method.");
                putRecordIntoDeliveryStream();

                // Batch Put records into updated DeliveryStream.
                LOG.info("Putting records in updated DeliveryStream : " + deliveryStreamName
                        + " via Put Record Batch method.");
                putRecordBatchIntoDeliveryStream();

                // Wait long enough for the DeliveryStream to write the data to the Redshift destination
                waitForDataDelivery(waitTimeSecs);
            }
        } catch (AmazonServiceException ase) {
            LOG.error("Caught an AmazonServiceException");
            LOG.error("Error Code: " + ase.getErrorCode());
            LOG.error("Message: " + ase.getErrorMessage(), ase);
        } catch (AmazonClientException ace) {
            LOG.error("Caught an AmazonClientException");
            LOG.error("Message: " + ace.getMessage(), ace);
        }
    }

    /**
     * Method to create delivery stream with Redshift destination configuration.
     *
     * @throws Exception
     */
    private static void createDeliveryStream() throws Exception {

        boolean deliveryStreamExists = false;

        LOG.info("Checking if " + deliveryStreamName + " already exits");
        List<String> deliveryStreamNames = listDeliveryStreams();
        if (deliveryStreamNames != null && deliveryStreamNames.contains(deliveryStreamName)) {
            deliveryStreamExists = true;
            LOG.info("DeliveryStream " + deliveryStreamName
                    + " already exists. Not creating the new delivery stream");
        } else {
            LOG.info("DeliveryStream " + deliveryStreamName + " does not exist");
        }

        if (!deliveryStreamExists) {
            // Create DeliveryStream
            CreateDeliveryStreamRequest createDeliveryStreamRequest = new CreateDeliveryStreamRequest();
            createDeliveryStreamRequest.setDeliveryStreamName(deliveryStreamName);

            S3DestinationConfiguration redshiftS3Configuration = new S3DestinationConfiguration();
            redshiftS3Configuration.setBucketARN(s3BucketARN);
            redshiftS3Configuration.setPrefix(s3ObjectPrefix);

            BufferingHints bufferingHints = null;
            if (s3DestinationSizeInMBs != null || s3DestinationIntervalInSeconds != null) {
                bufferingHints = new BufferingHints();
                bufferingHints.setSizeInMBs(s3DestinationSizeInMBs);
                bufferingHints.setIntervalInSeconds(s3DestinationIntervalInSeconds);
            }
            redshiftS3Configuration.setBufferingHints(bufferingHints);

            // Create and set an IAM role so that the Firehose service has access to the S3 bucket to put data.
            // Please check the trustPolicyDocument.json and permissionsPolicyDocument.json files
            // for the trust and permissions policies set for the role.
            String iamRoleArn = createIamRole(s3ObjectPrefix);
            redshiftS3Configuration.setRoleARN(iamRoleArn);
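
            // As a rough illustration only (the actual document lives in the
            // trustPolicyDocument.json file mentioned above), a trust policy that
            // lets the Firehose service assume the role typically looks like:
            //
            // {
            //   "Version": "2012-10-17",
            //   "Statement": [{
            //     "Effect": "Allow",
            //     "Principal": { "Service": "firehose.amazonaws.com" },
            //     "Action": "sts:AssumeRole"
            //   }]
            // }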

            CopyCommand copyCommand = new CopyCommand();
            copyCommand.withCopyOptions(copyOptions).withDataTableName(dataTableName);

            RedshiftDestinationConfiguration redshiftDestinationConfiguration = new RedshiftDestinationConfiguration();
            redshiftDestinationConfiguration.withClusterJDBCURL(clusterJDBCUrl).withRoleARN(iamRoleArn)
                    .withUsername(username).withPassword(password).withCopyCommand(copyCommand)
                    .withS3Configuration(redshiftS3Configuration);

            createDeliveryStreamRequest.setRedshiftDestinationConfiguration(redshiftDestinationConfiguration);

            firehoseClient.createDeliveryStream(createDeliveryStreamRequest);

            // The Delivery Stream is now being created.
            LOG.info("Creating DeliveryStream : " + deliveryStreamName);
            waitForDeliveryStreamToBecomeAvailable(deliveryStreamName);
        }
    }
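
    /*
     * The waitForDeliveryStreamToBecomeAvailable helper used above is defined
     * in AbstractAmazonKinesisFirehoseDelivery, which is not shown on this
     * page. As an illustrative sketch only (not the parent implementation),
     * such a wait can poll the stream status via the describeDeliveryStream
     * helper until Firehose reports the stream as ACTIVE:
     */
    private static void waitUntilStreamIsActive(String streamName) throws InterruptedException {
        // Poll every 20 seconds until the delivery stream becomes ACTIVE.
        while (!"ACTIVE".equals(describeDeliveryStream(streamName).getDeliveryStreamStatus())) {
            TimeUnit.SECONDS.sleep(20);
        }
    }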

    /**
     * Method to update the Redshift destination with updated copy options.
     */
    private static void updateDeliveryStream() {
        DeliveryStreamDescription deliveryStreamDescription = describeDeliveryStream(deliveryStreamName);

        LOG.info("Updating DeliveryStream Destination: " + deliveryStreamName + " with new configuration options");
        // get(0): Firehose currently supports only one destination per delivery stream
        UpdateDestinationRequest updateDestinationRequest = new UpdateDestinationRequest()
                .withDeliveryStreamName(deliveryStreamName)
                .withCurrentDeliveryStreamVersionId(deliveryStreamDescription.getVersionId())
                .withDestinationId(deliveryStreamDescription.getDestinations().get(0).getDestinationId());

        CopyCommand updatedCopyCommand = new CopyCommand().withDataTableName(dataTableName)
                .withCopyOptions(updatedCopyOptions);
        RedshiftDestinationUpdate redshiftDestinationUpdate = new RedshiftDestinationUpdate()
                .withCopyCommand(updatedCopyCommand);

        updateDestinationRequest.setRedshiftDestinationUpdate(redshiftDestinationUpdate);

        // Update the DeliveryStream destination with the new configuration options. Besides the
        // COPY options updated here, you can also update the S3 prefix, buffering hints,
        // compression format, KMS key, and IAM role.
        firehoseClient.updateDestination(updateDestinationRequest);
    }
}
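
Usage note

The helper methods called above (initClients, createS3Bucket, putRecordIntoDeliveryStream, putRecordBatchIntoDeliveryStream, waitForDataDelivery, and so on) are defined in the AbstractAmazonKinesisFirehoseDelivery base class, which is not shown on this page. As a rough sketch of what a single-record put looks like against the Firehose API, assuming an initialized client and the hypothetical stream name "MyDeliveryStream":

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import com.amazonaws.services.kinesisfirehose.AmazonKinesisFirehoseClient;
import com.amazonaws.services.kinesisfirehose.model.PutRecordRequest;
import com.amazonaws.services.kinesisfirehose.model.Record;

public class PutRecordSketch {

    public static void main(String[] args) {
        // Hypothetical values; in the sample above these come from the properties file.
        String deliveryStreamName = "MyDeliveryStream";
        AmazonKinesisFirehoseClient firehoseClient = new AmazonKinesisFirehoseClient();

        // Firehose delivers raw bytes; a trailing newline keeps rows separable
        // for the Redshift COPY command that reads the intermediate S3 objects.
        String data = "some-sample-data\n";
        Record record = new Record()
                .withData(ByteBuffer.wrap(data.getBytes(StandardCharsets.UTF_8)));

        PutRecordRequest putRecordRequest = new PutRecordRequest()
                .withDeliveryStreamName(deliveryStreamName)
                .withRecord(record);

        // Each successful put returns a record ID assigned by Firehose.
        String recordId = firehoseClient.putRecord(putRecordRequest).getRecordId();
        System.out.println("Put record with ID: " + recordId);
    }
}

The batch variant is analogous: collect up to 500 Record objects into a PutRecordBatchRequest and check getFailedPutCount() on the PutRecordBatchResult, since individual records within a batch can fail while the call as a whole succeeds.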