/**
 * Amazon Kinesis Aggregators
 *
 * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Amazon Software License (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 * http://aws.amazon.com/asl/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.services.kinesis.aggregators;

import java.io.File;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.GetItemRequest;
import com.amazonaws.services.kinesis.AmazonKinesisClient;
import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;
import com.amazonaws.services.kinesis.aggregators.datastore.DynamoUtils;
import com.amazonaws.services.kinesis.model.LimitExceededException;
import com.amazonaws.services.kinesis.model.ResourceNotFoundException;
import com.amazonaws.services.kinesis.model.Shard;
import com.amazonaws.services.kinesis.model.StreamDescription;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

/**
 * Utility methods used across the Amazon Kinesis Aggregators framework.
 */
public class StreamAggregatorUtils {
    private static final Log LOG = LogFactory.getLog(StreamAggregatorUtils.class);

    private static final String rsTimeformat = "yyyy-mm-dd hh:mi:ss";

    private static final ObjectMapper mapper = new ObjectMapper();

    static {
        mapper.configure(DeserializationFeature.ACCEPT_SINGLE_VALUE_AS_ARRAY, true);
        mapper.configure(DeserializationFeature.WRAP_EXCEPTIONS, false);
    }

    private StreamAggregatorUtils() {
    }

    /**
     * Helper method which converts input values from Aggregator configurations
     * into names for attributes in DynamoDB. In particular this supports
     * Object-based Aggregators, which are configured using getter methods. For
     * example, this method will turn 'getValue' into 'value' and leave
     * 'isSomething' as 'isSomething'.
     *
     * @param methodName The name of the method to be converted into an
     *            attribute name.
     * @return A string value to be used as the corresponding attribute name.
     */
    public static String methodToColumn(String methodName) {
        if (methodName.startsWith("get")) {
            return methodName.substring(3, 4).toLowerCase() + methodName.substring(4);
        } else {
            return methodName.substring(0, 1).toLowerCase() + methodName.substring(1);
        }
    }
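    /*
     * Usage sketch for methodToColumn (illustrative only; the accessor names
     * below are hypothetical examples, not part of the framework):
     *
     *   StreamAggregatorUtils.methodToColumn("getValue");    // -> "value"
     *   StreamAggregatorUtils.methodToColumn("isSomething"); // -> "isSomething"
     */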
    /**
     * Returns a statement which can be used to create an External Table in
     * Hive which wraps the indicated Aggregator table, using the required
     * table name in Hive.
     *
     * @param dynamoClient DynamoDB client to use for the connection to
     *            DynamoDB.
     * @param hiveTableName The table name to generate for the Hive table.
     * @param dynamoTable The name of the Aggregator table in DynamoDB.
     * @return A CREATE EXTERNAL TABLE statement to be used in Hive.
     * @throws Exception
     */
    public static String getDynamoHiveWrapper(AmazonDynamoDB dynamoClient, String hiveTableName,
            String dynamoTable) throws Exception {
        LOG.info("Generating Hive Integration Statement");

        StringBuilder sb = new StringBuilder();

        sb.append(String.format("CREATE EXTERNAL TABLE %s(", hiveTableName));

        // add the hive table column specification
        List<String> tableDefinition = DynamoUtils.getDictionaryEntry(dynamoClient, dynamoTable);
        for (String s : tableDefinition) {
            sb.append(String.format("%s string,", s));
        }
        // remove the trailing comma
        sb.replace(sb.length() - 1, sb.length(), "");

        sb.append(String.format(
                ") STORED BY 'org.apache.hadoop.hive.dynamodb.DynamoDBStorageHandler' TBLPROPERTIES (\"dynamodb.table.name\" = \"%s\", \"dynamodb.column.mapping\" = \"",
                dynamoTable));
        for (String s : tableDefinition) {
            sb.append(String.format("%s:%s,", s, s));
        }
        // remove the trailing comma, then close the column mapping string and
        // the TBLPROPERTIES clause
        sb.replace(sb.length() - 1, sb.length(), "");
        sb.append("\")");

        return sb.toString();
    }

    /**
     * Helper method to generate a Redshift CREATE TABLE command which matches
     * the structure of the Aggregator table, and a COPY command which will
     * load the table data from an Aggregator table into a Redshift table.
     *
     * @param dynamoClient DynamoDB client to use for the connection to
     *            DynamoDB.
     * @param redshiftTableName The table name to use in Redshift.
     * @param dynamoTable The Aggregator table name in DynamoDB.
     * @return A String which contains the create table and copy commands to
     *         be issued against Redshift.
     */
    public static String getRedshiftCopyCommand(final AmazonDynamoDB dynamoClient,
            String redshiftTableName, String dynamoTable) throws Exception {
        LOG.info("Generating Redshift Copy Command");

        StringBuilder sb = new StringBuilder();

        // generate the create table statement
        sb.append(String.format("CREATE TABLE %s(\n", redshiftTableName));

        int i = 0;
        List<String> tableStructure = DynamoUtils.getDictionaryEntry(dynamoClient, dynamoTable);
        String columnSpec = null;
        String dataType = null;
        for (String s : tableStructure) {
            i++;
            switch (s) {
                case StreamAggregator.LAST_WRITE_SEQ:
                    dataType = "BIGINT";
                    break;
                case StreamAggregator.LAST_WRITE_TIME:
                    dataType = "TIMESTAMP";
                    break;
                case StreamAggregator.EVENT_COUNT:
                    dataType = "INT";
                    break;
                default:
                    if (s.contains("-SUM") || s.contains("-MIN") || s.contains("-MAX")) {
                        dataType = "INT";
                    } else {
                        dataType = "VARCHAR(1000)";
                    }
                    break;
            }

            columnSpec = s + " " + dataType;

            if (i == tableStructure.size()) {
                sb.append(columnSpec);
            } else {
                sb.append(columnSpec + ",");
            }
        }
        sb.append(");\n\n");

        // generate the copy command
        sb.append(String.format(
                "copy %s from 'dynamodb://%s' credentials 'aws_access_key_id=<Your-Access-Key-ID>;aws_secret_access_key=<Your-Secret-Access-Key>' readratio 50 timeformat '%s';",
                redshiftTableName, dynamoTable, rsTimeformat));

        return sb.toString();
    }

    /**
     * Index name which should be used for the last write sequence GSI on a
     * table.
     *
     * @param dynamoTable The table name in DynamoDB.
     * @return The name for the global secondary index on the table for last
     *         write sequence.
     */
    public static final String getLastWriteSeqIndexName(String dynamoTable) {
        return dynamoTable + "-seq";
    }
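    /*
     * For illustration, given a hypothetical dictionary entry of
     * [eventName, eventCount] (attribute and table names here are
     * assumptions, and assume eventCount matches StreamAggregator.EVENT_COUNT),
     * getRedshiftCopyCommand would return text along these lines:
     *
     *   CREATE TABLE myAggregates(
     *   eventName VARCHAR(1000),eventCount INT);
     *
     *   copy myAggregates from 'dynamodb://myAggregateTable' credentials
     *   '...' readratio 50 timeformat 'yyyy-mm-dd hh:mi:ss';
     */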
    /**
     * Index name which should be used for the date dimension GSI on a table.
     *
     * @param dynamoTable The table name in DynamoDB.
     * @param dateAttribute The attribute name of the date dimension.
     * @return The name for the global secondary index on the table for the
     *         date dimension.
     */
    public static final String getDateDimensionIndexName(String dynamoTable, String dateAttribute) {
        return String.format("%s-%s", dynamoTable, dateAttribute);
    }

    /**
     * Generates a correctly formatted primary key for a DynamoDB table
     * hosting aggregated data.
     *
     * @param updateKey An {@link UpdateKey} which should be pivoted into a
     *            key.
     * @return The table key for the supplied {@link UpdateKey}.
     */
    public static Map<String, AttributeValue> getTableKey(UpdateKey updateKey) {
        return getTableKey(updateKey.getAggregateColumnName(),
                updateKey.getTimeHorizon().getAbbrev() + "-" + updateKey.getAggregatedValue(),
                updateKey.getDateValueColumnName(), updateKey.getDateValue());
    }

    /**
     * Generates a correctly formatted primary key for a DynamoDB table
     * hosting aggregated data.
     *
     * @param keyColumnName The attribute name in the table to be used as the
     *            first part of a hash key.
     * @param fieldValue The value of the hash key to query for.
     * @param dateColumnName The attribute name of the date column to be used
     *            as the range key.
     * @param dateValue The value of the range key to query for.
     * @return The table key for the supplied attribute names and values.
     */
    protected static Map<String, AttributeValue> getTableKey(String keyColumnName,
            String fieldValue, String dateColumnName, String dateValue) {
        HashMap<String, AttributeValue> key = new HashMap<>();
        key.put(keyColumnName, new AttributeValue().withS(fieldValue));
        key.put(dateColumnName, new AttributeValue().withS(dateValue));
        return key;
    }

    protected static Map<String, AttributeValue> getValue(final AmazonDynamoDB dynamoClient,
            final String tableName, final UpdateKey key) {
        GetItemRequest req = new GetItemRequest().withTableName(tableName).withKey(
                getTableKey(key));
        return dynamoClient.getItem(req).getItem();
    }

    protected static String getTableName(final String applicationName, final String namespace) {
        return String.format("%s-%s", applicationName, namespace);
    }

    public static JsonNode asJsonNode(String s) throws Exception {
        return mapper.readTree(s);
    }

    public static JsonNode asJsonNode(File f) throws Exception {
        return mapper.readTree(f);
    }

    /**
     * Resolves a node from a JSON document using a dot-delimited path such as
     * 'parent.child.attribute'.
     *
     * @param json The document to read from.
     * @param atPath The dot-delimited path to the required value.
     * @return The node at the supplied path.
     */
    public static JsonNode readJsonValue(JsonNode json, String atPath) {
        if (!atPath.contains(".")) {
            return json.get(atPath);
        } else {
            String[] path = atPath.split("\\.");
            JsonNode node = json.get(path[0]);
            for (int i = 1; i < path.length; i++) {
                node = node.path(path[i]);
            }
            return node;
        }
    }

    public static String readValueAsString(JsonNode json, String atPath) {
        JsonNode node = readJsonValue(json, atPath);
        return node == null ? null : node.asText();
    }
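    /*
     * Usage sketch for the JSON path helpers (the document and path below are
     * illustrative):
     *
     *   JsonNode json = StreamAggregatorUtils.asJsonNode("{\"a\":{\"b\":\"c\"}}");
     *   StreamAggregatorUtils.readValueAsString(json, "a.b"); // -> "c"
     */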
    /**
     * Get a list of all open shards on a stream, ordered by their starting
     * hash key.
     *
     * @param kinesisClient Kinesis client to use for the connection to
     *            Kinesis.
     * @param streamName The name of the stream to describe.
     * @return A Map of only open shards, indexed by the shard ID.
     */
    public static Map<String, Shard> getOpenShards(AmazonKinesisClient kinesisClient,
            String streamName) throws Exception {
        Map<String, Shard> shardMap = new LinkedHashMap<>();
        final int BACKOFF_MILLIS = 10;
        final int MAX_DESCRIBE_ATTEMPTS = 10;
        int describeAttempts = 0;

        StreamDescription stream = null;
        try {
            do {
                try {
                    stream = kinesisClient.describeStream(streamName).getStreamDescription();
                } catch (LimitExceededException e) {
                    // back off exponentially and retry
                    Thread.sleep((long) Math.pow(2, describeAttempts) * BACKOFF_MILLIS);
                    describeAttempts++;
                }
            } while (stream == null && describeAttempts < MAX_DESCRIBE_ATTEMPTS);
        } catch (InterruptedException e) {
            LOG.error(e);
            throw e;
        }

        if (stream == null) {
            throw new Exception(String.format("Unable to describe Stream after %s attempts",
                    MAX_DESCRIBE_ATTEMPTS));
        }

        Collection<String> openShardNames = new ArrayList<String>();

        // load all the shards on the stream
        for (Shard shard : stream.getShards()) {
            openShardNames.add(shard.getShardId());
            shardMap.put(shard.getShardId(), shard);

            // remove this shard's parents from the set of open shards - we
            // can't do anything to them
            if (shard.getParentShardId() != null) {
                openShardNames.remove(shard.getParentShardId());
            }
            if (shard.getAdjacentParentShardId() != null) {
                openShardNames.remove(shard.getAdjacentParentShardId());
            }
        }

        // create a list of open shards for sorting
        List<Shard> shards = new ArrayList<Shard>();
        for (String s : openShardNames) {
            shards.add(shardMap.get(s));
        }

        // sort the list into lowest starting hash key order
        Collections.sort(shards, new Comparator<Shard>() {
            public int compare(Shard o1, Shard o2) {
                return new BigInteger(o1.getHashKeyRange().getStartingHashKey()).compareTo(new BigInteger(
                        o2.getHashKeyRange().getStartingHashKey()));
            }
        });

        // rebuild the shard map in the correct order
        shardMap.clear();
        for (Shard s : shards) {
            shardMap.put(s.getShardId(), s);
        }

        return shardMap;
    }

    public static Shard getFirstShard(AmazonKinesisClient kinesisClient, String streamName)
            throws Exception {
        return getOpenShards(kinesisClient, streamName).values().iterator().next();
    }

    public static String getFirstShardName(AmazonKinesisClient kinesisClient, String streamName)
            throws Exception {
        return getFirstShard(kinesisClient, streamName).getShardId();
    }

    public static int getShardCount(AmazonKinesisClient kinesisClient, String streamName)
            throws Exception {
        return getOpenShards(kinesisClient, streamName).keySet().size();
    }
}
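/*
 * Usage sketch for the shard helpers (illustrative; assumes default
 * credentials on the classpath and an existing stream named "myStream"):
 *
 *   AmazonKinesisClient client = new AmazonKinesisClient();
 *   Map<String, Shard> open = StreamAggregatorUtils.getOpenShards(client, "myStream");
 *   String firstShardId = StreamAggregatorUtils.getFirstShardName(client, "myStream");
 *   int shardCount = StreamAggregatorUtils.getShardCount(client, "myStream");
 */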