Source code

Java tutorial


Here is the source code for com/amazonaws/kinesis/agg/AggRecord.java


/**
 * Kinesis Aggregation/Deaggregation Libraries for Java
 *
 * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Amazon Software License (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/asl/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.kinesis.agg;

import java.io.ByteArrayOutputStream;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import org.apache.commons.lang.StringUtils;

import com.amazonaws.annotation.NotThreadSafe;

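/**
 * Represents a single aggregated Kinesis record. This Kinesis record is built
 * by adding multiple user records and then serializing them to bytes using the
 * Kinesis aggregated record format. This class lifts heavily from the existing
 * KPL C++ libraries found at https://github.com/awslabs/amazon-kinesis-producer.
 * <p>
 * This class is NOT thread-safe.
 *
 * @see <a href=
 *      "https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md">KPL
 *      aggregated record format</a>
 */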
public class AggRecord {
    // Serialization protocol constants via the specification at
    // https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md
    // Magic number bytes of the aggregated record format; their length is part
    // of the serialized size reported by getSizeBytes().
    private static final byte[] AGGREGATED_RECORD_MAGIC = new byte[] { (byte) 0xf3, (byte) 0x89, (byte) 0x9a,
            (byte) 0xc2 };
    // Algorithm name handed to MessageDigest.getInstance(...) in the constructor.
    protected static final String MESSAGE_DIGEST_NAME = "MD5";
    // "FF" repeated 16 times parsed as hex, i.e. 2^128 - 1: the upper bound
    // enforced on explicit hash keys.
    private static final BigInteger UINT_128_MAX = new BigInteger(StringUtils.repeat("FF", 16), 16);

    // Kinesis Limits
    // (see the Amazon Kinesis Data Streams quotas and limits documentation)
    protected static final int MAX_BYTES_PER_RECORD = 1024 * 1024; // 1 MB
    // Only referenced when building the "record too large" error message.
    protected static final int AGGREGATION_OVERHEAD_BYTES = 256;
    // Inclusive bounds on the encoded byte length of a partition key.
    protected static final int PARTITION_KEY_MIN_LENGTH = 1;
    protected static final int PARTITION_KEY_MAX_LENGTH = 256;

    /** The current size of the aggregated protobuf message. */
    private int aggregatedMessageSizeBytes;
    /** The set of unique explicit hash keys in the protocol buffer message. */
    private final KeySet explicitHashKeys;
    /** The set of unique partition keys in the protocol buffer message. */
    private final KeySet partitionKeys;
    /** The current builder object by which we're actively constructing a record. */
    private AggregatedRecord.Builder aggregatedRecordBuilder;
    /**
     * The message digest to use for calculating MD5 checksums per the protocol
     * specification.
     */
    private final MessageDigest md5;
    /**
     * The partition key for the entire aggregated record. Empty until the first
     * user record is added, at which point it takes that record's partition key.
     */
    private String aggPartitionKey;
    /**
     * The explicit hash key for the entire aggregated record. Empty until the
     * first user record is added, at which point it takes that record's
     * explicit hash key.
     */
    private String aggExplicitHashKey;

    /**
     * Construct a new (empty) aggregated Kinesis record.
     */
    public AggRecord() {
        // Start with an empty protobuf builder and empty key tables.
        this.aggregatedRecordBuilder = AggregatedRecord.newBuilder();
        this.aggregatedMessageSizeBytes = 0;
        this.explicitHashKeys = new KeySet();
        this.partitionKeys = new KeySet();

        // The aggregate-level keys stay empty until the first user record arrives.
        this.aggExplicitHashKey = "";
        this.aggPartitionKey = "";

        try {
            this.md5 = MessageDigest.getInstance(MESSAGE_DIGEST_NAME);
        } catch (NoSuchAlgorithmException e) {
            // Surface the failure as unchecked so callers are not forced to
            // handle a condition they cannot recover from; the cause is kept.
            throw new IllegalStateException("Could not create an MD5 message digest.", e);
        }
    }

    /**
     * Get the current number of user records contained inside this aggregate
     * record.
     *
     * @return The current number of user records added via the "addUserRecord(...)"
     *         method.
     */
    public int getNumUserRecords() {
        return this.aggregatedRecordBuilder.getRecordsCount();

    /**
     * Get the current size in bytes of the fully serialized aggregated record.
     *
     * @return The current size in bytes of this message in its serialized form.
     */
    public int getSizeBytes() {
        if (getNumUserRecords() == 0) {
            return 0;

        return AGGREGATED_RECORD_MAGIC.length + this.aggregatedMessageSizeBytes + this.md5.getDigestLength();

    /**
     * Serialize this record to bytes. Has no side effects (i.e. does not affect the
     * contents of this record object).
     *
     * @return A byte array containing a Kinesis aggregated format-compatible
     *         Kinesis record.
     */
    public byte[] toRecordBytes() {
        if (getNumUserRecords() == 0) {
            return new byte[0];

        byte[] messageBody =;

        byte[] messageDigest = this.md5.digest(messageBody);

        // The way Java's API works is that write(byte[]) throws IOException on
        // a ByteArrayOutputStream, but
        // write(byte[],int,int) doesn't so that's why we're using the long
        // version of "write" here
        ByteArrayOutputStream baos = new ByteArrayOutputStream(getSizeBytes());
        baos.write(messageBody, 0, messageBody.length);
        baos.write(messageDigest, 0, messageDigest.length);

        return baos.toByteArray();

    /**
     * Clears out all records and metadata from this object so that it can be reused
     * just like a fresh instance of this object.
     */
    public void clear() {
        this.aggExplicitHashKey = "";
        this.aggPartitionKey = "";
        this.aggregatedMessageSizeBytes = 0;
        this.aggregatedRecordBuilder = AggregatedRecord.newBuilder();

    /**
     * Get the overarching partition key for the entire aggregated record.
     *
     * @return The partition key to use for the aggregated record or null if this
     *         aggregated record is empty.
     */
    public String getPartitionKey() {
        if (getNumUserRecords() == 0) {
            return null;

        return this.aggPartitionKey;

    /**
     * Get the overarching explicit hash key for the entire aggregated record.
     *
     * @return The explicit hash key to use for the aggregated record or null if
     *         this aggregated record is empty.
     */
    public String getExplicitHashKey() {
        if (getNumUserRecords() == 0) {
            return null;

        return this.aggExplicitHashKey;

    /**
     * Based on the current size of this aggregated record, calculate what the new
     * size would be if we added another user record with the specified parameters
     * (used to determine when this aggregated record is full and can't accept any
     * more user records). This calculation is highly dependent on the Kinesis
     * aggregated message format.
     *
     * @param partitionKey
     *            The partition key of the new record to simulate adding
     * @param explicitHashKey
     *            The explicit hash key of the new record to simulate adding
     * @param data
     *            The raw data of the new record to simulate adding
     * @return The new size of this existing record in bytes if a new user record
     *         with the specified parameters was added.
     * @see <a href=
     *      "https://github.com/awslabs/amazon-kinesis-producer/blob/master/aggregation-format.md">KPL
     *      aggregated record format</a>
     */
    private int calculateRecordSize(String partitionKey, String explicitHashKey, byte[] data) {
        int messageSize = 0;

        // has the partition key been added to the table of known PKs yet?
        if (!this.partitionKeys.contains(partitionKey)) {
            int pkLength = partitionKey.getBytes().length;
            messageSize += 1; // (message index + wire type for PK table)
            messageSize += calculateVarintSize(pkLength); // size of pk length
            // value
            messageSize += pkLength; // actual pk length

        // has the explicit hash key been added to the table of known EHKs yet?
        if (!this.explicitHashKeys.contains(explicitHashKey)) {
            int ehkLength = explicitHashKey.getBytes().length;
            messageSize += 1; // (message index + wire type for EHK table)
            messageSize += calculateVarintSize(ehkLength); /* size of ehk length value */
            messageSize += ehkLength; // actual ehk length

        // remaining calculations are for adding the new record to the list of
        // records

        long innerRecordSize = 0;

        // partition key field
        innerRecordSize += 1; // (message index + wire type for PK index)
        innerRecordSize += calculateVarintSize(
                this.partitionKeys.getPotentialIndex(partitionKey)); /* size of pk index value */

        // explicit hash key field (this is optional)
        if (explicitHashKey != null) {
            innerRecordSize += 1; // (message index + wire type for EHK index)
            innerRecordSize += calculateVarintSize(
                    this.explicitHashKeys.getPotentialIndex(explicitHashKey)); /* size of ehk index value */

        // data field
        innerRecordSize += 1; // (message index + wire type for record data)

        innerRecordSize += calculateVarintSize(data.length); /* size of data length value */
        innerRecordSize += data.length; // actual data length

        messageSize += 1; // (message index + wire type for record)
        messageSize += calculateVarintSize(innerRecordSize); /* size of entire record length value */
        messageSize += innerRecordSize; // actual entire record length

        return messageSize;

    /**
     * For an integral value represented by a varint, calculate how many bytes are
     * necessary to represent the value in a protobuf message.
     *
     * @param value
     *            The value whose varint size will be calculated
     * @return The number of bytes necessary to represent the input value as a
     *         varint.
     * @see <a href="https://protobuf.dev/programming-guides/encoding/#varints">Protocol
     *      Buffers encoding: varints</a>
     */
    private int calculateVarintSize(long value) {
        if (value < 0) {
            throw new IllegalArgumentException("Size values should not be negative.");

        int numBitsNeeded = 0;
        if (value == 0) {
            numBitsNeeded = 1;
        } else {
            // shift the value right one bit at a time until
            // there are no more '1' bits left...this counts
            // how many bits we need to represent the number
            while (value > 0) {
                value = value >> 1;

        // varints only use 7 bits of the byte for the actual value
        int numVarintBytes = numBitsNeeded / 7;
        if (numBitsNeeded % 7 > 0) {
            numVarintBytes += 1;

        return numVarintBytes;

    /**
     * Add a new user record to this existing aggregated record if there is enough
     * space (based on the defined Kinesis limits for a PutRecord call).
     *
     * @param partitionKey
     *            The partition key of the new user record to add
     * @param explicitHashKey
     *            The explicit hash key of the new user record to add
     * @param data
     *            The raw data of the new user record to add
     * @return True if the new user record was successfully added to this aggregated
     *         record or false if this aggregated record is too full.
     */
    public boolean addUserRecord(String partitionKey, String explicitHashKey, byte[] data) {
        // set the explicit hash key for the message to the partition key -
        // required for encoding
        explicitHashKey = explicitHashKey != null ? explicitHashKey : createExplicitHashKey(partitionKey);

        // validate values from the provided message

        // Validate new record size won't overflow max size for a
        // PutRecordRequest
        int sizeOfNewRecord = calculateRecordSize(partitionKey, explicitHashKey, data);
        if (getSizeBytes() + sizeOfNewRecord > MAX_BYTES_PER_RECORD) {
            return false;
        } else if (sizeOfNewRecord > MAX_BYTES_PER_RECORD) {
            throw new IllegalArgumentException(
                    "Input record (PK=" + partitionKey + ", EHK=" + explicitHashKey + ", SizeBytes="
                            + sizeOfNewRecord + ") is larger than the maximum size before Aggregation encoding of "
                            + (MAX_BYTES_PER_RECORD - AGGREGATION_OVERHEAD_BYTES) + " bytes");

        Record.Builder newRecord = Record.newBuilder()
                .setData(data != null ? ByteString.copyFrom(data) : ByteString.EMPTY);

        ExistenceIndexPair pkAddResult = this.partitionKeys.add(partitionKey);
        if (pkAddResult.getFirst().booleanValue()) {

        ExistenceIndexPair ehkAddResult = this.explicitHashKeys.add(explicitHashKey);
        if (ehkAddResult.getFirst().booleanValue()) {

        this.aggregatedMessageSizeBytes += sizeOfNewRecord;

        // if this is the first record, we use its partition key and hash key
        // for the entire agg record
        if (this.aggregatedRecordBuilder.getRecordsCount() == 1) {
            this.aggPartitionKey = partitionKey;
            this.aggExplicitHashKey = explicitHashKey;

        return true;

    /**
     * Convert the aggregated data in this record into a single PutRecordRequest.
     * This method has no side effects (i.e. it will not clear the current contents
     * of the aggregated record).
     *
     * @param streamName
     *            The Kinesis stream name where this PutRecordRequest will be sent.
     * @return A PutRecordRequest containing all the current data in this aggregated
     *         record.
     */
    public PutRecordRequest toPutRecordRequest(String streamName) {
        byte[] recordBytes = toRecordBytes();
        ByteBuffer bb = ByteBuffer.wrap(recordBytes);
        return new PutRecordRequest().withStreamName(streamName).withExplicitHashKey(getExplicitHashKey())

    /**
     * Convert the aggregated data in this record into a single
     * PutRecordsRequestEntry. This method has no side effects (i.e. it will not
     * clear the current contents of the aggregated record).
     *
     * @return A PutRecordsRequestEntry containing all the current data in this
     *         aggregated record that can be sent to Kinesis via a
     *         PutRecordsRequest.
     */
    public PutRecordsRequestEntry toPutRecordsRequestEntry() {
        return new PutRecordsRequestEntry().withExplicitHashKey(getExplicitHashKey())

    /**
     * Validate the data portion of an input Kinesis user record.
     *
     * @param data
     *            A byte array containing Kinesis user record data.
     */
    private void validateData(final byte[] data) {
        final int maxAllowableDataLength = MAX_BYTES_PER_RECORD - AGGREGATED_RECORD_MAGIC.length
                - this.md5.getDigestLength();
        if (data != null && data.length > (maxAllowableDataLength)) {
            throw new IllegalArgumentException("Data must be less than or equal to " + maxAllowableDataLength
                    + " bytes in size, got " + data.length + " bytes");

    /**
     * Validate the partition key of an input Kinesis user record.
     *
     * @param partitionKey
     *            The string containing the input partition key to validate.
     */
    private void validatePartitionKey(final String partitionKey) {
        if (partitionKey == null) {
            throw new IllegalArgumentException("Partition key cannot be null");

        if (partitionKey.getBytes().length < PARTITION_KEY_MIN_LENGTH
                || partitionKey.getBytes().length > PARTITION_KEY_MAX_LENGTH) {
            throw new IllegalArgumentException(
                    "Invalid partition key. Length must be at least " + PARTITION_KEY_MIN_LENGTH + " and at most "
                            + PARTITION_KEY_MAX_LENGTH + ", got length of " + partitionKey.getBytes().length);

        try {
        } catch (Exception e) {
            throw new IllegalArgumentException(
                    "Partition key must be valid " + StandardCharsets.UTF_8.displayName());

    /**
     * Validate the explicit hash key of an input Kinesis user record.
     *
     * @param explicitHashKey
     *            The string containing the input explicit hash key to validate.
     */
    private void validateExplicitHashKey(final String explicitHashKey) {
        if (explicitHashKey == null) {

        BigInteger b = null;
        try {
            b = new BigInteger(explicitHashKey);
            if (b.compareTo(UINT_128_MAX) > 0 || b.compareTo(BigInteger.ZERO) < 0) {
                throw new IllegalArgumentException(
                        "Invalid explicitHashKey, must be greater or equal to zero and less than or equal to (2^128 - 1), got "
                                + explicitHashKey);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    "Invalid explicitHashKey, must be an integer, got " + explicitHashKey);

    /**
     * Calculate a new explicit hash key based on the input partition key (following
     * the algorithm from the original KPL).
     *
     * @param partitionKey
     *            The partition key to seed the new explicit hash key with
     * @return An explicit hash key based on the input partition key generated using
     *         an algorithm from the original KPL.
     */
    private String createExplicitHashKey(final String partitionKey) {
        BigInteger hashKey = BigInteger.ZERO;

        byte[] pkDigest = this.md5.digest(partitionKey.getBytes(StandardCharsets.UTF_8));

        for (int i = 0; i < this.md5.getDigestLength(); i++) {
            BigInteger p = new BigInteger(String.valueOf((int) pkDigest[i] & 0xFF)); // convert
            // to
            // unsigned
            // integer
            BigInteger shifted = p.shiftLeft((16 - i - 1) * 8);
            hashKey = hashKey.add(shifted);

        return hashKey.toString(10);

    /**
     * A class for tracking unique partition keys or explicit hash keys for an
     * aggregated Kinesis record. Also assists in keeping track of indexes for their
     * locations in the protobuf tables.
     */
    private class KeySet {
        /** The list of unique keys in this keyset. */
        private List<String> keys;
        /** A lookup mapping of key to index in protobuf table. */
        private Map<String, Long> lookup;

         * Create a new empty keyset.
        public KeySet() {
            this.keys = new LinkedList<>();
            this.lookup = new TreeMap<>();

         * If the input key were added to this KeySet, determine what its resulting
         * index would be.
         * @param s
         *            The input string to potentially add to the KeySet
         * @return The table index that this string would occupy if it were added to
         *         this KeySet.
        public Long getPotentialIndex(String s) {
            Long it = this.lookup.get(s);
            if (it != null) {
                return it;

            return Long.valueOf(this.keys.size());

         * Add a new key to this keyset.
         * @param s
         *            The key to add to the keyset.
         * @return A pair of <boolean,long>. The boolean is true if this key is not
         *         already in this keyset, false otherwise. The long indicates the index
         *         of the key.
        public ExistenceIndexPair add(String s) {
            Long it = this.lookup.get(s);
            if (it != null) {
                return new ExistenceIndexPair(false, it);

            if (!this.lookup.containsKey(s)) {
                this.lookup.put(s, Long.valueOf(this.keys.size()));

            return new ExistenceIndexPair(true, Long.valueOf(this.keys.size() - 1));

         * @return True if this keyset contains the input key, false otherwise.
        public boolean contains(String s) {
            return s != null && this.lookup.containsKey(s);

         * Clear all the contents of this keyset.
        public void clear() {

    /**
     * A helper class for use with the KeySet that indicates whether or not a key
     * exists in the KeySet and what its saved index would be.
     */
    private class ExistenceIndexPair {
        private Boolean first;
        private Long second;

        public ExistenceIndexPair(Boolean first, Long second) {
            this.first = first;
            this.second = second;

        public Boolean getFirst() {
            return this.first;

        public Long getSecond() {
            return this.second;