/**
 * Amazon Kinesis Aggregators
 *
 * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Amazon Software License (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 * http://aws.amazon.com/asl/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.services.kinesis.aggregators.datastore;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBAsyncClient;
import com.amazonaws.services.dynamodbv2.model.AttributeAction;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.AttributeValueUpdate;
import com.amazonaws.services.dynamodbv2.model.ConditionalCheckFailedException;
import com.amazonaws.services.dynamodbv2.model.ExpectedAttributeValue;
import com.amazonaws.services.dynamodbv2.model.GlobalSecondaryIndex;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.amazonaws.services.dynamodbv2.model.KeyType;
import com.amazonaws.services.dynamodbv2.model.Projection;
import com.amazonaws.services.dynamodbv2.model.ProjectionType;
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
import com.amazonaws.services.dynamodbv2.model.ReturnValue;
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
import com.amazonaws.services.dynamodbv2.model.ScanResult;
import com.amazonaws.services.dynamodbv2.model.Select;
import com.amazonaws.services.dynamodbv2.model.UpdateItemRequest;
import com.amazonaws.services.dynamodbv2.model.UpdateItemResult;
import com.amazonaws.services.kinesis.AmazonKinesisClient;
import com.amazonaws.services.kinesis.aggregators.AggregatorType;
import com.amazonaws.services.kinesis.aggregators.StreamAggregator;
import com.amazonaws.services.kinesis.aggregators.StreamAggregatorUtils;
import com.amazonaws.services.kinesis.aggregators.cache.UpdateKey;
import com.amazonaws.services.kinesis.aggregators.cache.UpdateValue;
import com.amazonaws.services.kinesis.aggregators.summary.SummaryCalculation;
import com.amazonaws.services.kinesis.model.ResourceNotFoundException;

public class DynamoDataStore implements IDataStore {
    public enum DynamoSummaryUpdateMethod {
        PUT(AttributeAction.PUT), ADD(AttributeAction.ADD), CONDITIONAL(null);

        private AttributeAction action;

        private DynamoSummaryUpdateMethod(AttributeAction a) {
            this.action = a;
        }

        public AttributeAction getAction() {
            return this.action;
        }
    }

    /**
     * The default amount of read IOPS to be provisioned, if the aggregator
     * does not override.
     */
    public static final long DEFAULT_READ_CAPACITY = 10L;

    /**
     * The default amount of write IOPS to be provisioned, if the aggregator
     * does not override.
     */
    public static final long DEFAULT_WRITE_CAPACITY = 10L;
    private final Log LOG = LogFactory.getLog(DynamoDataStore.class);

    private String environment, tableName, streamName;

    private AggregatorType aggregatorType;

    private boolean reportedStructure = false;

    private AmazonDynamoDB dynamoClient;

    private AmazonKinesisClient kinesisClient;

    private long readCapacity = DEFAULT_READ_CAPACITY;

    private long writeCapacity = DEFAULT_WRITE_CAPACITY;

    private String labelAttribute, dateAttribute;

    private boolean online = false;

    private Region region = Region.getRegion(Regions.US_EAST_1);

    public static final String SCATTER_PREFIX_ATTRIBUTE = "scatterPrefix";

    public static final int SCATTER_WIDTH = 99;

    private final Random r = new Random();

    private DynamoQueryEngine queryEngine;

    public DynamoDataStore(AmazonDynamoDB dynamoClient, AmazonKinesisClient kinesisClient,
            AggregatorType aggregatorType, String streamName, String tableName,
            String labelAttribute, String dateAttribute) {
        this.dynamoClient = dynamoClient;
        this.kinesisClient = kinesisClient;
        this.aggregatorType = aggregatorType;
        this.streamName = streamName;
        this.tableName = tableName;
        this.labelAttribute = labelAttribute;
        this.dateAttribute = dateAttribute;
    }

    public DynamoDataStore(AWSCredentialsProvider credentials, AggregatorType aggregatorType,
            String streamName, String tableName, String labelAttribute, String dateAttribute) {
        this(new AmazonDynamoDBAsyncClient(credentials), new AmazonKinesisClient(credentials),
                aggregatorType, streamName, tableName, labelAttribute, dateAttribute);
    }

    @Override
    public void initialise() throws Exception {
        if (!this.online) {
            if (this.region != null) {
                this.dynamoClient.setRegion(this.region);

                if (this.streamName != null) {
                    this.kinesisClient.setRegion(this.region);
                }
            }

            initAggTable(this.labelAttribute, this.dateAttribute, this.readCapacity,
                    this.writeCapacity);

            this.queryEngine = new DynamoQueryEngine(this.dynamoClient, this.tableName,
                    this.labelAttribute, this.dateAttribute);

            this.online = true;
        }
    }
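    /*
     * Minimal usage sketch (illustrative only - the stream, table, and
     * attribute names below are hypothetical, and this method is not called
     * by the aggregator framework). Shows the expected lifecycle: configure
     * capacity and region, then call initialise() before the first write().
     */
    @SuppressWarnings("unused")
    private static DynamoDataStore exampleCreateAndInitialise(AWSCredentialsProvider credentials)
            throws Exception {
        DynamoDataStore store = new DynamoDataStore(credentials, AggregatorType.SUM, "myStream",
                "myAggregates", "deviceId", "eventDate").withStorageCapacity(20L, 20L);
        store.setRegion(Region.getRegion(Regions.US_WEST_2));

        // creates the backing table if required and builds the query engine
        store.initialise();

        return store;
    }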
    @Override
    public Map<UpdateKey, Map<String, AggregateAttributeModification>> write(
            Map<UpdateKey, UpdateValue> data) throws Exception {
        UpdateItemRequest req = null;
        UpdateItemResult result;
        Map<String, AggregateAttributeModification> updatedValues;
        Map<UpdateKey, Map<String, AggregateAttributeModification>> updatedData = new HashMap<>();
        int conditionals = 0;

        if (data != null && data.keySet().size() > 0) {
            LOG.debug(String.format("Flushing %s Cache Updates", data.size()));

            // go through all pending updates and write down increments to event
            // counts and SUM operations first, then do other types of
            // calculations which need conditional updates after
            for (final UpdateKey key1 : data.keySet()) {
                // initialise the map of all updates made for final value
                // processing
                if (!updatedData.containsKey(key1)) {
                    updatedValues = new HashMap<>();
                } else {
                    updatedValues = updatedData.get(key1);
                }

                Map<String, AttributeValueUpdate> updates = new HashMap<>();

                /*
                 * updates.put(SCATTER_PREFIX_ATTRIBUTE,
                 *         new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(
                 *                 new AttributeValue().withN("" + r.nextInt(SCATTER_WIDTH))));
                 */

                // add the event count update to the list of updates to be made
                updates.put(StreamAggregator.EVENT_COUNT,
                        new AttributeValueUpdate().withAction(AttributeAction.ADD).withValue(
                                new AttributeValue().withN(""
                                        + data.get(key1).getAggregateCount())));

                // add the time horizon type to the item
                updates.put(StreamAggregator.TIME_HORIZON_ATTR,
                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(
                                new AttributeValue().withS(key1.getTimeHorizon().getAbbrev())));

                // add last update time and sequence
                updates.put(StreamAggregator.LAST_WRITE_SEQ,
                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(
                                new AttributeValue().withS(data.get(key1).getLastWriteSeq())));
                updates.put(StreamAggregator.LAST_WRITE_TIME,
                        new AttributeValueUpdate().withAction(AttributeAction.PUT).withValue(
                                new AttributeValue().withS(StreamAggregator.dateFormatter.format(
                                        new Date(data.get(key1).getLastWriteTime())))));

                if (this.aggregatorType.equals(AggregatorType.SUM)) {
                    for (final String attribute : data.get(key1).getSummaryValues().keySet()) {
                        final AggregateAttributeModification update = data.get(key1)
                                .getSummaryValues().get(attribute);

                        if (!update.getCalculationApplied().getSummaryUpdateMethod()
                                .equals(DynamoSummaryUpdateMethod.CONDITIONAL)) {
                            String setAttributeName = StreamAggregatorUtils.methodToColumn(attribute);

                            updates.put(setAttributeName, new AttributeValueUpdate()
                                    .withAction(update.getCalculationApplied()
                                            .getSummaryUpdateMethod().getAction())
                                    .withValue(new AttributeValue().withN(""
                                            + update.getFinalValue())));

                            // add a stub entry so that we can extract the
                            // updated value from the resultset
                            updatedValues.put(setAttributeName, new AggregateAttributeModification(
                                    update.getAttributeName(), update.getOriginatingValueName(),
                                    null, update.getCalculationApplied()));
                        }
                    }
                }

                // do the update to all sum and count attributes as well
                // as the last write sequence and time - this gives us a key to
                // write other calculations onto
                req = new UpdateItemRequest().withTableName(tableName)
                        .withKey(StreamAggregatorUtils.getTableKey(key1))
                        .withAttributeUpdates(updates).withReturnValues(ReturnValue.UPDATED_NEW);
                result = DynamoUtils.updateWithRetries(dynamoClient, req);

                // add the event count to the modifications made
                updatedValues.put(StreamAggregator.EVENT_COUNT, new AggregateAttributeModification(
                        StreamAggregator.EVENT_COUNT, StreamAggregator.EVENT_COUNT,
                        Double.parseDouble(result.getAttributes().get(StreamAggregator.EVENT_COUNT)
                                .getN()), SummaryCalculation.SUM));

                // extract all updated values processed by the previous update
                for (String attribute : updatedValues.keySet()) {
                    updatedValues.put(attribute, new AggregateAttributeModification(
                            updatedValues.get(attribute).getAttributeName(),
                            updatedValues.get(attribute).getOriginatingValueName(),
                            Double.parseDouble(result.getAttributes().get(attribute).getN()),
                            updatedValues.get(attribute).getCalculationApplied(),
                            updatedValues.get(attribute).getWritesSoFar() + 1));
                }

                // add all the updates for this key
                updatedData.put(key1, updatedValues);

                // log the structure of the table once, so the customer can
                // retrieve it directly
                if (!reportedStructure) {
                    LOG.info(getTableStructure());
                    reportedStructure = true;
                }
            }

            // now process all non summing calculations which are conditional
            // and require that the table keys already exist
            if (this.aggregatorType.equals(AggregatorType.SUM)) {
                for (final UpdateKey key2 : data.keySet()) {
                    updatedValues = updatedData.get(key2);

                    // we perform a single update for all SUM operations and
                    // the count, last write sequence and time, and a separate
                    // conditional update for every instance of MIN or MAX
                    // calculations as these must be conditionally applied to
                    // be correct
                    for (final String attribute : data.get(key2).getSummaryValues().keySet()) {
                        final AggregateAttributeModification update = data.get(key2)
                                .getSummaryValues().get(attribute);

                        if (update.getCalculationApplied().getSummaryUpdateMethod()
                                .equals(DynamoSummaryUpdateMethod.CONDITIONAL)) {
                            conditionals++;

                            result = updateConditionalValue(dynamoClient, tableName, key2,
                                    attribute, update);

                            // if the update was made by this conditional
                            // update, then add its items to the update set
                            Double finalValue = null;
                            int increment = update.getWritesSoFar();
                            if (result != null && result.getAttributes() != null) {
                                finalValue = Double.parseDouble(result.getAttributes()
                                        .get(attribute).getN());
                                increment++;
                            }
                            updatedValues.put(attribute, new AggregateAttributeModification(
                                    update.getAttributeName(), update.getOriginatingValueName(),
                                    finalValue, update.getCalculationApplied(), increment));
                        }
                    }

                    // add the conditional update items into the overall update
                    // set
                    updatedData.put(key2, updatedValues);
                }

                LOG.debug(String.format("Processed %s Conditional Updates", conditionals));
            }
        }

        return updatedData;
    }
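    /*
     * Illustrative sketch (not called by the aggregator) of the
     * non-conditional update that write() issues for event counts and SUM
     * values: the ADD action makes DynamoDB apply the increment atomically,
     * so concurrent workers flushing the same key never lose counts.
     */
    @SuppressWarnings("unused")
    private UpdateItemResult exampleAtomicCountIncrement(Map<String, AttributeValue> itemKey) {
        Map<String, AttributeValueUpdate> updates = new HashMap<>();

        // atomically add 5 to the item's running event count
        updates.put(StreamAggregator.EVENT_COUNT, new AttributeValueUpdate()
                .withAction(AttributeAction.ADD).withValue(new AttributeValue().withN("5")));

        return this.dynamoClient.updateItem(new UpdateItemRequest().withTableName(this.tableName)
                .withKey(itemKey).withAttributeUpdates(updates)
                .withReturnValues(ReturnValue.UPDATED_NEW));
    }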
    public UpdateItemResult updateConditionalValue(final AmazonDynamoDB dynamoClient,
            final String tableName, final UpdateKey key, final String attribute,
            final AggregateAttributeModification update) throws Exception {
        Map<String, AttributeValue> updateKey = StreamAggregatorUtils.getTableKey(key);
        UpdateItemResult result;
        final ReturnValue returnValue = ReturnValue.UPDATED_NEW;
        final String setAttribute = StreamAggregatorUtils.methodToColumn(attribute);

        // create the update that we want to write
        final Map<String, AttributeValueUpdate> thisCalcUpdate = new HashMap<String, AttributeValueUpdate>() {
            {
                put(setAttribute, new AttributeValueUpdate().withAction(AttributeAction.PUT)
                        .withValue(new AttributeValue().withN("" + update.getFinalValue())));
            }
        };

        // create the request
        UpdateItemRequest req = new UpdateItemRequest().withTableName(tableName).withKey(updateKey)
                .withReturnValues(returnValue).withAttributeUpdates(thisCalcUpdate);

        Map<String, ExpectedAttributeValue> expected = new HashMap<>();

        final SummaryCalculation calc = update.getCalculationApplied();

        // try an update to PUT the value if NOT EXISTS, to establish if we
        // are the first writer for this key
        expected = new HashMap<String, ExpectedAttributeValue>() {
            {
                put(setAttribute, new ExpectedAttributeValue().withExists(false));
            }
        };

        req.setExpected(expected);

        try {
            result = DynamoUtils.updateWithRetries(dynamoClient, req);

            // yay - we were the first writer, so our value was written
            return result;
        } catch (ConditionalCheckFailedException e1) {
            // set the expected to the comparison contained in the update
            // calculation
            expected.clear();
            expected.put(setAttribute,
                    new ExpectedAttributeValue().withComparisonOperator(
                            calc.getDynamoComparisonOperator()).withValue(
                            new AttributeValue().withN("" + update.getFinalValue())));
            req.setExpected(expected);

            // do the conditional update on the summary calculation. this may
            // result in no update being applied because the new value is
            // greater than the current minimum for MIN, or less than the
            // current maximum for MAX.
            try {
                result = DynamoUtils.updateWithRetries(dynamoClient, req);
                return result;
            } catch (ConditionalCheckFailedException e2) {
                // no worries - we just weren't the min or max!
                return null;
            }
        }
    }
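    /*
     * Worked example of the conditional protocol above, for a MIN
     * calculation: writer A flushes the value 7 and wins the "attribute must
     * not exist" PUT; writer B then flushes 3, fails the existence check,
     * retries with the calculation's comparison operator (for MIN, the stored
     * value must be greater than the candidate), and succeeds; a later flush
     * of 9 fails both conditions and correctly leaves the stored minimum at 3.
     */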
    /**
     * Examines the table which backs an Aggregator, and returns a string
     * value which represents the list of attributes in the table. This method
     * assumes that all items in an aggregate table have the same structure.
     *
     * @return A String representation of the attribute names in the table.
     * @throws Exception
     */
    public String getTableStructure() throws Exception {
        List<String> columns = getDictionaryEntry();
        StringBuilder sb = new StringBuilder();
        for (String s : columns) {
            sb.append(String.format("%s,", s));
        }

        return String.format("Dynamo Table %s (%s)", this.tableName,
                sb.substring(0, sb.length() - 1));
    }

    /**
     * Generate a list of attribute names found in the Aggregator's Dynamo
     * table. Assumes that all Items in the Aggregator table have the same
     * structure.
     *
     * @return A list of attribute names from the Dynamo table.
     * @throws Exception
     */
    protected List<String> getDictionaryEntry() throws Exception {
        // get a list of all columns in the table, with keys first
        List<String> columns = new ArrayList<>();
        List<KeySchemaElement> keys = dynamoClient.describeTable(this.tableName).getTable()
                .getKeySchema();
        for (KeySchemaElement key : keys) {
            columns.add(key.getAttributeName());
        }

        // sample a single item and add any non-key attributes it carries
        ScanResult scan = dynamoClient.scan(new ScanRequest().withTableName(this.tableName)
                .withSelect(Select.ALL_ATTRIBUTES).withLimit(1));
        List<Map<String, AttributeValue>> scannedItems = scan.getItems();
        for (Map<String, AttributeValue> map : scannedItems) {
            for (String s : map.keySet()) {
                if (!columns.contains(s))
                    columns.add(s);
            }
        }

        return columns;
    }
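    /*
     * For example, for a SUM aggregator labelled on "deviceId" with an
     * "eventDate" date attribute (hypothetical names), the attribute list
     * assembled by getDictionaryEntry() and reported by getTableStructure()
     * would read something like:
     *
     * Dynamo Table myAggregates (deviceId,eventDate,eventCount,lastWriteSeq,lastWriteTime)
     */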
    /*
     * Configure the aggregate table with the indicated capacity. The global
     * secondary index on the date dimension is currently disabled (see the
     * commented block below), as the table does not need to be queried on
     * date alone.
     */
    public void initAggTable(final String keyColumn, final String dateColumnName,
            final long readCapacity, final long writeCapacity) throws Exception {
        final String setDateColumn = dateColumnName == null ? StreamAggregator.DEFAULT_DATE_VALUE
                : dateColumnName;
        long setReadCapacity = readCapacity == -1 ? DEFAULT_READ_CAPACITY : readCapacity;
        long setWriteCapacity = writeCapacity == -1 ? DEFAULT_WRITE_CAPACITY : writeCapacity;

        // we have to add this attribute list so that we can project the key
        // into the GSI
        List<AttributeDefinition> attributes = new ArrayList<AttributeDefinition>() {
            {
                add(new AttributeDefinition().withAttributeName(keyColumn).withAttributeType("S"));
                add(new AttributeDefinition().withAttributeName(setDateColumn).withAttributeType("S"));
            }
        };

        Collection<GlobalSecondaryIndex> gsi = null; // new ArrayList<>();

        // Global Secondary Index for accessing the table by date item. We do
        // not need to query on date alone, so we do not need to maintain this
        // index.
        /*
         * gsi.add(new GlobalSecondaryIndex().withIndexName(
         *         StreamAggregatorUtils.getDateDimensionIndexName(tableName, setDateColumn)).withKeySchema(
         *         new KeySchemaElement().withAttributeName(SCATTER_PREFIX_ATTRIBUTE).withKeyType(KeyType.HASH),
         *         new KeySchemaElement().withAttributeName(setDateColumn).withKeyType(KeyType.RANGE)).withProjection(
         *         new Projection().withProjectionType(ProjectionType.KEYS_ONLY)).withProvisionedThroughput(
         *         new ProvisionedThroughput().withReadCapacityUnits(setReadCapacity).withWriteCapacityUnits(
         *                 setWriteCapacity)));
         * attributes.add(new AttributeDefinition().withAttributeName(SCATTER_PREFIX_ATTRIBUTE).withAttributeType("N"));
         */

        // table is hash/range on value and date
        List<KeySchemaElement> key = new ArrayList<KeySchemaElement>() {
            {
                add(new KeySchemaElement().withAttributeName(keyColumn).withKeyType(KeyType.HASH));
                add(new KeySchemaElement().withAttributeName(setDateColumn).withKeyType(KeyType.RANGE));
            }
        };

        // initialise the table
        DynamoUtils.initTable(this.dynamoClient, this.tableName, setReadCapacity, setWriteCapacity,
                attributes, key, gsi);
    }

    public long refreshForceCheckpointThresholds() {
        LOG.info("Refreshing Provisioned Throughput settings");

        // get the current provisioned capacity
        this.writeCapacity = getProvisionedWrites();

        // get the current number of provisioned kinesis shards for the stream,
        // if we know what stream we are working against
        int currentShardCount = 1;
        if (this.streamName != null) {
            try {
                currentShardCount = StreamAggregatorUtils.getShardCount(this.kinesisClient,
                        this.streamName);

                // allow roughly 4 minutes of writes at the provisioned rate,
                // shared across the shards (for example, 100 write IOPS on a
                // 2 shard stream gives 4 * 60 * 100 / 2 = 12,000)
                return (4 * (60 * this.writeCapacity)) / currentShardCount;
            } catch (Exception e) {
                LOG.warn(String.format(
                        "Unable to get Shard Count for Stream %s. Using Overly Optimistic Throughput Settings",
                        this.streamName));
            }
        }

        return (4 * (60 * this.writeCapacity));
    }

    private long getProvisionedWrites() {
        return dynamoClient.describeTable(this.tableName).getTable().getProvisionedThroughput()
                .getWriteCapacityUnits();
    }

    public DynamoQueryEngine queryEngine() {
        return this.queryEngine;
    }

    public Region getRegion() {
        return this.region;
    }

    @Override
    public void setRegion(Region region) {
        this.region = region;
    }

    public DynamoDataStore withStorageCapacity(long readCapacity, long writeCapacity) {
        if (readCapacity > 0L)
            this.readCapacity = readCapacity;
        if (writeCapacity > 0L)
            this.writeCapacity = writeCapacity;

        return this;
    }
}