Java tutorial
/**
 * Amazon Kinesis Scaling Utility
 *
 * Copyright 2014, Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Amazon Software License (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/asl/
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.services.kinesis.scaling.auto;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.javatuples.Pair;
import org.javatuples.Triplet;
import org.joda.time.DateTime;

import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.http.IdleConnectionReaper;
import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
import com.amazonaws.services.cloudwatch.AmazonCloudWatchClient;
import com.amazonaws.services.cloudwatch.model.Datapoint;
import com.amazonaws.services.kinesis.AmazonKinesisClient;
import com.amazonaws.services.kinesis.scaling.AlreadyOneShardException;
import com.amazonaws.services.kinesis.scaling.ScaleDirection;
import com.amazonaws.services.kinesis.scaling.ScalingCompletionStatus;
import com.amazonaws.services.kinesis.scaling.ScalingOperationReport;
import com.amazonaws.services.kinesis.scaling.StreamScaler;
import com.amazonaws.services.kinesis.scaling.StreamScalingUtils;
import com.amazonaws.services.sns.AmazonSNSClient;

/**
 * Periodically reads CloudWatch utilisation metrics for a single Kinesis
 * stream and asks a {@link StreamScaler} to change the shard count when the
 * configured scale up / scale down thresholds have been breached.
 *
 * <p>
 * The monitor is a {@link Runnable}: {@link #run()} loops until
 * {@link #stop()} is called or the thread is interrupted. An exception that
 * terminates the loop is kept and can be retrieved through
 * {@link #getException()} or re-thrown with {@link #throwExceptions()}.
 */
public class StreamMonitor implements Runnable {
    private static final Log LOG = LogFactory.getLog(StreamMonitor.class);

    /**
     * Orders datapoint entries from oldest to newest so that "latest" values
     * and the moving average do not depend on map iteration order.
     */
    private static final Comparator<Map.Entry<Datapoint, Double>> BY_TIMESTAMP = new Comparator<Map.Entry<Datapoint, Double>>() {
        @Override
        public int compare(Map.Entry<Datapoint, Double> left, Map.Entry<Datapoint, Double> right) {
            return timestampOf(left.getKey()).compareTo(timestampOf(right.getKey()));
        }
    };

    private AmazonKinesisClient kinesisClient;

    private AmazonCloudWatch cloudWatchClient;

    private AmazonSNSClient snsClient;

    public static final int CLOUDWATCH_PERIOD = 60;

    private AutoscalingConfiguration config;

    /* volatile so that a stop() issued from another thread is seen by run() */
    private volatile boolean keepRunning = true;

    private DateTime lastScaleDown = null;

    private StreamScaler scaler = null;

    /* written by the monitor thread in run(), read by whoever polls for failures */
    private volatile Exception exception;

    /* incomplete constructor only for testing */
    protected StreamMonitor(AutoscalingConfiguration config, StreamScaler scaler) throws Exception {
        this.config = config;
        this.scaler = scaler;
    }

    /**
     * Creates a monitor for the stream described by the configuration, building
     * the scaler and the CloudWatch, Kinesis and SNS clients for the configured
     * region using the default credentials provider chain.
     *
     * @param config the autoscaling configuration to apply
     * @throws Exception declared for callers; raised by whatever the region
     *                   lookup, scaler or client construction throws
     */
    public StreamMonitor(AutoscalingConfiguration config) throws Exception {
        this.config = config;
        Region setRegion = Region.getRegion(Regions.fromName(this.config.getRegion()));
        this.scaler = new StreamScaler(setRegion);

        this.cloudWatchClient = new AmazonCloudWatchClient(new DefaultAWSCredentialsProviderChain());
        this.cloudWatchClient.setRegion(setRegion);

        this.kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain());
        this.kinesisClient.setRegion(setRegion);

        this.snsClient = new AmazonSNSClient(new DefaultAWSCredentialsProviderChain());
        this.snsClient.setRegion(setRegion);
    }

    /**
     * Signals the monitoring loop to finish and releases the AWS clients.
     * Clients are null when the testing constructor was used, so each one is
     * checked before being shut down.
     */
    public void stop() {
        this.keepRunning = false;

        if (this.kinesisClient != null) {
            this.kinesisClient.shutdown();
        }
        if (this.cloudWatchClient != null) {
            this.cloudWatchClient.shutdown();
        }
        if (this.snsClient != null) {
            this.snsClient.shutdown();
        }

        // the idle-connection-reaper is causing a thread leak without an
        // explicit shutdown
        IdleConnectionReaper.shutdown();

        LOG.info(String.format("Signalling Monitor for Stream %s to Stop", config.getStreamName()));
    }

    /**
     * Turns one batch of CloudWatch datapoints into a scaling decision and, if
     * required, performs the scaling operation.
     *
     * <p>
     * Method has been lifted out of run() for unit testing purposes.
     *
     * @param currentUtilisationMetrics datapoints (with their values) per
     *                                  operation type and stream metric
     * @param streamMaxCapacity         the current maximum capacity of the
     *                                  stream per operation type
     * @param cwSampleDuration          number of one-minute periods that were
     *                                  requested from CloudWatch
     * @param now                       the reference time used for the scale
     *                                  down cool off check
     * @return the report of the scaling operation; a "no action required"
     *         report when no scaling is needed; or {@code null} when scaling
     *         down was deferred, the stream was already at one shard, or the
     *         operation failed (the failure is logged)
     */
    protected ScalingOperationReport processCloudwatchMetrics(
            Map<KinesisOperationType, Map<StreamMetric, Map<Datapoint, Double>>> currentUtilisationMetrics,
            Map<KinesisOperationType, StreamMetrics> streamMaxCapacity, int cwSampleDuration, DateTime now) {
        ScalingOperationReport report = null;

        // for each type of operation that the customer has requested profiling
        // (PUT, GET), work out a scaling vote
        Map<KinesisOperationType, ScaleDirection> scaleVotes = new HashMap<>();
        for (Map.Entry<KinesisOperationType, Map<StreamMetric, Map<Datapoint, Double>>> entry : currentUtilisationMetrics
                .entrySet()) {
            scaleVotes.put(entry.getKey(), voteForOperation(entry.getKey(), entry.getValue(),
                    streamMaxCapacity.get(entry.getKey()), cwSampleDuration));
        }

        ScaleDirection finalScaleDirection = combineVotes(scaleVotes.get(KinesisOperationType.GET),
                scaleVotes.get(KinesisOperationType.PUT));

        try {
            int currentShardCount = this.scaler.getOpenShardCount(this.config.getStreamName());

            // if the metric stats indicate a scale up or down, then do the
            // action
            if (ScaleDirection.UP.equals(finalScaleDirection)) {
                report = scaleUp(currentShardCount);
            } else if (ScaleDirection.DOWN.equals(finalScaleDirection)) {
                report = scaleDown(currentShardCount, now);
            } else {
                // scale direction not set, so we're not going to scale
                // up or down - everything fine
                LOG.info("No Scaling required - Stream capacity within specified tolerances");
                return this.scaler.reportFor(ScalingCompletionStatus.NoActionRequired, this.config.getStreamName(), 0,
                        finalScaleDirection);
            }
        } catch (Exception e) {
            LOG.error("Failed to process stream " + this.config.getStreamName(), e);
        }

        return report;
    }

    /** Returns the datapoint's timestamp as a Joda {@link DateTime}. */
    private static DateTime timestampOf(Datapoint datapoint) {
        return new DateTime(datapoint.getTimestamp());
    }

    /**
     * Counts high and low utilisation samples for every stream metric of one
     * operation type and converts them into a scaling vote.
     *
     * @param operation        the operation type being evaluated
     * @param operationMetrics datapoints per stream metric for the operation;
     *                         a missing metric is treated as having no samples
     * @param maxCapacity      the maximum stream capacity for the operation
     * @param cwSampleDuration number of periods requested from CloudWatch
     * @return UP, DOWN or NONE
     */
    private ScaleDirection voteForOperation(KinesisOperationType operation,
            Map<StreamMetric, Map<Datapoint, Double>> operationMetrics, StreamMetrics maxCapacity,
            int cwSampleDuration) {
        // per metric: (high samples, low samples, average utilisation)
        Map<StreamMetric, Triplet<Integer, Integer, Double>> perMetricSamples = new HashMap<>();

        // process each metric type, including Records and Bytes
        for (StreamMetric metric : StreamMetric.values()) {
            double latestMax = 0d;
            double latestAvg = 0d;
            DateTime lastTime = null;
            int lowSamples = 0;
            int highSamples = 0;

            // we may have no samples for this type of metric which is ok -
            // they'll later be counted as low metrics
            Map<Datapoint, Double> metrics = null;
            if (operationMetrics != null) {
                metrics = operationMetrics.get(metric);
            }
            if (metrics == null) {
                metrics = Collections.emptyMap();
            }

            // if we got nothing back, then there are no operations of the
            // given type happening, so this is a full 'low sample'
            if (metrics.isEmpty()) {
                lowSamples = this.config.getScaleDown().getScaleAfterMins();
            }

            // walk the datapoints oldest-first: map iteration order is not
            // guaranteed to be chronological, and both the 'latest' value and
            // the moving average below depend on the order
            List<Map.Entry<Datapoint, Double>> chronological = new ArrayList<>(metrics.entrySet());
            Collections.sort(chronological, BY_TIMESTAMP);

            // process the data point aggregates retrieved from CloudWatch
            // and log scale up/down votes by period
            for (Map.Entry<Datapoint, Double> datapointEntry : chronological) {
                double currentMax = datapointEntry.getValue();
                double currentPct = currentMax / maxCapacity.get(metric);

                // keep track of the last measures
                DateTime sampleTime = timestampOf(datapointEntry.getKey());
                if (lastTime == null || sampleTime.isAfter(lastTime)) {
                    latestMax = currentMax;

                    // latest average is a simple moving average
                    latestAvg = latestAvg == 0d ? currentPct : (latestAvg + currentPct) / 2;
                    lastTime = sampleTime;
                }

                // if the pct for the datapoint exceeds or is below the
                // thresholds, then add low/high samples
                if (currentPct > this.config.getScaleUp().getScaleThresholdPct() / 100d) {
                    LOG.debug(String.format("%s %s: Cached High Alarm Condition for %.2f %s/Second (%.2f%%)",
                            operation, metric, currentMax, metric, currentPct * 100));
                    highSamples++;
                } else if (currentPct < this.config.getScaleDown().getScaleThresholdPct() / 100d) {
                    LOG.debug(String.format("%s %s: Cached Low Alarm Condition for %.2f %s/Second (%.2f%%)",
                            operation, metric, currentMax, metric, currentPct * 100));
                    lowSamples++;
                }
            }

            // add low samples for the periods which we didn't get any
            // data points, if there are any
            if (metrics.size() < cwSampleDuration) {
                lowSamples += cwSampleDuration - metrics.size();
            }

            LOG.info(String.format(
                    metric + ": Stream %s Used %s[%s] Capacity ~ %.2f%% (%,.0f " + metric + " of %d)",
                    config.getStreamName(), operation, metric, latestAvg * 100, latestMax,
                    maxCapacity.get(metric)));

            // each metric is visited exactly once per operation, so there is
            // never an earlier entry to merge with
            perMetricSamples.put(metric, new Triplet<>(highSamples, lowSamples, latestAvg));
        }

        /*-
         * we now have per metric samples for this operation type
         *
         * For Example:
         *
         * Metric  | High Samples | Low Samples | Pct Used
         * Bytes   | 3            | 0           | .98
         * Records | 0            | 10          | .2
         *
         * Check these values against the provided configuration. If we have
         * been above the 'scaleAfterMins' with high samples for either
         * metric, then we scale up. If not, then if we've been below the
         * scaleAfterMins with low samples, then we scale down. Otherwise
         * the vote stays as NONE
         */

        // first find out which of the dimensions of stream utilisation are
        // higher - we'll use the higher of the two for time checks
        StreamMetric higherUtilisationMetric;
        if (perMetricSamples.get(StreamMetric.Bytes).getValue2() >= perMetricSamples.get(StreamMetric.Records)
                .getValue2()) {
            higherUtilisationMetric = StreamMetric.Bytes;
        } else {
            higherUtilisationMetric = StreamMetric.Records;
        }
        Triplet<Integer, Integer, Double> decisive = perMetricSamples.get(higherUtilisationMetric);
        Double higherUtilisationPct = decisive.getValue2();

        LOG.info(String.format(
                "Will decide scaling action based on metric %s[%s] due to higher utilisation metric %.2f%%",
                operation, higherUtilisationMetric, higherUtilisationPct * 100));

        if (decisive.getValue0() >= config.getScaleUp().getScaleAfterMins()) {
            return ScaleDirection.UP;
        }
        if (decisive.getValue1() >= config.getScaleDown().getScaleAfterMins()) {
            return ScaleDirection.DOWN;
        }

        // the default scaling vote is 'do nothing'
        return ScaleDirection.NONE;
    }

    /**
     * Applies the decision matrix to the GET and PUT votes: if either vote is
     * to scale up then scale up; if both are NONE then do nothing; otherwise
     * scale down. When only one vote exists it is used as is, and when neither
     * exists the result is NONE.
     */
    private static ScaleDirection combineVotes(ScaleDirection getVote, ScaleDirection putVote) {
        if (getVote == null && putVote == null) {
            // no operation types were profiled at all - nothing to act on
            return ScaleDirection.NONE;
        }
        if (getVote == null) {
            return putVote;
        }
        if (putVote == null) {
            return getVote;
        }

        if (getVote == ScaleDirection.UP || putVote == ScaleDirection.UP) {
            return ScaleDirection.UP;
        }
        if (getVote == ScaleDirection.NONE && putVote == ScaleDirection.NONE) {
            return ScaleDirection.NONE;
        }
        return ScaleDirection.DOWN;
    }

    /**
     * Requests a scale up of the stream by the configured count or percentage
     * and sends the scale up notification if one is configured.
     *
     * @param currentShardCount the number of open shards in the stream
     * @return the report returned by the scaler
     * @throws Exception if the scaler fails to update the shard count
     */
    private ScalingOperationReport scaleUp(int currentShardCount) throws Exception {
        ScalingOperationReport report;
        Integer scaleUpCount = this.config.getScaleUp().getScaleCount();

        LOG.info(String.format("Requesting Scale Up of Stream %s by %s as %s has been above %s%% for %s Minutes",
                this.config.getStreamName(),
                (scaleUpCount != null) ? scaleUpCount : this.config.getScaleUp().getScalePct() + "%",
                this.config.getScaleOnOperations().toString(), this.config.getScaleUp().getScaleThresholdPct(),
                this.config.getScaleUp().getScaleAfterMins()));

        // TODO migrate this block to UpdateShardCount API
        if (scaleUpCount != null) {
            report = this.scaler.updateShardCount(this.config.getStreamName(), currentShardCount,
                    currentShardCount + scaleUpCount, this.config.getMinShards(), this.config.getMaxShards());
        } else {
            // the scale up percentage is applied as a multiplier of the
            // current shard count (for example 200 doubles the stream)
            int targetShardCount = (int) (currentShardCount * (this.config.getScaleUp().getScalePct() / 100d));
            report = this.scaler.updateShardCount(this.config.getStreamName(), currentShardCount,
                    targetShardCount, this.config.getMinShards(), this.config.getMaxShards());
        }

        // send SNS notifications
        if (this.config.getScaleUp().getNotificationARN() != null && this.snsClient != null) {
            try {
                StreamScalingUtils.sendNotification(this.snsClient, this.config.getScaleUp().getNotificationARN(),
                        "Kinesis Autoscaling - Scale Up", (report == null ? "No Changes Made" : report.asJson()));
            } catch (Exception e) {
                // the scaling operation itself has completed, so a failed
                // notification must not hide its report from the caller
                LOG.error("Failed to send Scale Up notification for stream " + this.config.getStreamName(), e);
            }
        }

        return report;
    }

    /**
     * Requests a scale down of the stream by the configured count or
     * percentage unless the cool off period since the previous scale down is
     * still running, and sends the scale down notification if one is
     * configured.
     *
     * @param currentShardCount the number of open shards in the stream
     * @param now               the reference time for the cool off check
     * @return the report returned by the scaler, or {@code null} when the
     *         scale down was deferred or the stream is already at one shard
     * @throws Exception if the scaler fails to update the shard count
     */
    private ScalingOperationReport scaleDown(int currentShardCount, DateTime now) throws Exception {
        // check the cool down interval
        if (lastScaleDown != null
                && now.minusMinutes(this.config.getScaleDown().getCoolOffMins()).isBefore(lastScaleDown)) {
            LOG.info(String.format("Stream %s: Deferring Scale Down until Cool Off Period of %s Minutes has elapsed",
                    this.config.getStreamName(), this.config.getScaleDown().getCoolOffMins()));
            return null;
        }

        ScalingOperationReport report = null;
        Integer scaleDownCount = this.config.getScaleDown().getScaleCount();

        LOG.info(String.format("Requesting Scale Down of Stream %s by %s as %s has been below %s%% for %s Minutes",
                this.config.getStreamName(),
                (scaleDownCount != null) ? scaleDownCount : this.config.getScaleDown().getScalePct() + "%",
                config.getScaleOnOperations().toString(), this.config.getScaleDown().getScaleThresholdPct(),
                this.config.getScaleDown().getScaleAfterMins()));

        try {
            if (scaleDownCount != null) {
                report = this.scaler.updateShardCount(this.config.getStreamName(), currentShardCount,
                        currentShardCount - scaleDownCount, this.config.getMinShards(),
                        this.config.getMaxShards());
            } else {
                // remove the configured percentage of the current shards.
                // The previous expression subtracted only pct/100 (less than
                // one shard for any pct below 100) instead of that fraction
                // of the shard count.
                // NOTE(review): the min/max shard bounds are handed to
                // updateShardCount - confirm it clamps a target below 1
                int targetShardCount = (int) (currentShardCount
                        - currentShardCount * (this.config.getScaleDown().getScalePct() / 100d));
                report = this.scaler.updateShardCount(this.config.getStreamName(), currentShardCount,
                        targetShardCount, this.config.getMinShards(), this.config.getMaxShards());
            }

            lastScaleDown = new DateTime(System.currentTimeMillis());

            // send SNS notifications
            if (this.config.getScaleDown().getNotificationARN() != null && this.snsClient != null) {
                try {
                    StreamScalingUtils.sendNotification(this.snsClient,
                            this.config.getScaleDown().getNotificationARN(), "Kinesis Autoscaling - Scale Down",
                            (report == null ? "No Changes Made" : report.asJson()));
                } catch (AlreadyOneShardException aose) {
                    // let the handler below deal with it, as before
                    throw aose;
                } catch (Exception e) {
                    // the scaling operation itself has completed, so a failed
                    // notification must not hide its report from the caller
                    LOG.error("Failed to send Scale Down notification for stream " + this.config.getStreamName(),
                            e);
                }
            }
        } catch (AlreadyOneShardException aose) {
            // do nothing - we're already at 1 shard
            LOG.info(String.format("Stream %s: Not Scaling Down - Already at Minimum of 1 Shard",
                    this.config.getStreamName()));
        }

        return report;
    }

    /**
     * Runs the monitoring loop: query CloudWatch, process the metrics, refresh
     * the stream capacity and notify the report listener after a scaling
     * decision, then sleep for the configured check interval. The loop ends
     * when {@link #stop()} has been called or the thread is interrupted. Any
     * other exception ends the loop and is kept for {@link #getException()}.
     */
    @Override
    public void run() {
        LOG.info(String.format("Started Stream Monitor for %s", config.getStreamName()));
        DateTime lastShardCapacityRefreshTime = new DateTime(System.currentTimeMillis());

        // create a StreamMetricManager object
        StreamMetricManager metricManager = new StreamMetricManager(this.config.getStreamName(),
                this.config.getScaleOnOperations(), this.cloudWatchClient, this.kinesisClient);

        try {
            // load the current configured max capacity
            metricManager.loadMaxCapacity();

            // configure the duration to request from cloudwatch
            int cwSampleDuration = Math.max(config.getScaleUp().getScaleAfterMins(),
                    config.getScaleDown().getScaleAfterMins());

            do {
                DateTime now = new DateTime(System.currentTimeMillis());
                DateTime metricEndTime = now;

                // fetch only the last N minutes metrics
                DateTime metricStartTime = metricEndTime.minusMinutes(cwSampleDuration);

                // load the current cloudwatch metrics for the stream via the
                // metrics manager
                @SuppressWarnings("rawtypes")
                Map currentUtilisationMetrics = metricManager.queryCurrentUtilisationMetrics(cwSampleDuration,
                        metricStartTime, metricEndTime);

                // process the aggregated set of Cloudwatch Datapoints
                ScalingOperationReport report = processCloudwatchMetrics(currentUtilisationMetrics,
                        metricManager.getStreamMaxCapacity(), cwSampleDuration, now);

                if (report != null) {
                    // refresh the current max capacity after the
                    // modification
                    metricManager.loadMaxCapacity();
                    lastShardCapacityRefreshTime = now;

                    // notify all report listeners that we've completed a
                    // scaling operation
                    if (this.config.getScalingOperationReportListener() != null) {
                        this.config.getScalingOperationReportListener().onReport(report);
                    }

                    if (report.getScaleDirection() != ScaleDirection.NONE) {
                        LOG.info(report.toString());
                    }
                }

                // refresh shard stats every configured period, in case someone
                // has manually updated the number of shards
                if (now.minusMinutes(this.config.getRefreshShardsNumberAfterMin())
                        .isAfter(lastShardCapacityRefreshTime)) {
                    metricManager.loadMaxCapacity();
                    lastShardCapacityRefreshTime = now;
                }

                try {
                    LOG.debug("Sleep");
                    // long arithmetic so that large intervals cannot overflow
                    Thread.sleep(this.config.getCheckInterval() * 1000L);
                } catch (InterruptedException e) {
                    LOG.error(e.getMessage(), e);
                    // restore the interrupt flag for whoever owns this thread
                    Thread.currentThread().interrupt();
                    break;
                }
            } while (keepRunning);

            LOG.info(String.format("Stream Monitor for %s in %s Completed. Exiting.", this.config.getStreamName(),
                    this.config.getRegion()));
        } catch (Exception e) {
            LOG.error("Stream Monitor for " + this.config.getStreamName() + " terminated by exception", e);
            this.exception = e;
        }
    }

    /**
     * Re-throws the exception that terminated {@link #run()}, if there was one.
     *
     * @throws Exception the stored exception
     */
    public void throwExceptions() throws Exception {
        if (this.exception != null) {
            throw this.exception;
        }
    }

    /**
     * @return the exception that terminated {@link #run()}, or {@code null} if
     *         none has been recorded
     */
    public Exception getException() {
        return this.exception;
    }

    /**
     * Overrides the time of the last scale down, which drives the cool off
     * check before the next scale down.
     */
    protected void setLastScaleDown(DateTime setLastScaleDown) {
        this.lastScaleDown = setLastScaleDown;
    }

    /** @return the autoscaling configuration this monitor was created with */
    AutoscalingConfiguration getConfig() {
        return this.config;
    }
}