Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.streaming.connectors.kinesis.proxy; import com.amazonaws.AmazonServiceException; import com.amazonaws.services.kinesis.AmazonKinesisClient; import com.amazonaws.services.kinesis.model.DescribeStreamRequest; import com.amazonaws.services.kinesis.model.DescribeStreamResult; import com.amazonaws.services.kinesis.model.GetRecordsRequest; import com.amazonaws.services.kinesis.model.GetRecordsResult; import com.amazonaws.services.kinesis.model.GetShardIteratorResult; import com.amazonaws.services.kinesis.model.LimitExceededException; import com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException; import com.amazonaws.services.kinesis.model.ResourceNotFoundException; import com.amazonaws.services.kinesis.model.StreamStatus; import com.amazonaws.services.kinesis.model.Shard; import com.amazonaws.services.kinesis.model.GetShardIteratorRequest; import com.amazonaws.services.kinesis.model.ShardIteratorType; import org.apache.flink.streaming.connectors.kinesis.config.ConsumerConfigConstants; import org.apache.flink.streaming.connectors.kinesis.model.KinesisStreamShard; import org.apache.flink.streaming.connectors.kinesis.util.AWSUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Properties; import java.util.Map; import java.util.Random; import java.util.Date; import static org.apache.flink.util.Preconditions.checkNotNull; /** * Kinesis proxy implementation - a utility class that is used as a proxy to make * calls to AWS Kinesis for several functions, such as getting a list of shards and * fetching a batch of data records starting from a specified record sequence number. * * NOTE: * In the AWS KCL library, there is a similar implementation - {@link com.amazonaws.services.kinesis.clientlibrary.proxies.KinesisProxy}. * This implementation differs mainly in that we can make operations to arbitrary Kinesis streams, which is a needed * functionality for the Flink Kinesis Connecter since the consumer may simultaneously read from multiple Kinesis streams. */ public class KinesisProxy implements KinesisProxyInterface { private static final Logger LOG = LoggerFactory.getLogger(KinesisProxy.class); /** The actual Kinesis client from the AWS SDK that we will be using to make calls */ private final AmazonKinesisClient kinesisClient; /** Random seed used to calculate backoff jitter for Kinesis operations */ private final static Random seed = new Random(); // ------------------------------------------------------------------------ // describeStream() related performance settings // ------------------------------------------------------------------------ /** Base backoff millis for the describe stream operation */ private final long describeStreamBaseBackoffMillis; /** Maximum backoff millis for the describe stream operation */ private final long describeStreamMaxBackoffMillis; /** Exponential backoff power constant for the describe stream operation */ private final double describeStreamExpConstant; // ------------------------------------------------------------------------ // getRecords() related performance settings // ------------------------------------------------------------------------ /** Base backoff millis for the get records operation */ private final long getRecordsBaseBackoffMillis; /** Maximum backoff millis for the get records operation */ private final long getRecordsMaxBackoffMillis; /** Exponential backoff power constant for the get records operation */ private final double getRecordsExpConstant; /** Maximum attempts for the get records operation */ private final int getRecordsMaxAttempts; // ------------------------------------------------------------------------ // getShardIterator() related performance settings // ------------------------------------------------------------------------ /** Base backoff millis for the get shard iterator operation */ private final long getShardIteratorBaseBackoffMillis; /** Maximum backoff millis for the get shard iterator operation */ private final long getShardIteratorMaxBackoffMillis; /** Exponential backoff power constant for the get shard iterator operation */ private final double getShardIteratorExpConstant; /** Maximum attempts for the get shard iterator operation */ private final int getShardIteratorMaxAttempts; /** * Create a new KinesisProxy based on the supplied configuration properties * * @param configProps configuration properties containing AWS credential and AWS region info */ private KinesisProxy(Properties configProps) { checkNotNull(configProps); this.kinesisClient = AWSUtil.createKinesisClient(configProps); this.describeStreamBaseBackoffMillis = Long .valueOf(configProps.getProperty(ConsumerConfigConstants.STREAM_DESCRIBE_BACKOFF_BASE, Long.toString(ConsumerConfigConstants.DEFAULT_STREAM_DESCRIBE_BACKOFF_BASE))); this.describeStreamMaxBackoffMillis = Long .valueOf(configProps.getProperty(ConsumerConfigConstants.STREAM_DESCRIBE_BACKOFF_MAX, Long.toString(ConsumerConfigConstants.DEFAULT_STREAM_DESCRIBE_BACKOFF_MAX))); this.describeStreamExpConstant = Double.valueOf(configProps.getProperty( ConsumerConfigConstants.STREAM_DESCRIBE_BACKOFF_EXPONENTIAL_CONSTANT, Double.toString(ConsumerConfigConstants.DEFAULT_STREAM_DESCRIBE_BACKOFF_EXPONENTIAL_CONSTANT))); this.getRecordsBaseBackoffMillis = Long .valueOf(configProps.getProperty(ConsumerConfigConstants.SHARD_GETRECORDS_BACKOFF_BASE, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETRECORDS_BACKOFF_BASE))); this.getRecordsMaxBackoffMillis = Long .valueOf(configProps.getProperty(ConsumerConfigConstants.SHARD_GETRECORDS_BACKOFF_MAX, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETRECORDS_BACKOFF_MAX))); this.getRecordsExpConstant = Double.valueOf(configProps.getProperty( ConsumerConfigConstants.SHARD_GETRECORDS_BACKOFF_EXPONENTIAL_CONSTANT, Double.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETRECORDS_BACKOFF_EXPONENTIAL_CONSTANT))); this.getRecordsMaxAttempts = Integer .valueOf(configProps.getProperty(ConsumerConfigConstants.SHARD_GETRECORDS_RETRIES, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETRECORDS_RETRIES))); this.getShardIteratorBaseBackoffMillis = Long .valueOf(configProps.getProperty(ConsumerConfigConstants.SHARD_GETITERATOR_BACKOFF_BASE, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETITERATOR_BACKOFF_BASE))); this.getShardIteratorMaxBackoffMillis = Long .valueOf(configProps.getProperty(ConsumerConfigConstants.SHARD_GETITERATOR_BACKOFF_MAX, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETITERATOR_BACKOFF_MAX))); this.getShardIteratorExpConstant = Double.valueOf(configProps.getProperty( ConsumerConfigConstants.SHARD_GETITERATOR_BACKOFF_EXPONENTIAL_CONSTANT, Double.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETITERATOR_BACKOFF_EXPONENTIAL_CONSTANT))); this.getShardIteratorMaxAttempts = Integer .valueOf(configProps.getProperty(ConsumerConfigConstants.SHARD_GETITERATOR_RETRIES, Long.toString(ConsumerConfigConstants.DEFAULT_SHARD_GETITERATOR_RETRIES))); } /** * Creates a Kinesis proxy. * * @param configProps configuration properties * @return the created kinesis proxy */ public static KinesisProxyInterface create(Properties configProps) { return new KinesisProxy(configProps); } /** * {@inheritDoc} */ @Override public GetRecordsResult getRecords(String shardIterator, int maxRecordsToGet) throws InterruptedException { final GetRecordsRequest getRecordsRequest = new GetRecordsRequest(); getRecordsRequest.setShardIterator(shardIterator); getRecordsRequest.setLimit(maxRecordsToGet); GetRecordsResult getRecordsResult = null; int attempt = 0; while (attempt <= getRecordsMaxAttempts && getRecordsResult == null) { try { getRecordsResult = kinesisClient.getRecords(getRecordsRequest); } catch (AmazonServiceException ex) { if (isRecoverableException(ex)) { long backoffMillis = fullJitterBackoff(getRecordsBaseBackoffMillis, getRecordsMaxBackoffMillis, getRecordsExpConstant, attempt++); LOG.warn("Got recoverable AmazonServiceException. Backing off for " + backoffMillis + " millis (" + ex.getErrorMessage() + ")"); Thread.sleep(backoffMillis); } else { throw ex; } } } if (getRecordsResult == null) { throw new RuntimeException("Rate Exceeded for getRecords operation - all " + getRecordsMaxAttempts + " retry attempts returned ProvisionedThroughputExceededException."); } return getRecordsResult; } /** * {@inheritDoc} */ @Override public GetShardListResult getShardList(Map<String, String> streamNamesWithLastSeenShardIds) throws InterruptedException { GetShardListResult result = new GetShardListResult(); for (Map.Entry<String, String> streamNameWithLastSeenShardId : streamNamesWithLastSeenShardIds.entrySet()) { String stream = streamNameWithLastSeenShardId.getKey(); String lastSeenShardId = streamNameWithLastSeenShardId.getValue(); result.addRetrievedShardsToStream(stream, getShardsOfStream(stream, lastSeenShardId)); } return result; } /** * {@inheritDoc} */ @Override public String getShardIterator(KinesisStreamShard shard, String shardIteratorType, @Nullable Object startingMarker) throws InterruptedException { GetShardIteratorRequest getShardIteratorRequest = new GetShardIteratorRequest() .withStreamName(shard.getStreamName()).withShardId(shard.getShard().getShardId()) .withShardIteratorType(shardIteratorType); switch (ShardIteratorType.fromValue(shardIteratorType)) { case TRIM_HORIZON: case LATEST: break; case AT_TIMESTAMP: if (startingMarker instanceof Date) { getShardIteratorRequest.setTimestamp((Date) startingMarker); } else { throw new IllegalArgumentException( "Invalid object given for GetShardIteratorRequest() when ShardIteratorType is AT_TIMESTAMP. Must be a Date object."); } break; case AT_SEQUENCE_NUMBER: case AFTER_SEQUENCE_NUMBER: if (startingMarker instanceof String) { getShardIteratorRequest.setStartingSequenceNumber((String) startingMarker); } else { throw new IllegalArgumentException( "Invalid object given for GetShardIteratorRequest() when ShardIteratorType is AT_SEQUENCE_NUMBER or AFTER_SEQUENCE_NUMBER. Must be a String."); } } return getShardIterator(getShardIteratorRequest); } private String getShardIterator(GetShardIteratorRequest getShardIteratorRequest) throws InterruptedException { GetShardIteratorResult getShardIteratorResult = null; int attempt = 0; while (attempt <= getShardIteratorMaxAttempts && getShardIteratorResult == null) { try { getShardIteratorResult = kinesisClient.getShardIterator(getShardIteratorRequest); } catch (AmazonServiceException ex) { if (isRecoverableException(ex)) { long backoffMillis = fullJitterBackoff(getShardIteratorBaseBackoffMillis, getShardIteratorMaxBackoffMillis, getShardIteratorExpConstant, attempt++); LOG.warn("Got recoverable AmazonServiceException. Backing off for " + backoffMillis + " millis (" + ex.getErrorMessage() + ")"); Thread.sleep(backoffMillis); } else { throw ex; } } } if (getShardIteratorResult == null) { throw new RuntimeException( "Rate Exceeded for getShardIterator operation - all " + getShardIteratorMaxAttempts + " retry attempts returned ProvisionedThroughputExceededException."); } return getShardIteratorResult.getShardIterator(); } /** * Determines whether the exception is recoverable using exponential-backoff. * * @param ex Exception to inspect * @return <code>true</code> if the exception can be recovered from, else * <code>false</code> */ protected static boolean isRecoverableException(AmazonServiceException ex) { if (ex.getErrorType() == null) { return false; } switch (ex.getErrorType()) { case Client: return ex instanceof ProvisionedThroughputExceededException; case Service: case Unknown: return true; default: return false; } } private List<KinesisStreamShard> getShardsOfStream(String streamName, @Nullable String lastSeenShardId) throws InterruptedException { List<KinesisStreamShard> shardsOfStream = new ArrayList<>(); DescribeStreamResult describeStreamResult; do { describeStreamResult = describeStream(streamName, lastSeenShardId); List<Shard> shards = describeStreamResult.getStreamDescription().getShards(); for (Shard shard : shards) { shardsOfStream.add(new KinesisStreamShard(streamName, shard)); } if (shards.size() != 0) { lastSeenShardId = shards.get(shards.size() - 1).getShardId(); } } while (describeStreamResult.getStreamDescription().isHasMoreShards()); return shardsOfStream; } /** * Get metainfo for a Kinesis stream, which contains information about which shards this Kinesis stream possess. * * This method is using a "full jitter" approach described in AWS's article, * <a href="https://www.awsarchitectureblog.com/2015/03/backoff.html">"Exponential Backoff and Jitter"</a>. * This is necessary because concurrent calls will be made by all parallel subtask's fetcher. This * jitter backoff approach will help distribute calls across the fetchers over time. * * @param streamName the stream to describe * @param startShardId which shard to start with for this describe operation (earlier shard's infos will not appear in result) * @return the result of the describe stream operation */ private DescribeStreamResult describeStream(String streamName, @Nullable String startShardId) throws InterruptedException { final DescribeStreamRequest describeStreamRequest = new DescribeStreamRequest(); describeStreamRequest.setStreamName(streamName); describeStreamRequest.setExclusiveStartShardId(startShardId); DescribeStreamResult describeStreamResult = null; // Call DescribeStream, with full-jitter backoff (if we get LimitExceededException). int attemptCount = 0; while (describeStreamResult == null) { // retry until we get a result try { describeStreamResult = kinesisClient.describeStream(describeStreamRequest); } catch (LimitExceededException le) { long backoffMillis = fullJitterBackoff(describeStreamBaseBackoffMillis, describeStreamMaxBackoffMillis, describeStreamExpConstant, attemptCount++); LOG.warn("Got LimitExceededException when describing stream " + streamName + ". Backing off for " + backoffMillis + " millis."); Thread.sleep(backoffMillis); } catch (ResourceNotFoundException re) { throw new RuntimeException("Error while getting stream details", re); } } String streamStatus = describeStreamResult.getStreamDescription().getStreamStatus(); if (!(streamStatus.equals(StreamStatus.ACTIVE.toString()) || streamStatus.equals(StreamStatus.UPDATING.toString()))) { if (LOG.isWarnEnabled()) { LOG.warn("The status of stream " + streamName + " is " + streamStatus + "; result of the current " + "describeStream operation will not contain any shard information."); } } // Kinesalite (mock implementation of Kinesis) does not correctly exclude shards before the exclusive // start shard id in the returned shards list; check if we need to remove these erroneously returned shards if (startShardId != null) { List<Shard> shards = describeStreamResult.getStreamDescription().getShards(); Iterator<Shard> shardItr = shards.iterator(); while (shardItr.hasNext()) { if (KinesisStreamShard.compareShardIds(shardItr.next().getShardId(), startShardId) <= 0) { shardItr.remove(); } } } return describeStreamResult; } private static long fullJitterBackoff(long base, long max, double power, int attempt) { long exponentialBackoff = (long) Math.min(max, base * Math.pow(power, attempt)); return (long) (seed.nextDouble() * exponentialBackoff); // random jitter between 0 and the exponential backoff } }