org.apache.samza.system.kafka.KafkaSystemAdmin.java Source code

Introduction

Here is the source code for org.apache.samza.system.kafka.KafkaSystemAdmin.java
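
KafkaSystemAdmin is Samza's Kafka-backed implementation of SystemAdmin: it creates, validates, and clears topics, fetches oldest/newest/upcoming offsets per partition, and resolves Startpoints to concrete Kafka offsets. As an orientation before the full listing, here is a minimal usage sketch. The system name "kafka", the broker address, the topic name, and the "systems.kafka.consumer.bootstrap.servers" config key are illustrative assumptions rather than values taken from this file, and the sketch additionally assumes org.apache.kafka.clients.consumer.KafkaConsumer is on the classpath.

// Hypothetical usage sketch; names and addresses below are illustrative only.
Properties consumerProps = new Properties();
consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
        "org.apache.kafka.common.serialization.ByteArrayDeserializer");
consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
        "org.apache.kafka.common.serialization.ByteArrayDeserializer");
// A plain Kafka consumer used only for metadata lookups; it is never polled for records here.
Consumer<byte[], byte[]> metadataConsumer = new KafkaConsumer<>(consumerProps);

// The admin derives its broker list from the system's consumer (or producer) config subset.
Config config = new MapConfig(ImmutableMap.of(
        "systems.kafka.consumer.bootstrap.servers", "localhost:9092"));

KafkaSystemAdmin admin = new KafkaSystemAdmin("kafka", config, metadataConsumer);
admin.start();
Map<String, SystemStreamMetadata> metadata =
        admin.getSystemStreamMetadata(Collections.singleton("my-topic"));
admin.stop();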

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.samza.system.kafka;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.CreateTopicsResult;
import org.apache.kafka.clients.admin.DeleteTopicsResult;
import org.apache.kafka.clients.admin.DescribeTopicsResult;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.clients.admin.RecordsToDelete;
import org.apache.kafka.clients.admin.TopicDescription;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.config.TopicConfig;
import org.apache.kafka.common.errors.TopicExistsException;
import org.apache.samza.Partition;
import org.apache.samza.SamzaException;
import org.apache.samza.config.ApplicationConfig;
import org.apache.samza.config.Config;
import org.apache.samza.config.KafkaConfig;
import org.apache.samza.config.MapConfig;
import org.apache.samza.config.StreamConfig;
import org.apache.samza.config.SystemConfig;
import org.apache.samza.startpoint.Startpoint;
import org.apache.samza.startpoint.StartpointOldest;
import org.apache.samza.startpoint.StartpointSpecific;
import org.apache.samza.startpoint.StartpointTimestamp;
import org.apache.samza.startpoint.StartpointUpcoming;
import org.apache.samza.startpoint.StartpointVisitor;
import org.apache.samza.system.StreamSpec;
import org.apache.samza.system.StreamValidationException;
import org.apache.samza.system.SystemAdmin;
import org.apache.samza.system.SystemStream;
import org.apache.samza.system.SystemStreamMetadata;
import org.apache.samza.system.SystemStreamPartition;
import org.apache.samza.util.ExponentialSleepStrategy;
import org.apache.samza.util.KafkaUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.collection.JavaConverters;
import scala.runtime.AbstractFunction0;
import scala.runtime.AbstractFunction1;
import scala.runtime.AbstractFunction2;
import scala.runtime.BoxedUnit;

public class KafkaSystemAdmin implements SystemAdmin {
    private static final Logger LOG = LoggerFactory.getLogger(KafkaSystemAdmin.class);

    // Default exponential sleep strategy values
    protected static final double DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER = 2.0;
    protected static final long DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS = 500;
    protected static final long DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS = 10000;
    protected static final int MAX_RETRIES_ON_EXCEPTION = 5;
    protected static final int DEFAULT_REPL_FACTOR = 2;
    private static final int KAFKA_ADMIN_OPS_TIMEOUT_MS = 50000;

    // used in TestRepartitionJoinWindowApp TODO - remove SAMZA-1945
    @VisibleForTesting
    public static volatile boolean deleteMessageCalled = false;

    protected final String systemName;
    protected final Config config;

    // Custom properties to create a new coordinator stream.
    private final Properties coordinatorStreamProperties;

    // Replication factor for a new coordinator stream.
    private final int coordinatorStreamReplicationFactor;

    // Replication factor and kafka properties for changelog topic creation
    private final Map<String, ChangelogInfo> changelogTopicMetaInformation;

    // Kafka properties for intermediate topics creation
    private final Map<String, Properties> intermediateStreamProperties;

    // used for intermediate streams
    protected final boolean deleteCommittedMessages;

    // admin client for create/remove topics
    final AdminClient adminClient;

    private final AtomicBoolean stopped = new AtomicBoolean(false);
    private final ThreadSafeKafkaConsumer threadSafeKafkaConsumer;
    private final KafkaStartpointToOffsetResolver kafkaStartpointToOffsetResolver;

    public KafkaSystemAdmin(String systemName, Config config, Consumer metadataConsumer) {
        this.systemName = systemName;
        this.config = config;

        if (metadataConsumer == null) {
            throw new SamzaException(
                    "Cannot construct KafkaSystemAdmin for system " + systemName + " with null metadataConsumer");
        }
        this.threadSafeKafkaConsumer = new ThreadSafeKafkaConsumer(metadataConsumer);
        this.kafkaStartpointToOffsetResolver = new KafkaStartpointToOffsetResolver(threadSafeKafkaConsumer);

        Properties props = createAdminClientProperties();
        LOG.info("New admin client with props: " + props);
        adminClient = AdminClient.create(props);

        KafkaConfig kafkaConfig = new KafkaConfig(config);
        coordinatorStreamReplicationFactor = Integer.valueOf(kafkaConfig.getCoordinatorReplicationFactor());
        coordinatorStreamProperties = getCoordinatorStreamProperties(kafkaConfig);

        Map<String, String> storeToChangelog = JavaConverters
                .mapAsJavaMapConverter(kafkaConfig.getKafkaChangelogEnabledStores()).asJava();
        // Construct the meta information for each topic. If the replication factor is not defined,
        // we use 2 (DEFAULT_REPL_FACTOR) as the number of replicas for the changelog stream.
        changelogTopicMetaInformation = new HashMap<>();
        for (Map.Entry<String, String> e : storeToChangelog.entrySet()) {
            String storeName = e.getKey();
            String topicName = e.getValue();
            String replicationFactorStr = kafkaConfig.getChangelogStreamReplicationFactor(storeName);
            int replicationFactor = StringUtils.isEmpty(replicationFactorStr) ? DEFAULT_REPL_FACTOR
                    : Integer.valueOf(replicationFactorStr);
            ChangelogInfo changelogInfo = new ChangelogInfo(replicationFactor,
                    kafkaConfig.getChangelogKafkaProperties(storeName));
            LOG.info(String.format("Creating topic meta information for topic: %s with replication factor: %s",
                    topicName, replicationFactor));
            changelogTopicMetaInformation.put(topicName, changelogInfo);
        }

        // special flag to allow/enforce deleting of committed messages
        SystemConfig systemConfig = new SystemConfig(config);
        this.deleteCommittedMessages = systemConfig.deleteCommittedMessages(systemName);

        intermediateStreamProperties = getIntermediateStreamProperties(config);

        LOG.info(String.format("Created KafkaSystemAdmin for system %s", systemName));
    }

    @Override
    public void start() {
        // Please note: there is a slight inconsistency in the use of this class.
        // Some of its functionality may actually be used BEFORE start() is called.
        // The SamzaContainer gets metadata (using this class) in SamzaContainer.apply,
        // but this "start" actually gets called in SamzaContainer.run.
        // Review this usage (SAMZA-1888).

        // Throw exception if start is called after stop
        if (stopped.get()) {
            throw new IllegalStateException("KafkaSystemAdmin.start() is called after stop()");
        }
    }

    @Override
    public void stop() {
        if (stopped.compareAndSet(false, true)) {
            try {
                threadSafeKafkaConsumer.close();
            } catch (Exception e) {
                LOG.warn(String.format("Exception occurred when closing consumer of system: %s.", systemName), e);
            }
        }

        if (adminClient != null) {
            adminClient.close();
        }
    }

    /**
     * Note! This method does not populate SystemStreamMetadata for each stream with real data.
     * Thus, it should ONLY be used to get the number of partitions for each stream.
     * It will throw NotImplementedException if anyone tries to access the actual metadata.
     * @param streamNames set of streams for which to get the partition counts
     * @param cacheTTL cache TTL if caching the data
     * @return a map keyed on stream name; only the partition count in each SystemStreamMetadata is meaningful
     */
    @Override
    public Map<String, SystemStreamMetadata> getSystemStreamPartitionCounts(Set<String> streamNames,
            long cacheTTL) {
        // This optimization omits actual metadata for performance. Instead, we inject a dummy for all partitions.
        final SystemStreamMetadata.SystemStreamPartitionMetadata dummySspm = new SystemStreamMetadata.SystemStreamPartitionMetadata(
                null, null, null) {
            String msg = "getSystemStreamPartitionCounts does not populate SystemStreamMetadata info; only the number of partitions is available";

            @Override
            public String getOldestOffset() {
                throw new NotImplementedException(msg);
            }

            @Override
            public String getNewestOffset() {
                throw new NotImplementedException(msg);
            }

            @Override
            public String getUpcomingOffset() {
                throw new NotImplementedException(msg);
            }
        };

        ExponentialSleepStrategy strategy = new ExponentialSleepStrategy(
                DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER, DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS,
                DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS);

        Function1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>> fetchMetadataOperation = new AbstractFunction1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>>() {
            @Override
            public Map<String, SystemStreamMetadata> apply(ExponentialSleepStrategy.RetryLoop loop) {
                Map<String, SystemStreamMetadata> allMetadata = new HashMap<>();

                streamNames.forEach(streamName -> {
                    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = new HashMap<>();

                    List<PartitionInfo> partitionInfos = threadSafeKafkaConsumer
                            .execute(consumer -> consumer.partitionsFor(streamName));
                    LOG.debug("Stream {} has partitions {}", streamName, partitionInfos);
                    partitionInfos.forEach(partitionInfo -> partitionMetadata
                            .put(new Partition(partitionInfo.partition()), dummySspm));
                    allMetadata.put(streamName, new SystemStreamMetadata(streamName, partitionMetadata));
                });

                loop.done();
                return allMetadata;
            }
        };

        Map<String, SystemStreamMetadata> result = strategy.run(fetchMetadataOperation,
                new AbstractFunction2<Exception, ExponentialSleepStrategy.RetryLoop, BoxedUnit>() {
                    @Override
                    public BoxedUnit apply(Exception exception, ExponentialSleepStrategy.RetryLoop loop) {
                        if (loop.sleepCount() < MAX_RETRIES_ON_EXCEPTION) {
                            LOG.warn(String.format(
                                    "Fetching systemstreampartition counts for: %s threw an exception. Retrying.",
                                    streamNames), exception);
                        } else {
                            LOG.error(String.format(
                                    "Fetching systemstreampartition counts for: %s threw an exception.",
                                    streamNames), exception);
                            loop.done();
                            throw new SamzaException(exception);
                        }
                        return null;
                    }
                }).get();

        LOG.info("SystemStream partition counts for system {}: {}", systemName, result);
        return result;
    }

    @Override
    public Map<SystemStreamPartition, String> getOffsetsAfter(Map<SystemStreamPartition, String> offsets) {
        // This is safe to do with Kafka, even if a topic is key-deduped. If the
        // offset doesn't exist on a compacted topic, Kafka will return the first
        // message AFTER the offset that was specified in the fetch request.
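        // For example, an input entry mapping an SSP to offset "5" produces an output entry mapping it to "6".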
        return offsets.entrySet().stream().collect(
                Collectors.toMap(Map.Entry::getKey, (entry) -> String.valueOf(Long.valueOf(entry.getValue()) + 1)));
    }

    @Override
    public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames) {
        return getSystemStreamMetadata(streamNames,
                new ExponentialSleepStrategy(DEFAULT_EXPONENTIAL_SLEEP_BACK_OFF_MULTIPLIER,
                        DEFAULT_EXPONENTIAL_SLEEP_INITIAL_DELAY_MS, DEFAULT_EXPONENTIAL_SLEEP_MAX_DELAY_MS));
    }

    @Override
    public Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> getSSPMetadata(
            Set<SystemStreamPartition> ssps) {

        LOG.info("Fetching SSP metadata for: {}", ssps);
        List<TopicPartition> topicPartitions = ssps.stream()
                .map(ssp -> new TopicPartition(ssp.getStream(), ssp.getPartition().getPartitionId()))
                .collect(Collectors.toList());

        OffsetsMaps topicPartitionsMetadata = fetchTopicPartitionsMetadata(topicPartitions);

        Map<SystemStreamPartition, SystemStreamMetadata.SystemStreamPartitionMetadata> sspToSSPMetadata = new HashMap<>();
        for (SystemStreamPartition ssp : ssps) {
            String oldestOffset = topicPartitionsMetadata.getOldestOffsets().get(ssp);
            String newestOffset = topicPartitionsMetadata.getNewestOffsets().get(ssp);
            String upcomingOffset = topicPartitionsMetadata.getUpcomingOffsets().get(ssp);

            sspToSSPMetadata.put(ssp, new SystemStreamMetadata.SystemStreamPartitionMetadata(oldestOffset,
                    newestOffset, upcomingOffset));
        }
        return sspToSSPMetadata;
    }

    /**
     * Given a set of stream names (topics), fetch metadata from Kafka for each
     * stream, and return a map from stream name to SystemStreamMetadata for
     * each stream. For an empty SystemStreamPartition the newest offset will be
     * null and the oldest offset will be 0. This method blocks and retries a
     * bounded number of times before failing with a SamzaException.
     *
     * @param streamNames a set of strings of stream names/topics
     * @param retryBackoff retry backoff strategy
     * @return a map from topic to SystemStreamMetadata which has offsets for each partition
     */
    public Map<String, SystemStreamMetadata> getSystemStreamMetadata(Set<String> streamNames,
            ExponentialSleepStrategy retryBackoff) {

        LOG.info("Fetching system stream metadata for {} from system {}", streamNames, systemName);

        Function1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>> fetchMetadataOperation = new AbstractFunction1<ExponentialSleepStrategy.RetryLoop, Map<String, SystemStreamMetadata>>() {
            @Override
            public Map<String, SystemStreamMetadata> apply(ExponentialSleepStrategy.RetryLoop loop) {
                Map<String, SystemStreamMetadata> metadata = fetchSystemStreamMetadata(streamNames);
                loop.done();
                return metadata;
            }
        };

        Function2<Exception, ExponentialSleepStrategy.RetryLoop, BoxedUnit> onExceptionRetryOperation = new AbstractFunction2<Exception, ExponentialSleepStrategy.RetryLoop, BoxedUnit>() {
            @Override
            public BoxedUnit apply(Exception exception, ExponentialSleepStrategy.RetryLoop loop) {
                if (loop.sleepCount() < MAX_RETRIES_ON_EXCEPTION) {
                    LOG.warn(String.format("Fetching system stream metadata for: %s threw an exception. Retrying.",
                            streamNames), exception);
                } else {
                    LOG.error(String.format("Fetching system stream metadata for: %s threw an exception.",
                            streamNames), exception);
                    loop.done();
                    throw new SamzaException(exception);
                }

                return null;
            }
        };

        Function0<Map<String, SystemStreamMetadata>> fallbackOperation = new AbstractFunction0<Map<String, SystemStreamMetadata>>() {
            @Override
            public Map<String, SystemStreamMetadata> apply() {
                throw new SamzaException("Failed to get system stream metadata");
            }
        };

        return retryBackoff.run(fetchMetadataOperation, onExceptionRetryOperation).getOrElse(fallbackOperation);
    }

    /**
     * Uses the kafka consumer to fetch the metadata for the {@code topicPartitions}.
     */
    private OffsetsMaps fetchTopicPartitionsMetadata(List<TopicPartition> topicPartitions) {
        Map<SystemStreamPartition, String> oldestOffsets = new HashMap<>();
        Map<SystemStreamPartition, String> newestOffsets = new HashMap<>();
        Map<SystemStreamPartition, String> upcomingOffsets = new HashMap<>();
        final Map<TopicPartition, Long> oldestOffsetsWithLong = new HashMap<>();
        final Map<TopicPartition, Long> upcomingOffsetsWithLong = new HashMap<>();

        threadSafeKafkaConsumer.execute(consumer -> {
            Map<TopicPartition, Long> beginningOffsets = consumer.beginningOffsets(topicPartitions);
            LOG.debug("Beginning offsets for topic-partitions: {} is {}", topicPartitions, beginningOffsets);
            oldestOffsetsWithLong.putAll(beginningOffsets);
            Map<TopicPartition, Long> endOffsets = consumer.endOffsets(topicPartitions);
            LOG.debug("End offsets for topic-partitions: {} is {}", topicPartitions, endOffsets);
            upcomingOffsetsWithLong.putAll(endOffsets);
            return Optional.empty();
        });

        oldestOffsetsWithLong.forEach((topicPartition, offset) -> oldestOffsets
                .put(KafkaUtil.toSystemStreamPartition(systemName, topicPartition), String.valueOf(offset)));

        upcomingOffsetsWithLong.forEach((topicPartition, offset) -> {
            upcomingOffsets.put(KafkaUtil.toSystemStreamPartition(systemName, topicPartition),
                    String.valueOf(offset));

            // Kafka's beginning offset corresponds to the offset of the oldest message.
            // Kafka's end offset corresponds to the offset of the upcoming message, i.e. the newest offset + 1.
            // When the upcoming offset is <= 0, the topic partition appears empty: we set the oldest offset to 0 and leave the newest offset null.
            // When the upcoming offset is > 0, we subtract one from it to get the newest offset.
            // In the normal case, the newest offset corresponds to the offset of the newest message in the stream,
            // but for a large message this does not hold: seeking to the newest offset returns nothing for the newest large message.
            // For now, we keep the newest offsets consistent with the historical metadata structure.
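            // For example, with beginningOffset = 0 and endOffset = 5 the reported offsets are
            // oldest = "0", newest = "4" and upcoming = "5"; with endOffset = 0 the partition is treated as empty.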
            if (offset <= 0) {
                LOG.warn(
                        "Empty Kafka topic partition {} with upcoming offset {}. Skipping newest offset and setting oldest offset to 0 to consume from beginning",
                        topicPartition, offset);
                oldestOffsets.put(KafkaUtil.toSystemStreamPartition(systemName, topicPartition), "0");
            } else {
                newestOffsets.put(KafkaUtil.toSystemStreamPartition(systemName, topicPartition),
                        String.valueOf(offset - 1));
            }
        });
        return new OffsetsMaps(oldestOffsets, newestOffsets, upcomingOffsets);
    }

    /**
     * Fetch SystemStreamMetadata for each topic with the consumer
     * @param topics set of topics to get metadata info for
     * @return map of topic to SystemStreamMetadata
     */
    private Map<String, SystemStreamMetadata> fetchSystemStreamMetadata(Set<String> topics) {
        Map<SystemStreamPartition, String> allOldestOffsets = new HashMap<>();
        Map<SystemStreamPartition, String> allNewestOffsets = new HashMap<>();
        Map<SystemStreamPartition, String> allUpcomingOffsets = new HashMap<>();

        LOG.info("Fetching SystemStreamMetadata for topics {} on system {}", topics, systemName);

        topics.forEach(topic -> {
            OffsetsMaps offsetsForTopic = threadSafeKafkaConsumer.execute(consumer -> {
                List<PartitionInfo> partitionInfos = consumer.partitionsFor(topic);
                if (partitionInfos == null) {
                    String msg = String.format("Partition info not(yet?) available for system %s topic %s",
                            systemName, topic);
                    throw new SamzaException(msg);
                }
                List<TopicPartition> topicPartitions = partitionInfos.stream()
                        .map(partitionInfo -> new TopicPartition(partitionInfo.topic(), partitionInfo.partition()))
                        .collect(Collectors.toList());
                return fetchTopicPartitionsMetadata(topicPartitions);
            });
            allOldestOffsets.putAll(offsetsForTopic.getOldestOffsets());
            allNewestOffsets.putAll(offsetsForTopic.getNewestOffsets());
            allUpcomingOffsets.putAll(offsetsForTopic.getUpcomingOffsets());
        });

        return assembleMetadata(allOldestOffsets, allNewestOffsets, allUpcomingOffsets);
    }

    /**
     * Compares two String offsets numerically. Returns -1 if either offset is null;
     * otherwise returns a negative integer, zero, or a positive integer as offset1 is
     * less than, equal to, or greater than offset2.
     */
    @Override
    public Integer offsetComparator(String offset1, String offset2) {
        if (offset1 == null || offset2 == null) {
            return -1;
        }

        return Long.valueOf(offset1).compareTo(Long.valueOf(offset2));
    }

    /**
     * Creates the Kafka topic described by the given StreamSpec.
     * @return true if the topic was created, false if it already exists or if the
     *         post-create describe call fails
     */
    @Override
    public boolean createStream(StreamSpec streamSpec) {
        LOG.info("Creating Kafka topic: {} on system: {}", streamSpec.getPhysicalName(),
                streamSpec.getSystemName());
        final String REPL_FACTOR = "replication.factor";

        KafkaStreamSpec kSpec = toKafkaSpec(streamSpec);
        String topicName = kSpec.getPhysicalName();

        // create topic.
        NewTopic newTopic = new NewTopic(topicName, kSpec.getPartitionCount(),
                (short) kSpec.getReplicationFactor());

        // specify the configs
        Map<String, String> streamConfig = new HashMap<>(streamSpec.getConfig());
        // HACK - replication.factor is invalid config for AdminClient.createTopics
        if (streamConfig.containsKey(REPL_FACTOR)) {
            String repl = streamConfig.get(REPL_FACTOR);
            LOG.warn("Configuration {}={} for topic={} is invalid. Using kSpec repl factor {}", REPL_FACTOR, repl,
                    kSpec.getPhysicalName(), kSpec.getReplicationFactor());
            streamConfig.remove(REPL_FACTOR);
        }
        newTopic.configs(new MapConfig(streamConfig));
        CreateTopicsResult result = adminClient.createTopics(ImmutableSet.of(newTopic));
        try {
            result.all().get(KAFKA_ADMIN_OPS_TIMEOUT_MS, TimeUnit.MILLISECONDS);
        } catch (Exception e) {
            if (e instanceof TopicExistsException || e.getCause() instanceof TopicExistsException) {
                LOG.info("Topic {} already exists.", topicName);
                return false;
            }

            throw new SamzaException(String.format("Creation of topic %s failed.", topicName), e);
        }
        LOG.info("Successfully created topic {}", topicName);
        DescribeTopicsResult desc = adminClient.describeTopics(ImmutableSet.of(topicName));
        try {
            TopicDescription td = desc.all().get(KAFKA_ADMIN_OPS_TIMEOUT_MS, TimeUnit.MILLISECONDS).get(topicName);
            LOG.info("Topic {} created with {}", topicName, td);
            return true;
        } catch (Exception e) {
            LOG.error("'Describe after create' failed for topic " + topicName, e);
            return false;
        }
    }

    /**
     * Deletes the Kafka topic backing the given StreamSpec.
     * @return true if the delete request completed, false if it failed or timed out
     */
    @Override
    public boolean clearStream(StreamSpec streamSpec) {
        LOG.info("Deleting Kafka topic: {} on system: {}", streamSpec.getPhysicalName(),
                streamSpec.getSystemName());

        String topicName = streamSpec.getPhysicalName();

        try {
            DeleteTopicsResult deleteTopicsResult = adminClient.deleteTopics(ImmutableSet.of(topicName));
            deleteTopicsResult.all().get(KAFKA_ADMIN_OPS_TIMEOUT_MS, TimeUnit.MILLISECONDS);
        } catch (Exception e) {
            LOG.error("Failed to delete topic {}.", topicName, e);
            return false;
        }

        return true;
    }

    /**
     * Converts a StreamSpec into a KafkaStreamSpec, with special handling for
     * coordinator, changelog, and intermediate streams.
     * @param spec a StreamSpec object
     * @return KafkaStreamSpec object
     */
    public KafkaStreamSpec toKafkaSpec(StreamSpec spec) {
        KafkaStreamSpec kafkaSpec;
        if (spec.isChangeLogStream()) {
            String topicName = spec.getPhysicalName();
            ChangelogInfo topicMeta = changelogTopicMetaInformation.get(topicName);
            if (topicMeta == null) {
                throw new StreamValidationException("Unable to find topic information for topic " + topicName);
            }

            kafkaSpec = new KafkaStreamSpec(spec.getId(), topicName, systemName, spec.getPartitionCount(),
                    topicMeta.getReplicationFactor(), topicMeta.getKafkaProperties());
        } else if (spec.isCoordinatorStream()) {
            kafkaSpec = new KafkaStreamSpec(spec.getId(), spec.getPhysicalName(), systemName, 1,
                    coordinatorStreamReplicationFactor, coordinatorStreamProperties);
        } else if (intermediateStreamProperties.containsKey(spec.getId())) {
            kafkaSpec = KafkaStreamSpec.fromSpec(spec);
            Properties properties = kafkaSpec.getProperties();
            properties.putAll(intermediateStreamProperties.get(spec.getId()));
            kafkaSpec = kafkaSpec.copyWithProperties(properties);
        } else {
            kafkaSpec = KafkaStreamSpec.fromSpec(spec);
        }
        return kafkaSpec;
    }

    @Override
    public void validateStream(StreamSpec streamSpec) throws StreamValidationException {
        LOG.info("About to validate stream = " + streamSpec);

        String streamName = streamSpec.getPhysicalName();
        SystemStreamMetadata systemStreamMetadata = getSystemStreamMetadata(Collections.singleton(streamName))
                .get(streamName);
        if (systemStreamMetadata == null) {
            throw new StreamValidationException(
                    "Failed to obtain metadata for stream " + streamName + ". Validation failed.");
        }

        int actualPartitionCounter = systemStreamMetadata.getSystemStreamPartitionMetadata().size();
        int expectedPartitionCounter = streamSpec.getPartitionCount();
        LOG.info("actualCount=" + actualPartitionCounter + "; expectedCount=" + expectedPartitionCounter);
        if (actualPartitionCounter != expectedPartitionCounter) {
            throw new StreamValidationException(
                    String.format("Mismatch of partitions for stream %s. Expected %d, got %d. Validation failed.",
                            streamName, expectedPartitionCounter, actualPartitionCounter));
        }
    }

    /**
     * Delete records up to (and including) the provided SSP offsets for all
     * system stream partitions specified in the map, provided deletion of
     * committed messages is enabled for this system. This only works with Kafka
     * clusters on version 0.11 or later; otherwise it is a no-op.
     * @param offsets specifies up to which offsets the messages should be deleted
     */
    @Override
    public void deleteMessages(Map<SystemStreamPartition, String> offsets) {
        if (deleteCommittedMessages) {
            Map<TopicPartition, RecordsToDelete> recordsToDelete = offsets.entrySet().stream()
                    .collect(Collectors.toMap(
                            entry -> new TopicPartition(entry.getKey().getStream(),
                                    entry.getKey().getPartition().getPartitionId()),
                            entry -> RecordsToDelete.beforeOffset(Long.parseLong(entry.getValue()) + 1)));

            adminClient.deleteRecords(recordsToDelete).all().whenComplete((ignored, exception) -> {
                if (exception != null) {
                    LOG.error("Delete message failed for SSPs " + offsets.keySet() + " due to", exception);
                }
            });

            deleteMessageCalled = true;
        }
    }

    protected Properties createAdminClientProperties() {
        // Populate the broker list from either the consumer or the producer configs.
        Properties props = new Properties();
        // Copy all consumer configs; this also picks up SSL settings if they are configured.

        props.putAll(config.subset(String.format("systems.%s.consumer.", systemName), true));

        // validate brokerList
        String brokerList = config.get(String.format(KafkaConfig.CONSUMER_CONFIGS_CONFIG_KEY(), systemName,
                ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
        if (brokerList == null) {
            brokerList = config.get(String.format(KafkaConfig.PRODUCER_CONFIGS_CONFIG_KEY(), systemName,
                    ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG));
        }
        if (brokerList == null) {
            throw new SamzaException(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG
                    + " is required for systemAdmin for system " + systemName);
        }
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList);

        return props;
    }

    @Override
    public Set<SystemStream> getAllSystemStreams() {
        Map<String, List<PartitionInfo>> topicToPartitionInfoMap = threadSafeKafkaConsumer
                .execute(consumer -> consumer.listTopics());
        Set<SystemStream> systemStreams = topicToPartitionInfoMap.keySet().stream()
                .map(topic -> new SystemStream(systemName, topic)).collect(Collectors.toSet());
        return systemStreams;
    }

    /**
     * A helper method that takes oldest, newest, and upcoming offsets for each
     * system stream partition, and creates a single map from stream name to
     * SystemStreamMetadata.
     *
     * @param oldestOffsets map of SSP to oldest offset
     * @param newestOffsets map of SSP to newest offset
     * @param upcomingOffsets map of SSP to upcoming offset
     * @return a {@link Map} from stream name to {@link SystemStreamMetadata}
     */
    @VisibleForTesting
    static Map<String, SystemStreamMetadata> assembleMetadata(Map<SystemStreamPartition, String> oldestOffsets,
            Map<SystemStreamPartition, String> newestOffsets, Map<SystemStreamPartition, String> upcomingOffsets) {
        HashSet<SystemStreamPartition> allSSPs = new HashSet<>();
        allSSPs.addAll(oldestOffsets.keySet());
        allSSPs.addAll(newestOffsets.keySet());
        allSSPs.addAll(upcomingOffsets.keySet());

        Map<String, SystemStreamMetadata> assembledMetadata = allSSPs.stream()
                .collect(Collectors.groupingBy(SystemStreamPartition::getStream)).entrySet().stream()
                .collect(Collectors.toMap(Map.Entry::getKey, entry -> {
                    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> partitionMetadata = entry
                            .getValue().stream()
                            .collect(Collectors.toMap(SystemStreamPartition::getPartition,
                                    ssp -> new SystemStreamMetadata.SystemStreamPartitionMetadata(
                                            oldestOffsets.getOrDefault(ssp, null),
                                            newestOffsets.getOrDefault(ssp, null), upcomingOffsets.get(ssp))));
                    return new SystemStreamMetadata(entry.getKey(), partitionMetadata);
                }));

        return assembledMetadata;
    }

    /**
     * Fetch stream properties for all intermediate streams.
     *
     * @param config kafka system config
     * @return a {@link Map} from {@code streamId} to stream {@link Properties}
     */
    @VisibleForTesting
    static Map<String, Properties> getIntermediateStreamProperties(Config config) {
        Map<String, Properties> intermediateStreamProperties = Collections.emptyMap();
        ApplicationConfig appConfig = new ApplicationConfig(config);

        if (appConfig.getAppMode() == ApplicationConfig.ApplicationMode.BATCH) {
            StreamConfig streamConfig = new StreamConfig(config);
            intermediateStreamProperties = JavaConverters.asJavaCollectionConverter(streamConfig.getStreamIds())
                    .asJavaCollection().stream().filter(streamConfig::getIsIntermediateStream)
                    .collect(Collectors.toMap(Function.identity(), streamId -> {
                        Properties properties = new Properties();
                        properties.putAll(streamConfig.getStreamProperties(streamId));
                        properties.putIfAbsent(TopicConfig.RETENTION_MS_CONFIG,
                                String.valueOf(KafkaConfig.DEFAULT_RETENTION_MS_FOR_BATCH()));
                        return properties;
                    }));
        }

        return intermediateStreamProperties;
    }

    private Properties getCoordinatorStreamProperties(KafkaConfig config) {
        Properties coordinatorStreamProperties = new Properties();
        coordinatorStreamProperties.put(TopicConfig.CLEANUP_POLICY_CONFIG, TopicConfig.CLEANUP_POLICY_COMPACT);
        coordinatorStreamProperties.put(TopicConfig.SEGMENT_BYTES_CONFIG, config.getCoordinatorSegmentBytes());

        return coordinatorStreamProperties;
    }

    @Override
    public String resolveStartpointToOffset(SystemStreamPartition systemStreamPartition, Startpoint startpoint) {
        return startpoint.apply(systemStreamPartition, kafkaStartpointToOffsetResolver);
    }

    /**
     * Container for metadata about offsets.
     */
    private static class OffsetsMaps {
        private final Map<SystemStreamPartition, String> oldestOffsets;
        private final Map<SystemStreamPartition, String> newestOffsets;
        private final Map<SystemStreamPartition, String> upcomingOffsets;

        private OffsetsMaps(Map<SystemStreamPartition, String> oldestOffsets,
                Map<SystemStreamPartition, String> newestOffsets,
                Map<SystemStreamPartition, String> upcomingOffsets) {
            this.oldestOffsets = oldestOffsets;
            this.newestOffsets = newestOffsets;
            this.upcomingOffsets = upcomingOffsets;
        }

        private Map<SystemStreamPartition, String> getOldestOffsets() {
            return oldestOffsets;
        }

        private Map<SystemStreamPartition, String> getNewestOffsets() {
            return newestOffsets;
        }

        private Map<SystemStreamPartition, String> getUpcomingOffsets() {
            return upcomingOffsets;
        }
    }

    /**
     * A helper class representing changelog-related information.
     */
    private static class ChangelogInfo {
        final int replicationFactor;
        final Properties kafkaProperties;

        /**
         * @param replicationFactor The number of replicas for the changelog stream
         * @param kafkaProperties The kafka specific properties that need to be used for changelog stream creation
         */
        ChangelogInfo(int replicationFactor, Properties kafkaProperties) {
            this.replicationFactor = replicationFactor;
            this.kafkaProperties = kafkaProperties;
        }

        public int getReplicationFactor() {
            return replicationFactor;
        }

        public Properties getKafkaProperties() {
            return kafkaProperties;
        }
    }

    /**
     * Offers a Kafka-specific implementation of {@link StartpointVisitor} that resolves
     * the different types of {@link Startpoint} to a Samza offset.
     */
    @VisibleForTesting
    static class KafkaStartpointToOffsetResolver implements StartpointVisitor<SystemStreamPartition, String> {

        private final ThreadSafeKafkaConsumer threadSafeKafkaConsumer;

        public KafkaStartpointToOffsetResolver(ThreadSafeKafkaConsumer threadSafeKafkaConsumer) {
            this.threadSafeKafkaConsumer = threadSafeKafkaConsumer;
        }

        @VisibleForTesting
        KafkaStartpointToOffsetResolver(Consumer consumer) {
            this.threadSafeKafkaConsumer = new ThreadSafeKafkaConsumer(consumer);
        }

        @Override
        public String visit(SystemStreamPartition systemStreamPartition, StartpointSpecific startpointSpecific) {
            return startpointSpecific.getSpecificOffset();
        }

        @Override
        public String visit(SystemStreamPartition systemStreamPartition, StartpointTimestamp startpointTimestamp) {
            Preconditions.checkNotNull(startpointTimestamp, "Startpoint cannot be null");
            Preconditions.checkNotNull(startpointTimestamp.getTimestampOffset(),
                    "Timestamp field in startpoint cannot be null");
            TopicPartition topicPartition = toTopicPartition(systemStreamPartition);

            Map<TopicPartition, Long> topicPartitionToTimestamp = ImmutableMap.of(topicPartition,
                    startpointTimestamp.getTimestampOffset());
            LOG.info("Finding offset for timestamp: {} in topic partition: {}.",
                    startpointTimestamp.getTimestampOffset(), topicPartition);
            Map<TopicPartition, OffsetAndTimestamp> topicPartitionToOffsetTimestamps = threadSafeKafkaConsumer
                    .execute(consumer -> consumer.offsetsForTimes(topicPartitionToTimestamp));

            OffsetAndTimestamp offsetAndTimestamp = topicPartitionToOffsetTimestamps.get(topicPartition);
            if (offsetAndTimestamp != null) {
                return String.valueOf(offsetAndTimestamp.offset());
            } else {
                LOG.info("Offset for timestamp: {} does not exist for partition: {}. Falling back to end offset.",
                        startpointTimestamp.getTimestampOffset(), topicPartition);
                return getEndOffset(systemStreamPartition);
            }
        }

        @Override
        public String visit(SystemStreamPartition systemStreamPartition, StartpointOldest startpointOldest) {
            TopicPartition topicPartition = toTopicPartition(systemStreamPartition);
            Map<TopicPartition, Long> topicPartitionToOffsets = threadSafeKafkaConsumer
                    .execute(consumer -> consumer.beginningOffsets(ImmutableSet.of(topicPartition)));
            Long beginningOffset = topicPartitionToOffsets.get(topicPartition);
            LOG.info("Beginning offset for topic partition: {} is {}.", topicPartition, beginningOffset);
            return String.valueOf(beginningOffset);
        }

        @Override
        public String visit(SystemStreamPartition systemStreamPartition, StartpointUpcoming startpointUpcoming) {
            return getEndOffset(systemStreamPartition);
        }

        /**
         * Converts the {@link SystemStreamPartition} to {@link TopicPartition}.
         * @param systemStreamPartition the input system stream partition.
         * @return the converted topic partition.
         */
        static TopicPartition toTopicPartition(SystemStreamPartition systemStreamPartition) {
            Preconditions.checkNotNull(systemStreamPartition);
            Preconditions.checkNotNull(systemStreamPartition.getPartition());
            Preconditions.checkNotNull(systemStreamPartition.getStream());

            return new TopicPartition(systemStreamPartition.getStream(),
                    systemStreamPartition.getPartition().getPartitionId());
        }

        /**
         * Determines the end offset of the given {@link SystemStreamPartition}.
         * @param systemStreamPartition represents the system stream partition.
         * @return the end offset of the partition.
         */
        private String getEndOffset(SystemStreamPartition systemStreamPartition) {
            TopicPartition topicPartition = toTopicPartition(systemStreamPartition);
            Map<TopicPartition, Long> topicPartitionToOffsets = threadSafeKafkaConsumer
                    .execute(consumer -> consumer.endOffsets(ImmutableSet.of(topicPartition)));
            Long endOffset = topicPartitionToOffsets.get(topicPartition);
            LOG.info("End offset for topic partition: {} is {}.", topicPartition, endOffset);
            return String.valueOf(endOffset);
        }
    }

    /**
     * Offers thread-safe operations over the vanilla {@link Consumer}.
     */
    static class ThreadSafeKafkaConsumer {

        private final Consumer kafkaConsumer;

        ThreadSafeKafkaConsumer(Consumer kafkaConsumer) {
            this.kafkaConsumer = kafkaConsumer;
        }

        /**
         * Executes a lambda function consisting of kafka-consumer operations in a thread-safe manner
         * and returns the result of the execution.
         *
         * @param function accepts the kafka consumer as argument and returns a result after executing a
         *                 sequence of operations on a kafka-broker.
         * @param <T> the return type of the lambda function.
         * @return the result of executing the lambda function.
         */
        public <T> T execute(Function<Consumer, T> function) {
            // Kafka consumer is not thread-safe
            synchronized (kafkaConsumer) {
                return function.apply(kafkaConsumer);
            }
        }

        /**
         * Closes the underlying kafka consumer.
         */
        public void close() {
            synchronized (kafkaConsumer) {
                kafkaConsumer.close();
            }
        }
    }
}
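
As a follow-up to the listing, here is a short sketch of how the startpoint-resolution path might be exercised, reusing the admin instance from the sketch in the introduction; the topic name, partition number, and timestamp value are illustrative assumptions, not values taken from this file.

SystemStreamPartition ssp = new SystemStreamPartition("kafka", "my-topic", new Partition(0));

// Resolve "start from this wall-clock time" to a concrete Kafka offset. If no record exists at or
// after the timestamp, KafkaStartpointToOffsetResolver falls back to the partition's end offset.
Startpoint fromTimestamp = new StartpointTimestamp(1556808000000L);
String offsetFromTimestamp = admin.resolveStartpointToOffset(ssp, fromTimestamp);

// Resolve "start from the oldest available message" to the partition's beginning offset.
String oldestOffset = admin.resolveStartpointToOffset(ssp, new StartpointOldest());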