Java tutorial: AbstractKafkaInputOperator (Apache Apex Malhar Kafka input operator)
The following is the full source of the abstract Kafka input operator from the com.datatorrent.contrib.kafka package. It consumes messages from Kafka, partitions itself dynamically based on the Kafka partition layout, and leaves the actual tuple emission to subclasses via emitTuple().
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.contrib.kafka;

import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultPartition;
import com.datatorrent.api.InputOperator;
import com.datatorrent.api.Operator;
import com.datatorrent.api.Operator.ActivationListener;
import com.datatorrent.api.Partitioner;
import com.datatorrent.api.Stats;
import com.datatorrent.api.StatsListener;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.api.annotation.Stateless;
import com.datatorrent.lib.util.KryoCloneUtils;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.cluster.Broker;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.Message;
import kafka.message.MessageAndOffset;

import org.apache.apex.malhar.lib.wal.WindowDataManager;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.validation.Valid;
import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;

import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static com.datatorrent.contrib.kafka.KafkaConsumer.KafkaMeterStatsUtil.getOffsetsForPartitions;

/**
 * This is a base implementation of a Kafka input operator, which consumes data from the Kafka message bus.
 * Subclasses should implement the method for emitting tuples to downstream operators.
 * It is dynamically partitioned based on the upstream kafka partitions.
 * <p>
 * <b>Partition Strategy:</b>
 * <p><b>1. ONE_TO_ONE partition</b> Each operator partition consumes from only one kafka partition</p>
 * <p><b>2. ONE_TO_MANY partition</b> Each operator partition consumes from multiple kafka partitions with some hard ingestion rate limit</p>
 * <p><b>3. ONE_TO_MANY_HEURISTIC partition</b> (Not implemented yet) Each operator partition consumes from multiple kafka partitions and the number of partitions depends on a heuristic function (real-time bottleneck)</p>
 * <p><b>Note:</b> ONE_TO_MANY partition only supports the simple kafka consumer because
 * <p> 1) the high-level consumer can only balance the number of brokers it consumes from rather than the actual load from each broker</p>
 * <p> 2) the high-level consumer cannot reset an offset once it has been committed, so the tuples are not replayable</p>
 * <p></p>
 * <br>
 * <br>
 * <b>Basic Algorithm:</b>
 * <p>1. Pull the metadata (how many partitions) of the topic from the brokerList of {@link KafkaConsumer}</p>
 * <p>2. The cloneConsumer method is used to initialize the new {@link KafkaConsumer} instance for the new partition operator</p>
 * <p>3. The cloneOperator method is used to initialize the new {@link AbstractKafkaInputOperator} instance for the new partition operator</p>
 * <p>4. ONE_TO_MANY partition uses the first-fit decreasing algorithm (http://en.wikipedia.org/wiki/Bin_packing_problem) to minimize the number of operator partitions</p>
 * <br>
 * <br>
 * <b>Load balance:</b> refer to {@link SimpleKafkaConsumer} and {@link HighlevelKafkaConsumer} <br>
 * <b>Kafka partition failover:</b> refer to {@link SimpleKafkaConsumer} and {@link HighlevelKafkaConsumer}
 * <br>
 * <br>
 * <b>Self adjustment to Kafka partition changes:</b>
 * <p><b>EACH</b> operator partition periodically checks for leader broker changes on the partitions it consumes from and adjusts the connection without repartitioning</p>
 * <p><b>ONLY APPMASTER</b> operator periodically checks the overall kafka partition layout and adds operator partitions when kafka partitions are added (kafka does not support partition deletion for now)</p>
 * <br>
 * <br>
 * </p>
 * Properties:<br>
 * <b>tuplesBlast</b>: Number of tuples emitted in each burst<br>
 * <b>bufferSize</b>: Size of holding buffer<br>
 * <br>
 * Compile time checks:<br>
 * Classes derived from this have to implement the abstract method emitTuple() <br>
 * <br>
 * Run time checks:<br>
 * None<br>
 * <br>
 * Benchmarks:<br>
 * TBD<br>
 * <br>
 *
 * Each operator can consume 1 topic from multiple partitions and clusters<br>
 * </p>
 *
 * @displayName Abstract Kafka Input
 * @category Messaging
 * @tags input operator
 *
 * @since 0.3.2
 */
@OperatorAnnotation(partitionable = true)
public abstract class AbstractKafkaInputOperator<K extends KafkaConsumer> implements InputOperator,
    ActivationListener<OperatorContext>, Operator.CheckpointNotificationListener,
    Partitioner<AbstractKafkaInputOperator<K>>, StatsListener
{
  private static final Logger logger = LoggerFactory.getLogger(AbstractKafkaInputOperator.class);

  @Min(1)
  private int maxTuplesPerWindow = Integer.MAX_VALUE;
  @Min(1)
  private long maxTotalMsgSizePerWindow = Long.MAX_VALUE;
  private transient int emitCount = 0;
  private transient long emitTotalMsgSize = 0;
  protected WindowDataManager windowDataManager;
  protected transient long currentWindowId;
  protected transient int operatorId;
  protected final transient Map<KafkaPartition, MutablePair<Long, Integer>> currentWindowRecoveryState;

  /**
   * Offsets that are checkpointed for recovery
   */
  protected Map<KafkaPartition, Long> offsetStats = new HashMap<KafkaPartition, Long>();

  /**
   * Offset history with window id
   */
  protected transient List<Pair<Long, Map<KafkaPartition, Long>>> offsetTrackHistory = new LinkedList<>();

  private transient OperatorContext context = null;
  // By default the partition policy is 1:1
  public PartitionStrategy strategy = PartitionStrategy.ONE_TO_ONE;
  // Deprecated: please don't use this property.
  @Deprecated
  private long msgRateUpperBound = Long.MAX_VALUE;
  // Deprecated: please don't use this property.
  @Deprecated
  private long byteRateUpperBound = Long.MAX_VALUE;
  // Store the current operator partition topology
  private transient List<PartitionInfo> currentPartitionInfo = Lists.newLinkedList();
  // Store the currently collected kafka consumer stats
  private transient Map<Integer, List<KafkaConsumer.KafkaMeterStats>> kafkaStatsHolder =
      new HashMap<Integer, List<KafkaConsumer.KafkaMeterStats>>();
  private OffsetManager offsetManager = null;
  // Minimal interval between 2 (re)partition actions
  private long repartitionInterval = 30000L;
  // Minimal interval between checking the collected stats and deciding whether a repartition is needed.
  // Also the minimal interval between 2 offset updates
  private long repartitionCheckInterval = 5000L;
  private transient long lastCheckTime = 0L;
  private transient long lastRepartitionTime = 0L;
  // A list that stores the newly discovered kafka partitions
  private transient List<KafkaPartition> newWaitingPartition = new LinkedList<KafkaPartition>();
  private transient KafkaConsumer.KafkaMessage pendingMessage;
  @Min(1)
  private int initialPartitionCount = 1;
  @NotNull
  @Valid
  protected KafkaConsumer consumer = new SimpleKafkaConsumer();

  public AbstractKafkaInputOperator()
  {
    windowDataManager = new WindowDataManager.NoopWindowDataManager();
    currentWindowRecoveryState = new HashMap<KafkaPartition, MutablePair<Long, Integer>>();
  }

  /**
   * Any concrete class derived from KafkaInputOperator has to implement this method to emit tuples to an output port.
   */
  protected abstract void emitTuple(Message message);

  /**
   * Concrete classes derived from KafkaInputOperator should override this method if they want to access the kafka
   * offset and partition id along with the kafka message.
   */
  protected void emitTuple(KafkaConsumer.KafkaMessage message)
  {
    emitTuple(message.msg);
  }

  public int getMaxTuplesPerWindow()
  {
    return maxTuplesPerWindow;
  }

  public void setMaxTuplesPerWindow(int maxTuplesPerWindow)
  {
    this.maxTuplesPerWindow = maxTuplesPerWindow;
  }

  /**
   * Get the maximum total size of messages to be transmitted per window. When the sum of the message sizes
   * transmitted in a window reaches this limit, no more messages are transmitted till the next window. There is one
   * exception however: if the size of the first message in a window is greater than the limit, it is still
   * transmitted so that the processing of messages doesn't get stuck.
   *
   * @return The maximum for the total size
   */
  public long getMaxTotalMsgSizePerWindow()
  {
    return maxTotalMsgSizePerWindow;
  }

  /**
   * Set the maximum total size of messages to be transmitted per window. See {@link #getMaxTotalMsgSizePerWindow()}
   * for more description about this property.
   *
   * @param maxTotalMsgSizePerWindow The maximum for the total size
   */
  public void setMaxTotalMsgSizePerWindow(long maxTotalMsgSizePerWindow)
  {
    this.maxTotalMsgSizePerWindow = maxTotalMsgSizePerWindow;
  }

  @Override
  public void setup(OperatorContext context)
  {
    logger.debug("consumer {} topic {} cacheSize {}", consumer, consumer.getTopic(), consumer.getCacheSize());
    consumer.create();
    // reset the offsets to the checkpointed ones
    if (consumer instanceof SimpleKafkaConsumer && !offsetStats.isEmpty()) {
      Map<KafkaPartition, Long> currentOffsets = new HashMap<>();
      // Increment the offsets and set them on the consumer
      for (Map.Entry<KafkaPartition, Long> e : offsetStats.entrySet()) {
        currentOffsets.put(e.getKey(), e.getValue() + 1);
      }
      ((SimpleKafkaConsumer)consumer).resetOffset(currentOffsets);
    }
    this.context = context;
    operatorId = context.getId();
    if (consumer instanceof HighlevelKafkaConsumer
        && !(windowDataManager instanceof WindowDataManager.NoopWindowDataManager)) {
      throw new RuntimeException("Idempotency is not supported for High Level Kafka Consumer");
    }
    windowDataManager.setup(context);
  }

  @Override
  public void teardown()
  {
    windowDataManager.teardown();
    consumer.teardown();
  }

  @Override
  public void beginWindow(long windowId)
  {
    currentWindowId = windowId;
    if (windowId <= windowDataManager.getLargestCompletedWindow()) {
      replay(windowId);
    }
    emitCount = 0;
    emitTotalMsgSize = 0;
  }

  protected void replay(long windowId)
  {
    try {
      @SuppressWarnings("unchecked")
      Map<KafkaPartition, MutablePair<Long, Integer>> recoveredData =
          (Map<KafkaPartition, MutablePair<Long, Integer>>)windowDataManager.retrieve(windowId);
      if (recoveredData != null) {
        Map<String, List<PartitionMetadata>> pms = KafkaMetadataUtil
            .getPartitionsForTopic(getConsumer().brokers, getConsumer().topic);
        if (pms != null) {
          SimpleKafkaConsumer cons = (SimpleKafkaConsumer)getConsumer();
          // add all partition requests together in one fetch request
          FetchRequestBuilder frb = new FetchRequestBuilder().clientId(cons.getClientId());
          for (Map.Entry<KafkaPartition, MutablePair<Long, Integer>> rc : recoveredData.entrySet()) {
            KafkaPartition kp = rc.getKey();
            List<PartitionMetadata> pmsVal = pms.get(kp.getClusterId());

            Iterator<PartitionMetadata> pmIterator = pmsVal.iterator();
            PartitionMetadata pm = pmIterator.next();
            while (pm.partitionId() != kp.getPartitionId()) {
              if (!pmIterator.hasNext()) {
                break;
              }
              pm = pmIterator.next();
            }
            if (pm.partitionId() != kp.getPartitionId()) {
              continue;
            }

            Broker bk = pm.leader();

            frb.addFetch(consumer.topic, rc.getKey().getPartitionId(), rc.getValue().left, cons.getBufferSize());
            FetchRequest req = frb.build();

            SimpleConsumer ksc = new SimpleConsumer(bk.host(), bk.port(), cons.getTimeout(), cons.getBufferSize(),
                cons.getClientId());
            FetchResponse fetchResponse = ksc.fetch(req);
            Integer count = 0;
            for (MessageAndOffset msg : fetchResponse.messageSet(consumer.topic, kp.getPartitionId())) {
              KafkaConsumer.KafkaMessage kafkaMessage = new KafkaConsumer.KafkaMessage(kp, msg.message(), msg.offset());
              emitTuple(kafkaMessage);
              offsetStats.put(kp, msg.offset());
              count = count + 1;
              if (count.equals(rc.getValue().right)) {
                break;
              }
            }
          }
        }
      }
      if (windowId == windowDataManager.getLargestCompletedWindow()) {
        // Start the consumer at the largest recovery window
        SimpleKafkaConsumer cons = (SimpleKafkaConsumer)getConsumer();
        // Set the offset positions to the consumer
        Map<KafkaPartition, Long> currentOffsets = new HashMap<KafkaPartition, Long>(cons.getCurrentOffsets());
        // Increment the offsets
        for (Map.Entry<KafkaPartition, Long> e : offsetStats.entrySet()) {
          currentOffsets.put(e.getKey(), e.getValue() + 1);
        }
        cons.resetOffset(currentOffsets);
        cons.start();
      }
    } catch (IOException e) {
      throw new RuntimeException("replay", e);
    }
  }

  @Override
  public void endWindow()
  {
    // TODO depends on APEX-78: only needs to keep the history of windows that still need to be committed
    if (getConsumer() instanceof SimpleKafkaConsumer) {
      Map<KafkaPartition, Long> carryOn = new HashMap<>(offsetStats);
      offsetTrackHistory.add(Pair.of(currentWindowId, carryOn));
    }
    if (currentWindowId > windowDataManager.getLargestCompletedWindow()) {
      try {
        windowDataManager.save(currentWindowRecoveryState, currentWindowId);
      } catch (IOException e) {
        throw new RuntimeException("saving recovery", e);
      }
    }
    currentWindowRecoveryState.clear();
  }

  @Override
  public void checkpointed(long windowId)
  {
    // commit the consumer offset
    getConsumer().commitOffset();
  }

  @Override
  public void beforeCheckpoint(long windowId)
  {
  }

  @Override
  public void committed(long windowId)
  {
    if ((getConsumer() instanceof SimpleKafkaConsumer)) {
      SimpleKafkaConsumer cons = (SimpleKafkaConsumer)getConsumer();
      for (Iterator<Pair<Long, Map<KafkaPartition, Long>>> iter = offsetTrackHistory.iterator(); iter.hasNext();) {
        Pair<Long, Map<KafkaPartition, Long>> item = iter.next();
        if (item.getLeft() < windowId) {
          iter.remove();
          continue;
        } else if (item.getLeft() == windowId) {
          if (logger.isDebugEnabled()) {
            logger.debug("report offsets {} ", Joiner.on(';').withKeyValueSeparator("=").join(item.getRight()));
          }
          context.setCounters(cons.getConsumerStats(item.getRight()));
        }
        break;
      }
    }

    try {
      windowDataManager.committed(windowId);
    } catch (IOException e) {
      throw new RuntimeException("deleting state", e);
    }
  }

  @Override
  public void activate(OperatorContext ctx)
  {
    if (context.getValue(OperatorContext.ACTIVATION_WINDOW_ID) != Stateless.WINDOW_ID
        && context.getValue(OperatorContext.ACTIVATION_WINDOW_ID) < windowDataManager.getLargestCompletedWindow()) {
      // If it is a replay state, don't start the consumer
      return;
    }
    // Don't start threads here!
    // The number of kafka consumer threads depends on the type of kafka client and the message
    // metadata (topic/partition/replica) layout
    consumer.start();
  }

  @Override
  public void deactivate()
  {
    consumer.stop();
  }

  @Override
  public void emitTuples()
  {
    if (currentWindowId <= windowDataManager.getLargestCompletedWindow()) {
      return;
    }
    int count = consumer.messageSize() + ((pendingMessage != null) ? 1 : 0);
    if (maxTuplesPerWindow > 0) {
      count = Math.min(count, maxTuplesPerWindow - emitCount);
    }
    KafkaConsumer.KafkaMessage message = null;
    for (int i = 0; i < count; i++) {
      if (pendingMessage != null) {
        message = pendingMessage;
        pendingMessage = null;
      } else {
        message = consumer.pollMessage();
      }

      // If the total size transmitted in the window would be exceeded, don't transmit any more messages in this window.
      // Make an exception for the case when no message has been transmitted in the window yet and transmit at least
      // one message, even if the condition is violated, so that the processing doesn't get stuck
      if ((emitCount > 0) && ((maxTotalMsgSizePerWindow - emitTotalMsgSize) < message.msg.size())) {
        pendingMessage = message;
        break;
      }

      emitTuple(message);
      emitCount++;
      emitTotalMsgSize += message.msg.size();
      offsetStats.put(message.kafkaPart, message.offSet);
      MutablePair<Long, Integer> offsetAndCount = currentWindowRecoveryState.get(message.kafkaPart);
      if (offsetAndCount == null) {
        currentWindowRecoveryState.put(message.kafkaPart, new MutablePair<Long, Integer>(message.offSet, 1));
      } else {
        offsetAndCount.setRight(offsetAndCount.right + 1);
      }
    }
  }

  public void setConsumer(K consumer)
  {
    this.consumer = consumer;
  }

  public KafkaConsumer getConsumer()
  {
    return consumer;
  }

  /**
   * Set the Topic.
   * @omitFromUI
   */
  @Deprecated
  public void setTopic(String topic)
  {
    this.consumer.setTopic(topic);
  }

  /**
   * Set the ZooKeeper quorum of the Kafka cluster(s) you want to consume data from.
   * The operator will discover the brokers that it needs to consume messages from.
   * @omitFromUI
   */
  @Deprecated
  public void setZookeeper(String zookeeperString)
  {
    this.consumer.setZookeeper(zookeeperString);
  }

  @Override
  public void partitioned(Map<Integer, Partitioner.Partition<AbstractKafkaInputOperator<K>>> partitions)
  {
    // update the last repartition time
    lastRepartitionTime = System.currentTimeMillis();
  }

  @Override
  public Collection<Partitioner.Partition<AbstractKafkaInputOperator<K>>> definePartitions(
      Collection<Partitioner.Partition<AbstractKafkaInputOperator<K>>> partitions,
      Partitioner.PartitioningContext context)
  {
    // Initialize brokers from zookeepers
    getConsumer().initBrokers();

    boolean isInitialPartition = true;
    // check if it's the initial partition
    if (partitions.iterator().hasNext()) {
      isInitialPartition = partitions.iterator().next().getStats() == null;
    }

    // Operator partitions
    List<Partitioner.Partition<AbstractKafkaInputOperator<K>>> newPartitions = null;

    // initialize the offsets
    Map<KafkaPartition, Long> initOffset = null;
    if (isInitialPartition && offsetManager != null) {
      initOffset = offsetManager.loadInitialOffsets();
      logger.info("Initial offsets: {} ",
          "{ " + Joiner.on(", ").useForNull("").withKeyValueSeparator(": ").join(initOffset) + " }");
    }

    Set<Integer> deletedOperators = Sets.newHashSet();

    Collection<Partition<AbstractKafkaInputOperator<K>>> resultPartitions = partitions;
    boolean numPartitionsChanged = false;

    switch (strategy) {
      // For the 1 to 1 mapping the framework will create a number of operator partitions based on the kafka topic
      // partitions. Each operator partition will consume from only one kafka partition
      case ONE_TO_ONE:
        if (isInitialPartition) {
          lastRepartitionTime = System.currentTimeMillis();
          logger.info("[ONE_TO_ONE]: Initializing partition(s)");

          // get partition metadata for the topic.
          // Whether the operator is using the high-level or simple kafka consumer, it always creates a temporary
          // simple kafka consumer to get the metadata of the topic.
          // The initial value of brokerList of the KafkaConsumer is used to retrieve the topic metadata
          Map<String, List<PartitionMetadata>> kafkaPartitions = KafkaMetadataUtil
              .getPartitionsForTopic(getConsumer().brokers, getConsumer().getTopic());

          // initialize the number of operator partitions according to the number of kafka partitions
          newPartitions = new LinkedList<Partitioner.Partition<AbstractKafkaInputOperator<K>>>();
          for (Map.Entry<String, List<PartitionMetadata>> kp : kafkaPartitions.entrySet()) {
            String clusterId = kp.getKey();
            for (PartitionMetadata pm : kp.getValue()) {
              logger.info("[ONE_TO_ONE]: Create operator partition for cluster {}, topic {}, kafka partition {} ",
                  clusterId, getConsumer().topic, pm.partitionId());
              newPartitions.add(createPartition(
                  Sets.newHashSet(new KafkaPartition(clusterId, consumer.topic, pm.partitionId())), initOffset));
            }
          }
          resultPartitions = newPartitions;
          numPartitionsChanged = true;
        } else if (newWaitingPartition.size() != 0) {
          // add operator partitions for the new kafka partitions
          for (KafkaPartition newPartition : newWaitingPartition) {
            logger.info("[ONE_TO_ONE]: Add operator partition for cluster {}, topic {}, partition {}",
                newPartition.getClusterId(), getConsumer().topic, newPartition.getPartitionId());
            partitions.add(createPartition(Sets.newHashSet(newPartition), null));
          }
          newWaitingPartition.clear();
          resultPartitions = partitions;
          numPartitionsChanged = true;
        }
        break;

      // For the 1 to N mapping the initial partition count is defined by the stream application.
      // Afterwards, the framework will dynamically adjust the partitions and allocate consumers to as few operator
      // partitions as it can, and guarantee the total intake rate for each operator partition is below some threshold
      case ONE_TO_MANY:
        if (getConsumer() instanceof HighlevelKafkaConsumer) {
          throw new UnsupportedOperationException(
              "[ONE_TO_MANY]: The high-level consumer is not supported for ONE_TO_MANY partition strategy.");
        }

        if (isInitialPartition || newWaitingPartition.size() != 0) {
          lastRepartitionTime = System.currentTimeMillis();
          logger.info("[ONE_TO_MANY]: Initializing partition(s)");
          // get partition metadata for the topic.
          // Whether the operator is using the high-level or simple kafka consumer, it always creates a temporary
          // simple kafka consumer to get the metadata of the topic.
          // The initial value of brokerList of the KafkaConsumer is used to retrieve the topic metadata
          Map<String, List<PartitionMetadata>> kafkaPartitions = KafkaMetadataUtil
              .getPartitionsForTopic(getConsumer().brokers, getConsumer().getTopic());

          int size = initialPartitionCount;
          @SuppressWarnings("unchecked")
          Set<KafkaPartition>[] kps =
              (Set<KafkaPartition>[])Array.newInstance((new HashSet<KafkaPartition>()).getClass(), size);
          int i = 0;
          for (Map.Entry<String, List<PartitionMetadata>> en : kafkaPartitions.entrySet()) {
            String clusterId = en.getKey();
            for (PartitionMetadata pm : en.getValue()) {
              if (kps[i % size] == null) {
                kps[i % size] = new HashSet<KafkaPartition>();
              }
              kps[i % size].add(new KafkaPartition(clusterId, consumer.topic, pm.partitionId()));
              i++;
            }
          }
          size = i > size ? size : i;
          newPartitions = new ArrayList<Partitioner.Partition<AbstractKafkaInputOperator<K>>>(size);
          for (i = 0; i < size; i++) {
            logger.info("[ONE_TO_MANY]: Create operator partition for kafka partition(s): {} ",
                StringUtils.join(kps[i], ", "));
            newPartitions.add(createPartition(kps[i], initOffset));
          }
          // Add the existing partition Ids to the deleted operators
          for (Partition<AbstractKafkaInputOperator<K>> op : partitions) {
            deletedOperators.add(op.getPartitionedInstance().operatorId);
          }

          newWaitingPartition.clear();
          resultPartitions = newPartitions;
          numPartitionsChanged = true;
        }
        break;

      case ONE_TO_MANY_HEURISTIC:
        throw new UnsupportedOperationException("[ONE_TO_MANY_HEURISTIC]: Not implemented yet");
      default:
        break;
    }

    if (numPartitionsChanged) {
      List<WindowDataManager> managers = windowDataManager.partition(resultPartitions.size(), deletedOperators);
      int i = 0;
      for (Partition<AbstractKafkaInputOperator<K>> partition : resultPartitions) {
        partition.getPartitionedInstance().setWindowDataManager(managers.get(i++));
      }
    }
    return resultPartitions;
  }

  /**
   * Create a new partition with the partition Ids and initial offset positions
   *
   * @deprecated use {@link #createPartition(Set, Map)}
   */
  @Deprecated
  protected Partitioner.Partition<AbstractKafkaInputOperator<K>> createPartition(Set<KafkaPartition> pIds,
      Map<KafkaPartition, Long> initOffsets,
      @SuppressWarnings("UnusedParameters") Collection<WindowDataManager> newManagers)
  {
    return createPartition(pIds, initOffsets);
  }

  // Create a new partition with the partition Ids and initial offset positions
  protected Partitioner.Partition<AbstractKafkaInputOperator<K>> createPartition(Set<KafkaPartition> pIds,
      Map<KafkaPartition, Long> initOffsets)
  {
    Partitioner.Partition<AbstractKafkaInputOperator<K>> p =
        new DefaultPartition<>(KryoCloneUtils.cloneObject(this));

    if (p.getPartitionedInstance().getConsumer() instanceof SimpleKafkaConsumer) {
      p.getPartitionedInstance().getConsumer().resetPartitionsAndOffset(pIds, initOffsets);
      if (initOffsets != null) {
        // Don't send all offsets to all partitions
        //p.getPartitionedInstance().offsetStats.putAll(initOffsets);
        p.getPartitionedInstance().offsetStats.putAll(p.getPartitionedInstance().getConsumer().getCurrentOffsets());
      }
    }
    PartitionInfo pif = new PartitionInfo();
    pif.kpids = pIds;
    currentPartitionInfo.add(pif);
    return p;
  }

  @Override
  public StatsListener.Response processStats(StatsListener.BatchedOperatorStats stats)
  {
    StatsListener.Response resp = new StatsListener.Response();
    List<KafkaConsumer.KafkaMeterStats> kstats = extractKafkaStats(stats);
    resp.repartitionRequired = isPartitionRequired(stats.getOperatorId(), kstats);
    return resp;
  }

  private void updateOffsets(List<KafkaConsumer.KafkaMeterStats> kstats)
  {
    // In every partition check interval, call the offset manager to update the offsets
    if (offsetManager != null) {
      Map<KafkaPartition, Long> offsetsForPartitions = getOffsetsForPartitions(kstats);
      if (offsetsForPartitions.size() > 0) {
        logger.debug("Passing offset updates to offset manager");
        offsetManager.updateOffsets(offsetsForPartitions);
      }
    }
  }

  private List<KafkaConsumer.KafkaMeterStats> extractKafkaStats(StatsListener.BatchedOperatorStats stats)
  {
    // preprocess the stats
    List<KafkaConsumer.KafkaMeterStats> kmsList = new LinkedList<KafkaConsumer.KafkaMeterStats>();
    for (Stats.OperatorStats os : stats.getLastWindowedStats()) {
      if (os != null && os.counters instanceof KafkaConsumer.KafkaMeterStats) {
        kmsList.add((KafkaConsumer.KafkaMeterStats)os.counters);
      }
    }
    return kmsList;
  }

  /**
   * Check whether the operator needs a repartition based on the reported stats
   *
   * @return true if repartition is required
   *         false if repartition is not required
   */
  private boolean isPartitionRequired(int opid, List<KafkaConsumer.KafkaMeterStats> kstats)
  {
    long t = System.currentTimeMillis();

    // If stats are available then update offsets
    // Do this before the re-partition interval check below so as not to miss offset updates
    if (kstats.size() > 0) {
      logger.debug("Checking offset updates for offset manager");
      updateOffsets(kstats);
    }

    if (t - lastCheckTime < repartitionCheckInterval) {
      // return false if it's within repartitionCheckInterval since the last stats check
      return false;
    }

    if (repartitionInterval < 0) {
      // if repartition is disabled
      return false;
    }

    if (t - lastRepartitionTime < repartitionInterval) {
      // return false if it's still within repartitionInterval since the last (re)partition
      return false;
    }

    kafkaStatsHolder.put(opid, kstats);

    if (kafkaStatsHolder.size() != currentPartitionInfo.size() || currentPartitionInfo.size() == 0) {
      // skip checking if the operator hasn't collected all the stats from all the current partitions
      return false;
    }

    try {
      // monitor if a new kafka partition has been added
      {
        Set<KafkaPartition> existingIds = new HashSet<KafkaPartition>();
        for (PartitionInfo pio : currentPartitionInfo) {
          existingIds.addAll(pio.kpids);
        }

        Map<String, List<PartitionMetadata>> partitionsMeta = KafkaMetadataUtil
            .getPartitionsForTopic(consumer.brokers, consumer.getTopic());
        if (partitionsMeta == null) {
          // broker(s) have a temporary issue serving metadata
          return false;
        }
        for (Map.Entry<String, List<PartitionMetadata>> en : partitionsMeta.entrySet()) {
          if (en.getValue() == null) {
            // broker(s) have a temporary issue serving metadata
            continue;
          }
          for (PartitionMetadata pm : en.getValue()) {
            KafkaPartition pa = new KafkaPartition(en.getKey(), consumer.topic, pm.partitionId());
            if (!existingIds.contains(pa)) {
              newWaitingPartition.add(pa);
            }
          }
        }
        if (newWaitingPartition.size() != 0) {
          // found new kafka partition
          lastRepartitionTime = t;
          return true;
        }
      }
      return false;
    } finally {
      // update last check time
      lastCheckTime = System.currentTimeMillis();
    }
  }

  public static enum PartitionStrategy
  {
    /**
     * Each operator partition connects to only one kafka partition
     */
    ONE_TO_ONE,
    /**
     * Each operator consumes from several kafka partitions with the overall input rate under some certain hard limit
     * in msgs/s or bytes/s.
     * For now it <b>only</b> supports the <b>simple kafka consumer</b>
     */
    ONE_TO_MANY,
    /**
     * 1 to N partition based on a heuristic function
     * <b>NOT</b> implemented yet
     * TODO implement this later
     */
    ONE_TO_MANY_HEURISTIC
  }

  static class PartitionInfo
  {
    Set<KafkaPartition> kpids;
    long msgRateLeft;
    long byteRateLeft;
  }

  public WindowDataManager getWindowDataManager()
  {
    return windowDataManager;
  }

  public void setWindowDataManager(WindowDataManager windowDataManager)
  {
    this.windowDataManager = windowDataManager;
  }

  public void setInitialPartitionCount(int partitionCount)
  {
    this.initialPartitionCount = partitionCount;
  }

  public int getInitialPartitionCount()
  {
    return initialPartitionCount;
  }

  public long getMsgRateUpperBound()
  {
    return msgRateUpperBound;
  }

  public void setMsgRateUpperBound(long msgRateUpperBound)
  {
    this.msgRateUpperBound = msgRateUpperBound;
  }

  public long getByteRateUpperBound()
  {
    return byteRateUpperBound;
  }

  public void setByteRateUpperBound(long byteRateUpperBound)
  {
    this.byteRateUpperBound = byteRateUpperBound;
  }

  public void setInitialOffset(String initialOffset)
  {
    this.consumer.initialOffset = initialOffset;
  }

  public void setOffsetManager(OffsetManager offsetManager)
  {
    this.offsetManager = offsetManager;
  }

  public OffsetManager getOffsetManager()
  {
    return offsetManager;
  }

  public void setRepartitionCheckInterval(long repartitionCheckInterval)
  {
    this.repartitionCheckInterval = repartitionCheckInterval;
  }

  public long getRepartitionCheckInterval()
  {
    return repartitionCheckInterval;
  }

  public void setRepartitionInterval(long repartitionInterval)
  {
    this.repartitionInterval = repartitionInterval;
  }

  public long getRepartitionInterval()
  {
    return repartitionInterval;
  }

  //@Pattern(regexp="ONE_TO_ONE|ONE_TO_MANY|ONE_TO_MANY_HEURISTIC", flags={Flag.CASE_INSENSITIVE})
  public void setStrategy(String policy)
  {
    this.strategy = PartitionStrategy.valueOf(policy.toUpperCase());
  }
}
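The class above is abstract; to use it you subclass it, implement emitTuple(Message), and wire the operator into an Apex DAG. The sketch below is illustrative only and not part of the file above: the class names, topic, and ZooKeeper address are made-up placeholders, it assumes the code lives in (or imports from) the com.datatorrent.contrib.kafka package alongside KafkaConsumer and SimpleKafkaConsumer, and the two public classes are shown together for brevity although each would normally go in its own file.

package com.datatorrent.contrib.kafka;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;

import com.datatorrent.api.DAG;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.StreamingApplication;
import com.datatorrent.lib.io.ConsoleOutputOperator;

import kafka.message.Message;

// Minimal concrete operator: decodes each Kafka message payload as a UTF-8 string and emits it.
public class StringKafkaInputOperator extends AbstractKafkaInputOperator<KafkaConsumer>
{
  public final transient DefaultOutputPort<String> outputPort = new DefaultOutputPort<String>();

  @Override
  protected void emitTuple(Message message)
  {
    // Copy the message payload out of its ByteBuffer and emit it downstream
    ByteBuffer payload = message.payload();
    byte[] bytes = new byte[payload.remaining()];
    payload.get(bytes);
    outputPort.emit(new String(bytes, StandardCharsets.UTF_8));
  }
}

// Hypothetical application that wires the operator to a console sink.
public class KafkaInputDemoApplication implements StreamingApplication
{
  @Override
  public void populateDAG(DAG dag, Configuration conf)
  {
    StringKafkaInputOperator input = dag.addOperator("kafkaInput", new StringKafkaInputOperator());

    // Configure the consumer; topic and ZooKeeper quorum below are placeholders
    SimpleKafkaConsumer consumer = new SimpleKafkaConsumer();
    consumer.setTopic("demo_topic");
    consumer.setZookeeper("zkhost:2181");
    input.setConsumer(consumer);

    // ONE_TO_ONE creates one operator partition per kafka partition (see definePartitions above)
    input.setStrategy("ONE_TO_ONE");

    ConsoleOutputOperator console = dag.addOperator("console", new ConsoleOutputOperator());
    dag.addStream("messages", input.outputPort, console.input);
  }
}

If you prefer the ONE_TO_MANY strategy, also call setInitialPartitionCount(...) to bound the number of operator partitions; as the javadoc above notes, that strategy only works with the simple kafka consumer. The Malhar library itself ships a ready-made string-emitting subclass (KafkaSinglePortStringInputOperator) along the same lines as the sketch above.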