Java tutorial
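The listing below is the full source of Druid's KafkaSupervisor, the component that creates and monitors Kafka indexing tasks for a single dataSource. Before the full listing, here is a minimal sketch of the pattern at the heart of the class: callers enqueue lightweight "notices" on a blocking queue, and a single-threaded executor drains and handles them so that all supervisor state changes happen on one thread. The class and method names in this sketch are illustrative only and are not taken from the file below.

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;

class NoticeLoopSketch
{
  interface Notice
  {
    void handle();
  }

  private final BlockingQueue<Notice> notices = new LinkedBlockingQueue<>();
  private final ExecutorService exec = Executors.newSingleThreadExecutor();

  void start()
  {
    exec.submit(new Runnable()
    {
      @Override
      public void run()
      {
        try {
          while (!Thread.currentThread().isInterrupted()) {
            notices.take().handle(); // the real class also catches exceptions thrown by handle() and emits an alert
          }
        }
        catch (InterruptedException e) {
          Thread.currentThread().interrupt(); // exit the loop when the supervisor is stopped
        }
      }
    });
  }

  void post(Notice notice)
  {
    notices.add(notice);
  }
}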
/* * Licensed to Metamarkets Group Inc. (Metamarkets) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Metamarkets licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package io.druid.indexing.kafka.supervisor; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.primitives.Ints; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.metamx.emitter.EmittingLogger; import io.druid.concurrent.Execs; import io.druid.indexing.common.TaskInfoProvider; import io.druid.indexing.common.TaskLocation; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.task.Task; import io.druid.indexing.common.task.TaskResource; import io.druid.indexing.kafka.KafkaDataSourceMetadata; import io.druid.indexing.kafka.KafkaIOConfig; import io.druid.indexing.kafka.KafkaIndexTask; import io.druid.indexing.kafka.KafkaIndexTaskClient; import io.druid.indexing.kafka.KafkaIndexTaskClientFactory; import io.druid.indexing.kafka.KafkaPartitions; import io.druid.indexing.kafka.KafkaTuningConfig; import io.druid.indexing.overlord.DataSourceMetadata; import io.druid.indexing.overlord.IndexerMetadataStorageCoordinator; import io.druid.indexing.overlord.TaskMaster; import io.druid.indexing.overlord.TaskQueue; import io.druid.indexing.overlord.TaskRunner; import io.druid.indexing.overlord.TaskRunnerListener; import io.druid.indexing.overlord.TaskRunnerWorkItem; import io.druid.indexing.overlord.TaskStorage; import io.druid.indexing.overlord.supervisor.Supervisor; import io.druid.indexing.overlord.supervisor.SupervisorReport; import io.druid.java.util.common.ISE; import io.druid.metadata.EntryExistsException; import org.apache.commons.codec.digest.DigestUtils; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.serialization.ByteArrayDeserializer; import org.joda.time.DateTime; import javax.annotation.Nullable; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; import 
java.util.Random;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

/**
 * Supervisor responsible for managing the KafkaIndexTasks for a single dataSource. At a high level, the class accepts a
 * {@link KafkaSupervisorSpec} which includes the Kafka topic and configuration as well as an ingestion spec which will
 * be used to generate the indexing tasks. The run loop periodically refreshes its view of the Kafka topic's partitions
 * and the list of running indexing tasks and ensures that all partitions are being read from and that there are enough
 * tasks to satisfy the desired number of replicas. As tasks complete, new tasks are queued to process the next range of
 * Kafka offsets.
 */
public class KafkaSupervisor implements Supervisor
{
  private static final EmittingLogger log = new EmittingLogger(KafkaSupervisor.class);
  private static final Random RANDOM = new Random();
  private static final long MAX_RUN_FREQUENCY_MILLIS = 1000; // prevent us from running too often in response to events
  private static final long NOT_SET = -1;

  // Internal data structures
  // --------------------------------------------------------

  /**
   * A TaskGroup is the main data structure used by KafkaSupervisor to organize and monitor Kafka partitions and
   * indexing tasks. All the tasks in a TaskGroup should always be doing the same thing (reading the same partitions and
   * starting from the same offset), and if [replicas] is configured to be 1, a TaskGroup will contain a single task (the
   * exception being if the supervisor started up and discovered and adopted some already-running tasks). At any given
   * time, there should only be up to a maximum of [taskCount] actively-reading task groups (tracked in the [taskGroups]
   * map) plus zero or more pending-completion task groups (tracked in [pendingCompletionTaskGroups]).
   */
  private class TaskGroup
  {
    // This specifies the partitions and starting offsets for this task group. It is set on group creation from the data
    // in [partitionGroups] and never changes during the lifetime of this task group, which will live until a task in
    // this task group has completed successfully, at which point this will be destroyed and a new task group will be
    // created with new starting offsets. This allows us to create replacement tasks for failed tasks that process the
    // same offsets, even if the values in [partitionGroups] have been changed.
    final ImmutableMap<Integer, Long> partitionOffsets;
    final ConcurrentHashMap<String, TaskData> tasks = new ConcurrentHashMap<>();
    final Optional<DateTime> minimumMessageTime;
    DateTime completionTimeout; // is set after signalTasksToFinish(); if not done by timeout, take corrective action

    public TaskGroup(ImmutableMap<Integer, Long> partitionOffsets, Optional<DateTime> minimumMessageTime)
    {
      this.partitionOffsets = partitionOffsets;
      this.minimumMessageTime = minimumMessageTime;
    }
  }

  private class TaskData
  {
    TaskStatus status;
    DateTime startTime;
  }

  // Map<{group ID}, {actively reading task group}>; see documentation for TaskGroup class
  private final ConcurrentHashMap<Integer, TaskGroup> taskGroups = new ConcurrentHashMap<>();

  // After telling a taskGroup to stop reading and begin publishing a segment, it is moved from [taskGroups] to here so
  // we can monitor its status while we queue new tasks to read the next range of offsets. This is a list since we could
  // have multiple sets of tasks publishing at once if time-to-publish > taskDuration.
  // Map<{group ID}, List<{pending completion task groups}>>
  private final ConcurrentHashMap<Integer, CopyOnWriteArrayList<TaskGroup>> pendingCompletionTaskGroups = new ConcurrentHashMap<>();

  // The starting offset for a new partition in [partitionGroups] is initially set to NOT_SET. When a new task group
  // is created and is assigned partitions, if the offset in [partitionGroups] is NOT_SET it will take the starting
  // offset value from the metadata store, and if it can't find it there, from Kafka. Once a task begins
  // publishing, the offset in partitionGroups will be updated to the ending offset of the publishing-but-not-yet-
  // completed task, which will cause the next set of tasks to begin reading from where the previous task left
  // off. If that previous task now fails, we will set the offset in [partitionGroups] back to NOT_SET which will
  // cause successive tasks to again grab their starting offset from metadata store. This mechanism allows us to
  // start up successive tasks without waiting for the previous tasks to succeed and still be able to handle task
  // failures during publishing.
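  //
  // Illustrative example (added for this tutorial, not part of the original source; offsets are made-up values):
  // with taskCount = 2 and a topic with partitions 0..2, getTaskGroupIdForPartition() puts partitions {0, 2} in
  // group 0 and {1} in group 1, so [partitionGroups] might evolve like this:
  //
  //   after partition discovery:      {0: {0: NOT_SET, 2: NOT_SET}, 1: {1: NOT_SET}}
  //   group 0 begins publishing:      {0: {0: 5042, 2: 4987},       1: {1: NOT_SET}}   <- next group 0 tasks start here
  //   a group 0 publish then fails:   {0: {0: NOT_SET, 2: NOT_SET}, 1: {1: NOT_SET}}   <- offsets re-read from metadata
  //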
// Map<{group ID}, Map<{partition ID}, {startingOffset}>> private final ConcurrentHashMap<Integer, ConcurrentHashMap<Integer, Long>> partitionGroups = new ConcurrentHashMap<>(); // -------------------------------------------------------- private final TaskStorage taskStorage; private final TaskMaster taskMaster; private final IndexerMetadataStorageCoordinator indexerMetadataStorageCoordinator; private final KafkaIndexTaskClient taskClient; private final ObjectMapper sortingMapper; private final KafkaSupervisorSpec spec; private final String dataSource; private final KafkaSupervisorIOConfig ioConfig; private final KafkaSupervisorTuningConfig tuningConfig; private final KafkaTuningConfig taskTuningConfig; private final String supervisorId; private final TaskInfoProvider taskInfoProvider; private final ExecutorService exec; private final ScheduledExecutorService scheduledExec; private final ListeningExecutorService workerExec; private final BlockingQueue<Notice> notices = new LinkedBlockingDeque<>(); private final Object stopLock = new Object(); private final Object stateChangeLock = new Object(); private boolean listenerRegistered = false; private long lastRunTime; private volatile DateTime firstRunTime; private volatile KafkaConsumer consumer; private volatile boolean started = false; private volatile boolean stopped = false; public KafkaSupervisor(final TaskStorage taskStorage, final TaskMaster taskMaster, final IndexerMetadataStorageCoordinator indexerMetadataStorageCoordinator, final KafkaIndexTaskClientFactory taskClientFactory, final ObjectMapper mapper, final KafkaSupervisorSpec spec) { this.taskStorage = taskStorage; this.taskMaster = taskMaster; this.indexerMetadataStorageCoordinator = indexerMetadataStorageCoordinator; this.sortingMapper = mapper.copy().configure(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY, true); this.spec = spec; this.dataSource = spec.getDataSchema().getDataSource(); this.ioConfig = spec.getIoConfig(); this.tuningConfig = spec.getTuningConfig(); this.taskTuningConfig = KafkaTuningConfig.copyOf(this.tuningConfig); this.supervisorId = String.format("KafkaSupervisor-%s", dataSource); this.exec = Execs.singleThreaded(supervisorId); this.scheduledExec = Execs.scheduledSingleThreaded(supervisorId + "-Scheduler-%d"); int workerThreads = (this.tuningConfig.getWorkerThreads() != null ? this.tuningConfig.getWorkerThreads() : Math.min(10, this.ioConfig.getTaskCount())); this.workerExec = MoreExecutors .listeningDecorator(Execs.multiThreaded(workerThreads, supervisorId + "-Worker-%d")); log.info("Created worker pool with [%d] threads for dataSource [%s]", workerThreads, this.dataSource); this.taskInfoProvider = new TaskInfoProvider() { @Override public TaskLocation getTaskLocation(final String id) { Preconditions.checkNotNull(id, "id"); Optional<TaskRunner> taskRunner = taskMaster.getTaskRunner(); if (taskRunner.isPresent()) { Optional<? extends TaskRunnerWorkItem> item = Iterables .tryFind(taskRunner.get().getRunningTasks(), new Predicate<TaskRunnerWorkItem>() { @Override public boolean apply(TaskRunnerWorkItem taskRunnerWorkItem) { return id.equals(taskRunnerWorkItem.getTaskId()); } }); if (item.isPresent()) { return item.get().getLocation(); } } else { log.error("Failed to get task runner because I'm not the leader!"); } return TaskLocation.unknown(); } @Override public Optional<TaskStatus> getTaskStatus(String id) { return taskStorage.getStatus(id); } }; int chatThreads = (this.tuningConfig.getChatThreads() != null ? 
this.tuningConfig.getChatThreads() : Math.min(10, this.ioConfig.getTaskCount() * this.ioConfig.getReplicas())); this.taskClient = taskClientFactory.build(taskInfoProvider, dataSource, chatThreads, this.tuningConfig.getHttpTimeout(), this.tuningConfig.getChatRetries()); log.info("Created taskClient with dataSource[%s] chatThreads[%d] httpTimeout[%s] chatRetries[%d]", dataSource, chatThreads, this.tuningConfig.getHttpTimeout(), this.tuningConfig.getChatRetries()); } @Override public void start() { synchronized (stateChangeLock) { Preconditions.checkState(!started, "already started"); Preconditions.checkState(!exec.isShutdown(), "already stopped"); try { consumer = getKafkaConsumer(); exec.submit(new Runnable() { @Override public void run() { try { while (!Thread.currentThread().isInterrupted()) { final Notice notice = notices.take(); try { notice.handle(); } catch (Exception e) { log.makeAlert(e, "KafkaSupervisor[%s] failed to handle notice", dataSource) .addData("noticeClass", notice.getClass().getSimpleName()).emit(); } } } catch (InterruptedException e) { log.info("KafkaSupervisor[%s] interrupted, exiting", dataSource); } } }); firstRunTime = DateTime.now().plus(ioConfig.getStartDelay()); scheduledExec.scheduleAtFixedRate(buildRunTask(), ioConfig.getStartDelay().getMillis(), Math.max(ioConfig.getPeriod().getMillis(), MAX_RUN_FREQUENCY_MILLIS), TimeUnit.MILLISECONDS); started = true; log.info("Started KafkaSupervisor[%s], first run in [%s], with spec: [%s]", dataSource, ioConfig.getStartDelay(), spec.toString()); } catch (Exception e) { if (consumer != null) { consumer.close(); } log.makeAlert(e, "Exception starting KafkaSupervisor[%s]", dataSource).emit(); throw Throwables.propagate(e); } } } @Override public void stop(boolean stopGracefully) { synchronized (stateChangeLock) { Preconditions.checkState(started, "not started"); log.info("Beginning shutdown of KafkaSupervisor[%s]", dataSource); try { scheduledExec.shutdownNow(); // stop recurring executions Optional<TaskRunner> taskRunner = taskMaster.getTaskRunner(); if (taskRunner.isPresent()) { taskRunner.get().unregisterListener(supervisorId); } // Stopping gracefully will synchronize the end offsets of the tasks and signal them to publish, and will block // until the tasks have acknowledged or timed out. We want this behavior when we're explicitly shut down through // the API, but if we shut down for other reasons (e.g. we lose leadership) we want to just stop and leave the // tasks as they are. 
synchronized (stopLock) { if (stopGracefully) { log.info( "Posting GracefulShutdownNotice, signalling managed tasks to complete and publish"); notices.add(new GracefulShutdownNotice()); } else { log.info("Posting ShutdownNotice"); notices.add(new ShutdownNotice()); } long shutdownTimeoutMillis = tuningConfig.getShutdownTimeout().getMillis(); long endTime = System.currentTimeMillis() + shutdownTimeoutMillis; while (!stopped) { long sleepTime = endTime - System.currentTimeMillis(); if (sleepTime <= 0) { log.info("Timed out while waiting for shutdown (timeout [%,dms])", shutdownTimeoutMillis); stopped = true; break; } stopLock.wait(sleepTime); } } log.info("Shutdown notice handled"); taskClient.close(); workerExec.shutdownNow(); exec.shutdownNow(); started = false; log.info("KafkaSupervisor[%s] has stopped", dataSource); } catch (Exception e) { log.makeAlert(e, "Exception stopping KafkaSupervisor[%s]", dataSource).emit(); } } } @Override public SupervisorReport getStatus() { return generateReport(true); } @Override public void reset() { log.info("Posting ResetNotice"); notices.add(new ResetNotice()); } public void possiblyRegisterListener() { // getTaskRunner() sometimes fails if the task queue is still being initialized so retry later until we succeed if (listenerRegistered) { return; } Optional<TaskRunner> taskRunner = taskMaster.getTaskRunner(); if (taskRunner.isPresent()) { taskRunner.get().registerListener(new TaskRunnerListener() { @Override public String getListenerId() { return supervisorId; } @Override public void locationChanged(final String taskId, final TaskLocation newLocation) { // do nothing } @Override public void statusChanged(String taskId, TaskStatus status) { notices.add(new RunNotice()); } }, MoreExecutors.sameThreadExecutor()); listenerRegistered = true; } } private interface Notice { void handle() throws ExecutionException, InterruptedException; } private class RunNotice implements Notice { @Override public void handle() throws ExecutionException, InterruptedException { long nowTime = System.currentTimeMillis(); if (nowTime - lastRunTime < MAX_RUN_FREQUENCY_MILLIS) { return; } lastRunTime = nowTime; runInternal(); } } private class GracefulShutdownNotice extends ShutdownNotice { @Override public void handle() throws InterruptedException, ExecutionException { gracefulShutdownInternal(); super.handle(); } } private class ShutdownNotice implements Notice { @Override public void handle() throws InterruptedException, ExecutionException { consumer.close(); synchronized (stopLock) { stopped = true; stopLock.notifyAll(); } } } private class ResetNotice implements Notice { @Override public void handle() { resetInternal(); } } @VisibleForTesting void resetInternal() { boolean result = indexerMetadataStorageCoordinator.deleteDataSourceMetadata(dataSource); log.info("Reset dataSource[%s] - dataSource metadata entry deleted? 
[%s]", dataSource, result); for (TaskGroup taskGroup : taskGroups.values()) { for (Map.Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) { String taskId = entry.getKey(); log.info("Reset dataSource[%s] - killing task [%s]", dataSource, taskId); killTask(taskId); } } partitionGroups.clear(); taskGroups.clear(); } @VisibleForTesting void gracefulShutdownInternal() throws ExecutionException, InterruptedException { // Prepare for shutdown by 1) killing all tasks that haven't been assigned to a worker yet, and 2) causing all // running tasks to begin publishing by setting their startTime to a very long time ago so that the logic in // checkTaskDuration() will be triggered. This is better than just telling these tasks to publish whatever they // have, as replicas that are supposed to publish the same segment may not have read the same set of offsets. for (TaskGroup taskGroup : taskGroups.values()) { for (Map.Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) { if (taskInfoProvider.getTaskLocation(entry.getKey()).equals(TaskLocation.unknown())) { killTask(entry.getKey()); } else { entry.getValue().startTime = new DateTime(0); } } } checkTaskDuration(); } @VisibleForTesting void runInternal() throws ExecutionException, InterruptedException { possiblyRegisterListener(); updatePartitionDataFromKafka(); discoverTasks(); updateTaskStatus(); checkTaskDuration(); checkPendingCompletionTasks(); checkCurrentTaskState(); createNewTasks(); if (log.isDebugEnabled()) { log.debug(generateReport(true).toString()); } else { log.info(generateReport(false).toString()); } } @VisibleForTesting String generateSequenceName(int groupId) { StringBuilder sb = new StringBuilder(); Map<Integer, Long> startPartitions = taskGroups.get(groupId).partitionOffsets; for (Map.Entry<Integer, Long> entry : startPartitions.entrySet()) { sb.append(String.format("+%d(%d)", entry.getKey(), entry.getValue())); } String partitionOffsetStr = sb.toString().substring(1); Optional<DateTime> minimumMessageTime = taskGroups.get(groupId).minimumMessageTime; String minMsgTimeStr = (minimumMessageTime.isPresent() ? 
String.valueOf(minimumMessageTime.get().getMillis()) : ""); String dataSchema, tuningConfig; try { dataSchema = sortingMapper.writeValueAsString(spec.getDataSchema()); tuningConfig = sortingMapper.writeValueAsString(taskTuningConfig); } catch (JsonProcessingException e) { throw Throwables.propagate(e); } String hashCode = DigestUtils.sha1Hex(dataSchema + tuningConfig + partitionOffsetStr + minMsgTimeStr) .substring(0, 15); return Joiner.on("_").join("index_kafka", dataSource, hashCode); } private static String getRandomId() { final StringBuilder suffix = new StringBuilder(8); for (int i = 0; i < Ints.BYTES * 2; ++i) { suffix.append((char) ('a' + ((RANDOM.nextInt() >>> (i * 4)) & 0x0F))); } return suffix.toString(); } private KafkaConsumer<byte[], byte[]> getKafkaConsumer() { final Properties props = new Properties(); props.putAll(ioConfig.getConsumerProperties()); props.setProperty("enable.auto.commit", "false"); props.setProperty("metadata.max.age.ms", "10000"); props.setProperty("group.id", String.format("kafka-supervisor-%s", getRandomId())); ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader(getClass().getClassLoader()); return new KafkaConsumer<>(props, new ByteArrayDeserializer(), new ByteArrayDeserializer()); } finally { Thread.currentThread().setContextClassLoader(currCtxCl); } } private void updatePartitionDataFromKafka() { Map<String, List<PartitionInfo>> topics; try { topics = consumer.listTopics(); // updates the consumer's list of partitions from the brokers } catch (Exception e) { // calls to the consumer throw NPEs when the broker doesn't respond log.warn(e, "Unable to get partition data from Kafka for brokers [%s], are the brokers up?", ioConfig.getConsumerProperties().get(KafkaSupervisorIOConfig.BOOTSTRAP_SERVERS_KEY)); return; } List<PartitionInfo> partitions = topics.get(ioConfig.getTopic()); int numPartitions = (partitions != null ? partitions.size() : 0); log.debug("Found [%d] Kafka partitions for topic [%s]", numPartitions, ioConfig.getTopic()); for (int partition = 0; partition < numPartitions; partition++) { int taskGroupId = getTaskGroupIdForPartition(partition); partitionGroups.putIfAbsent(taskGroupId, new ConcurrentHashMap<Integer, Long>()); ConcurrentHashMap<Integer, Long> partitionMap = partitionGroups.get(taskGroupId); // The starting offset for a new partition in [partitionGroups] is initially set to NOT_SET; when a new task group // is created and is assigned partitions, if the offset in [partitionGroups] is NOT_SET it will take the starting // offset value from the metadata store, and if it can't find it there, from Kafka. Once a task begins // publishing, the offset in partitionGroups will be updated to the ending offset of the publishing-but-not-yet- // completed task, which will cause the next set of tasks to begin reading from where the previous task left // off. If that previous task now fails, we will set the offset in [partitionGroups] back to NOT_SET which will // cause successive tasks to again grab their starting offset from metadata store. This mechanism allows us to // start up successive tasks without waiting for the previous tasks to succeed and still be able to handle task // failures during publishing. 
if (partitionMap.putIfAbsent(partition, NOT_SET) == null) { log.info("New partition [%d] discovered for topic [%s], added to task group [%d]", partition, ioConfig.getTopic(), taskGroupId); } } } private void discoverTasks() throws ExecutionException, InterruptedException { int taskCount = 0; List<String> futureTaskIds = Lists.newArrayList(); List<ListenableFuture<Boolean>> futures = Lists.newArrayList(); List<Task> tasks = taskStorage.getActiveTasks(); for (Task task : tasks) { if (!(task instanceof KafkaIndexTask) || !dataSource.equals(task.getDataSource())) { continue; } taskCount++; final KafkaIndexTask kafkaTask = (KafkaIndexTask) task; final String taskId = task.getId(); // Determine which task group this task belongs to based on one of the partitions handled by this task. If we // later determine that this task is actively reading, we will make sure that it matches our current partition // allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read // by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing // state, we will permit it to complete even if it doesn't match our current partition allocation to support // seamless schema migration. Iterator<Integer> it = kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet() .iterator(); final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null); if (taskGroupId != null) { // check to see if we already know about this task, either in [taskGroups] or in [pendingCompletionTaskGroups] // and if not add it to taskGroups or pendingCompletionTaskGroups (if status = PUBLISHING) TaskGroup taskGroup = taskGroups.get(taskGroupId); if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) { futureTaskIds.add(taskId); futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<KafkaIndexTask.Status, Boolean>() { @Override public Boolean apply(KafkaIndexTask.Status status) { if (status == KafkaIndexTask.Status.PUBLISHING) { addDiscoveredTaskToPendingCompletionTaskGroups(taskGroupId, taskId, kafkaTask.getIOConfig().getStartPartitions() .getPartitionOffsetMap()); // update partitionGroups with the publishing task's offsets (if they are greater than what is // existing) so that the next tasks will start reading from where this task left off Map<Integer, Long> publishingTaskCurrentOffsets = taskClient .getCurrentOffsets(taskId, true); for (Map.Entry<Integer, Long> entry : publishingTaskCurrentOffsets .entrySet()) { Integer partition = entry.getKey(); Long offset = entry.getValue(); ConcurrentHashMap<Integer, Long> partitionOffsets = partitionGroups .get(getTaskGroupIdForPartition(partition)); boolean succeeded; do { succeeded = true; Long previousOffset = partitionOffsets.putIfAbsent(partition, offset); if (previousOffset != null && previousOffset < offset) { succeeded = partitionOffsets.replace(partition, previousOffset, offset); } } while (!succeeded); } } else { for (Integer partition : kafkaTask.getIOConfig().getStartPartitions() .getPartitionOffsetMap().keySet()) { if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) { log.warn( "Stopping task [%s] which does not match the expected partition allocation", taskId); try { stopTask(taskId, false).get(); } catch (InterruptedException | ExecutionException e) { log.warn(e, "Exception while stopping task"); } return false; } } if (taskGroups.putIfAbsent(taskGroupId, new TaskGroup( 
ImmutableMap.copyOf(kafkaTask.getIOConfig() .getStartPartitions().getPartitionOffsetMap()), kafkaTask.getIOConfig().getMinimumMessageTime())) == null) { log.debug("Created new task group [%d]", taskGroupId); } if (!isTaskCurrent(taskGroupId, taskId)) { log.info( "Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId); try { stopTask(taskId, false).get(); } catch (InterruptedException | ExecutionException e) { log.warn(e, "Exception while stopping task"); } return false; } else { taskGroups.get(taskGroupId).tasks.putIfAbsent(taskId, new TaskData()); } } return true; } }, workerExec)); } } } List<Boolean> results = Futures.successfulAsList(futures).get(); for (int i = 0; i < results.size(); i++) { if (results.get(i) == null) { String taskId = futureTaskIds.get(i); log.warn("Task [%s] failed to return status, killing task", taskId); killTask(taskId); } } log.debug("Found [%d] Kafka indexing tasks for dataSource [%s]", taskCount, dataSource); } private void addDiscoveredTaskToPendingCompletionTaskGroups(int groupId, String taskId, Map<Integer, Long> startingPartitions) { pendingCompletionTaskGroups.putIfAbsent(groupId, Lists.<TaskGroup>newCopyOnWriteArrayList()); CopyOnWriteArrayList<TaskGroup> taskGroupList = pendingCompletionTaskGroups.get(groupId); for (TaskGroup taskGroup : taskGroupList) { if (taskGroup.partitionOffsets.equals(startingPartitions)) { if (taskGroup.tasks.putIfAbsent(taskId, new TaskData()) == null) { log.info("Added discovered task [%s] to existing pending task group", taskId); } return; } } log.info("Creating new pending completion task group for discovered task [%s]", taskId); // reading the minimumMessageTime from the publishing task and setting it here is not necessary as this task cannot // change to a state where it will read any more events TaskGroup newTaskGroup = new TaskGroup(ImmutableMap.copyOf(startingPartitions), Optional.<DateTime>absent()); newTaskGroup.tasks.put(taskId, new TaskData()); newTaskGroup.completionTimeout = DateTime.now().plus(ioConfig.getCompletionTimeout()); taskGroupList.add(newTaskGroup); } private void updateTaskStatus() throws ExecutionException, InterruptedException { final List<ListenableFuture<Boolean>> futures = Lists.newArrayList(); final List<String> futureTaskIds = Lists.newArrayList(); // update status (and startTime if unknown) of current tasks in taskGroups for (TaskGroup group : taskGroups.values()) { for (Map.Entry<String, TaskData> entry : group.tasks.entrySet()) { final String taskId = entry.getKey(); final TaskData taskData = entry.getValue(); if (taskData.startTime == null) { futureTaskIds.add(taskId); futures.add(Futures.transform(taskClient.getStartTimeAsync(taskId), new Function<DateTime, Boolean>() { @Nullable @Override public Boolean apply(@Nullable DateTime startTime) { if (startTime == null) { return false; } taskData.startTime = startTime; long millisRemaining = ioConfig.getTaskDuration().getMillis() - (System.currentTimeMillis() - taskData.startTime.getMillis()); if (millisRemaining > 0) { scheduledExec.schedule(buildRunTask(), millisRemaining + MAX_RUN_FREQUENCY_MILLIS, TimeUnit.MILLISECONDS); } return true; } }, workerExec)); } taskData.status = taskStorage.getStatus(taskId).get(); } } // update status of pending completion tasks in pendingCompletionTaskGroups for (List<TaskGroup> taskGroups : pendingCompletionTaskGroups.values()) { for (TaskGroup group : taskGroups) { for (Map.Entry<String, TaskData> entry : group.tasks.entrySet()) { entry.getValue().status = 
taskStorage.getStatus(entry.getKey()).get(); } } } List<Boolean> results = Futures.successfulAsList(futures).get(); for (int i = 0; i < results.size(); i++) { // false means the task hasn't started running yet and that's okay; null means it should be running but the HTTP // request threw an exception so kill the task if (results.get(i) == null) { String taskId = futureTaskIds.get(i); log.warn("Task [%s] failed to return start time, killing task", taskId); killTask(taskId); } } } private void checkTaskDuration() throws InterruptedException, ExecutionException { final List<ListenableFuture<Map<Integer, Long>>> futures = Lists.newArrayList(); final List<Integer> futureGroupIds = Lists.newArrayList(); for (Map.Entry<Integer, TaskGroup> entry : taskGroups.entrySet()) { Integer groupId = entry.getKey(); TaskGroup group = entry.getValue(); // find the longest running task from this group DateTime earliestTaskStart = DateTime.now(); for (TaskData taskData : group.tasks.values()) { if (earliestTaskStart.isAfter(taskData.startTime)) { earliestTaskStart = taskData.startTime; } } // if this task has run longer than the configured duration, signal all tasks in the group to persist if (earliestTaskStart.plus(ioConfig.getTaskDuration()).isBeforeNow()) { log.info("Task group [%d] has run for [%s]", groupId, ioConfig.getTaskDuration()); futureGroupIds.add(groupId); futures.add(signalTasksToFinish(groupId)); } } List<Map<Integer, Long>> results = Futures.successfulAsList(futures).get(); for (int j = 0; j < results.size(); j++) { Integer groupId = futureGroupIds.get(j); TaskGroup group = taskGroups.get(groupId); Map<Integer, Long> endOffsets = results.get(j); if (endOffsets != null) { // set a timeout and put this group in pendingCompletionTaskGroups so that it can be monitored for completion group.completionTimeout = DateTime.now().plus(ioConfig.getCompletionTimeout()); pendingCompletionTaskGroups.putIfAbsent(groupId, Lists.<TaskGroup>newCopyOnWriteArrayList()); pendingCompletionTaskGroups.get(groupId).add(group); // set endOffsets as the next startOffsets for (Map.Entry<Integer, Long> entry : endOffsets.entrySet()) { partitionGroups.get(groupId).put(entry.getKey(), entry.getValue()); } } else { log.warn("All tasks in group [%s] failed to transition to publishing state, killing tasks [%s]", groupId, group.tasks.keySet()); for (String id : group.tasks.keySet()) { killTask(id); } } // remove this task group from the list of current task groups now that it has been handled taskGroups.remove(groupId); } } private ListenableFuture<Map<Integer, Long>> signalTasksToFinish(final int groupId) { final TaskGroup taskGroup = taskGroups.get(groupId); // 1) Check if any task completed (in which case we're done) and kill unassigned tasks Iterator<Map.Entry<String, TaskData>> i = taskGroup.tasks.entrySet().iterator(); while (i.hasNext()) { Map.Entry<String, TaskData> taskEntry = i.next(); String taskId = taskEntry.getKey(); TaskData task = taskEntry.getValue(); if (task.status.isSuccess()) { // If any task in this group has already completed, stop the rest of the tasks in the group and return. 
// This will cause us to create a new set of tasks next cycle that will start from the offsets in // metadata store (which will have advanced if we succeeded in publishing and will remain the same if publishing // failed and we need to re-ingest) return Futures.transform(stopTasksInGroup(taskGroup), new Function<Void, Map<Integer, Long>>() { @Nullable @Override public Map<Integer, Long> apply(@Nullable Void input) { return null; } }, workerExec); } if (task.status.isRunnable()) { if (taskInfoProvider.getTaskLocation(taskId).equals(TaskLocation.unknown())) { log.info("Killing task [%s] which hasn't been assigned to a worker", taskId); killTask(taskId); i.remove(); } } } // 2) Pause running tasks final List<ListenableFuture<Map<Integer, Long>>> pauseFutures = Lists.newArrayList(); final List<String> pauseTaskIds = ImmutableList.copyOf(taskGroup.tasks.keySet()); for (final String taskId : pauseTaskIds) { pauseFutures.add(taskClient.pauseAsync(taskId)); } return Futures.transform(Futures.successfulAsList(pauseFutures), new Function<List<Map<Integer, Long>>, Map<Integer, Long>>() { @Nullable @Override public Map<Integer, Long> apply(List<Map<Integer, Long>> input) { // 3) Build a map of the highest offset read by any task in the group for each partition final Map<Integer, Long> endOffsets = new HashMap<>(); for (int i = 0; i < input.size(); i++) { Map<Integer, Long> result = input.get(i); if (result == null || result.isEmpty()) { // kill tasks that didn't return a value String taskId = pauseTaskIds.get(i); log.warn("Task [%s] failed to respond to [pause] in a timely manner, killing task", taskId); killTask(taskId); taskGroup.tasks.remove(taskId); } else { // otherwise build a map of the highest offsets seen for (Map.Entry<Integer, Long> offset : result.entrySet()) { if (!endOffsets.containsKey(offset.getKey()) || endOffsets.get(offset.getKey()).compareTo(offset.getValue()) < 0) { endOffsets.put(offset.getKey(), offset.getValue()); } } } } // 4) Set the end offsets for each task to the values from step 3 and resume the tasks. All the tasks should // finish reading and start publishing within a short period, depending on how in sync the tasks were. final List<ListenableFuture<Boolean>> setEndOffsetFutures = Lists.newArrayList(); final List<String> setEndOffsetTaskIds = ImmutableList.copyOf(taskGroup.tasks.keySet()); if (setEndOffsetTaskIds.isEmpty()) { log.info("All tasks in taskGroup [%d] have failed, tasks will be re-created", groupId); return null; } log.info("Setting endOffsets for tasks in taskGroup [%d] to %s and resuming", groupId, endOffsets); for (final String taskId : setEndOffsetTaskIds) { setEndOffsetFutures.add(taskClient.setEndOffsetsAsync(taskId, endOffsets, true)); } try { List<Boolean> results = Futures.successfulAsList(setEndOffsetFutures).get(); for (int i = 0; i < results.size(); i++) { if (results.get(i) == null || !results.get(i)) { String taskId = setEndOffsetTaskIds.get(i); log.warn( "Task [%s] failed to respond to [set end offsets] in a timely manner, killing task", taskId); killTask(taskId); taskGroup.tasks.remove(taskId); } } } catch (Exception e) { Throwables.propagate(e); } if (taskGroup.tasks.isEmpty()) { log.info("All tasks in taskGroup [%d] have failed, tasks will be re-created", groupId); return null; } return endOffsets; } }, workerExec); } /** * Monitors [pendingCompletionTaskGroups] for tasks that have completed. If any task in a task group has completed, we * can safely stop the rest of the tasks in that group. 
If a task group has exceeded its publishing timeout, then * we need to stop all tasks in not only that task group but also 1) any subsequent task group that is also pending * completion and 2) the current task group that is running, because the assumption that we have handled up to the * starting offset for subsequent task groups is no longer valid, and subsequent tasks would fail as soon as they * attempted to publish because of the contiguous range consistency check. */ private void checkPendingCompletionTasks() throws ExecutionException, InterruptedException { List<ListenableFuture<Void>> futures = Lists.newArrayList(); for (Map.Entry<Integer, CopyOnWriteArrayList<TaskGroup>> pendingGroupList : pendingCompletionTaskGroups .entrySet()) { boolean stopTasksInTaskGroup = false; Integer groupId = pendingGroupList.getKey(); CopyOnWriteArrayList<TaskGroup> taskGroupList = pendingGroupList.getValue(); List<TaskGroup> toRemove = Lists.newArrayList(); for (TaskGroup group : taskGroupList) { boolean foundSuccess = false, entireTaskGroupFailed = false; if (stopTasksInTaskGroup) { // One of the earlier groups that was handling the same partition set timed out before the segments were // published so stop any additional groups handling the same partition set that are pending completion. futures.add(stopTasksInGroup(group)); toRemove.add(group); continue; } Iterator<Map.Entry<String, TaskData>> iTask = group.tasks.entrySet().iterator(); while (iTask.hasNext()) { Map.Entry<String, TaskData> task = iTask.next(); if (task.getValue().status.isFailure()) { iTask.remove(); // remove failed task if (group.tasks.isEmpty()) { // if all tasks in the group have failed, just nuke all task groups with this partition set and restart entireTaskGroupFailed = true; break; } } if (task.getValue().status.isSuccess()) { // If one of the pending completion tasks was successful, stop the rest of the tasks in the group as // we no longer need them to publish their segment. 
log.info("Task [%s] completed successfully, stopping tasks %s", task.getKey(), group.tasks.keySet()); futures.add(stopTasksInGroup(group)); foundSuccess = true; toRemove.add(group); // remove the TaskGroup from the list of pending completion task groups break; // skip iterating the rest of the tasks in this group as they've all been stopped now } } if ((!foundSuccess && group.completionTimeout.isBeforeNow()) || entireTaskGroupFailed) { if (entireTaskGroupFailed) { log.warn( "All tasks in group [%d] failed to publish, killing all tasks for these partitions", groupId); } else { log.makeAlert("No task in [%s] succeeded before the completion timeout elapsed [%s]!", group.tasks.keySet(), ioConfig.getCompletionTimeout()).emit(); } // reset partitions offsets for this task group so that they will be re-read from metadata storage partitionGroups.remove(groupId); // stop all the tasks in this pending completion group futures.add(stopTasksInGroup(group)); // set a flag so the other pending completion groups for this set of partitions will also stop stopTasksInTaskGroup = true; // stop all the tasks in the currently reading task group and remove the bad task group futures.add(stopTasksInGroup(taskGroups.remove(groupId))); toRemove.add(group); } } taskGroupList.removeAll(toRemove); } Futures.successfulAsList(futures).get(); // wait for all task shutdowns to complete before returning } private void checkCurrentTaskState() throws ExecutionException, InterruptedException { List<ListenableFuture<Void>> futures = Lists.newArrayList(); Iterator<Map.Entry<Integer, TaskGroup>> iTaskGroups = taskGroups.entrySet().iterator(); while (iTaskGroups.hasNext()) { Map.Entry<Integer, TaskGroup> taskGroupEntry = iTaskGroups.next(); Integer groupId = taskGroupEntry.getKey(); TaskGroup taskGroup = taskGroupEntry.getValue(); // Iterate the list of known tasks in this group and: // 1) Kill any tasks which are not "current" (have the partitions, starting offsets, and minimumMessageTime // (if applicable) in [taskGroups]) // 2) Remove any tasks that have failed from the list // 3) If any task completed successfully, stop all the tasks in this group and move to the next group log.debug("Task group [%d] pre-pruning: %s", groupId, taskGroup.tasks.keySet()); Iterator<Map.Entry<String, TaskData>> iTasks = taskGroup.tasks.entrySet().iterator(); while (iTasks.hasNext()) { Map.Entry<String, TaskData> task = iTasks.next(); String taskId = task.getKey(); TaskData taskData = task.getValue(); // stop and remove bad tasks from the task group if (!isTaskCurrent(groupId, taskId)) { log.info("Stopping task [%s] which does not match the expected offset range and ingestion spec", taskId); futures.add(stopTask(taskId, false)); iTasks.remove(); continue; } // remove failed tasks if (taskData.status.isFailure()) { iTasks.remove(); continue; } // check for successful tasks, and if we find one, stop all tasks in the group and remove the group so it can // be recreated with the next set of offsets if (taskData.status.isSuccess()) { futures.add(stopTasksInGroup(taskGroup)); iTaskGroups.remove(); break; } } log.debug("Task group [%d] post-pruning: %s", groupId, taskGroup.tasks.keySet()); } Futures.successfulAsList(futures).get(); // wait for all task shutdowns to complete before returning } void createNewTasks() { // check that there is a current task group for each group of partitions in [partitionGroups] for (Integer groupId : partitionGroups.keySet()) { if (!taskGroups.containsKey(groupId)) { log.info("Creating new task group [%d] for partitions 
%s", groupId, partitionGroups.get(groupId).keySet()); Optional<DateTime> minimumMessageTime = (ioConfig.getLateMessageRejectionPeriod().isPresent() ? Optional.of(DateTime.now().minus(ioConfig.getLateMessageRejectionPeriod().get())) : Optional.<DateTime>absent()); taskGroups.put(groupId, new TaskGroup(generateStartingOffsetsForPartitionGroup(groupId), minimumMessageTime)); } } // iterate through all the current task groups and make sure each one has the desired number of replica tasks boolean createdTask = false; for (Map.Entry<Integer, TaskGroup> entry : taskGroups.entrySet()) { TaskGroup taskGroup = entry.getValue(); Integer groupId = entry.getKey(); if (ioConfig.getReplicas() > taskGroup.tasks.size()) { log.info( "Number of tasks [%d] does not match configured numReplicas [%d] in task group [%d], creating more tasks", taskGroup.tasks.size(), ioConfig.getReplicas(), groupId); createKafkaTasksForGroup(groupId, ioConfig.getReplicas() - taskGroup.tasks.size()); createdTask = true; } } if (createdTask && firstRunTime.isBeforeNow()) { // Schedule a run event after a short delay to update our internal data structures with the new tasks that were // just created. This is mainly for the benefit of the status API in situations where the run period is lengthy. scheduledExec.schedule(buildRunTask(), 5000, TimeUnit.MILLISECONDS); } } private void createKafkaTasksForGroup(int groupId, int replicas) { Map<Integer, Long> startPartitions = taskGroups.get(groupId).partitionOffsets; Map<Integer, Long> endPartitions = new HashMap<>(); for (Integer partition : startPartitions.keySet()) { endPartitions.put(partition, Long.MAX_VALUE); } String sequenceName = generateSequenceName(groupId); Map<String, String> consumerProperties = Maps.newHashMap(ioConfig.getConsumerProperties()); DateTime minimumMessageTime = taskGroups.get(groupId).minimumMessageTime.orNull(); KafkaIOConfig kafkaIOConfig = new KafkaIOConfig(sequenceName, new KafkaPartitions(ioConfig.getTopic(), startPartitions), new KafkaPartitions(ioConfig.getTopic(), endPartitions), consumerProperties, true, false, minimumMessageTime); for (int i = 0; i < replicas; i++) { String taskId = Joiner.on("_").join(sequenceName, getRandomId()); KafkaIndexTask indexTask = new KafkaIndexTask(taskId, new TaskResource(sequenceName, 1), spec.getDataSchema(), taskTuningConfig, kafkaIOConfig, spec.getContext(), null); Optional<TaskQueue> taskQueue = taskMaster.getTaskQueue(); if (taskQueue.isPresent()) { try { taskQueue.get().add(indexTask); } catch (EntryExistsException e) { log.error("Tried to add task [%s] but it already exists", indexTask.getId()); } } else { log.error("Failed to get task queue because I'm not the leader!"); } } } private ImmutableMap<Integer, Long> generateStartingOffsetsForPartitionGroup(int groupId) { ImmutableMap.Builder<Integer, Long> builder = ImmutableMap.builder(); for (Map.Entry<Integer, Long> entry : partitionGroups.get(groupId).entrySet()) { Integer partition = entry.getKey(); Long offset = entry.getValue(); if (offset != null && offset != NOT_SET) { // if we are given a startingOffset (set by a previous task group which is pending completion) then use it builder.put(partition, offset); } else { // if we don't have a startingOffset (first run or we had some previous failures and reset the offsets) then // get the offset from metadata storage (if available) or Kafka (otherwise) builder.put(partition, getOffsetFromStorageForPartition(partition)); } } return builder.build(); } /** * Queries the dataSource metadata table to see if there is a 
previous ending offset for this partition. If it doesn't * find any data, it will retrieve the latest or earliest Kafka offset depending on the useEarliestOffset config. */ private long getOffsetFromStorageForPartition(int partition) { long offset; Map<Integer, Long> metadataOffsets = getOffsetsFromMetadataStorage(); if (metadataOffsets.get(partition) != null) { offset = metadataOffsets.get(partition); log.debug("Getting offset [%,d] from metadata storage for partition [%d]", offset, partition); long latestKafkaOffset = getOffsetFromKafkaForPartition(partition, false); if (offset > latestKafkaOffset) { throw new ISE( "Offset in metadata storage [%,d] > latest Kafka offset [%,d] for partition[%d] dataSource[%s]. If these " + "messages are no longer available (perhaps you deleted and re-created your Kafka topic) you can use the " + "supervisor reset API to restart ingestion.", offset, latestKafkaOffset, partition, dataSource); } } else { offset = getOffsetFromKafkaForPartition(partition, ioConfig.isUseEarliestOffset()); log.debug("Getting offset [%,d] from Kafka for partition [%d]", offset, partition); } return offset; } private Map<Integer, Long> getOffsetsFromMetadataStorage() { DataSourceMetadata dataSourceMetadata = indexerMetadataStorageCoordinator.getDataSourceMetadata(dataSource); if (dataSourceMetadata != null && dataSourceMetadata instanceof KafkaDataSourceMetadata) { KafkaPartitions partitions = ((KafkaDataSourceMetadata) dataSourceMetadata).getKafkaPartitions(); if (partitions != null) { if (!ioConfig.getTopic().equals(partitions.getTopic())) { log.warn( "Topic in metadata storage [%s] doesn't match spec topic [%s], ignoring stored offsets", partitions.getTopic(), ioConfig.getTopic()); return ImmutableMap.of(); } else if (partitions.getPartitionOffsetMap() != null) { return partitions.getPartitionOffsetMap(); } } } return ImmutableMap.of(); } private long getOffsetFromKafkaForPartition(int partition, boolean useEarliestOffset) { TopicPartition topicPartition = new TopicPartition(ioConfig.getTopic(), partition); if (!consumer.assignment().contains(topicPartition)) { consumer.assign(Lists.newArrayList(topicPartition)); } if (useEarliestOffset) { consumer.seekToBeginning(topicPartition); } else { consumer.seekToEnd(topicPartition); } return consumer.position(topicPartition); } /** * Compares the sequence name from the task with one generated for the task's group ID and returns false if they do * not match. The sequence name is generated from a hash of the dataSchema, tuningConfig, starting offsets, and the * minimumMessageTime if set. 
*/ private boolean isTaskCurrent(int taskGroupId, String taskId) { Optional<Task> taskOptional = taskStorage.getTask(taskId); if (!taskOptional.isPresent() || !(taskOptional.get() instanceof KafkaIndexTask)) { return false; } String taskSequenceName = ((KafkaIndexTask) taskOptional.get()).getIOConfig().getBaseSequenceName(); return generateSequenceName(taskGroupId).equals(taskSequenceName); } private ListenableFuture<Void> stopTasksInGroup(TaskGroup taskGroup) { if (taskGroup == null) { return Futures.immediateFuture(null); } final List<ListenableFuture<Void>> futures = Lists.newArrayList(); for (Map.Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) { if (!entry.getValue().status.isComplete()) { futures.add(stopTask(entry.getKey(), false)); } } return Futures.transform(Futures.successfulAsList(futures), new Function<List<Void>, Void>() { @Nullable @Override public Void apply(@Nullable List<Void> input) { return null; } }, workerExec); } private ListenableFuture<Void> stopTask(final String id, final boolean publish) { return Futures.transform(taskClient.stopAsync(id, publish), new Function<Boolean, Void>() { @Nullable @Override public Void apply(@Nullable Boolean result) { if (result == null || !result) { log.info("Task [%s] failed to stop in a timely manner, killing task", id); killTask(id); } return null; } }, workerExec); } private void killTask(final String id) { Optional<TaskQueue> taskQueue = taskMaster.getTaskQueue(); if (taskQueue.isPresent()) { taskQueue.get().shutdown(id); } else { log.error("Failed to get task queue because I'm not the leader!"); } } private int getTaskGroupIdForPartition(int partition) { return partition % ioConfig.getTaskCount(); } private boolean isTaskInPendingCompletionGroups(String taskId) { for (List<TaskGroup> taskGroups : pendingCompletionTaskGroups.values()) { for (TaskGroup taskGroup : taskGroups) { if (taskGroup.tasks.containsKey(taskId)) { return true; } } } return false; } private KafkaSupervisorReport generateReport(boolean includeOffsets) { int numPartitions = 0; for (Map<Integer, Long> partitionGroup : partitionGroups.values()) { numPartitions += partitionGroup.size(); } KafkaSupervisorReport report = new KafkaSupervisorReport(dataSource, DateTime.now(), ioConfig.getTopic(), numPartitions, ioConfig.getReplicas(), ioConfig.getTaskDuration().getMillis() / 1000); List<TaskReportData> taskReports = Lists.newArrayList(); List<ListenableFuture<Map<Integer, Long>>> futures = Lists.newArrayList(); try { for (TaskGroup taskGroup : taskGroups.values()) { for (Map.Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) { String taskId = entry.getKey(); DateTime startTime = entry.getValue().startTime; Long remainingSeconds = null; if (startTime != null) { remainingSeconds = Math.max(0, ioConfig.getTaskDuration().getMillis() - (DateTime.now().getMillis() - startTime.getMillis())) / 1000; } taskReports.add(new TaskReportData(taskId, (includeOffsets ? 
taskGroup.partitionOffsets : null), null, startTime, remainingSeconds, TaskReportData.TaskType.ACTIVE)); if (includeOffsets) { futures.add(taskClient.getCurrentOffsetsAsync(taskId, false)); } } } for (List<TaskGroup> taskGroups : pendingCompletionTaskGroups.values()) { for (TaskGroup taskGroup : taskGroups) { for (Map.Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) { String taskId = entry.getKey(); DateTime startTime = entry.getValue().startTime; Long remainingSeconds = null; if (taskGroup.completionTimeout != null) { remainingSeconds = Math.max(0, taskGroup.completionTimeout.getMillis() - DateTime.now().getMillis()) / 1000; } taskReports.add( new TaskReportData(taskId, (includeOffsets ? taskGroup.partitionOffsets : null), null, startTime, remainingSeconds, TaskReportData.TaskType.PUBLISHING)); if (includeOffsets) { futures.add(taskClient.getCurrentOffsetsAsync(taskId, false)); } } } } List<Map<Integer, Long>> results = Futures.successfulAsList(futures).get(); for (int i = 0; i < taskReports.size(); i++) { TaskReportData reportData = taskReports.get(i); if (includeOffsets) { reportData.setCurrentOffsets(results.get(i)); } report.addTask(reportData); } } catch (Exception e) { log.warn(e, "Failed to generate status report"); } return report; } private Runnable buildRunTask() { return new Runnable() { @Override public void run() { notices.add(new RunNotice()); } }; } }
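
To make the scheduling rules above concrete, here is a small illustrative example (the class name and the literal values are invented for this tutorial; it reuses the java.util and Guava imports already present at the top of the file) of how the supervisor spreads topic partitions across task groups using the same modulo rule as getTaskGroupIdForPartition(), with each group run by a fixed number of replica tasks that read the same partitions from the same starting offsets.

class PartitionAssignmentSketch
{
  public static void main(String[] args)
  {
    int numPartitions = 5; // e.g. a topic with 5 partitions
    int taskCount = 2;     // maximum number of actively-reading task groups
    int replicas = 2;      // tasks per group, all reading the same partitions and offsets

    Map<Integer, List<Integer>> groups = new java.util.TreeMap<>();
    for (int partition = 0; partition < numPartitions; partition++) {
      int groupId = partition % taskCount; // same rule as getTaskGroupIdForPartition()
      if (!groups.containsKey(groupId)) {
        groups.put(groupId, Lists.<Integer>newArrayList());
      }
      groups.get(groupId).add(partition);
    }

    // prints:
    //   group [0] reads partitions [0, 2, 4] using [2] replica tasks
    //   group [1] reads partitions [1, 3] using [2] replica tasks
    for (Map.Entry<Integer, List<Integer>> entry : groups.entrySet()) {
      System.out.println(
          String.format("group [%d] reads partitions %s using [%d] replica tasks", entry.getKey(), entry.getValue(), replicas)
      );
    }
  }
}

With taskCount = 2 and replicas = 2 as above, the supervisor keeps four actively-reading tasks at steady state, plus any pending-completion tasks that are still publishing.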