// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.samza.coordinator; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.apache.samza.AzureClient; import org.apache.samza.config.AzureConfig; import org.apache.samza.coordinator.data.BarrierState; import org.apache.samza.config.ApplicationConfig; import org.apache.samza.config.Config; import org.apache.samza.config.ConfigException; import org.apache.samza.config.JobConfig; import org.apache.samza.config.TaskConfig; import org.apache.samza.container.TaskName; import org.apache.samza.container.grouper.stream.SystemStreamPartitionGrouper; import org.apache.samza.container.grouper.stream.SystemStreamPartitionGrouperFactory; import org.apache.samza.job.model.JobModel; import org.apache.samza.runtime.ProcessorIdGenerator; import org.apache.samza.coordinator.scheduler.HeartbeatScheduler; import 
org.apache.samza.coordinator.scheduler.JMVersionUpgradeScheduler; import org.apache.samza.coordinator.scheduler.LeaderBarrierCompleteScheduler; import org.apache.samza.coordinator.scheduler.LeaderLivenessCheckScheduler; import org.apache.samza.coordinator.scheduler.LivenessCheckScheduler; import org.apache.samza.coordinator.scheduler.RenewLeaseScheduler; import org.apache.samza.coordinator.scheduler.SchedulerStateChangeListener; import org.apache.samza.system.StreamMetadataCache; import org.apache.samza.system.SystemStream; import org.apache.samza.system.SystemStreamMetadata; import org.apache.samza.system.SystemStreamPartition; import org.apache.samza.util.BlobUtils; import org.apache.samza.util.ClassLoaderHelper; import org.apache.samza.util.LeaseBlobManager; import org.apache.samza.util.TableUtils; import org.apache.samza.util.Util; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import scala.collection.JavaConverters; /** * Class that provides coordination mechanism for Samza standalone in Azure. * Handles processor lifecycle through Azure blob and table storage. Orchestrates leader election. * The leader job coordinator generates partition mapping, writes shared data to the blob and manages rebalancing. 
*/ public class AzureJobCoordinator implements JobCoordinator { private static final Logger LOG = LoggerFactory.getLogger(AzureJobCoordinator.class); private static final int METADATA_CACHE_TTL_MS = 5000; private static final String INITIAL_STATE = "UNASSIGNED"; private final Consumer<String> errorHandler; private final AzureLeaderElector azureLeaderElector; private final BlobUtils leaderBlob; private final TableUtils table; private final Config config; private final String processorId; private final AzureClient client; private final AtomicReference<String> currentJMVersion; private final AtomicBoolean versionUpgradeDetected; private final HeartbeatScheduler heartbeat; private final JMVersionUpgradeScheduler versionUpgrade; private final LeaderLivenessCheckScheduler leaderAlive; private LivenessCheckScheduler liveness; private RenewLeaseScheduler renewLease; private LeaderBarrierCompleteScheduler leaderBarrierScheduler; private StreamMetadataCache streamMetadataCache = null; private JobCoordinatorListener coordinatorListener = null; private JobModel jobModel = null; /** * Creates an instance of Azure job coordinator, along with references to Azure leader elector, Azure Blob and Azure Table. * @param config User defined config */ public AzureJobCoordinator(Config config) { //TODO: Cleanup previous values in the table when barrier times out. 
this.config = config; processorId = createProcessorId(config); currentJMVersion = new AtomicReference<>(INITIAL_STATE); AzureConfig azureConfig = new AzureConfig(config); client = new AzureClient(azureConfig.getAzureConnect()); leaderBlob = new BlobUtils(client, azureConfig.getAzureContainerName(), azureConfig.getAzureBlobName(), azureConfig.getAzureBlobLength()); errorHandler = (errorMsg) -> { LOG.error(errorMsg); stop(); }; table = new TableUtils(client, azureConfig.getAzureTableName(), INITIAL_STATE); azureLeaderElector = new AzureLeaderElector(new LeaseBlobManager(leaderBlob.getBlob())); azureLeaderElector.setLeaderElectorListener(new AzureLeaderElectorListener()); versionUpgradeDetected = new AtomicBoolean(false); heartbeat = new HeartbeatScheduler(errorHandler, table, currentJMVersion, processorId); versionUpgrade = new JMVersionUpgradeScheduler(errorHandler, leaderBlob, currentJMVersion, versionUpgradeDetected, processorId); leaderAlive = new LeaderLivenessCheckScheduler(errorHandler, table, leaderBlob, currentJMVersion, INITIAL_STATE); leaderBarrierScheduler = null; renewLease = null; liveness = null; } @Override public void start() { LOG.info("Starting Azure job coordinator."); streamMetadataCache = StreamMetadataCache.apply(METADATA_CACHE_TTL_MS, config); table.addProcessorEntity(INITIAL_STATE, processorId, false); // Start scheduler for heartbeating LOG.info("Starting scheduler for heartbeating."); heartbeat.scheduleTask(); azureLeaderElector.tryBecomeLeader(); // Start scheduler to check for job model version upgrades LOG.info("Starting scheduler to check for job model version upgrades."); versionUpgrade.setStateChangeListener(createJMVersionUpgradeListener()); versionUpgrade.scheduleTask(); // Start scheduler to check for leader liveness LOG.info("Starting scheduler to check for leader liveness."); leaderAlive.setStateChangeListener(createLeaderLivenessListener()); leaderAlive.scheduleTask(); } @Override public void stop() { LOG.info("Shutting down 
Azure job coordinator."); if (coordinatorListener != null) { coordinatorListener.onJobModelExpired(); } // Resign leadership if (azureLeaderElector.amILeader()) { azureLeaderElector.resignLeadership(); } // Shutdown all schedulers shutdownSchedulers(); if (coordinatorListener != null) { coordinatorListener.onCoordinatorStop(); } } @Override public String getProcessorId() { return processorId; } @Override public void setListener(JobCoordinatorListener listener) { this.coordinatorListener = listener; } @Override public JobModel getJobModel() { return jobModel; } private void shutdownSchedulers() { if (renewLease != null) { renewLease.shutdown(); } if (leaderBarrierScheduler != null) { leaderBarrierScheduler.shutdown(); } if (liveness != null) { liveness.shutdown(); } heartbeat.shutdown(); leaderAlive.shutdown(); versionUpgrade.shutdown(); } /** * Creates a listener for LeaderBarrierCompleteScheduler class. * Invoked by the leader when it detects that rebalancing has completed by polling the processor table. * Updates the barrier state on the blob to denote that the barrier has completed. * Cancels all future tasks scheduled by the LeaderBarrierComplete scheduler to check if barrier has completed. * @return an instance of SchedulerStateChangeListener. */ private SchedulerStateChangeListener createLeaderBarrierCompleteListener(String nextJMVersion, AtomicBoolean barrierTimeout) { return () -> { versionUpgradeDetected.getAndSet(false); String state; if (barrierTimeout.get()) { LOG.error("Barrier timed out for version {}", nextJMVersion); state = BarrierState.TIMEOUT.name() + " " + nextJMVersion; } else { LOG.info("Leader detected barrier completion."); state = BarrierState.END.name() + " " + nextJMVersion; } if (!leaderBlob.publishBarrierState(state, azureLeaderElector.getLeaseId().get())) { LOG.info("Leader failed to publish the job model {}. 
Stopping the processor with PID: .", jobModel, processorId); stop(); table.deleteProcessorEntity(currentJMVersion.get(), processorId); } leaderBarrierScheduler.shutdown(); }; } /** * Creates a listener for LivenessCheckScheduler class. * Invoked by the leader when the list of active processors in the system changes. * @return an instance of SchedulerStateChangeListener. */ private SchedulerStateChangeListener createLivenessListener(AtomicReference<List<String>> liveProcessors) { return () -> { LOG.info("Leader detected change in list of live processors."); doOnProcessorChange(liveProcessors.get()); }; } /** * Creates a listener for JMVersionUpgradeScheduler class. * Invoked when the processor detects a job model version upgrade on the blob. * Stops listening for job model version upgrades until rebalancing achieved. * @return an instance of SchedulerStateChangeListener. */ private SchedulerStateChangeListener createJMVersionUpgradeListener() { return () -> { LOG.info("Job model version upgrade detected."); versionUpgradeDetected.getAndSet(true); onNewJobModelAvailable(leaderBlob.getJobModelVersion()); }; } /** * Creates a listener for LeaderLivenessCheckScheduler class. * Invoked when an existing leader dies. Enables the JC to participate in leader election again. * @return an instance of SchedulerStateChangeListener. */ private SchedulerStateChangeListener createLeaderLivenessListener() { return () -> { LOG.info("Existing leader died."); azureLeaderElector.tryBecomeLeader(); }; } /** * For each input stream specified in config, exactly determine its * partitions, returning a set of SystemStreamPartitions containing them all. 
*/ private Set<SystemStreamPartition> getInputStreamPartitions() { TaskConfig taskConfig = new TaskConfig(config); scala.collection.immutable.Set<SystemStream> inputSystemStreams = taskConfig.getInputStreams(); // Get the set of partitions for each SystemStream from the stream metadata Set<SystemStreamPartition> sspSet = JavaConverters .mapAsJavaMapConverter(streamMetadataCache.getStreamMetadata(inputSystemStreams, true)).asJava() .entrySet().stream().flatMap(this::mapSSMToSSP).collect(Collectors.toSet()); return sspSet; } private Stream<SystemStreamPartition> mapSSMToSSP(Map.Entry<SystemStream, SystemStreamMetadata> ssMs) { return ssMs.getValue().getSystemStreamPartitionMetadata().keySet().stream() .map(partition -> new SystemStreamPartition(ssMs.getKey(), partition)); } /** * Gets a SystemStreamPartitionGrouper object from the configuration. */ private SystemStreamPartitionGrouper getSystemStreamPartitionGrouper() { JobConfig jobConfig = new JobConfig(config); String factoryString = jobConfig.getSystemStreamPartitionGrouperFactory(); SystemStreamPartitionGrouper grouper = Util.<SystemStreamPartitionGrouperFactory>getObj(factoryString) .getSystemStreamPartitionGrouper(jobConfig); return grouper; } private int getMaxNumTasks() { // Do grouping to fetch TaskName to SSP mapping Set<SystemStreamPartition> allSystemStreamPartitions = getInputStreamPartitions(); SystemStreamPartitionGrouper grouper = getSystemStreamPartitionGrouper(); Map<TaskName, Set<SystemStreamPartition>> groups = grouper.group(allSystemStreamPartitions); LOG.info("SystemStreamPartitionGrouper " + grouper.toString() + " has grouped the SystemStreamPartitions into " + Integer.toString(groups.size()) + " tasks with the following taskNames: {}", groups.keySet()); return groups.size(); } /** * Called only by the leader, either when the processor becomes the leader, or when the list of live processors changes. * @param currentProcessorIds New updated list of processor IDs which caused the rebalancing. 
*/ private void doOnProcessorChange(List<String> currentProcessorIds) { // if list of processors is empty - it means we are called from 'onBecomeLeader' // Check if number of processors is greater than number of tasks List<String> initialProcessorIds = new ArrayList<>(currentProcessorIds); int numTasks = getMaxNumTasks(); if (currentProcessorIds.size() > numTasks) { int iterator = 0; while (currentProcessorIds.size() != numTasks) { if (!currentProcessorIds.get(iterator).equals(processorId)) { currentProcessorIds.remove(iterator); iterator++; } } } LOG.info("currentProcessorIds = {}", currentProcessorIds); LOG.info("initialProcessorIds = {}", initialProcessorIds); String nextJMVersion; String prevJMVersion = currentJMVersion.get(); JobModel prevJobModel = jobModel; AtomicBoolean barrierTimeout = new AtomicBoolean(false); if (currentProcessorIds.isEmpty()) { if (currentJMVersion.get().equals(INITIAL_STATE)) { nextJMVersion = "1"; } else { nextJMVersion = Integer.toString(Integer.valueOf(prevJMVersion) + 1); } currentProcessorIds = new ArrayList<>(table.getActiveProcessorsList(currentJMVersion)); initialProcessorIds = currentProcessorIds; } else { //Check if previous barrier not reached, then previous barrier times out. String blobJMV = leaderBlob.getJobModelVersion(); nextJMVersion = Integer.toString(Integer.valueOf(prevJMVersion) + 1); if (blobJMV != null && Integer.valueOf(blobJMV) > Integer.valueOf(prevJMVersion)) { prevJMVersion = blobJMV; prevJobModel = leaderBlob.getJobModel(); nextJMVersion = Integer.toString(Integer.valueOf(blobJMV) + 1); versionUpgradeDetected.getAndSet(false); leaderBarrierScheduler.shutdown(); leaderBlob.publishBarrierState(BarrierState.TIMEOUT.name() + " " + blobJMV, azureLeaderElector.getLeaseId().get()); } } // Generate the new JobModel JobModel newJobModel = JobModelManager.readJobModel(this.config, Collections.emptyMap(), null, streamMetadataCache, currentProcessorIds); LOG.info("pid=" + processorId + "Generated new Job Model. 
Version = " + nextJMVersion); // Publish the new job model boolean jmWrite = leaderBlob.publishJobModel(prevJobModel, newJobModel, prevJMVersion, nextJMVersion, azureLeaderElector.getLeaseId().get()); // Publish barrier state boolean barrierWrite = leaderBlob.publishBarrierState(BarrierState.START.name() + " " + nextJMVersion, azureLeaderElector.getLeaseId().get()); barrierTimeout.set(false); // Publish list of processors this function was called with boolean processorWrite = leaderBlob.publishLiveProcessorList(initialProcessorIds, azureLeaderElector.getLeaseId().get()); //Shut down processor if write fails even after retries. These writes have an inherent retry policy. if (!jmWrite || !barrierWrite || !processorWrite) { LOG.info("Leader failed to publish the job model {}. Stopping the processor with PID: .", jobModel, processorId); stop(); table.deleteProcessorEntity(currentJMVersion.get(), processorId); } LOG.info("pid=" + processorId + "Published new Job Model. Version = " + nextJMVersion); // Start scheduler to check if barrier reached long startTime = System.currentTimeMillis(); leaderBarrierScheduler = new LeaderBarrierCompleteScheduler(errorHandler, table, nextJMVersion, initialProcessorIds, startTime, barrierTimeout, currentJMVersion, processorId); leaderBarrierScheduler .setStateChangeListener(createLeaderBarrierCompleteListener(nextJMVersion, barrierTimeout)); leaderBarrierScheduler.scheduleTask(); } /** * Called when the JC detects a job model version upgrade on the shared blob. * @param nextJMVersion The new job model version after rebalancing. */ private void onNewJobModelAvailable(final String nextJMVersion) { LOG.info("pid=" + processorId + "new JobModel available with job model version {}", nextJMVersion); //Get the new job model from blob jobModel = leaderBlob.getJobModel(); LOG.info("pid=" + processorId + ": new JobModel available. 
ver=" + nextJMVersion + "; jm = " + jobModel); if (!jobModel.getContainers().containsKey(processorId)) { LOG.info("JobModel: {} does not contain the processorId: {}. Stopping the processor.", jobModel, processorId); stop(); table.deleteProcessorEntity(currentJMVersion.get(), processorId); } else { //Stop current work if (coordinatorListener != null) { coordinatorListener.onJobModelExpired(); } // Add entry with new job model version to the processor table table.addProcessorEntity(nextJMVersion, processorId, azureLeaderElector.amILeader()); // Start polling blob to check if barrier reached Random random = new Random(); String blobBarrierState = leaderBlob.getBarrierState(); while (true) { if (blobBarrierState.equals(BarrierState.END.name() + " " + nextJMVersion)) { LOG.info("Barrier completion detected by the worker for barrier version {}.", nextJMVersion); versionUpgradeDetected.getAndSet(false); onNewJobModelConfirmed(nextJMVersion); break; } else if (blobBarrierState.equals(BarrierState.TIMEOUT.name() + " " + nextJMVersion) || (Integer.valueOf(leaderBlob.getJobModelVersion()) > Integer.valueOf(nextJMVersion))) { LOG.info("Barrier timed out for version number {}", nextJMVersion); versionUpgradeDetected.getAndSet(false); break; } else { try { Thread.sleep(random.nextInt(5000)); } catch (InterruptedException e) { Thread.interrupted(); } LOG.info("Checking for barrier state on the blob again..."); blobBarrierState = leaderBlob.getBarrierState(); } } } } /** * Called when the JC detects that the barrier has completed by checking the barrier state on the blob. * @param nextJMVersion The new job model version after rebalancing. 
*/ private void onNewJobModelConfirmed(final String nextJMVersion) { LOG.info("pid=" + processorId + "new version " + nextJMVersion + " of the job model got confirmed"); // Delete previous value if (table.getEntity(currentJMVersion.get(), processorId) != null) { table.deleteProcessorEntity(currentJMVersion.get(), processorId); } if (table.getEntity(INITIAL_STATE, processorId) != null) { table.deleteProcessorEntity(INITIAL_STATE, processorId); } //Start heartbeating to new entry only when barrier reached. //Changing the current job model version enables that since we are heartbeating to a row identified by the current job model version. currentJMVersion.getAndSet(nextJMVersion); //Start the container with the new model if (coordinatorListener != null) { coordinatorListener.onNewJobModel(processorId, jobModel); } } private String createProcessorId(Config config) { // TODO: This check to be removed after 0.13+ ApplicationConfig appConfig = new ApplicationConfig(config); if (appConfig.getProcessorId() != null) { return appConfig.getProcessorId(); } else if (StringUtils.isNotBlank(appConfig.getAppProcessorIdGeneratorClass())) { ProcessorIdGenerator idGenerator = ClassLoaderHelper .fromClassName(appConfig.getAppProcessorIdGeneratorClass(), ProcessorIdGenerator.class); return idGenerator.generateProcessorId(config); } else { throw new ConfigException(String.format("Expected either %s or %s to be configured", ApplicationConfig.PROCESSOR_ID, ApplicationConfig.APP_PROCESSOR_ID_GENERATOR_CLASS)); } } public class AzureLeaderElectorListener implements LeaderElectorListener { /** * Keep renewing the lease and do the required tasks as a leader. */ @Override public void onBecomingLeader() { // Update table to denote that it is a leader. 
table.updateIsLeader(currentJMVersion.get(), processorId, true); // Schedule a task to renew the lease after a fixed time interval LOG.info("Starting scheduler to keep renewing lease held by the leader."); renewLease = new RenewLeaseScheduler((errorMsg) -> { LOG.error(errorMsg); table.updateIsLeader(currentJMVersion.get(), processorId, false); azureLeaderElector.resignLeadership(); renewLease.shutdown(); liveness.shutdown(); }, azureLeaderElector.getLeaseBlobManager(), azureLeaderElector.getLeaseId()); renewLease.scheduleTask(); doOnProcessorChange(new ArrayList<>()); // Start scheduler to check for change in list of live processors LOG.info("Starting scheduler to check for change in list of live processors in the system."); liveness = new LivenessCheckScheduler(errorHandler, table, leaderBlob, currentJMVersion, processorId); liveness.setStateChangeListener(createLivenessListener(liveness.getLiveProcessors())); liveness.scheduleTask(); } } }