com.spotify.styx.StyxScheduler.java Source code

Java tutorial

Introduction

Here is the source code for com.spotify.styx.StyxScheduler.java

Source

/*-
 * -\-\-
 * Spotify Styx Scheduler Service
 * --
 * Copyright (C) 2016 Spotify AB
 * --
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * -/-/-
 */

package com.spotify.styx;

import static com.spotify.styx.monitoring.MeteredProxy.instrument;
import static com.spotify.styx.util.Connections.createBigTableConnection;
import static com.spotify.styx.util.Connections.createDatastore;
import static com.spotify.styx.util.GuardedRunnable.guard;
import static com.spotify.styx.util.ReplayEvents.replayActiveStates;
import static com.spotify.styx.util.ReplayEvents.transitionLogger;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;
import static java.util.stream.Collectors.toMap;

import com.codahale.metrics.Gauge;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.googleapis.util.Utils;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.services.container.v1beta1.Container;
import com.google.api.services.container.v1beta1.ContainerScopes;
import com.google.api.services.container.v1beta1.model.Cluster;
import com.google.api.services.iam.v1.Iam;
import com.google.api.services.iam.v1.IamScopes;
import com.google.cloud.datastore.Datastore;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Throwables;
import com.google.common.io.Closer;
import com.google.common.util.concurrent.RateLimiter;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.spotify.apollo.AppInit;
import com.spotify.apollo.Environment;
import com.spotify.apollo.route.Route;
import com.spotify.metrics.core.SemanticMetricRegistry;
import com.spotify.styx.api.Api;
import com.spotify.styx.api.SchedulerResource;
import com.spotify.styx.docker.DockerRunner;
import com.spotify.styx.model.Event;
import com.spotify.styx.model.SequenceEvent;
import com.spotify.styx.model.StyxConfig;
import com.spotify.styx.model.Workflow;
import com.spotify.styx.model.WorkflowId;
import com.spotify.styx.model.WorkflowInstance;
import com.spotify.styx.monitoring.MetricsStats;
import com.spotify.styx.monitoring.MonitoringHandler;
import com.spotify.styx.monitoring.Stats;
import com.spotify.styx.publisher.Publisher;
import com.spotify.styx.serialization.PersistentWorkflowInstanceState;
import com.spotify.styx.state.OutputHandler;
import com.spotify.styx.state.QueuedStateManager;
import com.spotify.styx.state.RunState;
import com.spotify.styx.state.StateManager;
import com.spotify.styx.state.TimeoutConfig;
import com.spotify.styx.state.handlers.DockerRunnerHandler;
import com.spotify.styx.state.handlers.ExecutionDescriptionHandler;
import com.spotify.styx.state.handlers.PublisherHandler;
import com.spotify.styx.state.handlers.TerminationHandler;
import com.spotify.styx.storage.AggregateStorage;
import com.spotify.styx.storage.InMemStorage;
import com.spotify.styx.storage.Storage;
import com.spotify.styx.util.CachedSupplier;
import com.spotify.styx.util.Debug;
import com.spotify.styx.util.DockerImageValidator;
import com.spotify.styx.util.IsClosedException;
import com.spotify.styx.util.RetryUtil;
import com.spotify.styx.util.StorageFactory;
import com.spotify.styx.util.Time;
import com.spotify.styx.util.TriggerUtil;
import com.spotify.styx.util.WorkflowValidator;
import com.spotify.styx.workflow.WorkflowInitializer;
import com.typesafe.config.Config;
import io.fabric8.kubernetes.client.ConfigBuilder;
import io.fabric8.kubernetes.client.DefaultKubernetesClient;
import io.fabric8.kubernetes.client.NamespacedKubernetesClient;
import java.io.Closeable;
import java.io.IOException;
import java.security.GeneralSecurityException;
import java.time.Duration;
import java.time.Instant;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import org.apache.hadoop.hbase.client.Connection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class StyxScheduler implements AppInit {

    /** Application name set on the Google API clients (GKE and IAM) built by this class. */
    public static final String SERVICE_NAME = "styx-scheduler";

    // Configuration keys read by getKubernetesClient: GKE_CLUSTER_PATH is the root section, which
    // holds one sub-section per docker runner id; the remaining keys are looked up inside that
    // sub-section.
    public static final String GKE_CLUSTER_PATH = "styx.gke";
    public static final String GKE_CLUSTER_PROJECT_ID = "project-id";
    public static final String GKE_CLUSTER_ZONE = "cluster-zone";
    public static final String GKE_CLUSTER_ID = "cluster-id";
    public static final String GKE_CLUSTER_NAMESPACE = "namespace";

    /** Config section handed to TimeoutConfig.createFromConfig in create(). */
    public static final String STYX_STALE_STATE_TTL_CONFIG = "styx.stale-state-ttls";
    /** Config key holding the run mode; compared against STYX_MODE_DEVELOPMENT by isDevMode. */
    public static final String STYX_MODE = "styx.mode";
    public static final String STYX_MODE_DEVELOPMENT = "development";

    // Periods, in seconds, of the background tasks scheduled from create().
    public static final int SCHEDULER_TICK_INTERVAL_SECONDS = 2;
    // Shared by the trigger manager and the backfill trigger manager.
    public static final int TRIGGER_MANAGER_TICK_INTERVAL_SECONDS = 1;
    // 30 minutes, expressed in seconds.
    public static final long CLEANER_TICK_INTERVAL_SECONDS = MINUTES.toSeconds(30);
    public static final int RUNTIME_CONFIG_UPDATE_INTERVAL_SECONDS = 5;
    // Arguments of DEFAULT_RETRY_UTIL below.
    public static final Duration DEFAULT_RETRY_BASE_DELAY = Duration.ofMinutes(3);
    public static final int DEFAULT_RETRY_MAX_EXPONENT = 4;
    // Passed to the AggregateStorage constructor by the default storage factory.
    public static final Duration DEFAULT_RETRY_BASE_DELAY_BT = Duration.ofSeconds(1);
    /** RetryUtil used unless the Builder is given another one. */
    public static final RetryUtil DEFAULT_RETRY_UTIL = new RetryUtil(DEFAULT_RETRY_BASE_DELAY,
            DEFAULT_RETRY_MAX_EXPONENT);
    /**
     * Initial rate of the dequeue RateLimiter, and the value used by updateRuntimeConfig when the
     * stored StyxConfig carries no submission rate limit.
     */
    public static final double DEFAULT_SUBMISSION_RATE_PER_SEC = 1000D;

    private static final Logger LOG = LoggerFactory.getLogger(StyxScheduler.class);

    // Collaborators and factories copied from the Builder; all are checked non-null in the constructor.
    private final Time time;
    private final StorageFactory storageFactory;
    private final DockerRunnerFactory dockerRunnerFactory;
    private final StatsFactory statsFactory;
    private final ExecutorFactory executorFactory;
    private final PublisherFactory publisherFactory;
    private final RetryUtil retryUtil;
    private final WorkflowResourceDecorator resourceDecorator;
    private final EventConsumerFactory eventConsumerFactory;
    private final WorkflowConsumerFactory workflowConsumerFactory;
    private final WorkflowExecutionGateFactory executionGateFactory;

    // Assigned at the end of create(); they remain null until create() has completed. They back
    // the @VisibleForTesting accessors of this class.
    private StateManager stateManager;
    private Scheduler scheduler;
    private TriggerManager triggerManager;
    private BackfillTriggerManager backfillTriggerManager;

    private Consumer<Workflow> workflowRemoveListener;
    private Consumer<Workflow> workflowChangeListener;

    // === Type aliases for dependency injectors ====================================================
    /** Creates the initial RunState for a workflow instance. */
    public interface StateFactory extends Function<WorkflowInstance, RunState> {
    }

    /** Creates the Stats implementation for an environment. */
    public interface StatsFactory extends Function<Environment, Stats> {
    }

    /** Creates the Publisher for an environment; the result is registered with the environment closer. */
    public interface PublisherFactory extends Function<Environment, Publisher> {
    }

    /** Creates the (event, state) consumer that create() hands to the QueuedStateManager. */
    public interface EventConsumerFactory
            extends BiFunction<Environment, Stats, BiConsumer<SequenceEvent, RunState>> {
    }

    /**
     * Creates the consumer notified on workflow changes. It is invoked with (old, new) workflow:
     * the old value is empty when a workflow is added, the new value is empty when one is removed.
     */
    public interface WorkflowConsumerFactory
            extends BiFunction<Environment, Stats, BiConsumer<Optional<Workflow>, Optional<Workflow>>> {
    }

    /** Creates the WorkflowExecutionGate passed to the Scheduler. */
    public interface WorkflowExecutionGateFactory extends BiFunction<Environment, Storage, WorkflowExecutionGate> {
    }

    /**
     * Creates the DockerRunner for a given runner id. create() calls this through
     * DockerRunner.routing, passing the id it is asked to route to.
     */
    @FunctionalInterface
    interface DockerRunnerFactory {
        DockerRunner create(String id, Environment environment, StateManager stateManager,
                ScheduledExecutorService scheduler, Stats stats, Debug debug);
    }

    /** Creates the scheduled executor that runs the periodic background tasks. */
    @FunctionalInterface
    interface ExecutorFactory {
        ScheduledExecutorService create(int threads, ThreadFactory threadFactory);
    }

    /**
     * Fluent builder for {@link StyxScheduler}.
     *
     * <p>Every collaborator has a default, so {@code newBuilder().build()} yields a working
     * instance. Setters store the value as given; null values are rejected later, by the
     * {@link StyxScheduler} constructor invoked from {@link #build()}.
     */
    public static class Builder {

        // Defaults: system clock, dev-mode aware storage, the runner factory of this class,
        // metrics-backed stats, a plain scheduled thread pool, and no-op hooks for the rest.
        private Time time = Instant::now;
        private StorageFactory storageFactory = storage(StyxScheduler::storage);
        private DockerRunnerFactory dockerRunnerFactory = StyxScheduler::createDockerRunner;
        private StatsFactory statsFactory = StyxScheduler::stats;
        private ExecutorFactory executorFactory = Executors::newScheduledThreadPool;
        private PublisherFactory publisherFactory = environment -> Publisher.NOOP;
        private RetryUtil retryUtil = DEFAULT_RETRY_UTIL;
        private WorkflowResourceDecorator resourceDecorator = WorkflowResourceDecorator.NOOP;
        private EventConsumerFactory eventConsumerFactory = (environment, metrics) -> (event, runState) -> {
            // events are ignored by default
        };
        private WorkflowConsumerFactory workflowConsumerFactory = (environment, metrics) -> (before, after) -> {
            // workflow changes are ignored by default
        };
        private WorkflowExecutionGateFactory executionGateFactory =
                (environment, store) -> WorkflowExecutionGate.NOOP;

        /** Sets the clock used by the scheduler components. */
        public Builder setTime(Time time) {
            this.time = time;
            return this;
        }

        /** Sets the factory that produces the Storage backend. */
        public Builder setStorageFactory(StorageFactory storageFactory) {
            this.storageFactory = storageFactory;
            return this;
        }

        /** Sets the factory that produces a DockerRunner for a runner id. */
        public Builder setDockerRunnerFactory(DockerRunnerFactory dockerRunnerFactory) {
            this.dockerRunnerFactory = dockerRunnerFactory;
            return this;
        }

        /** Sets the factory that produces the Stats implementation. */
        public Builder setStatsFactory(StatsFactory statsFactory) {
            this.statsFactory = statsFactory;
            return this;
        }

        /** Sets the factory that produces the executor for periodic tasks. */
        public Builder setExecutorFactory(ExecutorFactory executorFactory) {
            this.executorFactory = executorFactory;
            return this;
        }

        /** Sets the factory that produces the Publisher. */
        public Builder setPublisherFactory(PublisherFactory publisherFactory) {
            this.publisherFactory = publisherFactory;
            return this;
        }

        /** Sets the RetryUtil handed to the TerminationHandler. */
        public Builder setRetryUtil(RetryUtil retryUtil) {
            this.retryUtil = retryUtil;
            return this;
        }

        /** Sets the resource decorator handed to the Scheduler. */
        public Builder setResourceDecorator(WorkflowResourceDecorator resourceDecorator) {
            this.resourceDecorator = resourceDecorator;
            return this;
        }

        /** Sets the factory of the consumer that observes (event, state) pairs. */
        public Builder setEventConsumerFactory(EventConsumerFactory eventConsumerFactory) {
            this.eventConsumerFactory = eventConsumerFactory;
            return this;
        }

        /** Sets the factory of the consumer that observes workflow additions, changes and removals. */
        public Builder setWorkflowConsumerFactory(WorkflowConsumerFactory workflowConsumerFactory) {
            this.workflowConsumerFactory = workflowConsumerFactory;
            return this;
        }

        /** Sets the factory that produces the WorkflowExecutionGate. */
        public Builder setExecutionGateFactory(WorkflowExecutionGateFactory executionGateFactory) {
            this.executionGateFactory = executionGateFactory;
            return this;
        }

        /**
         * Builds the scheduler from the current settings.
         *
         * @throws NullPointerException if any collaborator was set to null
         */
        public StyxScheduler build() {
            return new StyxScheduler(this);
        }
    }

    /** Returns a new {@link Builder} pre-populated with the default collaborators. */
    public static Builder newBuilder() {
        return new Builder();
    }

    /** Returns a scheduler built entirely from the {@link Builder} defaults. */
    public static StyxScheduler createDefault() {
        return newBuilder().build();
    }

    // ==============================================================================================

    /**
     * Copies the collaborators out of the builder.
     *
     * @throws NullPointerException if any builder field is null
     */
    private StyxScheduler(Builder builder) {
        this.time = requireNonNull(builder.time);
        this.storageFactory = requireNonNull(builder.storageFactory);
        this.dockerRunnerFactory = requireNonNull(builder.dockerRunnerFactory);
        this.statsFactory = requireNonNull(builder.statsFactory);
        this.executorFactory = requireNonNull(builder.executorFactory);
        this.publisherFactory = requireNonNull(builder.publisherFactory);
        this.retryUtil = requireNonNull(builder.retryUtil);
        this.resourceDecorator = requireNonNull(builder.resourceDecorator);
        this.eventConsumerFactory = requireNonNull(builder.eventConsumerFactory);
        this.workflowConsumerFactory = requireNonNull(builder.workflowConsumerFactory);
        this.executionGateFactory = requireNonNull(builder.executionGateFactory);
    }

    /**
     * Wires up and starts the scheduler inside the given Apollo environment.
     *
     * <p>In order: creates the executors and registers them with the environment closer, builds
     * storage, state manager, docker runner and output handlers, restores persisted state, schedules
     * the periodic tasks (trigger managers, scheduler, runtime config update, cleaner), registers
     * metrics and finally registers the HTTP routes. The created components are stored in the
     * instance fields used by the test accessors.
     *
     * @param environment the environment providing configuration, closer and routing engine
     */
    @Override
    public void create(Environment environment) {
        final Config config = environment.config();
        final Closer closer = environment.closer();

        // SLF4J treats a trailing Throwable as the exception to log (with stack trace) and does
        // not substitute it into the message, so the format must only have a placeholder for the thread.
        final Thread.UncaughtExceptionHandler uncaughtExceptionHandler = (thread, throwable) -> LOG
                .error("Thread {} threw an uncaught exception", thread, throwable);
        final ThreadFactory schedulerTf = new ThreadFactoryBuilder().setDaemon(true)
                .setNameFormat("styx-scheduler-%d").setUncaughtExceptionHandler(uncaughtExceptionHandler).build();
        final ThreadFactory eventTf = new ThreadFactoryBuilder().setDaemon(true)
                .setNameFormat("styx-event-worker-%d").setUncaughtExceptionHandler(uncaughtExceptionHandler)
                .build();

        final Publisher publisher = publisherFactory.apply(environment);
        closer.register(publisher);

        // Executors: periodic tasks, output handlers and the event consumer each get their own.
        final ScheduledExecutorService executor = executorFactory.create(3, schedulerTf);
        closer.register(executorCloser("scheduler", executor));
        final ExecutorService outputHandlerExecutor = Executors.newFixedThreadPool(16, eventTf);
        closer.register(executorCloser("output-handler", outputHandlerExecutor));
        final ExecutorService eventConsumerExecutor = Executors.newSingleThreadExecutor();
        closer.register(executorCloser("event-consumer", eventConsumerExecutor));

        final Stats stats = statsFactory.apply(environment);
        final WorkflowCache workflowCache = new InMemWorkflowCache();
        final Storage storage = instrument(Storage.class, storageFactory.apply(environment), stats, time);

        warmUpCache(workflowCache, storage);

        final QueuedStateManager stateManager = closer.register(new QueuedStateManager(time, outputHandlerExecutor,
                storage, eventConsumerFactory.apply(environment, stats), eventConsumerExecutor));

        final Config staleStateTtlConfig = config.getConfig(STYX_STALE_STATE_TTL_CONFIG);
        final TimeoutConfig timeoutConfig = TimeoutConfig.createFromConfig(staleStateTtlConfig);

        // Runtime configuration is read from storage through a caching supplier; the docker runner
        // id and the debug flag are re-read from it on every use.
        final Supplier<StyxConfig> styxConfig = new CachedSupplier<>(storage::config, time);
        final Supplier<String> dockerId = () -> styxConfig.get().globalDockerRunnerId();
        final Debug debug = () -> styxConfig.get().debugEnabled();
        final DockerRunner routingDockerRunner = DockerRunner.routing(
                id -> dockerRunnerFactory.create(id, environment, stateManager, executor, stats, debug), dockerId);
        final DockerRunner dockerRunner = instrument(DockerRunner.class, routingDockerRunner, stats, time);

        final RateLimiter dequeueRateLimiter = RateLimiter.create(DEFAULT_SUBMISSION_RATE_PER_SEC);

        final OutputHandler[] outputHandlers = new OutputHandler[] { transitionLogger(""),
                new DockerRunnerHandler(dockerRunner, stateManager),
                new TerminationHandler(retryUtil, stateManager), new MonitoringHandler(stats),
                new PublisherHandler(publisher), new ExecutionDescriptionHandler(storage, stateManager,
                        new WorkflowValidator(new DockerImageValidator())) };
        final StateFactory stateFactory = (workflowInstance) -> RunState.fresh(workflowInstance, time,
                outputHandlers);

        final TriggerListener trigger = new StateInitializingTrigger(stateFactory, stateManager);
        final TriggerManager triggerManager = new TriggerManager(trigger, time, storage, stats);
        final BackfillTriggerManager backfillTriggerManager = new BackfillTriggerManager(stateManager,
                workflowCache, storage, trigger);

        final WorkflowInitializer workflowInitializer = new WorkflowInitializer(storage, time);
        final BiConsumer<Optional<Workflow>, Optional<Workflow>> workflowConsumer = workflowConsumerFactory
                .apply(environment, stats);
        final Consumer<Workflow> workflowRemoveListener = workflowRemoved(workflowCache, storage, workflowConsumer);
        final Consumer<Workflow> workflowChangeListener = workflowChanged(workflowCache, workflowInitializer, stats,
                stateManager, workflowConsumer);

        final Scheduler scheduler = new Scheduler(time, timeoutConfig, stateManager, workflowCache, storage,
                resourceDecorator, stats, dequeueRateLimiter, executionGateFactory.apply(environment, storage));

        final Cleaner cleaner = new Cleaner(dockerRunner);

        // State is restored before any periodic task is scheduled.
        restoreState(storage, outputHandlers, stateManager, dockerRunner);
        startTriggerManager(triggerManager, executor);
        startBackfillTriggerManager(backfillTriggerManager, executor);
        startScheduler(scheduler, executor);
        startRuntimeConfigUpdate(styxConfig, executor, dequeueRateLimiter);
        startCleaner(cleaner, executor);
        setupMetrics(stateManager, workflowCache, storage, dequeueRateLimiter, stats);

        final SchedulerResource schedulerResource = new SchedulerResource(stateManager, trigger,
                workflowChangeListener, workflowRemoveListener, storage, time,
                new WorkflowValidator(new DockerImageValidator()));

        environment.routingEngine().registerAutoRoute(Route.sync("GET", "/ping", rc -> "pong"))
                .registerRoutes(Api.withCommonMiddleware(schedulerResource.routes()));

        // Expose the wired components to the @VisibleForTesting accessors.
        this.stateManager = stateManager;
        this.scheduler = scheduler;
        this.triggerManager = triggerManager;
        this.backfillTriggerManager = backfillTriggerManager;
        this.workflowRemoveListener = workflowRemoveListener;
        this.workflowChangeListener = workflowChangeListener;
    }

    // The accessors below read fields that are assigned at the end of create(); calling them
    // before create() has run dereferences or returns null.

    /** Forwards the event to the state manager. */
    @VisibleForTesting
    void receive(Event event) throws IsClosedException {
        stateManager.receive(event);
    }

    /** Returns what the state manager holds for the given workflow instance. */
    @VisibleForTesting
    RunState getState(WorkflowInstance workflowInstance) {
        return stateManager.get(workflowInstance);
    }

    /** Runs one scheduler tick synchronously on the calling thread. */
    @VisibleForTesting
    void tickScheduler() {
        scheduler.tick();
    }

    /** Runs one trigger manager tick synchronously on the calling thread. */
    @VisibleForTesting
    void tickTriggerManager() {
        triggerManager.tick();
    }

    /** Runs one backfill trigger manager tick synchronously on the calling thread. */
    @VisibleForTesting
    void tickBackfillTriggerManager() {
        backfillTriggerManager.tick();
    }

    /** Returns the listener created by workflowRemoved in create(). */
    @VisibleForTesting
    Consumer<Workflow> getWorkflowRemoveListener() {
        return workflowRemoveListener;
    }

    /** Returns the listener created by workflowChanged in create(). */
    @VisibleForTesting
    Consumer<Workflow> getWorkflowChangeListener() {
        return workflowChangeListener;
    }

    /**
     * Populates the workflow cache with every workflow currently in storage.
     *
     * <p>Best effort: if storage cannot be read, a warning is logged and the cache keeps whatever
     * was stored up to that point.
     */
    private void warmUpCache(WorkflowCache cache, Storage storage) {
        try {
            for (Workflow workflow : storage.workflows().values()) {
                cache.store(workflow);
            }
        } catch (IOException e) {
            LOG.warn("Failed to get workflows from storage", e);
        }
    }

    /**
     * Restores the active workflow instance states from storage into the state manager and then
     * lets the docker runner restore its own state.
     *
     * @param storage        source of the persisted active instances
     * @param outputHandlers handlers attached to every restored state
     * @param stateManager   receives the restored states
     * @param dockerRunner   asked to restore after the states are in place
     * @throws RuntimeException wrapping the IOException if reading from storage fails
     */
    private void restoreState(Storage storage, OutputHandler[] outputHandlers, StateManager stateManager,
            DockerRunner dockerRunner) {
        try {
            final Map<WorkflowInstance, PersistentWorkflowInstanceState> activeInstances = storage
                    .readActiveWorkflowInstances();

            // Re-key the replayed states with the live handlers and clock before restoring them.
            replayActiveStates(activeInstances, storage, true).entrySet().stream()
                    .collect(
                            toMap(e -> e.getKey().withHandlers(outputHandlers).withTime(time), Map.Entry::getValue))
                    .forEach(stateManager::restore);
        } catch (IOException e) {
            // Throwables.propagate is deprecated in Guava; for a checked exception it is exactly this.
            throw new RuntimeException(e);
        }

        // Eagerly fetch container state before starting the scheduler in order to recover executions
        // that completed while styx was offline and avoiding re-running WFIs due to state timeouts.
        dockerRunner.restore();
    }

    /** Schedules the guarded cleaner tick: first run immediately, then with a fixed delay between runs. */
    private static void startCleaner(Cleaner cleaner, ScheduledExecutorService exec) {
        final Runnable guardedTick = guard(cleaner::tick);
        exec.scheduleWithFixedDelay(guardedTick, 0, CLEANER_TICK_INTERVAL_SECONDS, SECONDS);
    }

    /** Schedules the guarded trigger manager tick with a fixed delay between runs. */
    private static void startTriggerManager(TriggerManager triggerManager, ScheduledExecutorService exec) {
        final Runnable guardedTick = guard(triggerManager::tick);
        final long intervalSeconds = TRIGGER_MANAGER_TICK_INTERVAL_SECONDS;
        // The first run happens one interval after scheduling.
        exec.scheduleWithFixedDelay(guardedTick, intervalSeconds, intervalSeconds, SECONDS);
    }

    /**
     * Schedules the guarded backfill trigger manager tick with a fixed delay between runs, using
     * the same interval as the regular trigger manager.
     */
    private static void startBackfillTriggerManager(BackfillTriggerManager backfillTriggerManager,
            ScheduledExecutorService exec) {
        final Runnable guardedTick = guard(backfillTriggerManager::tick);
        final long intervalSeconds = TRIGGER_MANAGER_TICK_INTERVAL_SECONDS;
        // The first run happens one interval after scheduling.
        exec.scheduleWithFixedDelay(guardedTick, intervalSeconds, intervalSeconds, SECONDS);
    }

    /** Schedules the guarded scheduler tick at a fixed rate. */
    private static void startScheduler(Scheduler scheduler, ScheduledExecutorService exec) {
        final Runnable guardedTick = guard(scheduler::tick);
        final long periodSeconds = SCHEDULER_TICK_INTERVAL_SECONDS;
        // The first run happens one period after scheduling.
        exec.scheduleAtFixedRate(guardedTick, periodSeconds, periodSeconds, SECONDS);
    }

    /**
     * Schedules the periodic refresh of the submission rate limiter from the runtime config:
     * first run immediately, then at a fixed rate.
     */
    private static void startRuntimeConfigUpdate(Supplier<StyxConfig> config, ScheduledExecutorService exec,
            RateLimiter submissionRateLimiter) {
        final Runnable guardedUpdate = guard(() -> updateRuntimeConfig(config, submissionRateLimiter));
        exec.scheduleAtFixedRate(guardedUpdate, 0, RUNTIME_CONFIG_UPDATE_INTERVAL_SECONDS, SECONDS);
    }

    /**
     * Aligns the rate limiter with the submission rate limit in the runtime config.
     *
     * <p>The configured value falls back to {@link #DEFAULT_SUBMISSION_RATE_PER_SEC} when absent.
     * The limiter is only touched when the new rate differs from the current one by at least 0.1.
     * Any failure is logged, including its cause, and the limiter is left unchanged.
     *
     * @param config      supplier of the current runtime config
     * @param rateLimiter the limiter to update
     */
    private static void updateRuntimeConfig(Supplier<StyxConfig> config, RateLimiter rateLimiter) {
        try {
            final double currentRate = rateLimiter.getRate();
            final double updatedRate = config.get().submissionRateLimit()
                    .orElse(StyxScheduler.DEFAULT_SUBMISSION_RATE_PER_SEC);
            if (Math.abs(updatedRate - currentRate) >= 0.1) {
                LOG.info("Updating submission rate limit: {} -> {}", currentRate, updatedRate);
                rateLimiter.setRate(updatedRate);
            }
        } catch (Exception e) {
            // Pass the exception along so the reason for the failure ends up in the log.
            LOG.warn("Failed to fetch the submission rate config from storage, " + "skipping RateLimiter update",
                    e);
        }
    }

    /**
     * Registers the gauges exposed by the scheduler: queued events, workflow counts, active state
     * counts per state and trigger type, active state counts per cached workflow, and the current
     * submission rate limit.
     */
    private void setupMetrics(StateManager stateManager, WorkflowCache workflowCache, Storage storage,
            RateLimiter submissionRateLimiter, Stats stats) {

        stats.registerQueuedEventsMetric(stateManager::getQueuedEventsCount);

        // --- workflow counts ---
        stats.registerWorkflowCountMetric("all", () -> (long) workflowCache.all().size());

        stats.registerWorkflowCountMetric("configured",
                () -> workflowCache.all().stream().filter(StyxScheduler::hasDockerImage).count());

        // The set of enabled workflow ids is read through a caching supplier.
        final Supplier<Set<WorkflowId>> enabledIds = new CachedSupplier<>(storage::enabled, Instant::now);
        final Gauge<Long> enabledGauge = () -> workflowCache.all().stream()
                .filter(StyxScheduler::hasDockerImage)
                .filter(workflow -> enabledIds.get().contains(WorkflowId.ofWorkflow(workflow)))
                .count();
        stats.registerWorkflowCountMetric("enabled", enabledGauge);

        stats.registerWorkflowCountMetric("docker_termination_logging_enabled",
                () -> workflowCache.all().stream()
                        .filter(StyxScheduler::hasDockerImage)
                        .filter(workflow -> workflow.configuration().dockerTerminationLogging())
                        .count());

        // --- active states, one gauge per (state, trigger type) plus one for states with no trigger ---
        for (RunState.State state : RunState.State.values()) {
            TriggerUtil.triggerTypesList().forEach(triggerType ->
                    stats.registerActiveStatesMetric(state, triggerType,
                            () -> stateManager.activeStates().values().stream()
                                    .filter(runState -> runState.state().equals(state))
                                    .filter(runState -> runState.data().trigger().isPresent()
                                            && triggerType.equals(
                                                    TriggerUtil.triggerType(runState.data().trigger().get())))
                                    .count()));

            stats.registerActiveStatesMetric(state, "none",
                    () -> stateManager.activeStates().values().stream()
                            .filter(runState -> runState.state().equals(state))
                            .filter(runState -> !runState.data().trigger().isPresent())
                            .count());
        }

        // --- active states per workflow currently in the cache ---
        workflowCache.all().forEach(workflow -> stats.registerActiveStatesMetric(workflow.id(),
                () -> stateManager.getActiveStatesCount(workflow.id())));

        stats.registerSubmissionRateLimitMetric(submissionRateLimiter::getRate);
    }

    /** Tells whether the workflow configuration carries a docker image. */
    private static boolean hasDockerImage(Workflow workflow) {
        return workflow.configuration().dockerImage().isPresent();
    }

    /**
     * Builds the listener invoked when a workflow is added or modified.
     *
     * <p>For each workflow the listener registers its active states gauge, looks up the previously
     * cached version, lets the initializer inspect the change, stores the new version in the cache,
     * notifies the workflow consumer with (previous, new) and logs the outcome.
     */
    private static Consumer<Workflow> workflowChanged(WorkflowCache cache, WorkflowInitializer workflowInitializer,
            Stats stats, StateManager stateManager,
            BiConsumer<Optional<Workflow>, Optional<Workflow>> workflowConsumer) {
        return updated -> {
            stats.registerActiveStatesMetric(updated.id(), () -> stateManager.getActiveStatesCount(updated.id()));

            // Must be read before the cache is overwritten below.
            final Optional<Workflow> previous = cache.workflow(updated.id());

            workflowInitializer.inspectChange(updated);
            cache.store(updated);
            workflowConsumer.accept(previous, Optional.of(updated));

            if (!previous.isPresent()) {
                LOG.info("Workflow added: {}", updated);
            } else {
                LOG.info("Workflow modified, old config: {}, new config: {}", previous.get(), updated);
            }
        };
    }

    /**
     * Builds the listener invoked when a workflow is removed.
     *
     * <p>The listener does nothing when the workflow is not in the cache. Otherwise it deletes the
     * workflow from storage, removes it from the cache and notifies the workflow consumer with
     * (removed, empty). If the storage deletion fails, the failure is logged with its cause and the
     * cache and consumer are left untouched.
     */
    private static Consumer<Workflow> workflowRemoved(WorkflowCache cache, Storage storage,
            BiConsumer<Optional<Workflow>, Optional<Workflow>> workflowConsumer) {
        return workflow -> cache.workflow(workflow.id()).ifPresent(existingWorkflow -> {
            try {
                storage.delete(workflow.id());
            } catch (IOException e) {
                // Include the exception so the reason for the failed deletion is not lost.
                LOG.warn("Couldn't remove workflow {}. ", workflow.id(), e);
                return;
            }
            cache.remove(workflow);
            workflowConsumer.accept(Optional.of(workflow), Optional.empty());
            LOG.info("Workflow removed: {}", workflow);
        });
    }

    /** Default StatsFactory: MetricsStats on top of the environment's SemanticMetricRegistry. */
    private static Stats stats(Environment environment) {
        final SemanticMetricRegistry registry = environment.resolve(SemanticMetricRegistry.class);
        return new MetricsStats(registry, Instant::now);
    }

    /**
     * Wraps a storage factory so that development mode gets a fresh InMemStorage instead; in any
     * other mode the wrapped factory is used as is.
     */
    private static StorageFactory storage(StorageFactory storage) {
        return environment -> {
            if (!isDevMode(environment.config())) {
                return storage.apply(environment);
            }
            LOG.info("Running Styx in development mode, will use InMemStorage");
            return new InMemStorage();
        };
    }

    /**
     * Creates the AggregateStorage from a BigTable connection and a Datastore client, both built
     * from the environment config. The BigTable connection is registered with the environment closer.
     */
    private static AggregateStorage storage(Environment environment) {
        final Config styxConfig = environment.config();
        final Closer resources = environment.closer();

        final Connection bigTableConnection = createBigTableConnection(styxConfig);
        resources.register(bigTableConnection);

        return new AggregateStorage(bigTableConnection, createDatastore(styxConfig), DEFAULT_RETRY_BASE_DELAY_BT);
    }

    /**
     * Default DockerRunnerFactory. In development mode a local docker runner is created; otherwise
     * a Kubernetes-backed runner is created for the GKE cluster configured under the given id.
     * Every closeable created here is registered with the environment closer.
     */
    private static DockerRunner createDockerRunner(String id, Environment environment, StateManager stateManager,
            ScheduledExecutorService scheduler, Stats stats, Debug debug) {
        final Config config = environment.config();
        final Closer closer = environment.closer();

        if (isDevMode(config)) {
            LOG.info("Creating LocalDockerRunner");
            return closer.register(DockerRunner.local(scheduler, stateManager));
        }

        final Container gke = createGkeClient();
        final NamespacedKubernetesClient kubernetes = closer
                .register(getKubernetesClient(config, id, gke, DefaultKubernetesClient::new));
        final ServiceAccountKeyManager keyManager = createServiceAccountKeyManager();
        return closer.register(DockerRunner.kubernetes(kubernetes, stateManager, stats, keyManager, debug));
    }

    /**
     * Creates a GKE Container API client using the application default credentials scoped to all
     * container scopes.
     *
     * @throws RuntimeException wrapping any GeneralSecurityException or IOException raised on the way
     */
    private static Container createGkeClient() {
        try {
            final HttpTransport transport = GoogleNetHttpTransport.newTrustedTransport();
            final JsonFactory json = Utils.getDefaultJsonFactory();
            final GoogleCredential defaultCredential = GoogleCredential.getApplicationDefault(transport, json);
            final GoogleCredential scopedCredential = defaultCredential.createScoped(ContainerScopes.all());

            final Container.Builder gkeBuilder = new Container.Builder(transport, json, scopedCredential);
            gkeBuilder.setApplicationName(SERVICE_NAME);
            return gkeBuilder.build();
        } catch (GeneralSecurityException | IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates a ServiceAccountKeyManager backed by an IAM API client that uses the application
     * default credentials scoped to all IAM scopes.
     *
     * @throws RuntimeException wrapping any GeneralSecurityException or IOException raised on the way
     */
    private static ServiceAccountKeyManager createServiceAccountKeyManager() {
        try {
            final HttpTransport transport = GoogleNetHttpTransport.newTrustedTransport();
            final JsonFactory json = Utils.getDefaultJsonFactory();
            final GoogleCredential defaultCredential = GoogleCredential.getApplicationDefault(transport, json);
            final GoogleCredential scopedCredential = defaultCredential.createScoped(IamScopes.all());

            final Iam.Builder iamBuilder = new Iam.Builder(transport, json, scopedCredential);
            iamBuilder.setApplicationName(SERVICE_NAME);
            return new ServiceAccountKeyManager(iamBuilder.build());
        } catch (GeneralSecurityException | IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates a Kubernetes client for the GKE cluster configured under {@code styx.gke.<id>}.
     *
     * <p>The cluster is looked up through the GKE API using the configured project, zone and
     * cluster id. The client configuration is then built from the cluster endpoint, the cluster's
     * master auth certificates and key, and the configured namespace.
     *
     * @param rootConfig    the root configuration containing the {@code styx.gke} section
     * @param id            name of the sub-section under {@code styx.gke} to use
     * @param gke           GKE API client used to fetch the cluster description
     * @param clientFactory creates the client from the assembled configuration
     * @return the client produced by {@code clientFactory}
     * @throws RuntimeException wrapping the IOException if the GKE API call fails
     */
    static NamespacedKubernetesClient getKubernetesClient(Config rootConfig, String id, Container gke,
            KubernetesClientFactory clientFactory) {
        try {
            final Config config = rootConfig.getConfig(GKE_CLUSTER_PATH).getConfig(id);

            final Cluster cluster = gke.projects().locations().clusters()
                    .get(String.format("projects/%s/locations/%s/clusters/%s",
                            config.getString(GKE_CLUSTER_PROJECT_ID), config.getString(GKE_CLUSTER_ZONE),
                            config.getString(GKE_CLUSTER_ID)))
                    .execute();

            final io.fabric8.kubernetes.client.Config kubeConfig = new ConfigBuilder()
                    .withMasterUrl("https://" + cluster.getEndpoint())
                    .withCaCertData(cluster.getMasterAuth().getClusterCaCertificate())
                    .withClientCertData(cluster.getMasterAuth().getClientCertificate())
                    .withClientKeyData(cluster.getMasterAuth().getClientKey())
                    .withNamespace(config.getString(GKE_CLUSTER_NAMESPACE)).build();

            return clientFactory.apply(kubeConfig);
        } catch (IOException e) {
            // Throwables.propagate is deprecated in Guava; for a checked exception it is exactly this.
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns a Closeable that shuts the executor down.
     *
     * <p>On close, the executor is asked to shut down and given up to one second to terminate.
     * It is then shut down forcibly, and a warning is logged if any tasks never started.
     * If the closing thread is interrupted while waiting, its interrupt flag is restored and the
     * forced shutdown still takes place.
     *
     * @param name     label used in the log messages
     * @param executor the executor to shut down
     */
    private static Closeable executorCloser(String name, ExecutorService executor) {
        return () -> {
            LOG.info("Shutting down executor: {}", name);
            executor.shutdown();
            try {
                executor.awaitTermination(1, SECONDS);
            } catch (InterruptedException e) {
                // Do not swallow the interruption: restore the flag for whoever is closing us,
                // then fall through to the forced shutdown.
                Thread.currentThread().interrupt();
            }
            final List<Runnable> runnables = executor.shutdownNow();
            if (!runnables.isEmpty()) {
                LOG.warn("{} task(s) in {} did not execute", runnables.size(), name);
            }
        };
    }

    /** Tells whether the configured styx mode equals the development mode value. */
    private static boolean isDevMode(Config config) {
        final String configuredMode = config.getString(STYX_MODE);
        return STYX_MODE_DEVELOPMENT.equals(configuredMode);
    }

    /**
     * Creates a NamespacedKubernetesClient from a fabric8 client configuration. It is a parameter
     * of getKubernetesClient; the default docker runner factory passes DefaultKubernetesClient::new.
     */
    interface KubernetesClientFactory
            extends Function<io.fabric8.kubernetes.client.Config, NamespacedKubernetesClient> {
    }
}