com.twitter.aurora.scheduler.state.CronJobManager.java Source code

Java tutorial

Introduction

Here is the source code for com.twitter.aurora.scheduler.state.CronJobManager.java

Source

/*
 * Copyright 2013 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.twitter.aurora.scheduler.state;

import java.util.Collections;
import java.util.Date;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.annotation.Nullable;
import javax.inject.Inject;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.eventbus.Subscribe;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.apache.commons.lang.StringUtils;

import com.twitter.aurora.gen.CronCollisionPolicy;
import com.twitter.aurora.gen.ScheduleStatus;
import com.twitter.aurora.scheduler.base.JobKeys;
import com.twitter.aurora.scheduler.base.Query;
import com.twitter.aurora.scheduler.base.ScheduleException;
import com.twitter.aurora.scheduler.base.Tasks;
import com.twitter.aurora.scheduler.configuration.ConfigurationManager.TaskDescriptionException;
import com.twitter.aurora.scheduler.configuration.SanitizedConfiguration;
import com.twitter.aurora.scheduler.cron.CronException;
import com.twitter.aurora.scheduler.cron.CronScheduler;
import com.twitter.aurora.scheduler.events.PubsubEvent.EventSubscriber;
import com.twitter.aurora.scheduler.events.PubsubEvent.StorageStarted;
import com.twitter.aurora.scheduler.storage.Storage;
import com.twitter.aurora.scheduler.storage.Storage.MutateWork;
import com.twitter.aurora.scheduler.storage.Storage.Work;
import com.twitter.aurora.scheduler.storage.entities.IJobConfiguration;
import com.twitter.aurora.scheduler.storage.entities.IJobKey;
import com.twitter.aurora.scheduler.storage.entities.IScheduledTask;
import com.twitter.aurora.scheduler.storage.entities.ITaskConfig;
import com.twitter.common.application.ShutdownRegistry;
import com.twitter.common.args.Arg;
import com.twitter.common.args.CmdLine;
import com.twitter.common.base.ExceptionalCommand;
import com.twitter.common.base.Supplier;
import com.twitter.common.quantity.Amount;
import com.twitter.common.quantity.Time;
import com.twitter.common.stats.Stats;
import com.twitter.common.util.BackoffHelper;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import static com.twitter.aurora.gen.ScheduleStatus.PENDING;

/**
 * A job scheduler that receives jobs that should be run periodically on a cron schedule.
 */
public class CronJobManager extends JobManager implements EventSubscriber {

    // Identifier of this job manager: returned by getUniqueKey() and used as the manager ID
    // when cron jobs are fetched from and saved to the job store.
    public static final String MANAGER_KEY = "CRON";

    // User name passed to the scheduler core when a cron run kills the tasks of an earlier run.
    @VisibleForTesting
    static final String CRON_USER = "cron";

    private static final Logger LOG = Logger.getLogger(CronJobManager.class.getName());

    // First of the two delays handed to the BackoffHelper that polls until a previous run has
    // no more active tasks.
    @CmdLine(name = "cron_start_initial_backoff", help = "Initial backoff delay while waiting for a previous cron run to start.")
    private static final Arg<Amount<Long, Time>> CRON_START_INITIAL_BACKOFF = Arg
            .create(Amount.of(1L, Time.SECONDS));

    // Second of the two delays handed to the same BackoffHelper.
    @CmdLine(name = "cron_start_max_backoff", help = "Max backoff delay while waiting for a previous cron run to start.")
    private static final Arg<Amount<Long, Time>> CRON_START_MAX_BACKOFF = Arg.create(Amount.of(1L, Time.MINUTES));

    // Incremented every time cronTriggered() is invoked.
    private final AtomicLong cronJobsTriggered = Stats.exportLong("cron_jobs_triggered");
    // Incremented when a job recovered from storage cannot be scheduled at startup.
    private final AtomicLong cronJobLaunchFailures = Stats.exportLong("cron_job_launch_failures");

    // Maps from the unique job identifier to the unique identifier used internally by the cron
    // scheduler.
    private final Map<IJobKey, String> scheduledJobs = Collections
            .synchronizedMap(Maps.<IJobKey, String>newHashMap());

    // Prevents runs from dogpiling while waiting for a run to transition out of the KILLING state.
    // This is necessary because killing a job (if dictated by cron collision policy) is an
    // asynchronous operation.
    // The value is the configuration to launch once the previous run has no active tasks left.
    private final Map<IJobKey, SanitizedConfiguration> pendingRuns = Collections
            .synchronizedMap(Maps.<IJobKey, SanitizedConfiguration>newHashMap());

    // Receives the task configurations of every run that is actually launched.
    private final StateManager stateManager;
    // Source of persisted cron job configurations and of the active-task queries.
    private final Storage storage;
    // Scheduler that fires the registered runnables according to each job's cron schedule.
    private final CronScheduler cron;
    // Used to register the action that stops the cron scheduler.
    private final ShutdownRegistry shutdownRegistry;
    // Retry helper used while polling for a previous run to terminate.
    private final BackoffHelper delayedStartBackoff;
    // Runs the polling tasks that wait for a previous run to terminate.
    private final Executor delayedRunExecutor;

    /**
     * Creates a cron job manager whose delayed runs are executed on a cached pool of daemon
     * threads named {@code CronDelay-N}.
     *
     * @param stateManager State manager that receives the tasks of launched runs.
     * @param storage Storage holding cron job configurations and tasks.
     * @param cron Cron scheduler that triggers the jobs.
     * @param shutdownRegistry Registry used to stop the cron scheduler.
     */
    @Inject
    CronJobManager(StateManager stateManager, Storage storage, CronScheduler cron,
            ShutdownRegistry shutdownRegistry) {
        this(
                stateManager,
                storage,
                cron,
                shutdownRegistry,
                Executors.newCachedThreadPool(
                        new ThreadFactoryBuilder()
                                .setNameFormat("CronDelay-%d")
                                .setDaemon(true)
                                .build()));
    }

    @VisibleForTesting
    CronJobManager(StateManager stateManager, Storage storage, CronScheduler cron,
            ShutdownRegistry shutdownRegistry, Executor delayedRunExecutor) {

        this.stateManager = checkNotNull(stateManager);
        this.storage = checkNotNull(storage);
        this.cron = checkNotNull(cron);
        this.shutdownRegistry = checkNotNull(shutdownRegistry);
        this.delayedStartBackoff = new BackoffHelper(CRON_START_INITIAL_BACKOFF.get(),
                CRON_START_MAX_BACKOFF.get());
        this.delayedRunExecutor = checkNotNull(delayedRunExecutor);

        Stats.exportSize("cron_num_pending_runs", pendingRuns);
    }

    /**
     * Records the cron scheduler's identifier for a job that has just been scheduled.
     *
     * @param job Job that was scheduled.
     * @param scheduledJobKey Identifier the cron scheduler assigned to the job.
     * @throws IllegalStateException If an identifier is already recorded for the job.
     */
    private void mapScheduledJob(IJobConfiguration job, String scheduledJobKey) {
        final IJobKey key = job.getKey();
        // Check-then-put must be atomic, hence the explicit lock on the synchronized map.
        synchronized (scheduledJobs) {
            if (scheduledJobs.containsKey(key)) {
                throw new IllegalStateException(
                        "Illegal state - cron schedule already exists for " + JobKeys.toPath(key));
            }
            scheduledJobs.put(key, scheduledJobKey);
        }
    }

    /**
     * Reacts to storage becoming available: starts the cron scheduler, registers an action
     * that stops it again, and schedules every cron job found in the job store.
     *
     * @param storageStarted Event.
     */
    @Subscribe
    public void storageStarted(StorageStarted storageStarted) {
        cron.start();
        shutdownRegistry.addAction(new ExceptionalCommand<CronException>() {
            @Override
            public void execute() throws CronException {
                cron.stop();
            }
        });

        final Work.Quiet<Iterable<IJobConfiguration>> fetchCronJobs =
                new Work.Quiet<Iterable<IJobConfiguration>>() {
                    @Override
                    public Iterable<IJobConfiguration> apply(Storage.StoreProvider storeProvider) {
                        return storeProvider.getJobStore().fetchJobs(MANAGER_KEY);
                    }
                };
        final Iterable<IJobConfiguration> recoveredJobs = storage.consistentRead(fetchCronJobs);

        // A job that fails to schedule is logged and counted; the remaining jobs still load.
        for (IJobConfiguration recoveredJob : recoveredJobs) {
            try {
                SanitizedConfiguration sanitized = SanitizedConfiguration.fromUnsanitized(recoveredJob);
                String cronKey = scheduleJob(sanitized);
                mapScheduledJob(recoveredJob, cronKey);
            } catch (ScheduleException | TaskDescriptionException e) {
                logLaunchFailure(recoveredJob, e);
            }
        }
    }

    /**
     * Counts and logs a failure to schedule a job that was recovered from storage.
     *
     * @param job Job that could not be scheduled.
     * @param e Cause of the failure.
     */
    private void logLaunchFailure(IJobConfiguration job, Exception e) {
        cronJobLaunchFailures.incrementAndGet();
        final String message = "Scheduling failed for recovered job " + job;
        LOG.log(Level.SEVERE, message, e);
    }

    /**
     * Triggers an immediate run of a stored cron job, applying the same collision handling
     * as a regular cron trigger.
     *
     * @param jobKey Key of the job to start.
     * @throws TaskDescriptionException If the stored job configuration cannot be sanitized.
     * @throws IllegalArgumentException If no cron job is stored under {@code jobKey}.
     */
    public void startJobNow(IJobKey jobKey) throws TaskDescriptionException {
        Preconditions.checkNotNull(jobKey);

        final Optional<IJobConfiguration> stored = fetchJob(jobKey);
        if (!stored.isPresent()) {
            throw new IllegalArgumentException("No such cron job " + JobKeys.toPath(jobKey));
        }

        final SanitizedConfiguration sanitized = SanitizedConfiguration.fromUnsanitized(stored.get());
        cronTriggered(sanitized);
    }

    /**
     * Registers a run to be launched once the tasks matched by {@code query} are gone.
     * The newest configuration replaces any run already pending for the job; a waiting task
     * is only submitted when no run was pending before.
     *
     * @param query Query matching the tasks that must disappear before launching.
     * @param config Configuration of the run to launch.
     */
    private void delayedRun(final Query.Builder query, final SanitizedConfiguration config) {
        final IJobConfiguration jobConfig = config.getJobConfig();
        final IJobKey jobKey = jobConfig.getKey();
        final String jobPath = JobKeys.toPath(jobConfig);

        LOG.info("Waiting for job to terminate before launching cron job " + jobPath);

        final boolean alreadyWaiting = pendingRuns.put(jobKey, config) != null;
        if (alreadyWaiting) {
            // The waiter already submitted for this job will pick up the replaced configuration.
            return;
        }

        LOG.info("Launching a task to wait for job to finish: " + jobPath);
        delayedRunExecutor.execute(new Runnable() {
            @Override
            public void run() {
                runWhenTerminated(query, jobKey);
            }
        });
    }

    /**
     * Polls with backoff until {@code query} matches no tasks, then launches the run that is
     * pending for {@code jobKey}.
     *
     * @param query Query matching the tasks that must disappear before launching.
     * @param jobKey Key of the job whose pending run should be launched.
     */
    private void runWhenTerminated(final Query.Builder query, final IJobKey jobKey) {
        final Supplier<Boolean> launchWhenClear = new Supplier<Boolean>() {
            @Override
            public Boolean get() {
                if (hasTasks(query)) {
                    LOG.info("Not yet safe to run cron " + jobKey);
                    return false;
                }

                LOG.info("Initiating delayed launch of cron " + jobKey);
                SanitizedConfiguration pending = pendingRuns.remove(jobKey);
                checkNotNull(pending, "Failed to fetch job for delayed run of " + jobKey);
                LOG.info("Launching " + pending.getTaskConfigs().size() + " tasks.");
                stateManager.insertPendingTasks(pending.getTaskConfigs());
                return true;
            }
        };

        try {
            delayedStartBackoff.doUntilSuccess(launchWhenClear);
        } catch (InterruptedException e) {
            LOG.log(Level.WARNING, "Interrupted while trying to launch cron " + jobKey, e);
            // Restore the interrupt flag for whoever owns this thread.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Tells whether at least one stored task matches the query.
     *
     * @param query Query to evaluate against storage.
     * @return {@code true} if the query matches one or more tasks.
     */
    private boolean hasTasks(Query.Builder query) {
        final Set<IScheduledTask> matches = Storage.Util.consistentFetchTasks(storage, query);
        return !matches.isEmpty();
    }

    /**
     * Resolves the collision policy to apply, falling back to
     * {@link CronCollisionPolicy#KILL_EXISTING} when none is set.
     *
     * @param policy Configured policy, possibly {@code null}.
     * @return {@code policy} itself, or the default when it is {@code null}.
     */
    public static CronCollisionPolicy orDefault(@Nullable CronCollisionPolicy policy) {
        if (policy == null) {
            return CronCollisionPolicy.KILL_EXISTING;
        }
        return policy;
    }

    /**
     * Triggers execution of a cron job, depending on the cron collision policy for the job.
     *
     * <p>When the job has no active tasks, the configured tasks are launched directly.
     * Otherwise the collision policy (defaulted by {@link #orDefault}) decides:
     * <ul>
     *   <li>{@code KILL_EXISTING}: kills the active tasks, then launches immediately if they are
     *       already gone or defers the launch until they are.</li>
     *   <li>{@code CANCEL_NEW}: launches nothing.</li>
     *   <li>{@code RUN_OVERLAP}: launches nothing if any active task is pending; otherwise
     *       launches the tasks with instance IDs shifted past the highest active instance ID.</li>
     * </ul>
     *
     * @param config The config of the job to be triggered.
     */
    @VisibleForTesting
    void cronTriggered(SanitizedConfiguration config) {
        IJobConfiguration job = config.getJobConfig();
        LOG.info(String.format("Cron triggered for %s at %s with policy %s", JobKeys.toPath(job), new Date(),
                job.getCronCollisionPolicy()));
        cronJobsTriggered.incrementAndGet();

        ImmutableMap.Builder<Integer, ITaskConfig> builder = ImmutableMap.builder();
        final Query.Builder activeQuery = Query.jobScoped(job.getKey()).active();
        Set<IScheduledTask> activeTasks = Storage.Util.consistentFetchTasks(storage, activeQuery);

        if (activeTasks.isEmpty()) {
            builder.putAll(config.getTaskConfigs());
        } else {
            // Assign a default collision policy.
            CronCollisionPolicy collisionPolicy = orDefault(job.getCronCollisionPolicy());

            switch (collisionPolicy) {
            case KILL_EXISTING:
                try {
                    schedulerCore.killTasks(activeQuery, CRON_USER);
                    // Check immediately if the tasks are gone.  This could happen if the existing tasks
                    // were pending.
                    if (!hasTasks(activeQuery)) {
                        builder.putAll(config.getTaskConfigs());
                    } else {
                        delayedRun(activeQuery, config);
                    }
                } catch (ScheduleException e) {
                    // The run is skipped: nothing is added to the builder and no delayed run is set up.
                    LOG.log(Level.SEVERE, "Failed to kill job.", e);
                }
                break;

            case CANCEL_NEW:
                // The existing run wins; the new run is dropped.
                break;

            case RUN_OVERLAP:
                // activeTasks is non-empty in this branch, so there is always at least one
                // existing instance to inspect.
                Map<Integer, IScheduledTask> byInstance = Maps.uniqueIndex(activeTasks,
                        Tasks.SCHEDULED_TO_INSTANCE_ID);
                Map<Integer, ScheduleStatus> existingTasks = Maps.transformValues(byInstance, Tasks.GET_STATUS);
                if (Iterables.any(existingTasks.values(), Predicates.equalTo(PENDING))) {
                    LOG.info("Job " + JobKeys.toPath(job) + " has pending tasks, suppressing run.");
                } else {
                    // To safely overlap this run, we need to adjust the instance IDs of the overlapping
                    // run (maintaining the role/job/instance UUID invariant).
                    int instanceOffset = Ordering.natural().max(existingTasks.keySet()) + 1;
                    LOG.info("Adjusting instance IDs of " + JobKeys.toPath(job) + " by " + instanceOffset
                            + " for overlapping cron run.");
                    for (Map.Entry<Integer, ITaskConfig> entry : config.getTaskConfigs().entrySet()) {
                        builder.put(entry.getKey() + instanceOffset, entry.getValue());
                    }
                }
                break;

            default:
                LOG.severe("Unrecognized cron collision policy: " + job.getCronCollisionPolicy());
                break;
            }
        }

        // Only touch the state manager when there is actually something to launch.
        Map<Integer, ITaskConfig> newTasks = builder.build();
        if (!newTasks.isEmpty()) {
            stateManager.insertPendingTasks(newTasks);
        }
    }

    /**
     * Updates (re-schedules) the existing cron job.
     *
     * <p>The new schedule is validated before the existing schedule is removed, so that an
     * invalid update is rejected without leaving the job unscheduled.
     *
     * @param config New job configuration to update to.
     * @throws ScheduleException If a non-cron job configuration is provided, if the new cron
     *     schedule is invalid, or if the job cannot be re-scheduled.
     * @throws NullPointerException If no schedule is currently registered for the job.
     */
    public void updateJob(SanitizedConfiguration config) throws ScheduleException {
        IJobConfiguration job = config.getJobConfig();
        if (!hasCronSchedule(job)) {
            throw new ScheduleException("A cron job may not be updated to a non-cron job.");
        }
        // Reject a bad schedule up front: once the old schedule is descheduled below, a failure
        // would leave the job in storage with nothing triggering it.
        if (!cron.isValidSchedule(job.getCronSchedule())) {
            throw new ScheduleException("Invalid cron schedule: " + job.getCronSchedule());
        }
        String key = scheduledJobs.remove(job.getKey());
        checkNotNull(key, "Attempted to update unknown job " + JobKeys.toPath(job));
        cron.deschedule(key);
        // receiveJob() schedules the new configuration, persists it and records the new key.
        checkArgument(receiveJob(config));
    }

    /**
     * Returns the key identifying this job manager.
     *
     * @return {@link #MANAGER_KEY}.
     */
    @Override
    public String getUniqueKey() {
        return CronJobManager.MANAGER_KEY;
    }

    /**
     * Tells whether a job configuration carries a non-empty cron schedule.
     *
     * @param job Job configuration to inspect; must not be {@code null}.
     * @return {@code true} if the schedule is neither {@code null} nor the empty string.
     */
    private static boolean hasCronSchedule(IJobConfiguration job) {
        Preconditions.checkNotNull(job);
        final String schedule = job.getCronSchedule();
        return schedule != null && schedule.length() > 0;
    }

    /**
     * Accepts a job if it has a cron schedule: schedules it with the cron scheduler, saves it
     * to the job store under {@link #MANAGER_KEY} and records the scheduler's identifier.
     *
     * @param config Configuration of the offered job.
     * @return {@code true} if the job was accepted, {@code false} if it has no cron schedule.
     * @throws ScheduleException If the job cannot be scheduled.
     */
    @Override
    public boolean receiveJob(SanitizedConfiguration config) throws ScheduleException {
        final IJobConfiguration job = config.getJobConfig();
        if (hasCronSchedule(job)) {
            final String cronKey = scheduleJob(config);

            final MutateWork.NoResult.Quiet saveJob = new MutateWork.NoResult.Quiet() {
                @Override
                protected void execute(Storage.MutableStoreProvider storeProvider) {
                    storeProvider.getJobStore().saveAcceptedJob(MANAGER_KEY, job);
                }
            };
            storage.write(saveJob);

            mapScheduledJob(job, cronKey);
            return true;
        }
        return false;
    }

    /**
     * Registers a job with the cron scheduler so that each firing of its schedule calls
     * {@link #cronTriggered}.
     *
     * @param config Configuration of the job to schedule.
     * @return Identifier the cron scheduler assigned to the scheduled job.
     * @throws ScheduleException If the job has no cron schedule, the schedule is invalid, or
     *     the cron scheduler rejects it.
     */
    private String scheduleJob(final SanitizedConfiguration config) throws ScheduleException {
        final IJobConfiguration job = config.getJobConfig();
        final String jobPath = JobKeys.toPath(job);
        if (!hasCronSchedule(job)) {
            throw new ScheduleException(String.format("Not a valid cronjob, %s has no cron schedule", jobPath));
        }

        final String schedule = job.getCronSchedule();
        if (!cron.isValidSchedule(schedule)) {
            throw new ScheduleException("Invalid cron schedule: " + schedule);
        }

        LOG.info(String.format("Scheduling cron job %s: %s", jobPath, schedule));

        final Runnable trigger = new Runnable() {
            @Override
            public void run() {
                // TODO(William Farner): May want to record information about job runs.
                LOG.info("Running cron job: " + jobPath);
                cronTriggered(config);
            }
        };

        try {
            return cron.schedule(schedule, trigger);
        } catch (CronException e) {
            throw new ScheduleException("Failed to schedule cron job: " + e.getMessage(), e);
        }
    }

    /**
     * Fetches every job stored under {@link #MANAGER_KEY} in the job store.
     *
     * @return The stored cron job configurations.
     */
    @Override
    public Iterable<IJobConfiguration> getJobs() {
        final Work.Quiet<Iterable<IJobConfiguration>> fetchAll =
                new Work.Quiet<Iterable<IJobConfiguration>>() {
                    @Override
                    public Iterable<IJobConfiguration> apply(Storage.StoreProvider storeProvider) {
                        return storeProvider.getJobStore().fetchJobs(MANAGER_KEY);
                    }
                };
        return storage.consistentRead(fetchAll);
    }

    /**
     * Tells whether a cron job with the given key is present in the job store.
     *
     * @param jobKey Key of the job to look up.
     * @return {@code true} if the job is stored under this manager.
     */
    @Override
    public boolean hasJob(IJobKey jobKey) {
        final Optional<IJobConfiguration> stored = fetchJob(jobKey);
        return stored.isPresent();
    }

    /**
     * Looks up a single cron job in the job store.
     *
     * @param jobKey Key of the job to fetch; must not be {@code null}.
     * @return The job stored under {@link #MANAGER_KEY} for the key, if any.
     */
    private Optional<IJobConfiguration> fetchJob(final IJobKey jobKey) {
        Preconditions.checkNotNull(jobKey);

        final Work.Quiet<Optional<IJobConfiguration>> lookup =
                new Work.Quiet<Optional<IJobConfiguration>>() {
                    @Override
                    public Optional<IJobConfiguration> apply(Storage.StoreProvider storeProvider) {
                        return storeProvider.getJobStore().fetchJob(MANAGER_KEY, jobKey);
                    }
                };
        return storage.consistentRead(lookup);
    }

    /**
     * Deletes a cron job: removes its schedule from the cron scheduler (when one is
     * registered) and removes the job from the job store.
     *
     * <p>A job can be present in storage without a registered schedule, for instance when
     * scheduling failed while recovering jobs at startup. Such a job is still removed from
     * storage so that a {@code true} result always means the job is gone.
     *
     * @param jobKey Key of the job to delete; must not be {@code null}.
     * @return {@code true} if the job was known to this manager and has been deleted,
     *     {@code false} if no such job is stored.
     */
    @Override
    public boolean deleteJob(final IJobKey jobKey) {
        checkNotNull(jobKey);

        if (!hasJob(jobKey)) {
            return false;
        }

        String scheduledJobKey = scheduledJobs.remove(jobKey);
        if (scheduledJobKey != null) {
            cron.deschedule(scheduledJobKey);
        } else {
            LOG.warning("No cron schedule registered for " + jobKey + ", removing it from storage only.");
        }

        // Always remove the stored configuration, even when there was nothing to deschedule.
        storage.write(new MutateWork.NoResult.Quiet() {
            @Override
            protected void execute(Storage.MutableStoreProvider storeProvider) {
                storeProvider.getJobStore().removeJob(jobKey);
            }
        });
        LOG.info("Successfully deleted cron job " + jobKey);
        return true;
    }

    // Translates a cron scheduler identifier into the schedule the scheduler reports for it,
    // or the placeholder "Not found." when the scheduler reports none.
    private final Function<String, String> keyToSchedule = new Function<String, String>() {
        @Override
        public String apply(String scheduledJobKey) {
            final Optional<String> schedule = cron.getSchedule(scheduledJobKey);
            return schedule.or("Not found.");
        }
    };

    /**
     * Returns an immutable snapshot of the scheduled jobs.
     *
     * @return Map from job key to the schedule the cron scheduler reports for that job, or
     *     {@code "Not found."} when it reports none.
     */
    public Map<IJobKey, String> getScheduledJobs() {
        // Hold the map's lock while iterating, as required for synchronized maps.
        synchronized (scheduledJobs) {
            final Map<IJobKey, String> schedulesByJob = Maps.transformValues(scheduledJobs, keyToSchedule);
            return ImmutableMap.copyOf(schedulesByJob);
        }
    }

    /**
     * Returns an immutable snapshot of the jobs that have a run waiting for a previous run
     * to terminate.
     *
     * @return Keys of the jobs with a pending run.
     */
    public Set<IJobKey> getPendingRuns() {
        // Hold the map's lock while iterating, as required for synchronized maps.
        synchronized (pendingRuns) {
            final Set<IJobKey> waitingJobs = pendingRuns.keySet();
            return ImmutableSet.copyOf(waitingJobs);
        }
    }
}