org.apache.aurora.scheduler.cron.quartz.AuroraCronJob.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.aurora.scheduler.cron.quartz.AuroraCronJob.java

Source

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.aurora.scheduler.cron.quartz;

import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import java.util.Date;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicLong;

import javax.inject.Inject;
import javax.inject.Qualifier;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;

import org.apache.aurora.common.stats.Stats;
import org.apache.aurora.common.stats.StatsProvider;
import org.apache.aurora.common.util.BackoffHelper;
import org.apache.aurora.gen.CronCollisionPolicy;
import org.apache.aurora.scheduler.BatchWorker;
import org.apache.aurora.scheduler.BatchWorker.NoResult;
import org.apache.aurora.scheduler.base.JobKeys;
import org.apache.aurora.scheduler.base.Query;
import org.apache.aurora.scheduler.base.Tasks;
import org.apache.aurora.scheduler.configuration.SanitizedConfiguration;
import org.apache.aurora.scheduler.cron.CronException;
import org.apache.aurora.scheduler.cron.SanitizedCronJob;
import org.apache.aurora.scheduler.events.PubsubEvent.EventSubscriber;
import org.apache.aurora.scheduler.state.StateManager;
import org.apache.aurora.scheduler.storage.Storage;
import org.apache.aurora.scheduler.storage.entities.IJobConfiguration;
import org.apache.aurora.scheduler.storage.entities.IJobKey;
import org.apache.aurora.scheduler.storage.entities.ITaskConfig;
import org.quartz.DisallowConcurrentExecution;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.quartz.PersistJobDataAfterExecution;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.lang.annotation.ElementType.FIELD;
import static java.lang.annotation.ElementType.METHOD;
import static java.lang.annotation.ElementType.PARAMETER;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
import static java.util.Objects.requireNonNull;

import static com.google.common.base.Preconditions.checkState;

import static org.apache.aurora.gen.ScheduleStatus.KILLING;

/**
 * Encapsulates the logic behind a single trigger of a single job key. Multiple executions may run
 * concurrently but only a single instance will be active at a time per job key.
 *
 * <p>
 * Executions may block for long periods of time when waiting for a kill to complete. The Quartz
 * scheduler should therefore be configured with a large number of threads.
 */
@DisallowConcurrentExecution
@PersistJobDataAfterExecution
class AuroraCronJob implements Job, EventSubscriber {
    private static final Logger LOG = LoggerFactory.getLogger(AuroraCronJob.class);

    // Exported counters. Each is incremented in doExecute():
    //  - triggers: a trigger found a parseable cron job in storage.
    //  - misfires: a trigger fired but no job with that key was found in storage.
    //  - parse failures: the stored job could not be converted into a SanitizedCronJob.
    //  - collisions: a trigger fired while the job still had active tasks.
    //  - concurrent runs: a trigger fired while a "work in progress" token was still present in
    //    the Quartz job data map.
    private static final AtomicLong CRON_JOB_TRIGGERS = Stats.exportLong("cron_job_triggers");
    private static final AtomicLong CRON_JOB_MISFIRES = Stats.exportLong("cron_job_misfires");
    private static final AtomicLong CRON_JOB_PARSE_FAILURES = Stats.exportLong("cron_job_parse_failures");
    private static final AtomicLong CRON_JOB_COLLISIONS = Stats.exportLong("cron_job_collisions");
    private static final AtomicLong CRON_JOB_CONCURRENT_RUNS = Stats.exportLong("cron_job_concurrent_runs");

    // Audit message attached to the KILLING state change issued under the KILL_EXISTING policy.
    @VisibleForTesting
    static final Optional<String> KILL_AUDIT_MESSAGE = Optional.of("Killed by cronScheduler");

    private final StateManager stateManager;
    // Supplies the backoff strategy used when replaying the delayed launch after a kill.
    private final BackoffHelper delayedStartBackoff;
    private final BatchWorker<NoResult> batchWorker;
    // Job keys whose delayed (post-kill) launch has finished. A later trigger for the same key
    // consumes the entry to clear the "work in progress" token from the job data map.
    private final Set<IJobKey> killFollowups = Sets.newConcurrentHashSet();

    /**
     * Binding annotation (a {@code javax.inject} {@link Qualifier}) that identifies the
     * {@code int} maximum batch size injected into {@link CronBatchWorker}. It is retained at
     * runtime and may be applied to fields, parameters and methods.
     */
    @VisibleForTesting
    @Qualifier
    @Target({ FIELD, PARAMETER, METHOD })
    @Retention(RUNTIME)
    @interface CronMaxBatchSize {
    }

    /**
     * {@link BatchWorker} specialization used for cron scheduling work. It only supplies the
     * service name and forwards its injected dependencies to the superclass.
     */
    static class CronBatchWorker extends BatchWorker<NoResult> {
        private static final String SERVICE_NAME = "CronBatchWorker";

        /**
         * @param storage storage handed to the underlying {@link BatchWorker}.
         * @param statsProvider stats provider handed to the underlying {@link BatchWorker}.
         * @param maxBatchSize value bound to {@link CronMaxBatchSize}, forwarded unchanged.
         */
        @Inject
        CronBatchWorker(
                Storage storage,
                StatsProvider statsProvider,
                @CronMaxBatchSize int maxBatchSize) {
            super(storage, statsProvider, maxBatchSize);
        }

        /** Returns the fixed name of this worker service. */
        @Override
        protected String serviceName() {
            return SERVICE_NAME;
        }
    }

    @Inject
    AuroraCronJob(Config config, StateManager stateManager, CronBatchWorker batchWorker) {

        this.stateManager = requireNonNull(stateManager);
        this.batchWorker = requireNonNull(batchWorker);
        this.delayedStartBackoff = requireNonNull(config.getDelayedStartBackoff());
    }

    /**
     * Quartz entry point for a cron trigger. Verifies that the job detail disallows concurrent
     * execution and then delegates to {@link #doExecute(JobExecutionContext)}.
     *
     * @param context the Quartz execution context for this trigger.
     * @throws JobExecutionException if {@code doExecute} fails.
     * @throws IllegalStateException if the job detail permits concurrent execution.
     */
    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        // The kill/relaunch logic assumes Quartz never runs two triggers of the same job key at
        // once; otherwise one run could kill tasks launched by another.
        boolean concurrencyDisallowed = context.getJobDetail().isConcurrentExectionDisallowed();
        checkState(concurrencyDisallowed);

        doExecute(context);
    }

    /**
     * Handles a single cron trigger for the Aurora job identified by the Quartz job detail.
     *
     * <p>
     * The method first checks the Quartz job data map for a "work in progress" token left by a
     * previous trigger that is still waiting for killed tasks to terminate. If the token is
     * present and the delayed launch has not yet finished, the trigger is ignored. Otherwise the
     * job is looked up in storage and, depending on whether it has active tasks and on its
     * collision policy, new tasks are inserted immediately, existing tasks are killed and a
     * delayed launch is scheduled, or the trigger is dropped.
     *
     * <p>
     * The calling thread blocks until the initial batch of storage work completes; the delayed
     * launch (if any) continues asynchronously on the batch worker.
     *
     * @param context the Quartz execution context for this trigger.
     * @throws JobExecutionException if the batch work fails, or if the calling thread is
     *         interrupted while waiting for it (the interrupt flag is restored in that case).
     */
    @VisibleForTesting
    void doExecute(JobExecutionContext context) throws JobExecutionException {
        final IJobKey key = Quartz.auroraJobKey(context.getJobDetail().getKey());
        final String path = JobKeys.canonicalString(key);

        // Prevent a concurrent run for this job in case a previous trigger took longer to run.
        // This approach relies on saving the "work in progress" token within the job context itself
        // (see below) and relying on killFollowups to signal "work completion".
        if (context.getJobDetail().getJobDataMap().containsKey(path)) {
            CRON_JOB_CONCURRENT_RUNS.incrementAndGet();
            // remove() both tests for and consumes the completion marker in a single step.
            if (killFollowups.remove(key)) {
                context.getJobDetail().getJobDataMap().remove(path);
                LOG.info("Resetting job context for cron {}", path);
            } else {
                LOG.info("Ignoring trigger as another concurrent run is active for cron {}", path);
                return;
            }
        }

        CompletableFuture<NoResult> scheduleResult = batchWorker.<NoResult>execute(storeProvider -> {
            Optional<IJobConfiguration> config = storeProvider.getCronJobStore().fetchJob(key);
            if (!config.isPresent()) {
                LOG.warn("Cron was triggered for {} but no job with that key was found in storage.", path);
                CRON_JOB_MISFIRES.incrementAndGet();
                return BatchWorker.NO_RESULT;
            }

            SanitizedCronJob cronJob;
            try {
                cronJob = SanitizedCronJob.from(new SanitizedConfiguration(config.get()));
            } catch (CronException e) {
                LOG.warn("Invalid cron job for {} in storage - failed to parse", key, e);
                CRON_JOB_PARSE_FAILURES.incrementAndGet();
                return BatchWorker.NO_RESULT;
            }

            CronCollisionPolicy collisionPolicy = cronJob.getCronCollisionPolicy();
            LOG.info("Cron triggered for {} at {} with policy {}", path, new Date(), collisionPolicy);
            CRON_JOB_TRIGGERS.incrementAndGet();

            final Query.Builder activeQuery = Query.jobScoped(key).active();
            Set<String> activeTasks = Tasks.ids(storeProvider.getTaskStore().fetchTasks(activeQuery));

            ITaskConfig task = cronJob.getSanitizedConfig().getJobConfig().getTaskConfig();
            Set<Integer> instanceIds = cronJob.getSanitizedConfig().getInstanceIds();
            if (activeTasks.isEmpty()) {
                // No collision: launch right away.
                stateManager.insertPendingTasks(storeProvider, task, instanceIds);
                return BatchWorker.NO_RESULT;
            }

            CRON_JOB_COLLISIONS.incrementAndGet();
            switch (collisionPolicy) {
            case KILL_EXISTING:
                for (String taskId : activeTasks) {
                    stateManager.changeState(storeProvider, taskId, Optional.absent(), KILLING, KILL_AUDIT_MESSAGE);
                }

                LOG.info("Waiting for job to terminate before launching cron job {}", path);
                // Use job detail map to signal a "work in progress" condition to subsequent triggers.
                context.getJobDetail().getJobDataMap().put(path, null);
                // NOTE(review): presumably the batch worker re-runs this work with backoff until
                // the returned Result reports completion - confirm against BatchWorker.
                batchWorker.executeWithReplay(delayedStartBackoff.getBackoffStrategy(), store -> {
                    Query.Builder query = Query.taskScoped(activeTasks).active();
                    // Query through the store provider supplied for THIS replay, not the one
                    // captured from the enclosing (already finished) batch.
                    if (Iterables.isEmpty(store.getTaskStore().fetchTasks(query))) {
                        LOG.info("Initiating delayed launch of cron {}", path);
                        stateManager.insertPendingTasks(store, task, instanceIds);
                        return new BatchWorker.Result<>(true, null);
                    } else {
                        LOG.info("Not yet safe to run cron {}", path);
                        return new BatchWorker.Result<>(false, null);
                    }
                }).thenAccept(ignored -> {
                    // NOTE(review): if the replay completes exceptionally this marker is never
                    // set and the "work in progress" token stays in place - confirm intended.
                    killFollowups.add(key);
                    LOG.info("Finished delayed launch for cron {}", path);
                });
                break;

            case RUN_OVERLAP:
                LOG.error("Ignoring trigger for job {} with deprecated collision "
                        + "policy RUN_OVERLAP due to unterminated active tasks.", path);
                break;

            case CANCEL_NEW:
                // Existing tasks win; drop this trigger silently.
                break;

            default:
                LOG.error("Unrecognized cron collision policy: {}", collisionPolicy);
                break;
            }
            return BatchWorker.NO_RESULT;
        });

        try {
            scheduleResult.get();
        } catch (InterruptedException e) {
            LOG.warn("Interrupted while trying to launch cron {}", path, e);
            // Restore the interrupt flag so callers up the stack can observe it.
            Thread.currentThread().interrupt();
            throw new JobExecutionException(e);
        } catch (ExecutionException e) {
            // The batch work itself failed; the thread was not interrupted.
            LOG.warn("Failed to launch cron {}", path, e);
            throw new JobExecutionException(e);
        }
    }

    /**
     * Immutable holder for the settings {@link AuroraCronJob} needs at construction time;
     * currently only the backoff helper used for delayed launches.
     */
    static class Config {
        private final BackoffHelper delayedStartBackoff;

        /**
         * @param delayedStartBackoff backoff helper for delayed launches; must not be null.
         * @throws NullPointerException if {@code delayedStartBackoff} is null.
         */
        Config(BackoffHelper delayedStartBackoff) {
            requireNonNull(delayedStartBackoff);
            this.delayedStartBackoff = delayedStartBackoff;
        }

        /** Returns the backoff helper supplied at construction. */
        public BackoffHelper getDelayedStartBackoff() {
            return this.delayedStartBackoff;
        }
    }
}