com.twitter.aurora.scheduler.state.SchedulerCoreImpl.java Source code

Java tutorial

Introduction

Here is the source code for com.twitter.aurora.scheduler.state.SchedulerCoreImpl.java

Source

/*
 * Copyright 2013 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.twitter.aurora.scheduler.state;

import java.util.Set;
import java.util.logging.Logger;

import javax.inject.Inject;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Functions;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import com.twitter.aurora.gen.ScheduleStatus;
import com.twitter.aurora.scheduler.TaskIdGenerator;
import com.twitter.aurora.scheduler.base.JobKeys;
import com.twitter.aurora.scheduler.base.Query;
import com.twitter.aurora.scheduler.base.ScheduleException;
import com.twitter.aurora.scheduler.base.Tasks;
import com.twitter.aurora.scheduler.configuration.ConfigurationManager.TaskDescriptionException;
import com.twitter.aurora.scheduler.configuration.SanitizedConfiguration;
import com.twitter.aurora.scheduler.storage.Storage;
import com.twitter.aurora.scheduler.storage.Storage.MutableStoreProvider;
import com.twitter.aurora.scheduler.storage.Storage.MutateWork;
import com.twitter.aurora.scheduler.storage.entities.IAssignedTask;
import com.twitter.aurora.scheduler.storage.entities.IJobConfiguration;
import com.twitter.aurora.scheduler.storage.entities.IJobKey;
import com.twitter.aurora.scheduler.storage.entities.IScheduledTask;
import com.twitter.aurora.scheduler.storage.entities.ITaskConfig;
import com.twitter.common.args.Arg;
import com.twitter.common.args.CmdLine;
import com.twitter.common.args.constraints.Positive;

import static com.google.common.base.Preconditions.checkNotNull;

import static com.twitter.aurora.gen.ScheduleStatus.KILLING;
import static com.twitter.aurora.gen.ScheduleStatus.RESTARTING;
import static com.twitter.aurora.scheduler.base.Tasks.ACTIVE_STATES;

/**
 * Implementation of the scheduler core.
 */
class SchedulerCoreImpl implements SchedulerCore {
    // Command-line tunable cap on the number of instances in one job (defaults to 4000).
    // Enforced by runJobFilters().
    @Positive
    @CmdLine(name = "max_tasks_per_job", help = "Maximum number of allowed tasks in a single job.")
    public static final Arg<Integer> MAX_TASKS_PER_JOB = Arg.create(4000);

    private static final Logger LOG = Logger.getLogger(SchedulerCoreImpl.class.getName());

    // Backing store; used here for task lookups and for write transactions.
    private final Storage storage;

    // Kept separately from jobManagers so cron jobs can be checked for and started on demand.
    private final CronJobManager cronScheduler;

    // Schedulers that are responsible for triggering execution of jobs.
    // Order matters: managers are consulted in list order when a job is created.
    private final ImmutableList<JobManager> jobManagers;

    // TODO(Bill Farner): Avoid using StateManagerImpl.
    // State manager handles persistence of task modifications and state transitions.
    private final StateManagerImpl stateManager;

    // Used by runJobFilters() to verify that generated task IDs stay within MAX_TASK_ID_LENGTH.
    private final TaskIdGenerator taskIdGenerator;
    // Decides whether a task configuration / instance count combination is acceptable.
    private final JobFilter jobFilter;

    /**
     * Builds the scheduler core from its injected collaborators.
     *
     * @param storage Backing store implementation; must not be null.
     * @param cronScheduler Job manager for cron jobs; must not be null.
     * @param immediateScheduler Job manager that accepts any job; must not be null.
     * @param stateManager Persistent state manager; must not be null.
     * @param taskIdGenerator Task ID generator; must not be null.
     * @param jobFilter Job filter; must not be null.
     */
    @Inject
    public SchedulerCoreImpl(Storage storage, CronJobManager cronScheduler, ImmediateJobManager immediateScheduler,
            StateManagerImpl stateManager, TaskIdGenerator taskIdGenerator, JobFilter jobFilter) {

        this.storage = checkNotNull(storage);

        // Ordering is significant: the immediate scheduler takes every job it is offered, so any
        // more selective manager has to be listed ahead of it. ImmutableList rejects null elements,
        // which also covers the null check for both managers.
        this.jobManagers = ImmutableList.<JobManager>of(cronScheduler, immediateScheduler);
        this.cronScheduler = cronScheduler;

        this.stateManager = checkNotNull(stateManager);
        this.taskIdGenerator = checkNotNull(taskIdGenerator);
        this.jobFilter = checkNotNull(jobFilter);
    }

    /**
     * Reports whether any configured job manager already holds a job with the same key.
     *
     * @param job Job configuration whose key is looked up.
     * @return {@code true} as soon as one manager reports having the job, {@code false} otherwise.
     */
    private boolean hasActiveJob(IJobConfiguration job) {
        Predicate<JobManager> ownsJob = managerHasJob(job);
        for (JobManager manager : jobManagers) {
            if (ownsJob.apply(manager)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Moves the given tasks to {@link ScheduleStatus#UNKNOWN}, without an audit message.
     *
     * @param taskIds IDs of the tasks that were deleted.
     */
    @Override
    public synchronized void tasksDeleted(Set<String> taskIds) {
        Query.Builder deletedTasks = Query.taskScoped(taskIds);
        Optional<String> noMessage = Optional.absent();
        setTaskStatus(deletedTasks, ScheduleStatus.UNKNOWN, noMessage);
    }

    /**
     * Registers a new job with the first job manager willing to accept it.
     *
     * @param sanitizedConfiguration Sanitized configuration of the job to create.
     * @throws ScheduleException If a manager already has the job, the job filters reject it, or no
     *     manager accepts it.
     */
    @Override
    public synchronized void createJob(SanitizedConfiguration sanitizedConfiguration) throws ScheduleException {

        IJobConfiguration job = sanitizedConfiguration.getJobConfig();
        if (hasActiveJob(job)) {
            throw new ScheduleException("Job already exists: " + JobKeys.toPath(job));
        }

        runJobFilters(job.getKey(), job.getTaskConfig(), job.getInstanceCount(), false);

        // Offer the job to each manager in order; the first one to take it ends the search.
        for (final JobManager candidate : jobManagers) {
            if (candidate.receiveJob(sanitizedConfiguration)) {
                LOG.info("Job accepted by manager: " + candidate.getUniqueKey());
                return;
            }
        }

        // Reaching this point means every manager declined the job.
        LOG.severe("Job was not accepted by any of the configured schedulers, discarding.");
        LOG.severe("Discarded job: " + job);
        throw new ScheduleException("Job not accepted, discarding.");
    }

    // Upper bound on the length of a generated task ID, checked by runJobFilters().
    // This number is derived from the maximum file name length limit on most UNIX systems, less
    // the number of characters we've observed being added by mesos for the executor ID, prefix, and
    // delimiters.
    // Package-private (rather than private) so tests can reference it.
    @VisibleForTesting
    static final int MAX_TASK_ID_LENGTH = 255 - 90;

    // TODO(maximk): Consider a better approach to quota checking. MESOS-4476.
    /**
     * Validates a job (or an addition to a job) against the task ID length limit, the injected
     * job filter, and the per-job task limit, in that order.
     *
     * @param jobKey Key of the job being validated.
     * @param task Task configuration to validate.
     * @param count Number of instances being requested.
     * @param incremental When {@code true}, the job's currently active tasks are added to
     *     {@code count} before the checks run.
     * @throws ScheduleException If any of the checks fails.
     */
    private void runJobFilters(IJobKey jobKey, ITaskConfig task, int count, boolean incremental)
            throws ScheduleException {

        // The active-task lookup is weakly consistent and only performed for incremental requests.
        final int alreadyActive = incremental
                ? Storage.Util.weaklyConsistentFetchTasks(storage, Query.jobScoped(jobKey).active()).size()
                : 0;
        final int totalInstances = count + alreadyActive;

        // TODO(maximk): This is a short-term hack to stop the bleeding from
        //               https://issues.apache.org/jira/browse/MESOS-691
        boolean taskIdTooLong = taskIdGenerator.generate(task, totalInstances).length() > MAX_TASK_ID_LENGTH;
        if (taskIdTooLong) {
            throw new ScheduleException("Task ID is too long, please shorten your role or job name.");
        }

        // TODO(maximk): Consider deprecating JobFilterResult in favor of custom exception.
        JobFilter.JobFilterResult verdict = jobFilter.filter(task, totalInstances);
        if (!verdict.isPass()) {
            throw new ScheduleException(verdict.getReason());
        }

        int taskLimit = MAX_TASKS_PER_JOB.get();
        if (totalInstances > taskLimit) {
            throw new ScheduleException("Job exceeds task limit of " + taskLimit);
        }
    }

    /**
     * Runs the job filters against a job configuration without creating the job.
     *
     * @param sanitizedConfiguration Sanitized configuration of the job to validate.
     * @throws ScheduleException If the job filters reject the configuration.
     */
    @Override
    public void validateJobResources(SanitizedConfiguration sanitizedConfiguration) throws ScheduleException {

        IJobConfiguration config = sanitizedConfiguration.getJobConfig();
        IJobKey key = config.getKey();
        ITaskConfig template = config.getTaskConfig();
        int instances = config.getInstanceCount();

        // Non-incremental: the requested instance count is validated as-is.
        runJobFilters(key, template, instances, false);
    }

    /**
     * Adds new pending instances to a job after validating the resulting job size.
     *
     * @param jobKey Key of the job receiving the instances.
     * @param instanceIds IDs of the instances to add.
     * @param config Task configuration applied to every added instance.
     * @throws ScheduleException If the job filters reject the addition, or if any requested
     *     instance ID is already used by an active task of the job.
     */
    @Override
    public void addInstances(final IJobKey jobKey, final ImmutableSet<Integer> instanceIds,
            final ITaskConfig config) throws ScheduleException {

        // Incremental: currently active tasks of the job count toward the limits.
        runJobFilters(jobKey, config, instanceIds.size(), true);
        storage.write(new MutateWork.NoResult<ScheduleException>() {
            @Override
            protected void execute(MutableStoreProvider storeProvider) throws ScheduleException {

                ImmutableSet<IScheduledTask> activeTasks = storeProvider.getTaskStore()
                        .fetchTasks(Query.jobScoped(jobKey).active());

                // Refuse the whole request if any active task already occupies a requested ID.
                for (IScheduledTask activeTask : activeTasks) {
                    if (instanceIds.contains(Tasks.SCHEDULED_TO_INSTANCE_ID.apply(activeTask))) {
                        throw new ScheduleException("Instance ID collision detected.");
                    }
                }

                stateManager.insertPendingTasks(Maps.asMap(instanceIds, Functions.constant(config)));
            }
        });
    }

    /**
     * Triggers an immediate run of an existing cron job.
     *
     * @param jobKey Key of the cron job to start; must not be null.
     * @throws ScheduleException If the cron scheduler has no job for {@code jobKey}.
     * @throws TaskDescriptionException Declared for failures raised while starting the job.
     */
    @Override
    public synchronized void startCronJob(IJobKey jobKey) throws ScheduleException, TaskDescriptionException {

        checkNotNull(jobKey);

        if (cronScheduler.hasJob(jobKey)) {
            cronScheduler.startJobNow(jobKey);
            return;
        }

        throw new ScheduleException("Cron job does not exist for " + JobKeys.toPath(jobKey));
    }

    /**
     * Builds a predicate that asks a job manager whether it has a job with the given job's key.
     *
     * @param job Job whose key is matched; the key is read each time the predicate is applied.
     * @return A new predicate that is true for managers holding a job with that key.
     */
    private static Predicate<JobManager> managerHasJob(final IJobConfiguration job) {
        Predicate<JobManager> hasMatchingJob = new Predicate<JobManager>() {
            @Override
            public boolean apply(JobManager candidate) {
                return candidate.hasJob(job.getKey());
            }
        };
        return hasMatchingJob;
    }

    /**
     * Asks the state manager to move every task matching the query to the given status.
     *
     * @param query Selects the tasks to update; must not be null.
     * @param status Status to apply; must not be null.
     * @param message Optional audit message, handed to the state manager unchanged.
     */
    @Override
    public synchronized void setTaskStatus(Query.Builder query, final ScheduleStatus status,
            Optional<String> message) {

        Query.Builder checkedQuery = checkNotNull(query);
        ScheduleStatus checkedStatus = checkNotNull(status);

        stateManager.changeState(checkedQuery, checkedStatus, message);
    }

    /**
     * Moves the tasks matching a query to {@code KILLING} and, when the query targets a whole job,
     * also asks every job manager to delete that job.
     *
     * <p>If the caller did not restrict the query by status, only tasks in an active state are
     * targeted; a caller-supplied status filter is honored as given.
     *
     * @param query Selects the tasks to kill; must not be null.
     * @param user Name of the requesting user, recorded in the audit message.
     * @throws ScheduleException If no job was deleted and no task was affected.
     */
    @Override
    public synchronized void killTasks(Query.Builder query, String user) throws ScheduleException {
        checkNotNull(query);
        LOG.info("Killing tasks matching " + query);

        boolean jobDeleted = false;

        if (Query.isOnlyJobScoped(query)) {
            // If this looks like a query for all tasks in a job, instruct the scheduler modules to
            // delete the job.
            IJobKey jobKey = JobKeys.from(query).get();
            for (JobManager manager : jobManagers) {
                // Every manager is asked (no short-circuit), so the job is removed wherever it lives.
                if (manager.deleteJob(jobKey)) {
                    jobDeleted = true;
                }
            }
        }

        // Unless statuses were specifically supplied, only attempt to kill active tasks.
        // (The branches were previously swapped, which replaced explicit status filters with
        // ACTIVE_STATES and left unfiltered queries unrestricted.)
        Query.Builder taskQuery = query.get().isSetStatuses() ? query : query.byStatus(ACTIVE_STATES);

        int tasksAffected = stateManager.changeState(taskQuery, KILLING, Optional.of("Killed by " + user));
        if (!jobDeleted && (tasksAffected == 0)) {
            throw new ScheduleException("No jobs to kill");
        }
    }

    /**
     * Moves the requested shards of a job to {@code RESTARTING}, provided all of them are active.
     *
     * @param jobKey Key of the job that owns the shards.
     * @param shards IDs of the shards to restart; must contain at least one ID.
     * @param requestingUser Name of the requesting user, recorded in the audit message.
     * @throws ScheduleException If the job key is invalid, no shard is given, or the number of
     *     active tasks found differs from the number of shards requested.
     */
    @Override
    public void restartShards(IJobKey jobKey, final Set<Integer> shards, final String requestingUser)
            throws ScheduleException {

        if (!JobKeys.isValid(jobKey)) {
            throw new ScheduleException("Invalid job key: " + jobKey);
        }

        if (shards.isEmpty()) {
            throw new ScheduleException("At least one shard must be specified.");
        }

        final Query.Builder activeShards = Query.instanceScoped(jobKey, shards).active();
        storage.write(new MutateWork.NoResult<ScheduleException>() {
            @Override
            protected void execute(MutableStoreProvider storeProvider) throws ScheduleException {

                Set<IScheduledTask> found = storeProvider.getTaskStore().fetchTasks(activeShards);

                // A size mismatch means at least one requested shard has no active task.
                boolean allRequestedActive = found.size() == shards.size();
                if (!allRequestedActive) {
                    throw new ScheduleException("Not all requested shards are active.");
                }

                LOG.info("Restarting shards matching " + activeShards);
                Query.Builder byTaskId = Query.taskScoped(Tasks.ids(found));
                Optional<String> auditMessage = Optional.of("Restarted by " + requestingUser);
                stateManager.changeState(byTaskId, RESTARTING, auditMessage);
            }
        });
    }

    /**
     * Moves a task to {@link ScheduleStatus#PREEMPTING}, recording which task it makes way for.
     *
     * @param task Task to preempt; must not be null.
     * @param preemptingTask Task named in the audit message as the beneficiary; must not be null.
     */
    @Override
    public synchronized void preemptTask(IAssignedTask task, IAssignedTask preemptingTask) {
        checkNotNull(task);
        checkNotNull(preemptingTask);
        // TODO(William Farner): Throw SchedulingException if either task doesn't exist, etc.

        Query.Builder victim = Query.taskScoped(task.getTaskId());
        Optional<String> reason = Optional.of("Preempting in favor of " + preemptingTask.getTaskId());
        stateManager.changeState(victim, ScheduleStatus.PREEMPTING, reason);
    }
}