org.apache.aurora.scheduler.pruning.TaskHistoryPruner.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.aurora.scheduler.pruning.TaskHistoryPruner.java

Source

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.aurora.scheduler.pruning;

import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

import javax.inject.Inject;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.eventbus.Subscribe;

import org.apache.aurora.common.application.Lifecycle;
import org.apache.aurora.common.quantity.Amount;
import org.apache.aurora.common.quantity.Time;
import org.apache.aurora.common.stats.StatsProvider;
import org.apache.aurora.common.util.Clock;
import org.apache.aurora.gen.apiConstants;
import org.apache.aurora.scheduler.BatchWorker;
import org.apache.aurora.scheduler.SchedulerModule.TaskEventBatchWorker;
import org.apache.aurora.scheduler.async.AsyncModule.AsyncExecutor;
import org.apache.aurora.scheduler.async.DelayExecutor;
import org.apache.aurora.scheduler.base.Query;
import org.apache.aurora.scheduler.base.Tasks;
import org.apache.aurora.scheduler.state.StateManager;
import org.apache.aurora.scheduler.storage.Storage;
import org.apache.aurora.scheduler.storage.entities.IJobKey;
import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.util.Objects.requireNonNull;

import static org.apache.aurora.scheduler.base.AsyncUtil.shutdownOnError;
import static org.apache.aurora.scheduler.events.PubsubEvent.EventSubscriber;
import static org.apache.aurora.scheduler.events.PubsubEvent.TaskStateChange;

/**
 * Prunes tasks in a job based on per-job history and an inactive time threshold by observing tasks
 * transitioning into one of the inactive states.
 *
 * <p>Every terminal-state notification received by {@link #recordStateChange(TaskStateChange)}
 * submits two pieces of work to the executor: a delayed deletion of that specific task, and a
 * check that trims the owning job's terminal-task history down to the configured goal.
 */
public class TaskHistoryPruner implements EventSubscriber {
    private static final Logger LOG = LoggerFactory.getLogger(TaskHistoryPruner.class);
    private static final String FATAL_ERROR_FORMAT = "Unexpected problem pruning task history for %s. Triggering shutdown";
    /** Name of the counter that tracks how many task deletions this pruner has submitted. */
    @VisibleForTesting
    static final String TASKS_PRUNED = "tasks_pruned";

    private final DelayExecutor executor;
    private final StateManager stateManager;
    private final Clock clock;
    private final HistoryPrunnerSettings settings;
    private final Storage storage;
    private final Lifecycle lifecycle;
    private final TaskEventBatchWorker batchWorker;
    private final AtomicLong prunedTasksCount;

    /**
     * Accepts a task only when its latest event is at least the minimum retention threshold old.
     *
     * <p>Deliberately an anonymous class rather than a lambda: a lambda body in a field
     * initializer may not read the blank final fields {@code clock} and {@code settings}, which
     * are only assigned later in the constructor.
     */
    private final Predicate<IScheduledTask> safeToDelete = new Predicate<IScheduledTask>() {
        @Override
        public boolean apply(IScheduledTask task) {
            return Tasks.getLatestEvent(task).getTimestamp() <= clock.nowMillis()
                    - settings.minRetentionThresholdMillis;
        }
    };

    /**
     * Immutable pruning configuration, with both durations stored in milliseconds.
     */
    static class HistoryPrunnerSettings {
        private final long pruneThresholdMillis;
        private final long minRetentionThresholdMillis;
        private final int perJobHistoryGoal;

        /**
         * @param inactivePruneThreshold Upper bound used by {@code calculateTimeout} for how long
         *                               a terminated task is kept before its own deletion fires.
         * @param minRetentionThreshold Minimum age of a task's latest event before the task may be
         *                              deleted; also the floor of the computed deletion delay.
         * @param perJobHistoryGoal Number of terminal tasks per job above which the job's history
         *                          is trimmed.
         */
        HistoryPrunnerSettings(Amount<Long, Time> inactivePruneThreshold, Amount<Long, Time> minRetentionThreshold,
                int perJobHistoryGoal) {

            this.pruneThresholdMillis = inactivePruneThreshold.as(Time.MILLISECONDS);
            this.minRetentionThresholdMillis = minRetentionThreshold.as(Time.MILLISECONDS);
            this.perJobHistoryGoal = perJobHistoryGoal;
        }
    }

    /**
     * Creates the pruner. All collaborators are required; a {@code null} argument results in a
     * {@link NullPointerException}.
     *
     * @param executor Executor used to run (optionally delayed) pruning work.
     * @param stateManager Used to delete tasks from within a batch worker unit of work.
     * @param clock Source of the current time.
     * @param settings Pruning thresholds and per-job history goal.
     * @param storage Storage queried for a job's terminal tasks.
     * @param lifecycle Passed to {@code shutdownOnError} for failures in pruning work.
     * @param batchWorker Worker that task deletions are submitted to.
     * @param statsProvider Provider of the {@link #TASKS_PRUNED} counter.
     */
    @Inject
    TaskHistoryPruner(@AsyncExecutor DelayExecutor executor, StateManager stateManager, Clock clock,
            HistoryPrunnerSettings settings, Storage storage, Lifecycle lifecycle, TaskEventBatchWorker batchWorker,
            StatsProvider statsProvider) {

        this.executor = requireNonNull(executor);
        this.stateManager = requireNonNull(stateManager);
        this.clock = requireNonNull(clock);
        this.settings = requireNonNull(settings);
        this.storage = requireNonNull(storage);
        this.lifecycle = requireNonNull(lifecycle);
        this.batchWorker = requireNonNull(batchWorker);
        // Checked explicitly for consistency with the other collaborators.
        this.prunedTasksCount = requireNonNull(statsProvider).makeCounter(TASKS_PRUNED);
    }

    /**
     * Computes how long to wait before deleting a task whose terminal event happened at the given
     * time.
     *
     * <p>The result is the prune threshold minus the time already elapsed since the event (an
     * event timestamp in the future counts as zero elapsed time), but never less than the minimum
     * retention threshold.
     *
     * @param taskEventTimestampMillis Timestamp of the task event, in milliseconds.
     * @return Delay in milliseconds before the task should be deleted.
     */
    @VisibleForTesting
    long calculateTimeout(long taskEventTimestampMillis) {
        return Math.max(settings.minRetentionThresholdMillis,
                settings.pruneThresholdMillis - Math.max(0, clock.nowMillis() - taskEventTimestampMillis));
    }

    /**
     * When triggered, records an inactive task state change.
     *
     * <p>Changes whose new state is not terminated are ignored. For a change flagged as a
     * transition the deletion delay is based on the current time; otherwise it is based on the
     * timestamp of the task's last recorded event.
     *
     * @param change Event when a task changes state.
     */
    @Subscribe
    public void recordStateChange(TaskStateChange change) {
        if (Tasks.isTerminated(change.getNewState())) {
            long timeoutBasis = change.isTransition() ? clock.nowMillis()
                    : Iterables.getLast(change.getTask().getTaskEvents()).getTimestamp();
            registerInactiveTask(Tasks.getJob(change.getTask()), change.getTaskId(),
                    calculateTimeout(timeoutBasis));
        }
    }

    /**
     * Submits deletion of the given tasks to the batch worker and bumps the pruned-task counter.
     *
     * <p>NOTE(review): the counter is incremented right after the work is handed to the batch
     * worker, not inside the unit of work. If {@code batchWorker.execute} is asynchronous the stat
     * can run ahead of the actual storage deletion - confirm this is intended.
     *
     * @param taskIds IDs of the tasks to delete.
     */
    private void deleteTasks(final Set<String> taskIds) {
        LOG.info("Pruning inactive tasks {}", taskIds);
        batchWorker.execute(storeProvider -> {
            stateManager.deleteTasks(storeProvider, taskIds);
            return BatchWorker.NO_RESULT;
        });
        prunedTasksCount.addAndGet(taskIds.size());
    }

    /**
     * Builds the query selecting all tasks of a job that are in a terminal state.
     *
     * @param jobKey Job whose history is queried.
     * @return Query builder scoped to the job and restricted to terminal statuses.
     */
    @VisibleForTesting
    static Query.Builder jobHistoryQuery(IJobKey jobKey) {
        return Query.jobScoped(jobKey).byStatus(apiConstants.TERMINAL_STATES);
    }

    /**
     * Schedules pruning work for a task that has reached a terminal state.
     *
     * <p>Two submissions are made: the task's own deletion after {@code timeRemaining}
     * milliseconds, and a per-job history trim submitted without a delay argument. Both are
     * wrapped in {@code shutdownOnError} with a message identifying the task or job.
     *
     * @param jobKey Job the task belongs to.
     * @param taskId ID of the terminated task.
     * @param timeRemaining Delay in milliseconds before the task itself is deleted.
     */
    private void registerInactiveTask(final IJobKey jobKey, final String taskId, long timeRemaining) {

        LOG.debug("Prune task {} in {} ms.", taskId, timeRemaining);

        executor.execute(
                shutdownOnError(lifecycle, LOG, String.format(FATAL_ERROR_FORMAT, "task: " + taskId), () -> {
                    LOG.info("Pruning expired inactive task {}", taskId);
                    deleteTasks(ImmutableSet.of(taskId));
                }), Amount.of(timeRemaining, Time.MILLISECONDS));

        executor.execute(
                shutdownOnError(lifecycle, LOG, String.format(FATAL_ERROR_FORMAT, "job: " + jobKey),
                        () -> pruneJobHistory(jobKey)));
    }

    /**
     * Trims a job's terminal-task history towards the per-job history goal.
     *
     * <p>At most the number of tasks in excess of the goal is deleted, and only tasks that pass
     * {@code safeToDelete}; fewer tasks (or none) are deleted when not enough of them are old
     * enough. Candidates are taken in {@code Tasks.LATEST_ACTIVITY} order (presumably oldest
     * activity first - verify against that ordering's definition).
     *
     * @param jobKey Job whose history should be trimmed.
     */
    private void pruneJobHistory(IJobKey jobKey) {
        Iterable<IScheduledTask> inactiveTasks = Storage.Util.fetchTasks(storage, jobHistoryQuery(jobKey));
        int tasksToPrune = Iterables.size(inactiveTasks) - settings.perJobHistoryGoal;
        if (tasksToPrune <= 0) {
            // History is within the goal; nothing to trim.
            return;
        }

        Set<String> toPrune = FluentIterable.from(Tasks.LATEST_ACTIVITY.sortedCopy(inactiveTasks))
                .filter(safeToDelete)
                .limit(tasksToPrune)
                .transform(Tasks::id)
                .toSet();
        if (!toPrune.isEmpty()) {
            deleteTasks(toPrune);
        }
    }
}