gobblin.runtime.TaskExecutor.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.runtime.TaskExecutor.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.runtime;

import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import gobblin.runtime.fork.AsynchronousFork;
import gobblin.runtime.fork.Fork;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.AbstractIdleService;

import gobblin.configuration.ConfigurationKeys;
import gobblin.metrics.GobblinMetrics;
import gobblin.util.ExecutorsUtils;

/**
 * A class for executing {@link Task}s and retrying failed ones as well as for executing {@link AsynchronousFork}s.
 *
 * @author Yinan Li
 */
public class TaskExecutor extends AbstractIdleService {

    private static final Logger LOG = LoggerFactory.getLogger(TaskExecutor.class);

    // Thread pool executor for running tasks
    private final ExecutorService taskExecutor;

    // Scheduled thread pool executor for scheduling task retries
    private final ScheduledThreadPoolExecutor taskRetryExecutor;

    // A separate thread pool executor for running forks of tasks
    private final ExecutorService forkExecutor;

    // Task retry interval
    private final long retryIntervalInSeconds;

    /**
     * Constructor used internally.
     */
    private TaskExecutor(int taskExecutorThreadPoolSize, int coreRetryThreadPoolSize, long retryIntervalInSeconds) {
        Preconditions.checkArgument(taskExecutorThreadPoolSize > 0,
                "Task executor thread pool size should be positive");
        Preconditions.checkArgument(retryIntervalInSeconds > 0, "Task retry interval should be positive");

        // Currently a fixed-size thread pool is used to execute tasks. We probably need to revisit this later.
        this.taskExecutor = Executors.newFixedThreadPool(taskExecutorThreadPoolSize,
                ExecutorsUtils.newThreadFactory(Optional.of(LOG), Optional.of("TaskExecutor-%d")));

        // Using a separate thread pool for task retries to achieve isolation
        // between normal task execution and task retries
        this.taskRetryExecutor = new ScheduledThreadPoolExecutor(coreRetryThreadPoolSize,
                ExecutorsUtils.newThreadFactory(Optional.of(LOG), Optional.of("TaskRetryExecutor-%d")));
        this.retryIntervalInSeconds = retryIntervalInSeconds;

        this.forkExecutor = new ThreadPoolExecutor(
                // The core thread pool size is equal to that of the task executor as there's at least one fork per task
                taskExecutorThreadPoolSize,
                // The fork executor thread pool size is essentially unbounded. This is to make sure all forks of
                // a task get a thread to run so all forks of the task are making progress. This is necessary since
                // otherwise the parent task will be blocked if the record queue (bounded) of some fork is full and
                // that fork has not yet started to run because of no available thread. The task cannot proceed in
                // this case because it has to make sure every records go to every forks.
                Integer.MAX_VALUE, 0L, TimeUnit.MILLISECONDS,
                // The work queue is a SynchronousQueue. This essentially forces a new thread to be created for each fork.
                new SynchronousQueue<Runnable>(),
                ExecutorsUtils.newThreadFactory(Optional.of(LOG), Optional.of("ForkExecutor-%d")));
    }

    /**
     * Constructor to work with {@link java.util.Properties}.
     */
    public TaskExecutor(Properties properties) {
        this(Integer.parseInt(properties.getProperty(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY,
                Integer.toString(ConfigurationKeys.DEFAULT_TASK_EXECUTOR_THREADPOOL_SIZE))),
                Integer.parseInt(properties.getProperty(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY,
                        Integer.toString(ConfigurationKeys.DEFAULT_TASK_RETRY_THREAD_POOL_CORE_SIZE))),
                Long.parseLong(properties.getProperty(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY,
                        Long.toString(ConfigurationKeys.DEFAULT_TASK_RETRY_INTERVAL_IN_SEC))));
    }

    /**
     * Constructor to work with Hadoop {@link org.apache.hadoop.conf.Configuration}.
     */
    public TaskExecutor(Configuration conf) {
        this(conf.getInt(ConfigurationKeys.TASK_EXECUTOR_THREADPOOL_SIZE_KEY,
                ConfigurationKeys.DEFAULT_TASK_EXECUTOR_THREADPOOL_SIZE),
                conf.getInt(ConfigurationKeys.TASK_RETRY_THREAD_POOL_CORE_SIZE_KEY,
                        ConfigurationKeys.DEFAULT_TASK_RETRY_THREAD_POOL_CORE_SIZE),
                conf.getLong(ConfigurationKeys.TASK_RETRY_INTERVAL_IN_SEC_KEY,
                        ConfigurationKeys.DEFAULT_TASK_RETRY_INTERVAL_IN_SEC));
    }

    @Override
    protected void startUp() throws Exception {
        LOG.info("Starting the task executor");
        if (this.taskExecutor.isShutdown() || this.taskExecutor.isTerminated()) {
            throw new IllegalStateException("Task thread pool executor is shutdown or terminated");
        }
        if (this.taskRetryExecutor.isShutdown() || this.taskRetryExecutor.isTerminated()) {
            throw new IllegalStateException("Task retry thread pool executor is shutdown or terminated");
        }
        if (this.forkExecutor.isShutdown() || this.forkExecutor.isTerminated()) {
            throw new IllegalStateException("Fork thread pool executor is shutdown or terminated");
        }
    }

    @Override
    protected void shutDown() throws Exception {
        LOG.info("Stopping the task executor");
        try {
            ExecutorsUtils.shutdownExecutorService(this.taskExecutor, Optional.of(LOG));
        } finally {
            try {
                ExecutorsUtils.shutdownExecutorService(this.taskRetryExecutor, Optional.of(LOG));
            } finally {
                ExecutorsUtils.shutdownExecutorService(this.forkExecutor, Optional.of(LOG));
            }
        }
    }

    /**
     * Execute a {@link Task}.
     *
     * @param task {@link Task} to be executed
     */
    public void execute(Task task) {
        LOG.info(String.format("Executing task %s", task.getTaskId()));
        this.taskExecutor.execute(task);
    }

    /**
     * Submit a {@link Task} to run.
     *
     * @param task {@link Task} to be submitted
     * @return a {@link java.util.concurrent.Future} for the submitted {@link Task}
     */
    public Future<?> submit(Task task) {
        LOG.info(String.format("Submitting task %s", task.getTaskId()));
        return this.taskExecutor.submit(task);
    }

    /**
     * Execute a {@link Fork}.
     *
     * @param fork {@link Fork} to be executed
     */
    public void execute(Fork fork) {
        LOG.info(String.format("Executing fork %d of task %s", fork.getIndex(), fork.getTaskId()));
        this.forkExecutor.execute(fork);
    }

    /**
     * Submit a {@link Fork} to run.
     *
     * @param fork {@link Fork} to be submitted
     * @return a {@link java.util.concurrent.Future} for the submitted {@link Fork}
     */
    public Future<?> submit(Fork fork) {
        LOG.info(String.format("Submitting fork %d of task %s", fork.getIndex(), fork.getTaskId()));
        return this.forkExecutor.submit(fork);
    }

    /**
     * Retry a failed {@link Task}.
     *
     * @param task failed {@link Task} to be retried
     */
    public void retry(Task task) {
        if (GobblinMetrics.isEnabled(task.getTaskState().getWorkunit())
                && task.getTaskState().contains(ConfigurationKeys.FORK_BRANCHES_KEY)) {
            // Adjust metrics to clean up numbers from the failed task
            task.getTaskState()
                    .adjustJobMetricsOnRetry(task.getTaskState().getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY));
        }

        // Task retry interval increases linearly with number of retries
        long interval = task.getRetryCount() * this.retryIntervalInSeconds;
        // Schedule the retry of the failed task
        this.taskRetryExecutor.schedule(task, interval, TimeUnit.SECONDS);
        LOG.info(String.format("Scheduled retry of failed task %s to run in %d seconds", task.getTaskId(),
                interval));
        task.incrementRetryCount();
    }
}