gridool.processors.job.GridJobWorker.java Source code

Java tutorial

Introduction

Here is the source code for gridool.processors.job.GridJobWorker.java

Source

/*
 * @(#)$Id$
 *
 * Copyright 2006-2008 Makoto YUI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * Contributors:
 *     Makoto YUI - initial implementation
 */
package gridool.processors.job;

import gridool.GridConfiguration;
import gridool.GridException;
import gridool.GridJob;
import gridool.GridNode;
import gridool.GridResourceRegistry;
import gridool.GridTask;
import gridool.GridTaskResult;
import gridool.GridTaskResultPolicy;
import gridool.Settings;
import gridool.annotation.GridAnnotationProcessor;
import gridool.communication.GridCommunicationManager;
import gridool.construct.GridDiscoveryFailureLogger;
import gridool.discovery.GridDiscoveryListener;
import gridool.discovery.GridDiscoveryService;
import gridool.monitor.GridExecutionMonitor;
import gridool.processors.task.GridTaskProcessor;
import gridool.routing.GridNodeSelector;
import gridool.routing.GridRouter;
import gridool.taskqueue.sender.SenderResponseTaskQueue;
import gridool.util.GridUtils;
import gridool.util.concurrent.CancellableTask;
import gridool.util.concurrent.ExecutorFactory;
import gridool.util.datetime.DateTimeFormatter;
import gridool.util.datetime.TextProgressBar;
import gridool.util.lang.ClassUtils;
import gridool.util.primitive.Primitives;
import gridool.util.struct.Pair;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

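/**
 * 
 * <DIV lang="en">
 * Executes a single {@link GridJob}: maps the job to tasks, submits the tasks through a
 * private executor, collects the task results, and finally invokes the job's reduce step.
 * </DIV>
 * <DIV lang="ja"></DIV>
 * 
 * @param <A> type of the argument handed to the job's map step
 * @param <R> type of the value produced by the job's reduce step
 * @author Makoto YUI (yuin405@gmail.com)
 */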
public final class GridJobWorker<A, R> implements CancellableTask<R> {
    private static final Log LOG = LogFactory.getLog(GridJobWorker.class);
    private static final long JOB_INSPECTION_INTERVAL_SEC;
    static {
        JOB_INSPECTION_INTERVAL_SEC = Primitives.parseLong(Settings.get("gridool.job.inspection_interval"), 300);
    }

    private final GridJob<A, R> job;
    private final A arg;

    private final GridExecutionMonitor monitor;

    private final GridRouter router;
    private final GridCommunicationManager communicationMgr;
    private final GridTaskProcessor taskProc;
    private final GridDiscoveryService discoveryService;
    private final SenderResponseTaskQueue taskResponseQueue;
    private final GridAnnotationProcessor annotationProc;
    private final GridResourceRegistry registry;
    private final GridConfiguration config;

    private final ThreadPoolExecutor execPool;

    /**
     * Creates a worker for one invocation of the given job.
     * <p>
     * The collaborators are fetched from {@code resourceRegistry}, a fixed-size executor named
     * "GridTaskAssignor" is created with the pool size taken from {@code config}, and the local
     * node reported by the communication manager is set on the job as its job node.
     *
     * @param job the job to execute
     * @param arg the argument forwarded to the job's map step; may be null
     * @param monitor receives the job started/finished notifications
     * @param resourceRegistry source of the router, communication manager, task processor,
     *        discovery service, response queue and annotation processor
     * @param config supplies the assignor pool size and the node selector
     */
    public GridJobWorker(@Nonnull GridJob<A, R> job, @Nullable A arg, @Nonnull GridExecutionMonitor monitor,
            @Nonnull GridResourceRegistry resourceRegistry, @Nonnull GridConfiguration config) {
        // Values handed in directly by the caller.
        this.job = job;
        this.arg = arg;
        this.monitor = monitor;
        this.registry = resourceRegistry;
        this.config = config;

        // Services resolved through the registry.
        this.router = resourceRegistry.getRouter();
        this.communicationMgr = resourceRegistry.getCommunicationManager();
        this.taskProc = resourceRegistry.getTaskProcessor();
        this.discoveryService = resourceRegistry.getDiscoveryService();
        this.taskResponseQueue = resourceRegistry.getTaskManager().getSenderResponseQueue();
        this.annotationProc = resourceRegistry.getAnnotationProcessor();

        // NOTE(review): within this class the pool is only shut down by cancel(); confirm
        // whether it is expected to be released elsewhere once the job completes normally.
        this.execPool = ExecutorFactory.newFixedThreadPool(config.getTaskAssignorPoolSize(), "GridTaskAssignor");

        job.setJobNode(communicationMgr.getLocalNode());
    }

    /**
     * Cancels this worker by shutting down its assignor pool immediately.
     * Does nothing when the job reports asynchronous operation.
     */
    public void cancel() {
        if (job.isAsyncOps()) {
            // NOTE(review): presumably tasks of an asynchronous job must keep running after
            // the worker returns - confirm against the GridJob contract.
            return;
        }
        execPool.shutdownNow();
    }

    /**
     * Wraps this worker in a {@link FutureTask} whose {@code cancel} also cancels the worker.
     *
     * @return a new future task that runs {@link #call()} when executed
     */
    public FutureTask<R> newTask() {
        return new FutureTask<R>(this) {
            @Override
            public boolean cancel(boolean mayInterruptIfRunning) {
                try {
                    GridJobWorker.this.cancel();
                } catch (Throwable e) {
                    // A failure while cancelling the worker must not prevent the future itself
                    // from being cancelled, so it is reported here instead of being propagated
                    // (it used to be dropped silently by a return inside a finally block).
                    LOG.warn("Failed to cancel the job worker: " + e.getMessage(), e);
                }
                return super.cancel(mayInterruptIfRunning);
            }
        };
    }

    /**
     * Runs the job with the deployment group's class loader installed as the thread context
     * class loader, notifying the monitor before the job starts and after it ends.
     * <p>
     * The original context class loader is restored afterwards, and the elapsed time is logged
     * when the job asks for job info logging.
     *
     * @return the value produced by {@code runJob()}
     * @throws Exception a {@link GridException}: either the one raised by the job (logged as an
     *         error) or a wrapper around any other throwable (logged as fatal)
     */
    public R call() throws Exception {
        final long begunAt = System.currentTimeMillis();

        final String group = job.getDeploymentGroup();
        final Thread self = Thread.currentThread();
        final ClassLoader callerLoader = self.getContextClassLoader();
        self.setContextClassLoader(registry.getDeploymentGroupClassLoader(group, callerLoader));
        try {
            monitor.onJobStarted(job);
            return runJob();
        } catch (GridException gridFailure) {
            LOG.error(gridFailure.getMessage(), gridFailure);
            throw gridFailure;
        } catch (Throwable unexpected) {
            LOG.fatal(unexpected.getMessage(), unexpected);
            throw new GridException(unexpected);
        } finally {
            // Always hand the thread back with the class loader it arrived with.
            self.setContextClassLoader(callerLoader);
            if (job.logJobInfo()) {
                final long elapsedMillis = System.currentTimeMillis() - begunAt;
                final String jobName = ClassUtils.getSimpleClassName(job);
                final String jobId = job.getJobId();
                final String pretty = DateTimeFormatter.formatTime(elapsedMillis);
                final String inSeconds = DateTimeFormatter.formatTimeInSec(elapsedMillis);
                LOG.info(jobName + " [" + jobId + "] finished in " + pretty + " (" + inSeconds + "sec)");
            }
            monitor.onJobFinished(job);
        }
    }

    /**
     * Performs the map / assign / aggregate / reduce cycle of the job.
     * <p>
     * Resources are injected into the job when it asks for them, the job is mapped to tasks,
     * the tasks are submitted, and - unless the job is asynchronous - their results are
     * collected before {@code reduce()} is invoked. Tasks still registered after the reduce
     * step are cancelled.
     * <p>
     * The discovery listener registered for the job is removed on every exit path, including
     * when task assignment or result aggregation throws.
     *
     * @return the reduce result; {@code null} for an asynchronous job
     * @throws GridException if the job maps to {@code null}, or if assigning tasks or
     *         aggregating their results fails
     */
    private R runJob() throws GridException {
        if (job.injectResources()) {
            annotationProc.injectResources(job);
        }
        final Map<GridTask, GridNode> mappedTasks = job.map(router, arg);
        if (mappedTasks == null) {
            String errmsg = "Job mapping operation produces no mapped tasks for job: " + job;
            LOG.error(errmsg);
            throw new GridException(errmsg);
        }

        final String jobClassName = ClassUtils.getSimpleClassName(job);
        final String jobId = job.getJobId();
        final int numTasks = mappedTasks.size();
        if (numTasks == 0) {
            // Nothing to distribute: go straight to the reduce step.
            if (LOG.isInfoEnabled()) {
                LOG.info(jobClassName + "#map of a job [" + jobId + "] returns an empty result");
            }
            return job.reduce();
        }
        if (job.logJobInfo()) {
            LOG.info(jobClassName + " [" + jobId + "] is mapped to " + numTasks + " tasks (nodes)");
        }
        final TextProgressBar progressBar = new JobProgressBar(jobClassName + " [" + jobId + ']', numTasks);

        final BlockingQueue<GridTaskResult> resultQueue = new ArrayBlockingQueue<GridTaskResult>(numTasks * 5); // REVIEWME 5 times is enough?
        taskResponseQueue.addResponseQueue(jobId, resultQueue);
        final Map<String, Pair<GridTask, List<Future<?>>>> taskMap = new ConcurrentHashMap<String, Pair<GridTask, List<Future<?>>>>(
                numTasks); // taskMap contends rarely

        GridDiscoveryListener nodeFailoverHandler = job.handleNodeFailure()
                ? new GridNodeFailureHandler(mappedTasks, taskMap, resultQueue)
                : new GridDiscoveryFailureLogger();
        discoveryService.addListener(nodeFailoverHandler);
        try {
            // assign map tasks
            assignMappedTasks(mappedTasks, taskMap, resultQueue);

            if (job.isAsyncOps()) {
                // REVIEWME the listener is dropped although the tasks may still be running
                return null;
            }

            // collect map task results
            aggregateTaskResults(taskMap, resultQueue, progressBar);
        } finally {
            // Unregister even when assignment/aggregation fails; otherwise the listener would
            // stay registered with the discovery service after the job has ended.
            discoveryService.removeListener(nodeFailoverHandler);
        }

        // invoke reduce
        R result = job.reduce();

        if (!taskMap.isEmpty()) {//TODO REVIEWME
            assert (!job.isAsyncOps());
            cancelRemainingTasks(taskMap);
        }

        progressBar.finish();
        return result;
    }

    /**
     * Checks each (task, node) pair produced by the map step, registers the task in
     * {@code taskMap}, and submits an assignor for it to the worker's executor.
     *
     * @param mappedTasks the tasks together with the node each one is mapped to
     * @param taskMap registry keyed by task id; receives one entry per task, holding the task
     *        and the list of futures submitted for it
     * @param resultQueue queue passed to every {@code GridTaskAssignor}
     * @throws GridException if a task or a node is null, or if two tasks share a task id
     */
    private void assignMappedTasks(final Map<GridTask, GridNode> mappedTasks,
            final Map<String, Pair<GridTask, List<Future<?>>>> taskMap,
            final BlockingQueue<GridTaskResult> resultQueue) throws GridException {
        for (final Map.Entry<GridTask, GridNode> mapping : mappedTasks.entrySet()) {
            final GridTask task = mapping.getKey();
            final GridNode node = mapping.getValue();

            if (task == null || node == null) {
                // A null task is reported in preference to a null node.
                final String missing = (task == null) ? "task" : "node";
                final String errmsg = "GridJob.map(...) method returned null " + missing + ": [job=" + job
                        + ", mappedTask:" + mapping + ']';
                LOG.error(errmsg);
                throw new GridException(errmsg);
            }

            final GridTaskAssignor assignor = new GridTaskAssignor(task, node, taskProc, communicationMgr, resultQueue);
            task.setAssignedNode(node);

            // Register the task before submitting it so its entry exists when the future is added.
            final String taskId = task.getTaskId();
            final List<Future<?>> futures = new ArrayList<Future<?>>(2);
            final Pair<GridTask, List<Future<?>>> previous = taskMap.put(taskId,
                new Pair<GridTask, List<Future<?>>>(task, futures));
            if (previous != null) {
                throw new GridException("Found duplicate taskId: " + taskId);
            }
            futures.add(execPool.submit(assignor));
        }
    }

    /**
     * Consumes task results from {@code resultQueue} until as many tasks have finished as were
     * registered in {@code taskMap} on entry, or until the job's result policy asks to return.
     * <p>
     * Each result is passed to {@code job.result(...)} and handled according to the returned
     * policy: finished tasks are removed from {@code taskMap}; speculative and failover
     * attempts are submitted and their futures recorded; FAILOVER and SKIP results do not count
     * as a finished task.
     *
     * @param taskMap pending tasks keyed by task id
     * @param resultQueue queue from which task results are taken
     * @param progressBar advanced once per result handled with a CONTINUE policy
     * @throws GridException if the waiting thread is interrupted, if failover is requested for
     *         a task that is not pending, or if the failover itself fails
     */
    private void aggregateTaskResults(final Map<String, Pair<GridTask, List<Future<?>>>> taskMap,
            final BlockingQueue<GridTaskResult> resultQueue, final TextProgressBar progressBar)
            throws GridException {
        final int numTasks = taskMap.size();
        for (int finishedTasks = 0; finishedTasks < numTasks; finishedTasks++) {
            final GridTaskResult result = awaitNextResult(taskMap, resultQueue);
            final String taskId = result.getTaskId();
            final GridTaskResultPolicy policy = job.result(result);
            switch (policy) {
            case CONTINUE: {
                progressBar.inc();
                taskMap.remove(taskId);
                break;
            }
            case CONTINUE_WITH_SPECULATIVE_TASKS: {
                progressBar.inc();
                taskMap.remove(taskId);
                for (final GridTask task : result.getSpeculativeTasks()) {
                    String speculativeTaskId = task.getTaskId();
                    Pair<GridTask, List<Future<?>>> entry = taskMap.get(speculativeTaskId);
                    if (entry == null) {// Is the task already finished?
                        LOG.info("No need to run a speculative task: " + speculativeTaskId);
                    } else {
                        Future<?> newFuture = runSpeculativeTask(task, resultQueue); // SKIP handling is required for speculative tasks
                        if (newFuture != null) {
                            List<Future<?>> futures = entry.getSecond();
                            futures.add(newFuture);
                        }
                    }
                }
                break;
            }
            case RETURN:
                taskMap.remove(taskId);
                return;
            case CANCEL_RETURN: {
                taskMap.remove(taskId);
                if (!taskMap.isEmpty()) {
                    cancelRemainingTasks(taskMap);
                }
                return;
            }
            case FAILOVER: {
                Pair<GridTask, List<Future<?>>> entry = taskMap.get(taskId);
                if (entry == null) {
                    // The task is no longer registered, so there is nothing to relocate.
                    // Fail with a descriptive error instead of a NullPointerException.
                    String errmsg = "Failover was requested for a task that is not pending: [job=" + job
                            + ", taskId=" + taskId + ']';
                    LOG.error(errmsg);
                    throw new GridException(errmsg);
                }
                GridTask task = entry.getFirst();
                Future<?> newFuture = failover(task, resultQueue);
                List<Future<?>> futureList = entry.getSecond();
                futureList.add(newFuture);
                finishedTasks--; // the task is still outstanding on its new node
                break;
            }
            case SKIP:
                finishedTasks--; // this result does not complete a task
                break;
            default:
                assert false : "Unexpected policy: " + policy;
            }
        }
    }

    /**
     * Blocks until the next task result is available.
     * <p>
     * When the inspection interval is -1 the queue is awaited without a timeout; otherwise it
     * is polled with the interval as timeout and the pending tasks are logged (at info level)
     * after each timeout.
     *
     * @throws GridException if the thread is interrupted while waiting; the interrupt status
     *         of the thread is restored before the exception is thrown
     */
    private GridTaskResult awaitNextResult(final Map<String, Pair<GridTask, List<Future<?>>>> taskMap,
            final BlockingQueue<GridTaskResult> resultQueue) throws GridException {
        try {
            if (JOB_INSPECTION_INTERVAL_SEC == -1L) {
                return resultQueue.take();
            }
            while (true) {
                final GridTaskResult result = resultQueue.poll(JOB_INSPECTION_INTERVAL_SEC, TimeUnit.SECONDS);
                if (result != null) {
                    return result;
                }
                if (LOG.isInfoEnabled()) {
                    showRemainingTasks(job, taskMap);
                }
            }
        } catch (InterruptedException e) {
            LOG.warn("GridTask is interrupted", e);
            // Catching InterruptedException clears the flag; restore it so that code further
            // up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new GridException("GridTask is interrupted", e);
        }
    }

    /**
     * Cancels, with interruption allowed, every future of every task still present in
     * {@code taskMap} that has not completed yet.
     *
     * @param taskMap pending tasks keyed by task id
     */
    private void cancelRemainingTasks(final Map<String, Pair<GridTask, List<Future<?>>>> taskMap) {
        for (final Pair<GridTask, List<Future<?>>> pending : taskMap.values()) {
            final List<Future<?>> attempts = pending.getSecond();
            assert (attempts != null);
            for (final Future<?> attempt : attempts) {
                if (attempt.isDone()) {
                    continue;
                }
                // TODO send cancel request
                attempt.cancel(true);
            }
        }
    }

    /**
     * Resubmits a task to a node chosen among its failover candidates.
     * <p>
     * The node the task was assigned to is recorded on the task through
     * {@code setTransferToReplica} before the new assignor is submitted.
     *
     * @param task the task to relocate; must not be null
     * @param resultQueue queue passed to the new {@code GridTaskAssignor}
     * @return the future of the submitted assignor
     * @throws IllegalArgumentException if {@code task} is null
     * @throws GridException if failover is not active for the task or no candidate node exists
     */
    @Nonnull
    private Future<?> failover(@CheckForNull final GridTask task, final BlockingQueue<GridTaskResult> resultQueue)
            throws GridException {
        if (task == null) {
            throw new IllegalArgumentException();
        }
        if (!task.isFailoverActive()) {
            throw new GridException("Failover is not active for this task: " + task);
        }

        final List<GridNode> relocatable = task.listFailoverCandidates(router);
        if (relocatable.isEmpty()) {
            throw new GridException(
                    "Failover failed because there is no relocatable node: [job=" + job + ", task:" + task + ']');
        }

        final GridNode target = config.getNodeSelector().selectNode(relocatable, config);
        assert (target != null);
        if (LOG.isWarnEnabled()) {
            LOG.warn("[Failover] Assigned a task [" + task.getTaskId() + "] to node [" + target + "]");
        }

        task.setTransferToReplica(task.getAssignedNode());
        return execPool.submit(new GridTaskAssignor(task, target, taskProc, communicationMgr, resultQueue));
    }

    /**
     * Submits an additional, speculative attempt of a task to a node chosen among its
     * failover candidates.
     *
     * @param task the task to run speculatively; must not be null
     * @param resultQueue queue passed to the new {@code GridTaskAssignor}
     * @return the future of the submitted assignor, or {@code null} when the task has no
     *         candidate node
     * @throws IllegalArgumentException if {@code task} is null
     */
    @Nullable
    private Future<?> runSpeculativeTask(@CheckForNull final GridTask task,
            final BlockingQueue<GridTaskResult> resultQueue) {
        if (task == null) {
            throw new IllegalArgumentException();
        }

        final List<GridNode> alternatives = task.listFailoverCandidates(router);
        if (alternatives.isEmpty()) {
            return null;
        }

        final GridNode target = config.getNodeSelector().selectNode(alternatives, config);
        assert (target != null);
        if (LOG.isInfoEnabled()) {
            LOG.info("[Speculative Execution] Assigned a speculative task [" + task.getTaskId() + "] to node ["
                    + target + "]");
        }
        return execPool.submit(new GridTaskAssignor(task, target, taskProc, communicationMgr, resultQueue));
    }

    /**
     * Logs, at info level, one report listing every task still present in {@code taskMap}
     * together with the node it is assigned to.
     *
     * @param job the job whose class name and id head the report
     * @param taskMap pending tasks keyed by task id
     */
    private static void showRemainingTasks(final GridJob<?, ?> job,
            final Map<String, Pair<GridTask, List<Future<?>>>> taskMap) {
        final String jobClassName = ClassUtils.getSimpleClassName(job);
        final StringBuilder report = new StringBuilder(256);
        report.append("Start a job inspection... ")
              .append(jobClassName)
              .append(" [")
              .append(job.getJobId())
              .append("] Pending tasks: \n");

        // Empty before the first task, then a comma and line break between tasks.
        String separator = "";
        for (final Pair<GridTask, List<Future<?>>> pending : taskMap.values()) {
            report.append(separator);
            separator = ",\n";

            final GridTask task = pending.getFirst();
            final String taskClassName = ClassUtils.getSimpleClassName(task);
            final String taskId = task.getTaskId();
            final String nodeinfo = GridUtils.toHostNameAddrPort(task.getAssignedNode());
            report.append(taskClassName)
                  .append('[')
                  .append(taskId)
                  .append(']')
                  .append(" on ")
                  .append(nodeinfo);
        }
        LOG.info(report.toString());
    }

    /**
     * Progress bar for a job that writes its status to the debug log.
     */
    private static final class JobProgressBar extends TextProgressBar {
        private static final Log JP_LOG = LogFactory.getLog(JobProgressBar.class);

        // NOTE(review): units are defined by TextProgressBar; 5000L looks like milliseconds - confirm.
        private static final long REFRESH_TIME = 5000L;
        private static final int REFRESH_FLUCTUATIONS = 20;

        /**
         * @param title label of the progress bar
         * @param totalSteps number of steps that make up the whole job
         */
        JobProgressBar(String title, int totalSteps) {
            super(title, totalSteps);
            setRefreshTime(REFRESH_TIME);
            setRefreshFluctations(REFRESH_FLUCTUATIONS);
        }

        /** Emits the current progress info, but only when debug logging is enabled. */
        @Override
        protected void show() {
            if (!JP_LOG.isDebugEnabled()) {
                return;
            }
            JP_LOG.debug(getInfo());
        }
    }

}