eu.stratosphere.nephele.execution.RuntimeEnvironment.java Source code

Java tutorial

Introduction

Here is the source code for eu.stratosphere.nephele.execution.RuntimeEnvironment.java

Source

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.nephele.execution;

import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.Path;
import eu.stratosphere.core.io.IOReadableWritable;
import eu.stratosphere.nephele.deployment.TaskDeploymentDescriptor;
import eu.stratosphere.nephele.execution.librarycache.LibraryCacheManager;
import eu.stratosphere.nephele.jobgraph.JobID;
import eu.stratosphere.nephele.protocols.AccumulatorProtocol;
import eu.stratosphere.nephele.services.iomanager.IOManager;
import eu.stratosphere.nephele.services.memorymanager.MemoryManager;
import eu.stratosphere.nephele.template.AbstractInvokable;
import eu.stratosphere.nephele.template.InputSplitProvider;
import eu.stratosphere.runtime.io.Buffer;
import eu.stratosphere.runtime.io.channels.ChannelID;
import eu.stratosphere.runtime.io.channels.OutputChannel;
import eu.stratosphere.runtime.io.gates.GateID;
import eu.stratosphere.runtime.io.gates.InputGate;
import eu.stratosphere.runtime.io.gates.OutputGate;
import eu.stratosphere.runtime.io.network.bufferprovider.BufferAvailabilityListener;
import eu.stratosphere.runtime.io.network.bufferprovider.BufferProvider;
import eu.stratosphere.runtime.io.network.bufferprovider.GlobalBufferPool;
import eu.stratosphere.runtime.io.network.bufferprovider.LocalBufferPool;
import eu.stratosphere.runtime.io.network.bufferprovider.LocalBufferPoolOwner;
import eu.stratosphere.util.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.FutureTask;

/**
 * The user code of every Nephele task runs inside a <code>RuntimeEnvironment</code> object. The environment provides
 * important services to the task. It keeps track of setting up the communication channels and provides access to input
 * splits, memory manager, etc.
 * <p/>
 * Besides implementing {@link Environment}, the object is the {@link Runnable} executed by the task thread and acts
 * as {@link BufferProvider} / {@link LocalBufferPoolOwner} for the buffers used by the task's output side.
 * <p/>
 * This class is intended to be thread-safe: the gate lists are copy-on-write lists and the executing thread as well
 * as the execution observer are volatile. NOTE(review): <code>outputBufferPool</code> and
 * <code>cacheCopyTasks</code> are accessed without synchronization - confirm that callers serialize access.
 */
public class RuntimeEnvironment implements Environment, BufferProvider, LocalBufferPoolOwner, Runnable {

    /**
     * The log object used for debugging.
     */
    private static final Log LOG = LogFactory.getLog(RuntimeEnvironment.class);

    /**
     * The interval to sleep in case a communication channel is not yet entirely set up (in milliseconds).
     */
    private static final int SLEEPINTERVAL = 100;

    /**
     * List of output gates created by the task.
     */
    private final List<OutputGate> outputGates = new CopyOnWriteArrayList<OutputGate>();

    /**
     * List of input gates created by the task.
     */
    private final List<InputGate<? extends IOReadableWritable>> inputGates = new CopyOnWriteArrayList<InputGate<? extends IOReadableWritable>>();

    /**
     * Queue of unbound input gate IDs which are required for deserializing an environment in the course of an RPC
     * call. Nothing in this class adds to the queue, so {@link #getNextUnboundInputGateID()} yields <code>null</code>
     * unless the queue is filled elsewhere.
     */
    private final Queue<GateID> unboundInputGateIDs = new ArrayDeque<GateID>();

    /**
     * The memory manager of the current environment (currently the one associated with the executing TaskManager).
     */
    private final MemoryManager memoryManager;

    /**
     * The io manager of the current environment (currently the one associated with the executing TaskManager).
     */
    private final IOManager ioManager;

    /**
     * Class of the task to run in this environment.
     */
    private final Class<? extends AbstractInvokable> invokableClass;

    /**
     * Instance of the class to be run in this environment.
     */
    private final AbstractInvokable invokable;

    /**
     * The thread executing the task in the environment. Created lazily by {@link #getExecutingThread()}.
     */
    private volatile Thread executingThread = null;

    /**
     * The ID of the job this task belongs to.
     */
    private final JobID jobID;

    /**
     * The job configuration encapsulated in the environment object.
     */
    private final Configuration jobConfiguration;

    /**
     * The task configuration encapsulated in the environment object.
     */
    private final Configuration taskConfiguration;

    /**
     * The input split provider that can be queried for new input splits.
     */
    private final InputSplitProvider inputSplitProvider;

    /**
     * The observer object for the task's execution. May be <code>null</code> until
     * {@link #setExecutionObserver(ExecutionObserver)} has been called.
     */
    private volatile ExecutionObserver executionObserver = null;

    /**
     * The RPC proxy to report accumulators to JobManager
     */
    private AccumulatorProtocol accumulatorProtocolProxy = null;

    /**
     * The index of this subtask in the subtask group.
     */
    private final int indexInSubtaskGroup;

    /**
     * The current number of subtasks the respective task is split into.
     */
    private final int currentNumberOfSubtasks;

    /**
     * The name of the task running in this environment.
     */
    private final String taskName;

    /**
     * The local buffer pool backing the {@link BufferProvider} methods. It stays <code>null</code> until
     * {@link #registerGlobalBufferPool(GlobalBufferPool)} has been called.
     */
    private LocalBufferPool outputBufferPool;

    /**
     * The copy tasks for cache files, keyed by name.
     */
    private Map<String, FutureTask<Path>> cacheCopyTasks = new HashMap<String, FutureTask<Path>>();

    /**
     * Creates a new runtime environment object which contains the runtime information for the encapsulated Nephele
     * task. Memory manager, I/O manager and input split provider are left <code>null</code> and both the subtask
     * index and the number of subtasks are set to <code>0</code>.
     *
     * @param jobID             the ID of the original Nephele job
     * @param taskName          the name of task running in this environment
     * @param invokableClass    the class that should be instantiated as a Nephele task
     * @param taskConfiguration the configuration object which was attached to the original JobVertex
     * @param jobConfiguration  the configuration object which was attached to the original JobGraph
     * @throws Exception thrown if an error occurs while instantiating the invokable class
     */
    public RuntimeEnvironment(final JobID jobID, final String taskName,
            final Class<? extends AbstractInvokable> invokableClass, final Configuration taskConfiguration,
            final Configuration jobConfiguration) throws Exception {

        this.jobID = jobID;
        this.taskName = taskName;
        this.invokableClass = invokableClass;
        this.taskConfiguration = taskConfiguration;
        this.jobConfiguration = jobConfiguration;
        this.indexInSubtaskGroup = 0;
        this.currentNumberOfSubtasks = 0;
        this.memoryManager = null;
        this.ioManager = null;
        this.inputSplitProvider = null;

        // Instantiate the task and let it register its gates with this environment
        this.invokable = this.invokableClass.newInstance();
        this.invokable.setEnvironment(this);
        this.invokable.registerInputOutput();
    }

    /**
     * Constructs a runtime environment from a task deployment description. After the invokable has registered its
     * gates, the channels of the i-th registered output/input gate are initialized from the i-th output/input gate
     * descriptor of the deployment descriptor.
     *
     * @param tdd                      the task deployment description
     * @param memoryManager            the task manager's memory manager component
     * @param ioManager                the task manager's I/O manager component
     * @param inputSplitProvider       the input split provider for this environment
     * @param accumulatorProtocolProxy the RPC proxy handed out by {@link #getAccumulatorProtocolProxy()}
     * @param cpTasks                  the cache file copy tasks; the map is stored as passed (not copied) and
     *                                 returned by {@link #getCopyTask()}
     * @throws Exception thrown if an error occurs while instantiating the invokable class
     */
    public RuntimeEnvironment(final TaskDeploymentDescriptor tdd, final MemoryManager memoryManager,
            final IOManager ioManager, final InputSplitProvider inputSplitProvider,
            AccumulatorProtocol accumulatorProtocolProxy, Map<String, FutureTask<Path>> cpTasks) throws Exception {

        this.jobID = tdd.getJobID();
        this.taskName = tdd.getTaskName();
        this.invokableClass = tdd.getInvokableClass();
        this.jobConfiguration = tdd.getJobConfiguration();
        this.taskConfiguration = tdd.getTaskConfiguration();
        this.indexInSubtaskGroup = tdd.getIndexInSubtaskGroup();
        this.currentNumberOfSubtasks = tdd.getCurrentNumberOfSubtasks();
        this.memoryManager = memoryManager;
        this.ioManager = ioManager;
        this.inputSplitProvider = inputSplitProvider;
        this.accumulatorProtocolProxy = accumulatorProtocolProxy;
        this.cacheCopyTasks = cpTasks;

        // Instantiate the task and let it register its gates with this environment
        this.invokable = this.invokableClass.newInstance();
        this.invokable.setEnvironment(this);
        this.invokable.registerInputOutput();

        // Gates are matched to their descriptors by position
        final int numOutputGates = tdd.getNumberOfOutputGateDescriptors();
        for (int i = 0; i < numOutputGates; ++i) {
            this.outputGates.get(i).initializeChannels(tdd.getOutputGateDescriptor(i));
        }

        final int numInputGates = tdd.getNumberOfInputGateDescriptors();
        for (int i = 0; i < numInputGates; i++) {
            this.inputGates.get(i).initializeChannels(tdd.getInputGateDescriptor(i));
        }
    }

    /**
     * Returns the invokable object that represents the Nephele task.
     *
     * @return the invokable object that represents the Nephele task
     */
    public AbstractInvokable getInvokable() {
        return this.invokable;
    }

    @Override
    public JobID getJobID() {
        return this.jobID;
    }

    @Override
    public GateID getNextUnboundInputGateID() {
        return this.unboundInputGateIDs.poll();
    }

    @Override
    public OutputGate createAndRegisterOutputGate() {
        // The current number of output gates is passed as third constructor argument
        final OutputGate gate = new OutputGate(getJobID(), new GateID(), getNumberOfOutputGates());
        this.outputGates.add(gate);

        return gate;
    }

    /**
     * Executes the task: switches to RUNNING, invokes the user code, switches to FINISHING, closes the gates, waits
     * for the channels to be closed, releases the channel resources and finally switches to FINISHED. If anything
     * goes wrong on the way, the channel resources are released and the task is reported as CANCELED (when the
     * observer reports cancellation or a {@link CancelTaskException} was thrown) or as FAILED otherwise.
     */
    @Override
    public void run() {
        if (this.invokable == null) {
            // Defensive only: the field is assigned in every constructor. Should it ever be null, the
            // NullPointerException raised by invoke() below is reported through the regular failure path.
            LOG.fatal("ExecutionEnvironment has no Invokable set");
        }

        // Now the actual program starts to run
        changeExecutionState(ExecutionState.RUNNING, null);

        // If the task has been canceled in the mean time, do not even start it
        if (observerReportsCanceled()) {
            changeExecutionState(ExecutionState.CANCELED, null);
            return;
        }

        try {
            final ClassLoader cl = LibraryCacheManager.getClassLoader(this.jobID);
            Thread.currentThread().setContextClassLoader(cl);
            this.invokable.invoke();

            // Make sure, we enter the catch block when the task has been canceled
            if (observerReportsCanceled()) {
                throw new InterruptedException();
            }
        } catch (Throwable t) {
            if (!observerReportsCanceled()) {

                // Perform clean up when the task failed and has been not canceled by the user
                try {
                    this.invokable.cancel();
                } catch (Throwable t2) {
                    LOG.error(StringUtils.stringifyException(t2));
                }
            }

            releaseResourcesAndReportAbort(t);
            return;
        }

        // Task finished running, but there may be unconsumed output data in some of the channels
        changeExecutionState(ExecutionState.FINISHING, null);

        try {
            // If there is any unclosed input gate, close it and propagate close operation to corresponding output gate
            closeInputGates();

            // First, close all output gates to indicate no records will be emitted anymore
            requestAllOutputGatesToClose();

            // Wait until all input channels are closed
            waitForInputChannelsToBeClosed();

            // Now we wait until all output channels have written out their data and are closed
            waitForOutputChannelsToBeClosed();
        } catch (Throwable t) {
            releaseResourcesAndReportAbort(t);
            return;
        }

        // Release all resources that may currently be allocated by the individual channels
        releaseAllChannelResources();

        // Finally, switch execution state to FINISHED and report to job manager
        changeExecutionState(ExecutionState.FINISHED, null);
    }

    /**
     * Checks whether the execution observer reports the task as canceled.
     *
     * @return <code>true</code> if an observer is set and reports cancellation, <code>false</code> otherwise
     *         (including the case that no observer has been set)
     */
    private boolean observerReportsCanceled() {
        // Read the volatile field once so check and call see the same observer
        final ExecutionObserver observer = this.executionObserver;
        return observer != null && observer.isCanceled();
    }

    /**
     * Releases all channel resources and reports the abnormal end of the task: CANCELED if the observer reports
     * cancellation or the cause is a {@link CancelTaskException}, FAILED (with the stringified cause) otherwise.
     *
     * @param cause the throwable that aborted the task
     */
    private void releaseResourcesAndReportAbort(final Throwable cause) {
        // Release all resources that may currently be allocated by the individual channels
        releaseAllChannelResources();

        if (observerReportsCanceled() || cause instanceof CancelTaskException) {
            changeExecutionState(ExecutionState.CANCELED, null);
        } else {
            changeExecutionState(ExecutionState.FAILED, StringUtils.stringifyException(cause));
        }
    }

    @Override
    public <T extends IOReadableWritable> InputGate<T> createAndRegisterInputGate() {
        // The current number of input gates is passed as third constructor argument
        final InputGate<T> gate = new InputGate<T>(getJobID(), new GateID(), getNumberOfInputGates());
        this.inputGates.add(gate);

        return gate;
    }

    /**
     * Returns the number of output gates registered so far.
     *
     * @return the number of registered output gates
     */
    public int getNumberOfOutputGates() {
        return this.outputGates.size();
    }

    @Override
    public int getNumberOfInputGates() {
        return this.inputGates.size();
    }

    @Override
    public int getNumberOfOutputChannels() {
        int total = 0;
        for (final OutputGate gate : this.outputGates) {
            total += gate.getNumChannels();
        }

        return total;
    }

    @Override
    public int getNumberOfInputChannels() {
        int total = 0;
        for (final InputGate<? extends IOReadableWritable> gate : this.inputGates) {
            total += gate.getNumberOfInputChannels();
        }

        return total;
    }

    /**
     * Returns the registered input gate with index <code>pos</code>.
     *
     * @param pos the index of the input gate to return
     * @return the input gate at index <code>pos</code> or <code>null</code> if no such index exists (negative or not
     *         smaller than the number of registered input gates)
     */
    public InputGate<? extends IOReadableWritable> getInputGate(final int pos) {
        if (pos >= 0 && pos < this.inputGates.size()) {
            return this.inputGates.get(pos);
        }

        return null;
    }

    /**
     * Returns the registered output gate with index <code>index</code>.
     *
     * @param index the index of the output gate to return
     * @return the output gate at index <code>index</code> or <code>null</code> if no such index exists (negative or
     *         not smaller than the number of registered output gates)
     */
    public OutputGate getOutputGate(int index) {
        if (index >= 0 && index < this.outputGates.size()) {
            return this.outputGates.get(index);
        }

        return null;
    }

    /**
     * Returns the thread which is assigned to execute the user code. The thread is created on the first call (it is
     * not started here) and named after {@link #getTaskNameWithIndex()} if a task name is set.
     *
     * @return the thread which is assigned to execute the user code
     */
    public Thread getExecutingThread() {
        synchronized (this) {

            if (this.executingThread == null) {
                this.executingThread = (this.taskName == null)
                        ? new Thread(this)
                        : new Thread(this, getTaskNameWithIndex());
            }

            return this.executingThread;
        }
    }

    /**
     * Blocks until all output gates are closed.
     *
     * @throws InterruptedException thrown if the task has been canceled or if waiting for a gate signals an
     *                              interruption
     */
    private void waitForOutputChannelsToBeClosed() throws InterruptedException {
        // Make sure, we leave this method with an InterruptedException when the task has been canceled.
        // Returning normally here would let run() report a canceled task as FINISHED.
        if (observerReportsCanceled()) {
            throw new InterruptedException();
        }

        for (final OutputGate og : this.outputGates) {
            og.waitForGateToBeClosed();
        }
    }

    /**
     * Blocks until all input gates report to be closed, polling every {@link #SLEEPINTERVAL} milliseconds.
     *
     * @throws IOException          declared for errors raised while querying the input gates
     * @throws InterruptedException thrown if the task has been canceled or the waiting thread is interrupted
     */
    private void waitForInputChannelsToBeClosed() throws IOException, InterruptedException {
        while (true) {

            // Make sure, we leave this method with an InterruptedException when the task has been canceled
            if (observerReportsCanceled()) {
                throw new InterruptedException();
            }

            // Every gate is queried in every round (no early exit), as in the original polling loop
            boolean allClosed = true;
            for (int i = 0; i < getNumberOfInputGates(); i++) {
                if (!this.inputGates.get(i).isClosed()) {
                    allClosed = false;
                }
            }

            if (allClosed) {
                return;
            }

            Thread.sleep(SLEEPINTERVAL);
        }
    }

    /**
     * Calls <code>close()</code> once on every registered input gate.
     *
     * @throws IOException          declared for errors raised while closing a gate
     * @throws InterruptedException declared for interruptions raised while closing a gate
     */
    private void closeInputGates() throws IOException, InterruptedException {
        for (final InputGate<? extends IOReadableWritable> gate : this.inputGates) {
            // Important: close must be called on each input gate exactly once
            gate.close();
        }
    }

    /**
     * Requests all output gates to be closed.
     *
     * @throws IOException          declared for errors raised while requesting the close
     * @throws InterruptedException declared for interruptions raised while requesting the close
     */
    private void requestAllOutputGatesToClose() throws IOException, InterruptedException {
        for (final OutputGate gate : this.outputGates) {
            gate.requestClose();
        }
    }

    @Override
    public IOManager getIOManager() {
        return this.ioManager;
    }

    @Override
    public MemoryManager getMemoryManager() {
        return this.memoryManager;
    }

    @Override
    public Configuration getTaskConfiguration() {
        return this.taskConfiguration;
    }

    @Override
    public Configuration getJobConfiguration() {
        return this.jobConfiguration;
    }

    @Override
    public int getCurrentNumberOfSubtasks() {
        return this.currentNumberOfSubtasks;
    }

    @Override
    public int getIndexInSubtaskGroup() {
        return this.indexInSubtaskGroup;
    }

    /**
     * Forwards an execution state change to the execution observer, if one is set.
     *
     * @param newExecutionState the state the task switched to
     * @param optionalMessage   an optional message passed along with the state change, possibly <code>null</code>
     */
    private void changeExecutionState(final ExecutionState newExecutionState, final String optionalMessage) {
        final ExecutionObserver observer = this.executionObserver;
        if (observer != null) {
            observer.executionStateChanged(newExecutionState, optionalMessage);
        }
    }

    @Override
    public String getTaskName() {
        return this.taskName;
    }

    /**
     * Returns the name of the task with its index in the subtask group and the total number of subtasks, formatted
     * as <code>name (index+1/total)</code>.
     *
     * @return the name of the task with its (one-based) index in the subtask group and the total number of subtasks
     */
    public String getTaskNameWithIndex() {
        return String.format("%s (%d/%d)", this.taskName, getIndexInSubtaskGroup() + 1,
                getCurrentNumberOfSubtasks());
    }

    /**
     * Sets the execution observer for this environment.
     *
     * @param executionObserver the execution observer for this environment
     */
    public void setExecutionObserver(final ExecutionObserver executionObserver) {
        this.executionObserver = executionObserver;
    }

    @Override
    public InputSplitProvider getInputSplitProvider() {
        return this.inputSplitProvider;
    }

    @Override
    public void userThreadStarted(final Thread userThread) {
        final ExecutionObserver observer = this.executionObserver;
        if (observer != null) {
            observer.userThreadStarted(userThread);
        }
    }

    @Override
    public void userThreadFinished(final Thread userThread) {
        final ExecutionObserver observer = this.executionObserver;
        if (observer != null) {
            observer.userThreadFinished(userThread);
        }
    }

    /**
     * Releases the allocated resources (particularly buffer) of input and output channels attached to this task. This
     * method should only be called after the respected task has stopped running. Input gates are handled before
     * output gates.
     */
    private void releaseAllChannelResources() {
        for (final InputGate<? extends IOReadableWritable> gate : this.inputGates) {
            gate.releaseAllChannelResources();
        }

        for (final OutputGate gate : this.outputGates) {
            gate.releaseAllChannelResources();
        }
    }

    @Override
    public Set<ChannelID> getOutputChannelIDs() {
        final Set<ChannelID> ids = new HashSet<ChannelID>();

        for (final OutputGate gate : this.outputGates) {
            for (final OutputChannel channel : gate.channels()) {
                ids.add(channel.getID());
            }
        }

        return Collections.unmodifiableSet(ids);
    }

    @Override
    public Set<ChannelID> getInputChannelIDs() {
        final Set<ChannelID> ids = new HashSet<ChannelID>();

        for (final InputGate<? extends IOReadableWritable> inputGate : this.inputGates) {
            for (int i = 0; i < inputGate.getNumberOfInputChannels(); ++i) {
                ids.add(inputGate.getInputChannel(i).getID());
            }
        }

        return Collections.unmodifiableSet(ids);
    }

    @Override
    public Set<GateID> getInputGateIDs() {
        final Set<GateID> ids = new HashSet<GateID>();

        for (final InputGate<? extends IOReadableWritable> gate : this.inputGates) {
            ids.add(gate.getGateID());
        }

        return Collections.unmodifiableSet(ids);
    }

    @Override
    public Set<GateID> getOutputGateIDs() {
        final Set<GateID> ids = new HashSet<GateID>();

        for (final OutputGate gate : this.outputGates) {
            ids.add(gate.getGateID());
        }

        return Collections.unmodifiableSet(ids);
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if no registered output gate has the given ID
     */
    @Override
    public Set<ChannelID> getOutputChannelIDsOfGate(final GateID gateID) {
        // Locate the first registered output gate carrying the requested ID
        OutputGate match = null;
        for (final OutputGate candidate : this.outputGates) {
            if (candidate.getGateID().equals(gateID)) {
                match = candidate;
                break;
            }
        }

        if (match == null) {
            throw new IllegalArgumentException("Cannot find output gate with ID " + gateID);
        }

        final Set<ChannelID> ids = new HashSet<ChannelID>();
        for (int i = 0; i < match.getNumChannels(); ++i) {
            ids.add(match.getChannel(i).getID());
        }

        return Collections.unmodifiableSet(ids);
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if no registered input gate has the given ID
     */
    @Override
    public Set<ChannelID> getInputChannelIDsOfGate(final GateID gateID) {
        // Locate the first registered input gate carrying the requested ID
        InputGate<? extends IOReadableWritable> match = null;
        for (final InputGate<? extends IOReadableWritable> candidate : this.inputGates) {
            if (candidate.getGateID().equals(gateID)) {
                match = candidate;
                break;
            }
        }

        if (match == null) {
            throw new IllegalArgumentException("Cannot find input gate with ID " + gateID);
        }

        final Set<ChannelID> ids = new HashSet<ChannelID>();
        for (int i = 0; i < match.getNumberOfInputChannels(); ++i) {
            ids.add(match.getInputChannel(i).getID());
        }

        return Collections.unmodifiableSet(ids);
    }

    /**
     * Returns the list of registered output gates. The internal list itself is returned, not a copy.
     *
     * @return the registered output gates
     */
    public List<OutputGate> outputGates() {
        return this.outputGates;
    }

    /**
     * Returns the list of registered input gates. The internal list itself is returned, not a copy.
     *
     * @return the registered input gates
     */
    public List<InputGate<? extends IOReadableWritable>> inputGates() {
        return this.inputGates;
    }

    @Override
    public AccumulatorProtocol getAccumulatorProtocolProxy() {
        return this.accumulatorProtocolProxy;
    }

    /**
     * Registers the copy task for a cache file under the given name, replacing a task previously stored under the
     * same name.
     *
     * @param name     the name the copy task is stored under
     * @param copyTask the task copying the cache file
     */
    public void addCopyTaskForCacheFile(String name, FutureTask<Path> copyTask) {
        this.cacheCopyTasks.put(name, copyTask);
    }

    @Override
    public Map<String, FutureTask<Path>> getCopyTask() {
        return this.cacheCopyTasks;
    }

    @Override
    public BufferProvider getOutputBufferProvider() {
        return this;
    }

    // -----------------------------------------------------------------------------------------------------------------
    //                                            BufferProvider methods
    //
    // All methods delegate to the local output buffer pool, which only exists after registerGlobalBufferPool(...)
    // has been called; calling them earlier results in a NullPointerException.
    // -----------------------------------------------------------------------------------------------------------------

    @Override
    public Buffer requestBuffer(int minBufferSize) throws IOException {
        return this.outputBufferPool.requestBuffer(minBufferSize);
    }

    @Override
    public Buffer requestBufferBlocking(int minBufferSize) throws IOException, InterruptedException {
        return this.outputBufferPool.requestBufferBlocking(minBufferSize);
    }

    @Override
    public int getBufferSize() {
        return this.outputBufferPool.getBufferSize();
    }

    @Override
    public BufferAvailabilityRegistration registerBufferAvailabilityListener(BufferAvailabilityListener listener) {
        return this.outputBufferPool.registerBufferAvailabilityListener(listener);
    }

    // -----------------------------------------------------------------------------------------------------------------
    //                                       LocalBufferPoolOwner methods
    //
    // Except for getNumberOfChannels() and registerGlobalBufferPool(...), these methods also require the local
    // output buffer pool to be present.
    // -----------------------------------------------------------------------------------------------------------------

    @Override
    public int getNumberOfChannels() {
        return getNumberOfOutputChannels();
    }

    @Override
    public void setDesignatedNumberOfBuffers(int numBuffers) {
        this.outputBufferPool.setNumDesignatedBuffers(numBuffers);
    }

    @Override
    public void clearLocalBufferPool() {
        this.outputBufferPool.destroy();
    }

    @Override
    public void registerGlobalBufferPool(GlobalBufferPool globalBufferPool) {
        // Only the first registration creates the local pool; later calls are ignored.
        // NOTE(review): the second constructor argument (1) is presumably the initial number of designated
        // buffers - confirm against LocalBufferPool.
        if (this.outputBufferPool == null) {
            this.outputBufferPool = new LocalBufferPool(globalBufferPool, 1);
        }
    }

    @Override
    public void logBufferUtilization() {
        LOG.info(String.format("\t%s: %d available, %d requested, %d designated", getTaskNameWithIndex(),
                this.outputBufferPool.numAvailableBuffers(), this.outputBufferPool.numRequestedBuffers(),
                this.outputBufferPool.numDesignatedBuffers()));
    }

    @Override
    public void reportAsynchronousEvent() {
        this.outputBufferPool.reportAsynchronousEvent();
    }
}