x10.x10rt.yarn.ApplicationMaster.java Source code

Java tutorial

Introduction

Here is the source code for x10.x10rt.yarn.ApplicationMaster.java

Source

/*
 *  This file is part of the X10 project (http://x10-lang.org).
 *
 *  This file is licensed to You under the Eclipse Public License (EPL);
 *  You may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *      http://www.opensource.org/licenses/eclipse-1.0.php
 *
 *  (C) Copyright IBM Corporation 2014.
 */

package x10.x10rt.yarn;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.ServerSocketChannel;
import java.nio.channels.SocketChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.impl.SimpleLog;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.client.api.async.NMClientAsync;
import org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.util.ConverterUtils;

public class ApplicationMaster {

    private static final Log LOG = new SimpleLog(ApplicationMaster.class.getName());

    // environment variables used by X10, set by the AM
    public static final String X10_NPLACES = "X10_NPLACES";
    public static final String X10_NTHREADS = "X10_NTHREADS";
    public static final String X10_LAUNCHER_PLACE = "X10_LAUNCHER_PLACE";
    public static final String X10_LAUNCHER_PARENT = "X10_LAUNCHER_PARENT";
    public static final String X10_LAUNCHER_HOST = "X10_LAUNCHER_HOST";
    public static final String X10_HDFS_JARS = "X10_HDFS_JARS";
    public static final String X10_YARN_NATIVE = "X10_YARN_NATIVE";
    public static final String X10_YARN_MAIN = "X10_YARN_MAIN";
    public static final String X10_YARN_KILL = "X10_YARN_KILL";
    public static final String X10_YARNUPLOAD = "X10_YARNUPLOAD";
    public static final String X10YARNENV_ = "X10YARNENV_";
    private static final String DEAD = "DEAD";

    static enum CTRL_MSG_TYPE {
        HELLO, GOODBYE, PORT_REQUEST, PORT_RESPONSE, LAUNCH_REQUEST, LAUNCH_RESPONSE
    }; // Correspond to values in Launcher.h

    private static final int headerLength = 16;
    private static final int PORT_UNKNOWN = -1;
    private static final int PORT_DEAD = -2;

    private Configuration conf;
    private String appMasterHostname;
    private int appMasterPort;
    private int appMasterRpcPort; // Port on which the app master listens for status updates from clients
    private String appMasterTrackingUrl; // Tracking url to which app master publishes info for clients to monitor
    private volatile boolean running = true; // flag for when to shut everything down
    private final String[] args;
    private final String appName;
    private final ScheduledThreadPoolExecutor placeKiller;

    // information about a specific place.
    private class CommunicationLink {
        private CommunicationLink(NodeId node, ContainerId container) {
            super();
            this.node = node;
            this.container = container;
            this.sc = null;
            this.port = PORT_UNKNOWN;
            this.pendingPortRequests = null;
        }

        final NodeId node;
        final ContainerId container;
        SocketChannel sc;
        int port;
        ArrayList<Integer> pendingPortRequests;
    };

    private ServerSocketChannel launcherChannel; // server socket for X10rt to communicate with
    //   private int appMasterX10LauncherPort; // Port used by X10 places to communicate directly with the AM
    private final Selector selector;
    private final HashMap<Integer, CommunicationLink> links;
    private final HashMap<ContainerId, Integer> places; // map of containers to places
    private final HashMap<SocketChannel, ByteBuffer> pendingReads;
    private final HashMap<Integer, Integer> pendingKills;
    private final ConcurrentLinkedQueue<ContainerRequest> pendingRequests = new ConcurrentLinkedQueue<ContainerRequest>();

    private AMRMClientAsync<ContainerRequest> resourceManager; // Handle to communicate with the Resource Manager
    private NMClientAsync nodeManager; // Handle to communicate with the Node Manager
    protected ApplicationAttemptId appAttemptID; // ID assigned to us by the resource manager
    private ByteBuffer allTokens; // security and other yarn tokens

    private int initialNumPlaces;
    private int coresPerPlace;
    private int nthreads;
    private int memoryPerPlaceInMb;

    // these are all atomic, since they get updated in callbacks
    // count of requests we make for new places
    private AtomicInteger numRequestedContainers = new AtomicInteger();
    // count of allocated containers we start places in.
    private AtomicInteger numAllocatedContainers = new AtomicInteger();
    // count of containers that were allocated to us but not requested (bug in yarn v2.5.1)
    private AtomicInteger numExtraContainers = new AtomicInteger();
    // count of completed containers, will eventually reach allocated+extra
    private AtomicInteger numCompletedContainers = new AtomicInteger();
    // Count of failed containers
    private AtomicInteger numFailedContainers = new AtomicInteger();

    public static void main(String[] args) {
        ApplicationMaster appMaster;
        try {
            LOG.info("Initializing ApplicationMaster");
            appMaster = new ApplicationMaster(args);
            appMaster.setup();
            appMaster.handleX10();
            appMaster.shutdown();
        } catch (Exception e) {
            LOG.warn("Error caught in main", e);
            ExitUtil.terminate(1, e);
        }
        System.exit(0);
    }

    public ApplicationMaster(String[] args) throws Exception {
        this.conf = new YarnConfiguration();
        Map<String, String> envs = System.getenv();

        ContainerId containerId = ConverterUtils.toContainerId(envs.get(Environment.CONTAINER_ID.name()));
        appAttemptID = containerId.getApplicationAttemptId();

        LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId()
                + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId="
                + appAttemptID.getAttemptId());

        initialNumPlaces = Integer.parseInt(envs.get(X10_NPLACES));
        coresPerPlace = Integer.parseInt(envs.get(X10_NTHREADS));
        nthreads = coresPerPlace;
        links = new HashMap<Integer, ApplicationMaster.CommunicationLink>(initialNumPlaces);
        places = new HashMap<ContainerId, Integer>(initialNumPlaces);
        pendingReads = new HashMap<SocketChannel, ByteBuffer>();
        selector = Selector.open();
        this.args = args;
        this.appName = System.getProperty(X10_YARN_MAIN);

        // look for max memory argument
        this.memoryPerPlaceInMb = 4024; // default of 1Gb per place
        for (int i = 0; i < args.length; i++) {
            try {
                if (args[i].startsWith("-Xmx")) {
                    if (args[i].toLowerCase().endsWith("g"))
                        this.memoryPerPlaceInMb = Integer.parseInt(args[i].substring(4, args[i].length() - 1))
                                * 1024;
                    else if (args[i].toLowerCase().endsWith("m"))
                        this.memoryPerPlaceInMb = Integer.parseInt(args[i].substring(4, args[i].length() - 1));
                    else if (args[i].toLowerCase().endsWith("k"))
                        this.memoryPerPlaceInMb = Integer.parseInt(args[i].substring(4, args[i].length() - 1))
                                / 1024;
                    else
                        this.memoryPerPlaceInMb = Integer.parseInt(args[i].substring(4)) / 1024 / 1024;
                    break;
                }
            } catch (NumberFormatException e) {
                // ignore, use default value
                e.printStackTrace();
            }
        }

        if (envs.containsKey(X10YARNENV_ + X10_YARN_KILL)) {
            placeKiller = new ScheduledThreadPoolExecutor(1);
            // things to kill takes the form place:delayInSeconds,place:delayInSeconds,etc.  e.g. "2:2,3:3" will kill place 2 after 2 seconds, 3 after 3 seconds
            String thingsToKill = System.getenv(X10YARNENV_ + X10_YARN_KILL);
            // TODO: format error checking
            String[] sets = thingsToKill.split(",");
            pendingKills = new HashMap<Integer, Integer>(sets.length);
            for (String set : sets) {
                String[] place_delay = set.split(":");
                int place = Integer.parseInt(place_delay[0]);
                int delay = Integer.parseInt(place_delay[1]);
                pendingKills.put(place, delay);
            }
        } else {
            placeKiller = null;
            pendingKills = null;
        }
    }

    private void setup() throws IOException, YarnException {
        LOG.info("Starting ApplicationMaster");

        // Remove the AM->RM token so that containers cannot access it.
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
        LOG.info("Executing with tokens:");
        while (iter.hasNext()) {
            Token<?> token = iter.next();
            LOG.info(token);
            if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
                iter.remove();
            }
        }
        allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        // Create appSubmitterUgi and add original tokens to it
        String appSubmitterUserName = System.getenv(ApplicationConstants.Environment.USER.name());
        UserGroupInformation appSubmitterUgi = UserGroupInformation.createRemoteUser(appSubmitterUserName);
        appSubmitterUgi.addCredentials(credentials);

        resourceManager = AMRMClientAsync.createAMRMClientAsync(1000, new RMCallbackHandler());
        resourceManager.init(conf);
        resourceManager.start();

        nodeManager = new NMClientAsyncImpl(new NMCallbackHandler(this));
        nodeManager.init(conf);
        nodeManager.start();

        // Register self with ResourceManager
        // This will start heartbeating to the RM
        appMasterHostname = NetUtils.getHostname();
        RegisterApplicationMasterResponse response = resourceManager.registerApplicationMaster(appMasterHostname,
                appMasterRpcPort, appMasterTrackingUrl);
        {
            int slash = appMasterHostname.indexOf('/');
            if (slash != -1)
                appMasterHostname = appMasterHostname.substring(0, slash);
        }
        // Dump out information about cluster capability as seen by the
        // resource manager
        int maxMem = response.getMaximumResourceCapability().getMemory();
        LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
        int maxVCores = response.getMaximumResourceCapability().getVirtualCores();
        LOG.info("Max vcores capabililty of resources in this cluster " + maxVCores);
        // A resource ask cannot exceed the max.

        // TODO: should we reject instead of modifying to fit?
        if (memoryPerPlaceInMb > maxMem) {
            LOG.info("Container memory specified above max threshold of cluster." + " Using max value."
                    + ", specified=" + memoryPerPlaceInMb + ", max=" + maxMem);
            memoryPerPlaceInMb = maxMem;
        }
        if (coresPerPlace > maxVCores) {
            LOG.info("Container virtual cores specified above max threshold of cluster." + " Using max value."
                    + ", specified=" + coresPerPlace + ", max=" + maxVCores);
            coresPerPlace = maxVCores;
        } else if (coresPerPlace == 0) {
            LOG.info("Container virtual cores specified as auto (X10_NTHREADS=0)." + " Using max value."
                    + ", specified=" + coresPerPlace + ", max=" + maxVCores);
            coresPerPlace = maxVCores;
        }
        List<Container> previousAMRunningContainers = response.getContainersFromPreviousAttempts();
        LOG.info(appAttemptID + " received " + previousAMRunningContainers.size()
                + " previous attempts' running containers on AM registration.");
        numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
        int numTotalContainersToRequest = initialNumPlaces - previousAMRunningContainers.size();

        // open a local port for X10rt management, and register it with the selector
        launcherChannel = ServerSocketChannel.open();
        //launcherChannel.bind(new InetSocketAddress(appMasterHostname, 0)); // bind to the visible network hostname and random port
        launcherChannel.bind(null);
        launcherChannel.configureBlocking(false);
        appMasterPort = launcherChannel.socket().getLocalPort();
        launcherChannel.register(selector, SelectionKey.OP_ACCEPT);

        numRequestedContainers.set(initialNumPlaces);
        // Send request for containers to RM
        for (int i = 0; i < numTotalContainersToRequest; ++i) {
            Resource capability = Resource.newInstance(memoryPerPlaceInMb, coresPerPlace);
            ContainerRequest request = new ContainerRequest(capability, null, null, Priority.newInstance(0));
            LOG.info("Requested container ask: " + request.toString());
            resourceManager.addContainerRequest(request);
            pendingRequests.add(request);
        }
    }

    protected void handleX10() {
        // handle X10 place requests
        Iterator<SelectionKey> events = null;
        while (running) {
            try {
                SelectionKey key;
                // check for previously unhandled events
                if (events != null && events.hasNext()) {
                    key = events.next();
                    events.remove();
                } else if (selector.select() == 0) // check for new events
                    continue; // nothing to process, go back and block on select again
                else { // select returned some events
                    events = selector.selectedKeys().iterator();
                    key = events.next();
                    events.remove();
                }

                // process the selectionkey
                if (key.isAcceptable()) {
                    LOG.info("New connection from X10 detected");
                    // accept any connections on the server socket, and look for things to read from it
                    ServerSocketChannel ssc = (ServerSocketChannel) key.channel();
                    SocketChannel sc = ssc.accept();
                    sc.configureBlocking(false);
                    sc.register(selector, SelectionKey.OP_READ);
                }
                if (key.isReadable()) {
                    SocketChannel sc = (SocketChannel) key.channel();

                    ByteBuffer incomingMsg;
                    if (pendingReads.containsKey(sc))
                        incomingMsg = pendingReads.remove(sc);
                    else
                        incomingMsg = ByteBuffer.allocateDirect(headerLength).order(ByteOrder.nativeOrder());

                    LOG.info("Reading message from X10");
                    try {
                        if (sc.read(incomingMsg) == -1) {
                            // socket closed
                            sc.close();
                            key.cancel();
                            pendingReads.remove(sc);
                        } else if (incomingMsg.hasRemaining()) {
                            LOG.info("Message header partially read. " + incomingMsg.remaining()
                                    + " bytes remaining");
                            pendingReads.put(sc, incomingMsg);
                        } else { // buffer is full
                            if (incomingMsg.capacity() == headerLength) {
                                // check to see if there is a body associated with this message header
                                int datalen = incomingMsg.getInt(headerLength - 4);
                                //System.err.println("Byte order is "+incomingMsg.order()+" datalen="+datalen);
                                if (datalen == 0)
                                    processMessage(incomingMsg, sc);
                                else { // create a larger array to hold the header+body
                                    ByteBuffer newBuffer = ByteBuffer.allocateDirect(headerLength + datalen)
                                            .order(ByteOrder.nativeOrder());
                                    incomingMsg.rewind();
                                    newBuffer.put(incomingMsg);
                                    incomingMsg = newBuffer;
                                    sc.read(incomingMsg); // read in the body, if available
                                    if (incomingMsg.hasRemaining()) {
                                        LOG.info("Message partially read. " + incomingMsg.remaining()
                                                + " bytes remaining");
                                        pendingReads.put(sc, incomingMsg);
                                    } else
                                        processMessage(incomingMsg, sc);
                                }
                            }
                        }
                    } catch (IOException e) {
                        LOG.warn("Error reading in message from socket channel", e);
                    }
                }
            } catch (IOException e) {
                LOG.warn("Error handling X10 links", e);
            }
        }
    }

    private void processMessage(ByteBuffer msg, SocketChannel sc) {
        assert (msg.capacity() >= headerLength);

        msg.rewind(); // reset the buffer for reading from the beginning
        CTRL_MSG_TYPE type = CTRL_MSG_TYPE.values()[msg.getInt()];
        int destination = msg.getInt();
        final int source = msg.getInt();
        int datalen = msg.getInt();
        assert (datalen == msg.remaining());

        LOG.info("Processing message of type " + type + ", size " + (headerLength + datalen) + " from place "
                + source);
        /*      System.err.print("Message contents:");
              for (int i=0; i<msg.capacity(); i++)
                 System.err.print(" "+Integer.toHexString(msg.get(i)).toUpperCase());
              System.err.println();
        */
        switch (type) {
        case HELLO: {
            // read the port information, and update the record for this place
            assert (datalen == 4 && source < numRequestedContainers.get() && source >= 0);
            final CommunicationLink linkInfo;
            LOG.info("Getting link for place " + source);
            synchronized (links) {
                linkInfo = links.get(source);
            }
            linkInfo.port = ((msg.get() & 0xFF) << 8) | (msg.get() & 0xFF); // unsigned short in network order
            linkInfo.sc = sc;

            // check if there are pending port requests for this place
            if (linkInfo.pendingPortRequests != null) {
                // prepare response message
                String linkString = linkInfo.node.getHost() + ":" + linkInfo.port;
                byte[] linkBytes = linkString.getBytes(); // matches existing code.  TODO: switch to UTF8
                ByteBuffer response = ByteBuffer.allocateDirect(headerLength + linkBytes.length)
                        .order(ByteOrder.nativeOrder());
                response.putInt(CTRL_MSG_TYPE.PORT_RESPONSE.ordinal());
                response.putInt(-1);
                response.putInt(source);
                response.putInt(linkBytes.length);
                response.put(linkBytes);
                // send response to each waiting place
                for (int place : linkInfo.pendingPortRequests) {
                    response.putInt(4, place); // set the destination to the requesting place
                    response.rewind();
                    // TODO: this code may get stuck here if the reciever isn't reading properly
                    try {
                        while (response.hasRemaining())
                            links.get(place).sc.write(response);
                    } catch (IOException e) {
                        LOG.warn("Unable to send out port response for place " + place + " to place " + source, e);
                    }
                }
                linkInfo.pendingPortRequests = null;
            }
            LOG.info("HELLO from place " + source + " at port " + linkInfo.port);

            if (pendingKills != null && pendingKills.containsKey(source)) {
                int delay = pendingKills.remove(source);
                LOG.info("Scheduling a takedown of place " + source + " in " + delay + " seconds");
                placeKiller.schedule(new Runnable() {
                    @Override
                    public void run() {
                        LOG.info("KILLING CONTAINER FOR PLACE " + source);
                        nodeManager.stopContainerAsync(linkInfo.container, linkInfo.node);
                    }
                }, delay, TimeUnit.SECONDS);
            }
        }
            break;
        case GOODBYE: {
            try {
                CommunicationLink link = links.get(source);
                assert (link.pendingPortRequests == null);
                sc.close();
                link.port = PORT_DEAD;
            } catch (IOException e) {
                LOG.warn("Error closing socket channel", e);
            }
            LOG.info("GOODBYE to place " + source);
        }
            break;
        case PORT_REQUEST: {
            LOG.info("Got PORT_REQUEST from place " + source + " for place " + destination);
            // check to see if we know the requested information
            CommunicationLink linkInfo = links.get(destination);
            if (linkInfo.port != PORT_UNKNOWN) {
                String linkString;
                if (linkInfo.port == PORT_DEAD)
                    linkString = DEAD;
                else
                    linkString = linkInfo.node.getHost() + ":" + linkInfo.port;
                LOG.info("Telling place " + source + " that place " + destination + " is at " + linkString);
                byte[] linkBytes = linkString.getBytes(); // matches existing code.  TODO: switch to UTF8
                ByteBuffer response = ByteBuffer.allocateDirect(headerLength + linkBytes.length)
                        .order(ByteOrder.nativeOrder());
                response.putInt(CTRL_MSG_TYPE.PORT_RESPONSE.ordinal());
                response.putInt(source);
                response.putInt(destination);
                response.putInt(linkBytes.length);
                response.put(linkBytes);
                response.rewind();
                // TODO: this code may get stuck here if the reciever isn't reading properly
                try {
                    while (response.hasRemaining())
                        sc.write(response);
                } catch (IOException e) {
                    LOG.warn("Unable to send out port response for place " + destination + " to place " + source,
                            e);
                }
            } else { // port is not known.  remember we have a place asking for it when it becomes available
                if (linkInfo.pendingPortRequests == null)
                    linkInfo.pendingPortRequests = new ArrayList<Integer>(2);
                linkInfo.pendingPortRequests.add(source);
                LOG.info("Stashing PORT_REQUEST from place " + source + " for place " + destination
                        + " until the answer is known");
            }
        }
            break;
        case LAUNCH_REQUEST: {
            assert (datalen == 8);
            int numPlacesRequested = (int) msg.getLong();

            int oldvalue = numRequestedContainers.getAndAdd((int) numPlacesRequested);

            // Send request for containers to RM
            for (int i = 0; i < numPlacesRequested; i++) {
                Resource capability = Resource.newInstance(memoryPerPlaceInMb, coresPerPlace);
                ContainerRequest request = new ContainerRequest(capability, null, null, Priority.newInstance(0));
                LOG.info("Adding a new container request " + request.toString());
                resourceManager.addContainerRequest(request);
                pendingRequests.add(request);
            }

            LOG.info("Requested an increase of " + numPlacesRequested + " places on top of the previous " + oldvalue
                    + " places");
            msg.rewind();
            msg.putInt(CTRL_MSG_TYPE.LAUNCH_RESPONSE.ordinal());
            msg.rewind();
            try {
                while (msg.hasRemaining())
                    sc.write(msg);
            } catch (IOException e) {
                LOG.warn("Unable to send out launch response to place " + source, e);
            }
        }
            break;
        default:
            LOG.warn("unknown message type " + type);
        }
        LOG.info("Finished processing message of size " + (headerLength + datalen) + " from place " + source);
    }

    protected boolean shutdown() {

        // close selector, and any remaining links to places
        try {
            selector.close();
        } catch (IOException e1) {
            // shutting down... ignore
        }
        links.clear();
        places.clear();

        // When the application completes, it should stop all running containers
        LOG.info("X10 program " + appName + " completed. Stopping running containers");
        nodeManager.stop();
        // When the application completes, it should send a finish application
        // signal to the RM
        LOG.info("Signalling finish to RM");
        FinalApplicationStatus appStatus;
        String appMessage = null;
        boolean success = true;
        if (numFailedContainers.get() == 0) {
            appStatus = FinalApplicationStatus.SUCCEEDED;
        } else {
            appStatus = FinalApplicationStatus.FAILED;
            appMessage = "Diagnostics." + ", total=" + numRequestedContainers.get() + ", completed="
                    + numCompletedContainers.get() + ", allocated=" + numAllocatedContainers.get() + ", failed="
                    + numFailedContainers.get() + ", extra=" + numExtraContainers.get();
            success = false;
        }
        try {
            resourceManager.unregisterApplicationMaster(appStatus, appMessage, null);
        } catch (YarnException ex) {
            LOG.error("Failed to unregister application", ex);
        } catch (IOException e) {
            LOG.error("Failed to unregister application", e);
        }
        resourceManager.stop();
        return success;
    }

    private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {

        @Override
        public float getProgress() {
            // ramp up to 50% as places start up, then up to 100% as they shut down
            return (float) (numAllocatedContainers.get() + numCompletedContainers.get() - numExtraContainers.get())
                    / (numRequestedContainers.get() * 2);
        }

        @Override
        public void onContainersAllocated(List<Container> allocatedContainers) {
            LOG.info("Got response from RM for container ask, allocatedCnt=" + allocatedContainers.size());
            for (final Container allocatedContainer : allocatedContainers) {
                if (numAllocatedContainers.get() >= numRequestedContainers.get()) {
                    // There is currently a bug in yarn, where it will allocate more
                    // containers than requested, if the request comes in later.  Workaround.
                    LOG.info("Dropping unexpected container " + allocatedContainer.getId());
                    numExtraContainers.incrementAndGet();
                    resourceManager.releaseAssignedContainer(allocatedContainer.getId());
                    continue;
                }

                // A request was satisfied; remove one pending request.
                final ContainerRequest pr = pendingRequests.poll();
                if (pr != null)
                    resourceManager.removeContainerRequest(pr);

                final int placeId = numAllocatedContainers.getAndIncrement();
                final NodeId node = allocatedContainer.getNodeId();
                LOG.info("Launching place on a new container." + ", containerId=" + allocatedContainer.getId()
                        + ", containerNode=" + node.getHost() + ":" + allocatedContainer.getNodeId().getPort()
                        + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress()
                        + ", containerResourceMemory" + allocatedContainer.getResource().getMemory()
                        + ", containerResourceVirtualCores" + allocatedContainer.getResource().getVirtualCores());

                // prepare the container context for X10 places
                try {
                    // put x10.jar and other needed jar files into the container resources
                    FileSystem fs = FileSystem.get(conf);
                    String appId = appAttemptID.getApplicationId().toString();
                    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

                    String[] jarfiles = System.getenv(X10_HDFS_JARS).split(":");
                    for (String jar : jarfiles) {
                        if (jar.length() > 0) {
                            LOG.info("Added " + jar + " to local resources for " + appId);
                            addToRuntimeResources(fs, jar, appId, localResources);
                        }
                    }

                    // upload any files specified via -copy argument
                    String upload = System.getProperty(ApplicationMaster.X10_YARNUPLOAD);
                    if (upload != null) {
                        String[] files = upload.split(",");
                        for (String file : files) {
                            LOG.info("Added file " + file + " to local resources for " + appId);
                            addToRuntimeResources(fs, file, appId, localResources);
                        }
                    }

                    // set environment variables
                    LOG.info("Set the environment for container for place " + placeId);
                    Map<String, String> env = new HashMap<String, String>();
                    //env.putAll(System.getenv()); // copy all environment variables from the client side to the application side
                    // copy over existing environment variables
                    final int prefixlen = X10YARNENV_.length();
                    for (String key : System.getenv().keySet()) {
                        if (key.startsWith(X10YARNENV_))
                            env.put(key.substring(prefixlen), System.getenv(key));
                    }
                    env.put(ApplicationMaster.X10_NPLACES,
                            Integer.toString(Math.max(initialNumPlaces, numRequestedContainers.get())));
                    if (nthreads > 0)
                        env.put(ApplicationMaster.X10_NTHREADS, Integer.toString(nthreads));
                    env.put(ApplicationMaster.X10_LAUNCHER_PLACE, Integer.toString(placeId));
                    env.put(ApplicationMaster.X10_LAUNCHER_HOST, allocatedContainer.getNodeId().getHost());
                    env.put(ApplicationMaster.X10_LAUNCHER_PARENT, appMasterHostname + ':' + appMasterPort);

                    // Set the necessary command to execute the runtime
                    Vector<CharSequence> vargs = new Vector<CharSequence>(args.length + 30);

                    boolean isNative = Boolean.getBoolean(X10_YARN_NATIVE);
                    if (!isNative) {
                        // Set up the class path, to include all local jar files addid via addToRuntimeResources above
                        StringBuilder classPathEnv = new StringBuilder(Environment.CLASSPATH.$$())
                                .append(ApplicationConstants.CLASS_PATH_SEPARATOR).append("./*");
                        /*               for (String c : conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
                                             YarnConfiguration.DEFAULT_YARN_CROSS_PLATFORM_APPLICATION_CLASSPATH)) {
                                          classPathEnv.append(ApplicationConstants.CLASS_PATH_SEPARATOR);
                                          classPathEnv.append(c.trim());
                                       }
                                       env.put("CLASSPATH", classPathEnv.toString());
                        */
                        LOG.info("Completed setting up runtime environment " + env.toString());

                        // Set java executable command
                        vargs.add(Environment.JAVA_HOME.$$() + "/bin/java");
                        // set classpath
                        vargs.add("-classpath");
                        vargs.add(classPathEnv.toString());
                        // set remaining arguments
                        for (int i = 0; i < args.length; i++)
                            vargs.add(args[i]);
                    } else {
                        // set binary name
                        vargs.add("./" + appName);
                        for (int i = 1; i < args.length; i++)
                            vargs.add(args[i]);
                    }

                    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + '/' + appName + "_Place" + placeId
                            + ".stdout");
                    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + '/' + appName + "_Place" + placeId
                            + ".stderr");

                    // Get final commmand
                    StringBuilder command = new StringBuilder();
                    for (CharSequence str : vargs) {
                        command.append(str).append(" ");
                    }

                    LOG.info("Completed setting up runtime launch command " + command.toString());
                    List<String> commands = new ArrayList<String>();
                    commands.add(command.toString());

                    ContainerLaunchContext ctx = ContainerLaunchContext.newInstance(localResources, env, commands,
                            null, allTokens.duplicate(), null);
                    LOG.info("Storing link for place " + placeId);
                    links.put(placeId, new CommunicationLink(node, allocatedContainer.getId())); // save the hostname of the machine with this place
                    places.put(allocatedContainer.getId(), placeId);
                    nodeManager.startContainerAsync(allocatedContainer, ctx);
                } catch (IOException e) {
                    LOG.error("Failed to start Place " + placeId, e);
                }
            }
        }

        @Override
        public void onContainersCompleted(List<ContainerStatus> completedContainers) {
            LOG.info("Got response from RM for container ask, completedCnt=" + completedContainers.size());
            for (ContainerStatus containerStatus : completedContainers) {
                LOG.info(appAttemptID + " got container status for containerID=" + containerStatus.getContainerId()
                        + ", state=" + containerStatus.getState() + ", exitStatus="
                        + containerStatus.getExitStatus() + ", diagnostics=" + containerStatus.getDiagnostics());

                // increment counters for completed/failed containers
                numCompletedContainers.incrementAndGet();
                int exitStatus = containerStatus.getExitStatus();
                Integer placeId = places.get(containerStatus.getContainerId());
                if (placeId != null) {
                    CommunicationLink l = links.get(placeId);
                    if (l != null)
                        l.port = PORT_DEAD; // mark the place as dead
                }
                if (0 != exitStatus) {
                    // container failed
                    if (ContainerExitStatus.ABORTED == exitStatus) {
                        // killed by framework, not really a failure?
                        LOG.info("Container aborted");
                    } else if (placeId != null && pendingKills != null && pendingKills.containsKey(placeId)) {
                        // TODO: ensure that the pending kill actually executed
                        LOG.info("Container was killed and exited with exit code " + exitStatus);
                    } else {
                        numFailedContainers.incrementAndGet();
                        LOG.info("Container exited with exit code " + exitStatus);
                    }
                } else {
                    // nothing to do
                    // container completed successfully
                    LOG.info("Container completed successfully." + ", containerId="
                            + containerStatus.getContainerId());
                }
            }
            // check to see if everything is down, and if so, exit ourselves
            if (numCompletedContainers.get() == (numAllocatedContainers.get() + numExtraContainers.get())) {
                LOG.info("Shutting down now that all " + numRequestedContainers.get() + " containers have exited");
                running = false;
                selector.wakeup();
            }
        }

        @Override
        public void onError(Throwable arg0) {
            running = false;
            selector.wakeup();
            resourceManager.stop();
        }

        @Override
        public void onNodesUpdated(List<NodeReport> arg0) {
        }

        @Override
        public void onShutdownRequest() {
            running = false;
            selector.wakeup();
        }

        private void addToRuntimeResources(FileSystem fs, String fileDstPath, String appId,
                Map<String, LocalResource> localResources) throws IOException {
            Path dst = new Path(fs.getUri() + fileDstPath);
            FileStatus scFileStatus = fs.getFileStatus(dst);
            LocalResource scRsrc = LocalResource.newInstance(ConverterUtils.getYarnUrlFromURI(dst.toUri()),
                    LocalResourceType.FILE, LocalResourceVisibility.APPLICATION, scFileStatus.getLen(),
                    scFileStatus.getModificationTime());
            localResources.put(fileDstPath.substring(fileDstPath.lastIndexOf('/') + 1), scRsrc);
        }
    }

    private static class NMCallbackHandler implements NMClientAsync.CallbackHandler {
        private final ApplicationMaster applicationMaster;

        public NMCallbackHandler(ApplicationMaster applicationMaster) {
            this.applicationMaster = applicationMaster;
        }

        @Override
        public void onContainerStarted(ContainerId containerId, Map<String, ByteBuffer> arg1) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Succeeded to start Container " + containerId);
            }
        }

        @Override
        public void onContainerStatusReceived(ContainerId containerId, ContainerStatus containerStatus) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Container Status: id=" + containerId + ", status=" + containerStatus);
            }
        }

        @Override
        public void onContainerStopped(ContainerId containerId) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Succeeded to stop Container " + containerId);
            }
        }

        @Override
        public void onGetContainerStatusError(ContainerId containerId, Throwable arg1) {
            LOG.error("Failed to query the status of Container " + containerId);
        }

        @Override
        public void onStartContainerError(ContainerId containerId, Throwable e) {
            LOG.error("Failed to start Container " + containerId, e);
            applicationMaster.numCompletedContainers.incrementAndGet();
            applicationMaster.numFailedContainers.incrementAndGet();
        }

        @Override
        public void onStopContainerError(ContainerId containerId, Throwable e) {
            LOG.error("Failed to stop Container " + containerId, e);
        }
    }
}