ml.shifu.guagua.yarn.GuaguaAppMaster.java Source code

Introduction

Here is the source code for ml.shifu.guagua.yarn.GuaguaAppMaster.java, the YARN application master that requests containers and launches Guagua's master and worker tasks.

Source

/*
 * Copyright [2013-2014] PayPal Software Foundation
 *  
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *  
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package ml.shifu.guagua.yarn;

import java.io.IOException;
import java.lang.Thread.UncaughtExceptionHandler;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;

import ml.shifu.guagua.GuaguaConstants;
import ml.shifu.guagua.GuaguaRuntimeException;
import ml.shifu.guagua.hadoop.io.GuaguaInputSplit;
import ml.shifu.guagua.yarn.util.GsonUtils;
import ml.shifu.guagua.yarn.util.YarnUtils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.client.api.async.NMClientAsync;
import org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
import org.jboss.netty.bootstrap.ServerBootstrap;
import org.jboss.netty.channel.ChannelEvent;
import org.jboss.netty.channel.ChannelHandlerContext;
import org.jboss.netty.channel.ChannelPipeline;
import org.jboss.netty.channel.ChannelPipelineFactory;
import org.jboss.netty.channel.ChannelState;
import org.jboss.netty.channel.ChannelStateEvent;
import org.jboss.netty.channel.Channels;
import org.jboss.netty.channel.ExceptionEvent;
import org.jboss.netty.channel.MessageEvent;
import org.jboss.netty.channel.SimpleChannelUpstreamHandler;
import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory;
import org.jboss.netty.handler.codec.serialization.ClassResolvers;
import org.jboss.netty.handler.codec.serialization.ObjectDecoder;
import org.jboss.netty.handler.codec.serialization.ObjectEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Maps;

/**
 * {@link GuaguaAppMaster} is the application master used to launch master and worker tasks.
 * 
 * <p>
 * This app master only tracks and launches all task containers; it does not run the master task of distributed
 * training itself. The master task runs in its own container.
 * 
 * <p>
 * TODO: Web monitoring is not supported in the current app master.
 * 
 * <p>
 * Fail-over works like MapReduce: if a container fails, it is relaunched, by default up to 4 times.
 * 
 * <p>
 * Each container is identified by a number starting from 1, which is used for fail-over.
 */
public class GuaguaAppMaster {

    private static final Logger LOG = LoggerFactory.getLogger(GuaguaAppMaster.class);

    /** Exit code for YARN containers that were manually killed/aborted */
    private static final int YARN_ABORT_EXIT_STATUS = -100;
    /** Exit code for successfully run YARN containers */
    private static final int YARN_SUCCESS_EXIT_STATUS = 0;
    /** millis to sleep between heartbeats during long loops */
    private static final int SLEEP_BETWEEN_HEARTBEATS_MSECS = 900;

    /**
     * Container id for current master container
     */
    private ContainerId masterContainerId;

    /**
     * App attempt id
     */
    private ApplicationAttemptId appAttemptId;

    /**
     * Whether master is done.
     */
    private volatile boolean done;

    /**
     * Yarn conf
     */
    private Configuration yarnConf;

    /**
     * Number of completed containers.
     */
    private AtomicInteger completedCount;

    /**
     * Number of failed containers.
     */
    private AtomicInteger failedCount;

    /**
     * Number of allocated containers.
     */
    private AtomicInteger allocatedCount;

    /**
     * Number of successful containers.
     */
    private AtomicInteger successfulCount;

    /**
     * Number of containers to launch.
     */
    private int containersToLaunch;

    /**
     * executor to launch container.
     */
    private ExecutorService executor;

    /**
     * executor to check whether task is time out.
     */
    private ExecutorService taskTimeoutExecutor;

    /**
     * Like mapred.task.timeout: if no progress update is received within this period, the container will be killed.
     */
    private long taskTimeOut = GuaguaYarnConstants.DEFAULT_TIME_OUT;

    /**
     * Heap memory setting for worker container.
     */
    private int heapPerContainer;

    /**
     * Handle to communicate with resource manager.
     */
    private AMRMClientAsync<ContainerRequest> amRmClient;
    /**
     * Handle to communicate with the Node Manager
     */
    private NMClientAsync nmClientAsync;
    /**
     * Listen to process the response from the Node Manager
     */
    private NMCallbackHandler containerListener;

    /**
     * A reusable map of resources already in HDFS for each task to copy-to-local env and use to launch each
     * GuaguaYarnTask.
     */
    private static Map<String, LocalResource> localResources;

    /**
     * Hostname of the app master container, used for status updates to clients (yet to be implemented).
     */
    private String appMasterHostname;
    /** Port on which the app master listens for status updates from clients */
    private int appMasterRpcPort = 1234;
    /** Tracking url to which app master publishes info for clients to monitor */
    private String appMasterTrackingUrl = "";

    /**
     * Setting container args
     */
    private String containerArgs;

    private List<InputSplit> inputSplits;

    private ApplicationId appId;

    private Map<Integer, List<Container>> partitionContainerMap;

    private Map<String, Integer> containerPartitionMap;

    private static enum PartitionStatus {
        INIT, SUCCESSFUL, FAILED, RETRY,
    }

    private Map<Integer, PartitionStatus> partitionStatusMap;

    private List<Integer> failedPartitions;

    private AtomicInteger partitionIndex;

    private int maxContainerAttempts;

    private int totalIterations;

    private ByteBuffer allTokens;

    private int rpcPort = GuaguaYarnConstants.DEFAULT_STATUS_RPC_PORT;

    private String rpcHostName;

    private static final Object LOCK = new Object();

    private Map<Integer, GuaguaIterationStatus> partitionProgress;

    private ServerBootstrap rpcServer;

    static {
        // pick up new conf XML file and populate it with stuff exported from client
        Configuration.addDefaultResource(GuaguaYarnConstants.GUAGUA_CONF_FILE);
    }

    /**
     * Construct the GuaguaAppMaster, populating fields from the env vars set up by the YARN framework in this
     * execution container.
     * 
     * @param cId
     *            the ContainerId
     * @param aId
     *            the ApplicationAttemptId
     * @param conf
     *            the YARN Configuration
     */
    public GuaguaAppMaster(ContainerId cId, ApplicationAttemptId aId, Configuration conf) {
        try {
            this.rpcHostName = this.appMasterHostname = InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            LOG.error("Error in getting local host name.", e);
        }

        this.masterContainerId = cId; // saved for later use by the app master.
        this.appAttemptId = aId;
        this.appId = this.getAppAttemptId().getApplicationId();
        this.yarnConf = conf;
        this.completedCount = new AtomicInteger(0);
        this.failedCount = new AtomicInteger(0);
        this.allocatedCount = new AtomicInteger(0);
        this.successfulCount = new AtomicInteger(0);

        this.partitionContainerMap = new ConcurrentHashMap<Integer, List<Container>>();
        this.containerPartitionMap = new ConcurrentHashMap<String, Integer>();
        this.partitionStatusMap = new ConcurrentHashMap<Integer, GuaguaAppMaster.PartitionStatus>();
        this.partitionIndex = new AtomicInteger(0);
        this.failedPartitions = new CopyOnWriteArrayList<Integer>();
        this.maxContainerAttempts = this.getYarnConf().getInt(
                GuaguaYarnConstants.GUAGUA_YARN_MAX_CONTAINER_ATTEMPTS,
                GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_MAX_CONTAINER_ATTEMPTS);

        this.heapPerContainer = this.getYarnConf().getInt(GuaguaYarnConstants.GUAGUA_CHILD_MEMORY,
                GuaguaYarnConstants.GUAGUA_CHILD_DEFAULT_MEMORY);
        this.totalIterations = this.getYarnConf().getInt(GuaguaConstants.GUAGUA_ITERATION_COUNT, 1);
        String containerArgs = this.getYarnConf().get(GuaguaYarnConstants.GUAGUA_YARN_CONTAINER_ARGS);
        if (containerArgs == null) {
            containerArgs = GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_CONTAINER_JAVA_OPTS;
        } else {
            containerArgs = GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_CONTAINER_JAVA_OPTS + " " + containerArgs;
        }
        this.containerArgs = containerArgs;

        this.rpcPort = getYarnConf().getInt(GuaguaYarnConstants.GUAGUA_YARN_STATUS_RPC_PORT,
                GuaguaYarnConstants.DEFAULT_STATUS_RPC_PORT);

        this.partitionProgress = new ConcurrentHashMap<Integer, GuaguaIterationStatus>();

        this.executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        this.taskTimeoutExecutor = Executors.newSingleThreadExecutor();
        this.taskTimeOut = getYarnConf().getLong(GuaguaYarnConstants.GUAGUA_TASK_TIMEOUT,
                GuaguaYarnConstants.DEFAULT_TIME_OUT);

        LOG.info("{}:{}", taskTimeOut, GuaguaYarnConstants.DEFAULT_TIME_OUT);
        LOG.info("GuaguaAppMaster  for ContainerId {} ApplicationAttemptId {}", cId, aId);
    }

    /**
     * Coordinates all requests for guagua's worker/master task containers, and manages application liveness heartbeat,
     * completion status, teardown, etc.
     * 
     * @return success or failure
     */
    public boolean run() throws YarnException, IOException {
        boolean success = false;
        try {
            // 1. get input from conf and generate input splits, as in GuaguaMapReduce
            // 2. store all splits into conf and export them to HDFS
            // 3. for each container, select a partition according to the container host and the split host,
            //    and pass the partition as a program argument
            // 4. store <partition, failed container number>
            // 5. if one container fails, request another container
            // 6. for newly allocated containers, try failed partitions first
            prepareInputSplits();

            // set up tokens so that the app master and tasks work properly.
            getAllTokens();

            registerRMCallBackHandler();

            registerNMCallbackHandler();

            registerAMToRM();

            startRPCServer();

            startTaskTimeoutExecutor();

            madeAllContainerRequestToRM();

            LOG.info("Waiting to finish ...");
            while (!isDone()) {
                try {
                    Thread.sleep(SLEEP_BETWEEN_HEARTBEATS_MSECS);
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                }
            }
            LOG.info("Done {}", isDone());
        } catch (Throwable t) {
            LOG.error("Error in AppMaster run.", t);
            throw new GuaguaRuntimeException(t);
        } finally {
            shutdown();
            success = finish();
        }
        return success;
    }

    private void startTaskTimeoutExecutor() {
        this.taskTimeoutExecutor.submit(new Runnable() {
            @Override
            public void run() {
                while (true) {
                    try {
                        Thread.sleep(GuaguaAppMaster.this.taskTimeOut);
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        break;
                    }
                    LOG.debug(GuaguaAppMaster.this.partitionProgress.toString());
                    for (Map.Entry<Integer, GuaguaIterationStatus> entry : GuaguaAppMaster.this.partitionProgress
                            .entrySet()) {
                        GuaguaIterationStatus status = entry.getValue();
                        // skip partitions that have not reported yet or are still in the first iteration
                        if (status.getTime() != 0L && status.getCurrentIteration() != 1
                                && (System.currentTimeMillis()
                                        - status.getTime()) > GuaguaAppMaster.this.taskTimeOut) {
                            List<Container> containers = GuaguaAppMaster.this.partitionContainerMap
                                    .get(entry.getKey());
                            Container container = containers.get(containers.size() - 1);
                            LOG.info(
                                    "Container {} timed out (timeout period {} ms) and will be killed by node manager {}.",
                                    container.getId(), GuaguaAppMaster.this.taskTimeOut, container.getNodeId());
                            GuaguaAppMaster.this.getNmClientAsync().stopContainerAsync(container.getId(),
                                    container.getNodeId());
                        }
                    }
                }
            }
        });
    }

    protected void shutdown() {
        // if we get here w/o problems, the executor is already long finished.
        if (null != getExecutor() && !getExecutor().isTerminated()) {
            LOG.info("Forcefully terminating executors with done = {}", isDone());
            getExecutor().shutdownNow(); // force kill, especially if got here by throw
        }
        if (this.rpcServer != null) {
            this.rpcServer.shutdown();
            this.rpcServer.releaseExternalResources();
        }
        if (this.taskTimeoutExecutor != null) {
            this.taskTimeoutExecutor.shutdownNow();
        }
    }

    /**
     * Start the RPC server used to receive progress updates from containers.
     */
    private void startRPCServer() {
        this.rpcServer = new ServerBootstrap(new NioServerSocketChannelFactory(
                Executors.newFixedThreadPool(GuaguaYarnConstants.DEFAULT_STATUS_RPC_SERVER_THREAD_COUNT),
                Executors.newCachedThreadPool(new MasterThreadFactory())));

        // Set up the pipeline factory.
        this.rpcServer.setPipelineFactory(new ChannelPipelineFactory() {
            public ChannelPipeline getPipeline() throws Exception {
                return Channels.pipeline(new ObjectEncoder(),
                        new ObjectDecoder(ClassResolvers.cacheDisabled(getClass().getClassLoader())),
                        new ServerHandler());
            }
        });

        // Bind and start to accept incoming connections.
        this.rpcServer.bind(new InetSocketAddress(rpcPort));
    }

    /**
     * Thread factory for the RPC server. Its main feature is to log uncaught exceptions from worker threads.
     */
    private static class MasterThreadFactory implements ThreadFactory {
        static final AtomicInteger poolNumber = new AtomicInteger(1);
        final ThreadGroup group;
        final AtomicInteger threadNumber = new AtomicInteger(1);
        final String namePrefix;

        MasterThreadFactory() {
            SecurityManager s = System.getSecurityManager();
            group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup();
            namePrefix = "pool-" + poolNumber.getAndIncrement() + "-thread-";
        }

        public Thread newThread(Runnable r) {
            Thread t = new Thread(group, r, namePrefix + threadNumber.getAndIncrement(), 0);
            if (t.isDaemon()) {
                t.setDaemon(false);
            }
            if (t.getPriority() != Thread.NORM_PRIORITY) {
                t.setPriority(Thread.NORM_PRIORITY);
            }
            t.setUncaughtExceptionHandler(new UncaughtExceptionHandler() {
                @Override
                public void uncaughtException(Thread t, Throwable e) {
                    // log with the throwable itself so that the full stack trace is printed
                    LOG.warn("Uncaught exception in thread " + t + ".", e);
                }
            });
            return t;
        }
    }

    /**
     * {@link ServerHandler} receives status messages and updates progress for this YARN app.
     */
    private class ServerHandler extends SimpleChannelUpstreamHandler {

        @Override
        public void handleUpstream(ChannelHandlerContext ctx, ChannelEvent e) throws Exception {
            if (e instanceof ChannelStateEvent && ((ChannelStateEvent) e).getState() != ChannelState.INTEREST_OPS) {
                LOG.debug(e.toString());
            }
            super.handleUpstream(ctx, e);
        }

        @Override
        public void messageReceived(ChannelHandlerContext ctx, MessageEvent e) {
            GuaguaIterationStatus status = GsonUtils.fromJson(e.getMessage().toString(),
                    GuaguaIterationStatus.class);
            LOG.info("Received RPC status: {}", status);
            synchronized (LOCK) {
                GuaguaAppMaster.this.partitionProgress.put(status.getPartition(), status);
            }
            if (status.isKillContainer()) {
                List<Container> containers;
                synchronized (LOCK) {
                    containers = GuaguaAppMaster.this.partitionContainerMap.get(status.getPartition());
                }
                LOG.info("containers:{}", containers);
                Container container = containers.get(containers.size() - 1);
                LOG.info("Container {} on node {} is killed because of a straggler condition.", container.getId(),
                        container.getNodeId());
                GuaguaAppMaster.this.getNmClientAsync().stopContainerAsync(container.getId(),
                        container.getNodeId());
            }
        }

        @Override
        public void exceptionCaught(ChannelHandlerContext ctx, ExceptionEvent e) {
            e.getChannel().close();
        }
    }

    /**
     * Prepare input splits for containers
     */
    private void prepareInputSplits() throws IOException {
        this.inputSplits = getNewSplits(getYarnConf());

        this.setContainersToLaunch(this.inputSplits.size());
        LOG.info("Input split size including master: {}", this.inputSplits.size());
    }

    public List<InputSplit> getNewSplits(Configuration conf) throws IOException {
        int masters = conf.getInt(GuaguaConstants.GUAGUA_MASTER_NUMBER, GuaguaConstants.DEFAULT_MASTER_NUMBER);
        int size = getYarnConf().getInt(GuaguaConstants.GUAGUA_WORKER_NUMBER, 0) + masters;
        List<InputSplit> newSplits = new ArrayList<InputSplit>(size);
        for (int i = 1; i <= size; i++) {
            newSplits.add(
                    GsonUtils.fromJson(getYarnConf().get(GuaguaYarnConstants.GUAGUA_YARN_INPUT_SPLIT_PREFIX + i),
                            GuaguaInputSplit.class));
            this.partitionProgress.put(i, new GuaguaIterationStatus());
        }

        return newSplits;
    }

    /**
     * Populate allTokens with the tokens received
     */
    private void getAllTokens() throws IOException {
        Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
        DataOutputBuffer dob = new DataOutputBuffer();
        credentials.writeTokenStorageToStream(dob);
        // Now remove the AM->RM token so that containers cannot access it.
        Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
        while (iter.hasNext()) {
            Token<?> token = iter.next();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Token type : {}", token.getKind());
            }
            if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
                iter.remove();
            }
        }
        this.allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    }

    /**
     * Register RM callback and start listening
     */
    private void registerRMCallBackHandler() {
        AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
        setAmRMClient(AMRMClientAsync.createAMRMClientAsync(1000, allocListener));
        getAmRMClient().init(getYarnConf());
        getAmRMClient().start();
    }

    /**
     * Register NM callback and start listening
     */
    private void registerNMCallbackHandler() {
        setContainerListener(new NMCallbackHandler());
        setNmClientAsync(new NMClientAsyncImpl(getContainerListener()));
        getNmClientAsync().init(getYarnConf());
        getNmClientAsync().start();
    }

    /**
     * Register AM to RM
     * 
     * @return AM register response
     */
    private RegisterApplicationMasterResponse registerAMToRM() throws YarnException {
        // register Application Master with the YARN Resource Manager so we can begin requesting resources.
        try {
            if (UserGroupInformation.isSecurityEnabled()) {
                LOG.info("SECURITY ENABLED ");
            }
            RegisterApplicationMasterResponse response = getAmRMClient().registerApplicationMaster(
                    this.appMasterHostname, this.appMasterRpcPort, this.appMasterTrackingUrl);
            return response;
        } catch (IOException ioe) {
            throw new IllegalStateException("GuaguaAppMaster failed to register with RM.", ioe);
        }
    }

    /**
     * Add all container requests to the RM.
     */
    private void madeAllContainerRequestToRM() {
        // Set up the container asks and send them to the RM. Until the full quota is allocated, the RM keeps being
        // polled for containers; looping continues until all containers are launched and their commands executed
        // (regardless of success or failure).
        for (int i = 0; i < getContainersToLaunch(); i++) {
            ContainerRequest containerAsk = setupContainerAskForRM();
            getAmRMClient().addContainerRequest(containerAsk);
        }
    }

    private void madeOneContainerRequestToRM() {
        ContainerRequest containerAsk = setupContainerAskForRM();
        getAmRMClient().addContainerRequest(containerAsk);
    }

    /**
     * Set up the request that will be sent to the RM for the container ask.
     * 
     * @return the ContainerRequest to be sent to the RM
     */
    private ContainerRequest setupContainerAskForRM() {
        // Set up host requirements: request containers first, then match allocated containers against splits to
        // get data locality.
        // TODO: better to request containers according to the hosts of the splits.
        Priority pri = Records.newRecord(Priority.class);
        pri.setPriority(GuaguaYarnConstants.GUAGUA_YARN_DEFAULT_PRIORITY);

        Resource capability = Records.newRecord(Resource.class);
        capability.setMemory(getHeapPerContainer());
        capability.setVirtualCores(getYarnConf().getInt(GuaguaYarnConstants.GUAGUA_YARN_TASK_VCORES,
                GuaguaYarnConstants.GUAGUA_YARN_TASK_DEFAULT_VCORES));

        ContainerRequest request = new ContainerRequest(capability, null, null, pri);
        LOG.info("Requested container ask: {}", request.toString());
        return request;
    }

    /**
     * Called when the application is done.
     * 
     * @return true if all containers succeeded
     */
    private boolean finish() {
        // When the application completes, it should stop all running containers
        LOG.info("Application completed. Stopping running containers");
        getNmClientAsync().stop();

        // When the application completes, it should send a finish application
        // signal to the RM
        LOG.info("Application completed. Signalling finish to RM");
        FinalApplicationStatus appStatus;
        String appMessage = null;
        boolean success = true;
        if (getSuccessfulCount().get() == getContainersToLaunch()) {
            appStatus = FinalApplicationStatus.SUCCEEDED;
        } else {
            appStatus = FinalApplicationStatus.FAILED;
            appMessage = String.format("Diagnostics total=%s, completed=%s, failed=%s.", getContainersToLaunch(),
                    getCompletedCount().get(), getFailedCount().get());
            success = false;
        }
        try {
            getAmRMClient().unregisterApplicationMaster(appStatus, appMessage, this.appMasterTrackingUrl);
        } catch (YarnException ex) {
            LOG.error("Failed to unregister application", ex);
        } catch (IOException e) {
            LOG.error("Failed to unregister application", e);
        }

        getAmRMClient().stop();
        return success;
    }

    /**
     * CallbackHandler to process RM async calls
     */
    private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {
        @Override
        public void onContainersCompleted(List<ContainerStatus> completedContainers) {
            LOG.info("Got response from RM for container ask, completedCnt={}", completedContainers.size());
            for (ContainerStatus containerStatus : completedContainers) {
                LOG.info("Got container status for containerID={}, state={}, exitStatus={}, diagnostics={}.",
                        containerStatus.getContainerId(), containerStatus.getState(),
                        containerStatus.getExitStatus(), containerStatus.getDiagnostics());
                if (!GuaguaAppMaster.this.containerPartitionMap
                        .containsKey(containerStatus.getContainerId().toString())) {
                    getCompletedCount().incrementAndGet();
                    LOG.info("Container {} completed but was never assigned a partition. Exited with status: {}",
                            containerStatus.getContainerId(), containerStatus.getExitStatus());
                    continue;
                }

                int partition = GuaguaAppMaster.this.containerPartitionMap
                        .get(containerStatus.getContainerId().toString());
                if (GuaguaAppMaster.this.partitionContainerMap.get(partition)
                        .size() >= GuaguaAppMaster.this.maxContainerAttempts) {
                    setDone(true);
                    LOG.info("Partition {} has reached the maximum number of attempts {}.", partition,
                            GuaguaAppMaster.this.maxContainerAttempts);
                    return;
                }
                switch (containerStatus.getExitStatus()) {
                case YARN_SUCCESS_EXIT_STATUS:
                    GuaguaAppMaster.this.partitionStatusMap.put(partition, PartitionStatus.SUCCESSFUL);
                    getSuccessfulCount().incrementAndGet();
                    break;
                case YARN_ABORT_EXIT_STATUS:
                    LOG.info("YARN_ABORT_EXIT_STATUS: Container id {} exits with {}",
                            containerStatus.getContainerId(), YARN_ABORT_EXIT_STATUS);
                    break; // neither success nor failure: the container was killed or aborted by YARN
                default:
                    LOG.info("default: Container id {} exits with {}", containerStatus.getContainerId(),
                            containerStatus.getExitStatus());
                    GuaguaAppMaster.this.partitionStatusMap.put(partition, PartitionStatus.FAILED);
                    GuaguaAppMaster.this.failedPartitions.add(partition);
                    GuaguaAppMaster.this.madeOneContainerRequestToRM();
                    getFailedCount().incrementAndGet();
                    break;
                }
                getCompletedCount().incrementAndGet();
            }

            if (getSuccessfulCount().get() == getContainersToLaunch()) {
                setDone(true);
                LOG.info("All containers completed. done = {}", isDone());
            } else {
                LOG.info(
                        "After completion of one container, current status is: completedCount:{} containersToLaunch:{} successfulCount:{} failedCount:{}.",
                        getCompletedCount().get(), getContainersToLaunch(), getSuccessfulCount().get(),
                        getFailedCount().get());
            }
        }

        @Override
        public void onContainersAllocated(List<Container> allocatedContainers) {
            LOG.info("Got response from RM for container ask, allocatedCnt={}", allocatedContainers.size());
            getAllocatedCount().addAndGet(allocatedContainers.size());
            LOG.info("Total containers allocated so far: {} out of {} required.",
                    getAllocatedCount().get(), getContainersToLaunch());
            startContainerLaunchingThreads(allocatedContainers);
        }

        @Override
        public void onShutdownRequest() {
            setDone(true);
            getAmRMClient().stop();
        }

        @Override
        public void onNodesUpdated(List<NodeReport> updatedNodes) {
        }

        @Override
        public float getProgress() {
            // set progress to deliver to RM on next heartbeat
            int sum = 0, totalSum = 0;
            synchronized (LOCK) {
                for (Map.Entry<Integer, GuaguaIterationStatus> entry : GuaguaAppMaster.this.partitionProgress
                        .entrySet()) {
                    sum += entry.getValue().getCurrentIteration();
                    totalSum += GuaguaAppMaster.this.totalIterations;
                }
                return (sum * 1.0f) / totalSum;
            }
        }

        @Override
        public void onError(Throwable e) {
            setDone(true);
            getAmRMClient().stop();
        }
    }

    /**
     * For each container successfully allocated, attempt to set up and launch a Guagua worker/master task.
     * 
     * @param allocatedContainers
     *            the containers we have currently allocated.
     */
    private void startContainerLaunchingThreads(final List<Container> allocatedContainers) {
        Map<String, List<Container>> hostContainterMap = getHostContainersMap(allocatedContainers);
        int size = allocatedContainers.size();
        while (size > 0) {
            int currentPartition = getCurrentPartition();
            if (currentPartition == -1) {
                LOG.warn("Requested too many resources. TODO: release containers that are not needed.");
                for (Container container : allocatedContainers) {
                    GuaguaAppMaster.this.getAmRMClient().releaseAssignedContainer(container.getId());
                }
                break;
            }
            Container container = getDataLocalityContainer(hostContainterMap, currentPartition);
            if (container == null) {
                container = allocatedContainers.get(0);
            }

            allocatedContainers.remove(container);

            LOG.info(
                    "Launching command on a new container: containerId={}, containerNode={}, containerPort={}, containerNodeURI={}, containerResourceMemory={}",
                    container.getId(), container.getNodeId().getHost(), container.getNodeId().getPort(),
                    container.getNodeHttpAddress(), container.getResource().getMemory());

            List<Container> list = this.partitionContainerMap.get(currentPartition);
            if (list == null) {
                list = new ArrayList<Container>();
            }
            list.add(container);
            this.partitionContainerMap.put(currentPartition, list);
            this.containerPartitionMap.put(container.getId().toString(), currentPartition);
            this.partitionStatusMap.put(currentPartition, PartitionStatus.INIT);
            LaunchContainerRunnable runnableLaunchContainer = new LaunchContainerRunnable(container,
                    getContainerListener(), currentPartition);
            getExecutor().execute(runnableLaunchContainer);

            size = allocatedContainers.size();
        }
    }

    private Map<String, List<Container>> getHostContainersMap(final List<Container> allocatedContainers) {
        Map<String, List<Container>> hostContainterMap = new HashMap<String, List<Container>>();
        for (Container container : allocatedContainers) {
            String host = container.getNodeId().getHost();
            List<Container> containers = hostContainterMap.get(host);
            if (containers == null) {
                containers = new ArrayList<Container>();
            }
            containers.add(container);
            hostContainterMap.put(host, containers);
        }
        return hostContainterMap;
    }

    /**
     * Find a container on the same host as the input split. This is not a great implementation for data locality;
     * check the MapReduce implementation for a better approach.
     * 
     * TODO RACK-LOCAL implementation
     */
    private Container getDataLocalityContainer(Map<String, List<Container>> hostContainterMap,
            int currentPartition) {
        GuaguaInputSplit inputSplit = (GuaguaInputSplit) (this.inputSplits.get(currentPartition - 1));
        String host = null;
        FileSplit[] fileSplits = inputSplit.getFileSplits();
        if (fileSplits != null) {
            try {
                host = fileSplits[0].getLocations()[0];
            } catch (Exception mayNotHappen) {
                host = null;
            }
        }

        List<Container> containers = hostContainterMap.get(host);
        Container container = null;
        if (containers != null && !containers.isEmpty()) {
            container = containers.remove(0);
            hostContainterMap.put(host, containers);
            LOG.info("Found a data-local container {} with host {} for partition {} and split {}.", container, host,
                    currentPartition, inputSplit);
            return container;
        }

        // if no data-local container was found, choose the first available one.
        Set<Entry<String, List<Container>>> entrySet = hostContainterMap.entrySet();
        String firstHost = null;
        List<Container> firstContainers = null;
        for (Entry<String, List<Container>> entry : entrySet) {
            firstHost = entry.getKey();
            firstContainers = entry.getValue();
            if (firstContainers != null && !firstContainers.isEmpty()) {
                container = firstContainers.remove(0);
                break;
            }
        }
        hostContainterMap.put(firstHost, firstContainers);
        LOG.info("Found a container {} with host {} for partition {} and split {}.", container, firstHost,
                currentPartition, inputSplit);
        return container;
    }

    private int getCurrentPartition() {
        LOG.info("Failed partitions size: {}, content: {}", this.failedPartitions.size(), this.failedPartitions);
        Iterator<Integer> it = this.failedPartitions.iterator();

        // Launch and start the container on a separate thread to keep the main thread unblocked as all containers
        // may not be allocated at one go.
        int currentPartition = 0;
        if (it.hasNext()) {
            currentPartition = it.next();
            // because this is a CopyOnWriteArrayList, elements can be removed safely while iterating
            this.failedPartitions.remove(Integer.valueOf(currentPartition));
            LOG.info("Failed partitions size after remove: {}, content: {}", this.failedPartitions.size(),
                    this.failedPartitions);
        } else {
            LOG.info("partitionIndex {} containersToLaunch {}", this.partitionIndex.get(), this.containersToLaunch);
            if (this.partitionIndex.get() >= this.containersToLaunch) {
                return -1;
            }
            currentPartition = this.partitionIndex.addAndGet(1);
        }
        return currentPartition;
    }

    /**
     * Thread to connect to the {@link ContainerManager} and launch the container that will house one of our Guagua
     * worker (or master) tasks.
     */
    private class LaunchContainerRunnable implements Runnable {
        /** Allocated container */
        private Container container;
        /** NM listener */
        private NMCallbackHandler containerListener;

        private final int partition;

        /**
         * Constructor.
         * 
         * @param container
         *            Allocated container
         * @param containerListener
         *            container listener.
         */
        public LaunchContainerRunnable(final Container container, NMCallbackHandler containerListener,
                int partition) {
            this.container = container;
            this.containerListener = containerListener;
            this.partition = partition;
        }

        /**
         * Connects to the CM, sets up the container launch context for the guagua task command and eventually
         * dispatches the container start request to the CM.
         */
        @Override
        public void run() {
            // Connect to ContainerManager
            // configure the launcher for the guagua task it will host
            ContainerLaunchContext ctx = buildContainerLaunchContext();
            // request CM to start this container as spec'd in ContainerLaunchContext
            this.containerListener.addContainer(this.container.getId(), this.container);
            getNmClientAsync().startContainerAsync(this.container, ctx);
        }

        /**
         * Boilerplate to set up the ContainerLaunchContext to tell the Container Manager how to launch our guagua task
         * in the execution container we have already allocated.
         * 
         * @return a populated ContainerLaunchContext object.
         */
        private ContainerLaunchContext buildContainerLaunchContext() {
            LOG.info("Setting up container launch context for containerId={}", container.getId());
            ContainerLaunchContext launchContext = Records.newRecord(ContainerLaunchContext.class);

            // args inject the CLASSPATH, heap MB, and TaskAttemptID for launched task
            final List<String> commands = generateShellExecCommand();
            LOG.info("Container launch commands: {}", commands);
            launchContext.setCommands(commands);
            // Set up tokens for the container too. We populate them mainly so that NodeManagers can download any
            // files from the distributed file system. The tokens are also useful in other cases, for example when
            // running a "hadoop dfs"-like command.
            launchContext.setTokens(allTokens.slice());

            // Set the environment variables to inject into remote task's container
            buildEnvironment(launchContext);

            // Set the local resources: just send the copies already in HDFS
            launchContext.setLocalResources(getTaskResourceMap());
            return launchContext;
        }

        /**
         * Generates our command line string used to launch our guagua tasks.
         * 
         * @return the BASH shell commands to launch the job.
         */
        private List<String> generateShellExecCommand() {
            String programArgs = new StringBuilder(300)
                    .append(getAppAttemptId().getApplicationId().getClusterTimestamp()).append(" ")
                    .append(getAppAttemptId().getApplicationId().getId()).append(" ")
                    .append(this.container.getId().getId()).append(" ").append(getAppAttemptId().getAttemptId())
                    .append(" ").append(this.partition).append(" ").append(GuaguaAppMaster.this.rpcHostName)
                    .append(" ").append(GuaguaAppMaster.this.rpcPort).toString();
            return YarnUtils.getCommand(GuaguaYarnTask.class.getName(), GuaguaAppMaster.this.containerArgs,
                    programArgs, getHeapPerContainer() + "");
        }

        /**
         * Utility to populate the environment vars we wish to inject into the new container's env when the guagua BSP
         * task is executed.
         * 
         * @param launchContext
         *            the launch context which will set our environment vars in the task's execution container.
         */
        private void buildEnvironment(final ContainerLaunchContext launchContext) {
            Map<String, String> classPathForEnv = Maps.newHashMap();
            // pick up the local classpath so that a Configuration can be instantiated remotely.
            YarnUtils.addLocalClasspathToEnv(classPathForEnv, getYarnConf());
            // set this map of env vars into the launch context.
            launchContext.setEnvironment(classPathForEnv);
        }
    }

    /**
     * CallbackHandler to process NM async calls
     */
    private class NMCallbackHandler implements NMClientAsync.CallbackHandler {
        /** List of containers */
        private ConcurrentMap<ContainerId, Container> containers = new ConcurrentHashMap<ContainerId, Container>();

        /**
         * Add a container
         * 
         * @param containerId
         *            id of container
         * @param container
         *            container object
         */
        public void addContainer(ContainerId containerId, Container container) {
            this.containers.putIfAbsent(containerId, container);
        }

        @Override
        public void onContainerStopped(ContainerId containerId) {
            LOG.info("Successfully stopped container {}", containerId);
            this.containers.remove(containerId);
        }

        @Override
        public void onContainerStatusReceived(ContainerId containerId, ContainerStatus containerStatus) {
            LOG.info("Container Status: id={}, status={}", containerId, containerStatus);
        }

        @Override
        public void onContainerStarted(ContainerId containerId, Map<String, ByteBuffer> allServiceResponse) {
            LOG.info("Successfully started container {}", containerId);
            Container container = this.containers.get(containerId);
            if (container != null) {
                getNmClientAsync().getContainerStatusAsync(containerId, container.getNodeId());
            }
        }

        @Override
        public void onStartContainerError(ContainerId containerId, Throwable t) {
            LOG.error(String.format("Failed to start Container %s", containerId), t);
            this.containers.remove(containerId);
        }

        @Override
        public void onGetContainerStatusError(ContainerId containerId, Throwable t) {
            LOG.error(String.format("Failed to query the status of Container %s", containerId), t);

        }

        @Override
        public void onStopContainerError(ContainerId containerId, Throwable t) {
            LOG.error(String.format("Failed to stop Container %s", containerId), t);
            this.containers.remove(containerId);
        }
    }

    /**
     * Lazily compose the map of jar and file names to LocalResource records for inclusion in GuaguaYarnTask container
     * requests. Can re-use the same map as guagua tasks need identical HDFS-based resources (jars etc.) to run.
     * 
     * @return the resource map for a ContainerLaunchContext
     */
    private synchronized Map<String, LocalResource> getTaskResourceMap() {
        // Set the local resources: just send the copies already in HDFS
        if (null == localResources) {
            localResources = Maps.newHashMap();
            try {
                // if you have to update the Conf for export to tasks, do it now
                // updateGuaguaConfForExport();
                localResources = YarnUtils.getLocalResourceMap(getYarnConf(), getAppId());
            } catch (IOException ioe) {
                // fail fast, this container will never launch.
                throw new IllegalStateException(
                        "Could not configure the container launch context for GuaguaYarnTask.", ioe);
            }
        }
        // else, return the prepopulated copy to reuse for each GuaguaYarnTask
        return localResources;
    }

    public ContainerId getContainerId() {
        return masterContainerId;
    }

    public void setContainerId(ContainerId containerId) {
        this.masterContainerId = containerId;
    }

    public ApplicationAttemptId getAppAttemptId() {
        return appAttemptId;
    }

    public void setAppAttemptId(ApplicationAttemptId appAttemptId) {
        this.appAttemptId = appAttemptId;
    }

    public boolean isDone() {
        return done;
    }

    public void setDone(boolean done) {
        this.done = done;
    }

    public Configuration getYarnConf() {
        return yarnConf;
    }

    public void setYarnConf(YarnConfiguration yarnConf) {
        this.yarnConf = yarnConf;
    }

    public AtomicInteger getCompletedCount() {
        return completedCount;
    }

    public void setCompletedCount(AtomicInteger completedCount) {
        this.completedCount = completedCount;
    }

    public AtomicInteger getFailedCount() {
        return failedCount;
    }

    public void setFailedCount(AtomicInteger failedCount) {
        this.failedCount = failedCount;
    }

    public AtomicInteger getAllocatedCount() {
        return allocatedCount;
    }

    public void setAllocatedCount(AtomicInteger allocatedCount) {
        this.allocatedCount = allocatedCount;
    }

    public AtomicInteger getSuccessfulCount() {
        return successfulCount;
    }

    public void setSuccessfulCount(AtomicInteger successfulCount) {
        this.successfulCount = successfulCount;
    }

    public int getContainersToLaunch() {
        return containersToLaunch;
    }

    public void setContainersToLaunch(int containersToLaunch) {
        this.containersToLaunch = containersToLaunch;
    }

    public ExecutorService getExecutor() {
        return executor;
    }

    public void setExecutor(ExecutorService executor) {
        this.executor = executor;
    }

    public int getHeapPerContainer() {
        return heapPerContainer;
    }

    public void setHeapPerContainer(int heapPerContainer) {
        this.heapPerContainer = heapPerContainer;
    }

    public AMRMClientAsync<ContainerRequest> getAmRMClient() {
        return amRmClient;
    }

    public void setAmRMClient(AMRMClientAsync<ContainerRequest> amRMClient) {
        this.amRmClient = amRMClient;
    }

    public NMClientAsync getNmClientAsync() {
        return nmClientAsync;
    }

    public void setNmClientAsync(NMClientAsync nmClientAsync) {
        this.nmClientAsync = nmClientAsync;
    }

    public NMCallbackHandler getContainerListener() {
        return containerListener;
    }

    public void setContainerListener(NMCallbackHandler containerListener) {
        this.containerListener = containerListener;
    }

    public String getContainerArgs() {
        return containerArgs;
    }

    public void setContainerArgs(String containerArgs) {
        this.containerArgs = containerArgs;
    }

    public ApplicationId getAppId() {
        return appId;
    }

    public void setAppId(ApplicationId appId) {
        this.appId = appId;
    }

    /**
     * Application entry point
     * 
     * @param args
     *            command-line args (set by GuaguaYarnClient, if any)
     */
    public static void main(final String[] args) {
        LOG.info("Starting GuaguaAppMaster. ");
        String containerIdString = System.getenv().get(Environment.CONTAINER_ID.name());
        if (containerIdString == null) {
            // container id should always be set in the env by the framework
            throw new IllegalArgumentException("ContainerId not found in env vars.");
        }
        ContainerId containerId = ConverterUtils.toContainerId(containerIdString);
        ApplicationAttemptId appAttemptId = containerId.getApplicationAttemptId();
        Configuration conf = new YarnConfiguration();
        String jobUserName = System.getenv(ApplicationConstants.Environment.USER.name());
        conf.set(MRJobConfig.USER_NAME, jobUserName);
        try {
            UserGroupInformation.setConfiguration(conf);
            // Security framework already loaded the tokens into current UGI, just use them
            Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
            LOG.info("Executing with tokens:");
            for (Token<?> token : credentials.getAllTokens()) {
                LOG.info(token.toString());
            }

            UserGroupInformation appMasterUgi = UserGroupInformation.createRemoteUser(jobUserName);
            appMasterUgi.addCredentials(credentials);

            // Now remove the AM->RM token so tasks don't have it
            Iterator<Token<?>> iter = credentials.getAllTokens().iterator();
            while (iter.hasNext()) {
                Token<?> token = iter.next();
                if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) {
                    iter.remove();
                }
            }

            final GuaguaAppMaster appMaster = new GuaguaAppMaster(containerId, appAttemptId, conf);
            appMasterUgi.doAs(new PrivilegedAction<Void>() {
                @Override
                public Void run() {
                    boolean result = false;
                    try {
                        result = appMaster.run();
                    } catch (Throwable t) {
                        LOG.error("GuaguaAppMaster caught a top-level exception in main.", t);
                        System.exit(1);
                    }

                    if (result) {
                        LOG.info("Guagua Application Master completed successfully. Exiting.");
                        System.exit(0);
                    } else {
                        LOG.info("Guagua Application Master failed. Exiting.");
                        System.exit(2);
                    }
                    return null;
                }
            });

        } catch (Throwable t) {
            LOG.error("GuaguaAppMaster caught a top-level exception in main.", t);
            System.exit(1);
        }
    }
}
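
Example

The app master receives progress updates from task containers over a small Netty RPC channel: startRPCServer() installs an ObjectEncoder/ObjectDecoder pipeline, and ServerHandler parses each received message's toString() as JSON into a GuaguaIterationStatus. The sketch below shows what a task-side sender could look like. It is not part of the original file: the class name is made up for illustration, the default port is a placeholder, and the JSON field names (partition, currentIteration, time, killContainer) are inferred from the getters used above and may not match the real GuaguaIterationStatus exactly.

import java.net.InetSocketAddress;
import java.util.concurrent.Executors;

import org.jboss.netty.bootstrap.ClientBootstrap;
import org.jboss.netty.channel.Channel;
import org.jboss.netty.channel.ChannelPipeline;
import org.jboss.netty.channel.ChannelPipelineFactory;
import org.jboss.netty.channel.Channels;
import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory;
import org.jboss.netty.handler.codec.serialization.ClassResolvers;
import org.jboss.netty.handler.codec.serialization.ObjectDecoder;
import org.jboss.netty.handler.codec.serialization.ObjectEncoder;

public class ProgressRpcClientSketch {

    public static void main(String[] args) {
        // In a real task the host and port come from the program args built in generateShellExecCommand()
        // (rpcHostName and rpcPort); the defaults here are placeholders.
        String amHost = args.length > 0 ? args[0] : "localhost";
        int amPort = args.length > 1 ? Integer.parseInt(args[1]) : 12345;

        ClientBootstrap bootstrap = new ClientBootstrap(new NioClientSocketChannelFactory(
                Executors.newCachedThreadPool(), Executors.newCachedThreadPool()));

        // Mirror the server pipeline set up in startRPCServer(): ObjectEncoder/ObjectDecoder around
        // serializable objects.
        bootstrap.setPipelineFactory(new ChannelPipelineFactory() {
            public ChannelPipeline getPipeline() {
                return Channels.pipeline(new ObjectEncoder(),
                        new ObjectDecoder(ClassResolvers.cacheDisabled(getClass().getClassLoader())));
            }
        });

        // Error handling is omitted for brevity.
        Channel channel = bootstrap.connect(new InetSocketAddress(amHost, amPort)).awaitUninterruptibly()
                .getChannel();

        // ServerHandler calls toString() on the received object and parses it as JSON, so a plain JSON
        // string is enough for this sketch.
        String status = "{\"partition\":1,\"currentIteration\":2,\"time\":" + System.currentTimeMillis()
                + ",\"killContainer\":false}";
        channel.write(status).awaitUninterruptibly();

        channel.close().awaitUninterruptibly();
        bootstrap.releaseExternalResources();
    }
}

A real task would send such a status once per iteration; the app master records it in partitionProgress, uses getTime() to detect timed-out containers, and uses isKillContainer() to stop stragglers.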