org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager;

import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import io.hops.util.DBUtility;
import io.hops.util.RMStorageFactory;
import io.hops.util.YarnAPIStorageFactory;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.service.Service.STATE;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.DateUtils;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdatedCryptoForApp;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreAMRMTokenEvent;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTEvent;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTMasterKeyEvent;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.security.JWTSecurityHandler;
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMAppSecurityManagerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMAppSecurityManagerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMAppSecurityMaterial;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
import org.apache.hadoop.yarn.server.resourcemanager.security.X509SecurityHandler;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.mockito.Mockito;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;

import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Matchers.isA;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.timeout;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

public class TestRMRestart extends ParameterizedSchedulerTestBase {
    private static final Log LOG = LogFactory.getLog(TestRMRestart.class);
    private final static File TEMP_DIR = new File(System.getProperty("test.build.data", "/tmp"), "decommision");
    private File hostFile = new File(TEMP_DIR + File.separator + "hostFile.txt");
    private YarnConfiguration conf;

    // Fake rmAddr for token-renewal
    private static InetSocketAddress rmAddr;
    private List<MockRM> rms = new ArrayList<MockRM>();
    private FileSystem fs;
    private Path tmpDir;

    public TestRMRestart(SchedulerType type) {
        super(type);
    }

    @Before
    public void setup() throws Exception {
        conf = getConf();
        Logger rootLogger = LogManager.getRootLogger();
        rootLogger.setLevel(Level.DEBUG);
        UserGroupInformation.setConfiguration(conf);
        conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
        conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
        fs = FileSystem.get(conf);
        tmpDir = new Path(new File("target", this.getClass().getSimpleName() + "-tmpDir").getAbsolutePath());
        fs.delete(tmpDir, true);
        fs.mkdirs(tmpDir);
        conf.set(YarnConfiguration.FS_RM_STATE_STORE_URI, tmpDir.toString());
        conf.set(YarnConfiguration.RM_STORE, FileSystemRMStateStore.class.getName());
        YarnAPIStorageFactory.setConfiguration(conf);
        RMStorageFactory.setConfiguration(conf);
        DBUtility.InitializeDB();
        rmAddr = new InetSocketAddress("localhost", 8032);
        Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
    }

    @After
    public void tearDown() throws IOException {
        for (MockRM rm : rms) {
            rm.stop();
        }
        rms.clear();
        fs.delete(tmpDir, true);
        TEMP_DIR.delete();
    }

    /**
     *
     * @return a new MockRM that will be stopped at the end of the test.
     */
    private MockRM createMockRM(YarnConfiguration conf, RMStateStore store) {
        MockRM rm = new MockRM(conf, store);
        rms.add(rm);
        return rm;
    }

    private MockRM createMockRM(YarnConfiguration conf) {
        MockRM rm = new MockRM(conf);
        rms.add(rm);
        return rm;
    }

    @Test
    public void testRMRestartWithCryptoMaterial() throws Exception {
        conf.setBoolean(CommonConfigurationKeysPublic.IPC_SERVER_SSL_ENABLED, true);
        conf.setBoolean(YarnConfiguration.RM_JWT_ENABLED, true);
        // This should not kick off certificate rotation
        conf.set(YarnConfiguration.RM_APP_CERTIFICATE_EXPIRATION_SAFETY_PERIOD, "1ms");
        // This should not kick off JWT rotation
        conf.set(YarnConfiguration.RM_JWT_EXPIRATION_LEEWAY, "1s");

        // Start RM
        MockRM rm1 = createMockRM(conf);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1337", 1024 * 10, rm1.getResourceTrackerService());
        nm1.registerNode();

        // Submit application
        RMApp app0 = rm1.submitApp(1024);
        nm1.nodeHeartbeat(true);

        // RMState should have the application with the crypto material
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        assertCryptoMaterialStateNotEmpty(rmAppState.get(app0.getApplicationId()));

        // Start second RM
        MockRM rm2 = createMockRM(conf);
        rm2.start();
        nm1.setResourceTrackerService(rm2.getResourceTrackerService());
        nm1.nodeHeartbeat(true);
        Map<ApplicationId, RMApp> rm2Apps = rm2.getRMContext().getRMApps();
        Assert.assertEquals(1, rm2Apps.size());

        // Verify crypto material are recovered correctly
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        assertCryptoMaterialStateNotEmpty(rmAppState.get(app0.getApplicationId()));
        RMApp recoveredApp0 = rm2Apps.get(app0.getApplicationId());
        assertAppCryptoMaterialNotEmpty(recoveredApp0);

        Assert.assertArrayEquals(app0.getKeyStore(), recoveredApp0.getKeyStore());
        Assert.assertArrayEquals(app0.getKeyStorePassword(), recoveredApp0.getKeyStorePassword());
        Assert.assertArrayEquals(app0.getTrustStore(), recoveredApp0.getTrustStore());
        Assert.assertArrayEquals(app0.getTrustStorePassword(), recoveredApp0.getTrustStorePassword());
        Assert.assertEquals(app0.getJWT(), recoveredApp0.getJWT());
        Assert.assertEquals(app0.getJWTExpiration(), recoveredApp0.getJWTExpiration());
    }

    @SuppressWarnings("rawtypes")
    @Test(timeout = 180000)
    public void testRMRestart() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
        conf.setBoolean(CommonConfigurationKeysPublic.IPC_SERVER_SSL_ENABLED, true);
        conf.setBoolean(YarnConfiguration.RM_JWT_ENABLED, true);
        // Do not kick off renewer for the duration of the test
        conf.set(YarnConfiguration.RM_APP_CERTIFICATE_EXPIRATION_SAFETY_PERIOD, "1ms");

        // PHASE 1: create state in an RM

        // start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);

        // start like normal because state is empty
        rm1.start();

        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        MockNM nm2 = new MockNM("127.0.0.2:5678", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();
        nm2.registerNode(); // nm2 will not heartbeat with RM1

        // create app that will finish and the final state should be saved.
        RMApp app0 = rm1.submitApp(200);
        RMAppAttempt attempt0 = app0.getCurrentAppAttempt();
        // spot check that app is saved
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        Assert.assertEquals(1, rmAppState.size());
        nm1.nodeHeartbeat(true);
        MockAM am0 = rm1.sendAMLaunched(attempt0.getAppAttemptId());
        am0.registerAppAttempt();
        finishApplicationMaster(app0, rm1, nm1, am0);

        // create app that gets launched and does allocate before RM restart
        RMApp app1 = rm1.submitApp(200);
        // assert app1 info is saved
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(),
                app1.getApplicationSubmissionContext().getApplicationId());

        assertCryptoMaterialStateNotEmpty(appState);

        //kick the scheduling to allocate AM container
        nm1.nodeHeartbeat(true);

        // assert app1 attempt is saved
        RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
        ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
        rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(app1.getApplicationId());
        Assert.assertEquals(1, appState.getAttemptCount());
        ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
        Assert.assertNotNull(attemptState);
        Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());

        // launch the AM
        MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
        am1.registerAppAttempt();

        // AM request for containers
        am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
        // kick the scheduler
        nm1.nodeHeartbeat(true);
        List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                .getAllocatedContainers();
        while (conts.size() == 0) {
            nm1.nodeHeartbeat(true);
            conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                    .getAllocatedContainers());
            Thread.sleep(500);
        }

        // create app that does not get launched by RM before RM restart
        RMApp app2 = rm1.submitApp(200);

        // assert app2 info is saved
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(app2.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(),
                app2.getApplicationSubmissionContext().getApplicationId());

        // create unmanaged app
        RMApp appUnmanaged = rm1.submitApp(200, "someApp", "someUser", null, true, null,
                conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS),
                null);
        ApplicationAttemptId unmanagedAttemptId = appUnmanaged.getCurrentAppAttempt().getAppAttemptId();
        // assert appUnmanaged info is saved
        ApplicationId unmanagedAppId = appUnmanaged.getApplicationId();
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(unmanagedAppId);
        Assert.assertNotNull(appState);
        // wait for attempt to reach LAUNCHED state 
        rm1.waitForState(unmanagedAttemptId, RMAppAttemptState.LAUNCHED);
        rm1.waitForState(unmanagedAppId, RMAppState.ACCEPTED);
        // assert unmanaged attempt info is saved
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(unmanagedAppId);
        Assert.assertEquals(1, appState.getAttemptCount());
        Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(),
                appUnmanaged.getApplicationSubmissionContext().getApplicationId());

        // PHASE 2: create new RM and start from old state

        // create new RM to represent restart and recover state
        MockRM rm2 = createMockRM(conf);

        // start new RM
        rm2.start();

        // change NM to point to new RM
        nm1.setResourceTrackerService(rm2.getResourceTrackerService());
        nm2.setResourceTrackerService(rm2.getResourceTrackerService());

        // verify load of old state
        // 4 apps are loaded.
        // FINISHED app and attempt is also loaded back.
        // Unmanaged app state is still loaded back but it cannot be restarted by
        // the RM. this will change with work preserving RM restart in which AMs/NMs
        // are not rebooted.
        Assert.assertEquals(4, rm2.getRMContext().getRMApps().size());
        // check that earlier finished app and attempt is also loaded back and move
        // to finished state.
        rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
        rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FINISHED);

        // verify correct number of attempts and other data
        RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
        Assert.assertNotNull(loadedApp1);
        Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
        Assert.assertEquals(app1.getApplicationSubmissionContext().getApplicationId(),
                loadedApp1.getApplicationSubmissionContext().getApplicationId());

        RMApp loadedApp2 = rm2.getRMContext().getRMApps().get(app2.getApplicationId());
        Assert.assertNotNull(loadedApp2);
        //Assert.assertEquals(0, loadedApp2.getAppAttempts().size());
        Assert.assertEquals(app2.getApplicationSubmissionContext().getApplicationId(),
                loadedApp2.getApplicationSubmissionContext().getApplicationId());

        // verify state machine kicked into expected states
        rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
        rm2.waitForState(loadedApp2.getApplicationId(), RMAppState.ACCEPTED);

        // verify attempts for apps
        // The app for which AM was started will wait for previous am
        // container finish event to arrive. However for an application for which
        // no am container was running will start new application attempt.
        Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
        Assert.assertEquals(1, loadedApp2.getAppAttempts().size());

        // Verify crypto material for recovered apps
        assertAppCryptoMaterialNotEmpty(loadedApp1);
        assertAppCryptoMaterialNotEmpty(loadedApp2);

        // verify old AM is not accepted
        // change running AM to talk to new RM
        am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
        try {
            am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
            Assert.fail();
        } catch (ApplicationAttemptNotFoundException e) {
            Assert.assertTrue(e instanceof ApplicationAttemptNotFoundException);
        }

        // NM should be rebooted on heartbeat, even first heartbeat for nm2
        NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
        Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
        hbResponse = nm2.nodeHeartbeat(true);
        Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());

        // new NM to represent NM re-register
        nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
        nm2 = new MockNM("127.0.0.2:5678", 15120, rm2.getResourceTrackerService());

        NMContainerStatus status = TestRMRestart.createNMContainerStatus(
                loadedApp1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.COMPLETE);
        nm1.registerNode(Arrays.asList(status), null);
        nm2.registerNode();

        rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
        // wait for the 2nd attempt to be started.
        int timeoutSecs = 0;
        while (loadedApp1.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
            ;
            Thread.sleep(200);
        }

        // verify no more reboot response sent
        hbResponse = nm1.nodeHeartbeat(true);
        Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());
        hbResponse = nm2.nodeHeartbeat(true);
        Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());

        // assert app1 attempt is saved
        attempt1 = loadedApp1.getCurrentAppAttempt();
        attemptId1 = attempt1.getAppAttemptId();
        rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(loadedApp1.getApplicationId());
        attemptState = appState.getAttempt(attemptId1);
        Assert.assertNotNull(attemptState);
        Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());

        // Nodes on which the AM's run 
        MockNM am1Node = nm1;
        if (attemptState.getMasterContainer().getNodeId().toString().contains("127.0.0.2")) {
            am1Node = nm2;
        }

        // assert app2 attempt is saved
        RMAppAttempt attempt2 = loadedApp2.getCurrentAppAttempt();
        ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId();
        rm2.waitForState(attemptId2, RMAppAttemptState.ALLOCATED);
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(loadedApp2.getApplicationId());
        attemptState = appState.getAttempt(attemptId2);
        Assert.assertNotNull(attemptState);
        Assert.assertEquals(BuilderUtils.newContainerId(attemptId2, 1), attemptState.getMasterContainer().getId());

        MockNM am2Node = nm1;
        if (attemptState.getMasterContainer().getNodeId().toString().contains("127.0.0.2")) {
            am2Node = nm2;
        }

        // start the AM's
        am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
        am1.registerAppAttempt();

        MockAM am2 = rm2.sendAMLaunched(attempt2.getAppAttemptId());
        am2.registerAppAttempt();

        //request for containers
        am1.allocate("127.0.0.1", 1000, 3, new ArrayList<ContainerId>());
        am2.allocate("127.0.0.2", 1000, 1, new ArrayList<ContainerId>());

        // verify container allocate continues to work
        nm1.nodeHeartbeat(true);
        nm2.nodeHeartbeat(true);
        conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                .getAllocatedContainers();
        while (conts.size() == 0) {
            nm1.nodeHeartbeat(true);
            nm2.nodeHeartbeat(true);
            conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                    .getAllocatedContainers());
            Thread.sleep(500);
        }
        // finish the AMs
        finishApplicationMaster(loadedApp1, rm2, am1Node, am1);
        finishApplicationMaster(loadedApp2, rm2, am2Node, am2);

        // stop RM's
        rm2.stop();
        rm1.stop();

        // completed apps are not removed immediately after app finish
        // And finished app is also loaded back.
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        Assert.assertEquals(4, rmAppState.size());
    }

    private void assertCryptoMaterialStateNotEmpty(ApplicationStateData appState) {
        if (conf.getBoolean(CommonConfigurationKeys.IPC_SERVER_SSL_ENABLED,
                CommonConfigurationKeys.IPC_SERVER_SSL_ENABLED_DEFAULT)) {
            Assert.assertNotNull(appState.getKeyStore());
            Assert.assertNotEquals(0, appState.getKeyStore().length);
            Assert.assertNotNull(appState.getKeyStorePassword());
            Assert.assertNotEquals(0, appState.getKeyStorePassword());
            Assert.assertNotNull(appState.getTrustStore());
            Assert.assertNotEquals(0, appState.getTrustStore().length);
            Assert.assertNotNull(appState.getTrustStore());
            Assert.assertNotEquals(0, appState.getTrustStorePassword().length);
        }
        if (conf.getBoolean(YarnConfiguration.RM_JWT_ENABLED, YarnConfiguration.DEFAULT_RM_JWT_ENABLED)) {
            Assert.assertNotNull(appState.getJWT());
            Assert.assertNotEquals(0, appState.getJWT().length());
            Assert.assertNotEquals(-1L, appState.getJWTExpiration());
        }
    }

    private void assertAppCryptoMaterialNotEmpty(RMApp app) {
        if (conf.getBoolean(CommonConfigurationKeys.IPC_SERVER_SSL_ENABLED,
                CommonConfigurationKeys.IPC_SERVER_SSL_ENABLED_DEFAULT)) {
            Assert.assertNotNull(app.getKeyStore());
            Assert.assertNotEquals(0, app.getKeyStore().length);
            Assert.assertNotNull(app.getKeyStorePassword());
            Assert.assertNotEquals(0, app.getKeyStorePassword());
            Assert.assertNotNull(app.getTrustStore());
            Assert.assertNotEquals(0, app.getTrustStore().length);
            Assert.assertNotNull(app.getTrustStore());
            Assert.assertNotEquals(0, app.getTrustStorePassword().length);
        }
        if (conf.getBoolean(YarnConfiguration.RM_JWT_ENABLED, YarnConfiguration.DEFAULT_RM_JWT_ENABLED)) {
            Assert.assertNotNull(app.getJWT());
            Assert.assertNotEquals(0, app.getJWT());
            Assert.assertNotNull(app.getJWTExpiration());
        }
    }

    /**
     * Special edge case:
     * 1. Start an application
     * 2. Finish application
     * 3. RM gets killed
     * 4. RM recovers
     * 5. NM resyncs - NM hasn't received that application has finished and report it as running
     *    together with its local crypto material state
     * 6. RM receives the running applications and tries to infer if NM has missed
     *    a crypto material update by comparing RM's crypto material state with the state
     *    NM has sent
     * 7. NPE because the RMApp has finished and JWT expiration date is null
     */
    @Test
    public void testRMRestartAfterAppFinishedPushingCryptoMaterial() throws Exception {
        conf.setBoolean(CommonConfigurationKeys.IPC_SERVER_SSL_ENABLED, true);
        conf.set(YarnConfiguration.RM_APP_CERTIFICATE_EXPIRATION_SAFETY_PERIOD, "40s");
        conf.setBoolean(YarnConfiguration.RM_JWT_ENABLED, true);
        conf.set(YarnConfiguration.RM_JWT_VALIDITY_PERIOD, "10s");
        conf.set(YarnConfiguration.RM_JWT_EXPIRATION_LEEWAY, "6s");

        // Start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();

        MockNM nm = new MockNM("127.0.0.1:1337", 20 * 2014, rm1.getResourceTrackerService());
        nm.registerNode();

        RMApp app = rm1.submitApp(1024);
        RMAppAttempt appAttempt = app.getCurrentAppAttempt();
        nm.nodeHeartbeat(true);
        MockAM am = rm1.sendAMLaunched(appAttempt.getAppAttemptId());
        am.registerAppAttempt();

        rm1.waitForState(app.getApplicationId(), RMAppState.RUNNING);

        AllocateResponse allocateResponse = am.allocate("127.0.0.1", 2 * 1024, 2, new ArrayList<ContainerId>());
        nm.nodeHeartbeat(true);

        while (allocateResponse.getAllocatedContainers().size() < 2) {
            nm.nodeHeartbeat(true);
            allocateResponse = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
            TimeUnit.MILLISECONDS.sleep(100);
        }
        LOG.info("Containers allocated");
        finishApplicationMaster(app, rm1, nm, am);
        LOG.info("App finished");

        MockRM rm2 = createMockRM(conf);
        rm2.start();
        nm.setResourceTrackerService(rm2.getResourceTrackerService());
        LOG.info("NM heartbeating to new RM");
        NodeHeartbeatResponse hbResponse = nm.nodeHeartbeat(true);
        Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
        Map<ApplicationId, UpdatedCryptoForApp> runningApps = new HashMap<>();
        UpdatedCryptoForApp uca = UpdatedCryptoForApp.newInstance(1, System.currentTimeMillis());
        runningApps.put(app.getApplicationId(), uca);
        nm.registerNode(runningApps);
        hbResponse = nm.nodeHeartbeat(true);
        Assert.assertEquals(0, hbResponse.getUpdatedCryptoForApps().size());
    }

    /**
     * Test bug where the following workflow would throw a NPE in ResourceManager
     * 1. Start RM with JWT disabled
     * 2. Submit application and wait for running
     * 3. Enable JWT and restart RM
     *
     * During recovery RMApp would try to register with JWT renewer but
     * JWT from previous attempt is null
     *
     * @throws Exception
     */
    @Test
    public void testRMRestartEnablingJWTAppRunning() throws Exception {
        // Start RM with JWT disabled
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        MockNM nm = new MockNM("127.0.0.1:1337", 20 * 1024, rm1.getResourceTrackerService());
        nm.registerNode();
        final RMApp app = rm1.submitApp(1024);
        MockAM am = MockRM.launchAndRegisterAM(app, rm1, nm);

        Configuration newConf = new Configuration(conf);
        newConf.setBoolean(YarnConfiguration.RM_JWT_ENABLED, true);
        // Start second RM with JWT enabled
        MockRM rm2 = new MockRM(newConf) {
            @Override
            protected ApplicationMasterLauncher createAMLauncher() {
                return new ApplicationMasterLauncher(getRMContext()) {
                    @Override
                    protected void serviceStart() {
                        // override to not start rpc handler
                    }

                    @Override
                    public void handle(AMLauncherEvent appEvent) {
                        // Just send crypto material revocation event
                        if (appEvent.getType().equals(AMLauncherEventType.CLEANUP)) {
                            RMApp application = rmContext.getRMApps().get(app.getApplicationId());
                            X509SecurityHandler.X509MaterialParameter x509Param = new X509SecurityHandler.X509MaterialParameter(
                                    application.getApplicationId(), application.getUser(),
                                    application.getCryptoMaterialVersion());
                            JWTSecurityHandler.JWTMaterialParameter jwtParam = new JWTSecurityHandler.JWTMaterialParameter(
                                    application.getApplicationId(), application.getUser());

                            RMAppSecurityMaterial securityMaterial = new RMAppSecurityMaterial();
                            securityMaterial.addMaterial(x509Param);
                            securityMaterial.addMaterial(jwtParam);
                            RMAppSecurityManagerEvent securityMaterialCleanup = new RMAppSecurityManagerEvent(
                                    application.getApplicationId(), securityMaterial,
                                    RMAppSecurityManagerEventType.REVOKE_SECURITY_MATERIAL);
                            getRmDispatcher().getEventHandler().handle(securityMaterialCleanup);
                        }
                    }

                    @Override
                    protected void serviceStop() {
                        // don't do anything
                    }
                };
            }
        };
        rm2.start();
        nm.setResourceTrackerService(rm2.getResourceTrackerService());
        NMContainerStatus status = createNMContainerStatus(am.getApplicationAttemptId(), 1,
                ContainerState.COMPLETE);
        nm.registerNode(Arrays.asList(status), null);

        RMApp recoveredApp = rm2.getRMContext().getRMApps().get(app.getApplicationId());
        int timeout = 0;
        while (recoveredApp.getAppAttempts().size() != 2 && timeout++ < 20) {
            TimeUnit.SECONDS.sleep(1);
        }
        MockAM am1 = MockRM.launchAndRegisterAM(recoveredApp, rm2, nm);
        MockRM.finishAMAndVerifyAppState(recoveredApp, rm2, nm, am1);
    }

    @Test(timeout = 50000)
    public void testRMRestartBeforeSendingCryptoUpdateToNM() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
        conf.setBoolean(CommonConfigurationKeys.IPC_SERVER_SSL_ENABLED, true);
        conf.set(YarnConfiguration.RM_APP_CERTIFICATE_EXPIRATION_SAFETY_PERIOD, "40s");
        conf.setBoolean(YarnConfiguration.RM_JWT_ENABLED, true);
        conf.set(YarnConfiguration.RM_JWT_VALIDITY_PERIOD, "10s");
        conf.set(YarnConfiguration.RM_JWT_EXPIRATION_LEEWAY, "6s");

        // Start RM 1
        MockRM rm1 = new RMWithCustomRTService(conf);
        rms.add(rm1);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();

        // Start NM
        MockNM nm = new MockNM("127.0.0.1:1337", 20 * 1024, rm1.getResourceTrackerService());
        nm.registerNode();
        // Launch application
        RMApp app = rm1.submitApp(1024);
        RMAppAttempt appAttempt = app.getCurrentAppAttempt();
        NodeHeartbeatResponse nmHeartbeatResponse = nm.nodeHeartbeat(true);
        Assert.assertTrue(nmHeartbeatResponse.getUpdatedCryptoForApps().isEmpty());
        MockAM am = rm1.sendAMLaunched(appAttempt.getAppAttemptId());
        am.registerAppAttempt();
        rm1.waitForState(app.getApplicationId(), RMAppState.RUNNING);

        AllocateResponse allocateResponse = am.allocate("127.0.0.1", 2 * 1024, 2, new ArrayList<ContainerId>());
        nm.nodeHeartbeat(true);

        while (allocateResponse.getAllocatedContainers().size() < 2) {
            nm.nodeHeartbeat(true);
            allocateResponse = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
            TimeUnit.MILLISECONDS.sleep(100);
        }
        LOG.info("Containers allocated");

        // Trigger Certificate renewal but without sending event to NM
        RMNode rmNode = rm1.getRMContext().getRMNodes().get(nm.getNodeId());

        LOG.info("Sleeping until the renewal is scheduled");
        while (rmNode.getAppX509ToUpdate().isEmpty()) {
            TimeUnit.MILLISECONDS.sleep(100);
        }
        while (rmNode.getAppJWTToUpdate().isEmpty()) {
            TimeUnit.MILLISECONDS.sleep(100);
        }
        nmHeartbeatResponse = nm.nodeHeartbeat(true);
        // This should be empty because RM 1 is fixed not to send update crypto events
        Assert.assertTrue(nmHeartbeatResponse.getUpdatedCryptoForApps().isEmpty());

        rm1.stop();
        // Start RM 2
        MockRM rm2 = createMockRM(conf);
        rm2.start();
        // Update NM RT to point to RM 2
        nm.setResourceTrackerService(rm2.getResourceTrackerService());
        nmHeartbeatResponse = nm.nodeHeartbeat(true);
        Assert.assertEquals(NodeAction.RESYNC, nmHeartbeatResponse.getNodeAction());
        app = rm2.getRMContext().getRMApps().get(app.getApplicationId());

        // new NM to represent re-registration
        nm = new MockNM("127.0.0.1:1337", 20 * 1024, rm2.getResourceTrackerService());
        Map<ApplicationId, UpdatedCryptoForApp> runningApps = new HashMap<>();
        UpdatedCryptoForApp upc = UpdatedCryptoForApp.newInstance(app.getCryptoMaterialVersion(),
                DateUtils.localDateTime2UnixEpoch(app.getJWTExpiration()));
        runningApps.put(app.getApplicationId(), upc);
        nm.registerNode(runningApps);
        nmHeartbeatResponse = nm.nodeHeartbeat(true);
        // Since crypto material didn't change the heartbeat should not contain updated crypto material
        Assert.assertEquals(0, nmHeartbeatResponse.getUpdatedCryptoForApps().size());

        rmNode = rm2.getRMContext().getRMNodes().get(nm.getNodeId());
        rmNode.getAppX509ToUpdate().clear();
        rmNode.getAppJWTToUpdate().clear();

        LOG.info("Sleeping until the renewal triggers again");
        while (rmNode.getAppX509ToUpdate().isEmpty()) {
            TimeUnit.MILLISECONDS.sleep(100);
        }

        while (rmNode.getAppJWTToUpdate().isEmpty()) {
            TimeUnit.MILLISECONDS.sleep(100);
        }

        // New NM to represent re-registration
        nm = new MockNM("127.0.0.1:1337", 20 * 1024, rm2.getResourceTrackerService());
        nm.registerNode(runningApps);
        Assert.assertEquals(1, rmNode.getAppX509ToUpdate().size());
        Assert.assertEquals(1, rmNode.getAppJWTToUpdate().size());
        Assert.assertTrue(rmNode.getAppX509ToUpdate().containsKey(app.getApplicationId()));
        Assert.assertTrue(rmNode.getAppJWTToUpdate().containsKey(app.getApplicationId()));
        nmHeartbeatResponse = nm.nodeHeartbeat(true);
        // Crypto material has changed so heartbeat should contain updated crypto material
        Assert.assertEquals(1, nmHeartbeatResponse.getUpdatedCryptoForApps().size());
        Assert.assertTrue(nmHeartbeatResponse.getUpdatedCryptoForApps().containsKey(app.getApplicationId()));
        Assert.assertEquals(app.getCryptoMaterialVersion(), new Integer(
                nmHeartbeatResponse.getUpdatedCryptoForApps().get(app.getApplicationId()).getVersion()));
        Assert.assertEquals(DateUtils.localDateTime2UnixEpoch(app.getJWTExpiration()),
                nmHeartbeatResponse.getUpdatedCryptoForApps().get(app.getApplicationId()).getJWTExpiration());

        rm2.stop();
    }

    private class RMWithCustomRTService extends MockRM {

        private RMWithCustomRTService(Configuration conf) {
            super(conf);
        }

        @Override
        protected ResourceTrackerService createResourceTrackerService() {
            RMContainerTokenSecretManager containerTokenSecretManager = getRMContext()
                    .getContainerTokenSecretManager();
            containerTokenSecretManager.rollMasterKey();
            NMTokenSecretManagerInRM nmTokenSecretManager = getRMContext().getNMTokenSecretManager();
            nmTokenSecretManager.rollMasterKey();
            return new RTServiceNotUpdatingCryptoMaterial(getRMContext(), nodesListManager,
                    this.nmLivelinessMonitor, containerTokenSecretManager, nmTokenSecretManager);
        }

        private class RTServiceNotUpdatingCryptoMaterial extends ResourceTrackerService {

            public RTServiceNotUpdatingCryptoMaterial(RMContext rmContext, NodesListManager nodesListManager,
                    NMLivelinessMonitor nmLivelinessMonitor,
                    RMContainerTokenSecretManager containerTokenSecretManager,
                    NMTokenSecretManagerInRM nmTokenSecretManager) {
                super(rmContext, nodesListManager, nmLivelinessMonitor, containerTokenSecretManager,
                        nmTokenSecretManager);
            }

            @Override
            protected Map<ApplicationId, UpdatedCryptoForApp> mergeNewSecurityMaterialForApps(RMNode rmNode) {
                return new HashMap<>();
            }

            @Override
            protected void serviceStart() {
                // override to not start rpc handler
            }

            @Override
            protected void serviceStop() {
                // don't do anything
            }
        }
    }

    @Test(timeout = 60000)
    public void testRMRestartAppRunningAMFailed() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);

        // start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // create app and launch the AM
        RMApp app0 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", -1, null, "MAPREDUCE", true, true);
        MockAM am0 = launchAM(app0, rm1, nm1);

        // fail the AM by sending CONTAINER_FINISHED event without registering.
        nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
        am0.waitForState(RMAppAttemptState.FAILED);

        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
        // assert the AM failed state is saved.
        Assert.assertEquals(RMAppAttemptState.FAILED,
                appState.getAttempt(am0.getApplicationAttemptId()).getState());

        // assert app state has not been saved.
        Assert.assertNull(rmAppState.get(app0.getApplicationId()).getState());

        // new AM started but not registered, app still stays at ACCECPTED state.
        rm1.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);

        // start new RM
        MockRM rm2 = createMockRM(conf);
        rm2.start();
        // assert the previous AM state is loaded back on RM recovery.

        rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    }

    @Test(timeout = 60000)
    public void testRMRestartWaitForPreviousAMToFinish() throws Exception {
        // testing 3 cases
        // After RM restarts
        // 1) New application attempt is not started until previous AM container
        // finish event is reported back to RM as a part of nm registration.
        // 2) If previous AM container finish event is never reported back (i.e.
        // node manager on which this AM container was running also went down) in
        // that case AMLivenessMonitor should time out previous attempt and start
        // new attempt.
        // 3) If all the stored attempts had finished then new attempt should
        // be started immediately.
        YarnConfiguration conf = new YarnConfiguration(this.conf);
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 40);

        // start RM
        final MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 16382, rm1.getResourceTrackerService());
        nm1.registerNode();

        // submitting app
        RMApp app1 = rm1.submitApp(200);
        rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
        MockAM am1 = launchAM(app1, rm1, nm1);
        nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
        // Fail first AM.
        am1.waitForState(RMAppAttemptState.FAILED);

        // launch another AM.
        MockAM am2 = launchAM(app1, rm1, nm1);
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        Assert.assertEquals(1, rmAppState.size());
        Assert.assertEquals(app1.getState(), RMAppState.RUNNING);
        Assert.assertEquals(
                app1.getAppAttempts().get(app1.getCurrentAppAttempt().getAppAttemptId()).getAppAttemptState(),
                RMAppAttemptState.RUNNING);

        //  start new RM.
        MockRM rm2 = createMockRM(conf);
        rm2.start();

        nm1.setResourceTrackerService(rm2.getResourceTrackerService());
        NodeHeartbeatResponse res = nm1.nodeHeartbeat(true);
        Assert.assertEquals(NodeAction.RESYNC, res.getNodeAction());

        RMApp rmApp = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
        // application should be in ACCEPTED state
        rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);

        Assert.assertEquals(RMAppState.ACCEPTED, rmApp.getState());
        // new attempt should not be started
        Assert.assertEquals(2, rmApp.getAppAttempts().size());
        // am1 attempt should be in FAILED state where as am2 attempt should be in
        // LAUNCHED state
        rm2.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
        rm2.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.LAUNCHED);
        Assert.assertEquals(RMAppAttemptState.FAILED,
                rmApp.getAppAttempts().get(am1.getApplicationAttemptId()).getAppAttemptState());
        Assert.assertEquals(RMAppAttemptState.LAUNCHED,
                rmApp.getAppAttempts().get(am2.getApplicationAttemptId()).getAppAttemptState());

        NMContainerStatus status = TestRMRestart.createNMContainerStatus(am2.getApplicationAttemptId(), 1,
                ContainerState.COMPLETE);
        nm1.registerNode(Arrays.asList(status), null);
        rm2.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
        launchAM(rmApp, rm2, nm1);
        Assert.assertEquals(3, rmApp.getAppAttempts().size());
        rm2.waitForState(rmApp.getCurrentAppAttempt().getAppAttemptId(), RMAppAttemptState.RUNNING);
        // Now restart RM ...
        // Setting AMLivelinessMonitor interval to be 10 Secs. 
        conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 10000);
        MockRM rm3 = createMockRM(conf);
        rm3.start();

        // Wait for RM to process all the events as a part of rm recovery.
        nm1.setResourceTrackerService(rm3.getResourceTrackerService());

        rmApp = rm3.getRMContext().getRMApps().get(app1.getApplicationId());
        // application should be in ACCEPTED state
        rm3.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
        Assert.assertEquals(rmApp.getState(), RMAppState.ACCEPTED);
        // new attempt should not be started
        Assert.assertEquals(3, rmApp.getAppAttempts().size());
        // am1 and am2 attempts should be in FAILED state where as am3 should be
        // in LAUNCHED state
        rm3.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
        rm3.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
        ApplicationAttemptId latestAppAttemptId = rmApp.getCurrentAppAttempt().getAppAttemptId();
        rm3.waitForState(latestAppAttemptId, RMAppAttemptState.LAUNCHED);
        Assert.assertEquals(RMAppAttemptState.FAILED,
                rmApp.getAppAttempts().get(am1.getApplicationAttemptId()).getAppAttemptState());
        Assert.assertEquals(RMAppAttemptState.FAILED,
                rmApp.getAppAttempts().get(am2.getApplicationAttemptId()).getAppAttemptState());
        Assert.assertEquals(RMAppAttemptState.LAUNCHED,
                rmApp.getAppAttempts().get(latestAppAttemptId).getAppAttemptState());

        rm3.waitForState(latestAppAttemptId, RMAppAttemptState.FAILED);
        rm3.waitForState(rmApp.getApplicationId(), RMAppState.ACCEPTED);
        final int maxRetry = 10;
        final RMApp rmAppForCheck = rmApp;
        GenericTestUtils.waitFor(new Supplier<Boolean>() {
            @Override
            public Boolean get() {
                return new Boolean(rmAppForCheck.getAppAttempts().size() == 4);
            }
        }, 100, maxRetry);
        Assert.assertEquals(RMAppAttemptState.FAILED,
                rmApp.getAppAttempts().get(latestAppAttemptId).getAppAttemptState());

        latestAppAttemptId = rmApp.getCurrentAppAttempt().getAppAttemptId();

        // The 4th attempt has started but is not yet saved into RMStateStore
        // It will be saved only when we launch AM.

        // submitting app but not starting AM for it.
        RMApp app2 = rm3.submitApp(200);
        rm3.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
        Assert.assertEquals(1, app2.getAppAttempts().size());
        rmState = rm3.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        Assert.assertEquals(0, rmAppState.get(app2.getApplicationId()).getAttemptCount());

        MockRM rm4 = createMockRM(conf);
        rm4.start();

        rmApp = rm4.getRMContext().getRMApps().get(app1.getApplicationId());
        rm4.waitForState(rmApp.getApplicationId(), RMAppState.ACCEPTED);
        // wait for the attempt to be created.
        int timeoutSecs = 0;
        while (rmApp.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
            Thread.sleep(200);
        }
        Assert.assertEquals(4, rmApp.getAppAttempts().size());
        Assert.assertEquals(RMAppState.ACCEPTED, rmApp.getState());
        rm4.waitForState(latestAppAttemptId, RMAppAttemptState.SCHEDULED);
        Assert.assertEquals(RMAppAttemptState.SCHEDULED,
                rmApp.getAppAttempts().get(latestAppAttemptId).getAppAttemptState());

        // The initial application for which an AM was not started should be in
        // ACCEPTED state with one application attempt started.
        app2 = rm4.getRMContext().getRMApps().get(app2.getApplicationId());
        rm4.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
        Assert.assertEquals(RMAppState.ACCEPTED, app2.getState());
        Assert.assertEquals(1, app2.getAppAttempts().size());
        rm4.waitForState(app2.getCurrentAppAttempt().getAppAttemptId(), RMAppAttemptState.SCHEDULED);
        Assert.assertEquals(RMAppAttemptState.SCHEDULED, app2.getCurrentAppAttempt().getAppAttemptState());
    }

    // Test RM restarts after previous attempt succeeded and was saved into state
    // store but before the RMAppAttempt notifies RMApp that it has succeeded. On
    // recovery, RMAppAttempt should send the AttemptFinished event to RMApp so
    // that RMApp can recover its state.
    @Test(timeout = 60000)
    public void testRMRestartWaitForPreviousSucceededAttempt() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
        FileSystemRMStateStore fileStore = new FileSystemRMStateStore() {
            int count = 0;

            @Override
            public void updateApplicationStateInternal(ApplicationId appId, ApplicationStateData appStateData)
                    throws Exception {
                if (count == 0) {
                    // do nothing; simulate app final state is not saved.
                    LOG.info(appId + " final state is not saved.");
                    count++;
                } else {
                    super.updateApplicationStateInternal(appId, appStateData);
                }
            }
        };
        fileStore.init(conf);

        // start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        ((RMContextImpl) rm1.getRMContext()).setStateStore(fileStore);
        fileStore.setRMDispatcher(rm1.getRMContext().getDispatcher());
        fileStore.start();
        MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 15120);
        RMApp app0 = rm1.submitApp(200);
        MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);

        FinishApplicationMasterRequest req = FinishApplicationMasterRequest
                .newInstance(FinalApplicationStatus.SUCCEEDED, "", "");
        am0.unregisterAppAttempt(req, true);
        am0.waitForState(RMAppAttemptState.FINISHING);
        // app final state is not saved. This guarantees that RMApp cannot be
        // recovered via its own saved state, but only via the event notification
        // from the RMAppAttempt on recovery.
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        Assert.assertNull(rmAppState.get(app0.getApplicationId()).getState());

        // start RM
        MockRM rm2 = createMockRM(conf);
        nm1.setResourceTrackerService(rm2.getResourceTrackerService());
        rm2.start();

        rm2.waitForState(app0.getCurrentAppAttempt().getAppAttemptId(), RMAppAttemptState.FINISHED);
        rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
        // app final state is saved via the finish event from attempt.
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        Assert.assertEquals(RMAppState.FINISHED, rmAppState.get(app0.getApplicationId()).getState());
    }

    @Test(timeout = 60000)
    public void testRMRestartFailedApp() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);

        // start RM
        MockRM rm1 = createMockRM(conf);
        rm1.start();
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // create app and launch the AM
        RMApp app0 = rm1.submitApp(200);
        MockAM am0 = launchAM(app0, rm1, nm1);

        // fail the AM by sending CONTAINER_FINISHED event without registering.
        nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
        am0.waitForState(RMAppAttemptState.FAILED);
        rm1.waitForState(app0.getApplicationId(), RMAppState.FAILED);

        // assert the app/attempt failed state is saved.
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
        Assert.assertEquals(RMAppState.FAILED, appState.getState());
        Assert.assertEquals(RMAppAttemptState.FAILED,
                appState.getAttempt(am0.getApplicationAttemptId()).getState());

        // start new RM
        MockRM rm2 = createMockRM(conf);
        rm2.start();
        RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
        rm2.waitForState(app0.getApplicationId(), RMAppState.FAILED);
        rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
        // no new attempt is created.
        Assert.assertEquals(1, loadedApp0.getAppAttempts().size());

        verifyAppReportAfterRMRestart(app0, rm2);
        Assert.assertTrue(app0.getDiagnostics().toString().contains("Failing the application."));
        // failed diagnostics from attempt is lost because the diagnostics from
        // attempt is not yet available by the time app is saving the app state.
    }

    @Test(timeout = 60000)
    public void testRMRestartKilledApp() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);

        // start RM
        MockRM rm1 = createMockRM(conf);
        rm1.start();
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // create app and launch the AM
        RMApp app0 = rm1.submitApp(200);
        MockAM am0 = launchAM(app0, rm1, nm1);

        // kill the app.
        rm1.killApp(app0.getApplicationId());
        rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);
        rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.KILLED);

        // killed state is saved.
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
        Assert.assertEquals(RMAppState.KILLED, appState.getState());
        Assert.assertEquals(RMAppAttemptState.KILLED,
                appState.getAttempt(am0.getApplicationAttemptId()).getState());
        String trackingUrl = app0.getCurrentAppAttempt().getOriginalTrackingUrl();
        Assert.assertNotNull(trackingUrl);

        // restart rm
        MockRM rm2 = createMockRM(conf);
        rm2.start();
        RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
        rm2.waitForState(app0.getApplicationId(), RMAppState.KILLED);
        rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.KILLED);
        // no new attempt is created.
        Assert.assertEquals(1, loadedApp0.getAppAttempts().size());

        ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
        Assert.assertEquals(app0.getDiagnostics().toString(), appReport.getDiagnostics());
        Assert.assertEquals(trackingUrl, loadedApp0.getCurrentAppAttempt().getOriginalTrackingUrl());
    }

    @Test(timeout = 60000)
    public void testRMRestartKilledAppWithNoAttempts() throws Exception {
        FileSystemRMStateStore fileStore = new FileSystemRMStateStore() {
            @Override
            public synchronized void storeApplicationAttemptStateInternal(ApplicationAttemptId attemptId,
                    ApplicationAttemptStateData attemptStateData) throws Exception {
                // ignore attempt saving request.
            }

            @Override
            public synchronized void updateApplicationAttemptStateInternal(ApplicationAttemptId attemptId,
                    ApplicationAttemptStateData attemptStateData) throws Exception {
                // ignore attempt saving request.
            }
        };
        fileStore.init(conf);

        // start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        ((RMContextImpl) rm1.getRMContext()).setStateStore(fileStore);
        fileStore.setRMDispatcher(rm1.getRMContext().getDispatcher());
        fileStore.start();
        // create app
        RMApp app0 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", -1, null, "MAPREDUCE", false);
        // kill the app.
        rm1.killApp(app0.getApplicationId());
        rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);

        // restart rm
        MockRM rm2 = createMockRM(conf);
        rm2.start();
        RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
        rm2.waitForState(loadedApp0.getApplicationId(), RMAppState.KILLED);
        Assert.assertTrue(loadedApp0.getAppAttempts().size() == 0);
    }

    @Test(timeout = 60000)
    public void testRMRestartSucceededApp() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);

        // start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // create an app and finish the app.
        RMApp app0 = rm1.submitApp(200);
        MockAM am0 = launchAM(app0, rm1, nm1);

        // unregister am
        FinishApplicationMasterRequest req = FinishApplicationMasterRequest
                .newInstance(FinalApplicationStatus.SUCCEEDED, "diagnostics", "trackingUrl");
        finishApplicationMaster(app0, rm1, nm1, am0, req);

        // check the state store about the unregistered info.
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
        ApplicationAttemptStateData attemptState0 = appState.getAttempt(am0.getApplicationAttemptId());
        Assert.assertEquals("diagnostics", attemptState0.getDiagnostics());
        Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, attemptState0.getFinalApplicationStatus());
        Assert.assertEquals("trackingUrl", attemptState0.getFinalTrackingUrl());
        Assert.assertEquals(app0.getFinishTime(), appState.getFinishTime());

        // restart rm
        MockRM rm2 = createMockRM(conf);
        rm2.start();

        // verify application report returns the same app info as the app info
        // before RM restarts.
        ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
        Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, appReport.getFinalApplicationStatus());
        Assert.assertEquals("trackingUrl", appReport.getOriginalTrackingUrl());
    }

    @Test(timeout = 60000)
    public void testRMRestartGetApplicationList() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);

        // start RM
        MockRM rm1 = new MockRM(conf) {
            @Override
            protected SystemMetricsPublisher createSystemMetricsPublisher() {
                return spy(super.createSystemMetricsPublisher());
            }
        };
        rms.add(rm1);
        rm1.start();
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // a succeeded app.
        RMApp app0 = rm1.submitApp(200, "name", "user", null, false, "default", 1, null, "myType");
        MockAM am0 = launchAM(app0, rm1, nm1);
        finishApplicationMaster(app0, rm1, nm1, am0);

        // a failed app.
        RMApp app1 = rm1.submitApp(200, "name", "user", null, false, "default", 1, null, "myType");
        MockAM am1 = launchAM(app1, rm1, nm1);
        // fail the AM by sending CONTAINER_FINISHED event without registering.
        nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
        am1.waitForState(RMAppAttemptState.FAILED);
        rm1.waitForState(app1.getApplicationId(), RMAppState.FAILED);

        // a killed app.
        RMApp app2 = rm1.submitApp(200, "name", "user", null, false, "default", 1, null, "myType");
        MockAM am2 = launchAM(app2, rm1, nm1);
        rm1.killApp(app2.getApplicationId());
        rm1.waitForState(app2.getApplicationId(), RMAppState.KILLED);
        rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.KILLED);

        verify(rm1.getRMContext().getSystemMetricsPublisher(), Mockito.times(3)).appCreated(any(RMApp.class),
                anyLong());
        // restart rm

        MockRM rm2 = new MockRM(conf) {
            @Override
            protected RMAppManager createRMAppManager() {
                return spy(super.createRMAppManager());
            }

            @Override
            protected SystemMetricsPublisher createSystemMetricsPublisher() {
                return spy(super.createSystemMetricsPublisher());
            }
        };
        rms.add(rm2);
        rm2.start();

        verify(rm2.getRMContext().getSystemMetricsPublisher(), Mockito.times(3)).appCreated(any(RMApp.class),
                anyLong());

        GetApplicationsRequest request1 = GetApplicationsRequest.newInstance(EnumSet
                .of(YarnApplicationState.FINISHED, YarnApplicationState.KILLED, YarnApplicationState.FAILED));
        GetApplicationsResponse response1 = rm2.getClientRMService().getApplications(request1);
        List<ApplicationReport> appList1 = response1.getApplicationList();

        // assert all applications exist according to application state after RM
        // restarts.
        boolean forApp0 = false, forApp1 = false, forApp2 = false;
        for (ApplicationReport report : appList1) {
            if (report.getApplicationId().equals(app0.getApplicationId())) {
                Assert.assertEquals(YarnApplicationState.FINISHED, report.getYarnApplicationState());
                forApp0 = true;
            }
            if (report.getApplicationId().equals(app1.getApplicationId())) {
                Assert.assertEquals(YarnApplicationState.FAILED, report.getYarnApplicationState());
                forApp1 = true;
            }
            if (report.getApplicationId().equals(app2.getApplicationId())) {
                Assert.assertEquals(YarnApplicationState.KILLED, report.getYarnApplicationState());
                forApp2 = true;
            }
        }
        Assert.assertTrue(forApp0 && forApp1 && forApp2);

        // assert all applications exist according to application type after RM
        // restarts.
        Set<String> appTypes = new HashSet<String>();
        appTypes.add("myType");
        GetApplicationsRequest request2 = GetApplicationsRequest.newInstance(appTypes);
        GetApplicationsResponse response2 = rm2.getClientRMService().getApplications(request2);
        List<ApplicationReport> appList2 = response2.getApplicationList();
        Assert.assertTrue(3 == appList2.size());

        // check application summary is logged for the completed apps with timeout
        // to make sure APP_COMPLETED events are processed, after RM restart.
        verify(rm2.getRMAppManager(), timeout(1000).times(3)).logApplicationSummary(isA(ApplicationId.class));
    }

    private MockAM launchAM(RMApp app, MockRM rm, MockNM nm) throws Exception {
        RMAppAttempt attempt = app.getCurrentAppAttempt();
        nm.nodeHeartbeat(true);
        MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
        am.registerAppAttempt();
        rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
        return am;
    }

    private ApplicationReport verifyAppReportAfterRMRestart(RMApp app, MockRM rm) throws Exception {
        GetApplicationReportRequest reportRequest = GetApplicationReportRequest.newInstance(app.getApplicationId());
        GetApplicationReportResponse response = rm.getClientRMService().getApplicationReport(reportRequest);
        ApplicationReport report = response.getApplicationReport();
        Assert.assertEquals(app.getStartTime(), report.getStartTime());
        Assert.assertEquals(app.getFinishTime(), report.getFinishTime());
        Assert.assertEquals(app.createApplicationState(), report.getYarnApplicationState());
        Assert.assertTrue(1 == report.getProgress());
        return response.getApplicationReport();
    }

    private void finishApplicationMaster(RMApp rmApp, MockRM rm, MockNM nm, MockAM am) throws Exception {
        final FinishApplicationMasterRequest req = FinishApplicationMasterRequest
                .newInstance(FinalApplicationStatus.SUCCEEDED, "", "");
        finishApplicationMaster(rmApp, rm, nm, am, req);
    }

    private void finishApplicationMaster(RMApp rmApp, MockRM rm, MockNM nm, MockAM am,
            FinishApplicationMasterRequest req) throws Exception {
        am.unregisterAppAttempt(req, true);
        am.waitForState(RMAppAttemptState.FINISHING);
        nm.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
        am.waitForState(RMAppAttemptState.FINISHED);
        rm.waitForState(rmApp.getApplicationId(), RMAppState.FINISHED);
        // check that app/attempt is saved with the final state
        RMState rmState = rm.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(rmApp.getApplicationId());
        Assert.assertEquals(RMAppState.FINISHED, appState.getState());
        Assert.assertEquals(RMAppAttemptState.FINISHED,
                appState.getAttempt(am.getApplicationAttemptId()).getState());
    }

    @Test(timeout = 60000)
    public void testRMRestartOnMaxAppAttempts() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);

        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // submit an app with maxAppAttempts equals to 1
        RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", 1, null);
        // submit an app with maxAppAttempts equals to -1
        RMApp app2 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", -1, null);

        // assert app1 info is saved
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(),
                app1.getApplicationSubmissionContext().getApplicationId());

        // Allocate the AM
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt = app1.getCurrentAppAttempt();
        ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
        rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(app1.getApplicationId());
        Assert.assertEquals(1, appState.getAttemptCount());
        ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
        Assert.assertNotNull(attemptState);
        Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());

        // Setting AMLivelinessMonitor interval to be 3 Secs.
        conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
        // start new RM   
        MockRM rm2 = createMockRM(conf);
        rm2.start();

        // verify that maxAppAttempts is set to global value
        Assert.assertEquals(2, rm2.getRMContext().getRMApps().get(app2.getApplicationId()).getMaxAppAttempts());

        // app1 and app2 are loaded back, but app1 failed because it's
        // hitting max-retry.
        Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
        rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
        rm2.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);

        // app1 failed state is saved in state store. app2 final saved state is not
        // determined yet.
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        Assert.assertEquals(RMAppState.FAILED, rmAppState.get(app1.getApplicationId()).getState());
        Assert.assertNull(rmAppState.get(app2.getApplicationId()).getState());
    }

    @Test(timeout = 60000)
    public void testDelegationTokenRestoredInDelegationTokenRenewer() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
        conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
        UserGroupInformation.setConfiguration(conf);

        MockRM rm1 = new TestSecurityMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();

        HashSet<Token<RMDelegationTokenIdentifier>> tokenSet = new HashSet<Token<RMDelegationTokenIdentifier>>();

        // create an empty credential
        Credentials ts = new Credentials();

        // create tokens and add into credential
        Text userText1 = new Text("user1");
        RMDelegationTokenIdentifier dtId1 = new RMDelegationTokenIdentifier(userText1, new Text("renewer1"),
                userText1);
        Token<RMDelegationTokenIdentifier> token1 = new Token<RMDelegationTokenIdentifier>(dtId1,
                rm1.getRMContext().getRMDelegationTokenSecretManager());
        SecurityUtil.setTokenService(token1, rmAddr);
        ts.addToken(userText1, token1);
        tokenSet.add(token1);

        Text userText2 = new Text("user2");
        RMDelegationTokenIdentifier dtId2 = new RMDelegationTokenIdentifier(userText2, new Text("renewer2"),
                userText2);
        Token<RMDelegationTokenIdentifier> token2 = new Token<RMDelegationTokenIdentifier>(dtId2,
                rm1.getRMContext().getRMDelegationTokenSecretManager());
        SecurityUtil.setTokenService(token2, rmAddr);
        ts.addToken(userText2, token2);
        tokenSet.add(token2);

        // submit an app with customized credential
        RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", 1, ts);

        // assert app info is saved
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app.getApplicationId());
        Assert.assertNotNull(appState);

        // assert delegation tokens exist in rm1 DelegationTokenRenewr
        Assert.assertEquals(tokenSet, rm1.getRMContext().getDelegationTokenRenewer().getDelegationTokens());

        // assert delegation tokens are saved
        DataOutputBuffer dob = new DataOutputBuffer();
        ts.writeTokenStorageToStream(dob);
        ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
        securityTokens.rewind();
        Assert.assertEquals(securityTokens,
                appState.getApplicationSubmissionContext().getAMContainerSpec().getTokens());

        // start new RM
        MockRM rm2 = new TestSecurityMockRM(conf);
        rm2.start();

        // Need to wait for a while as now token renewal happens on another thread
        // and is asynchronous in nature.
        waitForTokensToBeRenewed(rm2, tokenSet);

        // verify tokens are properly populated back to rm2 DelegationTokenRenewer
        Assert.assertEquals(tokenSet, rm2.getRMContext().getDelegationTokenRenewer().getDelegationTokens());
    }

    private void waitForTokensToBeRenewed(MockRM rm2, HashSet<Token<RMDelegationTokenIdentifier>> tokenSet)
            throws Exception {
        // Max wait time to get the token renewal can be kept as 1sec (100 * 10ms)
        int waitCnt = 100;
        while (waitCnt-- > 0) {
            if (tokenSet.equals(rm2.getRMContext().getDelegationTokenRenewer().getDelegationTokens())) {
                // Stop waiting as tokens are populated to DelegationTokenRenewer.
                break;
            } else {
                Thread.sleep(10);
            }
        }
    }

    @Test(timeout = 60000)
    public void testAppAttemptTokensRestoredOnRMRestart() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
        conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
        UserGroupInformation.setConfiguration(conf);

        MockRM rm1 = new TestSecurityMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        MockNM nm1 = new MockNM("0.0.0.0:4321", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // submit an app
        RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), "default");

        // assert app info is saved
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
        Assert.assertNotNull(appState);

        // Allocate the AM
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
        ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
        rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);

        // assert attempt info is saved
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        appState = rmAppState.get(app1.getApplicationId());
        ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
        Assert.assertNotNull(attemptState);
        Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());

        // the clientTokenMasterKey that are generated when
        // RMAppAttempt is created,
        byte[] clientTokenMasterKey = attempt1.getClientTokenMasterKey().getEncoded();

        // assert application credentials are saved
        Credentials savedCredentials = attemptState.getAppAttemptTokens();
        Assert.assertArrayEquals("client token master key not saved", clientTokenMasterKey,
                savedCredentials.getSecretKey(RMStateStore.AM_CLIENT_TOKEN_MASTER_KEY_NAME));

        // start new RM
        MockRM rm2 = new TestSecurityMockRM(conf);
        rm2.start();

        RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
        RMAppAttempt loadedAttempt1 = loadedApp1.getRMAppAttempt(attemptId1);

        // assert loaded attempt recovered
        Assert.assertNotNull(loadedAttempt1);

        // assert client token master key is recovered back to api-versioned
        // client token master key
        Assert.assertEquals("client token master key not restored", attempt1.getClientTokenMasterKey(),
                loadedAttempt1.getClientTokenMasterKey());

        // assert ClientTokenSecretManager also knows about the key
        Assert.assertArrayEquals(clientTokenMasterKey,
                rm2.getClientToAMTokenSecretManager().getMasterKey(attemptId1).getEncoded());

        // assert AMRMTokenSecretManager also knows about the AMRMToken password
        Token<AMRMTokenIdentifier> amrmToken = loadedAttempt1.getAMRMToken();
        Assert.assertArrayEquals(amrmToken.getPassword(),
                rm2.getRMContext().getAMRMTokenSecretManager().retrievePassword(amrmToken.decodeIdentifier()));
    }

    @Test(timeout = 60000)
    public void testRMDelegationTokenRestoredOnRMRestart() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
        conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
        conf.set(YarnConfiguration.RM_ADDRESS, "localhost:8032");
        UserGroupInformation.setConfiguration(conf);

        MockRM rm1 = new TestSecurityMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();

        // create an empty credential
        Credentials ts = new Credentials();

        // request a token and add into credential
        GetDelegationTokenRequest request1 = GetDelegationTokenRequest.newInstance("renewer1");
        UserGroupInformation.getCurrentUser().setAuthenticationMethod(AuthMethod.KERBEROS);
        GetDelegationTokenResponse response1 = rm1.getClientRMService().getDelegationToken(request1);
        org.apache.hadoop.yarn.api.records.Token delegationToken1 = response1.getRMDelegationToken();
        Token<RMDelegationTokenIdentifier> token1 = ConverterUtils.convertFromYarn(delegationToken1, rmAddr);
        RMDelegationTokenIdentifier dtId1 = token1.decodeIdentifier();

        HashSet<RMDelegationTokenIdentifier> tokenIdentSet = new HashSet<RMDelegationTokenIdentifier>();
        ts.addToken(token1.getService(), token1);
        tokenIdentSet.add(dtId1);

        // submit an app with customized credential
        RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", 1, ts);

        // assert app info is saved
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        ApplicationStateData appState = rmAppState.get(app.getApplicationId());
        Assert.assertNotNull(appState);

        // assert all master keys are saved
        Set<DelegationKey> allKeysRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys();
        rmState = rm1.getRMContext().getStateStore().loadState();
        Set<DelegationKey> rmDTMasterKeyState = rmState.getRMDTSecretManagerState().getMasterKeyState();
        for (DelegationKey expectedKey : allKeysRM1) {
            boolean foundIt = false;
            for (DelegationKey gotKey : rmDTMasterKeyState) {
                if (expectedKey.getKeyId() == gotKey.getKeyId()
                        && Arrays.equals(expectedKey.getEncodedKey(), gotKey.getEncodedKey())) {
                    foundIt = true;
                    break;
                }
            }
            Assert.assertTrue(foundIt);
        }

        // assert all tokens are saved
        Map<RMDelegationTokenIdentifier, Long> allTokensRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager()
                .getAllTokens();
        Assert.assertEquals(tokenIdentSet, allTokensRM1.keySet());
        rmState = rm1.getRMContext().getStateStore().loadState();
        Map<RMDelegationTokenIdentifier, Long> rmDTState = rmState.getRMDTSecretManagerState().getTokenState();
        Assert.assertEquals(allTokensRM1.size(), rmDTState.size());
        for (RMDelegationTokenIdentifier identifier : allTokensRM1.keySet()) {
            Assert.assertEquals(allTokensRM1.get(identifier), rmDTState.get(identifier));
        }

        // assert sequence number is saved
        rmState = rm1.getRMContext().getStateStore().loadState();
        Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(),
                rmState.getRMDTSecretManagerState().getDTSequenceNumber());

        // request one more token
        GetDelegationTokenRequest request2 = GetDelegationTokenRequest.newInstance("renewer2");
        GetDelegationTokenResponse response2 = rm1.getClientRMService().getDelegationToken(request2);
        org.apache.hadoop.yarn.api.records.Token delegationToken2 = response2.getRMDelegationToken();
        Token<RMDelegationTokenIdentifier> token2 = ConverterUtils.convertFromYarn(delegationToken2, rmAddr);
        RMDelegationTokenIdentifier dtId2 = token2.decodeIdentifier();

        // cancel token2
        try {
            rm1.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token2,
                    UserGroupInformation.getCurrentUser().getUserName());
        } catch (Exception e) {
            Assert.fail();
        }

        // Assert the token which has the latest delegationTokenSequenceNumber is removed
        Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(),
                dtId2.getSequenceNumber());
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmDTState = rmState.getRMDTSecretManagerState().getTokenState();
        Assert.assertFalse(rmDTState.containsKey(dtId2));

        // start new RM
        MockRM rm2 = new TestSecurityMockRM(conf);
        rm2.start();

        // assert master keys and tokens are populated back to DTSecretManager
        Map<RMDelegationTokenIdentifier, Long> allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager()
                .getAllTokens();
        Assert.assertEquals(allTokensRM2.keySet(), allTokensRM1.keySet());
        // rm2 has its own master keys when it starts, we use containsAll here
        rmDTMasterKeyState = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys();
        for (DelegationKey expectedKey : allKeysRM1) {
            boolean foundIt = false;
            for (DelegationKey gotKey : rmDTMasterKeyState) {
                if (expectedKey.getKeyId() == gotKey.getKeyId()
                        && Arrays.equals(expectedKey.getEncodedKey(), gotKey.getEncodedKey())) {
                    foundIt = true;
                    break;
                }
            }
            Assert.assertTrue(foundIt);
        }

        // assert sequenceNumber is properly recovered,
        // even though the token which has max sequenceNumber is not stored
        Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(),
                rm2.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber());

        // renewDate before renewing
        Long renewDateBeforeRenew = allTokensRM2.get(dtId1);
        try {
            // Sleep for one millisecond to make sure renewDataAfterRenew is greater
            Thread.sleep(1);
            // renew recovered token
            rm2.getRMContext().getRMDelegationTokenSecretManager().renewToken(token1, "renewer1");
        } catch (Exception e) {
            Assert.fail();
        }

        allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
        Long renewDateAfterRenew = allTokensRM2.get(dtId1);
        // assert token is renewed
        Assert.assertTrue(renewDateAfterRenew > renewDateBeforeRenew);

        // assert new token is added into state store
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmDTState = rmState.getRMDTSecretManagerState().getTokenState();
        Assert.assertTrue(rmDTState.containsValue(renewDateAfterRenew));
        // assert old token is removed from state store
        Assert.assertFalse(rmDTState.containsValue(renewDateBeforeRenew));

        try {
            rm2.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token1,
                    UserGroupInformation.getCurrentUser().getUserName());
        } catch (Exception e) {
            Assert.fail();
        }

        // assert token is removed from state after its cancelled
        allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
        Assert.assertFalse(allTokensRM2.containsKey(dtId1));
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmDTState = rmState.getRMDTSecretManagerState().getTokenState();
        Assert.assertFalse(rmDTState.containsKey(dtId1));
    }

    // This is to test submit an application to the new RM with the old delegation
    // token got from previous RM.
    @Test(timeout = 60000)
    public void testAppSubmissionWithOldDelegationTokenAfterRMRestart() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
        conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
        conf.set(YarnConfiguration.RM_ADDRESS, "localhost:8032");
        UserGroupInformation.setConfiguration(conf);

        MockRM rm1 = new TestSecurityMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();

        GetDelegationTokenRequest request1 = GetDelegationTokenRequest.newInstance("renewer1");
        UserGroupInformation.getCurrentUser().setAuthenticationMethod(AuthMethod.KERBEROS);
        GetDelegationTokenResponse response1 = rm1.getClientRMService().getDelegationToken(request1);
        Token<RMDelegationTokenIdentifier> token1 = ConverterUtils.convertFromYarn(response1.getRMDelegationToken(),
                rmAddr);

        // start new RM
        MockRM rm2 = new TestSecurityMockRM(conf);
        rm2.start();

        // submit an app with the old delegation token got from previous RM.
        Credentials ts = new Credentials();
        ts.addToken(token1.getService(), token1);
        RMApp app = rm2.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", 1, ts);
        rm2.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
    }

    @Test(timeout = 60000)
    public void testRMStateStoreDispatcherDrainedOnRMStop() throws Exception {
        FileSystemRMStateStore fileStore = new FileSystemRMStateStore() {
            volatile boolean wait = true;

            @Override
            public void serviceStop() throws Exception {
                // Unblock app saving request.
                wait = false;
                super.serviceStop();
            }

            @Override
            protected void handleStoreEvent(RMStateStoreEvent event) {
                // Block app saving request.
                // Skip if synchronous updation of DTToken
                if (!(event instanceof RMStateStoreAMRMTokenEvent) && !(event instanceof RMStateStoreRMDTEvent)
                        && !(event instanceof RMStateStoreRMDTMasterKeyEvent)) {
                    while (wait)
                        ;
                }
                super.handleStoreEvent(event);
            }
        };
        fileStore.init(conf);

        // start RM
        final MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        ((RMContextImpl) rm1.getRMContext()).setStateStore(fileStore);
        fileStore.setRMDispatcher(rm1.getRMContext().getDispatcher());
        fileStore.start();
        // create apps.
        final ArrayList<RMApp> appList = new ArrayList<RMApp>();
        final int NUM_APPS = 5;

        for (int i = 0; i < NUM_APPS; i++) {
            RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                    "default", -1, null, "MAPREDUCE", false);
            appList.add(app);
            rm1.waitForState(app.getApplicationId(), RMAppState.NEW_SAVING);
        }
        // all apps's saving request are now enqueued to RMStateStore's dispatcher
        // queue, and will be processed once rm.stop() is called.

        // Nothing exist in state store before stop is called.
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
        Assert.assertTrue(rmAppState.size() == 0);

        // stop rm
        rm1.stop();

        // Assert app info is still saved even if stop is called with pending saving
        // request on dispatcher.
        for (RMApp app : appList) {
            rmState = rm1.getRMContext().getStateStore().loadState();
            rmAppState = rmState.getApplicationState();
            ApplicationStateData appState = rmAppState.get(app.getApplicationId());
            Assert.assertNotNull(appState);
            Assert.assertEquals(0, appState.getAttemptCount());
            Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(),
                    app.getApplicationSubmissionContext().getApplicationId());
        }
        rmState = rm1.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        Assert.assertTrue(rmAppState.size() == NUM_APPS);
    }

    @Test(timeout = 60000)
    public void testFinishedAppRemovalAfterRMRestart() throws Exception {
        conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 1);

        // start RM
        MockRM rm1 = createMockRM(conf);
        rm1.start();
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // create an app and finish the app.
        RMApp app0 = rm1.submitApp(200);
        MockAM am0 = launchAM(app0, rm1, nm1);
        finishApplicationMaster(app0, rm1, nm1, am0);

        MockRM rm2 = createMockRM(conf);
        rm2.start();
        nm1.setResourceTrackerService(rm2.getResourceTrackerService());
        nm1 = rm2.registerNode("127.0.0.1:1234", 15120);

        RMState rmState = rm2.getRMContext().getStateStore().loadState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();

        // app0 exits in both state store and rmContext
        Assert.assertEquals(RMAppState.FINISHED, rmAppState.get(app0.getApplicationId()).getState());
        rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);

        // create one more app and finish the app.
        RMApp app1 = rm2.submitApp(200);
        MockAM am1 = launchAM(app1, rm2, nm1);
        finishApplicationMaster(app1, rm2, nm1, am1);

        // the first app0 get kicked out from both rmContext and state store
        Assert.assertNull(rm2.getRMContext().getRMApps().get(app0.getApplicationId()));
        rmState = rm2.getRMContext().getStateStore().loadState();
        rmAppState = rmState.getApplicationState();
        Assert.assertNull(rmAppState.get(app0.getApplicationId()));
    }

    // This is to test RM does not get hang on shutdown.
    @Ignore //the test need to be reimplemented to work with the distributed RT
    @Test(timeout = 10000)
    public void testRMShutdown() throws Exception {
        conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
        MemoryRMStateStore memStore = new MemoryRMStateStore() {
            @Override
            public synchronized void checkVersion() throws Exception {
                throw new Exception("Invalid version.");
            }
        };
        // start RM
        memStore.init(conf);
        MockRM rm1 = null;
        try {
            rm1 = createMockRM(conf, memStore);
            Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
            rm1.start();
            Assert.fail();
        } catch (Exception e) {
            Assert.assertTrue(e.getMessage().contains("Invalid version."));
        }
        Assert.assertTrue(rm1.getServiceState() == STATE.STOPPED);
    }

    // This is to test Killing application should be able to wait until app
    // reaches killed state and also check that attempt state is saved before app
    // state is saved.
    @Test(timeout = 60000)
    public void testClientRetryOnKillingApplication() throws Exception {
        conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
        MemoryRMStateStore memStore = new TestMemoryRMStateStore();
        memStore.init(conf);

        // start RM
        MockRM rm1 = createMockRM(conf, memStore);
        rm1.start();
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        memStore = new TestMemoryRMStateStore();
        memStore.init(conf);
        ((RMContextImpl) rm1.getRMContext()).setStateStore(memStore);
        memStore.setRMDispatcher(rm1.getRmDispatcher());
        memStore.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        RMApp app1 = rm1.submitApp(200, "name", "user", null, false, "default", 1, null, "myType");
        MockAM am1 = launchAM(app1, rm1, nm1);

        KillApplicationResponse response;
        int count = 0;
        while (true) {
            response = rm1.killApp(app1.getApplicationId());
            if (response.getIsKillCompleted()) {
                break;
            }
            Thread.sleep(100);
            count++;
        }
        // we expect at least 2 calls for killApp as the first killApp always return
        // false.
        Assert.assertTrue(count >= 1);

        rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED);
        rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED);
        Assert.assertEquals(1, ((TestMemoryRMStateStore) memStore).updateAttempt);
        Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateApp);
    }

    // Test Application that fails on submission is saved in state store.
    @Test(timeout = 20000)
    public void testAppFailedOnSubmissionSavedInStateStore() throws Exception {
        conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
        UserGroupInformation.setConfiguration(conf);

        MockRM rm1 = new TestSecurityMockRM(conf) {
            @Override
            protected RMAppManager createRMAppManager() {
                return new TestRMAppManager(this.rmContext, this.scheduler, this.masterService,
                        this.applicationACLsManager, conf);
            }

            class TestRMAppManager extends RMAppManager {

                public TestRMAppManager(RMContext context, YarnScheduler scheduler,
                        ApplicationMasterService masterService, ApplicationACLsManager applicationACLsManager,
                        Configuration conf) {
                    super(context, scheduler, masterService, applicationACLsManager, conf);
                }

                @Override
                protected Credentials parseCredentials(ApplicationSubmissionContext application)
                        throws IOException {
                    throw new IOException("Parsing credential error.");
                }
            }
        };
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false,
                "default", -1, null, "MAPREDUCE", false);
        rm1.waitForState(app1.getApplicationId(), RMAppState.FAILED);
        // Check app staet is saved in state store.
        RMState rmState = rm1.getRMContext().getStateStore().loadState();
        Assert.assertEquals(RMAppState.FAILED,
                rmState.getApplicationState().get(app1.getApplicationId()).getState());

        MockRM rm2 = new TestSecurityMockRM(conf);
        rm2.start();
        // Restarted RM has the failed app info too.
        rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
    }

    @Test(timeout = 20000)
    public void testAppRecoveredInOrderOnRMRestart() throws Exception {
        MemoryRMStateStore memStore = new MemoryRMStateStore();
        memStore.init(conf);

        for (int i = 10; i > 0; i--) {
            ApplicationStateData appState = mock(ApplicationStateData.class);
            ApplicationSubmissionContext context = mock(ApplicationSubmissionContext.class);
            when(appState.getApplicationSubmissionContext()).thenReturn(context);
            when(context.getApplicationId()).thenReturn(ApplicationId.newInstance(1234, i));
            memStore.getState().getApplicationState()
                    .put(appState.getApplicationSubmissionContext().getApplicationId(), appState);
        }

        MockRM rm1 = new MockRM(conf, memStore) {
            @Override
            protected RMAppManager createRMAppManager() {
                return new TestRMAppManager(this.rmContext, this.scheduler, this.masterService,
                        this.applicationACLsManager, conf);
            }

            class TestRMAppManager extends RMAppManager {
                ApplicationId prevId = ApplicationId.newInstance(1234, 0);

                public TestRMAppManager(RMContext context, YarnScheduler scheduler,
                        ApplicationMasterService masterService, ApplicationACLsManager applicationACLsManager,
                        Configuration conf) {
                    super(context, scheduler, masterService, applicationACLsManager, conf);
                }

                @Override
                protected void recoverApplication(ApplicationStateData appState, RMState rmState) throws Exception {
                    // check application is recovered in order.
                    Assert.assertTrue(rmState.getApplicationState().size() > 0);
                    Assert.assertTrue(
                            appState.getApplicationSubmissionContext().getApplicationId().compareTo(prevId) > 0);
                    prevId = appState.getApplicationSubmissionContext().getApplicationId();
                }
            }
        };
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        try {
            rm1.start();
        } finally {
            rm1.stop();
        }
    }

    @SuppressWarnings("resource")
    @Test(timeout = 60000)
    public void testQueueMetricsOnRMRestart() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);

        // PHASE 1: create state in an RM
        // start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();
        QueueMetrics qm1 = rm1.getResourceScheduler().getRootQueueMetrics();
        resetQueueMetrics(qm1);
        assertQueueMetrics(qm1, 0, 0, 0, 0);

        // create app that gets launched and does allocate before RM restart
        RMApp app1 = rm1.submitApp(200);
        // Need to wait first for AppAttempt to be started (RMAppState.ACCEPTED)
        // and then for it to reach RMAppAttemptState.SCHEDULED
        // inorder to ensure appsPending metric is incremented
        rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
        RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
        ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
        rm1.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
        assertQueueMetrics(qm1, 1, 1, 0, 0);

        nm1.nodeHeartbeat(true);
        rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
        MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
        am1.registerAppAttempt();
        am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
        nm1.nodeHeartbeat(true);
        List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                .getAllocatedContainers();
        while (conts.size() == 0) {
            nm1.nodeHeartbeat(true);
            conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                    .getAllocatedContainers());
            Thread.sleep(500);
        }
        assertQueueMetrics(qm1, 1, 0, 1, 0);

        // PHASE 2: create new RM and start from old state
        // create new RM to represent restart and recover state
        MockRM rm2 = createMockRM(conf);
        QueueMetrics qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
        resetQueueMetrics(qm2);
        assertQueueMetrics(qm2, 0, 0, 0, 0);

        rm2.start();
        resetCarryOn();
        nm1.setResourceTrackerService(rm2.getResourceTrackerService());
        // recover app
        RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());

        nm1.nodeHeartbeat(true);
        nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());

        NMContainerStatus status = TestRMRestart.createNMContainerStatus(
                loadedApp1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.COMPLETE);
        nm1.registerNode(Arrays.asList(status), null);

        while (loadedApp1.getAppAttempts().size() != 2) {
            Thread.sleep(200);
        }
        attempt1 = loadedApp1.getCurrentAppAttempt();
        attemptId1 = attempt1.getAppAttemptId();
        rm2.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
        qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
        assertQueueMetrics(qm2, 1, 1, 0, 0);
        nm1.nodeHeartbeat(true);
        rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
        assertQueueMetrics(qm2, 1, 0, 1, 0);
        am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
        am1.registerAppAttempt();
        am1.allocate("127.0.0.1", 1000, 3, new ArrayList<ContainerId>());
        nm1.nodeHeartbeat(true);
        conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                .getAllocatedContainers();
        while (conts.size() == 0) {
            nm1.nodeHeartbeat(true);
            conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>())
                    .getAllocatedContainers());
            Thread.sleep(500);
        }

        // finish the AMs
        finishApplicationMaster(loadedApp1, rm2, nm1, am1);
        // now AppAttempt and App becomes FINISHED,
        // we should also grant APP_ATTEMPT_REMOVE/APP_REMOVE event
        // had processed by scheduler
        rm2.waitForAppRemovedFromScheduler(loadedApp1.getApplicationId());
        assertQueueMetrics(qm2, 1, 0, 0, 1);
    }

    // The metrics has some carry-on value from the previous RM, because the
    // test case is in-memory, for the same queue name (e.g. root), there's
    // always a singleton QueueMetrics object.
    private int appsSubmittedCarryOn = 0;
    private int appsPendingCarryOn = 0;
    private int appsRunningCarryOn = 0;
    private int appsCompletedCarryOn = 0;

    private void resetQueueMetrics(QueueMetrics qm) {
        appsSubmittedCarryOn = qm.getAppsSubmitted();
        appsPendingCarryOn = qm.getAppsPending();
        appsRunningCarryOn = qm.getAppsRunning();
        appsCompletedCarryOn = qm.getAppsCompleted();
    }

    //when a new rm become leader the carry on are reseted.
    private void resetCarryOn() {
        appsSubmittedCarryOn = 0;
        appsPendingCarryOn = 0;
        appsRunningCarryOn = 0;
        appsCompletedCarryOn = 0;
    }

    private void assertQueueMetrics(QueueMetrics qm, int appsSubmitted, int appsPending, int appsRunning,
            int appsCompleted) {
        Assert.assertEquals(appsSubmitted + appsSubmittedCarryOn, qm.getAppsSubmitted());
        Assert.assertEquals(appsPending + appsPendingCarryOn, qm.getAppsPending());
        Assert.assertEquals(appsRunning + appsRunningCarryOn, qm.getAppsRunning());
        Assert.assertEquals(appsCompleted + appsCompletedCarryOn, qm.getAppsCompleted());
    }

    @Test(timeout = 60000)
    public void testDecomissionedNMsMetricsOnRMRestart() throws Exception {
        YarnConfiguration conf = new YarnConfiguration(this.conf);
        conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath());
        writeToHostsFile("");
        MockRM rm1 = null, rm2 = null;
        try {
            rm1 = new MockRM(conf);
            Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
            rm1.start();
            MockNM nm1 = rm1.registerNode("localhost:1234", 8000);
            MockNM nm2 = rm1.registerNode("host2:1234", 8000);
            Resource expectedCapability = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
            String expectedVersion = nm1.getVersion();
            Assert.assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
            String ip = NetUtils.normalizeHostName("localhost");
            // Add 2 hosts to exclude list.
            writeToHostsFile("host2", ip);

            // refresh nodes
            rm1.getNodesListManager().refreshNodes(conf);
            NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
            Assert.assertTrue(NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
            nodeHeartbeat = nm2.nodeHeartbeat(true);
            Assert.assertTrue("The decommisioned metrics are not updated",
                    NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));

            rm1.drainEvents();
            Assert.assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
            verifyNodesAfterDecom(rm1, 2, expectedCapability, expectedVersion);
            rm1.stop();
            rm1 = null;
            Assert.assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());

            // restart RM.
            rm2 = new MockRM(conf);
            rm2.start();
            rm2.drainEvents();
            Assert.assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
            verifyNodesAfterDecom(rm2, 2, Resource.newInstance(0, 0), "unknown");
        } finally {
            if (rm1 != null) {
                rm1.stop();
            }
            if (rm2 != null) {
                rm2.stop();
            }
        }
    }

    private void verifyNodesAfterDecom(MockRM rm, int numNodes, Resource expectedCapability,
            String expectedVersion) {
        ConcurrentMap<NodeId, RMNode> inactiveRMNodes = rm.getRMContext().getInactiveRMNodes();
        Assert.assertEquals(numNodes, inactiveRMNodes.size());
        for (RMNode rmNode : inactiveRMNodes.values()) {
            Assert.assertEquals(expectedCapability, rmNode.getTotalCapability());
            Assert.assertEquals(expectedVersion, rmNode.getNodeManagerVersion());
        }
    }

    // Test Delegation token is renewed synchronously so that recover events
    // can be processed before any other external incoming events, specifically
    // the ContainerFinished event on NM re-registraton.
    @Test(timeout = 20000)
    public void testSynchronouslyRenewDTOnRecovery() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
        conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");

        // start RM
        MockRM rm1 = createMockRM(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();
        final MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();
        RMApp app0 = rm1.submitApp(200);
        final MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);

        MockRM rm2 = new MockRM(conf) {
            @Override
            protected ResourceTrackerService createResourceTrackerService() {
                return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor,
                        this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {
                    @Override
                    protected void serviceStart() throws Exception {
                        // send the container_finished event as soon as the
                        // ResourceTrackerService is started.
                        super.serviceStart();
                        nm1.setResourceTrackerService(getResourceTrackerService());
                        NMContainerStatus status = TestRMRestart
                                .createNMContainerStatus(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
                        nm1.registerNode(Arrays.asList(status), null);
                    }
                };
            }
        };

        try {
            // Re-start RM
            rm2.start();

            // wait for the 2nd attempt to be started.
            RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
            int timeoutSecs = 0;
            while (loadedApp0.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
                Thread.sleep(200);
            }
            MockAM am1 = MockRM.launchAndRegisterAM(loadedApp0, rm2, nm1);
            MockRM.finishAMAndVerifyAppState(loadedApp0, rm2, nm1, am1);
        } finally {
            rm2.stop();
        }
    }

    private void writeToHostsFile(String... hosts) throws IOException {
        if (!hostFile.exists()) {
            TEMP_DIR.mkdirs();
            hostFile.createNewFile();
        }
        FileOutputStream fStream = null;
        try {
            fStream = new FileOutputStream(hostFile);
            for (int i = 0; i < hosts.length; i++) {
                fStream.write(hosts[i].getBytes());
                fStream.write(System.getProperty("line.separator").getBytes());
            }
        } finally {
            if (fStream != null) {
                IOUtils.closeStream(fStream);
                fStream = null;
            }
        }
    }

    public static NMContainerStatus createNMContainerStatus(ApplicationAttemptId appAttemptId, int id,
            ContainerState containerState) {
        return createNMContainerStatus(appAttemptId, id, containerState, RMNodeLabelsManager.NO_LABEL);
    }

    public static NMContainerStatus createNMContainerStatus(ApplicationAttemptId appAttemptId, int id,
            ContainerState containerState, String nodeLabelExpression) {
        ContainerId containerId = ContainerId.newContainerId(appAttemptId, id);
        NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, 0, containerState,
                Resource.newInstance(1024, 1), "recover container", 0, Priority.newInstance(0), 0,
                nodeLabelExpression);
        return containerReport;
    }

    public class TestMemoryRMStateStore extends MemoryRMStateStore {
        int count = 0;
        public int updateApp = 0;
        public int updateAttempt = 0;

        @Override
        public void updateApplicationStateInternal(ApplicationId appId, ApplicationStateData appStateData)
                throws Exception {
            updateApp = ++count;
            super.updateApplicationStateInternal(appId, appStateData);
        }

        @Override
        public synchronized void updateApplicationAttemptStateInternal(ApplicationAttemptId attemptId,
                ApplicationAttemptStateData attemptStateData) throws Exception {
            updateAttempt = ++count;
            super.updateApplicationAttemptStateInternal(attemptId, attemptStateData);
        }
    }

    public static class TestSecurityMockRM extends MockRM {

        public TestSecurityMockRM(Configuration conf, RMStateStore store) {
            super(conf, store);
        }

        public TestSecurityMockRM(Configuration conf) {
            super(conf);
        }

        @Override
        public void init(Configuration conf) {
            // reset localServiceAddress.
            RMDelegationTokenIdentifier.Renewer.setSecretManager(null, null);
            super.init(conf);
        }

        @Override
        protected ClientRMService createClientRMService() {
            return new ClientRMService(getRMContext(), getResourceScheduler(), rmAppManager, applicationACLsManager,
                    null, getRMContext().getRMDelegationTokenSecretManager()) {
                @Override
                protected void serviceStart() throws Exception {
                    // do nothing
                }

                @Override
                protected void serviceStop() throws Exception {
                    //do nothing
                }
            };
        }

        @Override
        protected void doSecureLogin() throws IOException {
            // Do nothing.
        }
    }

    // Test does following verification
    // 1. Start RM1 with store patch /tmp
    // 2. Add/remove/replace labels to cluster and node lable and verify
    // 3. Start RM2 with store patch /tmp only
    // 4. Get cluster and node lobel, it should be present by recovering it
    @Test(timeout = 20000)
    public void testRMRestartRecoveringNodeLabelManager() throws Exception {
        // Initial FS node label store root dir to a random tmp dir
        File nodeLabelFsStoreDir = new File("target",
                this.getClass().getSimpleName() + "-testRMRestartRecoveringNodeLabelManager");
        if (nodeLabelFsStoreDir.exists()) {
            FileUtils.deleteDirectory(nodeLabelFsStoreDir);
        }
        nodeLabelFsStoreDir.deleteOnExit();

        String nodeLabelFsStoreDirURI = nodeLabelFsStoreDir.toURI().toString();
        conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, nodeLabelFsStoreDirURI);

        MemoryRMStateStore memStore = new MemoryRMStateStore();
        memStore.init(conf);
        conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
        MockRM rm1 = new MockRM(conf, memStore) {
            @Override
            protected RMNodeLabelsManager createNodeLabelManager() {
                RMNodeLabelsManager mgr = new RMNodeLabelsManager();
                mgr.init(getConfig());
                return mgr;
            }
        };
        rm1.init(conf);
        Assume.assumeFalse(rm1.getResourceScheduler() instanceof FairScheduler);
        rm1.start();

        RMNodeLabelsManager nodeLabelManager = rm1.getRMContext().getNodeLabelManager();

        Set<String> clusterNodeLabels = new HashSet<String>();
        clusterNodeLabels.add("x");
        clusterNodeLabels.add("y");
        clusterNodeLabels.add("z");
        // Add node label x,y,z
        nodeLabelManager.addToCluserNodeLabelsWithDefaultExclusivity(clusterNodeLabels);

        // Add node Label to Node h1->x
        NodeId n1 = NodeId.newInstance("h1", 0);
        nodeLabelManager.addLabelsToNode(ImmutableMap.of(n1, toSet("x")));

        clusterNodeLabels.remove("z");
        // Remove cluster label z
        nodeLabelManager.removeFromClusterNodeLabels(toSet("z"));

        // Replace nodelabel h1->x,y
        nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("y")));

        // Wait for updating store.It is expected NodeStore update should happen
        // very fast since it has separate dispatcher. So waiting for max 5 seconds,
        // which is sufficient time to update NodeStore.
        int count = 10;
        while (count-- > 0) {
            if (nodeLabelManager.getNodeLabels().size() > 0) {
                break;
            }
            Thread.sleep(500);
        }

        Assert.assertEquals(clusterNodeLabels.size(), nodeLabelManager.getClusterNodeLabelNames().size());

        Map<NodeId, Set<String>> nodeLabels = nodeLabelManager.getNodeLabels();
        Assert.assertEquals(1, nodeLabelManager.getNodeLabels().size());
        Assert.assertTrue(nodeLabels.get(n1).equals(toSet("y")));

        MockRM rm2 = new MockRM(conf, memStore) {
            @Override
            protected RMNodeLabelsManager createNodeLabelManager() {
                RMNodeLabelsManager mgr = new RMNodeLabelsManager();
                mgr.init(getConfig());
                return mgr;
            }
        };
        rm2.init(conf);
        rm2.start();

        nodeLabelManager = rm2.getRMContext().getNodeLabelManager();
        Assert.assertEquals(clusterNodeLabels.size(), nodeLabelManager.getClusterNodeLabelNames().size());

        nodeLabels = nodeLabelManager.getNodeLabels();
        Assert.assertEquals(1, nodeLabelManager.getNodeLabels().size());
        Assert.assertTrue(nodeLabels.get(n1).equals(toSet("y")));
        rm1.stop();
        rm2.stop();
    }

    @Test(timeout = 60000)
    public void testRMRestartFailAppAttempt() throws Exception {
        conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
        int maxAttempt = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
                YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
        MemoryRMStateStore memStore = new MemoryRMStateStore();
        memStore.init(conf);
        RMState rmState = memStore.getState();
        Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();

        // start RM
        MockRM rm1 = createMockRM(conf, memStore);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();

        // create app and launch the AM
        RMApp app0 = rm1.submitApp(200);
        MockAM am0 = launchAM(app0, rm1, nm1);

        ApplicationId applicationId = app0.getApplicationId();
        ApplicationAttemptId appAttemptId1 = app0.getCurrentAppAttempt().getAppAttemptId();
        Assert.assertEquals(1, appAttemptId1.getAttemptId());

        // fail the 1st app attempt.
        rm1.failApplicationAttempt(appAttemptId1);

        rm1.waitForState(appAttemptId1, RMAppAttemptState.FAILED);
        rm1.waitForState(applicationId, RMAppState.ACCEPTED);

        ApplicationAttemptId appAttemptId2 = app0.getCurrentAppAttempt().getAppAttemptId();
        Assert.assertEquals(2, appAttemptId2.getAttemptId());
        rm1.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);

        // restart rm
        MockRM rm2 = createMockRM(conf, memStore);
        rm2.start();
        RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(applicationId);
        rm2.waitForState(applicationId, RMAppState.ACCEPTED);
        rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);

        Assert.assertEquals(2, loadedApp0.getAppAttempts().size());
        rm2.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);

        appAttemptId2 = loadedApp0.getCurrentAppAttempt().getAppAttemptId();
        Assert.assertEquals(2, appAttemptId2.getAttemptId());

        // fail 2nd attempt
        rm2.failApplicationAttempt(appAttemptId2);

        rm2.waitForState(appAttemptId2, RMAppAttemptState.FAILED);
        rm2.waitForState(applicationId, RMAppState.FAILED);
        Assert.assertEquals(maxAttempt, loadedApp0.getAppAttempts().size());
    }

    private <E> Set<E> toSet(E... elements) {
        Set<E> set = Sets.newHashSet(elements);
        return set;
    }

    @Test(timeout = 20000)
    public void testRMRestartNodeMapping() throws Exception {
        // Initial FS node label store root dir to a random tmp dir
        File nodeLabelFsStoreDir = new File("target",
                this.getClass().getSimpleName() + "-testRMRestartNodeMapping");
        if (nodeLabelFsStoreDir.exists()) {
            FileUtils.deleteDirectory(nodeLabelFsStoreDir);
        }
        nodeLabelFsStoreDir.deleteOnExit();
        String nodeLabelFsStoreDirURI = nodeLabelFsStoreDir.toURI().toString();
        conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, nodeLabelFsStoreDirURI);

        MemoryRMStateStore memStore = new MemoryRMStateStore();
        memStore.init(conf);
        conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
        MockRM rm1 = new MockRM(conf, memStore) {
            @Override
            protected RMNodeLabelsManager createNodeLabelManager() {
                RMNodeLabelsManager mgr = new RMNodeLabelsManager();
                mgr.init(getConfig());
                return mgr;
            }
        };
        rm1.init(conf);
        rm1.start();
        RMNodeLabelsManager nodeLabelManager = rm1.getRMContext().getNodeLabelManager();

        Set<String> clusterNodeLabels = new HashSet<String>();
        clusterNodeLabels.add("x");
        clusterNodeLabels.add("y");
        nodeLabelManager.addToCluserNodeLabelsWithDefaultExclusivity(clusterNodeLabels);
        // Add node Label to Node h1->x
        NodeId n1 = NodeId.newInstance("h1", 1234);
        NodeId n2 = NodeId.newInstance("h1", 1235);
        NodeId nihost = NodeId.newInstance("h1", 0);
        nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("x")));
        nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n2, toSet("x")));
        nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(nihost, toSet("y")));
        nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("x")));
        MockRM rm2 = null;
        for (int i = 0; i < 2; i++) {
            rm2 = new MockRM(conf, memStore) {
                @Override
                protected RMNodeLabelsManager createNodeLabelManager() {
                    RMNodeLabelsManager mgr = new RMNodeLabelsManager();
                    mgr.init(getConfig());
                    return mgr;
                }
            };
            rm2.init(conf);
            rm2.start();

            nodeLabelManager = rm2.getRMContext().getNodeLabelManager();
            Map<String, Set<NodeId>> labelsToNodes = nodeLabelManager.getLabelsToNodes(toSet("x"));
            Assert.assertEquals(1, null == labelsToNodes.get("x") ? 0 : labelsToNodes.get("x").size());
        }
        rm1.stop();
        rm2.stop();
    }

    @Test(timeout = 60000)
    public void testRMRestartAfterNodeLabelDisabled() throws Exception {
        // Skip this test case if it is not CapacityScheduler since NodeLabel is
        // not fully supported yet for FairScheduler and others.
        if (!getSchedulerType().equals(SchedulerType.CAPACITY)) {
            return;
        }
        MemoryRMStateStore memStore = new MemoryRMStateStore();
        memStore.init(conf);

        conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);

        MockRM rm1 = new MockRM(TestUtils.getConfigurationWithDefaultQueueLabels(conf), memStore) {
            @Override
            protected RMNodeLabelsManager createNodeLabelManager() {
                RMNodeLabelsManager mgr = new RMNodeLabelsManager();
                mgr.init(getConfig());
                return mgr;
            }
        };
        rm1.start();

        // add node label "x" and set node to label mapping
        Set<String> clusterNodeLabels = new HashSet<String>();
        clusterNodeLabels.add("x");
        RMNodeLabelsManager nodeLabelManager = rm1.getRMContext().getNodeLabelManager();
        nodeLabelManager.addToCluserNodeLabelsWithDefaultExclusivity(clusterNodeLabels);
        nodeLabelManager.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
        MockNM nm1 = rm1.registerNode("h1:1234", 8000); // label = x

        // submit an application with specifying am node label expression as "x"
        RMApp app1 = rm1.submitApp(200, "someApp", "someUser", null, "a1", "x");
        // check am container allocated with correct node label expression
        MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
        ContainerId amContainerId1 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
        Assert.assertEquals("x",
                rm1.getRMContext().getScheduler().getRMContainer(amContainerId1).getNodeLabelExpression());
        finishApplicationMaster(app1, rm1, nm1, am1);

        // restart rm with node label disabled
        conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, false);
        MockRM rm2 = new MockRM(TestUtils.getConfigurationWithDefaultQueueLabels(conf), memStore) {
            @Override
            protected RMNodeLabelsManager createNodeLabelManager() {
                RMNodeLabelsManager mgr = new RMNodeLabelsManager();
                mgr.init(getConfig());
                return mgr;
            }
        };

        // rm should successfully start with app1 loaded back in FAILED state
        // due to node label not enabled but am resource request contains
        // node label expression.
        try {
            rm2.start();
            Assert.assertTrue("RM start successfully", true);
            Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
            rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
        } catch (Exception e) {
            LOG.debug("Exception on start", e);
            Assert.fail("RM should start without any issue");
        } finally {
            rm1.stop();
            rm2.stop();
        }
    }
}