org.apache.hadoop.yarn.server.resourcemanager.TestRMHA.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.yarn.server.resourcemanager.TestRMHA.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.net.InetSocketAddress;

import javax.ws.rs.core.MediaType;

import io.hops.util.DBUtility;
import io.hops.util.RMStorageFactory;
import io.hops.util.YarnAPIStorageFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.DrainDispatcher;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import com.sun.jersey.api.client.config.DefaultClientConfig;
import org.apache.hadoop.service.Service;
import org.junit.After;

public class TestRMHA {
    /** Shared logger; static and final so it is created once and cannot be reassigned. */
    private static final Log LOG = LogFactory.getLog(TestRMHA.class);
    /** Base configuration, rebuilt by {@link #setUp()} before every test. */
    private Configuration configuration;
    /** ResourceManager under test; {@link #teardown()} stops it unless it is already stopped. */
    private MockRM rm = null;
    /** Application most recently submitted by {@code checkActiveRMFunctionality()}. */
    private RMApp app = null;
    /** Current attempt of {@link #app}, captured right after submission. */
    private RMAppAttempt attempt = null;
    /** Assertion message used whenever the RM reports an unexpected HA state. */
    private static final String STATE_ERR = "ResourceManager is in wrong HA state";

    // Fake per-RM service addresses and their HA ids. setUp() lists only rm1 and rm2
    // in RM_HA_IDS; rm3 gets addresses too and is used by testHAIDLookup.
    private static final String RM1_ADDRESS = "1.1.1.1:1";
    private static final String RM1_NODE_ID = "rm1";

    private static final String RM2_ADDRESS = "0.0.0.0:0";
    private static final String RM2_NODE_ID = "rm2";

    private static final String RM3_ADDRESS = "2.2.2.2:2";
    private static final String RM3_NODE_ID = "rm3";

    /**
     * Prepares a fresh HA-enabled configuration before each test, points the
     * storage factories at it, initializes the database and clears metrics state.
     *
     * @throws Exception if storage or database initialization fails
     */
    @Before
    public void setUp() throws Exception {
        configuration = new Configuration();
        UserGroupInformation.setConfiguration(configuration);

        final String haIds = RM1_NODE_ID + "," + RM2_NODE_ID;
        configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
        configuration.set(YarnConfiguration.RM_HA_IDS, haIds);

        // One {id, address} pair per RM; every service address key gets all three.
        final String[][] rmEndpoints = {
            { RM1_NODE_ID, RM1_ADDRESS },
            { RM2_NODE_ID, RM2_ADDRESS },
            { RM3_NODE_ID, RM3_ADDRESS },
        };
        for (String serviceKey : YarnConfiguration.getServiceAddressConfKeys(configuration)) {
            for (String[] endpoint : rmEndpoints) {
                configuration.set(HAUtil.addSuffix(serviceKey, endpoint[0]), endpoint[1]);
            }
        }

        // The webapp is enabled so the web-services can be exercised as well.
        configuration.setBoolean(MockRM.ENABLE_WEBAPP, true);
        configuration.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);

        // Storage layer setup.
        RMStorageFactory.setConfiguration(configuration);
        YarnAPIStorageFactory.setConfiguration(configuration);
        DBUtility.InitializeDB();

        // Reset metrics state before the test runs.
        ClusterMetrics.destroy();
        QueueMetrics.clearQueueMetrics();
        DefaultMetricsSystem.shutdown();
    }

    /** Stops the ResourceManager after a test unless none was created or it is already stopped. */
    @After
    public void teardown() {
        if (rm == null) {
            return;
        }
        boolean alreadyStopped = rm.isInState(Service.STATE.STOPPED);
        if (!alreadyStopped) {
            rm.stop();
        }
    }

    /**
     * Invokes the admin service health check and fails the test if it reports
     * a {@link HealthCheckFailedException}.
     *
     * @throws IOException if the health check fails for any other I/O reason
     */
    private void checkMonitorHealth() throws IOException {
        final String unhealthyMessage =
                "The RM is in bad health: it is Active, but the active services are not running";
        try {
            rm.adminService.monitorHealth();
        } catch (HealthCheckFailedException healthFailure) {
            fail(unhealthyMessage);
        }
    }

    /**
     * Asserts the RM looks like a healthy standby: STANDBY state, scheduler
     * services not running, and ready to become active.
     *
     * @throws IOException if the service status cannot be obtained
     */
    private void checkStandbyRMFunctionality() throws IOException {
        HAServiceState currentState = rm.adminService.getServiceStatus().getState();
        assertEquals(STATE_ERR, HAServiceState.STANDBY, currentState);

        boolean schedulerRunning = rm.areSchedulerServicesRunning();
        assertFalse("Active RM services are started", schedulerRunning);

        // Status is fetched again rather than reused from the first assertion.
        boolean readyForActive = rm.adminService.getServiceStatus().isReadyToBecomeActive();
        assertTrue("RM is not ready to become active", readyForActive);
    }

    /**
     * Asserts the RM behaves as an active RM: ACTIVE state, scheduler services
     * running, and able to register a node and bring a submitted application's
     * attempt to SCHEDULED. Finishes with a web-services sanity check.
     *
     * <p>Each call registers a node and submits one more application, and stores
     * the new application and attempt in {@link #app} and {@link #attempt}.
     *
     * @throws Exception if obtaining the service status or the web-service check fails
     */
    private void checkActiveRMFunctionality() throws Exception {
        assertEquals(STATE_ERR, HAServiceState.ACTIVE, rm.adminService.getServiceStatus().getState());
        assertTrue("Active RM services aren't started", rm.areSchedulerServicesRunning());
        assertTrue("RM is not ready to become active", rm.adminService.getServiceStatus().isReadyToBecomeActive());

        try {
            rm.getNewAppId();
            rm.registerNode("127.0.0.1:1", 2048);
            app = rm.submitApp(1024);
            attempt = app.getCurrentAppAttempt();
            rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.SCHEDULED);
        } catch (Exception e) {
            // Log before failing: fail() throws, so a log call placed after it
            // would never run and the root cause would be lost.
            LOG.error("ActiveRM check failed", e);
            fail("Unable to perform Active RM functions");
        }

        checkActiveRMWebServices();
    }

    /**
     * Sanity-checks the RM web-services after fail-over: fetches the most recently
     * submitted application ({@link #app}) over REST and verifies that the reply is
     * JSON with a single {@code app} element whose state is {@code ACCEPTED}.
     *
     * @throws JSONException if the response body lacks the expected JSON fields
     */
    private void checkActiveRMWebServices() throws JSONException {

        // Validate web-service
        Client webServiceClient = Client.create(new DefaultClientConfig());
        try {
            InetSocketAddress rmWebappAddr = NetUtils.getConnectAddress(rm.getWebapp().getListenerAddress());
            String webappURL = "http://" + rmWebappAddr.getHostName() + ":" + rmWebappAddr.getPort();
            WebResource webResource = webServiceClient.resource(webappURL);
            String path = app.getApplicationId().toString();

            ClientResponse response = webResource.path("ws").path("v1").path("cluster").path("apps").path(path)
                    .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
            assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
            JSONObject json = response.getEntity(JSONObject.class);

            assertEquals("incorrect number of elements", 1, json.length());
            JSONObject appJson = json.getJSONObject("app");
            assertEquals("ACCEPTED", appJson.getString("state"));
            // Other stuff is verified in the regular web-services related tests
        } finally {
            // This helper runs several times per test; release the client's
            // resources each time instead of leaking one client per call.
            webServiceClient.destroy();
        }
    }

    /**
     * Verifies manual RM HA transitions (automatic failover is disabled) through
     * the following sequence:
     * 1. Standby: Should be a no-op
     * 2. Active: Active services should start
     * 3. Active: Should be a no-op.
     * While active, submit a couple of jobs
     * 4. Standby: Active services should stop
     * 5. Active: Active services should start
     * 6. Stop the RM: All services should stop and RM should not be ready to
     * become Active
     *
     * After every step the health check is run and the cluster/queue metrics are
     * compared against the values expected for that state.
     */
    @Test(timeout = 30000)
    public void testFailoverAndTransitions() throws Exception {
        configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
        configuration.set(YarnConfiguration.LEADER_CLIENT_FAILOVER_PROXY_PROVIDER,
                "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider");
        Configuration conf = new YarnConfiguration(configuration);

        rm = new MockRM(conf);
        rm.init(conf);
        StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER);

        // Initialized but not yet started: state is INITIALIZING and the RM must
        // not claim to be ready to become active.
        assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
        assertFalse("RM is ready to become active before being started",
                rm.adminService.getServiceStatus().isReadyToBecomeActive());
        checkMonitorHealth();

        // Once started, the RM is expected to be in standby with all metrics at zero.
        rm.start();
        checkMonitorHealth();
        checkStandbyRMFunctionality();
        verifyClusterMetrics(0, 0, 0, 0, 0, 0);

        // 1. Transition to Standby - must be a no-op
        rm.adminService.transitionToStandby(requestInfo);
        checkMonitorHealth();
        checkStandbyRMFunctionality();
        verifyClusterMetrics(0, 0, 0, 0, 0, 0);

        // 2. Transition to active
        // checkActiveRMFunctionality() registers one node (2048 MB) and submits
        // one app, which is what the expected metrics below reflect.
        rm.adminService.transitionToActive(requestInfo);
        checkMonitorHealth();
        checkActiveRMFunctionality();
        verifyClusterMetrics(1, 1, 1, 1, 2048, 1);

        // 3. Transition to active - no-op
        // The second check submits another app, so the app counters are now 2
        // while the active node count stays at 1.
        rm.adminService.transitionToActive(requestInfo);
        checkMonitorHealth();
        checkActiveRMFunctionality();
        verifyClusterMetrics(1, 2, 2, 2, 2048, 2);

        // 4. Transition to standby
        // All metrics are expected to drop back to zero.
        rm.adminService.transitionToStandby(requestInfo);
        checkMonitorHealth();
        checkStandbyRMFunctionality();
        verifyClusterMetrics(0, 0, 0, 0, 0, 0);

        // 5. Transition to active to check Active->Standby->Active works
        rm.adminService.transitionToActive(requestInfo);
        checkMonitorHealth();
        checkActiveRMFunctionality();
        verifyClusterMetrics(1, 1, 1, 1, 2048, 1);

        // 6. Stop the RM. All services should stop and RM should not be ready to
        // become active
        rm.stop();
        assertEquals(STATE_ERR, HAServiceState.STOPPING, rm.adminService.getServiceStatus().getState());
        assertFalse("RM is ready to become active even after it is stopped",
                rm.adminService.getServiceStatus().isReadyToBecomeActive());
        assertFalse("Active RM services are started", rm.areSchedulerServicesRunning());
        checkMonitorHealth();
    }

    /**
     * With automatic failover left at its configured default (this test does not
     * disable it), plain user requests to change HA state must be rejected with an
     * {@link AccessControlException}, while forced user requests must succeed.
     */
    @Test
    public void testTransitionsWhenAutomaticFailoverEnabled() throws Exception {
        final String unforcedShouldFail = "User request succeeded even when automatic failover is enabled";

        Configuration conf = new YarnConfiguration(configuration);

        rm = new MockRM(conf);
        rm.init(conf);
        rm.start();
        StateChangeRequestInfo plainRequest = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER);

        // Unforced standby request: must be refused, RM stays standby.
        try {
            rm.adminService.transitionToStandby(plainRequest);
            fail(unforcedShouldFail);
        } catch (AccessControlException expected) {
            // expected
        }
        checkMonitorHealth();
        checkStandbyRMFunctionality();

        // Unforced active request: must be refused, RM stays standby.
        try {
            rm.adminService.transitionToActive(plainRequest);
            fail(unforcedShouldFail);
        } catch (AccessControlException expected) {
            // expected
        }
        checkMonitorHealth();
        checkStandbyRMFunctionality();

        final String forcedShouldWork = "Forced request by user should work even if automatic failover is enabled";
        StateChangeRequestInfo forcedRequest = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER_FORCED);

        // Forced standby request: must be accepted.
        try {
            rm.adminService.transitionToStandby(forcedRequest);
        } catch (AccessControlException rejected) {
            fail(forcedShouldWork);
        }
        checkMonitorHealth();
        checkStandbyRMFunctionality();

        // Forced active request: must be accepted and the RM becomes active.
        try {
            rm.adminService.transitionToActive(forcedRequest);
        } catch (AccessControlException rejected) {
            fail(forcedShouldWork);
        }
        checkMonitorHealth();
        checkActiveRMFunctionality();
        rm.stop();
    }

    /**
     * Checks that repeated standby/active transitions keep the number of
     * registered event handlers and RM services unchanged, and that the
     * dispatcher in use while active is stopped by the transition to standby.
     */
    @Test
    public void testRMDispatcherForHA() throws IOException {
        final String handlerCountMismatch = "Expect to get the same number of handlers";
        final String serviceCountMismatch = "Expect to get the same number of services";
        configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
        configuration.set(YarnConfiguration.LEADER_CLIENT_FAILOVER_PROXY_PROVIDER,
                "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider");

        Configuration conf = new YarnConfiguration(configuration);
        rm = new MockRM(conf) {
            @Override
            protected Dispatcher createDispatcher() {
                return new MyCountingDispatcher();
            }
        };
        rm.init(conf);

        // Baseline counts captured right after init.
        MyCountingDispatcher initialDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
        int baselineHandlers = initialDispatcher.getEventHandlerCount();
        int baselineServices = rm.getServices().size();
        assertTrue(baselineHandlers != 0);

        StateChangeRequestInfo userRequest = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER);

        assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
        assertFalse("RM is ready to become active before being started",
                rm.adminService.getServiceStatus().isReadyToBecomeActive());
        rm.start();

        // Bounce between standby and active a couple of times, ending in standby.
        rm.adminService.transitionToStandby(userRequest);
        rm.adminService.transitionToActive(userRequest);
        rm.adminService.transitionToStandby(userRequest);
        rm.adminService.transitionToActive(userRequest);
        rm.adminService.transitionToStandby(userRequest);

        MyCountingDispatcher trackedDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
        assertFalse(trackedDispatcher.isStopped());

        rm.adminService.transitionToActive(userRequest);
        MyCountingDispatcher activeDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
        assertEquals(handlerCountMismatch, baselineHandlers, activeDispatcher.getEventHandlerCount());
        assertEquals(serviceCountMismatch, baselineServices, rm.getServices().size());

        // Hold on to the dispatcher in use while active before going to standby.
        trackedDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();

        rm.adminService.transitionToStandby(userRequest);
        MyCountingDispatcher standbyDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
        assertEquals(handlerCountMismatch, baselineHandlers, standbyDispatcher.getEventHandlerCount());
        assertEquals(serviceCountMismatch, baselineServices, rm.getServices().size());

        assertTrue(trackedDispatcher.isStopped());

        rm.stop();
    }

    /**
     * Verifies how the RM's own HA id ({@code RM_HA_ID}) is resolved during init:
     * implicitly when it is not configured, explicitly when it is, and with an
     * "Invalid configuration" error when no valid id can be found among
     * {@code RM_HA_IDS}.
     */
    @Test
    public void testHAIDLookup() {
        //test implicitly lookup HA-ID
        Configuration conf = new YarnConfiguration(configuration);
        rm = new MockRM(conf);
        rm.init(conf);

        // JUnit's contract is assertEquals(expected, actual); keeping that order
        // makes a failure report the right "expected" value.
        assertEquals(RM2_NODE_ID, conf.get(YarnConfiguration.RM_HA_ID));

        //test explicitly lookup HA-ID
        configuration.set(YarnConfiguration.RM_HA_ID, RM1_NODE_ID);
        conf = new YarnConfiguration(configuration);
        rm = new MockRM(conf);
        rm.init(conf);
        assertEquals(RM1_NODE_ID, conf.get(YarnConfiguration.RM_HA_ID));

        //test if RM_HA_ID can not be found
        configuration.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + RM3_NODE_ID);
        configuration.unset(YarnConfiguration.RM_HA_ID);
        conf = new YarnConfiguration(configuration);
        try {
            rm = new MockRM(conf);
            rm.init(conf);
            // fail() throws an AssertionError, which the catch below (Exception)
            // does not intercept.
            fail("Should get an exception here.");
        } catch (Exception ex) {
            Assert.assertTrue(ex.getMessage().contains("Invalid configuration! Can not find valid RM_HA_ID."));
        }

        rm.stop();
    }

    /**
     * Runs the hostname-based HA address checks twice: first without a bind
     * host, then, after resetting the configuration, with one.
     */
    @Test
    public void testHAWithRMHostName() throws Exception {
        final boolean withoutBindHost = false;
        final boolean withBindHost = true;

        innerTestHAWithRMHostName(withoutBindHost);

        // Start the second pass from a clean configuration.
        configuration.clear();
        setUp();

        innerTestHAWithRMHostName(withBindHost);
    }

    /**
     * Uses a state store whose first {@code startInternal()} call throws, and
     * expects the first transition to active to fail with an "Error when
     * transitioning to Active mode" message while a second attempt succeeds.
     */
    @Ignore //does not work with our current implementation of the leader election
    @Test(timeout = 30000)
    public void testFailoverWhenTransitionToActiveThrowException() throws Exception {
        configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
        configuration.set(YarnConfiguration.LEADER_CLIENT_FAILOVER_PROXY_PROVIDER,
                "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider");

        Configuration conf = new YarnConfiguration(configuration);

        MemoryRMStateStore memStore = new MemoryRMStateStore() {
            int count = 0;

            @Override
            public synchronized void startInternal() throws Exception {
                // first time throw exception
                if (count++ == 0) {
                    throw new Exception("Session Expired");
                }
            }
        };
        // start RM
        memStore.init(conf);

        rm = new MockRM(conf, memStore);
        rm.init(conf);
        StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER);

        assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
        assertFalse("RM is ready to become active before being started",
                rm.adminService.getServiceStatus().isReadyToBecomeActive());
        checkMonitorHealth();

        rm.start();
        checkMonitorHealth();
        checkStandbyRMFunctionality();

        // 2. Try Transition to active, throw exception
        try {
            rm.adminService.transitionToActive(requestInfo);
            Assert.fail("Transitioned to Active should throw exception.");
        } catch (Exception e) {
            // The exception message must contain the expected text. The previous
            // form tested the reverse (literal contains message), which also
            // passed for an empty or unrelated-but-shorter message.
            assertTrue(e.getMessage().contains("Error when transitioning to Active mode"));
        }

        // 3. Transition to active, success
        rm.adminService.transitionToActive(requestInfo);
        checkMonitorHealth();
        checkActiveRMFunctionality();

        rm.stop();
    }

    /**
     * Checks that transitioning to standby completes (within the test timeout)
     * when the state store reports a {@link StoreFencedException} on application
     * state updates and stopping the scheduler services is slowed by 10 seconds.
     */
    @Test(timeout = 130000)
    public void testTransitionedToStandbyShouldNotHang() throws Exception {
        configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
        configuration.set(YarnConfiguration.LEADER_CLIENT_FAILOVER_PROXY_PROVIDER,
                "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider");

        Configuration conf = new YarnConfiguration(configuration);

        // Store that reports being fenced on every application state update.
        MemoryRMStateStore memStore = new MemoryRMStateStore() {
            @Override
            public void updateApplicationState(ApplicationStateData appState) {
                notifyStoreOperationFailed(new StoreFencedException());
            }
        };
        memStore.init(conf);
        // RM whose scheduler-service shutdown is delayed by 10 seconds.
        rm = new MockRM(conf, memStore) {
            @Override
            void stopSchedulerServices() throws Exception {
                Thread.sleep(10000);
                super.stopSchedulerServices();
            }
        };
        rm.init(conf);
        final StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER);

        assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
        assertFalse("RM is ready to become active before being started",
                rm.adminService.getServiceStatus().isReadyToBecomeActive());
        checkMonitorHealth();

        rm.start();
        checkMonitorHealth();
        checkStandbyRMFunctionality();

        // 2. Transition to Active.
        rm.adminService.transitionToActive(requestInfo);

        // 3. Try Transition to standby
        Thread t = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    rm.transitionToStandby(true);
                } catch (Exception e) {
                    // Record the failure in the test log with context; the state
                    // assertions after the thread finishes still decide the outcome.
                    LOG.error("Background transition to standby failed", e);
                }
            }
        });
        t.start();

        // Wait for the background transition to finish before triggering the
        // fenced store update.
        t.join();
        rm.getRMContext().getStateStore().updateApplicationState(null);

        rm.adminService.transitionToStandby(requestInfo);
        checkStandbyRMFunctionality();
        rm.stop();
    }

    /**
     * Brings up one RM, makes it active and verifies its metrics and context,
     * then creates a second RM on the same state store whose
     * ResourceTrackerService fails to start. The failed transition to active of
     * the second RM must leave its metrics and RM context (nodes, apps) empty.
     */
    @Test
    @Ignore
    public void testFailoverClearsRMContext() throws Exception {
        configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
        configuration.set(YarnConfiguration.LEADER_CLIENT_FAILOVER_PROXY_PROVIDER,
                "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider");
        configuration.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
        Configuration conf = new YarnConfiguration(configuration);

        MemoryRMStateStore memStore = new MemoryRMStateStore();
        memStore.init(conf);

        // 1. start RM
        rm = new MockRM(conf, memStore);
        rm.init(conf);
        rm.start();

        StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER);
        checkMonitorHealth();
        checkStandbyRMFunctionality();

        // 2. Transition to active
        rm.adminService.transitionToActive(requestInfo);
        checkMonitorHealth();
        checkActiveRMFunctionality();
        // Progress markers are informational, not errors.
        LOG.info("Before checking first RM ClusterMetrics");
        verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
        assertEquals(1, rm.getRMContext().getRMNodes().size());
        assertEquals(1, rm.getRMContext().getRMApps().size());

        LOG.info("Checks for the first RM are passed");

        // 3. Create new RM
        rm = new MockRM(conf, memStore) {
            @Override
            protected ResourceTrackerService createResourceTrackerService() {
                return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor,
                        this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {
                    @Override
                    protected void serviceStart() throws Exception {
                        throw new Exception("ResourceTracker service failed");
                    }
                };
            }
        };
        rm.init(conf);
        rm.start();
        checkMonitorHealth();
        checkStandbyRMFunctionality();

        // 4. Try Transition to active, throw exception
        try {
            rm.adminService.transitionToActive(requestInfo);
            Assert.fail("Transitioned to Active should throw exception.");
        } catch (Exception e) {
            // The exception message must contain the expected text; the previous
            // reversed check also passed for an empty message.
            assertTrue(e.getMessage().contains("Error when transitioning to Active mode"));
        }
        // 5. Clears the metrics
        verifyClusterMetrics(0, 0, 0, 0, 0, 0);
        assertEquals(0, rm.getRMContext().getRMNodes().size());
        assertEquals(0, rm.getRMContext().getRMApps().size());

        rm.stop();
    }

    /**
     * Sets an invalid capacity (200) for the root default queue before a
     * transition to active, expects exactly one TRANSITION_TO_ACTIVE_FAILED
     * event to reach the dispatcher, then restores a valid capacity and checks
     * that active and standby transitions work again.
     */
    @Test(timeout = 90000)
    @Ignore
    // In Hops we re-initialize the Scheduler services during transition to active
    public void testTransitionedToActiveRefreshFail() throws Exception {
        configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
        configuration.set(YarnConfiguration.LEADER_CLIENT_FAILOVER_PROXY_PROVIDER,
                "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider");

        YarnConfiguration conf = new YarnConfiguration(configuration);
        // The shared field is replaced so that later configuration.set(...) calls
        // change the object handed to the admin service below.
        configuration = new CapacitySchedulerConfiguration(conf);
        rm = new MockRM(configuration) {
            @Override
            protected AdminService createAdminService() {
                return new AdminService(this, getRMContext()) {
                    @Override
                    protected void setConfig(Configuration conf) {
                        // Ignore the supplied conf and always use the test's configuration.
                        super.setConfig(configuration);
                    }
                };
            }

            @Override
            protected Dispatcher createDispatcher() {
                return new FailFastDispatcher();
            }
        };

        rm.init(configuration);
        rm.start();
        final StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
                HAServiceProtocol.RequestSource.REQUEST_BY_USER);

        configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
        rm.adminService.transitionToStandby(requestInfo);
        assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
        configuration.set("yarn.scheduler.capacity.root.default.capacity", "200");
        try {
            rm.adminService.transitionToActive(requestInfo);
        } catch (Exception e) {
            // The exception message must contain the expected text (spelling kept
            // exactly as in the original check); the previous reversed check also
            // passed for an empty message.
            assertTrue(e.getMessage().contains("Error on refreshAll during transistion to Active"));
        }
        FailFastDispatcher dispatcher = ((FailFastDispatcher) rm.rmContext.getDispatcher());
        dispatcher.await();
        assertEquals(1, dispatcher.getEventCount());
        // Making correct conf and check the state
        configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
        rm.adminService.transitionToActive(requestInfo);
        assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
        rm.adminService.transitionToStandby(requestInfo);
        assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());

        rm.stop();
    }

    /**
     * Verifies RM address resolution when per-RM hostnames are configured.
     *
     * <p>First pass: both hostname and explicit service addresses are set, and
     * the explicit addresses must stay in effect. Second pass: only hostnames are
     * set, and the RM address must become {@code <hostname>:8032}.
     *
     * @param includeBindHost when true, {@code RM_BIND_HOST} is set to 9.9.9.9 and
     *        the web app address is expected to start with it; otherwise it is
     *        expected to start with 0.0.0.0
     */
    public void innerTestHAWithRMHostName(boolean includeBindHost) {
        //this is run two times, with and without a bind host configured
        if (includeBindHost) {
            configuration.set(YarnConfiguration.RM_BIND_HOST, "9.9.9.9");
        }

        //test if both RM_HOSTNAME_{rm_id} and RM_RPCADDRESS_{rm_id} are set
        //We should only read rpc addresses from RM_RPCADDRESS_{rm_id} configuration
        configuration.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, RM1_NODE_ID), "1.1.1.1");
        configuration.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, RM2_NODE_ID), "0.0.0.0");
        configuration.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, RM3_NODE_ID), "2.2.2.2");
        try {
            Configuration conf = new YarnConfiguration(configuration);
            rm = new MockRM(conf);
            rm.init(conf);
            for (String confKey : YarnConfiguration.getServiceAddressConfKeys(conf)) {
                assertEquals("RPC address not set for " + confKey, RM1_ADDRESS,
                        conf.get(HAUtil.addSuffix(confKey, RM1_NODE_ID)));
                assertEquals("RPC address not set for " + confKey, RM2_ADDRESS,
                        conf.get(HAUtil.addSuffix(confKey, RM2_NODE_ID)));
                assertEquals("RPC address not set for " + confKey, RM3_ADDRESS,
                        conf.get(HAUtil.addSuffix(confKey, RM3_NODE_ID)));
                // Expected value first, actual second, so failure messages read correctly.
                if (includeBindHost) {
                    assertEquals("Web address misconfigured WITH bind-host", "9.9.9.9",
                            rm.webAppAddress.substring(0, 7));
                } else {
                    //YarnConfiguration tries to figure out which rm host it's on by binding to it,
                    //which doesn't happen for any of these fake addresses, so we end up with 0.0.0.0
                    assertEquals("Web address misconfigured WITHOUT bind-host", "0.0.0.0",
                            rm.webAppAddress.substring(0, 7));
                }
            }
        } catch (YarnRuntimeException e) {
            fail("Should not throw any exceptions.");
        }

        //test if only RM_HOSTNAME_{rm_id} is set
        configuration.clear();
        configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
        configuration.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + RM2_NODE_ID);
        configuration.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, RM1_NODE_ID), "1.1.1.1");
        configuration.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, RM2_NODE_ID), "0.0.0.0");
        try {
            Configuration conf = new YarnConfiguration(configuration);
            rm = new MockRM(conf);
            rm.init(conf);
            assertEquals("RPC address not set for " + YarnConfiguration.RM_ADDRESS, "1.1.1.1:8032",
                    conf.get(HAUtil.addSuffix(YarnConfiguration.RM_ADDRESS, RM1_NODE_ID)));
            assertEquals("RPC address not set for " + YarnConfiguration.RM_ADDRESS, "0.0.0.0:8032",
                    conf.get(HAUtil.addSuffix(YarnConfiguration.RM_ADDRESS, RM2_NODE_ID)));

        } catch (YarnRuntimeException e) {
            fail("Should not throw any exceptions.");
        }

        rm.stop();
    }

    /**
     * Compares root-queue and cluster metrics against the expected values,
     * retrying up to five times with a one-second pause after each mismatch.
     * Fails with the message of the last mismatch if no attempt succeeds.
     *
     * @param activeNodes expected number of active node managers
     * @param appsSubmitted expected submitted application count
     * @param appsPending expected pending application count
     * @param containersPending expected pending container count
     * @param availableMB expected available memory in MB
     * @param activeApplications expected active application count
     * @throws Exception if the wait between attempts is interrupted
     */
    private void verifyClusterMetrics(int activeNodes, int appsSubmitted, int appsPending, int containersPending,
            long availableMB, int activeApplications) throws Exception {
        final int maxAttempts = 5;
        QueueMetrics queueMetrics = rm.getResourceScheduler().getRootQueueMetrics();
        ClusterMetrics nodeMetrics = ClusterMetrics.getMetrics();
        String lastMismatch = null;

        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            try {
                // queue-level counters
                assertMetric("appsSubmitted", appsSubmitted, queueMetrics.getAppsSubmitted());
                assertMetric("appsPending", appsPending, queueMetrics.getAppsPending());
                assertMetric("containersPending", containersPending, queueMetrics.getPendingContainers());
                assertMetric("availableMB", availableMB, queueMetrics.getAvailableMB());
                assertMetric("activeApplications", activeApplications, queueMetrics.getActiveApps());
                // node-level counter
                assertMetric("activeNodes", activeNodes, nodeMetrics.getNumActiveNMs());
                return;
            } catch (AssertionError mismatch) {
                lastMismatch = mismatch.getMessage();
                System.out.println("Waiting for metrics assertion to complete");
                Thread.sleep(1000);
            }
        }

        // Every attempt mismatched: report the most recent reason.
        fail(lastMismatch);
    }

    /**
     * Asserts that a single metric has the expected value.
     *
     * @param metricName name used in the failure message
     * @param expected expected metric value
     * @param actual observed metric value
     */
    private void assertMetric(String metricName, long expected, long actual) {
        final String failureMessage = "Incorrect value for metric " + metricName;
        assertEquals(failureMessage, expected, actual);
    }

    /**
     * Dispatcher stub that only counts handler registrations and remembers
     * whether it has been stopped; it provides no event handler of its own.
     */
    @SuppressWarnings("rawtypes")
    class MyCountingDispatcher extends AbstractService implements Dispatcher {

        /** Number of {@code register} calls seen so far. */
        private int eventHandlerCount = 0;

        /** Set once {@code serviceStop()} has run. */
        private volatile boolean stopped = false;

        public MyCountingDispatcher() {
            super("MyCountingDispatcher");
        }

        /** Always returns {@code null}. */
        @Override
        public EventHandler getEventHandler() {
            return null;
        }

        /** Counts the registration; the handler itself is not stored. */
        @Override
        public void register(Class<? extends Enum> eventType, EventHandler handler) {
            eventHandlerCount += 1;
        }

        public int getEventHandlerCount() {
            return eventHandlerCount;
        }

        @Override
        protected void serviceStop() throws Exception {
            stopped = true;
            super.serviceStop();
        }

        public boolean isStopped() {
            return stopped;
        }
    }

    /**
     * DrainDispatcher that counts TRANSITION_TO_ACTIVE_FAILED events instead of
     * dispatching them; every other event is dispatched normally.
     */
    class FailFastDispatcher extends DrainDispatcher {
        /** Number of TRANSITION_TO_ACTIVE_FAILED events intercepted. */
        int eventreceived = 0;

        @SuppressWarnings("rawtypes")
        @Override
        protected void dispatch(Event event) {
            boolean transitionFailed = event.getType() == RMFatalEventType.TRANSITION_TO_ACTIVE_FAILED;
            if (!transitionFailed) {
                super.dispatch(event);
                return;
            }
            eventreceived++;
        }

        public int getEventCount() {
            return eventreceived;
        }
    }
}