org.apache.hadoop.hdfs.server.namenode.TestHABasicFailover.java Source code

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.namenode.TestHABasicFailover.java
Source

/*
 * Copyright (C) 2015 hops.io.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Test;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeoutException;

/**
 * Exercises basic leader fail-over and fail-back between two namenodes running
 * inside a {@link MiniDFSCluster}.
 *
 * <p>The static helpers {@link #waitLeaderElection(List, NameNode, long)} and
 * {@link #doesDataNodesRecognizeLeader(List, NameNode)} are public so they can
 * be reused by other tests.
 *
 * <p>NOTE(review): this class extends the JUnit 3 {@code TestCase} while also
 * using JUnit 4 annotations. Presumably it is executed by the JUnit 3 style
 * runner, in which case {@code @Test(timeout = ...)} is not enforced --
 * confirm before relying on the timeout.
 */
public class TestHABasicFailover extends junit.framework.TestCase {

    public static final Log LOG = LogFactory.getLog(TestHABasicFailover.class);

    // Turn on verbose logging for the namenode components involved in the test.
    {
        ((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
        ((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL);
        ((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL);
    }

    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    int NUM_NAMENODES = 2;
    int NUM_DATANODES = 1;

    /**
     * Shuts the mini cluster down if it is still running. Safe to call when the
     * cluster was never created (e.g. the builder failed) or was already shut
     * down by the test itself.
     */
    @Override
    @After
    public void tearDown() throws Exception {
        shutdownCluster();
    }

    /**
     * Shuts down the cluster at most once and clears the field, so that a later
     * call (from the test's {@code finally} or from {@link #tearDown()}) is a
     * no-op instead of a second shutdown or a NullPointerException.
     */
    private void shutdownCluster() {
        MiniDFSCluster running = cluster;
        cluster = null;
        if (running != null) {
            running.shutdown();
        }
    }

    /**
     * Testing basic failover. After starting namenodes NN1, NN2, the namenode
     * that first initializes itself would be elected the leader. We allow NN1
     * to be the leader. We kill NN1. Failover will start and NN2 will detect
     * failure of NN1 and hence would elect itself as the leader. Also perform
     * fail-back to NN1 by killing NN2.
     *
     * @throws IOException      if the cluster cannot be started or a namenode
     *                          cannot be restarted
     * @throws TimeoutException if a leader is not elected, or not recognized by
     *                          all datanodes, within the allotted time
     */
    @Test(timeout = 900000)
    public void testFailover() throws IOException, TimeoutException {

        final int NN1 = 0, NN2 = 1;
        // The scenario needs at least two namenodes to fail over between.
        if (NUM_NAMENODES < 2) {
            NUM_NAMENODES = 2;
        }

        try {
            // Create cluster with 2 namenodes
            cluster = new MiniDFSCluster.Builder(conf)
                    .nnTopology(MiniDFSNNTopology.simpleHOPSTopology(NUM_NAMENODES)).numDataNodes(NUM_DATANODES)
                    .build();
            cluster.waitActive();

            // Base wait unit for leader election: one leader-check interval (ms)
            // plus one heartbeat interval (configured in seconds, hence * 1000).
            long timeout = conf.getInt(DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_KEY,
                    DFSConfigKeys.DFS_LEADER_CHECK_INTERVAL_IN_MS_DEFAULT)
                    + conf.getLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
                            DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT) * 1000L;

            /*
             * *********************************
             * testing fail over from NN1 to NN2
             * *********************************
             */
            // Check NN1 is the leader
            LOG.info("NameNode 1 id " + cluster.getNameNode(NN1).getId() + " address "
                    + cluster.getNameNode(NN1).getServiceRpcAddress());
            LOG.info("NameNode 2 id " + cluster.getNameNode(NN2).getId() + " address "
                    + cluster.getNameNode(NN2).getServiceRpcAddress());

            assertTrue("NN1 is expected to be leader, but is not", cluster.getNameNode(NN1).isLeader());

            // performing failover - Kill NN1. This would allow NN2 to be leader
            cluster.shutdownNameNode(NN1);

            // wait for leader to be elected and for Datanodes to also detect the leader
            waitLeaderElection(cluster.getDataNodes(), cluster.getNameNode(NN2), timeout * 10);

            // Check NN2 is the leader and failover is detected
            assertTrue("NN2 is expected to be the leader, but is not", cluster.getNameNode(NN2).isLeader());
            assertTrue("Not all datanodes detected the new leader",
                    doesDataNodesRecognizeLeader(cluster.getDataNodes(), cluster.getNameNode(NN2)));

            LOG.debug("TestNN going to restart the NN2");
            // restart the newly elected leader and see if it is still the leader
            cluster.restartNameNode(NN2);

            cluster.waitActive();
            waitLeaderElection(cluster.getDataNodes(), cluster.getNameNode(NN2), timeout * 10);
            assertTrue("NN2 is expected to be the leader, but is not", cluster.getNameNode(NN2).isLeader());
            assertTrue("Not all datanodes detected the new leader",
                    doesDataNodesRecognizeLeader(cluster.getDataNodes(), cluster.getNameNode(NN2)));

            /*
             * **************************************
             * Testing fail-back.
             *
             * After some interval a datanode asks a namenode to return all
             * alive namenodes in the system. The datanode starts new threads
             * for new namenodes. If it finds out that some previous namenode
             * is dead then the corresponding service thread is killed.
             *
             * A datanode finds out about new namenodes by asking existing
             * namenodes in the system. What happens if a datanode is connected
             * to a set X of namenodes and they all die suddenly, and after a
             * while a set Y of namenodes comes online? The datanode will have
             * no way of finding out the namenodes belonging to set Y.
             *
             * There is no fix for it yet. If such a thing happens then restart
             * the datanode with some correct namenode.
             *
             * In the tests such scenarios are avoided by making sure that
             * datanodes are connected to at least one namenode after killing
             * other namenodes.
             * **************************************
             */
            // Doing a fail back scenario to NN1
            cluster.restartNameNode(NN1); // will be restarted in the system with the next highest id while NN2 is still the leader
            cluster.waitActive();

            waitLeaderElection(cluster.getDataNodes(), cluster.getNameNode(NN2), timeout * 10);

            cluster.shutdownNameNode(NN2);
            cluster.waitActive();

            // waiting for NN1 to elect itself as the leader
            waitLeaderElection(cluster.getDataNodes(), cluster.getNameNode(NN1), timeout * 10);
            assertTrue("NN1 is expected to be the leader, but is not", cluster.getNameNode(NN1).isLeader());
            assertTrue("Not all datanodes detected the new leader",
                    doesDataNodesRecognizeLeader(cluster.getDataNodes(), cluster.getNameNode(NN1)));
        } finally {
            shutdownCluster();
        }

    }

    /**
     * Checks whether every given datanode reports being connected to the given
     * namenode.
     *
     * @param datanodes datanodes to query
     * @param namenode  namenode whose address each datanode is checked against
     * @return {@code true} if all datanodes are connected to the namenode's
     *         address (trivially {@code true} for an empty list), {@code false}
     *         as soon as one is not
     */
    public static boolean doesDataNodesRecognizeLeader(List<DataNode> datanodes, NameNode namenode) {
        for (DataNode datanode : datanodes) {
            if (!datanode.isConnectedToNN(namenode.getNameNodeAddress())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Blocks until the given namenode reports itself as leader and all given
     * datanodes are connected to it.
     *
     * <p>The two phases are timed independently, each against {@code timeout}.
     *
     * @param datanodes datanodes that must recognize the leader
     * @param nn        namenode expected to become leader
     * @param timeout   maximum time in milliseconds allowed for each phase
     * @throws TimeoutException if either phase exceeds {@code timeout}, or if
     *                          the waiting thread is interrupted (the interrupt
     *                          flag is restored and the interruption is
     *                          attached as the cause)
     */
    public static void waitLeaderElection(List<DataNode> datanodes, NameNode nn, long timeout)
            throws TimeoutException {
        // Phase 1: wait for the new leader to be elected.
        long initTime = System.currentTimeMillis();
        while (!nn.isLeader()) {
            pause(500, "namenode to be elected leader");

            // check for time out
            if (System.currentTimeMillis() - initTime >= timeout) {
                throw new TimeoutException("Namenode was not elected leader. Time out " + timeout);
            }
        }

        // Phase 2: wait for all datanodes to recognize the new leader.
        initTime = System.currentTimeMillis();
        while (true) {
            // Sleep before the first check as well: callers get a settling
            // period of at least 2 seconds even when the check would already pass.
            pause(2000, "datanodes to detect the leader");

            if (doesDataNodesRecognizeLeader(datanodes, nn)) {
                break;
            }
            // check for time out
            if (System.currentTimeMillis() - initTime >= timeout) {
                throw new TimeoutException("Datanodes weren't able to detect newly elected leader");
            }
        }
    }

    /**
     * Sleeps for the given time. If the thread is interrupted, the interrupt
     * flag is restored and the wait is aborted with a {@link TimeoutException}
     * (the only checked exception the public wait helper may throw) instead of
     * silently continuing to poll.
     */
    private static void pause(long millis, String waitingFor) throws TimeoutException {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
            TimeoutException aborted =
                    new TimeoutException("Interrupted while waiting for " + waitingFor);
            aborted.initCause(ex);
            throw aborted;
        }
    }
}