Source code for org.apache.hadoop.hdfs.server.balancer.TestBalancerWithNodeGroup.java

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.balancer.TestBalancerWithNodeGroup.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.balancer;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeoutException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSClusterWithNodeGroup;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.hadoop.net.NetworkTopology;
import org.junit.Assert;
import org.junit.Test;

/**
 * This class tests if a balancer schedules tasks correctly.
 */
/**
 * Tests that the HDFS balancer schedules block-move tasks correctly when
 * datanodes are organized into node groups (i.e. when the cluster uses
 * {@code NetworkTopologyWithNodeGroup} and the node-group-aware block
 * placement policy).
 */
public class TestBalancerWithNodeGroup {

    /** Per-datanode simulated capacity, in bytes. */
    final private static long CAPACITY = 500L;
    final private static String RACK0 = "/rack0";
    final private static String RACK1 = "/rack1";
    final private static String NODEGROUP0 = "/nodegroup0";
    final private static String NODEGROUP1 = "/nodegroup1";
    final private static String NODEGROUP2 = "/nodegroup2";
    final static private String fileName = "/tmp.txt";
    final static private Path filePath = new Path(fileName);
    private static final Log LOG = LogFactory.getLog("org.apache.hadoop.hdfs.TestBalancerWithNodeGroup");
    MiniDFSClusterWithNodeGroup cluster;

    ClientProtocol client;
    private Balancer balancer;

    /** Maximum time to wait for heartbeat/balance convergence, in msec. */
    static final long TIMEOUT = 20000L;
    /** Allowed relative error when comparing reported vs. expected capacity. */
    static final double CAPACITY_ALLOWED_VARIANCE = 0.005; // 0.5%
    /** Allowed per-node deviation from average utilization. */
    static final double BALANCE_ALLOWED_VARIANCE = 0.11; // 10% + delta
    static final int DEFAULT_BLOCK_SIZE = 5;
    /** Seeds test-file contents; not shadowed by any local (see runBalancer). */
    private static final Random r = new Random();

    static {
        // Shorten the balancer's block-move wait so the tests run quickly.
        Balancer.setBlockMoveWaitTime(1000L);
    }

    /**
     * Configures a tiny, fast, simulated cluster with node-group-aware
     * topology and block placement.
     */
    private void initConf(Configuration conf) {
        conf.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
        conf.setInt("io.bytes.per.checksum", DEFAULT_BLOCK_SIZE);
        conf.setLong("dfs.heartbeat.interval", 1L);
        conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
        conf.setLong("dfs.balancer.movedWinWidth", 2000L);
        conf.set("net.topology.impl", "org.apache.hadoop.net.NetworkTopologyWithNodeGroup");
        conf.set("dfs.block.replicator.classname",
                "org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyWithNodeGroup");
    }

    /**
     * Creates {@link #filePath} with length {@code fileLen} and waits until
     * the requested replication factor is reached.
     */
    private void createFile(long fileLen, short replicationFactor) throws IOException {
        FileSystem fs = cluster.getFileSystem();
        DFSTestUtil.createFile(fs, filePath, fileLen, replicationFactor, r.nextLong());
        DFSTestUtil.waitReplication(fs, filePath, replicationFactor);
    }

    /** Creates and initializes a balancer-ready {@link Configuration}. */
    private Configuration createConf() {
        Configuration conf = new Configuration();
        initConf(conf);
        return conf;
    }

    /**
     * Wait until heartbeat gives expected results, within CAPACITY_ALLOWED_VARIANCE,
     * summed over all nodes.  Times out after TIMEOUT msec.
     * @param expectedUsedSpace expected cluster-wide used space, in bytes
     * @param expectedTotalSpace expected cluster-wide total space, in bytes
     * @throws IOException if getStats() fails
     * @throws TimeoutException if the stats do not converge within TIMEOUT msec
     */
    private void waitForHeartBeat(long expectedUsedSpace, long expectedTotalSpace)
            throws IOException, TimeoutException {
        long timeout = TIMEOUT;
        long failtime = (timeout <= 0L) ? Long.MAX_VALUE : System.currentTimeMillis() + timeout;

        while (true) {
            // status[0] = total capacity, status[1] = used space (DFS stats layout).
            long[] status = client.getStats();
            double totalSpaceVariance = Math.abs((double) status[0] - expectedTotalSpace) / expectedTotalSpace;
            double usedSpaceVariance = Math.abs((double) status[1] - expectedUsedSpace) / expectedUsedSpace;
            if (totalSpaceVariance < CAPACITY_ALLOWED_VARIANCE && usedSpaceVariance < CAPACITY_ALLOWED_VARIANCE) {
                break; // done
            }

            if (System.currentTimeMillis() > failtime) {
                throw new TimeoutException("Cluster failed to reached expected values of " + "totalSpace (current: "
                        + status[0] + ", expected: " + expectedTotalSpace + "), or usedSpace (current: " + status[1]
                        + ", expected: " + expectedUsedSpace + "), in more than " + timeout + " msec.");
            }
            try {
                Thread.sleep(100L);
            } catch (InterruptedException ignored) {
                // Polling loop: interruption is deliberately ignored; the
                // overall wait is still bounded by the failtime check above.
            }
        }
    }

    /**
     * Wait until balanced: each datanode gives utilization within
     * BALANCE_ALLOWED_VARIANCE of average.
     * @param totalUsedSpace cluster-wide used space, in bytes
     * @param totalCapacity cluster-wide capacity, in bytes
     * @throws IOException if the datanode report cannot be fetched
     * @throws TimeoutException if some node stays out of balance past TIMEOUT
     */
    private void waitForBalancer(long totalUsedSpace, long totalCapacity) throws IOException, TimeoutException {
        long timeout = TIMEOUT;
        long failtime = (timeout <= 0L) ? Long.MAX_VALUE : System.currentTimeMillis() + timeout;
        final double avgUtilization = ((double) totalUsedSpace) / totalCapacity;
        boolean balanced;
        do {
            DatanodeInfo[] datanodeReport = client.getDatanodeReport(DatanodeReportType.ALL);
            // JUnit convention: expected value first.
            assertEquals(cluster.getDataNodes().size(), datanodeReport.length);
            balanced = true;
            for (DatanodeInfo datanode : datanodeReport) {
                double nodeUtilization = ((double) datanode.getDfsUsed()) / datanode.getCapacity();
                if (Math.abs(avgUtilization - nodeUtilization) > BALANCE_ALLOWED_VARIANCE) {
                    balanced = false;
                    if (System.currentTimeMillis() > failtime) {
                        throw new TimeoutException("Rebalancing expected avg utilization to become "
                                + avgUtilization + ", but on datanode " + datanode + " it remains at "
                                + nodeUtilization + " after more than " + TIMEOUT + " msec.");
                    }
                    try {
                        Thread.sleep(100);
                    } catch (InterruptedException ignored) {
                        // Polling loop: interruption deliberately ignored,
                        // bounded by the failtime check above.
                    }
                    break; // re-fetch the report and start over
                }
            }
        } while (!balanced);
    }

    /**
     * Start the balancer, require it to succeed, and check that the cluster
     * is balanced after the run.
     */
    private void runBalancer(Configuration conf, long totalUsedSpace, long totalCapacity) throws Exception {
        waitForHeartBeat(totalUsedSpace, totalCapacity);

        // start rebalancing
        balancer = new Balancer(conf);

        // Renamed from 'r' to avoid shadowing the static Random field.
        final int exitCode = balancer.run(new String[0]);

        assertEquals(Balancer.SUCCESS, exitCode);

        waitForHeartBeat(totalUsedSpace, totalCapacity);
        LOG.info("Rebalancing with default factor.");
        waitForBalancer(totalUsedSpace, totalCapacity);
    }

    /**
     * Start the balancer and require only that it terminates, either with
     * SUCCESS or NO_MOVE_PROGRESS (used when no block movement is expected).
     */
    private void runBalancerCanFinish(Configuration conf, long totalUsedSpace, long totalCapacity)
            throws Exception {
        waitForHeartBeat(totalUsedSpace, totalCapacity);

        balancer = new Balancer(conf);
        final int exitCode = balancer.run(new String[0]);
        Assert.assertTrue(exitCode == Balancer.SUCCESS || (exitCode == Balancer.NO_MOVE_PROGRESS));
        waitForHeartBeat(totalUsedSpace, totalCapacity);
        LOG.info("Rebalancing ends successful.");
    }

    /**
     * Create a 4 nodes cluster: 2 nodes (n0, n1) in RACK0/NODEGROUP0, 1 node (n2)
     * in RACK1/NODEGROUP1 and 1 node (n3) in RACK1/NODEGROUP2. Fill the cluster
     * to 60% and 3 replicas, so n2 and n3 will have replica for all blocks according
     * to replica placement policy with NodeGroup. As a result, n2 and n3 will be
     * filled with 80% (60% x 4 / 3), and no blocks can be migrated from n2 and n3
     * to n0 or n1 as balancer policy with node group. Thus, we expect the balancer
     * to end in 5 iterations without move block process.
     */
    @Test(timeout = 60000)
    public void testBalancerEndInNoMoveProgress() throws Exception {
        Configuration conf = createConf();
        long[] capacities = new long[] { CAPACITY, CAPACITY, CAPACITY, CAPACITY };
        String[] racks = new String[] { RACK0, RACK0, RACK1, RACK1 };
        String[] nodeGroups = new String[] { NODEGROUP0, NODEGROUP0, NODEGROUP1, NODEGROUP2 };

        int numOfDatanodes = capacities.length;
        assertEquals(numOfDatanodes, racks.length);
        assertEquals(numOfDatanodes, nodeGroups.length);
        MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroups);
        cluster = new MiniDFSClusterWithNodeGroup(0, conf, capacities.length, true, true, null, racks, capacities);
        try {
            cluster.waitActive();
            client = DFSClient.createNamenode(conf);

            long totalCapacity = 0L;
            for (long capacity : capacities) {
                totalCapacity += capacity;
            }

            // fill up the cluster to be 60% full
            long totalUsedSpace = totalCapacity * 6 / 10;

            createFile(totalUsedSpace / 3, (short) 3);

            // run balancer which can finish in 5 iterations with no block movement.
            runBalancerCanFinish(conf, totalUsedSpace, totalCapacity);

        } finally {
            cluster.shutdown();
        }
    }

    /**
     * Create a cluster with even distribution, and a new empty node is added to
     * the cluster, then test rack locality for balancer policy: after balancing,
     * both racks must hold the same amount of used capacity.
     */
    @Test(timeout = 60000)
    public void testBalancerWithRackLocality() throws Exception {
        Configuration conf = createConf();
        long[] capacities = new long[] { CAPACITY, CAPACITY };
        String[] racks = new String[] { RACK0, RACK1 };
        String[] nodeGroups = new String[] { NODEGROUP0, NODEGROUP1 };

        int numOfDatanodes = capacities.length;
        assertEquals(numOfDatanodes, racks.length);
        // Fixed: setNodeGroups was previously called twice in a row here.
        MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroups);
        cluster = new MiniDFSClusterWithNodeGroup(0, conf, capacities.length, true, true, null, racks, capacities);
        try {
            cluster.waitActive();

            client = DFSClient.createNamenode(conf);

            long totalCapacity = 0L;
            for (long capacity : capacities) {
                totalCapacity += capacity;
            }

            // fill up the cluster to be 30% full
            long totalUsedSpace = totalCapacity * 3 / 10;
            createFile(totalUsedSpace / numOfDatanodes, (short) numOfDatanodes);

            long newCapacity = CAPACITY;
            String newRack = RACK1;
            String newNodeGroup = NODEGROUP2;

            // start up an empty node with the same capacity and on the same rack
            cluster.startDataNodes(conf, 1, true, null, new String[] { newRack }, new String[] { newNodeGroup },
                    new long[] { newCapacity });

            totalCapacity += newCapacity;

            // run balancer and validate results
            runBalancerCanFinish(conf, totalUsedSpace, totalCapacity);

            DatanodeInfo[] datanodeReport = client.getDatanodeReport(DatanodeReportType.ALL);

            // Aggregate used capacity per rack (first half of the network
            // location is the rack component when node groups are in use).
            Map<String, Integer> rackToUsedCapacity = new HashMap<String, Integer>();
            for (DatanodeInfo datanode : datanodeReport) {
                String rack = NetworkTopology.getFirstHalf(datanode.getNetworkLocation());
                int usedCapacity = (int) datanode.getDfsUsed();

                if (rackToUsedCapacity.get(rack) != null) {
                    rackToUsedCapacity.put(rack, usedCapacity + rackToUsedCapacity.get(rack));
                } else {
                    rackToUsedCapacity.put(rack, usedCapacity);
                }
            }
            // JUnit convention: expected value first.
            assertEquals(2, rackToUsedCapacity.size());
            assertEquals(rackToUsedCapacity.get(RACK0), rackToUsedCapacity.get(RACK1));

        } finally {
            cluster.shutdown();
        }
    }

    /**
     * Create a cluster with even distribution, add a new empty node in a new
     * node group on RACK1, then verify the balancer fully rebalances the
     * cluster under the node-group-aware policy.
     */
    @Test(timeout = 60000)
    public void testBalancerWithNodeGroup() throws Exception {
        Configuration conf = createConf();
        long[] capacities = new long[] { CAPACITY, CAPACITY };
        String[] racks = new String[] { RACK0, RACK1 };
        String[] nodeGroups = new String[] { NODEGROUP0, NODEGROUP1 };

        int numOfDatanodes = capacities.length;
        assertEquals(numOfDatanodes, racks.length);
        MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroups);
        cluster = new MiniDFSClusterWithNodeGroup(0, conf, capacities.length, true, true, null, racks, capacities);
        try {
            cluster.waitActive();
            client = DFSClient.createNamenode(conf);

            long totalCapacity = 0L;
            for (long capacity : capacities) {
                totalCapacity += capacity;
            }

            // fill up the cluster to be 30% full
            long totalUsedSpace = totalCapacity * 3 / 10;

            createFile(totalUsedSpace / numOfDatanodes, (short) numOfDatanodes);

            long newCapacity = CAPACITY;
            String newRack = RACK1;
            String newNodeGroup = NODEGROUP2;
            // start up an empty node with the same capacity and on the same rack
            cluster.startDataNodes(conf, 1, true, null, new String[] { newRack }, new String[] { newNodeGroup },
                    new long[] { newCapacity });

            totalCapacity += newCapacity;

            // run balancer and validate results
            runBalancer(conf, totalUsedSpace, totalCapacity);

        } finally {
            cluster.shutdown();
        }
    }

}