org.apache.hadoop.hdfs.TestReplication.java Source code

Introduction

Here is the source code for org.apache.hadoop.hdfs.TestReplication.java, a JUnit test of HDFS block replication. It covers rack-aware replica placement, bad-block reporting during replica transfer, retry of pending replications after replicas are deleted or corrupted, and detection of replicas whose on-disk length does not match the block's expected length.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import junit.framework.TestCase;
import java.io.*;
import java.util.Iterator;
import java.util.Random;
import java.net.*;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.FSConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.BlockLocation;

/**
 * This class tests the replication of a DFS file.
 */
public class TestReplication extends TestCase {
    private static final long seed = 0xDEADBEEFL;
    private static final int blockSize = 8192;
    private static final int fileSize = 16384;
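    // Seven datanodes spread across three racks in two data centers; each
    // entry is the network location assigned to the corresponding datanode.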
    private static final String[] racks = new String[] { "/d1/r1", "/d1/r1", "/d1/r2", "/d1/r2", "/d1/r2", "/d2/r3",
            "/d2/r3" };
    private static final int numDatanodes = racks.length;
    private static final Log LOG = LogFactory.getLog("org.apache.hadoop.hdfs.TestReplication");

    private void writeFile(FileSystem fileSys, Path name, int repl) throws IOException {
        // create and write a file that contains two blocks of data
        // (fileSize is exactly twice blockSize)
        FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf().getInt("io.file.buffer.size", 4096),
                (short) repl, (long) blockSize);
        byte[] buffer = new byte[fileSize];
        Random rand = new Random(seed);
        rand.nextBytes(buffer);
        stm.write(buffer);
        stm.close();
    }

    /* Check that replica placement is rack-aware: replicas land on the
     * configured racks, at least two replicas of a block share a rack, and
     * at least two are on different racks (when there are enough replicas). */
    private void checkFile(FileSystem fileSys, Path name, int repl) throws IOException {
        Configuration conf = fileSys.getConf();
        ClientProtocol namenode = DFSClient.createNamenode(conf);

        waitForBlockReplication(name.toString(), namenode, Math.min(numDatanodes, repl), -1);

        LocatedBlocks locations = namenode.getBlockLocations(name.toString(), 0, Long.MAX_VALUE);
        FileStatus stat = fileSys.getFileStatus(name);
        BlockLocation[] blockLocations = fileSys.getFileBlockLocations(stat, 0L, Long.MAX_VALUE);
        // verify that rack locations match
        assertEquals(locations.locatedBlockCount(), blockLocations.length);
        for (int i = 0; i < blockLocations.length; i++) {
            LocatedBlock blk = locations.get(i);
            DatanodeInfo[] datanodes = blk.getLocations();
            String[] topologyPaths = blockLocations[i].getTopologyPaths();
            assertEquals(datanodes.length, topologyPaths.length);
            for (int j = 0; j < topologyPaths.length; j++) {
                boolean found = false;
                for (int k = 0; k < racks.length; k++) {
                    if (topologyPaths[j].startsWith(racks[k])) {
                        found = true;
                        break;
                    }
                }
                assertTrue(found);
            }
        }

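        // With more than one replica per block, the default placement policy
        // puts some replicas on a shared rack and at least one on a different
        // rack, so both flags below must end up true:
        //   isOnSameRack    - some pair of replicas shares a rack
        //   isNotOnSameRack - some pair of replicas spans two racks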
        boolean isOnSameRack = true, isNotOnSameRack = true;
        for (LocatedBlock blk : locations.getLocatedBlocks()) {
            DatanodeInfo[] datanodes = blk.getLocations();
            if (datanodes.length <= 1)
                break;
            if (datanodes.length == 2) {
                isNotOnSameRack = !(datanodes[0].getNetworkLocation().equals(datanodes[1].getNetworkLocation()));
                break;
            }
            isOnSameRack = false;
            isNotOnSameRack = false;
            for (int i = 0; i < datanodes.length - 1; i++) {
                LOG.info("datanode " + i + ": " + datanodes[i].getName());
                boolean onRack = false;
                for (int j = i + 1; j < datanodes.length; j++) {
                    if (datanodes[i].getNetworkLocation().equals(datanodes[j].getNetworkLocation())) {
                        onRack = true;
                    }
                }
                if (onRack) {
                    isOnSameRack = true;
                }
                if (!onRack) {
                    isNotOnSameRack = true;
                }
                if (isOnSameRack && isNotOnSameRack)
                    break;
            }
            if (!isOnSameRack || !isNotOnSameRack)
                break;
        }
        assertTrue(isOnSameRack);
        assertTrue(isNotOnSameRack);
    }

    private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
        assertTrue(fileSys.exists(name));
        fileSys.delete(name, true);
        assertTrue(!fileSys.exists(name));
    }

    /*
     * Test that a datanode reports a bad block to the namenode when a
     * checksum failure occurs during a replication request.
     */
    public void testBadBlockReportOnTransfer() throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = null;
        DFSClient dfsClient = null;
        LocatedBlocks blocks = null;
        int replicaCount = 0;
        MiniDFSCluster cluster = new MiniDFSCluster(conf, 2, true, null);
        cluster.waitActive();
        fs = cluster.getFileSystem();
        dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);

        // Create file with replication factor of 1
        Path file1 = new Path("/tmp/testBadBlockReportOnTransfer/file1");
        DFSTestUtil.createFile(fs, file1, 1024, (short) 1, 0);
        DFSTestUtil.waitReplication(fs, file1, (short) 1);

        // Corrupt the block belonging to the created file
        String block = DFSTestUtil.getFirstBlock(fs, file1).getBlockName();
        cluster.corruptBlockOnDataNodes(block);

        // Increase replication factor, this should invoke transfer request
        // Receiving datanode fails on checksum and reports it to namenode
        fs.setReplication(file1, (short) 2);

        // Now get block details and check if the block is corrupt
        blocks = dfsClient.namenode.getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
        while (!blocks.get(0).isCorrupt()) {
            try {
                LOG.info("Waiting until block is marked as corrupt...");
                Thread.sleep(1000);
            } catch (InterruptedException ie) {
            }
            blocks = dfsClient.namenode.getBlockLocations(file1.toString(), 0, Long.MAX_VALUE);
        }
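        // The failed transfer must not add a new location; the corrupt
        // replica on the original datanode should be the only one listed.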
        replicaCount = blocks.get(0).getLocations().length;
        assertEquals(1, replicaCount);
        cluster.shutdown();
    }

    /**
     * Tests replication in DFS.
     */
    public void runReplication(boolean simulated) throws IOException {
        Configuration conf = new Configuration();
        conf.setBoolean("dfs.replication.considerLoad", false);
        if (simulated) {
            conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
        }
        MiniDFSCluster cluster = new MiniDFSCluster(conf, numDatanodes, true, racks);
        cluster.waitActive();

        InetSocketAddress addr = new InetSocketAddress("localhost", cluster.getNameNodePort());
        DFSClient client = new DFSClient(addr, conf);

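        // Ask the namenode for every datanode it currently considers live;
        // all nodes started by the mini-cluster should have registered.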
        DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
        assertEquals("Number of Datanodes ", numDatanodes, info.length);
        FileSystem fileSys = cluster.getFileSystem();
        try {
            Path file1 = new Path("/smallblocktest.dat");
            writeFile(fileSys, file1, 3);
            checkFile(fileSys, file1, 3);
            cleanupFile(fileSys, file1);
            writeFile(fileSys, file1, 10);
            checkFile(fileSys, file1, 10);
            cleanupFile(fileSys, file1);
            writeFile(fileSys, file1, 4);
            checkFile(fileSys, file1, 4);
            cleanupFile(fileSys, file1);
            writeFile(fileSys, file1, 1);
            checkFile(fileSys, file1, 1);
            cleanupFile(fileSys, file1);
            writeFile(fileSys, file1, 2);
            checkFile(fileSys, file1, 2);
            cleanupFile(fileSys, file1);
        } finally {
            fileSys.close();
            cluster.shutdown();
        }
    }

    public void testReplicationSimulatedStorage() throws IOException {
        runReplication(true);
    }

    public void testReplication() throws IOException {
        runReplication(false);
    }

    // Waits for all the blocks of a file to reach the expected replication factor
    private void waitForBlockReplication(String filename, ClientProtocol namenode, int expected, long maxWaitSec)
            throws IOException {
        long start = System.currentTimeMillis();

        // wait for all the blocks to be replicated
        LOG.info("Checking for block replication for " + filename);
        while (true) {
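            // Fetch the current locations of every block in the file and
            // check that each block has at least `expected` replicas.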
            boolean replOk = true;
            LocatedBlocks blocks = namenode.getBlockLocations(filename, 0, Long.MAX_VALUE);

            for (Iterator<LocatedBlock> iter = blocks.getLocatedBlocks().iterator(); iter.hasNext();) {
                LocatedBlock block = iter.next();
                int actual = block.getLocations().length;
                if (actual < expected) {
                    LOG.info("Not enough replicas for " + block.getBlock() + " yet. Expecting " + expected
                            + ", got " + actual + ".");
                    replOk = false;
                    break;
                }
            }

            if (replOk) {
                return;
            }

            if (maxWaitSec > 0 && (System.currentTimeMillis() - start) > (maxWaitSec * 1000)) {
                throw new IOException(
                        "Timed out while waiting for all blocks to be replicated for " + filename);
            }

            try {
                Thread.sleep(500);
            } catch (InterruptedException ignored) {
            }
        }
    }

    /* This test makes sure that the NameNode retries all the available
     * replicas when re-replicating an under-replicated block.
     *
     * It creates a file with one block and a replication factor of 4, then
     * deletes one of the replicas and corrupts two others. The expected
     * behaviour is that the missing replicas are re-replicated from the one
     * remaining valid source.
     */
    public void testPendingReplicationRetry() throws IOException {

        MiniDFSCluster cluster = null;
        int numDataNodes = 4;
        String testFile = "/replication-test-file";
        Path testPath = new Path(testFile);

        byte[] buffer = new byte[1024];
        for (int i = 0; i < buffer.length; i++) {
            buffer[i] = '1';
        }

        try {
            Configuration conf = new Configuration();
            conf.set("dfs.replication", Integer.toString(numDataNodes));
            //first time format
            cluster = new MiniDFSCluster(0, conf, numDataNodes, true, true, null, null);
            cluster.waitActive();
            DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()),
                    conf);

            OutputStream out = cluster.getFileSystem().create(testPath);
            out.write(buffer);
            out.close();

            waitForBlockReplication(testFile, dfsClient.namenode, numDataNodes, -1);

            // get first block of the file.
            String block = dfsClient.namenode.getBlockLocations(testFile, 0, Long.MAX_VALUE).get(0).getBlock()
                    .getBlockName();

            cluster.shutdown();
            cluster = null;

            //Now mess up some of the replicas.
            //Delete the first and corrupt the next two.
            File baseDir = new File(System.getProperty("test.build.data"), "dfs/data");
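            // Fill the first 25 bytes with junk; these bytes are written over
            // the middle of each replica that is corrupted below.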
            for (int i = 0; i < 25; i++) {
                buffer[i] = '0';
            }

            int fileCount = 0;
            for (int i = 0; i < 6; i++) {
                File blockFile = new File(baseDir, "data" + (i + 1) + "/current/" + block);
                LOG.info("Checking for file " + blockFile);

                if (blockFile.exists()) {
                    if (fileCount == 0) {
                        LOG.info("Deleting file " + blockFile);
                        assertTrue(blockFile.delete());
                    } else {
                        // corrupt it.
                        LOG.info("Corrupting file " + blockFile);
                        long len = blockFile.length();
                        assertTrue(len > 50);
                        RandomAccessFile blockOut = new RandomAccessFile(blockFile, "rw");
                        try {
                            blockOut.seek(len / 3);
                            blockOut.write(buffer, 0, 25);
                        } finally {
                            blockOut.close();
                        }
                    }
                    fileCount++;
                }
            }
            assertEquals(3, fileCount);

            /* Restart the MiniDFSCluster with more datanodes, since once a
             * writeBlock to a datanode fails, the same block cannot be written
             * to it again immediately. In our case some replication attempts
             * will fail.
             */

            LOG.info("Restarting minicluster after deleting a replica and corrupting 2 crcs");
            conf = new Configuration();
            conf.set("dfs.replication", Integer.toString(numDataNodes));
            conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
            conf.set("dfs.datanode.block.write.timeout.sec", Integer.toString(5));
            conf.set("dfs.safemode.threshold.pct", "0.75f"); // only 3 copies exist

            cluster = new MiniDFSCluster(0, conf, numDataNodes * 2, false, true, null, null);
            cluster.waitActive();

            dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf);

            waitForBlockReplication(testFile, dfsClient.namenode, numDataNodes, -1);

        } finally {
            if (cluster != null) {
                cluster.shutdown();
            }
        }
    }

    /**
     * Test that replication detects on-disk replicas whose length does not
     * match the block's expected length.
     * @throws Exception
     */
    public void testReplicateLenMismatchedBlock() throws Exception {
        MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 2, true, null);
        try {
            cluster.waitActive();
            // test truncated block
            changeBlockLen(cluster, -1);
            // test extended block
            changeBlockLen(cluster, 1);
        } finally {
            cluster.shutdown();
        }
    }

    private void changeBlockLen(MiniDFSCluster cluster, int lenDelta) throws IOException, InterruptedException {
        final Path fileName = new Path("/file1");
        final short REPLICATION_FACTOR = (short) 1;
        final FileSystem fs = cluster.getFileSystem();
        final int fileLen = fs.getConf().getInt("io.bytes.per.checksum", 512);
        DFSTestUtil.createFile(fs, fileName, fileLen, REPLICATION_FACTOR, 0);
        DFSTestUtil.waitReplication(fs, fileName, REPLICATION_FACTOR);

        String block = DFSTestUtil.getFirstBlock(fs, fileName).getBlockName();

        // Change the length of a replica
        for (int i = 0; i < cluster.getDataNodes().size(); i++) {
            if (TestDatanodeBlockScanner.changeReplicaLength(block, i, lenDelta)) {
                break;
            }
        }
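        // changeReplicaLength returns true once it has found the replica on a
        // datanode and resized it by lenDelta bytes, so the loop stops at the
        // first datanode that actually stores the block.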

        // increase the file's replication factor
        fs.setReplication(fileName, (short) (REPLICATION_FACTOR + 1));

        // block replication triggers corrupt block detection
        DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()),
                fs.getConf());
        LocatedBlocks blocks = dfsClient.namenode.getBlockLocations(fileName.toString(), 0, fileLen);
        if (lenDelta < 0) { // replica truncated
            while (!blocks.get(0).isCorrupt() || REPLICATION_FACTOR != blocks.get(0).getLocations().length) {
                Thread.sleep(100);
                blocks = dfsClient.namenode.getBlockLocations(fileName.toString(), 0, fileLen);
            }
        } else { // no corruption detected; block replicated
            while (REPLICATION_FACTOR + 1 != blocks.get(0).getLocations().length) {
                Thread.sleep(100);
                blocks = dfsClient.namenode.getBlockLocations(fileName.toString(), 0, fileLen);
            }
        }
        fs.delete(fileName, true);
    }
}
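
The class above is a JUnit 3 test (it extends junit.framework.TestCase), so any JUnit 3 runner can execute it. Below is a minimal sketch of a standalone runner; the RunReplicationTest class name is illustrative, and it assumes junit.jar and the compiled Hadoop test classes are on the classpath.

import junit.textui.TestRunner;

public class RunReplicationTest {
    public static void main(String[] args) {
        // TestRunner.run(Class) instantiates the TestCase subclass, runs
        // every public test* method, and prints a plain-text summary.
        TestRunner.run(org.apache.hadoop.hdfs.TestReplication.class);
    }
}

From a Hadoop source tree of this vintage, the test is typically run through Ant instead, e.g. ant test -Dtestcase=TestReplication to run just this class.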