org.apache.hadoop.hdfs.TestDatanodeDeath.java Source code

Introduction

Here is the source code for org.apache.hadoop.hdfs.TestDatanodeDeath.java, a JUnit 3 test that starts a MiniDFSCluster, kills or restarts datanodes while files are being written, and verifies that the data read back is intact.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.log4j.Level;

/**
 * This class tests that a file being written to HDFS stays intact and
 * readable even when datanodes in the write pipeline die mid-write.
 */
public class TestDatanodeDeath extends TestCase {
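    // Instance initializer: raise all relevant HDFS loggers to ALL so that
    // pipeline setup and recovery can be traced when a run fails.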
    {
        ((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
        ((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL);
        ((Log4JLogger) FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
        ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL);
        ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL);
        ((Log4JLogger) InterDatanodeProtocol.LOG).getLogger().setLevel(Level.ALL);
    }

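    // Each test file spans two full blocks plus one extra byte, so both the
    // full-block and partial-block write paths are exercised.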
    static final int blockSize = 8192;
    static final int numBlocks = 2;
    static final int fileSize = numBlocks * blockSize + 1;
    static final int numDatanodes = 15;
    static final short replication = 3;

    int numberOfFiles = 3;
    int numThreads = 5;
    Workload[] workload = null;

    //
    // an object that does a bunch of transactions
    //
    static class Workload extends Thread {
        private short replication;
        private int numberOfFiles;
        private int id;
        private FileSystem fs;
        private long stamp;
        private final long myseed;

        Workload(long myseed, FileSystem fs, int threadIndex, int numberOfFiles, short replication, long stamp) {
            this.myseed = myseed;
            id = threadIndex;
            this.fs = fs;
            this.numberOfFiles = numberOfFiles;
            this.replication = replication;
            this.stamp = stamp;
        }

        // create a bunch of files. Write to them and then verify.
        public void run() {
            System.out.println("Workload starting ");
            for (int i = 0; i < numberOfFiles; i++) {
                Path filename = new Path(id + "." + i);
                try {
                    System.out.println("Workload processing file " + filename);
                    FSDataOutputStream stm = createFile(fs, filename, replication);
                    DFSClient.DFSOutputStream dfstream = (DFSClient.DFSOutputStream) (stm.getWrappedStream());
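                    // Slow the writer down artificially so that datanodes can
                    // be killed while this file is still being written.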
                    dfstream.setArtificialSlowdown(1000);
                    writeFile(stm, myseed);
                    stm.close();
                    checkFile(fs, filename, replication, numBlocks, fileSize, myseed);
                } catch (Throwable e) {
                    System.out.println("Workload exception " + e);
                    fail(e.toString());
                }

                // increment the stamp to indicate that another file is done.
                synchronized (this) {
                    stamp++;
                }
            }
        }

        public synchronized void resetStamp() {
            this.stamp = 0;
        }

        public synchronized long getStamp() {
            return stamp;
        }
    }

    //
    // creates a file and returns a descriptor for writing to it.
    //
    static private FSDataOutputStream createFile(FileSystem fileSys, Path name, short repl) throws IOException {
        // create the file; the caller then writes about three blocks of data to it
        FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf().getInt("io.file.buffer.size", 4096),
                repl, (long) blockSize);
        return stm;
    }

    //
    // writes to file
    //
    static private void writeFile(FSDataOutputStream stm, long seed) throws IOException {
        byte[] buffer = AppendTestUtil.randomBytes(seed, fileSize);

        int mid = fileSize / 2;
        stm.write(buffer, 0, mid);
        stm.write(buffer, mid, fileSize - mid);
    }

    //
    // verify that the data written are sane
    // 
    static private void checkFile(FileSystem fileSys, Path name, int repl, int numblocks, int filesize, long seed)
            throws IOException {
        boolean done = false;
        int attempt = 0;

        long len = fileSys.getFileStatus(name).getLen();
        assertTrue(name + " should be of size " + filesize + " but found to be of size " + len, len == filesize);

        // wait till all full blocks are confirmed by the datanodes.
        while (!done) {
            attempt++;
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
            }
            done = true;
            BlockLocation[] locations = fileSys.getFileBlockLocations(fileSys.getFileStatus(name), 0, filesize);

            if (locations.length < numblocks) {
                if (attempt > 100) {
                    System.out.println("File " + name + " has only " + locations.length + " blocks, "
                            + " but is expected to have " + numblocks + " blocks.");
                }
                done = false;
                continue;
            }
            for (int idx = 0; idx < locations.length; idx++) {
                if (locations[idx].getHosts().length < repl) {
                    if (attempt > 100) {
                        System.out.println("File " + name + " has " + locations.length + " blocks: " + " The " + idx
                                + " block has only " + locations[idx].getHosts().length
                                + " replicas but is expected to have " + repl + " replicas.");
                    }
                    done = false;
                    break;
                }
            }
        }
        FSDataInputStream stm = fileSys.open(name);
        final byte[] expected = AppendTestUtil.randomBytes(seed, fileSize);

        // do a sanity check. Read the file
        byte[] actual = new byte[filesize];
        stm.readFully(0, actual);
        checkData(actual, 0, expected, "Read 1");
    }

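    // Compare every byte read back against the expected random pattern,
    // reporting the absolute file offset of the first mismatch.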
    private static void checkData(byte[] actual, int from, byte[] expected, String message) {
        for (int idx = 0; idx < actual.length; idx++) {
            assertEquals(message + " byte " + (from + idx) + " differs. expected " + expected[from + idx]
                    + " actual " + actual[idx], actual[idx], expected[from + idx]);
            actual[idx] = 0;
        }
    }

    /**
     * A thread that kills datanodes and brings them back up. It waits to
     * ensure that all workers have finished at least one file since the
     * last round of restarts. This guarantees that all three replicas of
     * a block are never killed at the same time (otherwise the file would
     * be corrupt and the test would fail).
     */
    class Modify extends Thread {
        volatile boolean running;
        MiniDFSCluster cluster;
        Configuration conf;

        Modify(Configuration conf, MiniDFSCluster cluster) {
            running = true;
            this.cluster = cluster;
            this.conf = conf;
        }

        public void run() {

            while (running) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    continue;
                }

                // check if all threads have a new stamp. 
                // If so, then all workers have finished at least one file
                // since the last stamp.
                boolean loop = false;
                for (int i = 0; i < numThreads; i++) {
                    if (workload[i].getStamp() == 0) {
                        loop = true;
                        break;
                    }
                }
                if (loop) {
                    continue;
                }

                // Now it is guaranteed that there will be at least one valid
                // replica of a file.

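                // Restart at most replication - 1 datanodes per round so that
                // at least one replica of every block stays alive.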
                for (int i = 0; i < replication - 1; i++) {
                    // pick a random datanode to restart
                    int victim = AppendTestUtil.nextInt(numDatanodes);
                    try {
                        System.out.println("Restarting datanode " + victim);
                        cluster.restartDataNode(victim);
                        // cluster.startDataNodes(conf, 1, true, null, null);
                    } catch (IOException e) {
                        System.out.println("TestDatanodeDeath Modify exception " + e);
                        running = false;
                        fail("TestDatanodeDeath Modify exception " + e);
                    }
                }

                // set a new stamp for all workers
                for (int i = 0; i < numThreads; i++) {
                    workload[i].resetStamp();
                }
            }
        }

        // Make the thread exit.
        void close() {
            running = false;
            this.interrupt();
        }
    }

    /**
     * Test that writes to a file succeed even when datanodes in the
     * pipeline die.
     */
    private void complexTest() throws IOException {
        Configuration conf = new Configuration();
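        // Shorten heartbeat, recheck and socket timeouts so that dead
        // datanodes are detected quickly by the namenode and the client.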
        conf.setInt("heartbeat.recheck.interval", 2000);
        conf.setInt("dfs.heartbeat.interval", 2);
        conf.setInt("dfs.replication.pending.timeout.sec", 2);
        conf.setInt("dfs.socket.timeout", 5000);
        MiniDFSCluster cluster = new MiniDFSCluster(conf, numDatanodes, true, null);
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        Modify modThread = null;

        try {

            // Create threads and make them run workload concurrently.
            workload = new Workload[numThreads];
            for (int i = 0; i < numThreads; i++) {
                workload[i] = new Workload(AppendTestUtil.nextLong(), fs, i, numberOfFiles, replication, 0);
                workload[i].start();
            }

            // Create a thread that kills existing datanodes and creates new ones.
            modThread = new Modify(conf, cluster);
            modThread.start();

            // wait for all transactions to get over
            for (int i = 0; i < numThreads; i++) {
                try {
                    System.out.println("Waiting for thread " + i + " to complete...");
                    workload[i].join();

                    // if most of the threads are done, then stop restarting datanodes.
                    if (i >= numThreads / 2) {
                        modThread.close();
                    }

                } catch (InterruptedException e) {
                    i--; // retry
                }
            }
        } finally {
            if (modThread != null) {
                modThread.close();
                try {
                    modThread.join();
                } catch (InterruptedException e) {
                }
            }
            fs.close();
            cluster.shutdown();
        }
    }

    /**
     * Write to one file, then kill one datanode in the pipeline and then
     * close the file.
     */
    private void simpleTest(int datanodeToKill) throws IOException {
        Configuration conf = new Configuration();
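        // Same timeout tuning as complexTest: make failure detection fast.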
        conf.setInt("heartbeat.recheck.interval", 2000);
        conf.setInt("dfs.heartbeat.interval", 1);
        conf.setInt("dfs.replication.pending.timeout.sec", 2);
        conf.setInt("dfs.socket.timeout", 5000);
        int myMaxNodes = 5;
        System.out.println("SimpleTest starting with DataNode to Kill " + datanodeToKill);
        MiniDFSCluster cluster = new MiniDFSCluster(conf, myMaxNodes, true, null);
        cluster.waitActive();
        FileSystem fs = cluster.getFileSystem();
        short repl = 3;

        Path filename = new Path("simpletest.dat");
        try {

            // create a file and write one block of data
            System.out.println("SimpleTest creating file " + filename);
            FSDataOutputStream stm = createFile(fs, filename, repl);
            DFSClient.DFSOutputStream dfstream = (DFSClient.DFSOutputStream) (stm.getWrappedStream());

            // these are test settings
            dfstream.setChunksPerPacket(5);
            dfstream.setArtificialSlowdown(3000);

            final long myseed = AppendTestUtil.nextLong();
            byte[] buffer = AppendTestUtil.randomBytes(myseed, fileSize);
            int mid = fileSize / 4;
            stm.write(buffer, 0, mid);

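            // Ask the client for its current write pipeline; it may not be
            // established yet, so poll for a few seconds below.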
            DatanodeInfo[] targets = dfstream.getPipeline();
            int count = 5;
            while (count-- > 0 && targets == null) {
                try {
                    System.out.println("SimpleTest: Waiting for pipeline to be created.");
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                }
                targets = dfstream.getPipeline();
            }

            if (targets == null) {
                int victim = AppendTestUtil.nextInt(myMaxNodes);
                System.out.println("SimpleTest stopping datanode random " + victim);
                cluster.stopDataNode(victim);
            } else {
                int victim = datanodeToKill;
                System.out.println("SimpleTest stopping datanode " + targets[victim].getName());
                cluster.stopDataNode(targets[victim].getName());
            }
            System.out.println("SimpleTest stopping datanode complete");

            // write some more data to file, close and verify
            stm.write(buffer, mid, fileSize - mid);
            stm.close();

            checkFile(fs, filename, repl, numBlocks, fileSize, myseed);
        } catch (Throwable e) {
            System.out.println("Simple Workload exception " + e);
            e.printStackTrace();
            fail(e.toString());
        } finally {
            fs.close();
            cluster.shutdown();
        }
    }

    public void testSimple0() throws IOException {
        simpleTest(0);
    }

    public void testSimple1() throws IOException {
        simpleTest(1);
    }

    public void testSimple2() throws IOException {
        simpleTest(2);
    }

    public void testComplex() throws IOException {
        complexTest();
    }
}
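
Running the test

Because the class extends JUnit 3's TestCase, it can be driven from a plain main method with the junit.textui runner. The sketch below is a hypothetical driver, not part of the Hadoop source; it assumes the Hadoop core and test classes (and their dependencies) are already on the classpath.

import junit.textui.TestRunner;

import org.apache.hadoop.hdfs.TestDatanodeDeath;

/** Hypothetical driver: runs testSimple0/1/2 and testComplex in sequence. */
public class RunDatanodeDeath {
    public static void main(String[] args) {
        // TestRunner.run(Class) wraps the class in a TestSuite and runs every
        // public void test*() method, printing results to stdout.
        TestRunner.run(TestDatanodeDeath.class);
    }
}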