org.apache.hadoop.hdfs.server.namenode.TestEditLogRace.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.namenode.TestEditLogRace.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.namenode;

import java.io.*;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicReference;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.permission.*;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;

import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import static org.mockito.Mockito.*;

/**
 * This class tests various synchronization bugs in FSEditLog rolling
 * and namespace saving.
 */
public class TestEditLogRace extends TestCase {
    private static final Log LOG = LogFactory.getLog(TestEditLogRace.class);

    // This test creates NUM_THREADS threads and each thread continuously writes
    // transactions
    static final int NUM_THREADS = 16;

    /**
     * The number of times to roll the edit log during the test. Since this
     * tests for a race condition, higher numbers are more likely to find
     * a bug if it exists, but the test will take longer.
     */
    static final int NUM_ROLLS = 30;

    /**
     * The number of times to save the fsimage and create an empty edit log.
     */
    static final int NUM_SAVE_IMAGE = 30;

    private List<Transactions> workers = new ArrayList<Transactions>();

    private static final int NUM_DATA_NODES = 1;

    /**
     * Several of the test cases work by introducing a sleep
     * into an operation that is usually fast, and then verifying
     * that another operation blocks for at least this amount of time.
     * This value needs to be significantly longer than the average
     * time for an fsync() or enterSafeMode().
     */
    private static final int BLOCK_TIME = 10;

    //
    // an object that does a bunch of transactions
    //
    static class Transactions implements Runnable {
        final FSNamesystem namesystem;
        short replication = 3;
        long blockSize = 64;
        volatile boolean stopped = false;
        volatile Thread thr;
        final AtomicReference<Throwable> caught;

        Transactions(FSNamesystem ns, AtomicReference<Throwable> caught) {
            this.namesystem = ns;
            this.caught = caught;
        }

        // add a bunch of transactions.
        public void run() {
            thr = Thread.currentThread();
            PermissionStatus p = FSNamesystem.getFSNamesystem()
                    .createFsOwnerPermissions(new FsPermission((short) 0777));

            int i = 0;
            while (!stopped) {
                try {
                    String dirname = "/thr-" + thr.getId() + "-dir-" + i;
                    namesystem.mkdirs(dirname, p);
                    namesystem.delete(dirname, true);
                } catch (SafeModeException sme) {
                    // This is OK - the tests will bring NN in and out of safemode
                } catch (Throwable e) {
                    LOG.warn("Got error in transaction thread", e);
                    caught.compareAndSet(null, e);
                    break;
                }
            }
        }

        public void stop() {
            stopped = true;
        }

        public Thread getThread() {
            return thr;
        }
    }

    private void startTransactionWorkers(FSNamesystem namesystem, AtomicReference<Throwable> caughtErr) {
        // Create threads and make them run transactions concurrently.
        for (int i = 0; i < NUM_THREADS; i++) {
            Transactions trans = new Transactions(namesystem, caughtErr);
            new Thread(trans, "TransactionThread-" + i).start();
            workers.add(trans);
        }
    }

    private void stopTransactionWorkers() {
        // wait for all transactions to get over
        for (Transactions worker : workers) {
            worker.stop();
        }

        for (Transactions worker : workers) {
            Thread thr = worker.getThread();
            try {
                if (thr != null)
                    thr.join();
            } catch (InterruptedException ie) {
            }
        }
    }

    /**
     * Tests rolling edit logs while transactions are ongoing.
     */
    public void testEditLogRolling() throws Exception {
        // start a cluster 
        Configuration conf = getConf();
        MiniDFSCluster cluster = null;
        FileSystem fileSys = null;

        AtomicReference<Throwable> caughtErr = new AtomicReference<Throwable>();
        try {
            cluster = new MiniDFSCluster(conf, NUM_DATA_NODES, true, null);
            cluster.waitActive();
            fileSys = cluster.getFileSystem();
            final FSNamesystem namesystem = cluster.getNameNode().namesystem;

            FSImage fsimage = namesystem.getFSImage();
            FSEditLog editLog = fsimage.getEditLog();

            // set small size of flush buffer
            FSEditLog.setBufferCapacity(2048);
            editLog.close();
            editLog.open();

            startTransactionWorkers(namesystem, caughtErr);

            for (int i = 0; i < NUM_ROLLS && caughtErr.get() == null; i++) {
                try {
                    Thread.sleep(20);
                } catch (InterruptedException e) {
                }

                LOG.info("Starting roll " + i + ".");
                editLog.rollEditLog();
                LOG.info("Roll complete " + i + ".");

                verifyEditLogs(fsimage);

                LOG.info("Starting purge " + i + ".");
                editLog.purgeEditLog();
                LOG.info("Complete purge " + i + ".");
            }
        } finally {
            stopTransactionWorkers();
            if (caughtErr.get() != null) {
                throw new RuntimeException(caughtErr.get());
            }

            if (fileSys != null)
                fileSys.close();
            if (cluster != null)
                cluster.shutdown();
        }
    }

    private void verifyEditLogs(FSImage fsimage) throws IOException {
        // Verify that we can read in all the transactions that we have written.
        // If there were any corruptions, it is likely that the reading in
        // of these transactions will throw an exception.
        for (Iterator<StorageDirectory> it = fsimage.dirIterator(NameNodeDirType.EDITS); it.hasNext();) {
            File editFile = FSImage.getImageFile(it.next(), NameNodeFile.EDITS);
            System.out.println("Verifying file: " + editFile);
            int numEdits = FSEditLog.loadFSEdits(new FSEditLog.EditLogFileInputStream(editFile));
            System.out.println("Number of edits: " + numEdits);
        }
    }

    /**
     * Tests saving fs image while transactions are ongoing.
     */
    public void testSaveNamespace() throws Exception {
        // start a cluster 
        Configuration conf = getConf();
        MiniDFSCluster cluster = null;
        FileSystem fileSys = null;

        AtomicReference<Throwable> caughtErr = new AtomicReference<Throwable>();
        try {
            cluster = new MiniDFSCluster(conf, NUM_DATA_NODES, true, null);
            cluster.waitActive();
            fileSys = cluster.getFileSystem();
            final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();

            FSImage fsimage = namesystem.getFSImage();
            FSEditLog editLog = fsimage.getEditLog();

            // set small size of flush buffer
            FSEditLog.setBufferCapacity(2048);
            editLog.close();
            editLog.open();

            assertTrue(editLog.getEditStreams().size() > 0);
            startTransactionWorkers(namesystem, caughtErr);

            for (int i = 0; i < NUM_SAVE_IMAGE; i++) {
                try {
                    Thread.sleep(20);
                } catch (InterruptedException e) {
                }

                LOG.info("Save " + i + ": entering safe mode");
                namesystem.enterSafeMode();

                // Verify edit logs before the save
                verifyEditLogs(fsimage);

                LOG.info("Save " + i + ": saving namespace");
                namesystem.saveNamespace();
                LOG.info("Save " + i + ": leaving safemode");

                // Verify that edit logs post save are also not corrupt
                verifyEditLogs(fsimage);

                namesystem.leaveSafeMode(false);
                LOG.info("Save " + i + ": complete");

            }
        } finally {
            stopTransactionWorkers();
            if (caughtErr.get() != null) {
                throw new RuntimeException(caughtErr.get());
            }

            if (fileSys != null)
                fileSys.close();
            if (cluster != null)
                cluster.shutdown();
        }
    }

    private Configuration getConf() {
        Configuration conf = new Configuration();
        conf.set("dfs.name.dir", MiniDFSCluster.getBaseDir() + "/data");
        conf.setBoolean("dfs.permissions", false);
        return conf;
    }

    /**
     * The logSync() method in FSEditLog is unsynchronized whiel syncing
     * so that other threads can concurrently enqueue edits while the prior
     * sync is ongoing. This test checks that the log is saved correctly
     * if the saveImage occurs while the syncing thread is in the unsynchronized middle section.
     * 
     * This replicates the following manual test proposed by Konstantin:
     *   I start the name-node in debugger.
     *   I do -mkdir and stop the debugger in logSync() just before it does flush.
     *   Then I enter safe mode with another client
     *   I start saveNamepsace and stop the debugger in
     *     FSImage.saveFSImage() -> FSEditLog.createEditLogFile()
     *     -> EditLogFileOutputStream.create() ->
     *     after truncating the file but before writing LAYOUT_VERSION into it.
     *   Then I let logSync() run.
     *   Then I terminate the name-node.
     *   After that the name-node wont start, since the edits file is broken.
     */
    public void testSaveImageWhileSyncInProgress() throws Throwable {
        Configuration conf = getConf();
        NameNode.format(conf);
        NameNode fakeNN = mock(NameNode.class);
        NameNode.myMetrics = new NameNodeMetrics(conf, fakeNN);
        Mockito.doReturn(new InetSocketAddress("127.0.0.1", 12345)).when(fakeNN).getNameNodeAddress();

        final FSNamesystem namesystem = new FSNamesystem(fakeNN, conf);

        try {
            FSImage fsimage = namesystem.getFSImage();
            FSEditLog editLog = fsimage.getEditLog();

            ArrayList<EditLogOutputStream> streams = editLog.getEditStreams();
            EditLogOutputStream spyElos = spy(streams.get(0));
            streams.set(0, spyElos);

            final AtomicReference<Throwable> deferredException = new AtomicReference<Throwable>();
            final CountDownLatch waitToEnterFlush = new CountDownLatch(1);

            final Thread doAnEditThread = new Thread() {
                public void run() {
                    try {
                        LOG.info("Starting mkdirs");
                        namesystem.mkdirs("/test",
                                new PermissionStatus("test", "test", new FsPermission((short) 00755)));
                        LOG.info("mkdirs complete");
                    } catch (Throwable ioe) {
                        deferredException.set(ioe);
                        waitToEnterFlush.countDown();
                    }
                }
            };

            Answer<Void> blockingFlush = new Answer<Void>() {
                @Override
                public Void answer(InvocationOnMock invocation) throws Throwable {
                    LOG.info("Flush called");
                    if (Thread.currentThread() == doAnEditThread) {
                        LOG.info("edit thread: Telling main thread we made it to flush section...");
                        // Signal to main thread that the edit thread is in the racy section
                        waitToEnterFlush.countDown();
                        LOG.info("edit thread: sleeping for " + BLOCK_TIME + "secs");
                        Thread.sleep(BLOCK_TIME * 1000);
                        LOG.info("Going through to flush. This will allow the main thread to continue.");
                    }
                    invocation.callRealMethod();
                    LOG.info("Flush complete");
                    return null;
                }
            };
            doAnswer(blockingFlush).when(spyElos).flush();

            doAnEditThread.start();
            // Wait for the edit thread to get to the logsync unsynchronized section
            LOG.info("Main thread: waiting to enter flush...");
            waitToEnterFlush.await();
            if (deferredException.get() != null) {
                throw deferredException.get();
            }
            LOG.info("Main thread: detected that logSync is in unsynchronized section.");
            LOG.info("Trying to enter safe mode.");
            LOG.info("This should block for " + BLOCK_TIME + "sec, since flush will sleep that long");

            long st = System.currentTimeMillis();
            namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
            long et = System.currentTimeMillis();
            LOG.info("Entered safe mode");
            // Make sure we really waited for the flush to complete!
            assertTrue(et - st > (BLOCK_TIME - 1) * 1000);

            // Once we're in safe mode, save namespace.
            namesystem.saveNamespace();

            LOG.info("Joining on edit thread...");
            doAnEditThread.join();
            assertNull(deferredException.get());

            verifyEditLogs(fsimage);
        } finally {
            LOG.info("Closing namesystem");
            if (namesystem != null)
                namesystem.close();
        }
    }

    /**
     * Most of the FSNamesystem methods have a synchronized section where they
     * update the name system itself and write to the edit log, and then
     * unsynchronized, they call logSync. This test verifies that, if an
     * operation has written to the edit log but not yet synced it,
     * we wait for that sync before entering safe mode.
     */
    public void testSaveRightBeforeSync() throws Exception {
        Configuration conf = getConf();
        NameNode.format(conf);
        NameNode fakeNN = mock(NameNode.class);
        NameNode.myMetrics = new NameNodeMetrics(conf, fakeNN);
        Mockito.doReturn(new InetSocketAddress("127.0.0.1", 12345)).when(fakeNN).getNameNodeAddress();
        final FSNamesystem namesystem = new FSNamesystem(fakeNN, conf);

        try {
            FSImage fsimage = namesystem.getFSImage();
            FSEditLog editLog = spy(fsimage.getEditLog());
            fsimage.editLog = editLog;

            final AtomicReference<Throwable> deferredException = new AtomicReference<Throwable>();
            final CountDownLatch waitToEnterSync = new CountDownLatch(1);

            final Thread doAnEditThread = new Thread() {
                public void run() {
                    try {
                        LOG.info("Starting mkdirs");
                        namesystem.mkdirs("/test",
                                new PermissionStatus("test", "test", new FsPermission((short) 00755)));
                        LOG.info("mkdirs complete");
                    } catch (Throwable ioe) {
                        deferredException.set(ioe);
                        waitToEnterSync.countDown();
                    }
                }
            };

            Answer<Void> blockingSync = new Answer<Void>() {
                @Override
                public Void answer(InvocationOnMock invocation) throws Throwable {
                    LOG.info("logSync called");
                    if (Thread.currentThread() == doAnEditThread) {
                        LOG.info("edit thread: Telling main thread we made it just before logSync...");
                        waitToEnterSync.countDown();
                        LOG.info("edit thread: sleeping for " + BLOCK_TIME + "secs");
                        Thread.sleep(BLOCK_TIME * 1000);
                        LOG.info("Going through to logSync. This will allow the main thread to continue.");
                    }
                    invocation.callRealMethod();
                    LOG.info("logSync complete");
                    return null;
                }
            };
            doAnswer(blockingSync).when(editLog).logSync();

            doAnEditThread.start();
            LOG.info("Main thread: waiting to just before logSync...");
            waitToEnterSync.await();
            assertNull(deferredException.get());
            LOG.info("Main thread: detected that logSync about to be called.");
            LOG.info("Trying to enter safe mode.");
            LOG.info("This should block for " + BLOCK_TIME + "sec, since we have pending edits");

            long st = System.currentTimeMillis();
            namesystem.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
            long et = System.currentTimeMillis();
            LOG.info("Entered safe mode");
            // Make sure we really waited for the flush to complete!
            assertTrue(et - st > (BLOCK_TIME - 1) * 1000);

            // Once we're in safe mode, save namespace.
            namesystem.saveNamespace();

            LOG.info("Joining on edit thread...");
            doAnEditThread.join();
            assertNull(deferredException.get());

            verifyEditLogs(fsimage);
        } finally {
            LOG.info("Closing namesystem");
            if (namesystem != null)
                namesystem.close();
        }
    }
}