org.apache.hadoop.hdfs.server.namenode.TestNameNodeRecovery.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.namenode.TestNameNodeRecovery.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hdfs.server.namenode;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
import org.apache.hadoop.util.StringUtils;
import org.junit.Test;

/**
 * This tests data recovery mode for the NameNode.
 */
public class TestNameNodeRecovery {
    private static final Log LOG = LogFactory.getLog(TestNameNodeRecovery.class);
    private static StartupOption recoverStartOpt = StartupOption.RECOVER;

    static {
        recoverStartOpt.setForce(MetaRecoveryContext.FORCE_ALL);
    }

    static interface Corruptor {
        public void corrupt(File editFile) throws IOException;

        public boolean fatalCorruption();
    }

    static class TruncatingCorruptor implements Corruptor {
        @Override
        public void corrupt(File editFile) throws IOException {
            // Corrupt the last edit
            long fileLen = editFile.length();
            RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
            rwf.setLength(fileLen - 1);
            rwf.close();
        }

        @Override
        public boolean fatalCorruption() {
            return true;
        }
    }

    static final void pad(RandomAccessFile rwf, byte b, int amt) throws IOException {
        byte buf[] = new byte[1024];
        for (int i = 0; i < buf.length; i++) {
            buf[i] = 0;
        }
        while (amt > 0) {
            int len = (amt < buf.length) ? amt : buf.length;
            rwf.write(buf, 0, len);
            amt -= len;
        }
    }

    static class PaddingCorruptor implements Corruptor {
        @Override
        public void corrupt(File editFile) throws IOException {
            // Add junk to the end of the file
            RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
            rwf.seek(editFile.length());
            pad(rwf, (byte) 0, 2098176);
            rwf.write(0x44);
            rwf.close();
        }

        @Override
        public boolean fatalCorruption() {
            return true;
        }
    }

    static class SafePaddingCorruptor implements Corruptor {
        private byte padByte;

        public SafePaddingCorruptor(byte padByte) {
            this.padByte = padByte;
            assert ((this.padByte == 0) || (this.padByte == -1));
        }

        @Override
        public void corrupt(File editFile) throws IOException {
            // Add junk to the end of the file
            RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
            rwf.seek(editFile.length());
            rwf.write((byte) -1);
            pad(rwf, padByte, 2098176);
            rwf.close();
        }

        @Override
        public boolean fatalCorruption() {
            return false;
        }
    }

    static void testNameNodeRecoveryImpl(Corruptor corruptor) throws IOException {
        final String TEST_PATH = "/test/path/dir";
        final String TEST_PATH2 = "/alt/test/path";

        // Start up the mini dfs cluster
        Configuration conf = new Configuration();
        conf.setInt(DFSConfigKeys.DFS_NAMENODE_EDITS_TOLERATION_LENGTH_KEY, -1);
        MiniDFSCluster cluster;
        cluster = new MiniDFSCluster(0, conf, 0, true, true, false, StartupOption.FORMAT, null, null, null);
        cluster.waitActive();
        FileSystem fileSys = cluster.getFileSystem();
        fileSys.mkdirs(new Path(TEST_PATH));
        fileSys.mkdirs(new Path(TEST_PATH2));

        List<File> nameEditsDirs = (List<File>) FSNamesystem.getNamespaceEditsDirs(conf);
        cluster.shutdown();

        File dir = nameEditsDirs.get(0); //has only one
        File editFile = new File(new File(dir, "current"), NameNodeFile.EDITS.getName());
        assertTrue("Should exist: " + editFile, editFile.exists());

        corruptor.corrupt(editFile);

        // Check how our corruption affected NameNode startup.
        try {
            LOG.debug("trying to start normally (this should fail)...");
            cluster = new MiniDFSCluster(0, conf, 0, false, true, false, StartupOption.REGULAR, null, null, null);
            cluster.waitActive();
            if (corruptor.fatalCorruption()) {
                fail("expected the truncated edit log to prevent normal startup");
            }
        } catch (IOException e) {
            if (!corruptor.fatalCorruption()) {
                fail("expected to be able to start up normally, but couldn't.");
            }
        } finally {
            cluster.shutdown();
        }

        // Perform recovery
        try {
            LOG.debug("running recovery...");
            cluster = new MiniDFSCluster(0, conf, 0, false, true, false, StartupOption.RECOVER, null, null, null);
            cluster.waitActive();
        } catch (IOException e) {
            fail("caught IOException while trying to recover. " + "message was " + e.getMessage()
                    + "\nstack trace\n" + StringUtils.stringifyException(e));
        } finally {
            cluster.shutdown();
        }

        // Make sure that we can start the cluster normally after recovery
        try {
            cluster = new MiniDFSCluster(0, conf, 0, false, true, false, StartupOption.REGULAR, null, null, null);
            cluster.waitActive();
            assertTrue(cluster.getFileSystem().exists(new Path(TEST_PATH)));
        } catch (IOException e) {
            fail("failed to recover.  Error message: " + e.getMessage());
        } finally {
            cluster.shutdown();
        }
    }

    /** Test that we can successfully recover from a situation where the last
     * entry in the edit log has been truncated. */
    @Test(timeout = 180000)
    public void testRecoverTruncatedEditLog() throws IOException {
        testNameNodeRecoveryImpl(new TruncatingCorruptor());
        LOG.debug("testRecoverTruncatedEditLog: successfully recovered the " + "truncated edit log");
    }

    /** Test that we can successfully recover from a situation where garbage
     * bytes have been added to the end of the file. */
    @Test(timeout = 180000)
    public void testRecoverPaddedEditLog() throws IOException {
        testNameNodeRecoveryImpl(new PaddingCorruptor());
        LOG.debug("testRecoverPaddedEditLog: successfully recovered the " + "padded edit log");
    }

    /** Test that we can successfully recover from a situation where 0
     * bytes have been added to the end of the file. */
    @Test(timeout = 180000)
    public void testRecoverZeroPaddedEditLog() throws IOException {
        testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte) 0));
    }

    /** Test that we can successfully recover from a situation where -1
     * bytes have been added to the end of the file. */
    @Test(timeout = 180000)
    public void testRecoverNegativeOnePaddedEditLog() throws IOException {
        testNameNodeRecoveryImpl(new SafePaddingCorruptor((byte) -1));
    }
}