org.apache.hadoop.raid.TestRaidNode.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.raid.TestRaidNode.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.raid;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.zip.CRC32;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.TestRaidDfs;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.raid.protocol.PolicyInfo;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.hdfs.protocol.DataTransferProtocol;
import org.apache.hadoop.raid.DistRaid.Counter;
import org.apache.hadoop.raid.LogUtils.LOGRESULTS;
import org.apache.hadoop.raid.LogUtils.LOGTYPES;
import org.apache.hadoop.raid.PlacementMonitor.BlockInfo;
import org.apache.hadoop.raid.Utils.Builder;

/**
  * Test the generation of parity blocks for files with different block
  * sizes. Also test that a data block can be regenerated from a raid stripe
  * using the parity block
  */
public class TestRaidNode extends TestCase {
    final static String TEST_DIR = new File(System.getProperty("test.build.data", "build/contrib/raid/test/data"))
            .getAbsolutePath();
    final static String CONFIG_FILE = new File(TEST_DIR, "test-raid.xml").getAbsolutePath();
    final static long RELOAD_INTERVAL = 1000;
    final static Log LOG = LogFactory.getLog("org.apache.hadoop.raid.TestRaidNode");
    final static Random rand = new Random();

    Configuration conf;
    String namenode = null;
    String hftp = null;
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fileSys = null;
    String jobTrackerName = null;

    static public void loadTestCodecs(Configuration conf, int xorStripeLength, int rsStripeLength,
            int xorParityLength, int rsParityLength) throws IOException {
        Utils.loadTestCodecs(conf, new Builder[] {
                // priority 100
                Utils.getXORBuilder().setStripeLength(xorStripeLength).setParityLength(xorParityLength),
                Utils.getXORBuilder().dirRaid(true).setParityDir("/dir-raid").setStripeLength(xorStripeLength)
                        .setParityLength(xorParityLength).setCodeId("dir-xor").setPriority(400),
                // priority 300
                Utils.getRSBuilder().setStripeLength(rsStripeLength).setParityLength(rsParityLength),
                Utils.getRSBuilder().dirRaid(true).setParityDir("/dir-raidrs").setStripeLength(rsStripeLength)
                        .setParityLength(rsParityLength).setCodeId("dir-rs").setPriority(600) });
    }

    /**
     * create mapreduce and dfs clusters
     */
    private void createClusters(boolean local, boolean rackAware) throws Exception {
        if (System.getProperty("hadoop.log.dir") == null) {
            String base = new File(".").getAbsolutePath();
            System.setProperty("hadoop.log.dir", new Path(base).toString() + "/logs");
        }

        new File(TEST_DIR).mkdirs(); // Make sure data directory exists
        conf = new Configuration();
        conf.set("raid.config.file", CONFIG_FILE);
        conf.setBoolean("raid.config.reload", true);
        conf.setLong("raid.config.reload.interval", RELOAD_INTERVAL);
        conf.setLong(JobMonitor.JOBMONITOR_INTERVAL_KEY, 20000L);
        conf.setLong(RaidNode.TRIGGER_MONITOR_SLEEP_TIME_KEY, 3000L);
        conf.setLong(RaidNode.MINIMUM_RAIDABLE_FILESIZE_KEY, 1L);
        conf.setLong(RaidNode.RAID_MOD_TIME_PERIOD_KEY, 0);
        // Report block deletion quickly
        conf.setLong("dfs.blockreport.intervalMsec", 8000L);
        conf.setBoolean(StatisticsCollector.STATS_COLLECTOR_SUBMIT_JOBS_CONFIG, false);
        conf.set("mapred.raid.http.address", "localhost:0");
        //disable purge monitor
        conf.setLong(PurgeMonitor.PURGE_MONITOR_SLEEP_TIME_KEY, 3600000L);

        // scan every policy every 5 seconds
        conf.setLong("raid.policy.rescan.interval", 5 * 1000L);

        // the RaidNode does the raiding inline (instead of submitting to map/reduce)
        if (local) {
            conf.set("raid.classname", "org.apache.hadoop.raid.LocalRaidNode");
        } else {
            conf.set("raid.classname", "org.apache.hadoop.raid.DistRaidNode");
        }

        // use local block fixer
        conf.set("raid.blockfix.classname", "org.apache.hadoop.raid.LocalBlockIntegrityMonitor");
        conf.set("dfs.block.replicator.classname",
                "org.apache.hadoop.hdfs.server.namenode.BlockPlacementPolicyRaid");

        conf.set("raid.server.address", "localhost:" + MiniDFSCluster.getFreePort());
        loadTestCodecs(conf, 3, 10, 1, 5);

        // create a dfs and map-reduce cluster
        final int taskTrackers = 4;
        final int jobTrackerPort = 60050;

        if (rackAware) {
            // Because BlockPlacementPolicyRaid only allows one replica in each rack,
            // spread 6 nodes into 6 racks to make sure chooseTarget function could pick
            // more than one node. 
            String[] racks = { "/rack1", "/rack2", "/rack3", "/rack4", "/rack5", "/rack6" };
            dfs = new MiniDFSCluster(conf, 6, true, racks);
        } else {
            dfs = new MiniDFSCluster(conf, 3, true, null);
        }
        dfs.waitActive();
        fileSys = dfs.getFileSystem();
        TestDirectoryRaidDfs.setupStripeStore(conf, fileSys);
        namenode = fileSys.getUri().toString();
        mr = new MiniMRCluster(taskTrackers, namenode, 3);
        jobTrackerName = "localhost:" + mr.getJobTrackerPort();
        hftp = "hftp://localhost.localdomain:" + dfs.getNameNodePort();

        FileSystem.setDefaultUri(conf, namenode);
        conf.set("mapred.job.tracker", jobTrackerName);
    }

    /**
     * stop clusters created earlier
     */
    private void stopClusters() throws Exception {
        if (mr != null) {
            mr.shutdown();
        }
        if (dfs != null) {
            dfs.shutdown();
        }
    }

    public void testFileListDirRaidPolicy() throws Exception {
        LOG.info("Test testFileListDirRaidPolicy started.");
        short targetReplication = 2;
        short metaReplication = 2;
        short srcReplication = 3;

        createClusters(false, false);
        conf.setInt(ConfigManager.DIRRAID_BLOCK_LIMIT_KEY, 14);
        // avoid raidnode start raiding files
        conf.setLong(RaidNode.TRIGGER_MONITOR_SLEEP_TIME_KEY, 3600000);
        ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
        cb.addAbstractPolicy("abstractPolicy2", targetReplication, metaReplication, "dir-xor");
        cb.addFileListPolicy("policy3", "/user/rvadali/raiddirlist.txt", "abstractPolicy2");
        cb.persist();
        RaidNode cnode = null;

        Path dirListPath = new Path("/user/rvadali/raiddirlist.txt");
        try {
            createTestFiles(fileSys, "/user/rvadali/dir-raidtest/1/", "/dir-raid/user/rvadali/dir-raidtest/1", 2, 8,
                    srcReplication);
            createTestFiles(fileSys, "/user/rvadali/dir-raidtest/2/", "/dir-raid/user/rvadali/dir-raidtest/2", 2, 7,
                    targetReplication);
            createTestFiles(fileSys, "/user/rvadali/dir-raidtest/3/", "/dir-raid/user/rvadali/dir-raidtest/3", 2, 3,
                    targetReplication);
            createTestFiles(fileSys, "/user/rvadali/dir-raidtest/4/", "/dir-raid/user/rvadali/dir-raidtest/4", 2, 3,
                    targetReplication);
            createTestFiles(fileSys, "/user/rvadali/dir-raidtest/5/", "/dir-raid/user/rvadali/dir-raidtest/5", 2, 3,
                    targetReplication);
            LOG.info("Test testFileListDirRaidPolicy created test files");
            FSDataOutputStream dirOut = fileSys.create(dirListPath);
            FileStatus[] dirs = fileSys.listStatus(new Path("/user/rvadali/dir-raidtest"));
            for (FileStatus dir : dirs) {
                dirOut.write(dir.getPath().toString().getBytes());
                dirOut.write("\n".getBytes());
            }
            dirOut.close();

            cnode = RaidNode.createRaidNode(conf);
            PolicyInfo[] infos = cnode.getAllPolicies();
            assertEquals("we should have only one policy", 1, infos.length);
            PolicyInfo info = infos[0];
            cnode.triggerMonitor.putPolicyInfo(info);
            List<FileStatus> list1 = cnode.triggerMonitor.readFileList(info);
            assertEquals("Only one directory is selected", 1, list1.size());
            assertEquals("/user/rvadali/dir-raidtest/1", list1.get(0).getPath().toUri().getPath());
            List<FileStatus> list2 = cnode.triggerMonitor.readFileList(info);
            assertEquals("Only one directory is selected", 1, list2.size());
            assertEquals("/user/rvadali/dir-raidtest/2", list2.get(0).getPath().toUri().getPath());
            List<FileStatus> list3 = cnode.triggerMonitor.readFileList(info);
            assertEquals("Only 3 directories are selected", 3, list3.size());
            assertEquals("/user/rvadali/dir-raidtest/3", list3.get(0).getPath().toUri().getPath());
            assertEquals("/user/rvadali/dir-raidtest/4", list3.get(1).getPath().toUri().getPath());
            assertEquals("/user/rvadali/dir-raidtest/5", list3.get(2).getPath().toUri().getPath());
            List<FileStatus> list4 = cnode.triggerMonitor.readFileList(info);
            assertEquals("None is selected", 0, list4.size());
            LOG.info("Test testFileListDirRaidPolicy successful.");
        } catch (Exception e) {
            LOG.info("testFileListDirRaidPolicy Exception ", e);
            throw e;
        } finally {
            if (cnode != null) {
                cnode.stop();
                cnode.join();
            }
            stopClusters();
        }
        LOG.info("Test testFileListDirRaidPolicy completed.");
    }

    /**
     * Test to run a filter
     */
    public void testPathFilter() throws Exception {
        LOG.info("Test testPathFilter started.");

        long blockSizes[] = { 1024L };
        int stripeLengths[] = { 5, 6, 10, 11, 12 };
        int targetReplication = 1;
        int metaReplication = 1;
        int numBlock = 11;
        int iter = 0;

        createClusters(true, false);
        try {
            for (long blockSize : blockSizes) {
                for (int stripeLength : stripeLengths) {
                    this.loadTestCodecs(conf, stripeLength, stripeLength, 1, 3);
                    doTestPathFilter(iter, targetReplication, metaReplication, stripeLength, blockSize, numBlock);
                    iter++;
                }
            }
            doCheckPolicy();
        } finally {
            stopClusters();
        }
        LOG.info("Test testPathFilter completed.");
    }

    private void simulateErrors(RaidShell shell, Path file1, long crc, long blockSize, long numBlock,
            long stripeLength) throws IOException {
        if (numBlock >= 1) {
            LOG.info("doTestPathFilter Check error at beginning of file.");
            simulateError(shell, fileSys, file1, crc, 0);
        }

        // check for error at the beginning of second block
        if (numBlock >= 2) {
            LOG.info("doTestPathFilter Check error at beginning of second block.");
            simulateError(shell, fileSys, file1, crc, blockSize + 1);
        }

        // check for error at the middle of third block
        if (numBlock >= 3) {
            LOG.info("doTestPathFilter Check error at middle of third block.");
            simulateError(shell, fileSys, file1, crc, 2 * blockSize + 10);
        }

        // check for error at the middle of second stripe
        if (numBlock >= stripeLength + 1) {
            LOG.info("doTestPathFilter Check error at middle of second stripe.");
            simulateError(shell, fileSys, file1, crc, stripeLength * blockSize + 100);
        }
    }

    /**
     * Test to run a filter
     */
    private void doTestPathFilter(int iter, long targetReplication, long metaReplication, long stripeLength,
            long blockSize, int numBlock) throws Exception {
        LOG.info("doTestPathFilter started---------------------------:" + " iter " + iter + " blockSize="
                + blockSize + " stripeLength=" + stripeLength);
        ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
        cb.addPolicy("policy1", "/user/dhruba/raidtest", targetReplication, metaReplication);
        cb.addPolicy("policy2", "/user/dhruba/dir-raidtest", targetReplication, metaReplication, "dir-xor");
        cb.persist();

        RaidShell shell = null;
        Path dir = new Path("/user/dhruba/raidtest/");
        Path file1 = new Path(dir + "/file" + iter);
        Path dir1 = new Path("/user/dhruba/dir-raidtest/1");
        Path file2 = new Path(dir1 + "/file2");
        Path file3 = new Path(dir1 + "/file3");
        RaidNode cnode = null;
        try {
            Path destPath = new Path("/raid/user/dhruba/raidtest");
            Path destPath1 = new Path("/dir-raid/user/dhruba/dir-raidtest");
            fileSys.delete(dir, true);
            fileSys.delete(destPath, true);
            fileSys.delete(dir1, true);
            fileSys.delete(destPath1, true);
            long crc1 = createOldFile(fileSys, file1, 1, numBlock, blockSize);
            long crc2 = createOldFile(fileSys, file2, 1, numBlock, blockSize);
            long crc3 = createOldFile(fileSys, file3, 1, numBlock, blockSize);
            LOG.info("doTestPathFilter created test files for iteration " + iter);

            // create an instance of the RaidNode
            Configuration localConf = new Configuration(conf);
            cnode = RaidNode.createRaidNode(null, localConf);
            TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath);
            TestRaidDfs.waitForDirRaided(LOG, fileSys, dir1, destPath1);

            LOG.info("doTestPathFilter all files found in Raid.");
            // check for error at beginning of file
            shell = new RaidShell(conf);
            shell.initializeRpc(conf, cnode.getListenerAddress());
            this.simulateErrors(shell, file1, crc1, blockSize, numBlock, stripeLength);
            this.simulateErrors(shell, file2, crc2, blockSize, numBlock, stripeLength);
            this.simulateErrors(shell, file3, crc3, blockSize, numBlock, stripeLength);
        } catch (Exception e) {
            LOG.info("doTestPathFilter Exception ", e);
            throw e;
        } finally {
            if (shell != null)
                shell.close();
            if (cnode != null) {
                cnode.stop();
                cnode.join();
            }
            LOG.info("doTestPathFilter clean up");
            fileSys.delete(dir, true);
            fileSys.delete(new Path("/raid"), true);
            fileSys.delete(dir1, true);
            fileSys.delete(new Path("/dir-raid"), true);
        }
        LOG.info("doTestPathFilter completed:" + " blockSize=" + blockSize + " stripeLength=" + stripeLength);
    }

    // Check that raid occurs only on files that have a replication factor
    // greater than or equal to the specified value
    private void doCheckPolicy() throws Exception {
        LOG.info("doCheckPolicy started---------------------------:");
        short srcReplication = 3;
        short targetReplication = 2;
        long metaReplication = 1;
        long stripeLength = 2;
        long blockSize = 1024;
        int numBlock = 3;
        ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
        cb.addPolicy("policy1", "/user/dhruba/policytest", targetReplication, metaReplication);
        cb.addPolicy("policy2", "/user/dhruba/dir-policytest", targetReplication, metaReplication, "dir-xor");
        cb.persist();
        Path dir = new Path("/user/dhruba/policytest/");
        Path dir1 = new Path("/user/dhruba/dir-policytest/1");
        Path file1 = new Path(dir + "/file1");
        Path file2 = new Path(dir1 + "/file2");
        Path file3 = new Path(dir1 + "/file3");
        Path file4 = new Path(dir1 + "/file4");
        RaidNode cnode = null;
        try {
            Path destPath = new Path("/raid/user/dhruba/policytest");
            Path destPath1 = new Path("/dir-raid/user/dhruba/dir-policytest");
            fileSys.delete(dir, true);
            fileSys.delete(destPath, true);
            fileSys.delete(dir1, true);
            fileSys.delete(destPath1, true);

            // create an instance of the RaidNode
            Configuration localConf = new Configuration(conf);
            cnode = RaidNode.createRaidNode(null, localConf);

            // this file should be picked up RaidNode
            createOldFile(fileSys, file1, 3, numBlock, blockSize);
            createOldFile(fileSys, file2, 3, numBlock, blockSize);
            createOldFile(fileSys, file3, 3, numBlock, blockSize);
            TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath, targetReplication);
            TestRaidDfs.waitForDirRaided(LOG, fileSys, dir1, destPath1, targetReplication);

            LOG.info("doCheckPolicy all files found in Raid the first time.");

            LOG.info("doCheckPolicy: recreating source file");
            long firstmodetime1 = fileSys.getFileStatus(file1).getModificationTime();
            createOldFile(fileSys, file1, 3, numBlock, blockSize);
            assertTrue(fileSys.getFileStatus(file1).getModificationTime() > firstmodetime1);

            LOG.info("Change the modification time of directory");
            long firstmodetime2 = fileSys.getFileStatus(dir1).getModificationTime();
            createOldFile(fileSys, file4, 3, numBlock, blockSize);
            assertTrue(fileSys.getFileStatus(dir1).getModificationTime() > firstmodetime2);
            TestRaidDfs.waitForFileRaided(LOG, fileSys, file1, destPath, targetReplication);
            TestRaidDfs.waitForDirRaided(LOG, fileSys, dir1, destPath1, targetReplication);

            LOG.info("doCheckPolicy: file got re-raided as expected.");

        } catch (Exception e) {
            LOG.info("doCheckPolicy Exception ", e);
            throw e;
        } finally {
            if (cnode != null) {
                cnode.stop();
                cnode.join();
            }
            LOG.info("doTestPathFilter clean up");
            fileSys.delete(dir, true);
            fileSys.delete(new Path("/raid"), true);
            fileSys.delete(dir1, true);
            fileSys.delete(new Path("/dir-raid"), true);
        }
        LOG.info("doCheckPolicy completed:");
    }

    static public void createTestFiles(FileSystem fileSys, String path, String destpath, int nfile, int nblock)
            throws IOException {
        createTestFiles(fileSys, path, destpath, nfile, nblock, (short) 1);
    }

    static void createTestFiles(FileSystem fileSys, String path, String destpath, int nfile, int nblock, short repl)
            throws IOException {
        long blockSize = 1024L;
        Path dir = new Path(path);
        Path destPath = new Path(destpath);
        fileSys.delete(dir, true);
        fileSys.delete(destPath, true);

        for (int i = 0; i < nfile; i++) {
            Path file = new Path(dir, "file" + i);
            createOldFile(fileSys, file, repl, nblock, blockSize);
        }
    }

    private void checkTestFiles(String srcDir, String parityDir, int stripeLength, short targetReplication,
            short metaReplication, PlacementMonitor pm, Codec codec, int nfiles)
            throws IOException, InterruptedException {
        for (int i = 0; i < nfiles; i++) {
            Path srcPath = new Path(srcDir, "file" + i);
            Path parityPath = null;
            if (codec.isDirRaid) {
                parityPath = new Path(parityDir);
                TestRaidDfs.waitForDirRaided(LOG, fileSys, srcPath.getParent(), parityPath.getParent(),
                        targetReplication);
            } else {
                parityPath = new Path(parityDir, "file" + i);
                TestRaidDfs.waitForFileRaided(LOG, fileSys, srcPath, parityPath.getParent(), targetReplication);
            }
            TestRaidDfs.waitForReplicasReduction(fileSys, parityPath, targetReplication);
            FileStatus srcFile = fileSys.getFileStatus(srcPath);
            FileStatus parityStat = fileSys.getFileStatus(parityPath);
            assertEquals(srcFile.getReplication(), targetReplication);
            assertEquals(parityStat.getReplication(), metaReplication);
            List<BlockInfo> parityBlocks = pm.getBlockInfos(fileSys, parityStat);

            int parityLength = codec.parityLength;
            if (parityLength == 1) {
                continue;
            }
            if (codec.isDirRaid && i > 0) {
                // One directory has one parity, just need to check once
                continue;
            }
            long numBlocks;
            if (codec.isDirRaid) {
                List<FileStatus> lfs = RaidNode.listDirectoryRaidFileStatus(conf, fileSys, new Path(srcDir));
                numBlocks = DirectoryStripeReader.getBlockNum(lfs);
            } else {
                numBlocks = RaidNode.numBlocks(srcFile);
            }
            int numStripes = (int) RaidNode.numStripes(numBlocks, stripeLength);

            Map<String, Integer> nodeToNumBlocks = new HashMap<String, Integer>();
            Set<String> nodesInThisStripe = new HashSet<String>();
            for (int stripeIndex = 0; stripeIndex < numStripes; ++stripeIndex) {
                List<BlockInfo> stripeBlocks = new ArrayList<BlockInfo>();
                // Adding parity blocks
                int stripeStart = parityLength * stripeIndex;
                int stripeEnd = Math.min(stripeStart + parityLength, parityBlocks.size());
                if (stripeStart < stripeEnd) {
                    stripeBlocks.addAll(parityBlocks.subList(stripeStart, stripeEnd));
                }
                PlacementMonitor.countBlocksOnEachNode(stripeBlocks, nodeToNumBlocks, nodesInThisStripe);
                LOG.info("file: " + parityPath + " stripe: " + stripeIndex);
                int max = 0;
                for (String node : nodeToNumBlocks.keySet()) {
                    int count = nodeToNumBlocks.get(node);
                    LOG.info("node:" + node + " count:" + count);
                    if (max < count) {
                        max = count;
                    }
                }
                assertTrue("pairty blocks in a stripe cannot live in the same node", max < parityLength);
            }
        }
    }

    /**
     * Test dist Raid
     */
    public void testDistRaid() throws Exception {
        LOG.info("Test testDistRaid started.");
        short targetReplication = 2;
        short metaReplication = 2;
        short rstargetReplication = 1;
        short rsmetaReplication = 1;
        short xorstripeLength = 3;
        int rsstripeLength = 10;

        createClusters(false, true);
        ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
        cb.addPolicy("policy1", "/user/dhruba/raidtest", targetReplication, metaReplication);
        cb.addAbstractPolicy("abstractPolicy", targetReplication, metaReplication, "xor");
        cb.addPolicy("policy2", "/user/dhruba/raidtest2", "abstractPolicy");
        cb.addPolicy("policy3", "/user/dhruba/raidtest3", rstargetReplication, rsmetaReplication, "rs");
        cb.addPolicy("policy4", "/user/dhruba/dir-raidtest/", targetReplication, metaReplication, "dir-xor");
        cb.addPolicy("policy5", "/user/dhruba/dir-raidtestrs/", rstargetReplication, rsmetaReplication, "dir-rs");
        cb.persist();

        RaidNode cnode = null;
        try {
            createTestFiles(fileSys, "/user/dhruba/raidtest/", "/raid/user/dhruba/raidtest", 5, 7);
            createTestFiles(fileSys, "/user/dhruba/raidtest2/", "/raid/user/dhruba/raidtest2", 5, 7);
            createTestFiles(fileSys, "/user/dhruba/raidtest3/", "/raidrs/user/dhruba/raidtest3", 1, 10);
            createTestFiles(fileSys, "/user/dhruba/dir-raidtest/1/", "/dir-raid/user/dhruba/dir-raidtest/1", 5, 7);
            createTestFiles(fileSys, "/user/dhruba/dir-raidtestrs/2/", "/dir-raidrs/user/dhruba/dir-raidtestrs/2",
                    2, 8);
            LOG.info("Test testDistRaid created test files");

            Configuration localConf = new Configuration(conf);
            //Avoid block mover to move blocks
            localConf.setInt(PlacementMonitor.BLOCK_MOVE_QUEUE_LENGTH_KEY, 0);
            localConf.setInt(PlacementMonitor.NUM_MOVING_THREADS_KEY, 1);
            // don't allow rescan, make sure only one job is submitted.
            localConf.setLong("raid.policy.rescan.interval", 3600 * 1000L);
            cnode = RaidNode.createRaidNode(null, localConf);
            // Verify the policies are parsed correctly
            for (PolicyInfo p : cnode.getAllPolicies()) {
                if (p.getName().equals("policy1")) {
                    Path srcPath = new Path("/user/dhruba/raidtest");
                    assertTrue(p.getSrcPath().equals(srcPath.makeQualified(srcPath.getFileSystem(conf))));
                } else if (p.getName().equals("policy2")) {
                    Path srcPath = new Path("/user/dhruba/raidtest2");
                    assertTrue(p.getSrcPath().equals(srcPath.makeQualified(srcPath.getFileSystem(conf))));
                } else if (p.getName().equals("policy3")) {
                    Path srcPath = new Path("/user/dhruba/raidtest3");
                    assertTrue(p.getSrcPath().equals(srcPath.makeQualified(srcPath.getFileSystem(conf))));
                } else if (p.getName().equals("policy4")) {
                    Path srcPath = new Path("/user/dhruba/dir-raidtest/");
                    assertTrue(p.getSrcPath().equals(srcPath.makeQualified(srcPath.getFileSystem(conf))));
                } else {
                    assertEquals(p.getName(), "policy5");
                    Path srcPath = new Path("/user/dhruba/dir-raidtestrs/");
                    assertTrue(p.getSrcPath().equals(srcPath.makeQualified(srcPath.getFileSystem(conf))));
                }
                if (p.getName().equals("policy3") || p.getName().equals("policy5")) {
                    if (p.getName().equals("policy3")) {
                        assertTrue(p.getCodecId().equals("rs"));
                    } else {
                        assertTrue(p.getCodecId().equals("dir-rs"));
                    }
                    assertEquals(rstargetReplication, Integer.parseInt(p.getProperty("targetReplication")));
                    assertEquals(rsmetaReplication, Integer.parseInt(p.getProperty("metaReplication")));
                } else {
                    if (p.getName().equals("policy4")) {
                        assertTrue(p.getCodecId().equals("dir-xor"));
                    } else {
                        assertTrue(p.getCodecId().equals("xor"));
                    }
                    assertEquals(targetReplication, Integer.parseInt(p.getProperty("targetReplication")));
                    assertEquals(metaReplication, Integer.parseInt(p.getProperty("metaReplication")));
                }
            }

            long start = System.currentTimeMillis();
            final int MAX_WAITTIME = 120000;

            assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
            DistRaidNode dcnode = (DistRaidNode) cnode;
            checkTestFiles("/user/dhruba/raidtest/", "/raid/user/dhruba/raidtest", xorstripeLength,
                    targetReplication, metaReplication, dcnode.placementMonitor, Codec.getCodec("xor"), 5);
            checkTestFiles("/user/dhruba/raidtest2/", "/raid/user/dhruba/raidtest2", xorstripeLength,
                    targetReplication, metaReplication, dcnode.placementMonitor, Codec.getCodec("xor"), 5);
            checkTestFiles("/user/dhruba/raidtest3/", "/raidrs/user/dhruba/raidtest3", rsstripeLength,
                    rstargetReplication, rsmetaReplication, dcnode.placementMonitor, Codec.getCodec("rs"), 1);
            checkTestFiles("/user/dhruba/dir-raidtest/1/", "/dir-raid/user/dhruba/dir-raidtest/1", xorstripeLength,
                    targetReplication, metaReplication, dcnode.placementMonitor, Codec.getCodec("dir-xor"), 5);
            checkTestFiles("/user/dhruba/dir-raidtestrs/2/", "/dir-raidrs/user/dhruba/dir-raidtestrs/2",
                    rsstripeLength, rstargetReplication, rsmetaReplication, dcnode.placementMonitor,
                    Codec.getCodec("dir-rs"), 2);
            while (dcnode.jobMonitor.runningJobsCount() > 0 && System.currentTimeMillis() - start < MAX_WAITTIME) {
                LOG.info("Waiting for zero running jobs: " + dcnode.jobMonitor.runningJobsCount());
                Thread.sleep(1000);
            }
            TestBlockFixer.verifyMetrics(fileSys, cnode, LOGTYPES.ENCODING, LOGRESULTS.SUCCESS, 5 + 5 + 1 + 1 + 1,
                    true);

            LOG.info("Test testDistRaid successful.");
        } catch (Exception e) {
            LOG.info("testDistRaid Exception " + e + StringUtils.stringifyException(e));
            throw e;
        } finally {
            if (cnode != null) {
                cnode.stop();
                cnode.join();
            }
            stopClusters();
        }
        LOG.info("Test testDistRaid completed.");
    }

    //
    // simulate a corruption at specified offset and verify that eveyrthing is good
    //
    void simulateError(RaidShell shell, FileSystem fileSys, Path file1, long crc, long corruptOffset)
            throws IOException {
        // recover the file assuming that we encountered a corruption at offset 0
        String[] args = new String[3];
        args[0] = "-recover";
        args[1] = file1.toString();
        args[2] = Long.toString(corruptOffset);
        Path recover1 = shell.recover(args[0], args, 1)[0];

        // compare that the recovered file is identical to the original one
        LOG.info("Comparing file " + file1 + " with recovered file " + recover1);
        validateFile(fileSys, file1, recover1, crc);
        fileSys.delete(recover1, false);
    }

    //
    // creates a file and populate it with random data. Returns its crc.
    //
    static long createOldFile(FileSystem fileSys, Path name, int repl, int numBlocks, long blocksize)
            throws IOException {
        CRC32 crc = new CRC32();
        FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf().getInt("io.file.buffer.size", 4096),
                (short) repl, blocksize);
        // fill random data into file
        byte[] b = new byte[(int) blocksize];
        for (int i = 0; i < numBlocks; i++) {
            if (i == (numBlocks - 1)) {
                b = new byte[(int) blocksize / 2];
            }
            rand.nextBytes(b);
            stm.write(b);
            crc.update(b);
        }

        stm.close();
        return crc.getValue();
    }

    //
    // validates that file matches the crc.
    //
    private void validateFile(FileSystem fileSys, Path name1, Path name2, long crc) throws IOException {

        FileStatus stat1 = fileSys.getFileStatus(name1);
        FileStatus stat2 = fileSys.getFileStatus(name2);
        assertTrue(" Length of file " + name1 + " is " + stat1.getLen() + " is different from length of file "
                + name1 + " " + stat2.getLen(), stat1.getLen() == stat2.getLen());

        CRC32 newcrc = new CRC32();
        FSDataInputStream stm = fileSys.open(name2);
        final byte[] b = new byte[4192];
        int num = 0;
        while (num >= 0) {
            num = stm.read(b);
            if (num < 0) {
                break;
            }
            newcrc.update(b, 0, num);
        }
        stm.close();
        if (newcrc.getValue() != crc) {
            fail("CRC mismatch of files " + name1 + " with file " + name2);
        }
    }

    public void testSuspendTraversal() throws Exception {
        LOG.info("Test testSuspendTraversal started.");
        long targetReplication = 2;
        long metaReplication = 2;

        createClusters(false, false);
        ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
        cb.addPolicy("policy1", "/user/dhruba/raidtest", targetReplication, metaReplication);
        cb.persist();

        RaidNode cnode = null;
        try {
            for (int i = 0; i < 6; i++) {
                Path file = new Path("/user/dhruba/raidtest/dir" + i + "/file" + i);
                createOldFile(fileSys, file, 1, 7, 1024L);
            }

            LOG.info("Test testSuspendTraversal created test files");

            Configuration localConf = new Configuration(conf);
            localConf.setInt("raid.distraid.max.jobs", 3);
            localConf.setInt("raid.distraid.max.files", 2);
            localConf.setInt("raid.directorytraversal.threads", 1);
            localConf.setBoolean(StatisticsCollector.STATS_COLLECTOR_SUBMIT_JOBS_CONFIG, false);
            // don't allow rescan, make sure only one job is submitted.
            localConf.setLong("raid.policy.rescan.interval", 3600 * 1000L);
            // 6 test files: 3 jobs with 2 files each.
            final int numJobsExpected = 3;
            cnode = RaidNode.createRaidNode(null, localConf);

            long start = System.currentTimeMillis();
            final int MAX_WAITTIME = 300000;

            assertTrue("cnode is not DistRaidNode", cnode instanceof DistRaidNode);
            DistRaidNode dcnode = (DistRaidNode) cnode;

            start = System.currentTimeMillis();
            while (dcnode.jobMonitor.jobsSucceeded() < numJobsExpected
                    && System.currentTimeMillis() - start < MAX_WAITTIME) {
                LOG.info("Waiting for num jobs succeeded " + dcnode.jobMonitor.jobsSucceeded() + " to reach "
                        + numJobsExpected);
                Thread.sleep(3000);
            }
            // Wait for any running jobs to finish.
            start = System.currentTimeMillis();
            while (dcnode.jobMonitor.runningJobsCount() > 0 && System.currentTimeMillis() - start < MAX_WAITTIME) {
                LOG.info("Waiting for zero running jobs: " + dcnode.jobMonitor.runningJobsCount());
                Thread.sleep(1000);
            }
            assertEquals(numJobsExpected, dcnode.jobMonitor.jobsMonitored());
            assertEquals(numJobsExpected, dcnode.jobMonitor.jobsSucceeded());

            LOG.info("Test testSuspendTraversal successful.");
            TestBlockFixer.verifyMetrics(fileSys, cnode, LOGTYPES.ENCODING, LOGRESULTS.SUCCESS, 6L, true);
        } catch (Exception e) {
            LOG.info("testSuspendTraversal Exception " + e + StringUtils.stringifyException(e));
            throw e;
        } finally {
            if (cnode != null) {
                cnode.stop();
                cnode.join();
            }
            stopClusters();
        }
        LOG.info("Test testSuspendTraversal completed.");
    }

    public void testFileListPolicy() throws Exception {
        LOG.info("Test testFileListPolicy started.");
        short targetReplication = 2;
        short metaReplication = 2;
        long stripeLength = 3;
        short srcReplication = 3;

        createClusters(false, false);
        ConfigBuilder cb = new ConfigBuilder(CONFIG_FILE);
        cb.addAbstractPolicy("abstractPolicy", targetReplication, metaReplication, "xor");
        cb.addFileListPolicy("policy2", "/user/rvadali/raidfilelist.txt", "abstractPolicy");
        cb.addAbstractPolicy("abstractPolicy2", targetReplication, metaReplication, "dir-xor");
        cb.addFileListPolicy("policy3", "/user/rvadali/raiddirlist.txt", "abstractPolicy2");
        cb.persist();

        RaidNode cnode = null;
        Path fileListPath = new Path("/user/rvadali/raidfilelist.txt");
        Path dirListPath = new Path("/user/rvadali/raiddirlist.txt");
        try {
            createTestFiles(fileSys, "/user/rvadali/raidtest/", "/raid/user/rvadali/raidtest", 5, 7,
                    srcReplication);
            createTestFiles(fileSys, "/user/rvadali/dir-raidtest/1/", "/dir-raid/user/rvadali/dir-raidtest/1", 2, 7,
                    srcReplication);
            // althought the files reach the target replication, because
            // they don't have parities, we are still going to raid them.
            createTestFiles(fileSys, "/user/rvadali/dir-raidtest/2/", "/dir-raid/user/rvadali/dir-raidtest/2", 2, 7,
                    targetReplication);
            createTestFiles(fileSys, "/user/dhruba/unraidable/", "/raid/user/dhruba/unradiable/", 2, 1,
                    srcReplication);
            // generate parities files for the following directory, raidnode
            // won't raid them again
            Path srcDir3 = new Path("/user/rvadali/dir-raidtest/3/");
            Path destPath3 = new Path("/dir-raid/user/rvadali/dir-raidtest/3");
            createTestFiles(fileSys, srcDir3.toString(), destPath3.toString(), 2, 4, targetReplication);
            Codec dirCode = Codec.getCodec("dir-xor");
            FileStatus srcStat3 = fileSys.getFileStatus(srcDir3);
            assertTrue(RaidNode.doRaid(conf, srcStat3, new Path(dirCode.parityDirectory), dirCode,
                    new RaidNode.Statistics(), RaidUtils.NULL_PROGRESSABLE, false, targetReplication,
                    metaReplication));
            FileStatus parityStat3 = fileSys.getFileStatus(destPath3);
            assertEquals(parityStat3.getModificationTime(), srcStat3.getModificationTime());
            assertEquals(parityStat3.getReplication(), metaReplication);
            LOG.info("Test testFileListPolicy created test files");

            // Create list of files to raid.
            FSDataOutputStream out = fileSys.create(fileListPath);
            FileStatus[] files = fileSys.listStatus(new Path("/user/rvadali/raidtest"));
            for (FileStatus f : files) {
                out.write(f.getPath().toString().getBytes());
                out.write("\n".getBytes());
            }
            // write directory, we should filter it.
            out.write("/user/rvadali/raidtest/\n".getBytes());
            // small file, we should filter it
            out.write("/user/dhruba/unraidable/file0\n".getBytes());
            out.write("/user/dhruba/unraidable/file1\n".getBytes());
            out.close();

            FSDataOutputStream dirOut = fileSys.create(dirListPath);
            FileStatus[] dirs = fileSys.listStatus(new Path("/user/rvadali/dir-raidtest"));
            for (FileStatus dir : dirs) {
                dirOut.write(dir.getPath().toString().getBytes());
                dirOut.write("\n".getBytes());
            }
            // write file, we should filter it
            dirOut.write("/user/rvadali/raidtest/file0\n".getBytes());
            // small directory, we should filter it
            out.write("/user/dhruba/unraidable\n".getBytes());
            dirOut.close();

            Configuration localConf = new Configuration(conf);
            // don't allow rescan, make sure only one job is submitted.
            localConf.setLong("raid.policy.rescan.interval", 3600 * 1000L);
            cnode = RaidNode.createRaidNode(localConf);
            final int MAX_WAITTIME = 120000;
            DistRaidNode dcnode = (DistRaidNode) cnode;

            long start = System.currentTimeMillis();
            int numJobsExpected = 2;
            while (dcnode.jobMonitor.jobsSucceeded() < numJobsExpected
                    && System.currentTimeMillis() - start < MAX_WAITTIME) {
                LOG.info("Waiting for num jobs succeeded " + dcnode.jobMonitor.jobsSucceeded() + " to reach "
                        + numJobsExpected);
                Thread.sleep(1000);
            }
            assertEquals(numJobsExpected, dcnode.jobMonitor.jobsMonitored());
            assertEquals(numJobsExpected, dcnode.jobMonitor.jobsSucceeded());

            Path destPath = new Path("/raid/user/rvadali/raidtest");
            for (FileStatus file : files) {
                TestRaidDfs.waitForFileRaided(LOG, fileSys, file.getPath(), destPath, targetReplication);
            }
            Path destPath1 = new Path("/dir-raid/user/rvadali/dir-raidtest");
            for (FileStatus dir : dirs) {
                TestRaidDfs.waitForDirRaided(LOG, fileSys, dir.getPath(), destPath1, targetReplication);
            }
            assertTrue(!fileSys.exists(new Path("/raid/user/dhruba/unraidable/file0")));
            assertTrue(!fileSys.exists(new Path("/raid/user/dhruba/unraidable/file1")));
            assertTrue(!fileSys.exists(new Path("/dir-raid/user/dhruba/unraidable")));

            TestBlockFixer.verifyMetrics(fileSys, cnode, LOGTYPES.ENCODING, LOGRESULTS.SUCCESS, 7L, true);
            Map<String, Counters> raidProgress = dcnode.jobMonitor.getRaidProgress();
            long succeedFiles = 0;
            for (Counters ctrs : raidProgress.values()) {
                Counters.Counter ctr = ctrs.findCounter(Counter.FILES_SUCCEEDED);
                succeedFiles += ctr.getValue();
            }
            // We have one raided directory, so it's total - 1;
            assertEquals(succeedFiles, files.length + dirs.length - 1);
            LOG.info("Test testFileListPolicy successful.");
        } catch (Exception e) {
            LOG.info("testFileListPolicy Exception " + e + StringUtils.stringifyException(e));
            throw e;
        } finally {
            if (cnode != null) {
                cnode.stop();
                cnode.join();
            }
            stopClusters();
        }
        LOG.info("Test testFileListPolicy completed.");
    }
}