org.apache.hadoop.tools.TestIntegrationByChunk.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.tools.TestIntegrationByChunk.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.tools;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.tools.util.DistCpTestUtils;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.tools.util.TestDistCpUtils;
import org.apache.hadoop.util.DataChecksum;
import org.apache.log4j.Level;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.*;

public class TestIntegrationByChunk {
    private static final Log LOG = LogFactory.getLog(TestIntegrationByChunk.class);

    // File system handle of the mini cluster; assigned in setup().
    private static FileSystem fs;

    // Qualified forms of LIST_PATH and TARGET_PATH; assigned in setup().
    private static Path listFile;
    private static Path target;
    private final static String SOURCE_PATH = "/tmp/source";
    private final static String LIST_PATH = "/tmp/listing";
    private final static String WORK_PATH = "/tmp/working";
    private final static String TARGET_PATH = "/tmp/target";
    private final static String ROOT_PATH = "/tmp";
    // Qualified ROOT_PATH rendered as a string; test entries are created beneath it.
    private static String root;
    // Length of the buffer touchFile fills with random bytes on each write.
    private static final int DEFAULT_BUFFER_SIZE = 1024;
    // Chunk counts below presumably assume 4 KB chunks (NON_DEFAULT_BLOCK_SIZE) — TODO confirm.
    private static final long DEFAULT_FILE_SIZE = 30 * 1024 + 900; //8 chunks
    private static final long MID_FILE_SIZE = 32 * 1024; //8 chunks
    private static final long LARGE_FILE_SIZE = 320 * 1024 + 76; //81 chunks
    // Written to "dfs.blocksize" in getConfigurationForCluster(); also used by touchFile.
    private static final long NON_DEFAULT_BLOCK_SIZE = 4 * 1024;

    // Mini DFS cluster shared by all tests; built in setup().
    private static MiniDFSCluster cluster;

    // Configuration the cluster was built with; assigned in setup().
    private static Configuration configuration;

    /**
     * Builds a single-datanode MiniDFSCluster, resolves the shared listing/target/root
     * paths against its file system and clears the test root.
     *
     * @throws Exception if the cluster cannot be started or the paths cannot be prepared;
     *                   the failure is logged and rethrown so that the tests do not run
     *                   against half-initialised static state
     */
    @BeforeClass
    public static void setup() throws Exception {
        try {
            configuration = getConfigurationForCluster();
            cluster = new MiniDFSCluster.Builder(configuration).numDataNodes(1).format(true).build();
            fs = cluster.getFileSystem();
            // TODO: only for debugging the unit tests, remove this when ok
            org.apache.log4j.LogManager.getRootLogger().setLevel(Level.DEBUG);
            listFile = new Path(LIST_PATH).makeQualified(fs.getUri(), fs.getWorkingDirectory());
            target = new Path(TARGET_PATH).makeQualified(fs.getUri(), fs.getWorkingDirectory());
            root = new Path(ROOT_PATH).makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();

            TestDistCpUtils.delete(fs, root);
        } catch (IOException e) {
            LOG.error("Exception encountered ", e);
            // Swallowing this would leave fs/cluster null and surface later as unrelated NPEs.
            throw e;
        }
    }

    /**
     * Creates the configuration used for the mini cluster: log directory, a minimum
     * block size limit of 0 and a default block size of NON_DEFAULT_BLOCK_SIZE.
     * Also sets the "test.build.data" system property as a side effect.
     *
     * @return a freshly created configuration
     * @throws IOException declared for callers; nothing in the visible body throws it
     */
    private static Configuration getConfigurationForCluster() throws IOException {
        final Configuration conf = new Configuration();
        System.setProperty("test.build.data", "target/tmp/build/TEST_COPY_CHUNK_MAPPER/data");

        conf.set("hadoop.log.dir", "target/tmp");
        conf.set("dfs.namenode.fs-limits.min-block-size", "0");
        conf.set("dfs.blocksize", String.valueOf(NON_DEFAULT_BLOCK_SIZE));

        final String defaultFsName = conf.get("fs.default.name");
        LOG.debug("fs.default.name  == " + defaultFsName);
        final String httpAddress = conf.get("dfs.http.address");
        LOG.debug("dfs.http.address == " + httpAddress);
        return conf;
    }

    /**
     * Builds the per-job DistCp configuration: the cluster configuration plus the
     * qualified work/final target paths and the by-chunk related switches.
     *
     * @return a new configuration for one DistCp run
     * @throws IOException if the cluster file system cannot be obtained
     */
    private static Configuration getConf() throws IOException {
        final Configuration jobConf = getConfigurationForCluster();
        final FileSystem clusterFs = cluster.getFileSystem();

        final Path qualifiedWork =
                new Path(WORK_PATH).makeQualified(clusterFs.getUri(), clusterFs.getWorkingDirectory());
        final Path qualifiedTarget =
                new Path(TARGET_PATH).makeQualified(clusterFs.getUri(), clusterFs.getWorkingDirectory());

        jobConf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, qualifiedWork.toString());
        jobConf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, qualifiedTarget.toString());

        jobConf.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
        jobConf.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
        jobConf.setBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), true);
        jobConf.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(), "br");
        jobConf.setBoolean(DistCpConstants.CONF_LABEL_COPY_BY_CHUNK, true);
        return jobConf;
    }

    /**
     * Runs the single-file / missing-target scenario twice: first with sync off,
     * then with sync on, printing a separator line between the two runs.
     */
    @Test(timeout = 100000)
    public void testSingleFileMissingTargetByChunk() {
        final boolean[] syncModes = {false, true};
        for (boolean sync : syncModes) {
            if (sync) {
                // Visual separator between the two runs on stdout.
                System.out.println("split***********************************");
                //TODO sync is controlled by "-update"
            }
            caseSingleFileMissingTargetByChunk(sync);
        }
    }

    /**
     * Copies one small file to a target path that has not been created, in by-chunk mode.
     * The original author notes that by-chunk copy needs a FileSystem that supports
     * the concat operation.
     *
     * @param sync value passed to {@code DistCpOptions.setSyncFolder}
     */
    private void caseSingleFileMissingTargetByChunk(boolean sync) {
        final String relPath = "singlefile1/file1";
        try {
            addEntries(listFile, relPath);

            // Source file: 2048 + 900 bytes, created with a 1 KB block size.
            final long size = 2048 + 900;
            final short replic = 2;
            final long blockSize = 1024;
            final Path sourceFile = new Path(root + "/" + relPath);
            TestDistCpUtils.createFile(fs, sourceFile, size, replic, blockSize);

            // Make sure the file really carries the requested block size.
            final long actualBlockSize = fs.getFileStatus(sourceFile).getBlockSize();
            Assert.assertEquals(blockSize, actualBlockSize);

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(sync);
            opts.setDeleteMissing(false);
            //TODO: overwrite
            opts.setOverwrite(false);
            opts.setByChunk(true);
            // Target is missing, so the root path is the source path itself.
            opts.setTargetPathExists(false);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 1, relPath);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /** Runs the single-file / existing-target-file scenario with sync off, then on. */
    @Test(timeout = 100000)
    public void testSingleFileTargetFileByChunk() {
        for (boolean sync : new boolean[] {false, true}) {
            caseSingleFileTargetFileByChunk(sync);
        }
    }

    /**
     * Copies one file onto a target path that already exists as a file.
     * Source and target are both written by createFilesWithDiffSeed, which seeds
     * each entry with its index, so the two start with different seeds.
     *
     * @param sync value passed to {@code DistCpOptions.setSyncFolder}
     */
    private void caseSingleFileTargetFileByChunk(boolean sync) {
        final String relPath = "singlefile1/file1";
        try {
            addEntries(listFile, relPath);
            createFilesWithDiffSeed(relPath, "target");

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(sync);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(true);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 1, relPath);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /** Runs the single-file / existing-target-directory scenario with sync off, then on. */
    @Test(timeout = 100000)
    public void testSingleFileTargetDirByChunk() {
        for (boolean sync : new boolean[] {false, true}) {
            caseSingleFileTargetDirByChunk(sync);
        }
    }

    /**
     * Copies one file into a target that already exists as a directory.
     *
     * @param sync value passed to {@code DistCpOptions.setSyncFolder}
     */
    private void caseSingleFileTargetDirByChunk(boolean sync) {
        final String relPath = "singlefile2/file2";
        try {
            addEntries(listFile, relPath);
            createFilesWithDiffSeed(relPath);
            // Pre-create the target as a directory.
            mkdirs(target.toString());

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(sync);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(true);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 1, relPath);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /** Runs the single-directory / missing-target scenario with sync off, then on. */
    @Test(timeout = 100000)
    public void testSingleDirTargetMissingByChunk() {
        for (boolean sync : new boolean[] {false, true}) {
            caseSingleDirTargetMissingByChunk(sync);
        }
    }

    /**
     * Copies a directory (containing one empty sub-directory) to a target that
     * has not been created.
     *
     * @param sync value passed to {@code DistCpOptions.setSyncFolder}
     */
    private void caseSingleDirTargetMissingByChunk(boolean sync) {
        final String sourceDir = "singledir";
        try {
            addEntries(listFile, sourceDir);
            mkdirs(root + "/singledir/dir1");

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(sync);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(false);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 1, sourceDir);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Copies a directory (containing one empty sub-directory) into an already
     * existing target directory, with sync disabled.
     */
    @Test(timeout = 100000)
    public void testSingleDirTargetPresentByChunk() {
        final String sourceDir = "singledir";
        try {
            addEntries(listFile, sourceDir);
            mkdirs(root + "/singledir/dir1");
            mkdirs(target.toString());

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(false);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(true);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 1, sourceDir);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Same layout as the single-directory / existing-target test, but with sync
     * (update) enabled and "U"-prefixed directory names.
     */
    @Test(timeout = 100000)
    public void testUpdateSingleDirTargetPresentByChunk() {
        final String sourceDir = "Usingledir";
        try {
            addEntries(listFile, sourceDir);
            mkdirs(root + "/Usingledir/Udir1");
            mkdirs(target.toString());

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(true);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(true);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 1, sourceDir);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /** Runs the multi-file / existing-target scenario with sync off, then on. */
    @Test(timeout = 1000000)
    public void testMultiFileTargetPresentByChunk() {
        for (boolean sync : new boolean[] {false, true}) {
            caseMultiFileTargetPresentByChunk(sync);
        }
    }

    /**
     * Copies three files into an already existing target directory.
     *
     * @param sync value passed to {@code DistCpOptions.setSyncFolder}
     */
    private void caseMultiFileTargetPresentByChunk(boolean sync) {
        final String[] files = {"multifile/file3", "multifile/file4", "multifile/file5"};
        try {
            addEntries(listFile, files);
            createFiles(files);
            mkdirs(target.toString());

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(sync);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(true);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 3, files);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Runs DistCp with CustomCopyListing plugged in as the copy-listing class and
     * checks the result against file4 and file5 only (the listing rejects "file3").
     */
    @Test(timeout = 100000)
    public void testCustomCopyListingByChunk() {
        final String[] files = {"multifile1/file3", "multifile1/file4", "multifile1/file5"};
        try {
            addEntries(listFile, files);
            createFiles(files);
            mkdirs(target.toString());

            final Configuration conf = getConf();
            try {
                // CustomCopyListing excludes file3.
                conf.setClass(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS, CustomCopyListing.class,
                        CopyListing.class);
                final Path sourceDir = new Path(root + "/" + "multifile1");
                DistCpOptions opts = new DistCpOptions(Arrays.asList(sourceDir), target);
                opts.setSyncFolder(true);
                opts.setDeleteMissing(false);
                opts.setOverwrite(false);
                opts.setByChunk(true);
                opts.setPreserveStatus("bc");

                try {
                    final DistCp distCp = new DistCp(conf, opts);
                    distCp.execute();
                    checkResult(opts, target, 2, "multifile1/file4", "multifile1/file5");
                } catch (Exception failure) {
                    LOG.error("Exception encountered ", failure);
                    throw new IOException(failure);
                }
            } finally {
                conf.unset(DistCpConstants.CONF_LABEL_COPY_LISTING_CLASS);
            }
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /** Copy listing that accepts every path except those whose final name is "file3". */
    private static class CustomCopyListing extends SimpleCopyListing {

        public CustomCopyListing(Configuration configuration, Credentials credentials) {
            super(configuration, credentials);
        }

        /**
         * @return {@code false} for a path named "file3", {@code true} otherwise
         */
        @Override
        protected boolean shouldCopy(Path path, DistCpOptions options) {
            final String name = path.getName();
            final boolean excluded = name.equals("file3");
            return !excluded;
        }
    }

    /** Runs the multi-file / missing-target scenario with sync off, then on. */
    @Test(timeout = 100000)
    public void testMultiFileTargetMissingByChunk() {
        for (boolean sync : new boolean[] {false, true}) {
            caseMultiFileTargetMissingByChunk(sync);
        }
    }

    /**
     * Copies three files to a target that has not been created.
     *
     * @param sync value passed to {@code DistCpOptions.setSyncFolder}
     */
    private void caseMultiFileTargetMissingByChunk(boolean sync) {
        final String[] files = {"multifile/file3", "multifile/file4", "multifile/file5"};
        try {
            addEntries(listFile, files);
            createFiles(files);

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(sync);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(false);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 3, files);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Copies two source directories (one holding three files, one holding an empty
     * sub-directory) into an existing target, with sync disabled.
     */
    @Test(timeout = 100000)
    public void testMultiDirTargetPresentByChunk() {
        final String[] sourceDirs = {"multifile", "singledir"};
        try {
            addEntries(listFile, sourceDirs);
            createFiles("multifile/file3", "multifile/file4", "multifile/file5");
            mkdirs(target.toString(), root + "/singledir/dir1");

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(false);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(true);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 2, sourceDirs);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Copies two "U"-prefixed source directories into an existing target with sync
     * (update) enabled; the result is checked with an expected count of 4.
     */
    @Test(timeout = 100000)
    public void testUpdateMultiDirTargetPresentByChunk() {
        final String[] sourceDirs = {"Umultifile", "Usingledir"};
        try {
            addEntries(listFile, sourceDirs);
            createFiles("Umultifile/Ufile3", "Umultifile/Ufile4", "Umultifile/Ufile5");
            mkdirs(target.toString(), root + "/Usingledir/Udir1");

            DistCpOptions opts = new DistCpOptions(listFile, target);
            // update mode
            opts.setSyncFolder(true);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(true);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 4, sourceDirs);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Copies two source directories to a target that has not been created, with
     * sync disabled.
     */
    @Test(timeout = 100000)
    public void testMultiDirTargetMissingByChunk() {
        final String[] sourceDirs = {"multifile", "singledir"};
        try {
            addEntries(listFile, sourceDirs);
            createFiles("multifile/file3", "multifile/file4", "multifile/file5");
            mkdirs(root + "/singledir/dir1");

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(false);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(false);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 2, sourceDirs);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Copies two source directories to a target that has not been created, with
     * sync (update) enabled; the result is checked with an expected count of 4.
     */
    @Test(timeout = 100000)
    public void testUpdateMultiDirTargetMissingByChunk() {
        final String[] sourceDirs = {"multifile", "singledir"};
        try {
            addEntries(listFile, sourceDirs);
            createFiles("multifile/file3", "multifile/file4", "multifile/file5");
            mkdirs(root + "/singledir/dir1");

            DistCpOptions opts = new DistCpOptions(listFile, target);
            opts.setSyncFolder(true);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setByChunk(true);
            opts.setTargetPathExists(false);
            opts.setPreserveStatus("bc");
            runTest(opts);

            checkResult(opts, target, 4, sourceDirs);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
        }
    }

    /**
     * Syncs "srcdir" (one file) onto "dstdir" (two files) with delete-missing
     * enabled and checks the destination afterwards.
     */
    @Test(timeout = 100000)
    public void testDeleteMissingInDestinationByChunk() {
        final String sourceDir = "srcdir";
        try {
            addEntries(listFile, sourceDir);
            createFiles("srcdir/file1", "dstdir/file1", "dstdir/file2");

            final Path dstTarget = new Path(root + "/dstdir");
            DistCpOptions opts = new DistCpOptions(listFile, dstTarget);
            opts.setSyncFolder(true);
            opts.setDeleteMissing(true);
            opts.setOverwrite(false);
            opts.setTargetPathExists(false);
            opts.setByChunk(true);
            runTest(opts);

            checkResult(opts, dstTarget, 1, sourceDir);
        } catch (IOException ioe) {
            LOG.error("Exception encountered while running distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
            TestDistCpUtils.delete(fs, "target/tmp1");
        }
    }

    /**
     * Copies srcdir/file1 over an existing dstdir/file1 of the same length with the
     * overwrite option set, then reads the destination back and checks that it now
     * holds the source bytes.
     */
    @Test(timeout = 100000)
    public void testOverwriteByChunk() {
        byte[] contents1 = "contents1".getBytes();
        byte[] contents2 = "contents2".getBytes();
        // Equal lengths: the copy cannot be told apart by size, only by content.
        Assert.assertEquals(contents1.length, contents2.length);

        try {
            addEntries(listFile, "srcdir");
            createWithContents("srcdir/file1", contents1);
            createWithContents("dstdir/file1", contents2);

            Path dstTarget = new Path(root + "/dstdir");
            DistCpOptions options = new DistCpOptions(listFile, dstTarget);
            options.setSyncFolder(false);
            options.setDeleteMissing(false);
            options.setOverwrite(true);
            options.setTargetPathExists(false);
            options.setByChunk(true);
            runTest(options);

            checkResult(options, dstTarget, 1, "srcdir");

            // make sure dstdir/file1 has been overwritten with the contents
            // of srcdir/file1
            byte[] dstContents = new byte[contents1.length];
            FSDataInputStream is = fs.open(new Path(root + "/dstdir/file1"));
            try {
                is.readFully(dstContents);
            } finally {
                // Close even when readFully fails (e.g. destination shorter than expected).
                is.close();
            }
            Assert.assertArrayEquals(contents1, dstContents);
        } catch (IOException e) {
            LOG.error("Exception encountered while running distcp", e);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
            TestDistCpUtils.delete(fs, "target/tmp1");
        }
    }

    /**
     * Uses a single-level glob ("*") as the listing entry, with sync disabled and a
     * target that has not been created. The listing file lives under target/tmp1.
     */
    @Test(timeout = 100000)
    public void testGlobTargetMissingSingleLevelByChunk() {
        try {
            final Path globListing =
                    new Path("target/tmp1/listing").makeQualified(fs.getUri(), fs.getWorkingDirectory());
            addEntries(globListing, "*");
            createFiles("multifile/file3", "multifile/file4", "multifile/file5");
            createFiles("singledir/dir2/file6");

            DistCpOptions opts = new DistCpOptions(globListing, target);
            opts.setSyncFolder(false);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setTargetPathExists(false);
            opts.setByChunk(true);
            runTest(opts);

            checkResult(target, 2, "multifile/file3", "multifile/file4", "multifile/file5", "singledir/dir2/file6");
        } catch (IOException ioe) {
            LOG.error("Exception encountered while testing distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
            TestDistCpUtils.delete(fs, "target/tmp1");
        }
    }

    /**
     * Uses a single-level glob ("*") as the listing entry, with sync (update)
     * enabled and a target that has not been created.
     */
    @Test(timeout = 100000)
    public void testUpdateGlobTargetMissingSingleLevelByChunk() {
        try {
            final Path globListing =
                    new Path("target/tmp1/listing").makeQualified(fs.getUri(), fs.getWorkingDirectory());
            addEntries(globListing, "*");
            createFiles("multifile/file3", "multifile/file4", "multifile/file5");
            createFiles("singledir/dir2/file6");

            DistCpOptions opts = new DistCpOptions(globListing, target);
            opts.setSyncFolder(true);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setTargetPathExists(false);
            opts.setByChunk(true);
            runTest(opts);

            checkResult(target, 4, "file3", "file4", "file5", "dir2/file6");
        } catch (IOException ioe) {
            LOG.error("Exception encountered while running distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
            TestDistCpUtils.delete(fs, "target/tmp1");
        }
    }

    /**
     * Uses a two-level glob ("*&#47;*") as the listing entry, with sync disabled and
     * a target that has not been created.
     */
    @Test(timeout = 100000)
    public void testGlobTargetMissingMultiLevelByChunk() {
        try {
            final Path globListing =
                    new Path("target/tmp1/listing").makeQualified(fs.getUri(), fs.getWorkingDirectory());
            addEntries(globListing, "*/*");
            createFiles("multifile/file3", "multifile/file4", "multifile/file5");
            createFiles("singledir1/dir3/file7", "singledir1/dir3/file8", "singledir1/dir3/file9");

            DistCpOptions opts = new DistCpOptions(globListing, target);
            opts.setSyncFolder(false);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setTargetPathExists(false);
            opts.setByChunk(true);
            runTest(opts);

            checkResult(target, 4, "file3", "file4", "file5", "dir3/file7", "dir3/file8", "dir3/file9");
        } catch (IOException ioe) {
            LOG.error("Exception encountered while running distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
            TestDistCpUtils.delete(fs, "target/tmp1");
        }
    }

    /**
     * Uses a two-level glob ("*&#47;*") as the listing entry, with sync (update)
     * enabled and a target that has not been created.
     */
    @Test(timeout = 100000)
    public void testUpdateGlobTargetMissingMultiLevelByChunk() {
        try {
            final Path globListing =
                    new Path("target/tmp1/listing").makeQualified(fs.getUri(), fs.getWorkingDirectory());
            addEntries(globListing, "*/*");
            createFiles("multifile/file3", "multifile/file4", "multifile/file5");
            createFiles("singledir1/dir3/file7", "singledir1/dir3/file8", "singledir1/dir3/file9");

            DistCpOptions opts = new DistCpOptions(globListing, target);
            opts.setSyncFolder(true);
            opts.setDeleteMissing(false);
            opts.setOverwrite(false);
            opts.setTargetPathExists(false);
            opts.setByChunk(true);
            runTest(opts);

            checkResult(target, 6, "file3", "file4", "file5", "file7", "file8", "file9");
        } catch (IOException ioe) {
            LOG.error("Exception encountered while running distcp", ioe);
            Assert.fail("distcp failure");
        } finally {
            TestDistCpUtils.delete(fs, root);
            TestDistCpUtils.delete(fs, "target/tmp1");
        }
    }

    /**
     * Checks that a DistCp run which throws leaves the job staging directory empty.
     * The source path uses the scheme "noscheme", presumably so that execute() fails.
     */
    @Test(timeout = 100000)
    public void testCleanup() {
        try {
            Path sourcePath = new Path("noscheme:///file");
            List<Path> sources = new ArrayList<Path>();
            sources.add(sourcePath);

            DistCpOptions options = new DistCpOptions(sources, target);

            Configuration conf = getConf();
            Path stagingDir = JobSubmissionFiles.getStagingDir(new Cluster(conf), conf);
            stagingDir.getFileSystem(conf).mkdirs(stagingDir);

            try {
                new DistCp(conf, options).execute();
                // NOTE(review): if execute() does not throw, nothing is asserted here.
            } catch (Throwable t) {
                // JUnit order is (expected, actual): the staging directory must be empty.
                Assert.assertEquals(0, stagingDir.getFileSystem(conf).listStatus(stagingDir).length);
            }
        } catch (Exception e) {
            LOG.error("Exception encountered ", e);
            Assert.fail("testCleanup failed " + e.getMessage());
        }
    }

    /**
     * Writes a DistCp source listing: one line per entry, each entry prefixed with
     * the test root and a "/" separator. An existing file at listFile is replaced
     * according to the semantics of {@code fs.create(Path)}.
     *
     * @param listFile path of the listing file to (re)create
     * @param entries  paths relative to the test root; may contain glob patterns
     * @throws IOException if the listing cannot be created or written
     */
    private void addEntries(Path listFile, String... entries) throws IOException {
        // Encode explicitly as UTF-8 rather than the platform default charset, so the
        // listing bytes do not depend on the JVM's locale settings.
        final byte[] newline = "\n".getBytes("UTF-8");
        OutputStream out = fs.create(listFile);
        try {
            for (String entry : entries) {
                out.write((root + "/" + entry).getBytes("UTF-8"));
                out.write(newline);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Recreates each entry under the test root as a DEFAULT_FILE_SIZE file using
     * the default seed of the four-argument touchFile and a CRC32/512 checksum option.
     *
     * @param entries paths relative to the test root
     * @throws IOException if a file cannot be written
     */
    private void createFiles(String... entries) throws IOException {
        for (String entry : entries) {
            // Remove any previous file at this location before writing it again.
            TestDistCpUtils.delete(fs, root + "/" + entry);
            final Options.ChecksumOpt checksum = new Options.ChecksumOpt(DataChecksum.Type.CRC32, 512);
            touchFile(root + "/" + entry, DEFAULT_FILE_SIZE, false, checksum);
        }
    }

    /**
     * Like createFiles, but seeds the random content of each file with its index
     * in the argument list, so every entry is written with a different seed.
     *
     * @param entries paths relative to the test root
     * @throws IOException if a file cannot be written
     */
    private void createFilesWithDiffSeed(String... entries) throws IOException {
        int seed = 0;
        for (String entry : entries) {
            TestDistCpUtils.delete(fs, root + "/" + entry);
            final Options.ChecksumOpt checksum = new Options.ChecksumOpt(DataChecksum.Type.CRC32, 512);
            touchFile(root + "/" + entry, DEFAULT_FILE_SIZE, false, checksum, seed);
            seed++;
        }
    }

    /**
     * Recreates each entry under the test root as a LARGE_FILE_SIZE file, seeding
     * the random content of each file with its index in the argument list.
     *
     * @param entries paths relative to the test root
     * @throws IOException if a file cannot be written
     */
    private void createLargeFiles(String... entries) throws IOException {
        int seed = 0;
        for (String entry : entries) {
            TestDistCpUtils.delete(fs, root + "/" + entry);
            final Options.ChecksumOpt checksum = new Options.ChecksumOpt(DataChecksum.Type.CRC32, 512);
            touchFile(root + "/" + entry, LARGE_FILE_SIZE, false, checksum, seed);
            seed++;
        }
    }

    /**
     * Convenience overload of touchFile that uses a fixed random seed of 1.
     *
     * @param path              path of the file to write
     * @param totalFileSize     number of bytes to write
     * @param preserveBlockSize forwarded unchanged to the five-argument overload
     * @param checksumOpt       checksum option used when creating the file
     * @throws IOException if the file cannot be written
     */
    private static void touchFile(String path, long totalFileSize, boolean preserveBlockSize,
            Options.ChecksumOpt checksumOpt) throws IOException {
        final long defaultSeed = 1;
        touchFile(path, totalFileSize, preserveBlockSize, checksumOpt, defaultSeed);
    }

    /**
     * Creates (or overwrites) a file on the cluster file system and fills it with
     * pseudo-random bytes generated from the given seed, then prints the resulting
     * block size and replication to stdout.
     *
     * @param path              path of the file to write; qualified against the cluster file system
     * @param totalFileSize     number of bytes to write; nothing is written when not positive
     * @param preserveBlockSize when true the file uses NON_DEFAULT_BLOCK_SIZE, otherwise
     *                          twice the file system's default block size for the path
     * @param checksumOpt       checksum option passed to {@code fs.create}
     * @param seed              seed for the random content
     * @throws IOException if the file cannot be created, written or inspected
     */
    private static void touchFile(String path, long totalFileSize, boolean preserveBlockSize,
            Options.ChecksumOpt checksumOpt, long seed) throws IOException {
        DataOutputStream outputStream = null;
        try {
            final FileSystem clusterFs = cluster.getFileSystem();
            final Path qualifiedPath =
                    new Path(path).makeQualified(clusterFs.getUri(), clusterFs.getWorkingDirectory());

            final long blockSize;
            if (preserveBlockSize) {
                blockSize = NON_DEFAULT_BLOCK_SIZE;
            } else {
                blockSize = clusterFs.getDefaultBlockSize(qualifiedPath) * 2;
            }

            final FsPermission permission =
                    FsPermission.getFileDefault().applyUMask(FsPermission.getUMask(clusterFs.getConf()));
            // Replication is deliberately twice the file system default for this path.
            final short replication = (short) (clusterFs.getDefaultReplication(qualifiedPath) * 2);
            outputStream = clusterFs.create(qualifiedPath, permission,
                    EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 0, replication, blockSize, null,
                    checksumOpt);
            try {
                if (totalFileSize > 0) {
                    final byte[] chunk = new byte[DEFAULT_BUFFER_SIZE];
                    final Random random = new Random(seed);
                    for (long remaining = totalFileSize; remaining > 0; ) {
                        // A full buffer is regenerated each round; only the needed prefix is written.
                        random.nextBytes(chunk);
                        final int length = (int) Math.min(chunk.length, remaining);
                        outputStream.write(chunk, 0, length);
                        remaining -= length;
                    }
                }
            } finally {
                if (outputStream != null) {
                    outputStream.close();
                }
            }

            final FileStatus status = clusterFs.getFileStatus(qualifiedPath);
            System.out.println(status.getBlockSize());
            System.out.println(status.getReplication());
        } finally {
            IOUtils.cleanup(null, outputStream);
        }
    }

    /**
     * Creates a file under the test root holding exactly the given bytes.
     *
     * @param entry    path relative to the test root
     * @param contents bytes to write
     * @throws IOException if the file cannot be created or written
     */
    private void createWithContents(String entry, byte[] contents) throws IOException {
        final Path file = new Path(root + "/" + entry);
        final OutputStream stream = fs.create(file);
        try {
            stream.write(contents, 0, contents.length);
        } finally {
            stream.close();
        }
    }

    /**
     * Creates each of the given directories on the test file system, in order.
     *
     * @param entries directory paths to create; each is passed to {@code new Path(String)} as given
     * @throws IOException if the file system reports a failure while creating a directory
     */
    private void mkdirs(String... entries) throws IOException {
        for (int i = 0; i < entries.length; i++) {
            final Path directory = new Path(entries[i]);
            fs.mkdirs(directory);
        }
    }

    /**
     * Runs DistCp with the "delete missing" and "overwrite" options both disabled.
     *
     * @param listFile     file listing the source paths to copy
     * @param target       copy destination
     * @param targetExists whether the destination is expected to exist already
     * @param sync         whether to enable the sync-folder option
     * @throws IOException if the DistCp run fails
     */
    private void runTest(Path listFile, Path target, boolean targetExists, boolean sync) throws IOException {
        final boolean deleteMissing = false;
        final boolean overwrite = false;
        runTest(listFile, target, targetExists, sync, deleteMissing, overwrite);
    }

    /**
     * Builds a {@link DistCpOptions} from the given flags and runs DistCp with it.
     *
     * @param listFile     file listing the source paths to copy
     * @param target       copy destination
     * @param targetExists whether the destination is expected to exist already
     * @param sync         value for the sync-folder option
     * @param delete       value for the delete-missing option
     * @param overwrite    value for the overwrite option
     * @throws IOException wrapping any exception raised while executing DistCp
     */
    private void runTest(Path listFile, Path target, boolean targetExists, boolean sync, boolean delete,
            boolean overwrite) throws IOException {
        final DistCpOptions distCpOptions = new DistCpOptions(listFile, target);
        // Setter order is kept as-is in case the setters validate against each other.
        distCpOptions.setSyncFolder(sync);
        distCpOptions.setDeleteMissing(delete);
        distCpOptions.setOverwrite(overwrite);
        distCpOptions.setTargetPathExists(targetExists);

        // Execution, error logging and IOException wrapping live in the options-based overload.
        runTest(distCpOptions);
    }

    /**
     * Executes DistCp with the supplied, fully prepared options.
     *
     * @param options options to hand to DistCp
     * @throws IOException wrapping any exception raised while constructing or executing DistCp;
     *                     the original exception is logged and kept as the cause
     */
    private void runTest(DistCpOptions options) throws IOException {
        try {
            final DistCp distCp = new DistCp(getConf(), options);
            distCp.execute();
        } catch (Exception failure) {
            LOG.error("Exception encountered ", failure);
            throw new IOException(failure);
        }
    }

    /**
     * Compatibility variant of the result check that works without a {@link DistCpOptions}.
     *
     * <p>Asserts that {@code dstTarget} lists exactly {@code count} entries. When no
     * relative paths are given it only asserts that {@code dstTarget} exists. When
     * {@code dstTarget} is a file, it is validated against the first relative path
     * (resolved under the test root). Directory targets are not validated further yet.
     *
     * @param dstTarget path the copy was written to
     * @param count     expected number of entries returned by listing {@code dstTarget}
     * @param relPaths  source paths relative to the test root; may be null or empty
     * @throws IOException if the file system cannot be queried
     */
    private void checkResult(Path dstTarget, int count, String... relPaths) throws IOException {
        // Count the entries of the path under test, consistent with the options-based overload.
        Assert.assertEquals(count, fs.listStatus(dstTarget).length);
        if (relPaths == null || relPaths.length == 0) {
            Assert.assertTrue(dstTarget.toString(), fs.exists(dstTarget));
            return;
        }
        if (fs.isFile(dstTarget)) {
            // TODO: a file target implies a single source file; only relPaths[0] is checked.
            validateFile(new Path(root + "/" + relPaths[0]), null, dstTarget, dstTarget, fs, fs);
        }
        // TODO: directory targets are not validated per source path in this compatibility variant.
    }

    /**
     * Verifies the outcome of a DistCp run against its sources.
     *
     * <p>First asserts that listing {@code dstTarget} yields {@code count} entries. With no
     * relative paths, only the existence of {@code dstTarget} is asserted. If {@code dstTarget}
     * is a file, it is validated against the first relative path. Otherwise every relative
     * path is validated against the location it maps to under {@code dstTarget}.
     *
     * @param options   options used for the copy; needed to compute each source root
     * @param dstTarget path the copy was written to
     * @param count     expected number of entries returned by listing {@code dstTarget}
     * @param relPaths  source paths relative to the test root; may be null or empty
     * @throws IOException if the file system cannot be queried
     */
    private void checkResult(DistCpOptions options, Path dstTarget, int count, String... relPaths)
            throws IOException {
        final FileStatus[] listing = fs.listStatus(dstTarget);
        Assert.assertEquals(count, listing.length);

        final boolean noSources = relPaths == null || relPaths.length == 0;
        if (noSources) {
            Assert.assertTrue(dstTarget.toString(), fs.exists(dstTarget));
            return;
        }

        if (fs.isFile(dstTarget)) {
            // TODO: a file target implies a single source file; only the first entry is checked.
            final Path onlySource = new Path(root + "/" + relPaths[0]);
            validateFile(onlySource, null, dstTarget, dstTarget, fs, fs);
            return;
        }

        // Directory target: validate each source against its mapped location.
        for (int i = 0; i < relPaths.length; i++) {
            // TODO: Target Path
            final Path source = new Path(root + "/" + relPaths[i]);
            final FileStatus sourceStatus = fs.getFileStatus(source);
            final Path sourceRoot = DistCpTestUtils.computeSourceRootPath(sourceStatus, fs, fs, options);
            final Path expected = new Path(
                    dstTarget.toString() + "/" + DistCpUtils.getRelativePath(sourceRoot, source));
            validateFile(source, sourceRoot, expected, dstTarget, fs, fs);
        }
    }

    /**
     * Asserts that {@code targetPath} exists and matches {@code sourcePath}.
     *
     * <p>For a source directory, every child is validated recursively against the path
     * obtained by appending its path relative to {@code sourceRoot} to {@code targetRoot}.
     * For a source file, the method checks the length, then the byte content, and finally
     * (unless CRC checking is disabled in the configuration) the checksums.
     *
     * @param sourcePath path of the source file or directory
     * @param sourceRoot root against which relative paths of directory children are computed;
     *                   only used when {@code sourcePath} is a directory
     * @param targetPath path expected to hold the copy of {@code sourcePath}
     * @param targetRoot root under which directory children are expected on the target side
     * @param srcFS      file system holding {@code sourcePath}
     * @param targetFS   file system holding {@code targetPath}
     * @throws IOException if either file system cannot be queried or read
     */
    private void validateFile(Path sourcePath, Path sourceRoot, Path targetPath, Path targetRoot, FileSystem srcFS,
            FileSystem targetFS) throws IOException {
        // Each path is resolved on its own file system (the handles used to be swapped).
        Assert.assertTrue(targetPath.toString(), targetFS.exists(targetPath));
        FileStatus srcFileStatus = srcFS.getFileStatus(sourcePath);
        FileStatus targetFileStatus = targetFS.getFileStatus(targetPath);

        if (srcFileStatus.isDirectory()) {
            // Recurse into the directory; the target location is derived from the relative path.
            for (FileStatus child : srcFS.listStatus(sourcePath)) {
                Path childTarget = new Path(targetRoot.toString() + "/"
                        + DistCpUtils.getRelativePath(sourceRoot, child.getPath()));
                validateFile(child.getPath(), sourceRoot, childTarget, targetRoot, srcFS, targetFS);
            }
            return;
        }

        // check file length
        Assert.assertEquals(srcFileStatus.getLen(), targetFileStatus.getLen());

        // check file by content
        assertSameFileContent(srcFS, srcFileStatus.getPath(), targetFS, targetFileStatus.getPath(),
                srcFileStatus.getLen());

        // check file by checksum
        Configuration conf = getConf();
        // NOTE(review): the checksum source FS is resolved from the path and the test
        // configuration rather than taken from srcFS; kept as in the original.
        FileSystem sourceFS = sourcePath.getFileSystem(conf);
        String fileAttributesStr = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS, "");
        EnumSet<DistCpOptions.FileAttribute> fileAttributes = DistCpUtils.unpackAttributes(fileAttributesStr);
        final FileChecksum sourceChecksum = fileAttributes.contains(DistCpOptions.FileAttribute.CHECKSUMTYPE)
                ? sourceFS.getFileChecksum(sourcePath)
                : null;
        if (!conf.getBoolean(DistCpConstants.CONF_LABEL_SKIP_CRC, false)) {
            Assert.assertTrue(DistCpUtils.compareCheckSums(sourceFS, srcFileStatus.getPath(),
                    sourceChecksum, targetFS, targetPath));
        }
        LOG.info("source=(" + srcFileStatus + "), target=(" + targetFileStatus + ")");
    }

    /**
     * Compares the first {@code fileLen} bytes of two files chunk by chunk and fails the
     * test on the first chunk that differs. Both streams are closed on every exit path.
     */
    private void assertSameFileContent(FileSystem srcFS, Path source, FileSystem targetFS, Path target,
            long fileLen) throws IOException {
        final int bufferLen = 8 * 1024;
        byte[] srcChunk = new byte[bufferLen];
        byte[] targetChunk = new byte[bufferLen];

        FSDataInputStream srcStream = srcFS.open(source);
        try {
            FSDataInputStream targetStream = targetFS.open(target);
            try {
                long position = 0;
                while (position < fileLen) {
                    long remaining = fileLen - position;
                    if (remaining < bufferLen) {
                        // Last, shorter chunk: size the buffers exactly so readFully
                        // and the array comparison cover only the remaining bytes.
                        srcChunk = new byte[(int) remaining];
                        targetChunk = new byte[(int) remaining];
                    }
                    srcStream.readFully(position, srcChunk);
                    targetStream.readFully(position, targetChunk);
                    if (!Arrays.equals(srcChunk, targetChunk)) {
                        Assert.fail("content mismatch");
                    }
                    position += srcChunk.length;
                }
            } finally {
                targetStream.close();
            }
        } finally {
            srcStream.close();
        }
    }
}