org.apache.hadoop.tools.DistCpV1.java — source code listing

Java tutorial

Introduction

Below is the complete source code for org.apache.hadoop.tools.DistCpV1.java.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.tools;

import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Stack;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.InvalidInputException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix;

/**
 * A Map-reduce program to recursively copy directories between
 * different file-systems.
 */
@Deprecated
public class DistCpV1 implements Tool {
    public static final Log LOG = LogFactory.getLog(DistCpV1.class);

    /** Tool name; also the prefix for every distcp job-configuration key. */
    private static final String NAME = "distcp";

    /** Help text printed on invalid command-line arguments. */
    private static final String usage = NAME + " [OPTIONS] <srcurl>* <desturl>" + "\n\nOPTIONS:"
            + "\n-p[rbugpt]             Preserve status" + "\n                       r: replication number"
            + "\n                       b: block size" + "\n                       u: user"
            + "\n                       g: group" + "\n                       p: permission"
            + "\n                       t: modification and access times"
            + "\n                       -p alone is equivalent to -prbugpt"
            + "\n-i                     Ignore failures"
            + "\n-basedir <basedir>     Use <basedir> as the base directory when copying files from <srcurl>"
            + "\n-log <logdir>          Write logs to <logdir>"
            + "\n-m <num_maps>          Maximum number of simultaneous copies"
            + "\n-overwrite             Overwrite destination"
            + "\n-update                Overwrite if src size different from dst size"
            + "\n-skipcrccheck          Do not use CRC check to determine if src is "
            + "\n                       different from dest. Relevant only if -update"
            + "\n                       is specified"
            + "\n-f <urilist_uri>       Use list at <urilist_uri> as src list"
            + "\n-filelimit <n>         Limit the total number of files to be <= n"
            + "\n-sizelimit <n>         Limit the total size to be <= n bytes"
            + "\n-delete                Delete the files existing in the dst but not in src"
            + "\n-dryrun                Display count of files and total size of files"
            + "\n                        in src and then exit. Copy is not done at all."
            // typo fix: was "speicified with out"
            + "\n                        desturl should not be specified without -update."
            + "\n-mapredSslConf <f>     Filename of SSL configuration for mapper task" +

            "\n\nNOTE 1: if -overwrite or -update are set, each source URI is "
            + "\n      interpreted as an isomorphic update to an existing directory." + "\nFor example:"
            + "\nhadoop " + NAME + " -p -update \"hdfs://A:8020/user/foo/bar\" "
            + "\"hdfs://B:8020/user/foo/baz\"\n"
            + "\n     would update all descendants of 'baz' also in 'bar'; it would "
            + "\n     *not* update /user/foo/baz/bar" +

            "\n\nNOTE 2: The parameter <n> in -filelimit and -sizelimit can be "
            + "\n     specified with symbolic representation.  For examples,"
            + "\n       1230k = 1230 * 1024 = 1259520" + "\n       891g = 891 * 1024^3 = 956703965184" +

            "\n";

    // Presumably the default amount of source data assigned to each map task —
    // TODO(review): confirm against the job setup code (not visible in this chunk).
    private static final long BYTES_PER_MAP = 256 * 1024 * 1024;
    private static final int MAX_MAPS_PER_NODE = 20;
    // NOTE(review): used outside this chunk; semantics not verifiable here.
    private static final int SYNC_FILE_MAX = 10;
    /** Default max attempts per file when copy validation fails (see copyWithRetries). */
    private static final int DEFAULT_FILE_RETRIES = 3;

    /**
     * Job counters updated by the copy mapper: files copied, skipped and
     * failed, plus bytes actually copied vs. bytes expected from source sizes.
     */
    static enum Counter {
        COPY, SKIP, FAIL, BYTESCOPIED, BYTESEXPECTED
    }

    /**
     * Command-line flags and the job-configuration property each one sets.
     */
    static enum Options {
        /** Delete files existing in the destination but not in the source. */
        DELETE("-delete", NAME + ".delete"),
        /** Limit the total number of files copied. */
        FILE_LIMIT("-filelimit", NAME + ".limit.file"),
        /** Limit the total number of bytes copied. */
        SIZE_LIMIT("-sizelimit", NAME + ".limit.size"),
        /** Keep going when a source file cannot be read. */
        IGNORE_READ_FAILURES("-i", NAME + ".ignore.read.failures"),
        /** Preserve file status (replication, block size, user, group, ...). */
        PRESERVE_STATUS("-p", NAME + ".preserve.status"),
        /** Always overwrite the destination. */
        OVERWRITE("-overwrite", NAME + ".overwrite.always"),
        /** Overwrite only when source and destination differ. */
        UPDATE("-update", NAME + ".overwrite.ifnewer"),
        /** Skip the CRC comparison when deciding whether files differ. */
        SKIPCRC("-skipcrccheck", NAME + ".skip.crc.check");

        final String cmd, propertyname;

        private Options(String cmd, String propertyname) {
            this.cmd = cmd;
            this.propertyname = propertyname;
        }

        /**
         * Parse the numeric argument that must follow this flag.
         * Accepts symbolic binary prefixes (e.g. "1230k", "891g").
         * @param args   full argument array
         * @param offset index of the expected numeric argument
         * @return the parsed positive value
         * @throws IllegalArgumentException if missing or not positive
         */
        private long parseLong(String[] args, int offset) {
            if (offset == args.length) {
                throw new IllegalArgumentException("<n> not specified in " + cmd);
            }
            final long n = StringUtils.TraditionalBinaryPrefix.string2long(args[offset]);
            if (n <= 0) {
                throw new IllegalArgumentException("n = " + n + " <= 0 in " + cmd);
            }
            return n;
        }
    }

    /**
     * File attributes that can be preserved on the destination.  Each
     * attribute is selected on the command line by the first letter of its
     * name (r, b, u, g, p, t — see the -p usage text).
     */
    static enum FileAttribute {
        BLOCK_SIZE, REPLICATION, USER, GROUP, PERMISSION, TIMES;

        final char symbol;

        private FileAttribute() {
            symbol = StringUtils.toLowerCase(toString()).charAt(0);
        }

        /**
         * Parse a string of attribute symbols into a set of attributes.
         * @param s symbols to parse; null/empty selects every attribute
         * @return the selected attributes
         * @throws IllegalArgumentException on an unknown or repeated symbol
         */
        static EnumSet<FileAttribute> parse(String s) {
            if (s == null || s.length() == 0) {
                // "-p" with no letters means preserve everything.
                return EnumSet.allOf(FileAttribute.class);
            }

            final EnumSet<FileAttribute> selected = EnumSet.noneOf(FileAttribute.class);
            for (char c : s.toCharArray()) {
                // Look up the attribute whose symbol matches this character.
                FileAttribute match = null;
                for (FileAttribute candidate : values()) {
                    if (candidate.symbol == c) {
                        match = candidate;
                        break;
                    }
                }
                if (match == null) {
                    throw new IllegalArgumentException("'" + c + "' in " + s + " is undefined.");
                }
                // EnumSet.add returns false when the element is already present.
                if (!selected.add(match)) {
                    throw new IllegalArgumentException(
                            "There are more than one '" + match.symbol + "' in " + s);
                }
            }
            return selected;
        }
    }

    // Job-configuration keys used to pass distcp state from the driver to the
    // map tasks (all prefixed with the tool name, "distcp").
    static final String TMP_DIR_LABEL = NAME + ".tmp.dir";
    static final String DST_DIR_LABEL = NAME + ".dest.path";
    static final String JOB_DIR_LABEL = NAME + ".job.dir";
    static final String MAX_MAPS_LABEL = NAME + ".max.map.tasks";
    static final String SRC_LIST_LABEL = NAME + ".src.list";
    static final String SRC_COUNT_LABEL = NAME + ".src.count";
    static final String TOTAL_SIZE_LABEL = NAME + ".total.size";
    static final String DST_DIR_LIST_LABEL = NAME + ".dst.dir.list";
    static final String BYTES_PER_MAP_LABEL = NAME + ".bytes.per.map";
    static final String PRESERVE_STATUS_LABEL = Options.PRESERVE_STATUS.propertyname + ".value";
    static final String FILE_RETRIES_LABEL = NAME + ".file.retries";

    // Tool configuration; always held as a JobConf (see setConf).
    private JobConf conf;

    /**
     * Set the configuration for this tool, wrapping it in a {@link JobConf}
     * unless one was supplied directly.
     */
    public void setConf(Configuration conf) {
        this.conf = (conf instanceof JobConf) ? (JobConf) conf : new JobConf(conf);
    }

    /** @return the current configuration (always a {@link JobConf} instance). */
    public Configuration getConf() {
        return conf;
    }

    /**
     * Create a DistCpV1 tool using the given configuration.
     * @param conf configuration; wrapped in a {@link JobConf} if necessary
     */
    public DistCpV1(Configuration conf) {
        setConf(conf);
    }

    /**
     * An input/output pair of filenames.
     */
    static class FilePair implements Writable {
        FileStatus input = new FileStatus();
        String output;

        FilePair() {
        }

        FilePair(FileStatus input, String output) {
            this.input = input;
            this.output = output;
        }

        public void readFields(DataInput in) throws IOException {
            input.readFields(in);
            output = Text.readString(in);
        }

        public void write(DataOutput out) throws IOException {
            input.write(out);
            Text.writeString(out, output);
        }

        public String toString() {
            return input + " : " + output;
        }
    }

    /**
     * InputFormat of a distcp job responsible for generating splits of the src
     * file list.
     */
    static class CopyInputFormat implements InputFormat<Text, Text> {

        /**
         * Produce splits such that each is no greater than the quotient of the
         * total size and the number of splits requested.
         * @param job The handle to the JobConf object
         * @param numSplits Number of splits requested
         */
        public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
            int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
            long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
            String srcfilelist = job.get(SRC_LIST_LABEL, "");
            if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
                throw new RuntimeException("Invalid metadata: #files(" + cnfiles + ") total_size(" + cbsize
                        + ") listuri(" + srcfilelist + ")");
            }
            Path src = new Path(srcfilelist);
            FileSystem fs = src.getFileSystem(job);
            FileStatus srcst = fs.getFileStatus(src);

            ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
            LongWritable key = new LongWritable();
            FilePair value = new FilePair();
            final long targetsize = cbsize / numSplits;
            long pos = 0L;   // byte offset (in the list file) where the current split starts
            long last = 0L;  // offset just past the most recently read record
            long acc = 0L;   // source bytes accumulated into the current split
            long cbrem = srcst.getLen();  // bytes of the list file not yet assigned to a split
            // key is presumably the source file's length as written by the job
            // setup — TODO(review): confirm against the writer of SRC_LIST_LABEL.
            try (SequenceFile.Reader sl = new SequenceFile.Reader(job, Reader.file(src))) {
                for (; sl.next(key, value); last = sl.getPosition()) {
                    // if adding this split would put this split past the target size,
                    // cut the last split and put this next file in the next split.
                    if (acc + key.get() > targetsize && acc != 0) {
                        long splitsize = last - pos;
                        splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
                        cbrem -= splitsize;
                        pos = last;
                        acc = 0L;
                    }
                    acc += key.get();
                }
            }
            // Whatever remains of the list file becomes the final split.
            if (cbrem != 0) {
                splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
            }

            return splits.toArray(new FileSplit[splits.size()]);
        }

        /**
         * Returns a reader for this split of the src file list.
         */
        public RecordReader<Text, Text> getRecordReader(InputSplit split, JobConf job, Reporter reporter)
                throws IOException {
            return new SequenceFileRecordReader<Text, Text>(job, (FileSplit) split);
        }
    }

    /**
     * FSCopyFilesMapper: The mapper for copying files between FileSystems.
     */
    static class CopyFilesMapper implements Mapper<LongWritable, FilePair, WritableComparable<?>, Text> {
        // config
        // Copy buffer size in bytes; overridden by "copy.buf.size" in configure().
        private int sizeBuf = 128 * 1024;
        private FileSystem destFileSys = null;
        private boolean ignoreReadFailures;
        private boolean preserve_status;
        // Attributes to preserve when preserve_status is set.
        // (Field name "preseved" is a historical typo for "preserved".)
        private EnumSet<FileAttribute> preseved;
        private boolean overwrite;
        private boolean update;
        private Path destPath = null;
        private byte[] buffer = null;
        private JobConf job;
        private boolean skipCRCCheck = false;

        // stats
        private int failcount = 0;
        private int skipcount = 0;
        private int copycount = 0;

        /** Human-readable summary of copy/skip/fail counts. */
        private String getCountString() {
            return "Copied: " + copycount + " Skipped: " + skipcount + " Failed: " + failcount;
        }

        /** Publish the current counts as the task status string. */
        private void updateStatus(Reporter reporter) {
            reporter.setStatus(getCountString());
        }

        /**
         * Return true if dst should be replaced by src and the update flag is set.
         * Right now, this merely checks that the src and dst len are not equal. 
         * This should be improved on once modification times, CRCs, etc. can
         * be meaningful in this context.
         * @throws IOException 
         */
        private boolean needsUpdate(FileStatus srcstatus, FileSystem dstfs, Path dstpath) throws IOException {
            return update
                    && !sameFile(srcstatus.getPath().getFileSystem(job), srcstatus, dstfs, dstpath, skipCRCCheck);
        }

        /**
         * Create the destination file, deleting any pre-existing file first.
         * When status preservation is enabled, applies the source's permission,
         * replication and block size as selected by the preserved attributes.
         * @param f destination file to create
         * @param reporter progress reporter passed to the output stream
         * @param srcstat source file status supplying the attributes to preserve
         */
        private FSDataOutputStream create(Path f, Reporter reporter, FileStatus srcstat) throws IOException {
            if (destFileSys.exists(f)) {
                destFileSys.delete(f, false);
            }
            if (!preserve_status) {
                return destFileSys.create(f, true, sizeBuf, reporter);
            }

            FsPermission permission = preseved.contains(FileAttribute.PERMISSION) ? srcstat.getPermission() : null;
            short replication = preseved.contains(FileAttribute.REPLICATION) ? srcstat.getReplication()
                    : destFileSys.getDefaultReplication(f);
            long blockSize = preseved.contains(FileAttribute.BLOCK_SIZE) ? srcstat.getBlockSize()
                    : destFileSys.getDefaultBlockSize(f);
            return destFileSys.create(f, permission, true, sizeBuf, replication, blockSize, reporter);
        }

        /**
         * Validates copy by checking the sizes of files first and then
         * checksums, if the filesystems support checksums.
         * @param srcstat src path and metadata
         * @param absdst dst path
         * @return true if src & destination files are same
         */
        private boolean validateCopy(FileStatus srcstat, Path absdst) throws IOException {
            if (destFileSys.exists(absdst)) {
                if (sameFile(srcstat.getPath().getFileSystem(job), srcstat, destFileSys, absdst, skipCRCCheck)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Increment number of files copied and bytes copied and then report status
         */
        void updateCopyStatus(FileStatus srcstat, Reporter reporter) {
            copycount++;
            reporter.incrCounter(Counter.BYTESCOPIED, srcstat.getLen());
            reporter.incrCounter(Counter.COPY, 1);
            updateStatus(reporter);
        }

        /**
         * Skip copying this file if already exists at the destination.
         * Updates counters and copy status if skipping this file.
         * @return true    if copy of this file can be skipped
         */
        private boolean skipCopyFile(FileStatus srcstat, Path absdst,
                OutputCollector<WritableComparable<?>, Text> outc, Reporter reporter) throws IOException {
            if (destFileSys.exists(absdst) && !overwrite && !needsUpdate(srcstat, destFileSys, absdst)) {
                outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
                ++skipcount;
                reporter.incrCounter(Counter.SKIP, 1);
                updateStatus(reporter);
                return true;
            }
            return false;
        }

        /**
         * Copies single file to the path specified by tmpfile.
         * @param srcstat  src path and metadata
         * @param tmpfile  temporary file to which copy is to be done
         * @param absdst   actual destination path to which copy is to be done
         * @param reporter
         * @return Number of bytes copied
         */
        private long doCopyFile(FileStatus srcstat, Path tmpfile, Path absdst, Reporter reporter)
                throws IOException {
            long bytesCopied = 0L;
            Path srcPath = srcstat.getPath();
            // open src file
            try (FSDataInputStream in = srcPath.getFileSystem(job).open(srcPath)) {
                reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen());
                // open tmp file
                try (FSDataOutputStream out = create(tmpfile, reporter, srcstat)) {
                    LOG.info("Copying file " + srcPath + " of size " + srcstat.getLen() + " bytes...");

                    // copy file
                    for (int bytesRead; (bytesRead = in.read(buffer)) >= 0;) {
                        out.write(buffer, 0, bytesRead);
                        bytesCopied += bytesRead;
                        // Progress status: percent done plus human-readable byte counts.
                        reporter.setStatus(String.format("%.2f ", bytesCopied * 100.0 / srcstat.getLen()) + absdst
                                + " [ " + TraditionalBinaryPrefix.long2String(bytesCopied, "", 1) + " / "
                                + TraditionalBinaryPrefix.long2String(srcstat.getLen(), "", 1) + " ]");
                    }
                }
            }
            return bytesCopied;
        }

        /**
         * Copy a file to a destination.
         * @param srcstat src path and metadata
         * @param relativedst relative dst path
         * @param outc Log of skipped files
         * @param reporter
         * @throws IOException if copy fails(even if the validation of copy fails)
         */
        private void copy(FileStatus srcstat, Path relativedst, OutputCollector<WritableComparable<?>, Text> outc,
                Reporter reporter) throws IOException {
            Path absdst = new Path(destPath, relativedst);
            int totfiles = job.getInt(SRC_COUNT_LABEL, -1);
            assert totfiles >= 0 : "Invalid file count " + totfiles;

            if (totfiles == 1) {
                // Copying a single file; use dst path provided by user as
                // destination file rather than destination directory
                Path dstparent = absdst.getParent();
                if (!(destFileSys.exists(dstparent) && destFileSys.getFileStatus(dstparent).isDirectory())) {
                    absdst = dstparent;
                }
            }

            // if a directory, ensure created even if empty
            if (srcstat.isDirectory()) {
                if (destFileSys.exists(absdst)) {
                    if (destFileSys.getFileStatus(absdst).isFile()) {
                        throw new IOException("Failed to mkdirs: " + absdst + " is a file.");
                    }
                } else if (!destFileSys.mkdirs(absdst)) {
                    throw new IOException("Failed to mkdirs " + absdst);
                }
                // TODO: when modification times can be set, directories should be
                // emitted to reducers so they might be preserved. Also, mkdirs does
                // not currently return an error when the directory already exists;
                // if this changes, all directory work might as well be done in reduce
                return;
            }

            // Can we skip copying this file ?
            if (skipCopyFile(srcstat, absdst, outc, reporter)) {
                return;
            }

            Path tmpfile = new Path(job.get(TMP_DIR_LABEL), relativedst);
            // do the actual copy to tmpfile
            long bytesCopied = doCopyFile(srcstat, tmpfile, absdst, reporter);

            if (bytesCopied != srcstat.getLen()) {
                throw new IOException(
                        "File size not matched: copied " + bytesString(bytesCopied) + " to tmpfile (=" + tmpfile
                                + ") but expected " + bytesString(srcstat.getLen()) + " from " + srcstat.getPath());
            } else {
                if (destFileSys.exists(absdst) && destFileSys.getFileStatus(absdst).isDirectory()) {
                    throw new IOException(absdst + " is a directory");
                }
                if (!destFileSys.mkdirs(absdst.getParent())) {
                    throw new IOException("Failed to create parent dir: " + absdst.getParent());
                }
                // Atomically promote the tmp copy to the final destination.
                rename(tmpfile, absdst);

                if (!validateCopy(srcstat, absdst)) {
                    destFileSys.delete(absdst, false);
                    throw new IOException("Validation of copy of file " + srcstat.getPath() + " failed.");
                }
                updateDestStatus(srcstat, destFileSys.getFileStatus(absdst));
            }

            // report at least once for each file
            updateCopyStatus(srcstat, reporter);
        }

        /** rename tmp to dst, delete dst if already exists */
        private void rename(Path tmp, Path dst) throws IOException {
            try {
                if (destFileSys.exists(dst)) {
                    destFileSys.delete(dst, true);
                }
                if (!destFileSys.rename(tmp, dst)) {
                    throw new IOException();
                }
            } catch (IOException cause) {
                throw (IOException) new IOException(
                        "Fail to rename tmp file (=" + tmp + ") to destination file (=" + dst + ")")
                                .initCause(cause);
            }
        }

        /** Apply the preserved source attributes to the destination, if enabled. */
        private void updateDestStatus(FileStatus src, FileStatus dst) throws IOException {
            if (preserve_status) {
                DistCpV1.updateDestStatus(src, dst, preseved, destFileSys);
            }
        }

        /** Format a byte count together with its binary-prefix representation. */
        static String bytesString(long b) {
            return b + " bytes (" + TraditionalBinaryPrefix.long2String(b, "", 1) + ")";
        }

        /**
         * Copies a file and validates the copy by checking the checksums.
         * If validation fails, retries (max number of tries is distcp.file.retries)
         * to copy the file.
         */
        void copyWithRetries(FileStatus srcstat, Path relativedst, OutputCollector<WritableComparable<?>, Text> out,
                Reporter reporter) throws IOException {

            // max tries to copy when validation of copy fails
            final int maxRetries = job.getInt(FILE_RETRIES_LABEL, DEFAULT_FILE_RETRIES);
            // save update flag for later copies within the same map task
            final boolean saveUpdate = update;

            int retryCnt = 1;
            for (; retryCnt <= maxRetries; retryCnt++) {
                try {
                    //copy the file and validate copy
                    copy(srcstat, relativedst, out, reporter);
                    break;// copy successful
                } catch (IOException e) {
                    LOG.warn("Copy of " + srcstat.getPath() + " failed.", e);
                    if (retryCnt < maxRetries) {// copy failed and need to retry
                        LOG.info("Retrying copy of file " + srcstat.getPath());
                        // Force update mode so the retry overwrites the partial copy;
                        // restored to saveUpdate once retries are exhausted below.
                        update = true; // set update flag for retries
                    } else {// no more retries... Give up
                        update = saveUpdate;
                        throw new IOException("Copy of file failed even with " + retryCnt + " tries.", e);
                    }
                }
            }
        }

        /** Mapper configuration.
         * Extracts source and destination file system, as well as
         * top-level paths on source and destination directories.
         * Gets the named file systems, to be used later in map.
         */
        public void configure(JobConf job) {
            destPath = new Path(job.get(DST_DIR_LABEL, "/"));
            try {
                destFileSys = destPath.getFileSystem(job);
            } catch (IOException ex) {
                throw new RuntimeException("Unable to get the named file system.", ex);
            }
            sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
            buffer = new byte[sizeBuf];
            ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
            preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
            if (preserve_status) {
                preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
            }
            update = job.getBoolean(Options.UPDATE.propertyname, false);
            // -overwrite is ignored when -update is set.
            overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
            skipCRCCheck = job.getBoolean(Options.SKIPCRC.propertyname, false);
            this.job = job;
        }

        /** Map method. Copies one file from source file system to destination.
         * @param key src len
         * @param value FilePair (FileStatus src, Path dst)
         * @param out Log of failed copies
         * @param reporter
         */
        public void map(LongWritable key, FilePair value, OutputCollector<WritableComparable<?>, Text> out,
                Reporter reporter) throws IOException {
            final FileStatus srcstat = value.input;
            final Path relativedst = new Path(value.output);
            try {
                copyWithRetries(srcstat, relativedst, out, reporter);
            } catch (IOException e) {
                // Record the failure in the counters and in the job output.
                ++failcount;
                reporter.incrCounter(Counter.FAIL, 1);
                updateStatus(reporter);
                final String sfailure = "FAIL " + relativedst + " : " + StringUtils.stringifyException(e);
                out.collect(null, new Text(sfailure));
                LOG.info(sfailure);
                if (e instanceof FileNotFoundException) {
                    final String s = "Possible Cause for failure: Either the filesystem "
                            + srcstat.getPath().getFileSystem(job) + " is not accessible or the file is deleted";
                    LOG.error(s);
                    out.collect(null, new Text(s));
                }

                // Best-effort cleanup of the leftover tmp file (up to 3 attempts).
                try {
                    for (int i = 0; i < 3; ++i) {
                        try {
                            final Path tmp = new Path(job.get(TMP_DIR_LABEL), relativedst);
                            if (destFileSys.delete(tmp, true))
                                break;
                        } catch (Throwable ex) {
                            // ignore, we are just cleaning up
                            LOG.debug("Ignoring cleanup exception", ex);
                        }
                        // update status, so we don't get timed out
                        updateStatus(reporter);
                        Thread.sleep(3 * 1000);
                    }
                } catch (InterruptedException inte) {
                    // NOTE(review): the thread's interrupt status is not restored
                    // here (no Thread.currentThread().interrupt()) before wrapping.
                    throw (IOException) new IOException().initCause(inte);
                }
            } finally {
                updateStatus(reporter);
            }
        }

        /**
         * Fail the task at close time if any copies failed and read failures
         * are not being ignored.
         * @throws IOException carrying the final counts when failures occurred
         */
        public void close() throws IOException {
            if (0 == failcount || ignoreReadFailures) {
                return;
            }
            throw new IOException(getCountString());
        }
    }

    /**
     * Read a file containing one source path per line (UTF-8) and return the
     * paths in order.
     * @param conf    configuration used to resolve the list file's filesystem
     * @param srcList path of the file listing the sources
     * @return the paths listed in the file
     * @throws IOException if the list file cannot be opened or read
     */
    private static List<Path> fetchFileList(Configuration conf, Path srcList) throws IOException {
        final List<Path> paths = new ArrayList<Path>();
        final FileSystem fs = srcList.getFileSystem(conf);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(srcList), Charset.forName("UTF-8")))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                paths.add(new Path(line));
            }
        }
        return paths;
    }

    /**
     * Copy srcPath to destPath.
     * @param conf configuration for the copy job
     * @param srcPath source path, or (when srcAsList) the URI of a file listing sources
     * @param destPath destination path
     * @param logPath where job logs are written
     * @param srcAsList if true, srcPath names a UTF-8 file with one source path per line
     * @param ignoreReadFailures whether to continue past unreadable source files
     * @throws IOException if the copy fails
     * @deprecated retained for backward compatibility only (this whole class is deprecated)
     */
    @Deprecated
    public static void copy(Configuration conf, String srcPath, String destPath, Path logPath, boolean srcAsList,
            boolean ignoreReadFailures) throws IOException {
        final Path src = new Path(srcPath);
        List<Path> tmp = new ArrayList<Path>();
        if (srcAsList) {
            // Expand the list file into individual source paths.
            tmp.addAll(fetchFileList(conf, src));
        } else {
            tmp.add(src);
        }
        EnumSet<Options> flags = ignoreReadFailures ? EnumSet.of(Options.IGNORE_READ_FAILURES)
                : EnumSet.noneOf(Options.class);

        final Path dst = new Path(destPath);
        copy(conf,
                new Arguments(tmp, null, dst, logPath, flags, null, Long.MAX_VALUE, Long.MAX_VALUE, null, false));
    }

    /** Sanity check for srcPath */
    private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) throws IOException {
        List<IOException> rslt = new ArrayList<IOException>();
        List<Path> unglobbed = new LinkedList<Path>();

        Path[] ps = new Path[srcPaths.size()];
        ps = srcPaths.toArray(ps);
        TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);

        for (Path p : srcPaths) {
            FileSystem fs = p.getFileSystem(jobConf);
            FileStatus[] inputs = fs.globStatus(p);

            if (inputs != null && inputs.length > 0) {
                for (FileStatus onePath : inputs) {
                    unglobbed.add(onePath.getPath());
                }
            } else {
                rslt.add(new IOException("Input source " + p + " does not exist."));
            }
        }
        if (!rslt.isEmpty()) {
            throw new InvalidInputException(rslt);
        }
        srcPaths.clear();
        srcPaths.addAll(unglobbed);
    }

    /**
     * Driver to copy srcPath to destPath depending on required protocol.
     * Runs the MapReduce copy job, then cleans up the tmp and job directories
     * even on failure (unless this is a dry run, which has nothing to clean).
     * @param conf configuration
     * @param args arguments
     */
    static void copy(final Configuration conf, final Arguments args) throws IOException {
        LOG.info("srcPaths=" + args.srcs);
        if (!args.dryrun || args.flags.contains(Options.UPDATE)) {
            LOG.info("destPath=" + args.dst);
        }

        JobConf job = createJobConf(conf);

        // Expand globs and obtain namenode tokens before submission.
        checkSrcPath(job, args.srcs);
        if (args.preservedAttributes != null) {
            job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
        }
        if (args.mapredSslConf != null) {
            job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
        }

        //Initialize the mapper
        try {
            // setup(...) presumably returns false when there is nothing to run
            // (e.g. dry run) — TODO(review): confirm against setup(), defined
            // elsewhere in this file.
            if (setup(conf, job, args)) {
                JobClient.runJob(job);
            }
            if (!args.dryrun) {
                // finalize(...) here is this class's static helper, not Object#finalize().
                finalize(conf, job, args.dst, args.preservedAttributes);
            }
        } finally {
            if (!args.dryrun) {
                //delete tmp
                fullyDelete(job.get(TMP_DIR_LABEL), job);
            }
            //delete jobDirectory
            fullyDelete(job.get(JOB_DIR_LABEL), job);
        }
    }

    /**
     * Re-apply the preserved attributes of a source file/dir to its copied
     * destination, issuing filesystem calls only where the destination
     * currently differs from the source.
     */
    private static void updateDestStatus(FileStatus src, FileStatus dst, EnumSet<FileAttribute> preseved,
            FileSystem destFileSys) throws IOException {
        final String newOwner = preseved.contains(FileAttribute.USER) && !src.getOwner().equals(dst.getOwner())
                ? src.getOwner()
                : null;
        final String newGroup = preseved.contains(FileAttribute.GROUP) && !src.getGroup().equals(dst.getGroup())
                ? src.getGroup()
                : null;
        if (newOwner != null || newGroup != null) {
            // setOwner leaves whichever of owner/group is null unchanged.
            destFileSys.setOwner(dst.getPath(), newOwner, newGroup);
        }
        if (preseved.contains(FileAttribute.PERMISSION) && !src.getPermission().equals(dst.getPermission())) {
            destFileSys.setPermission(dst.getPath(), src.getPermission());
        }
        if (preseved.contains(FileAttribute.TIMES)) {
            destFileSys.setTimes(dst.getPath(), src.getModificationTime(), src.getAccessTime());
        }
    }

    /**
     * Post-job pass: walk the recorded destination-directory list and re-apply
     * preserved owner/group/permission to each directory. Nothing to do when
     * no attributes, or only times, were requested.
     */
    static private void finalize(Configuration conf, JobConf jobconf, final Path destPath,
            String presevedAttributes) throws IOException {
        if (presevedAttributes == null) {
            return;
        }
        final EnumSet<FileAttribute> attributes = FileAttribute.parse(presevedAttributes);
        final boolean needsPass = attributes.contains(FileAttribute.USER)
                || attributes.contains(FileAttribute.GROUP)
                || attributes.contains(FileAttribute.PERMISSION);
        if (!needsPass) {
            return;
        }

        final FileSystem dstfs = destPath.getFileSystem(conf);
        final Path dirList = new Path(jobconf.get(DST_DIR_LIST_LABEL));
        try (SequenceFile.Reader in = new SequenceFile.Reader(jobconf, Reader.file(dirList))) {
            final Text dstRelative = new Text();
            final FilePair pair = new FilePair();
            while (in.next(dstRelative, pair)) {
                final Path absdst = new Path(destPath, pair.output);
                updateDestStatus(pair.input, dstfs.getFileStatus(absdst), attributes, dstfs);
            }
        }
    }

    /** Parsed, immutable command-line arguments for a distcp invocation. */
    static class Arguments {
        /** List of source paths to copy. */
        final List<Path> srcs;
        /** Base directory the source layout is made relative to (may be null). */
        final Path basedir;
        /** Destination path. */
        final Path dst;
        /** Log output directory (may be null; a default is chosen during setup). */
        final Path log;
        /** Command-line option flags. */
        final EnumSet<Options> flags;
        /** Raw attribute string from -p (may be null). */
        final String preservedAttributes;
        /** Maximum number of files to copy. */
        final long filelimit;
        /** Maximum number of bytes to copy. */
        final long sizelimit;
        /** ssl configuration resource (may be null). */
        final String mapredSslConf;
        /** If true, only plan/report the copy; no job is launched. */
        final boolean dryrun;

        /**
         * Arguments for distcp
         * @param srcs List of source paths
         * @param basedir Base directory for copy
         * @param dst Destination path
         * @param log Log output directory
         * @param flags Command-line flags
         * @param preservedAttributes Preserved attributes 
         * @param filelimit File limit
         * @param sizelimit Size limit
         * @param mapredSslConf ssl configuration
         * @param dryrun true to plan the copy without executing it
         */
        Arguments(List<Path> srcs, Path basedir, Path dst, Path log, EnumSet<Options> flags,
                String preservedAttributes, long filelimit, long sizelimit, String mapredSslConf, boolean dryrun) {
            this.srcs = srcs;
            this.basedir = basedir;
            this.dst = dst;
            this.log = log;
            this.flags = flags;
            this.preservedAttributes = preservedAttributes;
            this.filelimit = filelimit;
            this.sizelimit = sizelimit;
            this.mapredSslConf = mapredSslConf;
            this.dryrun = dryrun;

            if (LOG.isTraceEnabled()) {
                LOG.trace("this = " + this);
            }
        }

        /**
         * Parse a raw command line into an {@link Arguments} instance.
         *
         * @param args raw command-line tokens
         * @param conf configuration; -m writes the map count into it
         * @return the parsed arguments
         * @throws IllegalArgumentException on a malformed or inconsistent command line
         * @throws IOException if a -f listing file cannot be read
         */
        static Arguments valueOf(String[] args, Configuration conf) throws IOException {
            List<Path> srcs = new ArrayList<Path>();
            Path dst = null;
            Path log = null;
            Path basedir = null;
            EnumSet<Options> flags = EnumSet.noneOf(Options.class);
            String preservedAttributes = null;
            String mapredSslConf = null;
            long filelimit = Long.MAX_VALUE;
            long sizelimit = Long.MAX_VALUE;
            boolean dryrun = false;

            for (int idx = 0; idx < args.length; idx++) {
                Options[] opt = Options.values();
                // Find the first Options entry whose cmd string prefixes this token.
                int i = 0;
                for (; i < opt.length && !args[idx].startsWith(opt[i].cmd); i++)
                    ;

                if (i < opt.length) {
                    flags.add(opt[i]);
                    if (opt[i] == Options.PRESERVE_STATUS) {
                        // -p may carry the attribute letters directly, e.g. "-pugp".
                        preservedAttributes = args[idx].substring(2);
                        FileAttribute.parse(preservedAttributes); //validation
                    } else if (opt[i] == Options.FILE_LIMIT) {
                        filelimit = Options.FILE_LIMIT.parseLong(args, ++idx);
                    } else if (opt[i] == Options.SIZE_LIMIT) {
                        sizelimit = Options.SIZE_LIMIT.parseLong(args, ++idx);
                    }
                } else if ("-f".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("urilist_uri not specified in -f");
                    }
                    srcs.addAll(fetchFileList(conf, new Path(args[idx])));
                } else if ("-log".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("logdir not specified in -log");
                    }
                    log = new Path(args[idx]);
                } else if ("-basedir".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("basedir not specified in -basedir");
                    }
                    basedir = new Path(args[idx]);
                } else if ("-mapredSslConf".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("ssl conf file not specified in -mapredSslConf");
                    }
                    mapredSslConf = args[idx];
                } else if ("-dryrun".equals(args[idx])) {
                    dryrun = true;
                    dst = new Path("/tmp/distcp_dummy_dest");//dummy destination
                } else if ("-m".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("num_maps not specified in -m");
                    }
                    try {
                        conf.setInt(MAX_MAPS_LABEL, Integer.parseInt(args[idx]));
                    } catch (NumberFormatException e) {
                        throw new IllegalArgumentException("Invalid argument to -m: " + args[idx]);
                    }
                } else if ('-' == args[idx].codePointAt(0)) {
                    throw new IllegalArgumentException("Invalid switch " + args[idx]);
                } else if (idx == args.length - 1 && (!dryrun || flags.contains(Options.UPDATE))) {
                    // Last bare token is the destination (unless dryrun already set a dummy).
                    dst = new Path(args[idx]);
                } else {
                    srcs.add(new Path(args[idx]));
                }
            }
            // mandatory command-line parameters
            if (srcs.isEmpty() || dst == null) {
                throw new IllegalArgumentException("Missing " + (dst == null ? "dst path" : "src"));
            }
            // incompatible command-line flags
            final boolean isOverwrite = flags.contains(Options.OVERWRITE);
            final boolean isUpdate = flags.contains(Options.UPDATE);
            final boolean isDelete = flags.contains(Options.DELETE);
            final boolean skipCRC = flags.contains(Options.SKIPCRC);
            if (isOverwrite && isUpdate) {
                throw new IllegalArgumentException("Conflicting overwrite policies");
            }
            if (!isUpdate && skipCRC) {
                throw new IllegalArgumentException(
                        Options.SKIPCRC.cmd + " is relevant only with the " + Options.UPDATE.cmd + " option");
            }
            if (isDelete && !isOverwrite && !isUpdate) {
                // Use the command-line spelling (.cmd, e.g. "-overwrite") rather than the
                // enum constant name, consistent with every other message above.
                throw new IllegalArgumentException(Options.DELETE.cmd + " must be specified with "
                        + Options.OVERWRITE.cmd + " or " + Options.UPDATE.cmd + ".");
            }
            return new Arguments(srcs, basedir, dst, log, flags, preservedAttributes, filelimit, sizelimit,
                    mapredSslConf, dryrun);
        }

        @Override
        public String toString() {
            return getClass().getName() + "{" + "\n  srcs = " + srcs + "\n  basedir = " + basedir + "\n  dst = "
                    + dst + "\n  log = " + log + "\n  flags = " + flags + "\n  preservedAttributes = "
                    + preservedAttributes + "\n  filelimit = " + filelimit + "\n  sizelimit = " + sizelimit
                    + "\n  mapredSslConf = " + mapredSslConf + "\n  dryrun = " + dryrun + "\n}";
        }
    }

    /**
     * This is the main driver for recursively copying directories
     * across file systems. It takes at least two cmdline parameters. A source
     * URL and a destination URL. It then essentially does an "ls -lR" on the
     * source URL, and writes the output in a round-robin manner to all the map
     * input files. The mapper actually copies the files allotted to it. The
     * reduce is empty.
     *
     * @return 0 on success; a distinct negative code per failure category
     */
    public int run(String[] args) {
        try {
            final Arguments parsed = Arguments.valueOf(args, conf);
            copy(conf, parsed);
            return 0;
        } catch (IllegalArgumentException e) {
            System.err.println(StringUtils.stringifyException(e) + "\n" + usage);
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        } catch (DuplicationException e) {
            System.err.println(StringUtils.stringifyException(e));
            return DuplicationException.ERROR_CODE;
        } catch (RemoteException e) {
            // Surface the underlying cause of the common remote failure modes.
            final IOException cause = e.unwrapRemoteException(FileNotFoundException.class,
                    AccessControlException.class, QuotaExceededException.class);
            System.err.println(StringUtils.stringifyException(cause));
            return -3;
        } catch (Exception e) {
            System.err.println("With failures, global counters are inaccurate; consider running with -i");
            System.err.println("Copy failed: " + StringUtils.stringifyException(e));
            return -999;
        }
    }

    /** Command-line entry point: run DistCpV1 under the standard ToolRunner. */
    public static void main(String[] args) throws Exception {
        final DistCpV1 distcp = new DistCpV1(new JobConf(DistCpV1.class));
        System.exit(ToolRunner.run(distcp, args));
    }

    /**
     * Make a path relative with respect to a root path.
     * absPath is always assumed to descend from root.
     * Otherwise returned path is null.
     *
     * @param root the prefix to strip (only its URI path component is used)
     * @param absPath an absolute path expected to descend from root
     * @return the relative path, "." if absPath equals root, or null if
     *         absPath does not descend from root
     * @throws IllegalArgumentException if absPath is not absolute
     */
    static String makeRelative(Path root, Path absPath) {
        if (!absPath.isAbsolute()) {
            throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
        }
        String p = absPath.toUri().getPath();

        StringTokenizer pathTokens = new StringTokenizer(p, "/");
        for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/"); rootTokens
                .hasMoreTokens();) {
            // Guard against absPath having fewer components than root: previously
            // pathTokens.nextToken() threw NoSuchElementException instead of
            // honoring the documented "return null" contract.
            if (!pathTokens.hasMoreTokens() || !rootTokens.nextToken().equals(pathTokens.nextToken())) {
                return null;
            }
        }
        // Re-join the remaining components with the path separator.
        StringBuilder sb = new StringBuilder();
        while (pathTokens.hasMoreTokens()) {
            sb.append(pathTokens.nextToken());
            if (pathTokens.hasMoreTokens()) {
                sb.append(Path.SEPARATOR);
            }
        }
        return sb.length() == 0 ? "." : sb.toString();
    }

    /**
     * Calculate how many maps to run.
     * Number of maps is bounded by a minimum of the cumulative size of the
     * copy / (distcp.bytes.per.map, default BYTES_PER_MAP or -m on the
     * command line) and at most (distcp.max.map.tasks, default
     * MAX_MAPS_PER_NODE * nodes in the cluster).
     * @param totalBytes Count of total bytes for job
     * @param job The job to configure
     * @return Count of maps to run.
     */
    private static int setMapCount(long totalBytes, JobConf job) throws IOException {
        final long bytesPerMap = job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);
        final int clusterCap = job.getInt(MAX_MAPS_LABEL,
                MAX_MAPS_PER_NODE * new JobClient(job).getClusterStatus().getTaskTrackers());
        // At least one map, at most the configured/cluster cap.
        final int numMaps = Math.max(1, Math.min((int) (totalBytes / bytesPerMap), clusterCap));
        job.setNumMapTasks(numMaps);
        return numMaps;
    }

    /** Fully (recursively) delete dir; a null dir is a no-op, failure only warns. */
    static void fullyDelete(String dir, Configuration conf) throws IOException {
        if (dir == null) {
            return;
        }
        final Path target = new Path(dir);
        if (!target.getFileSystem(conf).delete(target, true)) {
            LOG.warn("Could not fully delete " + target);
        }
    }

    /** Build the JobConf shared by all distcp jobs: map-only, no speculation. */
    private static JobConf createJobConf(Configuration conf) {
        final JobConf jobconf = new JobConf(conf, DistCpV1.class);
        jobconf.setJobName(conf.get("mapred.job.name", NAME));

        // Speculative execution would mean multiple writers to the same
        // destination file, which DFS does not handle.
        jobconf.setMapSpeculativeExecution(false);

        jobconf.setInputFormat(CopyInputFormat.class);
        jobconf.setOutputKeyClass(Text.class);
        jobconf.setOutputValueClass(Text.class);
        jobconf.setMapperClass(CopyFilesMapper.class);
        jobconf.setNumReduceTasks(0); // map-only job
        return jobconf;
    }

    private static final Random RANDOM = new Random();

    /** @return a random non-negative integer rendered in base 36 (short alphanumeric id). */
    public static String getRandomId() {
        // Character.MAX_RADIX == 36, the same radix the original literal used.
        return Integer.toString(RANDOM.nextInt(Integer.MAX_VALUE), Character.MAX_RADIX);
    }

    /**
     * Increase the replication factor of _distcp_src_files to
     * sqrt(min(maxMapsOnCluster, numMaps)). This is to reduce the chance of
     * failing of distcp because of "not having a replication of _distcp_src_files
     * available for reading for some maps".
     */
    private static void setReplication(Configuration conf, JobConf jobConf, Path srcfilelist, int numMaps)
            throws IOException {
        final int clusterMapCapacity = new JobClient(jobConf).getClusterStatus().getMaxMapTasks();
        final short desired = (short) Math.ceil(Math.sqrt(Math.min(clusterMapCapacity, numMaps)));
        final FileSystem fs = srcfilelist.getFileSystem(conf);
        if (fs.getFileStatus(srcfilelist).getReplication() >= desired) {
            return; // already replicated widely enough; never decrease
        }
        if (!fs.setReplication(srcfilelist, desired)) {
            throw new IOException("Unable to increase the replication of file " + srcfilelist);
        }
    }

    /**
     * Does the dir already exist at destination ?
     * @return true   if the dir already exists at destination
     * @throws FileAlreadyExistsException if dst exists but is a regular file
     */
    private static boolean dirExists(Configuration conf, Path dst) throws IOException {
        final FileStatus status;
        try {
            status = dst.getFileSystem(conf).getFileStatus(dst);
        } catch (FileNotFoundException e) {
            return false;
        }
        if (status.isFile()) {
            throw new FileAlreadyExistsException("Not a dir: " + dst + " is a file.");
        }
        return true;
    }

    /**
     * Initialize DFSCopyFileMapper specific job-configuration.
     * Builds the per-job file lists (_distcp_src_files, _distcp_dst_files,
     * _distcp_dst_dirs) under a fresh staging job directory, chooses the
     * log/tmp paths, sets the map count, and checks for duplicates.
     * @param conf : The dfs/mapred configuration.
     * @param jobConf : The handle to the jobConf object to be initialized.
     * @param args Arguments
     * @return true if it is necessary to launch a job.
     */
    static boolean setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
        jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

        //set boolean values
        final boolean update = args.flags.contains(Options.UPDATE);
        final boolean skipCRCCheck = args.flags.contains(Options.SKIPCRC);
        // Overwrite is neutralized by -update and by -dryrun.
        final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE) && !args.dryrun;
        jobConf.setBoolean(Options.UPDATE.propertyname, update);
        jobConf.setBoolean(Options.SKIPCRC.propertyname, skipCRCCheck);
        jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
        jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
                args.flags.contains(Options.IGNORE_READ_FAILURES));
        jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

        // Unique suffix shared by the job dir, default log dir and tmp dir names.
        final String randomId = getRandomId();
        JobClient jClient = new JobClient(jobConf);
        Path stagingArea;
        try {
            stagingArea = JobSubmissionFiles.getStagingDir(jClient.getClusterHandle(), conf);
        } catch (InterruptedException ie) {
            throw new IOException(ie);
        }

        Path jobDirectory = new Path(stagingArea + NAME + "_" + randomId);
        FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
        FileSystem.mkdirs(jClient.getFs(), jobDirectory, mapredSysPerms);
        jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

        // Per-map byte budget; also used below to decide sync points in the src list.
        long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

        FileSystem dstfs = args.dst.getFileSystem(conf);

        // get tokens for all the required FileSystems..
        TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[] { args.dst }, conf);

        boolean dstExists = dstfs.exists(args.dst);
        boolean dstIsDir = false;
        if (dstExists) {
            dstIsDir = dstfs.getFileStatus(args.dst).isDirectory();
        }

        // default logPath
        // When dst is not an existing directory, logs go beside dst in its parent.
        Path logPath = args.log;
        if (logPath == null) {
            String filename = "_distcp_logs_" + randomId;
            if (!dstExists || !dstIsDir) {
                Path parent = args.dst.getParent();
                if (null == parent) {
                    // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                    // will return null. In this case, use '/' as its own parent to prevent
                    // NPE errors below.
                    parent = args.dst;
                }
                if (!dstfs.exists(parent)) {
                    dstfs.mkdirs(parent);
                }
                logPath = new Path(parent, filename);
            } else {
                logPath = new Path(args.dst, filename);
            }
        }
        FileOutputFormat.setOutputPath(jobConf, logPath);

        // create src list, dst list
        FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

        Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
        Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
        Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
        jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
        jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
        // srcCount: files+dirs examined; cnsyncf/cbsyncs/dirsyn: sync-point counters;
        // fileCount/dirCount/byteCount: work to do; skip*: work skipped by -update/limits.
        int srcCount = 0, cnsyncf = 0, dirsyn = 0;
        long fileCount = 0L, dirCount = 0L, byteCount = 0L, cbsyncs = 0L, skipFileCount = 0L, skipByteCount = 0L;
        try (SequenceFile.Writer src_writer = SequenceFile.createWriter(jobConf, Writer.file(srcfilelist),
                Writer.keyClass(LongWritable.class), Writer.valueClass(FilePair.class),
                Writer.compression(SequenceFile.CompressionType.NONE));
                SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobConf, Writer.file(dstfilelist),
                        Writer.keyClass(Text.class), Writer.valueClass(Text.class),
                        Writer.compression(SequenceFile.CompressionType.NONE));
                SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobConf, Writer.file(dstdirlist),
                        Writer.keyClass(Text.class), Writer.valueClass(FilePair.class),
                        Writer.compression(SequenceFile.CompressionType.NONE));) {
            // handle the case where the destination directory doesn't exist
            // and we've only a single src directory OR we're updating/overwriting
            // the contents of the destination directory.
            final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;

            Path basedir = null;
            HashSet<Path> parentDirsToCopy = new HashSet<Path>();
            if (args.basedir != null) {
                FileSystem basefs = args.basedir.getFileSystem(conf);
                basedir = args.basedir.makeQualified(basefs.getUri(), basefs.getWorkingDirectory());
                if (!basefs.isDirectory(basedir)) {
                    throw new IOException("Basedir " + basedir + " is not a directory.");
                }
            }

            // Walk every source path, appending one record per file/dir to the lists.
            for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
                final Path src = srcItr.next();
                FileSystem srcfs = src.getFileSystem(conf);
                FileStatus srcfilestat = srcfs.getFileStatus(src);
                // 'root' is the prefix stripped from each source path to form dst-relative names.
                Path root = special && srcfilestat.isDirectory() ? src : src.getParent();
                if (dstExists && !dstIsDir && (args.srcs.size() > 1 || srcfilestat.isDirectory())) {
                    // destination should not be a file
                    throw new IOException("Destination " + args.dst + " should be a dir"
                            + " if multiple source paths are there OR if" + " the source path is a dir");
                }

                if (basedir != null) {
                    // With -basedir, also record each intermediate parent dir between
                    // src and basedir (once) so the directory structure is recreated.
                    root = basedir;
                    Path parent = src.getParent().makeQualified(srcfs.getUri(), srcfs.getWorkingDirectory());
                    while (parent != null && !parent.equals(basedir)) {
                        if (!parentDirsToCopy.contains(parent)) {
                            parentDirsToCopy.add(parent);
                            String dst = makeRelative(root, parent);
                            FileStatus pst = srcfs.getFileStatus(parent);
                            src_writer.append(new LongWritable(0), new FilePair(pst, dst));
                            dst_writer.append(new Text(dst), new Text(parent.toString()));
                            dir_writer.append(new Text(dst), new FilePair(pst, dst));
                            if (++dirsyn > SYNC_FILE_MAX) {
                                dirsyn = 0;
                                dir_writer.sync();
                            }
                        }
                        parent = parent.getParent();
                    }

                    if (parent == null) {
                        throw new IOException("Basedir " + basedir + " is not a prefix of source path " + src);
                    }
                }

                if (srcfilestat.isDirectory()) {
                    ++srcCount;
                    final String dst = makeRelative(root, src);
                    // With -update, an existing destination dir need not be recreated.
                    if (!update || !dirExists(conf, new Path(args.dst, dst))) {
                        ++dirCount;
                        src_writer.append(new LongWritable(0), new FilePair(srcfilestat, dst));
                    }
                    dst_writer.append(new Text(dst), new Text(src.toString()));
                }

                // Iterative depth-first traversal of the source tree.
                Stack<FileStatus> pathstack = new Stack<FileStatus>();
                for (pathstack.push(srcfilestat); !pathstack.empty();) {
                    FileStatus cur = pathstack.pop();
                    FileStatus[] children = srcfs.listStatus(cur.getPath());
                    for (int i = 0; i < children.length; i++) {
                        boolean skipPath = false;
                        final FileStatus child = children[i];
                        final String dst = makeRelative(root, child.getPath());
                        ++srcCount;

                        if (child.isDirectory()) {
                            pathstack.push(child);
                            if (!update || !dirExists(conf, new Path(args.dst, dst))) {
                                ++dirCount;
                            } else {
                                skipPath = true; // skip creating dir at destination
                            }
                        } else {
                            Path destPath = new Path(args.dst, dst);
                            if (cur.isFile() && (args.srcs.size() == 1)) {
                                // Copying a single file; use dst path provided by user as
                                // destination file rather than destination directory
                                Path dstparent = destPath.getParent();
                                FileSystem destFileSys = destPath.getFileSystem(jobConf);
                                if (!(destFileSys.exists(dstparent)
                                        && destFileSys.getFileStatus(dstparent).isDirectory())) {
                                    destPath = dstparent;
                                }
                            }
                            //skip path if the src and the dst files are the same.
                            skipPath = update && sameFile(srcfs, child, dstfs, destPath, skipCRCCheck);
                            //skip path if it exceed file limit or size limit
                            skipPath |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                            if (!skipPath) {
                                ++fileCount;
                                byteCount += child.getLen();

                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("adding file " + child.getPath());
                                }

                                // Emit a sync marker so the input format can split the
                                // src list roughly every SYNC_FILE_MAX files or one
                                // map's worth of bytes.
                                ++cnsyncf;
                                cbsyncs += child.getLen();
                                if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                    src_writer.sync();
                                    dst_writer.sync();
                                    cnsyncf = 0;
                                    cbsyncs = 0L;
                                }
                            } else {
                                ++skipFileCount;
                                skipByteCount += child.getLen();
                                if (LOG.isTraceEnabled()) {
                                    LOG.trace("skipping file " + child.getPath());
                                }
                            }
                        }

                        if (!skipPath) {
                            src_writer.append(new LongWritable(child.isDirectory() ? 0 : child.getLen()),
                                    new FilePair(child, dst));
                        }

                        // dst list records every path (even skipped ones) for the duplication check.
                        dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                    }

                    if (cur.isDirectory()) {
                        String dst = makeRelative(root, cur.getPath());
                        dir_writer.append(new Text(dst), new FilePair(cur, dst));
                        if (++dirsyn > SYNC_FILE_MAX) {
                            dirsyn = 0;
                            dir_writer.sync();
                        }
                    }
                }
            }
        }
        LOG.info("sourcePathsCount(files+directories)=" + srcCount);
        LOG.info("filesToCopyCount=" + fileCount);
        LOG.info("bytesToCopyCount=" + TraditionalBinaryPrefix.long2String(byteCount, "", 1));
        if (update) {
            LOG.info("filesToSkipCopyCount=" + skipFileCount);
            LOG.info("bytesToSkipCopyCount=" + TraditionalBinaryPrefix.long2String(skipByteCount, "", 1));
        }
        // Dry run stops here: the lists were built only so the counts above could be reported.
        if (args.dryrun) {
            return false;
        }
        int mapCount = setMapCount(byteCount, jobConf);
        // Increase the replication of _distcp_src_files, if needed
        setReplication(conf, jobConf, srcfilelist, mapCount);

        FileStatus dststatus = null;
        try {
            dststatus = dstfs.getFileStatus(args.dst);
        } catch (FileNotFoundException fnfe) {
            LOG.info(args.dst + " does not exist.");
        }

        // create dest path dir if copying > 1 file
        if (dststatus == null) {
            if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
                throw new IOException("Failed to create" + args.dst);
            }
        }

        final Path sorted = new Path(jobDirectory, "_distcp_sorted");
        checkDuplication(jobfs, dstfilelist, sorted, conf);

        if (dststatus != null && args.flags.contains(Options.DELETE)) {
            long deletedPathsCount = deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf,
                    conf);
            LOG.info("deletedPathsFromDestCount(files+directories)=" + deletedPathsCount);
        }

        // Tmp dir lives beside dst (in its parent) when dst is, or will become, a single file.
        Path tmpDir = new Path(
                (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
                "_distcp_tmp_" + randomId);
        jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

        // Explicitly create the tmpDir to ensure that it can be cleaned
        // up by fullyDelete() later.
        tmpDir.getFileSystem(conf).mkdirs(tmpDir);

        LOG.info("sourcePathsCount=" + srcCount);
        LOG.info("filesToCopyCount=" + fileCount);
        LOG.info("bytesToCopyCount=" + TraditionalBinaryPrefix.long2String(byteCount, "", 1));
        jobConf.setInt(SRC_COUNT_LABEL, srcCount);
        jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);

        // A job is only needed when at least one file or directory must be created.
        return (fileCount + dirCount) > 0;
    }

    /**
     * Check whether the contents of src and dst are the same.
     * 
     * Return false if dstpath does not exist
     * 
     * If the files have different sizes, return false.
     * 
     * If the files have the same sizes, the file checksums will be compared.
     * 
     * When file checksum is not supported in any of file systems,
     * two files are considered as the same if they have the same size.
     */
    static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath,
            boolean skipCRCCheck) throws IOException {
        final FileStatus dstStatus;
        try {
            dstStatus = dstfs.getFileStatus(dstpath);
        } catch (FileNotFoundException fnfe) {
            return false; // nothing at the destination
        }

        if (srcstatus.getLen() != dstStatus.getLen()) {
            return false; // different sizes can never be the same file
        }

        if (skipCRCCheck) {
            LOG.debug("Skipping the CRC check");
            return true;
        }

        final FileChecksum srcChecksum;
        try {
            srcChecksum = srcfs.getFileChecksum(srcstatus.getPath());
        } catch (FileNotFoundException fnfe) {
            /*
             * Two possible cases:
             * (1) src existed once but was deleted between the time period that
             *     srcstatus was obtained and the try block above.
             * (2) srcfs does not support file checksum and (incorrectly) throws
             *     FNFE, e.g. some previous versions of HftpFileSystem.
             * For case (1), it is okay to return true since src was already deleted.
             * For case (2), true should be returned.  
             */
            return true;
        }

        try {
            final FileChecksum dstChecksum = dstfs.getFileChecksum(dstStatus.getPath());
            // A null checksum means the filesystem does not support checksums;
            // fall back to treating the equal-length files as identical.
            return srcChecksum == null || dstChecksum == null || srcChecksum.equals(dstChecksum);
        } catch (FileNotFoundException fnfe) {
            return false; // dst vanished after the size check
        }
    }

    /**
     * Delete the dst files/dirs which do not exist in src.
     *
     * The destination tree is listed recursively into a sequence file and
     * sorted; that listing is then merge-compared against the already-sorted
     * expected-destination list {@code dstsorted}. Any path present under the
     * destination root but absent from the expected list is removed (moved to
     * trash when possible, otherwise deleted recursively).
     *
     * @param dstfs     file system of the destination tree
     * @param dstroot   status of the destination root; must be a directory
     * @param dstsorted sorted sequence file of (relative dst path, src path)
     *                  records describing what SHOULD exist under dstroot
     * @param jobfs     file system holding the job's scratch directory
     * @param jobdir    scratch directory for the intermediate listing files
     * @param jobconf   job configuration used for sequence-file I/O
     * @param conf      configuration used to construct the Trash policy
     * @return total count of files and directories deleted from destination
     *         (descendants of a removed directory are counted individually)
     * @throws IOException if dstroot is a file or a deletion fails
     */
    static private long deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
            Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
        if (dstroot.isFile()) {
            throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                    + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
        }

        //write dst lsr results
        final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
        try (final SequenceFile.Writer writer = SequenceFile.createWriter(jobconf, Writer.file(dstlsr),
                Writer.keyClass(Text.class), Writer.valueClass(NullWritable.class),
                Writer.compression(SequenceFile.CompressionType.NONE))) {
            //do lsr to get all file statuses in dstroot
            // Iterative depth-first walk: every child (file or directory) is
            // recorded as a dstroot-relative path; directories are pushed so
            // their children are listed too.
            final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
            for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
                final FileStatus status = lsrstack.pop();
                if (status.isDirectory()) {
                    for (FileStatus child : dstfs.listStatus(status.getPath())) {
                        String relative = makeRelative(dstroot.getPath(), child.getPath());
                        writer.append(new Text(relative), NullWritable.get());
                        lsrstack.push(child);
                    }
                }
            }
        }

        //sort lsr results
        // Sorting the listing allows a single linear merge pass against the
        // (also sorted) expected-destination list below.
        final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
        SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
                NullWritable.class, jobconf);
        sorter.sort(dstlsr, sortedlsr);

        //compare lsr list and dst list  
        long deletedPathsCount = 0;
        try (SequenceFile.Reader lsrin = new SequenceFile.Reader(jobconf, Reader.file(sortedlsr));
                SequenceFile.Reader dstin = new SequenceFile.Reader(jobconf, Reader.file(dstsorted))) {
            //compare sorted lsr list and sorted dst list
            final Text lsrpath = new Text();
            final Text dstpath = new Text();
            final Text dstfrom = new Text();
            final Trash trash = new Trash(dstfs, conf);
            // Most recently deleted path; used to skip redundant deletes of
            // its descendants, since the recursive delete removed them.
            Path lastpath = null;

            boolean hasnext = dstin.next(dstpath, dstfrom);
            // Merge-compare: for each path that actually exists under dstroot
            // (lsrin), advance the expected-list cursor (dstin) until it is
            // >= that path. Equality means the path is wanted; otherwise it
            // is extraneous and must be removed.
            while (lsrin.next(lsrpath, NullWritable.get())) {
                int dst_cmp_lsr = dstpath.compareTo(lsrpath);
                while (hasnext && dst_cmp_lsr < 0) {
                    hasnext = dstin.next(dstpath, dstfrom);
                    dst_cmp_lsr = dstpath.compareTo(lsrpath);
                }

                if (dst_cmp_lsr == 0) {
                    //lsrpath exists in dst, skip it
                    hasnext = dstin.next(dstpath, dstfrom);
                } else {
                    //lsrpath does not exist, delete it
                    final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString());
                    // Counted even when the physical delete is skipped below,
                    // because an ancestor's recursive delete removed it.
                    ++deletedPathsCount;
                    if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) {
                        // Prefer trash; fall back to a hard recursive delete
                        // if trash is disabled or the move fails.
                        if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) {
                            throw new IOException("Failed to delete " + rmpath);
                        }
                        lastpath = rmpath;
                    }
                }
            }
        }
        return deletedPathsCount;
    }

    /**
     * Test whether {@code xp} is an ancestor of (or identical to) {@code yp},
     * using a string-prefix comparison of the path representations.
     */
    static private boolean isAncestorPath(Path xp, Path yp) {
        final String ancestor = xp.toString();
        final String descendant = yp.toString();
        if (!descendant.startsWith(ancestor)) {
            return false;
        }
        // A prefix match only counts when it ends exactly at a path-component
        // boundary (or the two paths are equal); otherwise "/a/b" would be
        // treated as an ancestor of "/a/bc".
        final int prefixLen = ancestor.length();
        return descendant.length() == prefixLen || descendant.charAt(prefixLen) == Path.SEPARATOR_CHAR;
    }

    /**
     * Sort the (dst, src) file list into {@code sorted} and scan it for
     * duplicate destination entries.
     *
     * @throws DuplicationException if two sources map to the same destination
     * @throws IOException on any other sort or read failure
     */
    static private void checkDuplication(FileSystem fs, Path file, Path sorted, Configuration conf)
            throws IOException {
        SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, new Text.Comparator(), Text.class, Text.class,
                conf);
        sorter.sort(file, sorted);
        try (SequenceFile.Reader in = new SequenceFile.Reader(conf, Reader.file(sorted))) {
            // After sorting, any duplicate destinations are adjacent, so it
            // suffices to compare each record against the previous one.
            Text prevdst = null;
            Text prevsrc = null;
            Text curdst = new Text();
            Text cursrc = new Text();
            while (in.next(curdst, cursrc)) {
                if (prevdst != null && curdst.equals(prevdst)) {
                    throw new DuplicationException(
                            "Invalid input, there are duplicated files in the sources: " + prevsrc + ", " + cursrc);
                }
                // Hand the current buffers over as "previous" and allocate
                // fresh ones so the next read cannot clobber them.
                prevdst = curdst;
                prevsrc = cursrc;
                curdst = new Text();
                cursrc = new Text();
            }
        }
    }

    /**
     * Signals that two or more source files would be copied onto the same
     * destination path.
     */
    public static class DuplicationException extends IOException {
        private static final long serialVersionUID = 1L;

        /** Error code reported when this exception aborts a copy. */
        public static final int ERROR_CODE = -2;

        /**
         * @param message description of the duplicated source entries
         */
        DuplicationException(String message) {
            super(message);
        }
    }
}