fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java Source code

Java tutorial

Introduction

Here is the source code for fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

Source

/*
 *                  Eoulsan development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public License version 2.1 or
 * later and CeCILL-C. This should be distributed with the code.
 * If you do not have a copy, see:
 *
 *      http://www.gnu.org/licenses/lgpl-2.1.txt
 *      http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
 *
 * Copyright for this code is held jointly by the Genomic platform
 * of the Institut de Biologie de l'École normale supérieure and
 * the individual authors. These should be listed in @author doc
 * comments.
 *
 * For more information on the Eoulsan project and its aims,
 * or to join the Eoulsan Google group, visit the home page
 * at:
 *
 *      http://outils.genomique.biologie.ens.fr/eoulsan
 *
 */

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop;

import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger;

import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Stack;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.InvalidInputException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileRecordReader;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.Globals;

/**
 * A Map-reduce program to recursively copy directories between different
 * file-systems.
 * @since 1.0
 * @author The hadoop project
 */
@SuppressWarnings("deprecation")
public class DistCp implements Tool {

    /* Charset built from Globals.DEFAULT_FILE_ENCODING; used to decode the source-list file in fetchFileList. */
    private static final Charset CHARSET = Charset.forName(Globals.DEFAULT_FILE_ENCODING);

    /* Tool name; shown in the usage text and used as the prefix of every configuration key below. */
    private static final String NAME = "distcp";

    /*
     * Command-line help text. This is a runtime string: it documents the options
     * and gives two notes about -overwrite/-update semantics and the symbolic
     * notation accepted by -filelimit/-sizelimit.
     */
    private static final String usage = NAME + " [OPTIONS] <srcurl>* <desturl>" + "\n\nOPTIONS:"
            + "\n-p[rbugp]              Preserve status" + "\n                       r: replication number"
            + "\n                       b: block size" + "\n                       u: user"
            + "\n                       g: group" + "\n                       p: permission"
            + "\n                       -p alone is equivalent to -prbugp"
            + "\n-i                     Ignore failures" + "\n-log <logdir>          Write logs to <logdir>"
            + "\n-m <num_maps>          Maximum number of simultaneous copies"
            + "\n-overwrite             Overwrite destination"
            + "\n-update                Overwrite if src size different from dst size"
            + "\n-f <urilist_uri>       Use list at <urilist_uri> as src list"
            + "\n-filelimit <n>         Limit the total number of files to be <= n"
            + "\n-sizelimit <n>         Limit the total size to be <= n bytes"
            + "\n-delete                Delete the files existing in the dst but not in src"
            + "\n-mapredSslConf <f>     Filename of SSL configuration for mapper task" +

            "\n\nNOTE 1: if -overwrite or -update are set, each source URI is "
            + "\n      interpreted as an isomorphic update to an existing directory." + "\nFor example:"
            + "\nhadoop " + NAME + " -p -update \"hdfs://A:8020/user/foo/bar\" "
            + "\"hdfs://B:8020/user/foo/baz\"\n"
            + "\n     would update all descendants of 'baz' also in 'bar'; it would "
            + "\n     *not* update /user/foo/baz/bar" +

            "\n\nNOTE 2: The parameter <n> in -filelimit and -sizelimit can be "
            + "\n     specified with symbolic representation.  For examples,"
            + "\n       1230k = 1230 * 1024 = 1259520" + "\n       891g = 891 * 1024^3 = 956703965184" +

            "\n";

    /* 256 * 1024 * 1024 bytes (256 MiB). Presumably the default amount of data per map task - confirm in setup(). */
    private static final long BYTES_PER_MAP = 256 * 1024 * 1024;
    /* NOTE(review): not referenced in the visible part of the file; by name, a cap on map tasks per node - confirm. */
    private static final int MAX_MAPS_PER_NODE = 20;
    /* NOTE(review): not referenced in the visible part of the file; meaning to be confirmed where it is used. */
    private static final int SYNC_FILE_MAX = 10;

    /**
     * Job counters incremented by the copy mapper.
     */
    enum Counter {
        /** Incremented by one for each file copied successfully. */
        COPY,
        /** Incremented by one for each file skipped because the destination is kept. */
        SKIP,
        /** Incremented by one for each file whose copy raised an IOException. */
        FAIL,
        /** Incremented by the number of bytes actually copied for each file. */
        BYTESCOPIED,
        /** Incremented by the source length of each file once it has been opened. */
        BYTESEXPECTED
    }

    /**
     * Command-line switches that map to a boolean job property.
     * <p>
     * The declaration order is significant: the argument parser scans
     * {@code values()} in order and picks the first constant whose
     * {@code cmd} is a prefix of the argument.
     */
    enum Options {
        DELETE("-delete", NAME + ".delete"),
        FILE_LIMIT("-filelimit", NAME + ".limit.file"),
        SIZE_LIMIT("-sizelimit", NAME + ".limit.size"),
        IGNORE_READ_FAILURES("-i", NAME + ".ignore.read.failures"),
        PRESERVE_STATUS("-p", NAME + ".preserve.status"),
        OVERWRITE("-overwrite", NAME + ".overwrite.always"),
        UPDATE("-update", NAME + ".overwrite.ifnewer");

        /** Command-line spelling of the option. */
        final String cmd;

        /** Name of the job property associated with the option. */
        final String propertyname;

        Options(final String cmd, final String propertyname) {
            this.cmd = cmd;
            this.propertyname = propertyname;
        }

        /**
         * Parse the strictly positive numeric value that follows this option.
         * @param args command-line arguments
         * @param offset index in {@code args} where the value is expected
         * @return the parsed value, always greater than zero
         * @throws IllegalArgumentException if the value is missing or is not
         *           greater than zero
         */
        private long parseLong(final String[] args, final int offset) {
            if (offset == args.length) {
                throw new IllegalArgumentException("<n> not specified in " + this.cmd);
            }

            final long value = StringUtils.TraditionalBinaryPrefix.string2long(args[offset]);
            if (value > 0) {
                return value;
            }
            throw new IllegalArgumentException("n = " + value + " <= 0 in " + this.cmd);
        }
    }

    /**
     * File attributes that can be preserved by a copy. Each attribute is
     * identified on the command line by the lower-cased first letter of its
     * name.
     */
    enum FileAttribute {
        BLOCK_SIZE, REPLICATION, USER, GROUP, PERMISSION;

        /** One-letter code of the attribute. */
        final char symbol;

        FileAttribute() {
            this.symbol = toString().toLowerCase().charAt(0);
        }

        /**
         * Find the attribute identified by a symbol.
         * @param c symbol to look up
         * @return the matching attribute, or null if none uses this symbol
         */
        private static FileAttribute forSymbol(final char c) {
            for (FileAttribute candidate : values()) {
                if (candidate.symbol == c) {
                    return candidate;
                }
            }
            return null;
        }

        /**
         * Convert a string of attribute symbols into a set of attributes.
         * @param s symbols to parse; null or empty selects every attribute
         * @return the selected attributes
         * @throws IllegalArgumentException if a symbol is unknown or appears more
         *           than once
         */
        static EnumSet<FileAttribute> parse(final String s) {
            if (s == null || s.isEmpty()) {
                return EnumSet.allOf(FileAttribute.class);
            }

            final EnumSet<FileAttribute> selected = EnumSet.noneOf(FileAttribute.class);
            for (int pos = 0; pos < s.length(); pos++) {
                final char c = s.charAt(pos);
                final FileAttribute attribute = forSymbol(c);
                if (attribute == null) {
                    throw new IllegalArgumentException("'" + c + "' in " + s + " is undefined.");
                }
                // add() returns false when the attribute was already selected
                if (!selected.add(attribute)) {
                    throw new IllegalArgumentException(
                            "There are more than one '" + attribute.symbol + "' in " + s);
                }
            }
            return selected;
        }
    }

    // Job configuration keys. All of them are prefixed with the tool name.

    // Temporary directory: mappers write each file under it before renaming;
    // it is deleted when the job ends.
    static final String TMP_DIR_LABEL = NAME + ".tmp.dir";
    // Destination root read by the mapper (defaults to "/" when unset).
    static final String DST_DIR_LABEL = NAME + ".dest.path";
    // Job working directory, deleted when the job ends.
    static final String JOB_DIR_LABEL = NAME + ".job.dir";
    // NOTE(review): not referenced in the visible part of the file.
    static final String MAX_MAPS_LABEL = NAME + ".max.map.tasks";
    // Location of the sequence file listing the sources, read to compute splits.
    static final String SRC_LIST_LABEL = NAME + ".src.list";
    // Number of source entries; a negative or missing value is rejected by getSplits.
    static final String SRC_COUNT_LABEL = NAME + ".src.count";
    // Total size to copy; a negative or missing value is rejected by getSplits.
    static final String TOTAL_SIZE_LABEL = NAME + ".total.size";
    // Location of the sequence file read by finalize() to restore attributes.
    static final String DST_DIR_LIST_LABEL = NAME + ".dst.dir.list";
    // NOTE(review): not referenced in the visible part of the file.
    static final String BYTES_PER_MAP_LABEL = NAME + ".bytes.per.map";
    // Attribute symbols to preserve (see FileAttribute.parse), stored next to
    // the boolean PRESERVE_STATUS property.
    static final String PRESERVE_STATUS_LABEL = Options.PRESERVE_STATUS.propertyname + ".value";

    // Job configuration of this tool; always a JobConf (see setConf).
    private JobConf conf;

    /**
     * Set the configuration of the tool. A configuration that is not already
     * a JobConf is wrapped in a new JobConf.
     * @param conf configuration to use
     */
    @Override
    public void setConf(final Configuration conf) {
        this.conf = conf instanceof JobConf ? (JobConf) conf : new JobConf(conf);
    }

    /**
     * Get the configuration of the tool.
     * @return the JobConf stored by {@link #setConf(Configuration)}
     */
    @Override
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Create the tool with the given configuration.
     * @param conf configuration, handed to {@link #setConf(Configuration)}
     */
    public DistCp(final Configuration conf) {
        setConf(conf);
    }

    /**
     * Writable record associating the status of a source file with the name
     * of its output. The status is serialized first, then the output string.
     */
    static class FilePair implements Writable {

        /** Status of the source entry. */
        FileStatus input = new FileStatus();

        /** Output name of the entry. */
        String output;

        /** Create an empty pair, to be filled by {@link #readFields(DataInput)}. */
        FilePair() {
        }

        /**
         * Create a pair.
         * @param input status of the source entry
         * @param output output name of the entry
         */
        FilePair(final FileStatus input, final String output) {
            this.input = input;
            this.output = output;
        }

        @Override
        public void readFields(final DataInput in) throws IOException {
            // same field order as write()
            input.readFields(in);
            output = Text.readString(in);
        }

        @Override
        public void write(final DataOutput out) throws IOException {
            input.write(out);
            Text.writeString(out, output);
        }

        @Override
        public String toString() {
            return String.valueOf(input) + " : " + output;
        }
    }

    /**
     * InputFormat of a distcp job responsible for generating splits of the src
     * file list.
     * <p>
     * NOTE(review): the class is declared as {@code InputFormat<Text, Text>}
     * while the records it reads in getSplits are (LongWritable, FilePair), as
     * are the records consumed by the mapper; this relies on type erasure.
     */
    static class CopyInputFormat implements InputFormat<Text, Text> {

        /**
         * Produce splits such that each is no greater than the quotient of the
         * total size and the number of splits requested.
         * <p>
         * The source list is scanned record by record; the key of each record is
         * accumulated and a split boundary is placed at the start of the record
         * that would push the accumulated value over the target. A single record
         * larger than the target still gets a split of its own.
         * @param job The handle to the JobConf object
         * @param numSplits Number of splits requested
         * @return the splits, expressed as byte ranges of the source list file
         * @throws IOException if the source list cannot be read
         * @throws RuntimeException if the file count, the total size or the
         *           source list location is missing from the job configuration
         */
        @Override
        public InputSplit[] getSplits(final JobConf job, final int numSplits) throws IOException {
            int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
            long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
            String srcfilelist = job.get(SRC_LIST_LABEL, "");
            if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
                throw new RuntimeException("Invalid metadata: #files(" + cnfiles + ") total_size(" + cbsize
                        + ") listuri(" + srcfilelist + ")");
            }
            Path src = new Path(srcfilelist);
            FileSystem fs = src.getFileSystem(job);
            FileStatus srcst = fs.getFileStatus(src);

            ArrayList<FileSplit> splits = new ArrayList<>(numSplits);
            LongWritable key = new LongWritable();
            FilePair value = new FilePair();
            // NOTE(review): a numSplits of 0 would raise an ArithmeticException here
            final long targetsize = cbsize / numSplits;
            // pos: start offset of the split being built
            long pos = 0L;
            // last: reader position before the record currently examined
            long last = 0L;
            // acc: sum of the keys of the records in the split being built
            long acc = 0L;
            // cbrem: bytes of the list file not yet assigned to a split
            long cbrem = srcst.getLen();
            SequenceFile.Reader sl = null;
            try {
                sl = new SequenceFile.Reader(fs, src, job);
                // 'last' is refreshed only after the body, so inside the body it
                // still points to the start of the record just read
                for (; sl.next(key, value); last = sl.getPosition()) {
                    // if adding this split would put this split past the target size,
                    // cut the last split and put this next file in the next split.
                    if (acc + key.get() > targetsize && acc != 0) {
                        long splitsize = last - pos;
                        splits.add(new FileSplit(src, pos, splitsize, (String[]) null));
                        cbrem -= splitsize;
                        pos = last;
                        acc = 0L;
                    }
                    acc += key.get();
                }
            } finally {
                checkAndClose(sl);
            }
            // whatever remains of the list file forms the final split
            if (cbrem != 0) {
                splits.add(new FileSplit(src, pos, cbrem, (String[]) null));
            }

            return splits.toArray(new FileSplit[splits.size()]);
        }

        /**
         * Returns a reader for this split of the src file list.
         * @param split split to read; cast to FileSplit
         * @param job job configuration
         * @param reporter Hadoop reporter (not used here)
         * @return a sequence file record reader over the split
         * @throws IOException if the reader cannot be created
         */
        @Override
        public RecordReader<Text, Text> getRecordReader(final InputSplit split, final JobConf job,
                final Reporter reporter) throws IOException {
            return new SequenceFileRecordReader<>(job, (FileSplit) split);
        }
    }

    /**
     * FSCopyFilesMapper: The mapper for copying files between FileSystems.
     */
    static class CopyFilesMapper implements Mapper<LongWritable, FilePair, WritableComparable<?>, Text> {
        // config (all of these are assigned in configure())

        // size of the copy buffer, also passed as buffer size when creating files
        private int sizeBuf = 128 * 1024;
        // file system of the destination path
        private FileSystem destFileSys = null;
        // when true, close() does not fail the task even if copies failed
        private boolean ignoreReadFailures;
        // true when file attributes must be preserved
        private boolean preserve_status;
        // attributes to preserve; only assigned when preserve_status is true
        private EnumSet<FileAttribute> preseved;
        // true when existing destination files must always be replaced
        // (forced to false when update is set)
        private boolean overwrite;
        // true when existing destination files are replaced only if they differ
        private boolean update;
        // root of the destination
        private Path destPath = null;
        // copy buffer of sizeBuf bytes, reused for every file
        private byte[] buffer = null;
        // job configuration
        private JobConf job;

        // stats (per mapper instance, reported through the task status)
        private int failcount = 0;
        private int skipcount = 0;
        private int copycount = 0;

        /**
         * Build a summary of the copied, skipped and failed counts of this mapper.
         * @return the summary
         */
        private String getCountString() {
            final StringBuilder summary = new StringBuilder();
            summary.append("Copied: ").append(this.copycount);
            summary.append(" Skipped: ").append(this.skipcount);
            summary.append(" Failed: ").append(this.failcount);
            return summary.toString();
        }

        /**
         * Publish the current counts as the task status.
         * @param reporter Hadoop reporter
         */
        private void updateStatus(final Reporter reporter) {
            final String counts = getCountString();
            reporter.setStatus(counts);
        }

        /**
         * Tell whether dst must be replaced by src in update mode. The answer is
         * always false when the update flag is not set; otherwise it is the
         * negation of {@code sameFile} for the two files.
         * @param srcstatus status of the source file
         * @param dstfs destination file system
         * @param dstpath destination path
         * @return true if the update flag is set and the files are not the same
         * @throws IOException if the files cannot be compared
         */
        private boolean needsUpdate(final FileStatus srcstatus, final FileSystem dstfs, final Path dstpath)
                throws IOException {
            if (!this.update) {
                return false;
            }
            final FileSystem srcfs = srcstatus.getPath().getFileSystem(this.job);
            return !sameFile(srcfs, srcstatus, dstfs, dstpath);
        }

        /**
         * Create a file on the destination file system, removing any file that is
         * already there. When attributes are preserved, the permission,
         * replication and block size of the source are applied if selected.
         * @param f path of the file to create
         * @param reporter Hadoop reporter, used as progress callback
         * @param srcstat status of the source file
         * @return an output stream on the new file
         * @throws IOException if the file cannot be deleted or created
         */
        private FSDataOutputStream create(final Path f, final Reporter reporter, final FileStatus srcstat)
                throws IOException {
            if (this.destFileSys.exists(f)) {
                this.destFileSys.delete(f, false);
            }

            if (this.preserve_status) {
                final FsPermission permission;
                if (this.preseved.contains(FileAttribute.PERMISSION)) {
                    permission = srcstat.getPermission();
                } else {
                    permission = null;
                }

                final short replication;
                if (this.preseved.contains(FileAttribute.REPLICATION)) {
                    replication = srcstat.getReplication();
                } else {
                    replication = this.destFileSys.getDefaultReplication();
                }

                final long blockSize;
                if (this.preseved.contains(FileAttribute.BLOCK_SIZE)) {
                    blockSize = srcstat.getBlockSize();
                } else {
                    blockSize = this.destFileSys.getDefaultBlockSize();
                }

                return this.destFileSys.create(f, permission, true, this.sizeBuf, replication, blockSize,
                        reporter);
            }

            return this.destFileSys.create(f, true, this.sizeBuf, reporter);
        }

        /**
         * Copy a file to a destination.
         * <p>
         * Directories are only created. Files are first written under the
         * temporary directory, then renamed to their destination, and the
         * destination length is checked against the source length.
         * @param srcstat src path and metadata
         * @param relativedst dst path, relative to the destination root
         * @param outc collector receiving a "SKIP" record when the file is skipped
         * @param reporter Hadoop reporter
         * @throws IOException if a directory or file cannot be created, if the
         *           number of bytes copied does not match the source length, or
         *           if the destination is a directory
         */
        private void copy(final FileStatus srcstat, final Path relativedst,
                final OutputCollector<WritableComparable<?>, Text> outc, final Reporter reporter)
                throws IOException {
            Path absdst = new Path(this.destPath, relativedst);
            int totfiles = this.job.getInt(SRC_COUNT_LABEL, -1);
            assert totfiles >= 0 : "Invalid file count " + totfiles;

            // if a directory, ensure created even if empty
            if (srcstat.isDir()) {
                if (this.destFileSys.exists(absdst)) {
                    if (!this.destFileSys.getFileStatus(absdst).isDir()) {
                        throw new IOException("Failed to mkdirs: " + absdst + " is a file.");
                    }
                } else if (!this.destFileSys.mkdirs(absdst)) {
                    throw new IOException("Failed to mkdirs " + absdst);
                }
                // TODO: when modification times can be set, directories should be
                // emitted to reducers so they might be preserved. Also, mkdirs does
                // not currently return an error when the directory already exists;
                // if this changes, all directory work might as well be done in reduce
                return;
            }

            // keep an existing destination unless overwrite is set or update mode
            // finds the files different
            if (this.destFileSys.exists(absdst) && !this.overwrite
                    && !needsUpdate(srcstat, this.destFileSys, absdst)) {
                outc.collect(null, new Text("SKIP: " + srcstat.getPath()));
                ++this.skipcount;
                reporter.incrCounter(Counter.SKIP, 1);
                updateStatus(reporter);
                return;
            }

            Path tmpfile = new Path(this.job.get(TMP_DIR_LABEL), relativedst);
            long cbcopied = 0L;
            FSDataInputStream in = null;
            FSDataOutputStream out = null;
            try {
                // open src file
                in = srcstat.getPath().getFileSystem(this.job).open(srcstat.getPath());
                reporter.incrCounter(Counter.BYTESEXPECTED, srcstat.getLen());
                // open tmp file
                out = create(tmpfile, reporter, srcstat);
                // copy file
                for (int cbread; (cbread = in.read(this.buffer)) >= 0;) {
                    out.write(this.buffer, 0, cbread);
                    cbcopied += cbread;
                    // progress: percentage, destination and copied / expected sizes
                    reporter.setStatus(String.format("%.2f ", cbcopied * 100.0 / srcstat.getLen()) + absdst + " [ "
                            + StringUtils.humanReadableInt(cbcopied) + " / "
                            + StringUtils.humanReadableInt(srcstat.getLen()) + " ]");
                }
            } finally {
                checkAndClose(in);
                checkAndClose(out);
            }

            if (cbcopied != srcstat.getLen()) {
                throw new IOException(
                        "File size not matched: copied " + bytesString(cbcopied) + " to tmpfile (=" + tmpfile
                                + ") but expected " + bytesString(srcstat.getLen()) + " from " + srcstat.getPath());
            } else {
                if (totfiles == 1) {
                    // Copying a single file; use dst path provided by user as destination
                    // rather than destination directory, if a file
                    Path dstparent = absdst.getParent();
                    if (!(this.destFileSys.exists(dstparent)
                            && this.destFileSys.getFileStatus(dstparent).isDir())) {
                        absdst = dstparent;
                    }
                }
                if (this.destFileSys.exists(absdst) && this.destFileSys.getFileStatus(absdst).isDir()) {
                    throw new IOException(absdst + " is a directory");
                }
                if (!this.destFileSys.mkdirs(absdst.getParent())) {
                    throw new IOException("Failed to create parent dir: " + absdst.getParent());
                }
                rename(tmpfile, absdst);

                // verify the renamed file; remove it if its length is wrong
                FileStatus dststat = this.destFileSys.getFileStatus(absdst);
                if (dststat.getLen() != srcstat.getLen()) {
                    this.destFileSys.delete(absdst, false);
                    throw new IOException("File size not matched: copied " + bytesString(dststat.getLen())
                            + " to dst (=" + absdst + ") but expected " + bytesString(srcstat.getLen()) + " from "
                            + srcstat.getPath());
                }
                updatePermissions(srcstat, dststat);
            }

            // report at least once for each file
            ++this.copycount;
            reporter.incrCounter(Counter.BYTESCOPIED, cbcopied);
            reporter.incrCounter(Counter.COPY, 1);
            updateStatus(reporter);
        }

        /**
         * Move the temporary file to its destination, deleting the destination
         * first if it already exists.
         * @param tmp temporary file
         * @param dst destination file
         * @throws IOException if the deletion or the rename fails; the original
         *           failure is attached as the cause
         */
        private void rename(final Path tmp, final Path dst) throws IOException {
            try {
                if (this.destFileSys.exists(dst)) {
                    this.destFileSys.delete(dst, true);
                }
                final boolean renamed = this.destFileSys.rename(tmp, dst);
                if (!renamed) {
                    // handled by the catch clause below, like any other failure
                    throw new IOException();
                }
            } catch (IOException cause) {
                final String message =
                        "Fail to rename tmp file (=" + tmp + ") to destination file (=" + dst + ")";
                throw new IOException(message, cause);
            }
        }

        /**
         * Apply the preserved attributes of the source to the destination. Does
         * nothing when attributes are not preserved.
         * @param src status of the source file
         * @param dst status of the destination file
         * @throws IOException if the attributes cannot be changed
         */
        private void updatePermissions(final FileStatus src, final FileStatus dst) throws IOException {
            if (!this.preserve_status) {
                return;
            }
            DistCp.updatePermissions(src, dst, this.preseved, this.destFileSys);
        }

        /**
         * Format a byte count as the exact number followed by its human readable
         * form in parentheses.
         * @param b number of bytes
         * @return the formatted byte count
         */
        static String bytesString(final long b) {
            final String readable = StringUtils.humanReadableInt(b);
            return String.valueOf(b) + " bytes (" + readable + ")";
        }

        /**
         * Mapper configuration. Reads the destination root from the job, resolves
         * its file system, allocates the copy buffer and loads the copy options
         * (ignore failures, preserve status, update, overwrite).
         * @param job job configuration
         * @throws RuntimeException if the destination file system cannot be
         *           obtained
         */
        @Override
        public void configure(final JobConf job) {
            final Path dst = new Path(job.get(DST_DIR_LABEL, "/"));
            this.destPath = dst;
            try {
                this.destFileSys = dst.getFileSystem(job);
            } catch (IOException ex) {
                throw new RuntimeException("Unable to get the named file system.", ex);
            }

            final int bufferSize = job.getInt("copy.buf.size", 128 * 1024);
            this.sizeBuf = bufferSize;
            this.buffer = new byte[bufferSize];

            this.ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);

            final boolean preserve = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
            this.preserve_status = preserve;
            if (preserve) {
                this.preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
            }

            // update takes precedence: overwrite is only read when update is off
            final boolean updateRequested = job.getBoolean(Options.UPDATE.propertyname, false);
            this.update = updateRequested;
            this.overwrite = updateRequested ? false : job.getBoolean(Options.OVERWRITE.propertyname, false);

            this.job = job;
        }

        /**
         * Map method. Copies one file from source file system to destination.
         * <p>
         * A failed copy does not fail the call: the failure is counted, logged and
         * emitted to the output, then the temporary file is removed on a
         * best-effort basis (up to three attempts, three seconds apart).
         * @param key src len
         * @param value FilePair (FileStatus of the source, destination as a
         *          String relative to the destination root)
         * @param out Log of failed copies
         * @param reporter Hadoop reporter
         * @throws IOException if the thread is interrupted while cleaning up after
         *           a failed copy; the interrupt status of the thread is restored
         *           before the exception is thrown
         */
        @Override
        public void map(final LongWritable key, final FilePair value,
                final OutputCollector<WritableComparable<?>, Text> out, final Reporter reporter)
                throws IOException {
            final FileStatus srcstat = value.input;
            final Path relativedst = new Path(value.output);
            try {
                copy(srcstat, relativedst, out, reporter);
            } catch (IOException e) {
                ++this.failcount;
                reporter.incrCounter(Counter.FAIL, 1);
                updateStatus(reporter);
                final String sfailure = "FAIL " + relativedst + " : " + StringUtils.stringifyException(e);
                out.collect(null, new Text(sfailure));
                getLogger().info(sfailure);
                try {
                    for (int i = 0; i < 3; ++i) {
                        try {
                            final Path tmp = new Path(this.job.get(TMP_DIR_LABEL), relativedst);
                            if (this.destFileSys.delete(tmp, true)) {
                                break;
                            }
                        } catch (Throwable ex) {
                            // ignore, we are just cleaning up
                            getLogger().fine("Ignoring cleanup exception: " + ex.getMessage());
                        }
                        // update status, so we don't get timed out
                        updateStatus(reporter);
                        Thread.sleep(3 * 1000);
                    }
                } catch (InterruptedException inte) {
                    // restore the interrupt flag so that callers can still observe it
                    Thread.currentThread().interrupt();
                    throw new IOException(
                            "Interrupted while cleaning up after failed copy of " + relativedst, inte);
                }
            } finally {
                updateStatus(reporter);
            }
        }

        /**
         * End of the mapper. Fails the task when at least one copy failed, unless
         * failures are ignored.
         * @throws IOException carrying the copy counts if a copy failed and
         *           failures are not ignored
         */
        @Override
        public void close() throws IOException {
            final boolean mustFail = this.failcount != 0 && !this.ignoreReadFailures;
            if (mustFail) {
                throw new IOException(getCountString());
            }
        }
    }

    /**
     * Read a list of paths from a text file, one path per line.
     * @param conf configuration used to resolve the file system of the list
     * @param srcList path of the file containing the list
     * @return the paths, in file order
     * @throws IOException if the file cannot be opened or read
     */
    private static List<Path> fetchFileList(final Configuration conf, final Path srcList) throws IOException {
        final List<Path> paths = new ArrayList<>();
        final FileSystem listFs = srcList.getFileSystem(conf);
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(listFs.open(srcList), CHARSET));
            for (String entry = reader.readLine(); entry != null; entry = reader.readLine()) {
                paths.add(new Path(entry));
            }
        } finally {
            checkAndClose(reader);
        }
        return paths;
    }

    /**
     * Copy a source, or the sources listed in a file, to a destination with no
     * file or size limit and no attribute preservation. Marked as deprecated
     * in this file.
     * @param conf configuration
     * @param srcPath source path, or path of a file listing the sources
     * @param destPath destination path
     * @param logPath log directory
     * @param srcAsList true if srcPath is a file containing a list of sources
     * @param ignoreReadFailures true to set the ignore read failures flag
     * @throws IOException if the copy fails
     */
    @Deprecated
    public static void copy(final Configuration conf, final String srcPath, final String destPath,
            final Path logPath, final boolean srcAsList, final boolean ignoreReadFailures) throws IOException {
        final Path src = new Path(srcPath);

        final List<Path> sources = new ArrayList<>();
        if (!srcAsList) {
            sources.add(src);
        } else {
            sources.addAll(fetchFileList(conf, src));
        }

        final EnumSet<Options> flags = EnumSet.noneOf(Options.class);
        if (ignoreReadFailures) {
            flags.add(Options.IGNORE_READ_FAILURES);
        }

        final Path dst = new Path(destPath);
        final Arguments arguments =
                new Arguments(sources, dst, logPath, flags, null, Long.MAX_VALUE, Long.MAX_VALUE, null);
        copy(conf, arguments);
    }

    /**
     * Check that every source path exists.
     * @param conf configuration used to resolve the file systems
     * @param srcPaths source paths to check
     * @throws IOException an InvalidInputException gathering one error per
     *           missing source, or an error raised while checking
     */
    private static void checkSrcPath(final Configuration conf, final List<Path> srcPaths) throws IOException {
        final List<IOException> problems = new ArrayList<>();
        for (final Path source : srcPaths) {
            final boolean found = source.getFileSystem(conf).exists(source);
            if (!found) {
                problems.add(new IOException("Input source " + source + " does not exist."));
            }
        }
        if (problems.isEmpty()) {
            return;
        }
        throw new InvalidInputException(problems);
    }

    /**
     * Driver to copy srcPath to destPath depending on required protocol.
     * <p>
     * Checks the sources, builds the job configuration, runs the job and then
     * restores the preserved attributes through finalize(). The temporary and job
     * directories are deleted whether or not the job succeeded.
     * @param conf configuration
     * @param args arguments
     * @throws IOException if a source does not exist or if the copy fails
     */
    static void copy(final Configuration conf, final Arguments args) throws IOException {
        getLogger().info("srcPaths=" + args.srcs);
        getLogger().info("destPath=" + args.dst);
        checkSrcPath(conf, args.srcs);

        JobConf job = createJobConf(conf);
        if (args.preservedAttributes != null) {
            job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
        }
        if (args.mapredSslConf != null) {
            job.set("dfs.client.https.keystore.resource", args.mapredSslConf);
        }

        // Initialize the mapper
        try {
            setup(conf, job, args);
            JobClient.runJob(job);
            finalize(conf, job, args.dst, args.preservedAttributes);
        } finally {
            // NOTE(review): if setup() fails early these properties may be unset;
            // presumably fullyDelete() tolerates a null name - confirm.
            // delete tmp
            fullyDelete(job.get(TMP_DIR_LABEL), job);
            // delete jobDirectory
            fullyDelete(job.get(JOB_DIR_LABEL), job);
        }
    }

    /**
     * Align the owner, group and permission of a destination file with those of
     * its source, for the attributes that are preserved and actually differ.
     * @param src status of the source file
     * @param dst status of the destination file
     * @param preseved attributes to preserve
     * @param destFileSys destination file system
     * @throws IOException if an attribute cannot be changed
     */
    private static void updatePermissions(final FileStatus src, final FileStatus dst,
            final EnumSet<FileAttribute> preseved, final FileSystem destFileSys) throws IOException {
        final boolean ownerDiffers =
                preseved.contains(FileAttribute.USER) && !src.getOwner().equals(dst.getOwner());
        final boolean groupDiffers =
                preseved.contains(FileAttribute.GROUP) && !src.getGroup().equals(dst.getGroup());

        if (ownerDiffers || groupDiffers) {
            // a null owner or group leaves that attribute untouched
            final String newOwner = ownerDiffers ? src.getOwner() : null;
            final String newGroup = groupDiffers ? src.getGroup() : null;
            destFileSys.setOwner(dst.getPath(), newOwner, newGroup);
        }

        final boolean permissionDiffers = preseved.contains(FileAttribute.PERMISSION)
                && !src.getPermission().equals(dst.getPermission());
        if (permissionDiffers) {
            destFileSys.setPermission(dst.getPath(), src.getPermission());
        }
    }

    /**
     * Restore owner, group and permission on the entries listed in the
     * destination directory list of the job. Does nothing when no attribute is
     * preserved or when none of user, group and permission is selected.
     * @param conf configuration used to resolve the destination file system
     * @param jobconf job configuration holding the destination directory list
     * @param destPath destination root
     * @param presevedAttributes symbols of the attributes to preserve, or null
     * @throws IOException if the list cannot be read or an attribute cannot be
     *           changed
     */
    private static void finalize(final Configuration conf, final JobConf jobconf, final Path destPath,
            final String presevedAttributes) throws IOException {
        if (presevedAttributes == null) {
            return;
        }

        final EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
        final boolean ownershipSelected = preseved.contains(FileAttribute.USER)
                || preseved.contains(FileAttribute.GROUP) || preseved.contains(FileAttribute.PERMISSION);
        if (!ownershipSelected) {
            return;
        }

        final FileSystem dstfs = destPath.getFileSystem(conf);
        final Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
            final Text dsttext = new Text();
            final FilePair pair = new FilePair();
            while (reader.next(dsttext, pair)) {
                final Path absdst = new Path(destPath, pair.output);
                updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
            }
        } finally {
            checkAndClose(reader);
        }
    }

    /**
     * Holder for the parsed distcp command-line arguments.
     */
    static private class Arguments {
        final List<Path> srcs;
        final Path dst;
        final Path log;
        final EnumSet<Options> flags;
        final String preservedAttributes;
        final long filelimit;
        final long sizelimit;
        final String mapredSslConf;

        /**
         * Arguments for distcp
         * @param srcs List of source paths
         * @param dst Destination path
         * @param log Log output directory
         * @param flags Command-line flags
         * @param preservedAttributes Preserved attributes
         * @param filelimit File limit
         * @param sizelimit Size limit
         * @param mapredSslConf Value given after the -mapredSslConf switch, or
         *          null when the switch is absent
         */
        Arguments(final List<Path> srcs, final Path dst, final Path log, final EnumSet<Options> flags,
                final String preservedAttributes, final long filelimit, final long sizelimit,
                final String mapredSslConf) {
            this.srcs = srcs;
            this.dst = dst;
            this.log = log;
            this.flags = flags;
            this.preservedAttributes = preservedAttributes;
            this.filelimit = filelimit;
            this.sizelimit = sizelimit;
            this.mapredSslConf = mapredSslConf;
        }

        /**
         * Parse the distcp command line.
         * <p>
         * An argument that starts with the command string of one of the
         * {@link Options} values sets that flag. The switches -f, -log,
         * -mapredSslConf and -m each consume the following argument. Any other
         * argument starting with '-' is rejected. Of the remaining arguments, the
         * very last one of the command line is the destination and the others are
         * sources. The value of -m is written into {@code conf} under
         * MAX_MAPS_LABEL.
         * @param args command-line arguments
         * @param conf configuration, updated when -m is given
         * @return the parsed arguments
         * @throws IllegalArgumentException if an argument is empty, a switch is
         *           unknown or lacks its value, the source or destination is
         *           missing, or incompatible flags are combined
         * @throws IOException declared for the helpers called while parsing
         *           (presumably reading the -f source list)
         */
        static Arguments valueOf(final String[] args, final Configuration conf) throws IOException {
            List<Path> srcs = new ArrayList<>();
            Path dst = null;
            Path log = null;
            EnumSet<Options> flags = EnumSet.noneOf(Options.class);
            String presevedAttributes = null;
            String mapredSslConf = null;
            long filelimit = Long.MAX_VALUE;
            long sizelimit = Long.MAX_VALUE;

            // values() clones its array on every call, so fetch it only once
            final Options[] opt = Options.values();

            for (int idx = 0; idx < args.length; idx++) {
                // An empty argument can be neither a switch nor a path; reject it
                // here so that it is reported as a usage error instead of failing
                // later with an index error on its first character.
                if (args[idx].isEmpty()) {
                    throw new IllegalArgumentException("Empty argument at position " + idx);
                }

                // look for the first option whose command string prefixes the argument
                int i = 0;
                while (i < opt.length && !args[idx].startsWith(opt[i].cmd)) {
                    i++;
                }

                if (i < opt.length) {
                    flags.add(opt[i]);
                    if (opt[i] == Options.PRESERVE_STATUS) {
                        // the attribute letters follow the two-character switch
                        presevedAttributes = args[idx].substring(2);
                        FileAttribute.parse(presevedAttributes); // validation
                    } else if (opt[i] == Options.FILE_LIMIT) {
                        filelimit = Options.FILE_LIMIT.parseLong(args, ++idx);
                    } else if (opt[i] == Options.SIZE_LIMIT) {
                        sizelimit = Options.SIZE_LIMIT.parseLong(args, ++idx);
                    }
                } else if ("-f".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("urilist_uri not specified in -f");
                    }
                    srcs.addAll(fetchFileList(conf, new Path(args[idx])));
                } else if ("-log".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("logdir not specified in -log");
                    }
                    log = new Path(args[idx]);
                } else if ("-mapredSslConf".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("ssl conf file not specified in -mapredSslConf");
                    }
                    mapredSslConf = args[idx];
                } else if ("-m".equals(args[idx])) {
                    if (++idx == args.length) {
                        throw new IllegalArgumentException("num_maps not specified in -m");
                    }
                    try {
                        conf.setInt(MAX_MAPS_LABEL, Integer.parseInt(args[idx]));
                    } catch (NumberFormatException e) {
                        // keep the parse failure as the cause of the usage error
                        throw new IllegalArgumentException("Invalid argument to -m: " + args[idx], e);
                    }
                } else if (args[idx].charAt(0) == '-') {
                    throw new IllegalArgumentException("Invalid switch " + args[idx]);
                } else if (idx == args.length - 1) {
                    dst = new Path(args[idx]);
                } else {
                    srcs.add(new Path(args[idx]));
                }
            }
            // mandatory command-line parameters
            if (srcs.isEmpty() || dst == null) {
                throw new IllegalArgumentException("Missing " + (dst == null ? "dst path" : "src"));
            }
            // incompatible command-line flags
            final boolean isOverwrite = flags.contains(Options.OVERWRITE);
            final boolean isUpdate = flags.contains(Options.UPDATE);
            final boolean isDelete = flags.contains(Options.DELETE);
            if (isOverwrite && isUpdate) {
                throw new IllegalArgumentException("Conflicting overwrite policies");
            }
            if (isDelete && !isOverwrite && !isUpdate) {
                throw new IllegalArgumentException(Options.DELETE.cmd + " must be specified with "
                        + Options.OVERWRITE + " or " + Options.UPDATE + ".");
            }
            return new Arguments(srcs, dst, log, flags, presevedAttributes, filelimit, sizelimit, mapredSslConf);
        }

        /** {@inheritDoc} */
        @Override
        public String toString() {
            return getClass().getName() + "{" + "\n  srcs = " + this.srcs + "\n  dst = " + this.dst + "\n  log = "
                    + this.log + "\n  flags = " + this.flags + "\n  preservedAttributes = "
                    + this.preservedAttributes + "\n  filelimit = " + this.filelimit + "\n  sizelimit = "
                    + this.sizelimit + "\n  mapredSslConf = " + this.mapredSslConf + "\n}";
        }
    }

    /**
     * Command-line entry point: parses the arguments and launches the copy,
     * reporting any failure on the standard error stream.
     * @param args command-line arguments
     * @return 0 on success, -1 when the arguments are invalid (the usage is
     *         printed), {@link DuplicationException#ERROR_CODE} when a
     *         {@link DuplicationException} is raised, -3 when a remote exception
     *         is raised, and -999 for any other failure
     */
    @Override
    public int run(final String[] args) {
        try {
            final Arguments parsed = Arguments.valueOf(args, this.conf);
            copy(this.conf, parsed);
        } catch (IllegalArgumentException badArguments) {
            System.err.println(StringUtils.stringifyException(badArguments) + "\n" + usage);
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        } catch (DuplicationException duplicated) {
            System.err.println(StringUtils.stringifyException(duplicated));
            return DuplicationException.ERROR_CODE;
        } catch (RemoteException remote) {
            // report the underlying exception when it is one of the listed types
            final IOException cause = remote.unwrapRemoteException(FileNotFoundException.class,
                    AccessControlException.class, QuotaExceededException.class);
            System.err.println(StringUtils.stringifyException(cause));
            return -3;
        } catch (Exception failure) {
            System.err.println("With failures, global counters are inaccurate; consider running with -i");
            System.err.println("Copy failed: " + StringUtils.stringifyException(failure));
            return -999;
        }
        return 0;
    }

    /**
     * Parses the arguments and launches the copy like {@link #run(String[])},
     * but reports every failure as an {@link EoulsanException} instead of
     * printing it and returning an exit code.
     * @param args command-line arguments
     * @throws EoulsanException if the arguments are invalid or the copy fails;
     *           the message holds the stringified original exception
     */
    public void runWithException(final String[] args) throws EoulsanException {
        try {
            final Arguments parsed = Arguments.valueOf(args, this.conf);
            copy(this.conf, parsed);
        } catch (IllegalArgumentException badArguments) {
            final String report = StringUtils.stringifyException(badArguments) + "\n" + usage;
            throw new EoulsanException(report);
        } catch (DuplicationException duplicated) {
            throw new EoulsanException(StringUtils.stringifyException(duplicated));
        } catch (RemoteException remote) {
            // report the underlying exception when it is one of the listed types
            final IOException cause = remote.unwrapRemoteException(FileNotFoundException.class,
                    AccessControlException.class, QuotaExceededException.class);
            throw new EoulsanException(StringUtils.stringifyException(cause));
        } catch (Exception failure) {
            final String report = "Copy failed: " + StringUtils.stringifyException(failure);
            throw new EoulsanException(report);
        }
    }

    /**
     * Make a path relative with respect to a root path.
     * <p>
     * Both paths are split on "/" and compared component by component.
     * @param root the root path
     * @param absPath an absolute path, expected to descend from root
     * @return the components of absPath that follow root joined with the path
     *         separator, "." when absPath and root have the same components, or
     *         null when absPath does not descend from root (a component
     *         differs, or absPath has fewer components than root)
     * @throws IllegalArgumentException if absPath is not absolute
     */
    static String makeRelative(final Path root, final Path absPath) {
        if (!absPath.isAbsolute()) {
            throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
        }
        final String p = absPath.toUri().getPath();

        final StringTokenizer pathTokens = new StringTokenizer(p, "/");
        final StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/");
        while (rootTokens.hasMoreTokens()) {
            // A path with fewer components than root cannot descend from it;
            // checking first avoids a NoSuchElementException from nextToken().
            if (!pathTokens.hasMoreTokens() || !rootTokens.nextToken().equals(pathTokens.nextToken())) {
                return null;
            }
        }

        // whatever is left of absPath is the relative part
        final StringBuilder sb = new StringBuilder();
        while (pathTokens.hasMoreTokens()) {
            sb.append(pathTokens.nextToken());
            if (pathTokens.hasMoreTokens()) {
                sb.append(Path.SEPARATOR);
            }
        }
        return sb.length() == 0 ? "." : sb.toString();
    }

    /**
     * Calculate how many maps to run and set it on the job.
     * <p>
     * The count is the total size of the copy divided by the bytes-per-map
     * setting (BYTES_PER_MAP_LABEL, default BYTES_PER_MAP), capped by the
     * maximum number of maps (MAX_MAPS_LABEL, default MAX_MAPS_PER_NODE * the
     * number of task trackers reported by the cluster), and never less than 1.
     * @param totalBytes Count of total bytes for job
     * @param job The job to configure
     * @throws IOException if the cluster status cannot be obtained
     */
    private static void setMapCount(final long totalBytes, final JobConf job) throws IOException {
        final long mapsBySize = totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);
        final int maxMaps = job.getInt(MAX_MAPS_LABEL,
                MAX_MAPS_PER_NODE * new JobClient(job).getClusterStatus().getTaskTrackers());
        // Apply the cap while still in long arithmetic: narrowing the raw
        // quotient to int first could wrap for a very large copy and silently
        // fall back to a single map.
        final long numMaps = Math.min(mapsBySize, (long) maxMaps);
        job.setNumMapTasks((int) Math.max(numMaps, 1L));
    }

    /**
     * Recursively delete a directory, logging a warning when the file system
     * reports that the deletion did not succeed.
     * @param dir path of the directory to delete; nothing is done when null
     * @param conf configuration used to resolve the file system of the path
     * @throws IOException if the file system access fails
     */
    static void fullyDelete(final String dir, final Configuration conf) throws IOException {
        if (dir == null) {
            return;
        }
        final Path target = new Path(dir);
        final boolean deleted = target.getFileSystem(conf).delete(target, true);
        if (deleted) {
            return;
        }
        getLogger().warning("Could not fully delete " + target);
    }

    /**
     * Create the job configuration of the copy job: a map-only job named NAME
     * that reads its input through CopyInputFormat, runs CopyFilesMapper and
     * emits Text keys and values.
     * @param conf the configuration to start from
     * @return the new job configuration
     */
    private static JobConf createJobConf(final Configuration conf) {
        final JobConf job = new JobConf(conf, DistCp.class);
        job.setJobName(NAME);

        // Speculative execution must stay off: DFS doesn't handle
        // multiple writers to the same file.
        job.setMapSpeculativeExecution(false);

        // map side only, no reduce
        job.setMapperClass(CopyFilesMapper.class);
        job.setNumReduceTasks(0);

        job.setInputFormat(CopyInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        return job;
    }

    /** Generator shared by all calls to {@link #getRandomId()}. */
    private static final Random RANDOM = new Random();

    /**
     * Create a random identifier.
     * @return a random integer between 0 (inclusive) and
     *         {@link Integer#MAX_VALUE} (exclusive), written in base 36
     */
    public static String getRandomId() {
        final int value = RANDOM.nextInt(Integer.MAX_VALUE);
        // Character.MAX_RADIX is 36: digits followed by lower-case letters
        return Integer.toString(value, Character.MAX_RADIX);
    }

    /**
     * Initialize DFSCopyFileMapper specific job-configuration.
     * <p>
     * The method records the destination, the option flags, the job directory,
     * the log (output) path and the temporary directory in the job
     * configuration. It walks every source tree and writes three sequence files
     * into the job directory: the files to copy, the destination path of every
     * source entry, and the directories encountered. It then checks the
     * destination list for duplicates, deletes the destination entries missing
     * from the sources when the DELETE option is set and the destination
     * exists, creates the temporary directory and sets the number of maps.
     * @param conf : The dfs/mapred configuration.
     * @param jobConf : The handle to the jobConf object to be initialized.
     * @param args Arguments
     * @throws IOException if a file system operation fails, if the destination
     *           directory cannot be created, or if two sources map to the same
     *           destination
     */
    private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
            throws IOException {
        jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

        // set boolean values
        final boolean update = args.flags.contains(Options.UPDATE);
        final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
        jobConf.setBoolean(Options.UPDATE.propertyname, update);
        jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
        jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
                args.flags.contains(Options.IGNORE_READ_FAILURES));
        jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

        // the same random id names the job directory, the default log
        // directory and the temporary directory
        final String randomId = getRandomId();
        JobClient jClient = new JobClient(jobConf);
        Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
        jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

        long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

        FileSystem dstfs = args.dst.getFileSystem(conf);
        boolean dstExists = dstfs.exists(args.dst);
        boolean dstIsDir = false;
        if (dstExists) {
            dstIsDir = dstfs.getFileStatus(args.dst).isDir();
        }

        // default logPath
        Path logPath = args.log;
        if (logPath == null) {
            String filename = "_distcp_logs_" + randomId;
            if (!dstExists || !dstIsDir) {
                Path parent = args.dst.getParent();
                if (null == parent) {
                    // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                    // will return null. In this case, use '/' as its own parent to
                    // prevent
                    // NPE errors below.
                    parent = args.dst;
                }
                if (!dstfs.exists(parent)) {
                    dstfs.mkdirs(parent);
                }
                logPath = new Path(parent, filename);
            } else {
                logPath = new Path(args.dst, filename);
            }
        }
        FileOutputFormat.setOutputPath(jobConf, logPath);

        // create src list, dst list
        FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

        Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
        jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());

        Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");

        Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
        jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());

        // The writers are opened inside the try block below so that the ones
        // already open are closed if creating a later one fails.
        SequenceFile.Writer src_writer = null;
        SequenceFile.Writer dst_writer = null;
        SequenceFile.Writer dir_writer = null;

        // handle the case where the destination directory doesn't exist
        // and we've only a single src directory OR we're updating/overwriting
        // the contents of the destination directory.
        final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
        int srcCount = 0, cnsyncf = 0, dirsyn = 0;
        long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
        try {
            src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
                    FilePair.class, SequenceFile.CompressionType.NONE);
            dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class, Text.class,
                    SequenceFile.CompressionType.NONE);
            dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class, FilePair.class,
                    SequenceFile.CompressionType.NONE);

            for (final Path src : args.srcs) {
                FileSystem srcfs = src.getFileSystem(conf);
                FileStatus srcfilestat = srcfs.getFileStatus(src);
                // destination paths are computed relative to this root
                Path root = special && srcfilestat.isDir() ? src : src.getParent();
                if (srcfilestat.isDir()) {
                    ++srcCount;
                }

                // depth-first walk of the source tree
                Stack<FileStatus> pathstack = new Stack<>();
                for (pathstack.push(srcfilestat); !pathstack.empty();) {
                    FileStatus cur = pathstack.pop();
                    FileStatus[] children = srcfs.listStatus(cur.getPath());
                    for (int i = 0; i < children.length; i++) {
                        boolean skipfile = false;
                        final FileStatus child = children[i];
                        final String dst = makeRelative(root, child.getPath());
                        ++srcCount;

                        if (child.isDir()) {
                            pathstack.push(child);
                        } else {
                            // skip file if the src and the dst files are the same.
                            skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                            // skip file if it exceed file limit or size limit
                            skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                            if (!skipfile) {
                                ++fileCount;
                                byteCount += child.getLen();

                                // emit a sync marker once enough files or bytes
                                // were written since the previous one
                                ++cnsyncf;
                                cbsyncs += child.getLen();
                                if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                    src_writer.sync();
                                    dst_writer.sync();
                                    cnsyncf = 0;
                                    cbsyncs = 0L;
                                }
                            }
                        }

                        if (!skipfile) {
                            src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                    new FilePair(child, dst));
                        }

                        // the destination list gets every entry, skipped or not
                        dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                    }

                    if (cur.isDir()) {
                        String dst = makeRelative(root, cur.getPath());
                        dir_writer.append(new Text(dst), new FilePair(cur, dst));
                        if (++dirsyn > SYNC_FILE_MAX) {
                            dirsyn = 0;
                            dir_writer.sync();
                        }
                    }
                }
            }
        } finally {
            checkAndClose(src_writer);
            checkAndClose(dst_writer);
            checkAndClose(dir_writer);
        }

        FileStatus dststatus = null;
        try {
            dststatus = dstfs.getFileStatus(args.dst);
        } catch (FileNotFoundException fnfe) {
            getLogger().info(args.dst + " does not exist.");
        }

        // create dest path dir if copying > 1 file
        if (dststatus == null) {
            if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
                throw new IOException("Failed to create " + args.dst);
            }
        }

        final Path sorted = new Path(jobDirectory, "_distcp_sorted");
        checkDuplication(jobfs, dstfilelist, sorted, conf);

        if (dststatus != null && args.flags.contains(Options.DELETE)) {
            deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
        }

        // the temporary directory sits next to dst when dst is (or will be) a
        // single file, inside dst otherwise
        Path tmpDir = new Path(
                (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
                "_distcp_tmp_" + randomId);
        jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

        // Explicitly create the tmpDir to ensure that it can be cleaned
        // up by fullyDelete() later.
        tmpDir.getFileSystem(conf).mkdirs(tmpDir);

        getLogger().info("srcCount=" + srcCount);
        jobConf.setInt(SRC_COUNT_LABEL, srcCount);
        jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
        setMapCount(byteCount, jobConf);
    }

    /**
     * Tell whether a source file and a destination path hold the same content.
     * <p>
     * The answer is false when the destination does not exist or when the two
     * lengths differ. With equal lengths the file checksums decide; when either
     * file system returns no checksum, equal lengths are considered enough.
     * @param srcfs source file system
     * @param srcstatus status of the source file
     * @param dstfs destination file system
     * @param dstpath destination path
     * @return true if the two files are considered the same
     * @throws IOException if a file system access fails
     */
    static private boolean sameFile(final FileSystem srcfs, final FileStatus srcstatus, final FileSystem dstfs,
            final Path dstpath) throws IOException {
        final FileStatus target;
        try {
            target = dstfs.getFileStatus(dstpath);
        } catch (FileNotFoundException missingTarget) {
            // nothing at the destination yet
            return false;
        }

        // a different length settles the question without any checksum
        if (srcstatus.getLen() != target.getLen()) {
            return false;
        }

        final FileChecksum sourceSum;
        try {
            sourceSum = srcfs.getFileChecksum(srcstatus.getPath());
        } catch (FileNotFoundException missingSource) {
            /*
             * Either (1) the source was deleted after its status was obtained, or
             * (2) the source file system does not support checksums and
             * (incorrectly) throws FNFE, as some previous versions of
             * HftpFileSystem did. Returning true is fine in case (1) because the
             * source is gone, and is the expected answer in case (2).
             */
            return true;
        }

        final FileChecksum targetSum;
        try {
            targetSum = dstfs.getFileChecksum(target.getPath());
        } catch (FileNotFoundException missingTarget) {
            return false;
        }

        // a null checksum means checksums are not supported: rely on the length
        if (sourceSum == null || targetSum == null) {
            return true;
        }
        return sourceSum.equals(targetSum);
    }

    /**
     * Delete the dst files/dirs which do not exist in src.
     * <p>
     * The method lists everything below dstroot into a sequence file keyed by
     * the path relative to dstroot, sorts that listing, and walks it in parallel
     * with dstsorted. Every listed path whose key is not found in dstsorted is
     * removed through an FsShell "-rmr" command.
     * @param dstfs destination file system
     * @param dstroot status of the destination root, which must be a directory
     * @param dstsorted sequence file of (Text, Text) records compared against
     *          the sorted listing; setup() passes the sorted file written by
     *          checkDuplication
     * @param jobfs file system holding the job directory
     * @param jobdir job directory, where the listing files are written
     * @param jobconf job configuration used to write, sort and read the
     *          sequence files
     * @param conf configuration handed to the FsShell
     * @throws IOException if dstroot is not a directory, if the shell throws an
     *           exception or returns a non-zero value, or if a file system
     *           access fails
     */
    static private void deleteNonexisting(final FileSystem dstfs, final FileStatus dstroot, final Path dstsorted,
            final FileSystem jobfs, final Path jobdir, final JobConf jobconf, final Configuration conf)
            throws IOException {
        if (!dstroot.isDir()) {
            throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                    + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
        }

        // write dst lsr results
        final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
        // NOTE(review): the value class written here is dstroot.getClass(), while
        // the sorter below is given FileStatus.class - confirm that dstroot is
        // never a subclass of FileStatus.
        final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
                dstroot.getClass(), SequenceFile.CompressionType.NONE);
        try {
            // do lsr to get all file statuses in dstroot
            final Stack<FileStatus> lsrstack = new Stack<>();
            for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
                final FileStatus status = lsrstack.pop();
                if (status.isDir()) {
                    for (FileStatus child : dstfs.listStatus(status.getPath())) {
                        // key each entry by its path relative to the destination root
                        String relative = makeRelative(dstroot.getPath(), child.getPath());
                        writer.append(new Text(relative), child);
                        lsrstack.push(child);
                    }
                }
            }
        } finally {
            checkAndClose(writer);
        }

        // sort lsr results
        final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
        SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
                FileStatus.class, jobconf);
        sorter.sort(dstlsr, sortedlsr);

        // compare lsr list and dst list
        SequenceFile.Reader lsrin = null;
        SequenceFile.Reader dstin = null;
        try {
            lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
            dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

            // compare sorted lsr list and sorted dst list
            final Text lsrpath = new Text();
            final FileStatus lsrstatus = new FileStatus();
            final Text dstpath = new Text();
            final Text dstfrom = new Text();
            final FsShell shell = new FsShell(conf);
            // shellargs[1] holds the last path handed to the shell, null at first
            final String[] shellargs = { "-rmr", null };

            boolean hasnext = dstin.next(dstpath, dstfrom);
            for (; lsrin.next(lsrpath, lsrstatus);) {
                int dst_cmp_lsr = dstpath.compareTo(lsrpath);
                // advance the dst list while its current key is lower than the
                // listed path
                for (; hasnext && dst_cmp_lsr < 0;) {
                    hasnext = dstin.next(dstpath, dstfrom);
                    dst_cmp_lsr = dstpath.compareTo(lsrpath);
                }

                if (dst_cmp_lsr == 0) {
                    // lsrpath exists in dst, skip it
                    hasnext = dstin.next(dstpath, dstfrom);
                } else {
                    // lsrpath does not exist, delete it
                    String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                    // no new command when the path last handed to the shell is
                    // this path or one of its ancestors
                    if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                        shellargs[1] = s;
                        int r = 0;
                        try {
                            r = shell.run(shellargs);
                        } catch (Exception e) {
                            throw new IOException("Exception from shell.", e);
                        }
                        if (r != 0) {
                            throw new IOException(
                                    "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
                        }
                    }
                }
            }
        } finally {
            checkAndClose(lsrin);
            checkAndClose(dstin);
        }
    }

    /**
     * Tell whether the path string x is an ancestor of the path string y. The
     * answer is also true when both strings are equal.
     * @param x candidate ancestor
     * @param y path to test
     * @return true if y equals x, or starts with x immediately followed by the
     *         path separator
     */
    static private boolean isAncestorPath(final String x, final String y) {
        final int prefixLength = x.length();
        if (y.startsWith(x)) {
            // a plain prefix is not enough: it must end on a component boundary
            return y.length() == prefixLength || y.charAt(prefixLength) == Path.SEPARATOR_CHAR;
        }
        return false;
    }

    /**
     * Check whether the file list contains the same destination twice.
     * <p>
     * The list is sorted by key into {@code sorted}, then read back comparing
     * every key with the previous one.
     * @param fs file system holding both files
     * @param file sequence file of (Text, Text) records to check
     * @param sorted path where the sorted copy of the list is written
     * @param conf configuration used to sort and read the files
     * @throws DuplicationException if two consecutive sorted records have the
     *           same key
     * @throws IOException if sorting or reading fails
     */
    static private void checkDuplication(final FileSystem fs, final Path file, final Path sorted,
            final Configuration conf) throws IOException {
        SequenceFile.Reader reader = null;
        try {
            new SequenceFile.Sorter(fs, new Text.Comparator(), Text.class, Text.class, conf).sort(file, sorted);
            reader = new SequenceFile.Reader(fs, sorted, conf);

            Text lastDst = null;
            Text lastSrc = null;
            Text dstKey = new Text();
            Text srcValue = new Text();
            while (reader.next(dstKey, srcValue)) {
                if (lastDst != null && dstKey.equals(lastDst)) {
                    throw new DuplicationException(
                            "Invalid input, there are duplicated files in the sources: " + lastSrc + ", " + srcValue);
                }
                // remember this record and read the next one into fresh objects
                lastDst = dstKey;
                lastSrc = srcValue;
                dstKey = new Text();
                srcValue = new Text();
            }
        } finally {
            checkAndClose(reader);
        }
    }

    /**
     * Close a resource, logging a warning instead of propagating the exception
     * when closing fails.
     * @param io the resource to close, may be null
     * @return false if closing threw an {@link IOException}, true otherwise
     *         (including when io is null)
     */
    static boolean checkAndClose(final java.io.Closeable io) {
        if (io == null) {
            return true;
        }
        try {
            io.close();
            return true;
        } catch (IOException closeFailure) {
            getLogger().warning(StringUtils.stringifyException(closeFailure));
            return false;
        }
    }

    /**
     * Exception raised when two source files map to the same destination.
     */
    public static class DuplicationException extends IOException {
        /** Error code for this exception */
        public static final int ERROR_CODE = -2;

        private static final long serialVersionUID = 1L;

        /**
         * Create the exception.
         * @param message description of the duplication
         */
        DuplicationException(final String message) {
            super(message);
        }
    }
}