Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page lists usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
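
A minimal, self-contained sketch of the call (the path below is a placeholder, not taken from the examples on this page):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path; getFileStatus() throws FileNotFoundException
        // if the path does not exist.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(new Configuration());

        FileStatus status = fs.getFileStatus(path);
        System.out.println("length           = " + status.getLen());
        System.out.println("isDirectory      = " + status.isDirectory());
        System.out.println("modificationTime = " + status.getModificationTime());
    }
}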

Usage

From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

License:LGPL

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to
                // prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip the file if it exceeds the file limit or the size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            // LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
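
The existence check at the top of setup() is a reusable pattern; a standalone sketch under assumed names (the class and method are illustrative, not part of DistCp):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DestinationCheck {
    /**
     * Sketch of the destination check in setup(): exists() guards the
     * getFileStatus() call, which would throw FileNotFoundException
     * for a missing path.
     */
    public static boolean existsAsDirectory(Path dst, Configuration conf) throws IOException {
        FileSystem fs = dst.getFileSystem(conf);
        return fs.exists(dst) && fs.getFileStatus(dst).isDirectory();
    }
}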

From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java

License:LGPL

/**
 * Check whether the contents of src and dst are the same. Return false if
 * dstpath does not exist. If the files have different sizes, return false.
 * If the files have the same size, the file checksums are compared. When
 * file checksums are not supported by either file system, two files are
 * considered the same if they have the same size.
 */
static private boolean sameFile(final FileSystem srcfs, final FileStatus srcstatus, final FileSystem dstfs,
        final Path dstpath) throws IOException {
    FileStatus dststatus;
    try {
        dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    // same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
        return false;
    }

    // get src checksum
    final FileChecksum srccs;
    try {
        srccs = srcfs.getFileChecksum(srcstatus.getPath());
    } catch (FileNotFoundException fnfe) {
        /*
         * Two possible cases: (1) src existed once but was deleted between the
         * time period that srcstatus was obtained and the try block above. (2)
         * srcfs does not support file checksum and (incorrectly) throws FNFE,
         * e.g. some previous versions of HftpFileSystem. For case (1), it is okay
         * to return true since src was already deleted. For case (2), true should
         * be returned.
         */
        return true;
    }

    // compare checksums
    try {
        final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
        // return true if checksum is not supported
        // (i.e. some of the checksums is null)
        return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
        return false;
    }
}
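
The same length-then-checksum pattern can stand alone; a minimal sketch, assuming both paths exist (the class and method names are illustrative):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumCompare {
    /** Compare length first, then checksum; a null checksum means the scheme has no checksum support. */
    public static boolean looksIdentical(Path a, Path b, Configuration conf) throws IOException {
        FileSystem afs = a.getFileSystem(conf);
        FileSystem bfs = b.getFileSystem(conf);
        if (afs.getFileStatus(a).getLen() != bfs.getFileStatus(b).getLen()) {
            return false;
        }
        FileChecksum acs = afs.getFileChecksum(a);
        FileChecksum bcs = bfs.getFileChecksum(b);
        // Fall back to the length comparison above when either side
        // cannot produce a checksum.
        return acs == null || bcs == null || acs.equals(bcs);
    }
}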

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Get the length of a file.
 * @param path Path of the file
 * @param conf configuration
 * @return the length of the file in bytes
 * @throws IOException if an error occurs while getting the file status
 */
public static final long getSize(final Path path, final Configuration conf) throws IOException {

    if (path == null) {
        throw new NullPointerException("Path to create is null");
    }
    if (conf == null) {
        throw new NullPointerException("The configuration object is null");
    }

    final FileSystem fs = path.getFileSystem(conf);

    if (fs == null) {
        throw new IOException("Unable to get the file size, the FileSystem is null");
    }

    return fs.getFileStatus(path).getLen();
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Return the list of files in a directory whose names match a prefix.
 * @param dir Path of the directory
 * @param prefix filter on the file name prefix
 * @param allowCompressedExtension Allow compressed extensions
 * @param conf Configuration
 * @return a list of Path
 * @throws IOException if an error occurs while listing the directory
 */
public static List<Path> listPathsByPrefix(final Path dir, final String prefix,
        final boolean allowCompressedExtension, final Configuration conf) throws IOException {

    if (dir == null) {
        throw new NullPointerException("Directory path is null");
    }

    if (prefix == null) {
        throw new NullPointerException("Prefix is null");
    }

    if (conf == null) {
        throw new NullPointerException("Configuration is null");
    }

    final FileSystem fs = dir.getFileSystem(conf);
    if (!fs.getFileStatus(dir).isDirectory()) {
        throw new IOException("Directory path is not a directory: " + dir);
    }

    final FileStatus[] filesStatus = fs.listStatus(dir, new PrefixPathFilter(prefix, allowCompressedExtension));

    if (filesStatus == null) {
        return Collections.emptyList();
    }

    final List<Path> result = new ArrayList<>(filesStatus.length);

    for (FileStatus fst : filesStatus) {
        result.add(fst.getPath());
    }

    return result;
}
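
A hedged usage sketch for this helper (the directory and prefix are placeholders):

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils;

public class ListByPrefixDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder directory and prefix; compressed extensions are allowed.
        List<Path> matches = PathUtils.listPathsByPrefix(new Path("/user/data"), "sample1_", true, conf);
        for (Path p : matches) {
            System.out.println(p);
        }
    }
}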

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Return the list of files in a directory whose names match a suffix.
 * @param dir Path of the directory
 * @param suffix filter on the file name suffix
 * @param allowCompressedExtension Allow compressed extensions
 * @param conf Configuration
 * @return a list of Path
 * @throws IOException if an error occurs while listing the directory
 */
public static List<Path> listPathsBySuffix(final Path dir, final String suffix,
        final boolean allowCompressedExtension, final Configuration conf) throws IOException {

    if (dir == null) {
        throw new NullPointerException("Directory path is null");
    }

    if (suffix == null) {
        throw new NullPointerException("Suffix is null");
    }

    if (conf == null) {
        throw new NullPointerException("Configuration is null");
    }

    final FileSystem fs = dir.getFileSystem(conf);
    if (!fs.getFileStatus(dir).isDirectory()) {
        throw new IOException("Directory path is not a directory: " + dir);
    }

    final FileStatus[] filesStatus = fs.listStatus(dir, new SuffixPathFilter(suffix, allowCompressedExtension));

    if (filesStatus == null) {
        return Collections.emptyList();
    }

    final List<Path> result = new ArrayList<>(filesStatus.length);

    for (FileStatus fst : filesStatus) {
        result.add(fst.getPath());
    }

    return result;
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Concatenate a list of files into one output file (merge).
 * @param paths list of the files to concatenate
 * @param dstPath destination path
 * @param deleteSource true if the original files must be deleted
 * @param overwrite true if an existing destination file must be overwritten
 * @param conf Configuration
 * @param addString string to append after each file (may be null)
 * @return true if the concatenation succeeded
 * @throws IOException if an error occurs while merging the files
 */
public static boolean concat(final List<Path> paths, final Path dstPath, final boolean deleteSource,
        final boolean overwrite, final Configuration conf, final String addString) throws IOException {

    if (paths == null) {
        throw new NullPointerException("The list of path to concat is null");
    }

    if (paths.size() == 0) {
        return false;
    }

    if (dstPath == null) {
        throw new NullPointerException("The destination path is null");
    }

    if (conf == null) {
        throw new NullPointerException("The configuration is null.");
    }

    final FileSystem srcFs = paths.get(0).getFileSystem(conf);
    final FileSystem dstFs = dstPath.getFileSystem(conf);

    if (!overwrite && dstFs.exists(dstPath)) {
        throw new IOException("The output file already exists: " + dstPath);
    }

    try (OutputStream out = dstFs.create(dstPath)) {
        for (Path p : paths) {
            if (!srcFs.getFileStatus(p).isDirectory()) {
                try (InputStream in = srcFs.open(p)) {
                    IOUtils.copyBytes(in, out, conf, false);
                    if (addString != null) {
                        out.write(addString.getBytes(FileCharsets.UTF8_CHARSET));
                    }

                }
            }
        }

    }

    if (deleteSource) {
        for (Path p : paths) {
            if (!srcFs.delete(p, false)) {
                return false;
            }
        }
    }

    return true;
}
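
A hedged usage sketch (the file names are placeholders; passing "\n" appends a newline after each merged file):

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils;

public class ConcatDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder part files to merge into a single output file.
        List<Path> parts = Arrays.asList(new Path("/tmp/part-0"), new Path("/tmp/part-1"));
        boolean merged = PathUtils.concat(parts, new Path("/tmp/merged.txt"), false, true, conf, "\n");
        System.out.println("merged: " + merged);
    }
}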

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Check if a directory exists.
 * @param directory directory to test
 * @param conf the configuration object
 * @param msgFileType message for the description of the file
 * @throws IOException if the directory doesn't exist or is not a directory
 */
public static final void checkExistingDirectoryFile(final Path directory, final Configuration conf,
        final String msgFileType) throws IOException {

    checkExistingFile(directory, conf, msgFileType);

    final FileSystem fs = directory.getFileSystem(conf);

    if (!fs.getFileStatus(directory).isDirectory()) {
        throw new IOException("The " + msgFileType + " is not a directory: " + directory);
    }
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Check if a directory exists.
 * @param directory directory to test
 * @param conf the configuration object
 * @return true if the directory exists
 * @throws IOException if an error occurs while testing the directory
 */
public static final boolean isExistingDirectoryFile(final Path directory, final Configuration conf)
        throws IOException {

    if (directory == null) {
        throw new NullPointerException("The directory is null");
    }

    if (conf == null) {
        throw new NullPointerException("The configuration is null");
    }

    final FileSystem fs = directory.getFileSystem(conf);

    try {
        return fs.getFileStatus(directory).isDirectory();
    } catch (FileNotFoundException e) {
        return false;
    }
}
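
Design note: catching FileNotFoundException keeps this test to a single getFileStatus() metadata call; an exists() pre-check followed by getFileStatus() would issue two, since exists() is itself implemented over getFileStatus().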

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java

License:LGPL

/**
 * Check if a path exists and is a standard file or a directory.
 * @param file file to test
 * @param conf the configuration object
 * @param msgFileType message for the description of the file
 * @throws IOException if the path doesn't exist or is neither a standard file nor a directory
 */
public static final void checkExistingStandardFileOrDirectory(final Path file, final Configuration conf,
        final String msgFileType) throws IOException {

    // check existence only; the type check below accepts both files and directories
    checkExistingFile(file, conf, msgFileType);

    final FileSystem fs = file.getFileSystem(conf);

    if (!fs.isFile(file) && !fs.getFileStatus(file).isDirectory()) {
        throw new IOException("The " + msgFileType + " is  not a standard file or a directory: " + file);
    }
}

From source file:fuse4j.hadoopfs.HdfsClientImpl.java

License:Apache License

/**
 * getFileInfo()
 */
@Override
public HdfsFileAttr getFileInfo(int uid, String path) {
    FileSystem dfs = null;
    try {
        dfs = getDfs(uid);
        FileStatus dfsStat = dfs.getFileStatus(new Path(path));

        final boolean directory = dfsStat.isDir();
        final int inode = 0;
        final int mode = dfsStat.getPermission().toShort();
        final int uuid = userCache.getUid(dfsStat.getOwner());
        final int gid = 0;

        // TODO: per-file block-size can't be retrieved correctly,
        //       using default block size for now.
        final long size = dfsStat.getLen();
        final int blocks = (int) Math.ceil(((double) size) / dfs.getDefaultBlockSize());

        // the create-time is reported as the modification-time; access-time is kept separate
        final int modificationTime = (int) (dfsStat.getModificationTime() / 1000);
        final int accessTime = (int) (dfsStat.getAccessTime() / 1000);

        HdfsFileAttr hdfsFileAttr = new HdfsFileAttr(directory, inode, mode, uuid, gid, 1);
        hdfsFileAttr.setSize(size, blocks);
        hdfsFileAttr.setTime(modificationTime, modificationTime, accessTime);

        // TODO Hack to set inode;
        hdfsFileAttr.inode = hdfsFileAttr.hashCode();

        return hdfsFileAttr;
    } catch (Exception ioe) {
        // fall through to failure
    }

    // failed
    return null;
}