org.apache.hadoop.hive.common.FileUtils.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.common.FileUtils.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.common;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.AccessControlException;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.GlobFilter;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.ShutdownHookManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Collection of file manipulation utilities common across Hive.
 */
public final class FileUtils {
    private static final Logger LOG = LoggerFactory.getLogger(FileUtils.class.getName());
    private static final Random random = new Random();

    public static final PathFilter HIDDEN_FILES_PATH_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

    public static final PathFilter STAGING_DIR_PATH_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            return !name.startsWith(".");
        }
    };

    /**
     * Variant of Path.makeQualified that qualifies the input path against the default file system
     * indicated by the configuration.
     *
     * This does not require a FileSystem handle in most cases - only the FileSystem URI is needed.
     * This saves the cost of opening the FileSystem - which can involve RPCs - and avoids the
     * errors that opening it can raise.
     *
     * @param path
     *          path to be fully qualified
     * @param conf
     *          the configuration
     * @return path qualified relative to the default file system
     */
    public static Path makeQualified(Path path, Configuration conf) throws IOException {

        if (!path.isAbsolute()) {
            // in this case we need to get the working directory
            // and this requires a FileSystem handle. So revert to
            // original method.
            return path.makeQualified(FileSystem.get(conf));
        }

        URI fsUri = FileSystem.getDefaultUri(conf);
        URI pathUri = path.toUri();

        String scheme = pathUri.getScheme();
        String authority = pathUri.getAuthority();

        // validate/fill-in scheme and authority. this follows logic
        // identical to FileSystem.get(URI, conf) - but doesn't actually
        // obtain a file system handle

        if (scheme == null) {
            // no scheme - use default file system uri
            scheme = fsUri.getScheme();
            authority = fsUri.getAuthority();
            if (authority == null) {
                authority = "";
            }
        } else {
            if (authority == null) {
                // no authority - use default one if it applies
                if (scheme.equals(fsUri.getScheme()) && fsUri.getAuthority() != null) {
                    authority = fsUri.getAuthority();
                } else {
                    authority = "";
                }
            }
        }

        return new Path(scheme, authority, pathUri.getPath());
    }
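
    // Illustrative sketch (example values assumed, not from the original source):
    // with fs.defaultFS set to hdfs://nn:8020 in conf,
    //   makeQualified(new Path("/user/hive/warehouse"), conf)
    // returns hdfs://nn:8020/user/hive/warehouse without ever opening a
    // FileSystem handle, since the path is absolute and has no scheme of its own.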

    private FileUtils() {
        // prevent instantiation
    }

    public static String makePartName(List<String> partCols, List<String> vals) {
        return makePartName(partCols, vals, null);
    }

    /**
     * Makes a valid partition name.
     * @param partCols The partition keys' names
     * @param vals The partition values
     * @param defaultStr
     *         The default name given to a partition value if the respective value is empty or null.
     * @return An escaped, valid partition name.
     */
    public static String makePartName(List<String> partCols, List<String> vals, String defaultStr) {
        StringBuilder name = new StringBuilder();
        for (int i = 0; i < partCols.size(); i++) {
            if (i > 0) {
                name.append(Path.SEPARATOR);
            }
            name.append(escapePathName((partCols.get(i)).toLowerCase(), defaultStr));
            name.append('=');
            name.append(escapePathName(vals.get(i), defaultStr));
        }
        return name.toString();
    }
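
    // Illustrative example: column names are lower-cased and both names and
    // values are run through escapePathName, e.g.
    //   makePartName(Arrays.asList("DS", "Country"), Arrays.asList("2016-01-01", "US"))
    // returns "ds=2016-01-01/country=US".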

    /**
     * The default directory will have the same depth as the number of skewed columns;
     * this makes future operations, such as DML merges and concatenation merges, easier.
     * @param skewedCols the skewed columns
     * @param name the default directory name
     * @return the default list bucketing directory path
     */
    public static String makeDefaultListBucketingDirName(List<String> skewedCols, String name) {
        String lbDirName;
        String defaultDir = FileUtils.escapePathName(name);
        StringBuilder defaultDirPath = new StringBuilder();
        for (int i = 0; i < skewedCols.size(); i++) {
            if (i > 0) {
                defaultDirPath.append(Path.SEPARATOR);
            }
            defaultDirPath.append(defaultDir);
        }
        lbDirName = defaultDirPath.toString();
        return lbDirName;
    }

    /**
     * Makes a valid list bucketing directory name.
     * @param lbCols The skewed keys' names
     * @param vals The skewed values
     * @return An escaped, valid list bucketing directory name.
     */
    public static String makeListBucketingDirName(List<String> lbCols, List<String> vals) {
        StringBuilder name = new StringBuilder();
        for (int i = 0; i < lbCols.size(); i++) {
            if (i > 0) {
                name.append(Path.SEPARATOR);
            }
            name.append(escapePathName((lbCols.get(i)).toLowerCase()));
            name.append('=');
            name.append(escapePathName(vals.get(i)));
        }
        return name.toString();
    }

    // NOTE: This is for generating the internal path name for partitions. Users
    // should always use the MetaStore API to get the path name for a partition.
    // Users should not directly take partition values and turn it into a path
    // name by themselves, because the logic below may change in the future.
    //
    // In the future, it's OK to add new chars to the escape list, and old data
    // won't be corrupted, because the full path name is stored in the metastore.
    // In that case, Hive will continue to read the old data, but when it creates
    // new partitions, it will use the new names.
    // edit: there are some use cases for which adding new chars does not seem
    // to be backward compatible - e.g. if a partition was created with a name
    // containing a special char that you later start escaping, then dropping
    // that partition with a Hive version that escapes the special char using
    // the list below fails to work.

    static BitSet charToEscape = new BitSet(128);
    static {
        for (char c = 0; c < ' '; c++) {
            charToEscape.set(c);
        }

        /**
         * ASCII 01-1F are control characters that need to be escaped.
         * \u000A and \u000D are \n and \r, respectively.
         */
        char[] clist = new char[] { '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008',
                '\u0009', '\n', '\u000B', '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012',
                '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C',
                '\u001D', '\u001E', '\u001F', '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
                '[', ']', '^' };

        for (char c : clist) {
            charToEscape.set(c);
        }
    }

    static boolean needsEscaping(char c) {
        return c >= 0 && c < charToEscape.size() && charToEscape.get(c);
    }

    public static String escapePathName(String path) {
        return escapePathName(path, null);
    }

    /**
     * Escapes a path name.
     * @param path The path to escape.
     * @param defaultPath
     *          The default name for the path, if the given path is empty or null.
     * @return An escaped path name.
     */
    public static String escapePathName(String path, String defaultPath) {

        // __HIVE_DEFAULT_NULL__ is the system default value for null and empty string.
        // TODO: we should allow user to specify default partition or HDFS file location.
        if (path == null || path.length() == 0) {
            if (defaultPath == null) {
                //previously, when path is empty or null and no default path is specified,
                // __HIVE_DEFAULT_PARTITION__ was the return value for escapePathName
                return "__HIVE_DEFAULT_PARTITION__";
            } else {
                return defaultPath;
            }
        }

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < path.length(); i++) {
            char c = path.charAt(i);
            if (needsEscaping(c)) {
                sb.append('%');
                sb.append(String.format("%1$02X", (int) c));
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }
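
    // Illustrative examples:
    //   escapePathName("a/b=c")   returns "a%2Fb%3Dc"  ('/' and '=' are escaped)
    //   escapePathName(null)      returns "__HIVE_DEFAULT_PARTITION__"
    //   escapePathName("", "__MISSING__") returns "__MISSING__"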

    public static String unescapePathName(String path) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < path.length(); i++) {
            char c = path.charAt(i);
            if (c == '%' && i + 2 < path.length()) {
                int code = -1;
                try {
                    code = Integer.parseInt(path.substring(i + 1, i + 3), 16);
                } catch (Exception e) {
                    code = -1;
                }
                if (code >= 0) {
                    sb.append((char) code);
                    i += 2;
                    continue;
                }
            }
            sb.append(c);
        }
        return sb.toString();
    }
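
    // unescapePathName inverts escapePathName on escaped input, e.g.
    //   unescapePathName("a%2Fb%3Dc") returns "a/b=c"
    // A '%' that is not followed by two hex digits is passed through unchanged.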

    /**
     * Recursively lists status for all files starting from a particular directory (or individual file
     * as base case).
     *
     * @param fs
     *          file system
     *
     * @param fileStatus
     *          starting point in file system
     *
     * @param results
     *          receives enumeration of all files found
     */
    public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus, List<FileStatus> results)
            throws IOException {

        if (fileStatus.isDir()) {
            for (FileStatus stat : fs.listStatus(fileStatus.getPath(), HIDDEN_FILES_PATH_FILTER)) {
                listStatusRecursively(fs, stat, results);
            }
        } else {
            results.add(fileStatus);
        }
    }
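
    // Usage sketch (illustrative): collect every non-hidden file under a root.
    //   List<FileStatus> files = new ArrayList<>();
    //   listStatusRecursively(fs, fs.getFileStatus(rootPath), files);
    //   // 'files' now holds all files under rootPath, skipping names that
    //   // start with '_' or '.'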

    /**
     * Finds the given path if it exists, or the nearest ancestor that does exist.
     *
     * @param fs
     *          file system
     * @param path the path to look up
     * @return FileStatus for the argument path if it exists, or for the first ancestor in the path that exists
     * @throws IOException
     */
    public static FileStatus getPathOrParentThatExists(FileSystem fs, Path path) throws IOException {
        FileStatus stat = FileUtils.getFileStatusOrNull(fs, path);
        if (stat != null) {
            return stat;
        }
        Path parentPath = path.getParent();
        return getPathOrParentThatExists(fs, parentPath);
    }
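
    // Illustrative example: if /a/b exists but /a/b/c/d does not,
    //   getPathOrParentThatExists(fs, new Path("/a/b/c/d"))
    // returns the FileStatus of /a/b. The recursion terminates because the
    // root directory always exists.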

    public static void checkFileAccessWithImpersonation(final FileSystem fs, final FileStatus stat,
            final FsAction action, final String user)
            throws IOException, AccessControlException, InterruptedException, Exception {
        checkFileAccessWithImpersonation(fs, stat, action, user, null);
    }

    /**
     * Perform a check to determine if the user is able to access the file passed in.
     * If the user name passed in is different from the current user, this method will
     * attempt to impersonate that user to perform the check; the current user must be
     * able to create proxy users in this case.
     * @param fs   FileSystem of the path to check
     * @param stat FileStatus representing the file
     * @param action FsAction that will be checked
     * @param user User name of the user that will be checked for access.  If the user name
     *             is null or the same as the current user, no user impersonation will be done
     *             and the check will be done as the current user. Otherwise the file access
     *             check will be performed within a doAs() block to use the access privileges
     *             of this user. In this case the user must be configured to impersonate other
     *             users, otherwise this check will fail with an error.
     * @param children List into which children are collected. If this is null, no children
     *        are collected. Should be set only if the path is a directory.
     * @throws IOException
     * @throws AccessControlException
     * @throws InterruptedException
     * @throws Exception
     */
    public static void checkFileAccessWithImpersonation(final FileSystem fs, final FileStatus stat,
            final FsAction action, final String user, final List<FileStatus> children)
            throws IOException, AccessControlException, InterruptedException, Exception {
        UserGroupInformation ugi = Utils.getUGI();
        String currentUser = ugi.getShortUserName();

        if (user == null || currentUser.equals(user)) {
            // No need to impersonate user, do the checks as the currently configured user.
            ShimLoader.getHadoopShims().checkFileAccess(fs, stat, action);
            addChildren(fs, stat.getPath(), children);
            return;
        }

        // Otherwise, try user impersonation. Current user must be configured to do user impersonation.
        UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(user,
                UserGroupInformation.getLoginUser());
        try {
            proxyUser.doAs(new PrivilegedExceptionAction<Object>() {
                @Override
                public Object run() throws Exception {
                    FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf());
                    ShimLoader.getHadoopShims().checkFileAccess(fsAsUser, stat, action);
                    addChildren(fsAsUser, stat.getPath(), children);
                    return null;
                }
            });
        } finally {
            FileSystem.closeAllForUGI(proxyUser);
        }
    }

    private static void addChildren(FileSystem fsAsUser, Path path, List<FileStatus> children) throws IOException {
        if (children != null) {
            FileStatus[] listStatus;
            try {
                listStatus = fsAsUser.listStatus(path);
            } catch (IOException e) {
                LOG.warn("Unable to list files under " + path + " : " + e);
                throw e;
            }
            children.addAll(Arrays.asList(listStatus));
        }
    }

    /**
     * Checks whether the user userName has permission to perform the given FsAction
     * on all files under the file or directory described by fileStatus.
     *
     * @param fs the file system
     * @param fileStatus the root of the hierarchy to check
     * @param userName the user whose permissions are checked
     * @param action the FsAction that must be permitted
     * @return true if the action is permitted on the entire hierarchy
     * @throws IOException
     */
    public static boolean isActionPermittedForFileHierarchy(FileSystem fs, FileStatus fileStatus, String userName,
            FsAction action) throws Exception {
        return isActionPermittedForFileHierarchy(fs, fileStatus, userName, action, true);
    }

    public static boolean isActionPermittedForFileHierarchy(FileSystem fs, FileStatus fileStatus, String userName,
            FsAction action, boolean recurse) throws Exception {
        boolean isDir = fileStatus.isDir();

        FsAction dirActionNeeded = action;
        if (isDir) {
            // for dirs the user needs execute privileges as well. FsAction is
            // immutable, so the result of or() must be kept, not discarded.
            dirActionNeeded = dirActionNeeded.or(FsAction.EXECUTE);
        }

        List<FileStatus> subDirsToCheck = null;
        if (isDir && recurse) {
            subDirsToCheck = new ArrayList<FileStatus>();
        }

        try {
            checkFileAccessWithImpersonation(fs, fileStatus, dirActionNeeded, userName, subDirsToCheck);
        } catch (AccessControlException err) {
            // Action not permitted for user
            LOG.warn("Action " + action + " denied on " + fileStatus.getPath() + " for user " + userName);
            return false;
        }

        if (subDirsToCheck == null || subDirsToCheck.isEmpty()) {
            // no sub dirs to be checked
            return true;
        }

        // check all children
        for (FileStatus childStatus : subDirsToCheck) {
            // check children recursively - recurse is true if we're here.
            if (!isActionPermittedForFileHierarchy(fs, childStatus, userName, action, true)) {
                return false;
            }
        }
        return true;
    }

    /**
     * A best-effort attempt to determine if the file is a local file
     * @param conf the Hive configuration
     * @param fileName the file name to check
     * @return true if it can be determined that this is a local file
     */
    public static boolean isLocalFile(HiveConf conf, String fileName) {
        try {
            // do best effort to determine if this is a local file
            return isLocalFile(conf, new URI(fileName));
        } catch (URISyntaxException e) {
            LOG.warn("Unable to create URI from " + fileName, e);
        }
        return false;
    }

    /**
     * A best-effort attempt to determine if the file is a local file
     * @param conf the Hive configuration
     * @param fileUri the file URI to check
     * @return true if it can be determined that this is a local file
     */
    public static boolean isLocalFile(HiveConf conf, URI fileUri) {
        try {
            // do best effort to determine if this is a local file
            FileSystem fsForFile = FileSystem.get(fileUri, conf);
            return LocalFileSystem.class.isInstance(fsForFile);
        } catch (IOException e) {
            LOG.warn("Unable to get FileSystem for " + fileUri, e);
        }
        return false;
    }
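
    // Illustrative examples (assuming the default scheme-to-FileSystem mapping):
    //   isLocalFile(conf, "file:///tmp/data.csv")  // true
    //   isLocalFile(conf, "hdfs://nn:8020/data")   // false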

    public static boolean isOwnerOfFileHierarchy(FileSystem fs, FileStatus fileStatus, String userName)
            throws IOException, InterruptedException {
        return isOwnerOfFileHierarchy(fs, fileStatus, userName, true);
    }

    public static boolean isOwnerOfFileHierarchy(final FileSystem fs, final FileStatus fileStatus,
            final String userName, final boolean recurse) throws IOException, InterruptedException {
        UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(userName,
                UserGroupInformation.getLoginUser());
        try {
            boolean isOwner = proxyUser.doAs(new PrivilegedExceptionAction<Boolean>() {
                @Override
                public Boolean run() throws Exception {
                    FileSystem fsAsUser = FileSystem.get(fs.getUri(), fs.getConf());
                    return checkIsOwnerOfFileHierarchy(fsAsUser, fileStatus, userName, recurse);
                }
            });
            return isOwner;
        } finally {
            FileSystem.closeAllForUGI(proxyUser);
        }
    }

    public static boolean checkIsOwnerOfFileHierarchy(FileSystem fs, FileStatus fileStatus, String userName,
            boolean recurse) throws IOException {
        if (!fileStatus.getOwner().equals(userName)) {
            return false;
        }

        if ((!fileStatus.isDir()) || (!recurse)) {
            // no sub dirs to be checked
            return true;
        }
        // check all children
        FileStatus[] childStatuses = fs.listStatus(fileStatus.getPath());
        for (FileStatus childStatus : childStatuses) {
            // check children recursively - recurse is true if we're here.
            if (!checkIsOwnerOfFileHierarchy(fs, childStatus, userName, true)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Creates the directory and all necessary parent directories.
     * @param fs FileSystem to use
     * @param f path to create.
     * @param conf Hive configuration
     * @return true if the directory was created successfully; false otherwise (depending on
     *         the FileSystem implementation, this may include the case where it already exists).
     * @throws IOException exception in creating the directory
     */
    public static boolean mkdir(FileSystem fs, Path f, Configuration conf) throws IOException {
        LOG.info("Creating directory if it doesn't exist: " + f);
        return fs.mkdirs(f);
    }

    public static Path makeAbsolute(FileSystem fileSystem, Path path) throws IOException {
        if (path.isAbsolute()) {
            return path;
        } else {
            return new Path(fileSystem.getWorkingDirectory(), path);
        }
    }

    /**
     * Copies files between filesystems.
     */
    public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
            boolean overwrite, HiveConf conf) throws IOException {
        return copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf, ShimLoader.getHadoopShims());
    }

    /**
     * Copies files between filesystems using distcp, run as the configured
     * privileged (fs super) user.
     */
    public static boolean privilegedCopy(FileSystem srcFS, List<Path> srcPaths, Path dst, HiveConf conf)
            throws IOException {
        String privilegedUser = conf.getVar(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER);
        return distCp(srcFS, srcPaths, dst, false, privilegedUser, conf, ShimLoader.getHadoopShims());
    }

    @VisibleForTesting
    static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
            boolean overwrite, HiveConf conf, HadoopShims shims) throws IOException {

        boolean copied = false;
        boolean triedDistcp = false;

        /* Run distcp if source file/dir is too big */
        if (srcFS.getUri().getScheme().equals("hdfs")) {
            ContentSummary srcContentSummary = srcFS.getContentSummary(src);
            if (srcContentSummary.getFileCount() > conf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXNUMFILES)
                    && srcContentSummary.getLength() > conf
                            .getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE)) {

                LOG.info("Source is " + srcContentSummary.getLength() + " bytes. (MAX: "
                        + conf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE) + ")");
                LOG.info("Source is " + srcContentSummary.getFileCount() + " files. (MAX: "
                        + conf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXNUMFILES) + ")");
                LOG.info("Launch distributed copy (distcp) job.");
                triedDistcp = true;
                copied = distCp(srcFS, Collections.singletonList(src), dst, deleteSource, null, conf, shims);
            }
        }
        if (!triedDistcp) {
            // Note : Currently, this implementation does not "fall back" to regular copy if distcp
            // is tried and it fails. We depend upon that behaviour in cases like replication,
            // wherein if distcp fails, there is good reason to not plod along with a trivial
            // implementation, and fail instead.
            copied = FileUtil.copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf);
        }
        return copied;
    }
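
    // Note that distcp is attempted only when the source is on HDFS and BOTH
    // thresholds are exceeded. For illustration, if hive.exec.copyfile.maxnumfiles
    // were 1 and hive.exec.copyfile.maxsize were 32MB, a 64MB directory holding
    // two files would be copied via distcp, while a single 64MB file would fall
    // through to the regular FileUtil.copy path.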

    public static boolean distCp(FileSystem srcFS, List<Path> srcPaths, Path dst, boolean deleteSource,
            String doAsUser, HiveConf conf, HadoopShims shims) throws IOException {
        boolean copied = false;
        if (doAsUser == null) {
            copied = shims.runDistCp(srcPaths, dst, conf);
        } else {
            copied = shims.runDistCpAs(srcPaths, dst, conf, doAsUser);
        }
        if (copied && deleteSource) {
            for (Path path : srcPaths) {
                srcFS.delete(path, true);
            }
        }
        return copied;
    }

    /**
     * Moves a particular file or directory to the trash, or deletes it directly when purge is set.
     * @param fs FileSystem to use
     * @param f path of the file or directory to move to trash
     * @param conf the configuration
     * @param purge if true, skip the trash and delete the path permanently
     * @return true if the move (or delete) was successful
     * @throws IOException
     */
    public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean purge) throws IOException {
        LOG.debug("deleting  " + f);
        boolean result = false;
        try {
            if (purge) {
                LOG.debug("purge is set to true. Not moving to Trash " + f);
            } else {
                result = Trash.moveToAppropriateTrash(fs, f, conf);
                if (result) {
                    LOG.trace("Moved to trash: " + f);
                    return true;
                }
            }
        } catch (IOException ioe) {
            // the move can fail for any number of reasons, including the trash being in a
            // different encryption zone; retry with a direct delete
            LOG.warn(ioe.getMessage() + "; forcing delete instead.");
        }

        result = fs.delete(f, true);
        if (!result) {
            LOG.error("Failed to delete " + f);
        }
        return result;
    }
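
    // Usage sketch (illustrative): with purge=false the path is first offered
    // to the Hadoop trash and only force-deleted if that fails; with purge=true
    // the trash is skipped entirely.
    //   moveToTrash(fs, tmpPath, conf, true); // permanent, unrecoverable delete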

    public static boolean rename(FileSystem fs, Path sourcePath, Path destPath, Configuration conf)
            throws IOException {
        LOG.info("Renaming " + sourcePath + " to " + destPath);

        // If destPath directory exists, rename call will move the sourcePath
        // into destPath without failing. So check it before renaming.
        if (fs.exists(destPath)) {
            throw new IOException("Cannot rename the source path. The destination " + "path already exists.");
        }
        return fs.rename(sourcePath, destPath);
    }

    /**
     * @param fs1 the first file system
     * @param fs2 the second file system
     * @return true if both file system arguments point to the same file system
     */
    public static boolean equalsFileSystem(FileSystem fs1, FileSystem fs2) {
        //When the file system cache is disabled, you get different FileSystem objects
        // for the same file system, so '==' can't be used in such cases.
        //The FileSystem api doesn't implement .equals(), so the uri is used for
        //comparison instead. FileSystem already uses uri+Configuration for
        //equality in its CACHE.
        //Once equality has been added in HDFS-9159, we should make use of it
        return fs1.getUri().equals(fs2.getUri());
    }

    /**
     * Checks if a delete can be performed on the given path by the given user.
     * If the file does not exist, it simply returns without throwing an exception.
     * @param path the path to be deleted
     * @param conf the configuration
     * @param user the user to check
     * @throws AccessControlException
     * @throws InterruptedException
     * @throws Exception
     */
    public static void checkDeletePermission(Path path, Configuration conf, String user)
            throws AccessControlException, InterruptedException, Exception {
        // This requires ability to delete the given path.
        // The following 2 conditions should be satisfied for this-
        // 1. Write permissions on parent dir
        // 2. If sticky bit is set on parent dir then one of following should be
        // true
        //   a. User is owner of the current dir/file
        //   b. User is owner of the parent dir
        //   Super users are also allowed to drop the file, but there is no good way of
        //   checking whether a user is a super user, and super users running Hive queries
        //   is not a common use case. A super user can also chown the file in order to drop it.

        if (path == null) {
            // no file/dir to be deleted
            return;
        }

        final FileSystem fs = path.getFileSystem(conf);
        // check user has write permissions on the parent dir
        FileStatus stat = null;
        try {
            stat = fs.getFileStatus(path);
        } catch (FileNotFoundException e) {
            // ignore
        }
        if (stat == null) {
            // no file/dir to be deleted
            return;
        }
        FileUtils.checkFileAccessWithImpersonation(fs, stat, FsAction.WRITE, user);

        HadoopShims shims = ShimLoader.getHadoopShims();
        if (!shims.supportStickyBit()) {
            // sticky bit is not supported
            return;
        }

        // check if sticky bit is set on the parent dir
        FileStatus parStatus = fs.getFileStatus(path.getParent());
        if (!shims.hasStickyBit(parStatus.getPermission())) {
            // no sticky bit, so write permission on parent dir is sufficient
            // no further checks needed
            return;
        }

        // check if user is owner of parent dir
        if (parStatus.getOwner().equals(user)) {
            return;
        }

        // check if user is owner of current dir/file
        FileStatus childStatus = fs.getFileStatus(path);
        if (childStatus.getOwner().equals(user)) {
            return;
        }
        String msg = String.format("Permission Denied: User %s can't delete %s because sticky bit is"
                + " set on the parent dir and user does not own this file or its parent", user, path);
        throw new IOException(msg);

    }

    /**
     * Attempts to get file status.  This method differs from the FileSystem API in that it returns
     * null instead of throwing FileNotFoundException if the path does not exist.
     *
     * @param fs file system to check
     * @param path file system path to check
     * @return FileStatus for path or null if path does not exist
     * @throws IOException if there is an I/O error
     */
    public static FileStatus getFileStatusOrNull(FileSystem fs, Path path) throws IOException {
        try {
            return fs.getFileStatus(path);
        } catch (FileNotFoundException e) {
            return null;
        }
    }

    public static void deleteDirectory(File directory) throws IOException {
        org.apache.commons.io.FileUtils.deleteDirectory(directory);
    }

    /**
     * Creates a temporary file and registers it with the delete-on-exit shutdown hook.
     * File.deleteOnExit is not used because it can leak memory.
     *
     * Make sure to call {@link #deleteTmpFile(File)} once the file is no longer required,
     * so that its shutdown-hook entry is released and memory is not leaked.
     */
    public static File createTempFile(String lScratchDir, String prefix, String suffix) throws IOException {
        File tmpDir = lScratchDir == null ? null : new File(lScratchDir);
        if (tmpDir != null && !tmpDir.exists() && !tmpDir.mkdirs()) {
            // Do another exists check to handle a possible race condition:
            // another thread might have created the dir, and if that is why
            // mkdirs returned false, that is fine
            if (!tmpDir.exists()) {
                throw new RuntimeException("Unable to create temp directory " + lScratchDir);
            }
        }
        File tmpFile = File.createTempFile(prefix, suffix, tmpDir);
        ShutdownHookManager.deleteOnExit(tmpFile);
        return tmpFile;
    }
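
    // Usage sketch (hypothetical scratch dir): pair each createTempFile call
    // with deleteTmpFile so the shutdown-hook entry is released promptly.
    //   File f = createTempFile("/tmp/hive-scratch", "plan", ".xml");
    //   try { /* use f */ } finally { deleteTmpFile(f); }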

    public static File createLocalDirsTempFile(String localDirList, String prefix, String suffix,
            boolean isDirectory) throws IOException {
        if (localDirList == null || localDirList.isEmpty()) {
            return createFileInTmp(prefix, suffix, "Local directories not specified", isDirectory);
        }
        String[] localDirs = StringUtils.getTrimmedStrings(localDirList);
        if (localDirs.length == 0) {
            return createFileInTmp(prefix, suffix, "Local directories not specified", isDirectory);
        }
        // TODO: we could stagger these to threads by ID, but that can also lead to bad effects.
        String path = localDirs[random.nextInt(localDirs.length)];
        if (path == null || path.isEmpty()) {
            return createFileInTmp(prefix, suffix, "Empty path for one of the local dirs", isDirectory);
        }
        File targetDir = new File(path);
        if (!targetDir.exists() && !targetDir.mkdirs()) {
            return createFileInTmp(prefix, suffix, "Cannot access or create " + targetDir, isDirectory);
        }
        try {
            File file = File.createTempFile(prefix, suffix, targetDir);
            if (isDirectory && (!file.delete() || !file.mkdirs())) {
                // TODO: or we could just generate a name ourselves and not do this?
                return createFileInTmp(prefix, suffix, "Cannot recreate " + file + " as directory", isDirectory);
            }
            file.deleteOnExit();
            return file;
        } catch (IOException ex) {
            LOG.error("Error creating a file in " + targetDir, ex);
            return createFileInTmp(prefix, suffix, "Cannot create a file in " + targetDir, isDirectory);
        }
    }

    private static File createFileInTmp(String prefix, String suffix, String reason, boolean isDirectory)
            throws IOException {
        File file = File.createTempFile(prefix, suffix);
        if (isDirectory && (!file.delete() || !file.mkdirs())) {
            // TODO: or we could just generate a name ourselves and not do this?
            throw new IOException("Cannot recreate " + file + " as directory");
        }
        file.deleteOnExit();
        LOG.info(reason + "; created a tmp file: " + file.getAbsolutePath());
        return file;
    }

    public static File createLocalDirsTempFile(Configuration conf, String prefix, String suffix,
            boolean isDirectory) throws IOException {
        return createLocalDirsTempFile(conf.get("yarn.nodemanager.local-dirs"), prefix, suffix, isDirectory);
    }

    /**
     * Deletes a temporary file and removes it from the delete-on-exit hook.
     */
    public static boolean deleteTmpFile(File tempFile) {
        if (tempFile != null) {
            tempFile.delete();
            ShutdownHookManager.cancelDeleteOnExit(tempFile);
            return true;
        }
        return false;
    }

    /**
     * Returns whether all paths in the collection are scheme-less.
     *
     * @param paths the paths to check
     * @return true if no path in the collection has a scheme
     */
    public static boolean pathsContainNoScheme(Collection<Path> paths) {
        for (Path path : paths) {
            if (path.toUri().getScheme() != null) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the deepest candidate path that is an ancestor of (or equal to) the given path.
     *
     * At each depth, candidates that include a scheme are matched first; scheme-less
     * candidates are matched as a fallback.
     *
     * @param path the path whose ancestors are searched
     * @param candidates  the candidate paths
     * @return the deepest matching candidate, or null if none matches
     */
    public static Path getParentRegardlessOfScheme(Path path, Collection<Path> candidates) {
        Path schemalessPath = Path.getPathWithoutSchemeAndAuthority(path);

        for (; path != null
                && schemalessPath != null; path = path.getParent(), schemalessPath = schemalessPath.getParent()) {
            if (candidates.contains(path))
                return path;
            if (candidates.contains(schemalessPath))
                return schemalessPath;
        }
        // exception?
        return null;
    }
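
    // Illustrative example: a scheme-qualified path still matches a scheme-less
    // candidate via its stripped form.
    //   getParentRegardlessOfScheme(new Path("hdfs://nn/a/b/c"),
    //       Collections.singleton(new Path("/a/b")))  // returns /a/b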

    /**
     * Checks whether a path lies inside the given subtree.
     *
     * Returns true iff
     *  * path equals subtree, or
     *  * path is a descendant of the subtree node
     * @param path    the path in question
     * @param subtree the root of the subtree
     *
     * @return true if path lies within the subtree
     */
    public static boolean isPathWithinSubtree(Path path, Path subtree) {
        return isPathWithinSubtree(path, subtree, subtree.depth());
    }

    private static boolean isPathWithinSubtree(Path path, Path subtree, int subtreeDepth) {
        while (path != null) {
            if (subtreeDepth > path.depth()) {
                return false;
            }
            if (subtree.equals(path)) {
                return true;
            }
            path = path.getParent();
        }
        return false;
    }
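
    // Illustrative examples:
    //   isPathWithinSubtree(new Path("/a/b/c"), new Path("/a/b"))  // true
    //   isPathWithinSubtree(new Path("/a/b"), new Path("/a/b"))    // true (path = subtree)
    //   isPathWithinSubtree(new Path("/a"), new Path("/a/b"))      // false (shallower)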

    public static void populateParentPaths(Set<Path> parents, Path path) {
        if (parents == null) {
            return;
        }
        while (path != null) {
            parents.add(path);
            path = path.getParent();
        }
    }

    /**
     * Gets the URI of the path. The path is assumed to be on the local file system
     * if it has no scheme.
     */
    public static URI getURI(String path) throws URISyntaxException {
        if (path == null) {
            return null;
        }

        URI uri = new URI(path);
        if (uri.getScheme() == null) {
            // if no scheme in the path, we assume it's file on local fs.
            uri = new File(path).toURI();
        }

        return uri;
    }
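
    // Illustrative examples (on a Unix-like system):
    //   getURI("/tmp/data.csv")     // file:/tmp/data.csv (no scheme -> local fs)
    //   getURI("hdfs://nn:8020/x")  // returned unchanged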

    /**
     * Given a path string, gets all the jars from each folder, or the files themselves.
     *
     * @param pathString  a comma-separated list of paths
     * @return            the resulting file names, as a set of URI strings
     */
    public static Set<String> getJarFilesByPath(String pathString, Configuration conf) {
        Set<String> result = new HashSet<String>();
        if (org.apache.commons.lang.StringUtils.isBlank(pathString)) {
            return result;
        }

        String[] paths = pathString.split(",");
        for (String path : paths) {
            try {
                Path p = new Path(getURI(path));
                FileSystem fs = p.getFileSystem(conf);
                if (!fs.exists(p)) {
                    LOG.error("The jar file path " + path + " doesn't exist");
                    continue;
                }
                if (fs.isDirectory(p)) {
                    // add all jar files under the folder
                    FileStatus[] files = fs.listStatus(p, new GlobFilter("*.jar"));
                    for (FileStatus file : files) {
                        result.add(file.getPath().toUri().toString());
                    }
                } else {
                    result.add(p.toUri().toString());
                }
            } catch (URISyntaxException | IOException e) {
                LOG.error("Invalid file path " + path, e);
            }
        }
        return result;
    }
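
    // Usage sketch (hypothetical paths): expand a comma-separated list that
    // mixes directories of jars and individual jar files.
    //   getJarFilesByPath("/opt/libs,/opt/udfs/my.jar", conf)
    //   // returns the URIs of every *.jar directly under /opt/libs,
    //   // plus /opt/udfs/my.jar itself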

}