hdfs.FileUtil.java Source code

Java tutorial

Introduction

Here is the source code for hdfs.FileUtil.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package hdfs;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.util.StringUtils;

/**
 * A collection of file-processing util methods
 */
public class FileUtil {
    private static final Log LOG = LogFactory.getLog(FileUtil.class);

    /**
     * convert an array of FileStatus to an array of Path
     * 
     * @param stats
     *          an array of FileStatus objects
     * @return an array of paths corresponding to the input
     */
    public static Path[] stat2Paths(FileStatus[] stats) {
        if (stats == null)
            return null;
        Path[] ret = new Path[stats.length];
        for (int i = 0; i < stats.length; ++i) {
            ret[i] = stats[i].getPath();
        }
        return ret;
    }

    /**
     * convert an array of FileStatus to an array of Path.
     * If stats if null, return path
     * @param stats
     *          an array of FileStatus objects
     * @param path
     *          default path to return in stats is null
     * @return an array of paths corresponding to the input
     */
    public static Path[] stat2Paths(FileStatus[] stats, Path path) {
        if (stats == null)
            return new Path[] { path };
        else
            return stat2Paths(stats);
    }

    /**
     * Delete a directory and all its contents.  If
     * we return false, the directory may be partially-deleted.
     */
    public static boolean fullyDelete(File dir) throws IOException {
        if (!fullyDeleteContents(dir)) {
            return false;
        }
        return dir.delete();
    }

    /**
     * Delete the contents of a directory, not the directory itself.  If
     * we return false, the directory may be partially-deleted.
     */
    public static boolean fullyDeleteContents(File dir) throws IOException {
        boolean deletionSucceeded = true;
        File contents[] = dir.listFiles();
        if (contents != null) {
            for (int i = 0; i < contents.length; i++) {
                if (contents[i].isFile()) {
                    if (!contents[i].delete()) {
                        deletionSucceeded = false;
                        continue; // continue deletion of other files/dirs under dir
                    }
                } else {
                    //try deleting the directory
                    // this might be a symlink
                    boolean b = false;
                    b = contents[i].delete();
                    if (b) {
                        //this was indeed a symlink or an empty directory
                        continue;
                    }
                    // if not an empty directory or symlink let
                    // fullydelete handle it.
                    if (!fullyDelete(contents[i])) {
                        deletionSucceeded = false;
                        continue; // continue deletion of other files/dirs under dir
                    }
                }
            }
        }
        return deletionSucceeded;
    }

    /**
     * Recursively delete a directory.
     * 
     * @param fs {@link FileSystem} on which the path is present
     * @param dir directory to recursively delete 
     * @throws IOException
     * @deprecated Use {@link FileSystem#delete(Path, boolean)}
     */
    @Deprecated
    public static void fullyDelete(FileSystem fs, Path dir) throws IOException {
        fs.delete(dir, true);
    }

    //
    // If the destination is a subdirectory of the source, then
    // generate exception
    //
    private static void checkDependencies(FileSystem srcFS, Path src, FileSystem dstFS, Path dst)
            throws IOException {
        if (srcFS == dstFS) {
            String srcq = src.makeQualified(srcFS).toString() + Path.SEPARATOR;
            String dstq = dst.makeQualified(dstFS).toString() + Path.SEPARATOR;
            if (dstq.startsWith(srcq)) {
                if (srcq.length() == dstq.length()) {
                    throw new IOException("Cannot copy " + src + " to itself.");
                } else {
                    throw new IOException("Cannot copy " + src + " to its subdirectory " + dst);
                }
            }
        }
    }

    /** Copy files between FileSystems. */
    public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
            Configuration conf) throws IOException {
        return copy(srcFS, src, dstFS, dst, deleteSource, true, conf);
    }

    public static boolean copy(FileSystem srcFS, Path[] srcs, FileSystem dstFS, Path dst, boolean deleteSource,
            boolean overwrite, Configuration conf) throws IOException {
        boolean gotException = false;
        boolean returnVal = true;
        StringBuffer exceptions = new StringBuffer();

        if (srcs.length == 1)
            return copy(srcFS, srcs[0], dstFS, dst, deleteSource, overwrite, conf);

        // Check if dest is directory
        if (!dstFS.exists(dst)) {
            throw new IOException("`" + dst + "': specified destination directory " + "does not exist");
        } else {
            FileStatus sdst = dstFS.getFileStatus(dst);
            if (!sdst.isDir())
                throw new IOException("copying multiple files, but last argument `" + dst + "' is not a directory");
        }

        for (Path src : srcs) {
            try {
                if (!copy(srcFS, src, dstFS, dst, deleteSource, overwrite, conf))
                    returnVal = false;
            } catch (IOException e) {
                gotException = true;
                exceptions.append(e.getMessage());
                exceptions.append("\n");
            }
        }
        if (gotException) {
            throw new IOException(exceptions.toString());
        }
        return returnVal;
    }

    /** Copy files between FileSystems. */
    public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
            boolean overwrite, Configuration conf) throws IOException {
        dst = checkDest(src.getName(), dstFS, dst, overwrite);

        if (srcFS.getFileStatus(src).isDir()) {
            checkDependencies(srcFS, src, dstFS, dst);
            if (!dstFS.mkdirs(dst)) {
                return false;
            }
            FileStatus contents[] = srcFS.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                copy(srcFS, contents[i].getPath(), dstFS, new Path(dst, contents[i].getPath().getName()),
                        deleteSource, overwrite, conf);
            }
        } else if (srcFS.isFile(src)) {
            InputStream in = null;
            OutputStream out = null;
            try {
                in = srcFS.open(src);
                out = dstFS.create(dst, overwrite);
                IOUtils.copyBytes(in, out, conf, true);
            } catch (IOException e) {
                IOUtils.closeStream(out);
                IOUtils.closeStream(in);
                throw e;
            }
        } else {
            throw new IOException(src.toString() + ": No such file or directory");
        }
        if (deleteSource) {
            return srcFS.delete(src, true);
        } else {
            return true;
        }

    }

    /** Copy all files in a directory to one output file (merge). */
    public static boolean copyMerge(FileSystem srcFS, Path srcDir, FileSystem dstFS, Path dstFile,
            boolean deleteSource, Configuration conf, String addString) throws IOException {
        dstFile = checkDest(srcDir.getName(), dstFS, dstFile, false);

        if (!srcFS.getFileStatus(srcDir).isDir())
            return false;

        OutputStream out = dstFS.create(dstFile);

        try {
            FileStatus contents[] = srcFS.listStatus(srcDir);
            for (int i = 0; i < contents.length; i++) {
                if (!contents[i].isDir()) {
                    InputStream in = srcFS.open(contents[i].getPath());
                    try {
                        IOUtils.copyBytes(in, out, conf, false);
                        if (addString != null)
                            out.write(addString.getBytes("UTF-8"));

                    } finally {
                        in.close();
                    }
                }
            }
        } finally {
            out.close();
        }

        if (deleteSource) {
            return srcFS.delete(srcDir, true);
        } else {
            return true;
        }
    }

    /** Copy local files to a FileSystem. */
    public static boolean copy(File src, FileSystem dstFS, Path dst, boolean deleteSource, Configuration conf)
            throws IOException {
        dst = checkDest(src.getName(), dstFS, dst, false);

        if (src.isDirectory()) {
            if (!dstFS.mkdirs(dst)) {
                return false;
            }
            File contents[] = listFiles(src);
            for (int i = 0; i < contents.length; i++) {
                copy(contents[i], dstFS, new Path(dst, contents[i].getName()), deleteSource, conf);
            }
        } else if (src.isFile()) {
            InputStream in = null;
            OutputStream out = null;
            try {
                in = new FileInputStream(src);
                out = dstFS.create(dst);
                IOUtils.copyBytes(in, out, conf);
            } catch (IOException e) {
                IOUtils.closeStream(out);
                IOUtils.closeStream(in);
                throw e;
            }
        } else {
            throw new IOException(src.toString() + ": No such file or directory");
        }
        if (deleteSource) {
            return FileUtil.fullyDelete(src);
        } else {
            return true;
        }
    }

    /** Copy FileSystem files to local files. */
    public static boolean copy(FileSystem srcFS, Path src, File dst, boolean deleteSource, Configuration conf)
            throws IOException {
        if (srcFS.getFileStatus(src).isDir()) {
            if (!dst.mkdirs()) {
                return false;
            }
            FileStatus contents[] = srcFS.listStatus(src);
            for (int i = 0; i < contents.length; i++) {
                copy(srcFS, contents[i].getPath(), new File(dst, contents[i].getPath().getName()), deleteSource,
                        conf);
            }
        } else if (srcFS.isFile(src)) {
            InputStream in = srcFS.open(src);
            IOUtils.copyBytes(in, new FileOutputStream(dst), conf);
        } else {
            throw new IOException(src.toString() + ": No such file or directory");
        }
        if (deleteSource) {
            return srcFS.delete(src, true);
        } else {
            return true;
        }
    }

    private static Path checkDest(String srcName, FileSystem dstFS, Path dst, boolean overwrite)
            throws IOException {
        if (dstFS.exists(dst)) {
            FileStatus sdst = dstFS.getFileStatus(dst);
            if (sdst.isDir()) {
                if (null == srcName) {
                    throw new IOException("Target " + dst + " is a directory");
                }
                return checkDest(null, dstFS, new Path(dst, srcName), overwrite);
            } else if (!overwrite) {
                throw new IOException("Target " + dst + " already exists");
            }
        } else if (dst.toString().isEmpty()) {
            return checkDest(null, dstFS, new Path(srcName), overwrite);
        }
        return dst;
    }

    /**
     * This class is only used on windows to invoke the cygpath command.
     */
    private static class CygPathCommand extends Shell {
        String[] command;
        String result;

        CygPathCommand(String path) throws IOException {
            command = new String[] { "cygpath", "-u", path };
            run();
        }

        String getResult() throws IOException {
            return result;
        }

        protected String[] getExecString() {
            return command;
        }

        protected void parseExecResult(BufferedReader lines) throws IOException {
            String line = lines.readLine();
            if (line == null) {
                throw new IOException("Can't convert '" + command[2] + " to a cygwin path");
            }
            result = line;
        }
    }

    /**
     * Convert a os-native filename to a path that works for the shell.
     * @param filename The filename to convert
     * @return The unix pathname
     * @throws IOException on windows, there can be problems with the subprocess
     */
    public static String makeShellPath(String filename) throws IOException {
        if (Path.WINDOWS) {
            return new CygPathCommand(filename).getResult();
        } else {
            return filename;
        }
    }

    /**
     * Convert a os-native filename to a path that works for the shell.
     * @param file The filename to convert
     * @return The unix pathname
     * @throws IOException on windows, there can be problems with the subprocess
     */
    public static String makeShellPath(File file) throws IOException {
        return makeShellPath(file, false);
    }

    /**
     * Convert a os-native filename to a path that works for the shell.
     * @param file The filename to convert
     * @param makeCanonicalPath 
     *          Whether to make canonical path for the file passed
     * @return The unix pathname
     * @throws IOException on windows, there can be problems with the subprocess
     */
    public static String makeShellPath(File file, boolean makeCanonicalPath) throws IOException {
        if (makeCanonicalPath) {
            return makeShellPath(file.getCanonicalPath());
        } else {
            return makeShellPath(file.toString());
        }
    }

    /**
     * Takes an input dir and returns the du on that local directory. Very basic
     * implementation.
     * 
     * @param dir
     *          The input dir to get the disk space of this local dir
     * @return The total disk space of the input local directory
     */
    public static long getDU(File dir) {
        long size = 0;
        if (!dir.exists())
            return 0;
        if (!dir.isDirectory()) {
            return dir.length();
        } else {
            File[] allFiles = dir.listFiles();
            if (allFiles != null) {
                for (int i = 0; i < allFiles.length; i++) {
                    boolean isSymLink;
                    try {
                        isSymLink = org.apache.commons.io.FileUtils.isSymlink(allFiles[i]);
                    } catch (IOException ioe) {
                        isSymLink = true;
                    }
                    if (!isSymLink) {
                        size += getDU(allFiles[i]);
                    }
                }
            }
            return size;
        }
    }

    /**
     * Given a File input it will unzip the file in a the unzip directory
     * passed as the second parameter
     * @param inFile The zip file as input
     * @param unzipDir The unzip directory where to unzip the zip file.
     * @throws IOException
     */
    public static void unZip(File inFile, File unzipDir) throws IOException {
        Enumeration<? extends ZipEntry> entries;
        ZipFile zipFile = new ZipFile(inFile);

        try {
            entries = zipFile.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = entries.nextElement();
                if (!entry.isDirectory()) {
                    InputStream in = zipFile.getInputStream(entry);
                    try {
                        File file = new File(unzipDir, entry.getName());
                        if (!file.getParentFile().mkdirs()) {
                            if (!file.getParentFile().isDirectory()) {
                                throw new IOException("Mkdirs failed to create " + file.getParentFile().toString());
                            }
                        }
                        OutputStream out = new FileOutputStream(file);
                        try {
                            byte[] buffer = new byte[8192];
                            int i;
                            while ((i = in.read(buffer)) != -1) {
                                out.write(buffer, 0, i);
                            }
                        } finally {
                            out.close();
                        }
                    } finally {
                        in.close();
                    }
                }
            }
        } finally {
            zipFile.close();
        }
    }

    /**
     * Given a Tar File as input it will untar the file in a the untar directory
     * passed as the second parameter
     * 
     * This utility will untar ".tar" files and ".tar.gz","tgz" files.
     *  
     * @param inFile The tar file as input. 
     * @param untarDir The untar directory where to untar the tar file.
     * @throws IOException
     */
    public static void unTar(File inFile, File untarDir) throws IOException {
        if (!untarDir.mkdirs()) {
            if (!untarDir.isDirectory()) {
                throw new IOException("Mkdirs failed to create " + untarDir);
            }
        }

        StringBuffer untarCommand = new StringBuffer();
        boolean gzipped = inFile.toString().endsWith("gz");
        if (gzipped) {
            untarCommand.append(" gzip -dc '");
            untarCommand.append(FileUtil.makeShellPath(inFile));
            untarCommand.append("' | (");
        }
        untarCommand.append("cd '");
        untarCommand.append(FileUtil.makeShellPath(untarDir));
        untarCommand.append("' ; ");
        untarCommand.append("tar -xf ");

        if (gzipped) {
            untarCommand.append(" -)");
        } else {
            untarCommand.append(FileUtil.makeShellPath(inFile));
        }
        String[] shellCmd = { "bash", "-c", untarCommand.toString() };
        ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
        shexec.execute();
        int exitcode = shexec.getExitCode();
        if (exitcode != 0) {
            throw new IOException(
                    "Error untarring file " + inFile + ". Tar process exited with exit code " + exitcode);
        }
    }

    /**
     * Create a soft link between a src and destination
     * only on a local disk. HDFS does not support this
     * @param target the target for symlink 
     * @param linkname the symlink
     * @return value returned by the command
     */
    public static int symLink(String target, String linkname) throws IOException {
        String cmd = "ln -s " + target + " " + linkname;
        Process p = Runtime.getRuntime().exec(cmd, null);
        int returnVal = -1;
        try {
            returnVal = p.waitFor();
        } catch (InterruptedException e) {
            //do nothing as of yet
        }
        if (returnVal != 0) {
            LOG.warn("Command '" + cmd + "' failed " + returnVal + " with: " + copyStderr(p));
        }
        return returnVal;
    }

    private static String copyStderr(Process p) throws IOException {
        InputStream err = p.getErrorStream();
        StringBuilder result = new StringBuilder();
        byte[] buff = new byte[4096];
        int len = err.read(buff);
        while (len > 0) {
            result.append(new String(buff, 0, len));
            len = err.read(buff);
        }
        return result.toString();
    }

    /**
     * Change the permissions on a filename.
     * @param filename the name of the file to change
     * @param perm the permission string
     * @return the exit code from the command
     * @throws IOException
     * @throws InterruptedException
     */
    public static int chmod(String filename, String perm) throws IOException, InterruptedException {
        return chmod(filename, perm, false);
    }

    /**
     * Change the permissions on a file / directory, recursively, if
     * needed.
     * @param filename name of the file whose permissions are to change
     * @param perm permission string
     * @param recursive true, if permissions should be changed recursively
     * @return the exit code from the command.
     * @throws IOException
     * @throws InterruptedException
     */
    public static int chmod(String filename, String perm, boolean recursive) throws IOException {
        StringBuffer cmdBuf = new StringBuffer();
        cmdBuf.append("chmod ");
        if (recursive) {
            cmdBuf.append("-R ");
        }
        cmdBuf.append(perm).append(" ");
        cmdBuf.append(filename);
        String[] shellCmd = { "bash", "-c", cmdBuf.toString() };
        ShellCommandExecutor shExec = new ShellCommandExecutor(shellCmd);
        try {
            shExec.execute();
        } catch (IOException e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Error while changing permission : " + filename + " Exception: "
                        + StringUtils.stringifyException(e));
            }
        }
        return shExec.getExitCode();
    }

    /**
     * Set permissions to the required value. Uses the java primitives instead
     * of forking if group == other.
     * @param f the file to change
     * @param permission the new permissions
     * @throws IOException
     */
    public static void setPermission(File f, FsPermission permission) throws IOException {
        FsAction user = permission.getUserAction();
        FsAction group = permission.getGroupAction();
        FsAction other = permission.getOtherAction();

        // use the native/fork if the group/other permissions are different
        // or if the native is available    
        if (group != other || NativeIO.isAvailable()) {
            execSetPermission(f, permission);
            return;
        }

        boolean rv = true;

        // read perms
        rv = f.setReadable(group.implies(FsAction.READ), false);
        checkReturnValue(rv, f, permission);
        if (group.implies(FsAction.READ) != user.implies(FsAction.READ)) {
            f.setReadable(user.implies(FsAction.READ), true);
            checkReturnValue(rv, f, permission);
        }

        // write perms
        rv = f.setWritable(group.implies(FsAction.WRITE), false);
        checkReturnValue(rv, f, permission);
        if (group.implies(FsAction.WRITE) != user.implies(FsAction.WRITE)) {
            f.setWritable(user.implies(FsAction.WRITE), true);
            checkReturnValue(rv, f, permission);
        }

        // exec perms
        rv = f.setExecutable(group.implies(FsAction.EXECUTE), false);
        checkReturnValue(rv, f, permission);
        if (group.implies(FsAction.EXECUTE) != user.implies(FsAction.EXECUTE)) {
            f.setExecutable(user.implies(FsAction.EXECUTE), true);
            checkReturnValue(rv, f, permission);
        }
    }

    private static void checkReturnValue(boolean rv, File p, FsPermission permission) throws IOException {
        /* if (!rv) {
           throw new IOException("Failed to set permissions of path: " + p + 
                        " to " + 
                        String.format("%04o", permission.toShort()));
         }*/
    }

    private static void execSetPermission(File f, FsPermission permission) throws IOException {
        if (NativeIO.isAvailable()) {
            // NativeIO.chmod(f.getCanonicalPath(), permission.toShort());
        } else {
            execCommand(f, Shell.SET_PERMISSION_COMMAND, String.format("%04o", permission.toShort()));
        }
    }

    static String execCommand(File f, String... cmd) throws IOException {
        String[] args = new String[cmd.length + 1];
        System.arraycopy(cmd, 0, args, 0, cmd.length);
        args[cmd.length] = f.getCanonicalPath();
        String output = Shell.execCommand(args);
        return output;
    }

    /**
     * Create a tmp file for a base file.
     * @param basefile the base file of the tmp
     * @param prefix file name prefix of tmp
     * @param isDeleteOnExit if true, the tmp will be deleted when the VM exits
     * @return a newly created tmp file
     * @exception IOException If a tmp file cannot created
     * @see java.io.File#createTempFile(String, String, File)
     * @see java.io.File#deleteOnExit()
     */
    public static final File createLocalTempFile(final File basefile, final String prefix,
            final boolean isDeleteOnExit) throws IOException {
        File tmp = File.createTempFile(prefix + basefile.getName(), "", basefile.getParentFile());
        if (isDeleteOnExit) {
            tmp.deleteOnExit();
        }
        return tmp;
    }

    /**
     * Move the src file to the name specified by target.
     * @param src the source file
     * @param target the target file
     * @exception IOException If this operation fails
     */
    public static void replaceFile(File src, File target) throws IOException {
        /* renameTo() has two limitations on Windows platform.
         * src.renameTo(target) fails if
         * 1) If target already exists OR
         * 2) If target is already open for reading/writing.
         */
        if (!src.renameTo(target)) {
            int retries = 5;
            while (target.exists() && !target.delete() && retries-- >= 0) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    throw new IOException("replaceFile interrupted.");
                }
            }
            if (!src.renameTo(target)) {
                throw new IOException("Unable to rename " + src + " to " + target);
            }
        }
    }

    /**
     * A wrapper for {@link File#listFiles()}. This java.io API returns null 
     * when a dir is not a directory or for any I/O error. Instead of having
     * null check everywhere File#listFiles() is used, we will add utility API
     * to get around this problem. For the majority of cases where we prefer 
     * an IOException to be thrown.
     * @param dir directory for which listing should be performed
     * @return list of files or empty list
     * @exception IOException for invalid directory or for a bad disk.
     */
    public static File[] listFiles(File dir) throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            throw new IOException("Invalid directory or I/O error occurred for dir: " + dir.toString());
        }
        return files;
    }

    /**
     * A wrapper for {@link File#list()}. This java.io API returns null 
     * when a dir is not a directory or for any I/O error. Instead of having
     * null check everywhere File#list() is used, we will add utility API
     * to get around this problem. For the majority of cases where we prefer 
     * an IOException to be thrown.
     * @param dir directory for which listing should be performed
     * @return list of file names or empty string list
     * @exception IOException for invalid directory or for a bad disk.
     */
    public static String[] list(File dir) throws IOException {
        String[] fileNames = dir.list();
        if (fileNames == null) {
            throw new IOException("Invalid directory or I/O error occurred for dir: " + dir.toString());
        }
        return fileNames;
    }
}