Java tutorial
/** * Copyright 2012 LiveRamp * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.liveramp.cascading_ext; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import java.io.*; import java.util.UUID; /** * This class contains helper methods for working with files, filepaths, and * FileSystems. */ public class FileSystemHelper { private static final int DEFAULT_FS_OP_NUM_TRIES = 3; private static final long DEFAULT_FS_OP_DELAY_BETWEEN_TRIES = 5 * 1000L; @Deprecated // use getFS() instead. public static FileSystem getFileSystem() { return getFS(); } public static FileSystem getFS() { try { return FileSystem.get(new Configuration()); } catch (IOException e) { throw new RuntimeException(e); } } public static void createLocalFile(String path, String content) { try { DataOutputStream dos = new DataOutputStream(new FileOutputStream(new File(path))); dos.writeUTF(content); dos.close(); } catch (IOException ioe) { throw new RuntimeException("failed to create local file", ioe); } } public static void createFile(FileSystem fs, String path, String content) throws IOException { FSDataOutputStream os = fs.create(new Path(path)); os.write(content.getBytes()); os.close(); } /** * merge all files in <code>sourceDir</code> into local <code>targetFile</code>, retrying a few times on failure */ public static void copyMergeToLocal(String srcDir, String dstFile) throws IOException { copyMergeToLocal(srcDir, dstFile, DEFAULT_FS_OP_NUM_TRIES, DEFAULT_FS_OP_DELAY_BETWEEN_TRIES); } /** * merge all files in <code>sourceDir</code> into local <code>targetFile</code>, retrying on failure */ public static void copyMergeToLocal(String srcDir, String dstFile, int numTries, long delayBetweenTries) throws IOException { Configuration conf = new Configuration(); FileSystem hdfs = getFS(); FileSystem localfs = FileSystem.getLocal(conf); while (numTries-- > 0) { if (FileUtil.copyMerge(hdfs, new Path(srcDir), localfs, new Path(dstFile), false, conf, null)) { return; } try { Thread.sleep(delayBetweenTries); } catch (InterruptedException ie) { throw new RuntimeException(ie); } } throw new IOException("Could not copyMerge from \"" + srcDir + "\" to \"" + dstFile + "\"!"); } /** * Safely renames a path by retrying the operation <code>numTries</code> times * and sleeping <code>delayBetweenTries</code> seconds between each try. If it * still fails, it throws an IOException. * * @param fs the filesystem object * @param src the directory to be renamed * @param dst the new name of the directory * @param numTries number of tries to attempt the operation * @param delayBetweenTries the sleep delta between tries in millis * @throws IOException */ public static void safeRename(FileSystem fs, Path src, Path dst, int numTries, long delayBetweenTries) throws IOException { while (numTries-- > 0) { if (fs.rename(src, dst)) { return; } else { try { Thread.sleep(delayBetweenTries); } catch (InterruptedException ie) { throw new RuntimeException(ie); } } } throw new IOException("Could not rename the file from \"" + src + "\" to \"" + dst + "\"!"); } /** * Safely renames a path by retrying the operation 3 and sleeping 5000 * milliseconds between tries. If it still fails, it throws an IOException. * * @param fs the filesystem object * @param src the directory to be renamed * @param dst the new name of the directory * @throws IOException */ public static void safeRename(FileSystem fs, Path src, Path dst) throws IOException { safeRename(fs, src, dst, DEFAULT_FS_OP_NUM_TRIES, DEFAULT_FS_OP_DELAY_BETWEEN_TRIES); } public static void safeRename(Path src, Path dst) throws IOException { safeRename(getFS(), src, dst, DEFAULT_FS_OP_NUM_TRIES, DEFAULT_FS_OP_DELAY_BETWEEN_TRIES); } /** * Safely mkdirs a directory by retrying the operation <code>numTries</code> times and sleeping <code>delayBetweenTries</code> milliseconds between each * try. If it still fails, it throws an IOException. * * @param fs the filesystem object * @param dir the directory to be created * @param numTries number of tries to attempt the operation * @param delayBetweenTries the sleep delta between tries in millis * @throws IOException */ public static void safeMkdirs(FileSystem fs, Path dir, int numTries, long delayBetweenTries) throws IOException { while (numTries-- > 0) { if (fs.mkdirs(dir)) { return; } else { try { Thread.sleep(delayBetweenTries); } catch (InterruptedException ie) { throw new RuntimeException(ie); } } } throw new IOException("Could not mkdirs the directory \"" + dir + "\"!"); } /** * Safely mkdirs a directory by retrying the operation 3 times and sleeping * 5000 milliseconds between tries. If it still fails, it throws an * IOException. * * @param fs the filesystem object * @param dir the directory to be created * @throws IOException */ public static void safeMkdirs(FileSystem fs, Path dir) throws IOException { safeMkdirs(fs, dir, DEFAULT_FS_OP_NUM_TRIES, DEFAULT_FS_OP_DELAY_BETWEEN_TRIES); } /** * Creates a random path under a given path prefix * * @param pathPrefix * @return the random path */ public static Path getRandomPath(Path pathPrefix) { return new Path(pathPrefix, UUID.randomUUID().toString()); } /** * Creates a random path under a given path prefix * * @param pathPrefix * @return the random path */ public static Path getRandomPath(String pathPrefix) { return getRandomPath(new Path(pathPrefix)); } /** * Use this method to create a random temporary path that must be deleted upon * exit. It creates a random directory with a random file in it. The reason * for this is that hadoop requires the path to exist when it's marked for * deletion. * * @param pathPrefix the prefix under which the path is created * @return the random temporary path * @throws IOException */ public static Path getRandomTemporaryPath(Path pathPrefix) throws IOException { Path randomTemporaryDir = getRandomPath(pathPrefix); getFS().mkdirs(randomTemporaryDir); getFS().deleteOnExit(randomTemporaryDir); return getRandomPath(randomTemporaryDir); } /** * Gets a random temporary path that is deleted upon exit. * * @param pathPrefix the prefix under which the path is created * @return the random temporary path * @throws IOException */ public static Path getRandomTemporaryPath(String pathPrefix) throws IOException { return getRandomTemporaryPath(new Path(pathPrefix)); } /** * Gets a random temporary path that is deleted upon exit. The path will be * created under "/tmp" * * @return the random temporary path under "/tmp" * @throws IOException */ public static Path getRandomTemporaryPath() throws IOException { return getRandomTemporaryPath("/tmp"); } /** * Recursively print the path and children to stdout. */ public static void printFiles(String path) throws IOException { FileSystem fs = getFS(); if (fs.exists(new Path(path))) { printFiles(fs, new Path(path), 0); } else { System.out.println("no files at " + path); } } private static void printFiles(FileSystem fs, Path p, int indent) throws IOException { FileStatus stat = fs.getFileStatus(p); for (int i = 0; i < indent; i++) { System.out.print("\t"); } System.out.println(p.toString()); if (stat.isDir()) { for (FileStatus child : fs.listStatus(p)) { printFiles(fs, child.getPath(), indent + 1); } } } public static FileStatus[] safeListStatus(Path p) throws IOException { return safeListStatus(getFS(), p); } public static FileStatus[] safeListStatus(Path p, PathFilter filter) throws IOException { return safeListStatus(getFS(), p, filter); } public static FileStatus[] safeListStatus(FileSystem fs, Path p) throws IOException { return safeListStatus(fs, p, null); } public static FileStatus[] safeListStatus(FileSystem fs, Path p, PathFilter filter) throws IOException { try { if (filter == null) { return fs.listStatus(p); } else { return fs.listStatus(p, filter); } } // CDH4 will throw FNFEs if p doesn't exist--let people safely check for files at a path catch (FileNotFoundException e) { return new FileStatus[0]; } } }