List of usage examples for org.apache.hadoop.fs.FileSystem.getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
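Before the project-specific examples below, here is a minimal sketch of a typical getFileStatus call. The path, the printed fields, and the wrapping class are illustrative placeholders only; the API calls (Path.getFileSystem, FileSystem.getFileStatus, and the FileStatus accessors) are standard Hadoop, and getFileStatus throws FileNotFoundException when the path does not exist.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {

  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.txt"); // placeholder path

    // Resolve the FileSystem that owns this path (HDFS, local, S3, ...)
    FileSystem fs = path.getFileSystem(conf);

    try {
      // Throws FileNotFoundException if the path does not exist
      FileStatus status = fs.getFileStatus(path);
      System.out.println("Path:         " + status.getPath());
      System.out.println("Is directory: " + status.isDirectory());
      System.out.println("Length:       " + status.getLen() + " bytes");
      System.out.println("Modified:     " + status.getModificationTime());
      System.out.println("Owner:        " + status.getOwner());
    } catch (FileNotFoundException e) {
      System.out.println(path + " does not exist");
    }
  }
}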
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf,
    final Arguments args) throws IOException {

  jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

  // set boolean values
  final boolean update = args.flags.contains(Options.UPDATE);
  final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
  jobConf.setBoolean(Options.UPDATE.propertyname, update);
  jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
  jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
      args.flags.contains(Options.IGNORE_READ_FAILURES));
  jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
      args.flags.contains(Options.PRESERVE_STATUS));

  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobConf);
  Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
  jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

  long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

  FileSystem dstfs = args.dst.getFileSystem(conf);
  boolean dstExists = dstfs.exists(args.dst);
  boolean dstIsDir = false;
  if (dstExists) {
    dstIsDir = dstfs.getFileStatus(args.dst).isDir();
  }

  // default logPath
  Path logPath = args.log;
  if (logPath == null) {
    String filename = "_distcp_logs_" + randomId;
    if (!dstExists || !dstIsDir) {
      Path parent = args.dst.getParent();
      if (null == parent) {
        // If dst is '/' on S3, it might not exist yet, but dst.getParent()
        // will return null. In this case, use '/' as its own parent to
        // prevent NPE errors below.
        parent = args.dst;
      }
      if (!dstfs.exists(parent)) {
        dstfs.mkdirs(parent);
      }
      logPath = new Path(parent, filename);
    } else {
      logPath = new Path(args.dst, filename);
    }
  }
  FileOutputFormat.setOutputPath(jobConf, logPath);

  // create src list, dst list
  FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

  Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
  jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
  SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf,
      srcfilelist, LongWritable.class, FilePair.class,
      SequenceFile.CompressionType.NONE);

  Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
  SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf,
      dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE);

  Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
  jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
  SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf,
      dstdirlist, Text.class, FilePair.class,
      SequenceFile.CompressionType.NONE);

  // handle the case where the destination directory doesn't exist
  // and we've only a single src directory OR we're updating/overwriting
  // the contents of the destination directory.
  final boolean special =
      (args.srcs.size() == 1 && !dstExists) || update || overwrite;
  int srcCount = 0, cnsyncf = 0, dirsyn = 0;
  long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
  try {
    for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
      final Path src = srcItr.next();
      FileSystem srcfs = src.getFileSystem(conf);
      FileStatus srcfilestat = srcfs.getFileStatus(src);
      Path root = special && srcfilestat.isDir() ? src : src.getParent();
      if (srcfilestat.isDir()) {
        ++srcCount;
      }

      Stack<FileStatus> pathstack = new Stack<>();
      for (pathstack.push(srcfilestat); !pathstack.empty();) {
        FileStatus cur = pathstack.pop();
        FileStatus[] children = srcfs.listStatus(cur.getPath());
        for (int i = 0; i < children.length; i++) {
          boolean skipfile = false;
          final FileStatus child = children[i];
          final String dst = makeRelative(root, child.getPath());
          ++srcCount;

          if (child.isDir()) {
            pathstack.push(child);
          } else {
            // skip file if the src and the dst files are the same.
            skipfile = update
                && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
            // skip file if it exceeds the file limit or the size limit
            skipfile |= fileCount == args.filelimit
                || byteCount + child.getLen() > args.sizelimit;

            if (!skipfile) {
              ++fileCount;
              byteCount += child.getLen();

              // if (LOG.isTraceEnabled()) {
              //   LOG.trace("adding file " + child.getPath());
              // }

              ++cnsyncf;
              cbsyncs += child.getLen();
              if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                src_writer.sync();
                dst_writer.sync();
                cnsyncf = 0;
                cbsyncs = 0L;
              }
            }
          }

          if (!skipfile) {
            src_writer.append(
                new LongWritable(child.isDir() ? 0 : child.getLen()),
                new FilePair(child, dst));
          }

          dst_writer.append(new Text(dst),
              new Text(child.getPath().toString()));
        }

        if (cur.isDir()) {
          String dst = makeRelative(root, cur.getPath());
          dir_writer.append(new Text(dst), new FilePair(cur, dst));
          if (++dirsyn > SYNC_FILE_MAX) {
            dirsyn = 0;
            dir_writer.sync();
          }
        }
      }
    }
  } finally {
    checkAndClose(src_writer);
    checkAndClose(dst_writer);
    checkAndClose(dir_writer);
  }

  FileStatus dststatus = null;
  try {
    dststatus = dstfs.getFileStatus(args.dst);
  } catch (FileNotFoundException fnfe) {
    getLogger().info(args.dst + " does not exist.");
  }

  // create dest path dir if copying > 1 file
  if (dststatus == null) {
    if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
      throw new IOException("Failed to create " + args.dst);
    }
  }

  final Path sorted = new Path(jobDirectory, "_distcp_sorted");
  checkDuplication(jobfs, dstfilelist, sorted, conf);

  if (dststatus != null && args.flags.contains(Options.DELETE)) {
    deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf,
        conf);
  }

  Path tmpDir = new Path(
      (dstExists && !dstIsDir) || (!dstExists && srcCount == 1)
          ? args.dst.getParent() : args.dst,
      "_distcp_tmp_" + randomId);
  jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

  // Explicitly create the tmpDir to ensure that it can be cleaned
  // up by fullyDelete() later.
  tmpDir.getFileSystem(conf).mkdirs(tmpDir);

  getLogger().info("srcCount=" + srcCount);
  jobConf.setInt(SRC_COUNT_LABEL, srcCount);
  jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
  setMapCount(byteCount, jobConf);
}
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Check whether the contents of src and dst are the same.
 * Return false if dstpath does not exist or if the files have different
 * sizes. If the files have the same size, the file checksums are compared.
 * When file checksums are not supported by one of the file systems, two
 * files are considered the same if they have the same size.
 */
static private boolean sameFile(final FileSystem srcfs,
    final FileStatus srcstatus, final FileSystem dstfs, final Path dstpath)
    throws IOException {

  FileStatus dststatus;
  try {
    dststatus = dstfs.getFileStatus(dstpath);
  } catch (FileNotFoundException fnfe) {
    return false;
  }

  // same length?
  if (srcstatus.getLen() != dststatus.getLen()) {
    return false;
  }

  // get src checksum
  final FileChecksum srccs;
  try {
    srccs = srcfs.getFileChecksum(srcstatus.getPath());
  } catch (FileNotFoundException fnfe) {
    /*
     * Two possible cases: (1) src existed once but was deleted between the
     * time period that srcstatus was obtained and the try block above. (2)
     * srcfs does not support file checksum and (incorrectly) throws FNFE,
     * e.g. some previous versions of HftpFileSystem. For case (1), it is
     * okay to return true since src was already deleted. For case (2), true
     * should be returned.
     */
    return true;
  }

  // compare checksums
  try {
    final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
    // return true if checksums are not supported
    // (i.e. one of the checksums is null)
    return srccs == null || dstcs == null || srccs.equals(dstcs);
  } catch (FileNotFoundException fnfe) {
    return false;
  }
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Get the length of a file.
 * @param path Path of the file
 * @param conf configuration
 * @return the length of the file in bytes
 * @throws IOException if an error occurs while getting the file status
 */
public static final long getSize(final Path path, final Configuration conf)
    throws IOException {

  if (path == null) {
    throw new NullPointerException("Path to create is null");
  }

  if (conf == null) {
    throw new NullPointerException("The configuration object is null");
  }

  final FileSystem fs = path.getFileSystem(conf);

  if (fs == null) {
    throw new IOException("Unable to get the file size, the FileSystem is null");
  }

  return fs.getFileStatus(path).getLen();
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Return a list of the files of a path.
 * @param dir Path of the directory
 * @param prefix filter on prefix
 * @param allowCompressedExtension Allow compressed extensions
 * @param conf Configuration
 * @return a list of Path
 * @throws IOException if an error occurs while listing the directory
 */
public static List<Path> listPathsByPrefix(final Path dir, final String prefix,
    final boolean allowCompressedExtension, final Configuration conf)
    throws IOException {

  if (dir == null) {
    throw new NullPointerException("Directory path is null");
  }

  if (prefix == null) {
    throw new NullPointerException("Prefix is null");
  }

  if (conf == null) {
    throw new NullPointerException("Configuration is null");
  }

  final FileSystem fs = dir.getFileSystem(conf);
  if (!fs.getFileStatus(dir).isDirectory()) {
    throw new IOException("Directory path is not a directory: " + dir);
  }

  final FileStatus[] filesStatus = fs.listStatus(dir,
      new PrefixPathFilter(prefix, allowCompressedExtension));

  if (filesStatus == null) {
    return Collections.emptyList();
  }

  final List<Path> result = new ArrayList<>(filesStatus.length);

  for (FileStatus fst : filesStatus) {
    result.add(fst.getPath());
  }

  return result;
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Return a list of the files of a path.
 * @param dir Path of the directory
 * @param suffix filter on suffix
 * @param allowCompressedExtension Allow compressed extensions
 * @param conf Configuration
 * @return a list of Path
 * @throws IOException if an error occurs while listing the directory
 */
public static List<Path> listPathsBySuffix(final Path dir, final String suffix,
    final boolean allowCompressedExtension, final Configuration conf)
    throws IOException {

  if (dir == null) {
    throw new NullPointerException("Directory path is null");
  }

  if (suffix == null) {
    throw new NullPointerException("Suffix is null");
  }

  if (conf == null) {
    throw new NullPointerException("Configuration is null");
  }

  final FileSystem fs = dir.getFileSystem(conf);
  if (!fs.getFileStatus(dir).isDirectory()) {
    throw new IOException("Directory path is not a directory: " + dir);
  }

  final FileStatus[] filesStatus = fs.listStatus(dir,
      new SuffixPathFilter(suffix, allowCompressedExtension));

  if (filesStatus == null) {
    return Collections.emptyList();
  }

  final List<Path> result = new ArrayList<>(filesStatus.length);

  for (FileStatus fst : filesStatus) {
    result.add(fst.getPath());
  }

  return result;
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Copy all files of a list to one output file (merge).
 * @param paths list of paths of the files to concatenate
 * @param dstPath destination path
 * @param deleteSource true if the original files must be deleted
 * @param overwrite true if an existing destination file must be deleted
 * @param conf Configuration
 * @param addString string to add after each copied file
 * @return true if the concatenation succeeded
 * @throws IOException if an error occurs while concatenating the files
 */
public static boolean concat(final List<Path> paths, final Path dstPath,
    final boolean deleteSource, final boolean overwrite,
    final Configuration conf, final String addString) throws IOException {

  if (paths == null) {
    throw new NullPointerException("The list of path to concat is null");
  }

  if (paths.size() == 0) {
    return false;
  }

  if (dstPath == null) {
    throw new NullPointerException("The destination path is null");
  }

  if (conf == null) {
    throw new NullPointerException("The configuration is null.");
  }

  final FileSystem srcFs = paths.get(0).getFileSystem(conf);
  final FileSystem dstFs = dstPath.getFileSystem(conf);

  if (!overwrite && dstFs.exists(dstPath)) {
    throw new IOException("The output file already exists: " + dstPath);
  }

  try (OutputStream out = dstFs.create(dstPath)) {

    // FileStatus contents[] = srcFS.listStatus(srcDir);
    // for (int i = 0; i < contents.length; i++) {
    for (Path p : paths) {
      if (!srcFs.getFileStatus(p).isDirectory()) {
        try (InputStream in = srcFs.open(p)) {
          IOUtils.copyBytes(in, out, conf, false);
          if (addString != null) {
            out.write(addString.getBytes(FileCharsets.UTF8_CHARSET));
          }
        }
      }
    }
  }

  if (deleteSource) {
    for (Path p : paths) {
      if (!srcFs.delete(p, false)) {
        return false;
      }
    }
  }

  return true;
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Check if a directory exists.
 * @param directory directory to test
 * @param conf the configuration object
 * @param msgFileType message for the description of the file
 * @throws IOException if the directory doesn't exist or is not a directory
 */
public static final void checkExistingDirectoryFile(final Path directory,
    final Configuration conf, final String msgFileType) throws IOException {

  checkExistingFile(directory, conf, msgFileType);

  final FileSystem fs = directory.getFileSystem(conf);

  if (!fs.getFileStatus(directory).isDirectory()) {
    throw new IOException(
        "The " + msgFileType + " is not a directory: " + directory);
  }
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Check if a directory exists.
 * @param directory directory to test
 * @param conf the configuration object
 * @return true if the directory exists
 * @throws IOException if an error occurs while testing the directory
 */
public static final boolean isExistingDirectoryFile(final Path directory,
    final Configuration conf) throws IOException {

  if (directory == null) {
    throw new NullPointerException("The directory is null");
  }

  if (conf == null) {
    throw new NullPointerException("The configuration is null");
  }

  final FileSystem fs = directory.getFileSystem(conf);

  try {
    return fs.getFileStatus(directory).isDirectory();
  } catch (FileNotFoundException e) {
    return false;
  }
}
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Check if a standard file or a directory exists.
 * @param file file to test
 * @param conf the configuration object
 * @param msgFileType message for the description of the file
 * @throws IOException if the file doesn't exist
 */
public static final void checkExistingStandardFileOrDirectory(final Path file,
    final Configuration conf, final String msgFileType) throws IOException {

  checkExistingFile(file, conf, msgFileType);

  final FileSystem fs = file.getFileSystem(conf);

  if (!fs.isFile(file) && !fs.getFileStatus(file).isDirectory()) {
    throw new IOException("The " + msgFileType
        + " is not a standard file or a directory: " + file);
  }
}
From source file:fuse4j.hadoopfs.HdfsClientImpl.java
License:Apache License
/**
 * getFileInfo()
 */
@Override
public HdfsFileAttr getFileInfo(int uid, String path) {
  FileSystem dfs = null;
  try {
    dfs = getDfs(uid);
    FileStatus dfsStat = dfs.getFileStatus(new Path(path));

    final boolean directory = dfsStat.isDir();
    final int inode = 0;
    final int mode = dfsStat.getPermission().toShort();
    final int uuid = userCache.getUid(dfsStat.getOwner());
    final int gid = 0;

    // TODO: per-file block-size can't be retrieved correctly,
    // using default block size for now.
    final long size = dfsStat.getLen();
    final int blocks =
        (int) Math.ceil(((double) size) / dfs.getDefaultBlockSize());

    // modification/create-times are the same as access-time
    final int modificationTime = (int) (dfsStat.getModificationTime() / 1000);
    final int accessTime = (int) (dfsStat.getAccessTime() / 1000);

    HdfsFileAttr hdfsFileAttr =
        new HdfsFileAttr(directory, inode, mode, uuid, gid, 1);
    hdfsFileAttr.setSize(size, blocks);
    hdfsFileAttr.setTime(modificationTime, modificationTime, accessTime);

    // TODO: hack to set inode
    hdfsFileAttr.inode = hdfsFileAttr.hashCode();

    return hdfsFileAttr;
  } catch (Exception ioe) {
    // fall through to failure
  }

  // failed
  return null;
}