List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
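Before the project excerpts below, here is a minimal sketch of the call itself; the path is hypothetical, and the sketch relies on the fact that getFileStatus reports a missing path by throwing FileNotFoundException rather than returning null.

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical path; replace with a file that exists on your cluster.
        Path path = new Path("/tmp/example.txt");
        Configuration conf = new Configuration();
        FileSystem fs = path.getFileSystem(conf);
        try {
            FileStatus status = fs.getFileStatus(path);
            System.out.println("length=" + status.getLen()
                    + " isDirectory=" + status.isDirectory()
                    + " modificationTime=" + status.getModificationTime()
                    + " owner=" + status.getOwner());
        } catch (FileNotFoundException e) {
            // A missing path surfaces as an exception, not as a null FileStatus.
            System.out.println(path + " does not exist");
        }
    }
}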
From source file: com.pinterest.hdfsbackup.distcp.DistCp.java
License: Apache License

static private void finalize(Configuration conf, JobConf jobconf, final Path destPath,
        String presevedAttributes) throws IOException {
    if (presevedAttributes == null) {
        return;
    }
    EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
    if (!preseved.contains(FileAttribute.USER) && !preseved.contains(FileAttribute.GROUP)
            && !preseved.contains(FileAttribute.PERMISSION)) {
        return;
    }
    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader in = null;
    try {
        in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
        Text dsttext = new Text();
        FilePair pair = new FilePair();
        for (; in.next(dsttext, pair);) {
            Path absdst = new Path(destPath, pair.output);
            updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
        }
    } finally {
        checkAndClose(in);
    }
}
From source file: com.pinterest.hdfsbackup.distcp.DistCp.java
License: Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
            args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
            LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
            Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
            Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit
                                || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
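Both DistCp excerpts above test directories with FileStatus.isDir(), which later Hadoop releases deprecate in favor of isDirectory(). A minimal sketch of the same existence-plus-directory check written against the non-deprecated accessor; the helper name and path are hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryCheck {
    // Returns true if the given path exists and is a directory.
    static boolean isExistingDirectory(Configuration conf, String pathStr) throws IOException {
        Path path = new Path(pathStr); // hypothetical path
        FileSystem fs = path.getFileSystem(conf);
        // isDirectory() is the current replacement for the deprecated isDir().
        return fs.exists(path) && fs.getFileStatus(path).isDirectory();
    }
}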
From source file: com.pinterest.hdfsbackup.distcp.DistCp.java
License: Apache License

/**
 * Check whether the contents of src and dst are the same.
 *
 * Return false if dstpath does not exist.
 *
 * If the files have different sizes, return false.
 *
 * If the files have the same sizes, the file checksums will be compared.
 *
 * When file checksum is not supported in any of file systems,
 * two files are considered as the same if they have the same size.
 */
static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath)
        throws IOException {
    FileStatus dststatus;
    try {
        dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    // same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
        return false;
    }

    // compare checksums
    try {
        final FileChecksum srccs = srcfs.getFileChecksum(srcstatus.getPath());
        final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
        // return true if checksum is not supported
        // (i.e. some of the checksums is null)
        return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
        return false;
    }
}
From source file: com.pinterest.secor.util.FileUtil.java
License: Apache License

public static long getModificationTimeMsRecursive(String path) throws IOException {
    FileSystem fs = getFileSystem(path);
    Path fsPath = new Path(path);
    FileStatus status = fs.getFileStatus(fsPath);
    long modificationTime = status.getModificationTime();
    FileStatus[] statuses = fs.listStatus(fsPath);
    if (statuses != null) {
        for (FileStatus fileStatus : statuses) {
            Path statusPath = fileStatus.getPath();
            String stringPath;
            if (path.startsWith("s3://") || path.startsWith("s3n://")) {
                stringPath = statusPath.toUri().toString();
            } else {
                stringPath = statusPath.toUri().getPath();
            }
            if (!stringPath.equals(path)) {
                modificationTime = Math.max(modificationTime,
                        getModificationTimeMsRecursive(stringPath));
            }
        }
    }
    return modificationTime;
}
From source file: com.pivotal.hawq.mapreduce.ao.HAWQAOInputFormat.java
License: Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job
 *            the job context
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (int i = 0; i < fileStatuses.length; ++i) {
        HAWQAOFileStatus aofilestatus = fileStatuses[i];
        String pathStr = aofilestatus.getFilePath();
        long fileLength = aofilestatus.getFileLength();
        if (fileLength == 0)
            continue;

        boolean checksum = aofilestatus.getChecksum();
        String compressType = aofilestatus.getCompressType();
        int blocksize = aofilestatus.getBlockSize();
        Path path = new Path(pathStr);

        if (fileLength != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(fs.getFileStatus(path), 0, fileLength);
            // not splitable
            splits.add(new HAWQAOSplit(path, 0, fileLength, blkLocations[0].getHosts(), checksum,
                    compressType, blocksize));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new HAWQAOSplit(path, 0, fileLength, new String[0], checksum, compressType,
                    blocksize));
        }
    }
    job.getConfiguration().setLong(NUM_INPUT_FILES, splits.size());
    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
From source file: com.pivotal.hawq.mapreduce.parquet.HAWQParquetInputFormat.java
License: Apache License

@Override
protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
    List<FileStatus> result = Lists.newArrayList();

    for (HAWQFileStatus hawqFileStatus : hawqFileStatuses) {
        if (hawqFileStatus.getFileLength() == 0)
            continue; // skip empty file

        Path path = new Path(hawqFileStatus.getFilePath());
        FileSystem fs = path.getFileSystem(jobContext.getConfiguration());
        FileStatus dfsStat = fs.getFileStatus(path);

        // rewrite file length because HAWQ records the logicalEOF of file, which may
        // be smaller than the file's actual EOF
        FileStatus hawqStat = new FileStatus(hawqFileStatus.getFileLength(), // rewrite to logicalEOF
                dfsStat.isDirectory(), dfsStat.getReplication(), dfsStat.getBlockSize(),
                dfsStat.getModificationTime(), dfsStat.getAccessTime(), dfsStat.getPermission(),
                dfsStat.getOwner(), dfsStat.getGroup(), dfsStat.getPath());

        result.add(hawqStat);
    }
    return result;
}
From source file: com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java
License: Apache License

/**
 * Move all of the files from the work directory to the final output.
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}
From source file: com.qubole.rubix.core.CachingInputStream.java
License: Apache License

public CachingInputStream(FSDataInputStream parentInputStream, FileSystem parentFs, Path backendPath,
        Configuration conf, CachingFileSystemStats statsMbean, long splitSize, ClusterType clusterType)
        throws IOException {
    this.remotePath = backendPath.toString();
    this.fileSize = parentFs.getLength(backendPath);
    lastModified = parentFs.getFileStatus(backendPath).getModificationTime();
    initialize(parentInputStream, conf);
    this.statsMbean = statsMbean;
    this.splitSize = splitSize;
    this.clusterType = clusterType;
}
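A side note on the constructor above: if I recall the API correctly, FileSystem.getLength(Path) is deprecated in favor of reading the length from a FileStatus, and the next line already fetches a FileStatus for the same path, so a single getFileStatus call can populate both fields. A minimal sketch under that assumption; the helper name and path are hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class SingleStatusLookup {
    // One metadata lookup instead of two: read size and mtime from the same FileStatus.
    static long[] sizeAndMtime(Configuration conf, String pathStr) throws IOException {
        Path path = new Path(pathStr); // hypothetical path
        FileStatus status = path.getFileSystem(conf).getFileStatus(path);
        return new long[] { status.getLen(), status.getModificationTime() };
    }
}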
From source file: com.redsqirl.workflow.server.connect.HDFSInterface.java
License: Open Source License

/**
 * Get the properties of a path.
 *
 * @param path
 * @return Map of properties
 * @throws RemoteException
 */
@Override
public Map<String, String> getProperties(String path) throws RemoteException {
    logger.debug("getProperties");
    Map<String, String> prop = new LinkedHashMap<String, String>();
    try {
        logger.debug(0);
        logger.debug("sys_namenode PathHDFS: " + NameNodeVar.get());
        FileSystem fs = NameNodeVar.getFS();
        FileStatus stat = fs.getFileStatus(new Path(path));
        prop = getProperties(path, stat);
        logger.debug(1);
    } catch (IOException e) {
        logger.error("Error in filesystem");
        logger.error(e, e);
    } catch (Exception e) {
        logger.error("Not expected exception: " + e);
        logger.error(e.getMessage(), e);
    }
    logger.debug("Properties of " + path + ": " + prop.toString());
    return prop;
}
From source file: com.redsqirl.workflow.server.connect.HDFSInterface.java
License: Open Source License

/**
 * Change ownership of a Path.
 *
 * @param path
 * @param owner
 * @param group
 * @param recursive
 * @return Error Message
 */
protected String changeOwnership(Path path, String owner, String group, boolean recursive) {
    String error = null;
    try {
        FileSystem fs = NameNodeVar.getFS();
        FileStatus stat = fs.getFileStatus(path);
        if (stat.getOwner().equals(System.getProperty("user.name"))) {
            if (recursive) {
                FileStatus[] fsA = fs.listStatus(path);
                for (int i = 0; i < fsA.length && error == null; ++i) {
                    error = changeOwnership(fs, fsA[i].getPath(), owner, group, recursive);
                }
            }
            if (error == null) {
                fs.setOwner(path, owner, group);
            }
        } else {
            error = LanguageManagerWF.getText("HdfsInterface.changeprop.ownererror",
                    new Object[] { path.toString() });
        }
        // fs.close();
    } catch (IOException e) {
        logger.error("Cannot operate on the file or directory: " + path.toString());
        logger.error(e.getMessage());
        error = LanguageManagerWF.getText("HdfsInterface.changeprop.fileaccess", new Object[] { path });
    }
    if (error != null) {
        logger.debug(error);
    }
    return error;
}