Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page collects example usages of getFileStatus from org.apache.hadoop.fs.FileSystem, drawn from open-source projects.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
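
Before the project excerpts, here is a minimal, self-contained sketch of a typical call; the path used is purely illustrative and not taken from the examples below. Note that getFileStatus throws FileNotFoundException when the path does not exist, a pattern several of the examples rely on.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // illustrative path only
        FileSystem fs = path.getFileSystem(conf);
        try {
            // getFileStatus returns a FileStatus describing the path
            FileStatus status = fs.getFileStatus(path);
            System.out.println("length: " + status.getLen());
            System.out.println("isDirectory: " + status.isDirectory());
            System.out.println("modificationTime: " + status.getModificationTime());
            System.out.println("owner: " + status.getOwner());
        } catch (FileNotFoundException e) {
            // thrown when the path does not exist
            System.out.println(path + " does not exist");
        }
    }
}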

Usage

From source file: com.pinterest.hdfsbackup.distcp.DistCp.java

License: Apache License

static private void finalize(Configuration conf, JobConf jobconf, final Path destPath,
        String presevedAttributes) throws IOException {
    if (presevedAttributes == null) {
        return;
    }
    EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
    if (!preseved.contains(FileAttribute.USER) && !preseved.contains(FileAttribute.GROUP)
            && !preseved.contains(FileAttribute.PERMISSION)) {
        return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader in = null;
    try {
        in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
        Text dsttext = new Text();
        FilePair pair = new FilePair();
        for (; in.next(dsttext, pair);) {
            Path absdst = new Path(destPath, pair.output);
            updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
        }
    } finally {
        checkAndClose(in);
    }
}

From source file: com.pinterest.hdfsbackup.distcp.DistCp.java

License: Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        //skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        //skip file if it exceeds the file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}

From source file: com.pinterest.hdfsbackup.distcp.DistCp.java

License: Apache License

/**
 * Check whether the contents of src and dst are the same.
 *
 * Return false if dstpath does not exist
 *
 * If the files have different sizes, return false.
 *
 * If the files have the same sizes, the file checksums will be compared.
 *
 * When file checksum is not supported in any of file systems,
 * two files are considered as the same if they have the same size.
 */
static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath)
        throws IOException {
    FileStatus dststatus;
    try {
        dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    //same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
        return false;
    }

    //compare checksums
    try {
        final FileChecksum srccs = srcfs.getFileChecksum(srcstatus.getPath());
        final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
        //return true if checksum is not supported
        //(i.e. one of the checksums is null)
        return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
        return false;
    }
}
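
The FileNotFoundException handling in sameFile above is the usual way to probe for existence with getFileStatus, avoiding the extra round trip of a separate exists() check followed by getFileStatus. A minimal sketch of that idiom in isolation follows; the helper name is illustrative only and assumes the same imports as the surrounding examples.

// Returns the FileStatus for the path, or null if the path does not exist.
// getFileStatus either succeeds or throws FileNotFoundException, so no
// separate exists() check is needed.
private static FileStatus statusOrNull(FileSystem fs, Path path) throws IOException {
    try {
        return fs.getFileStatus(path);
    } catch (FileNotFoundException fnfe) {
        return null;
    }
}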

From source file: com.pinterest.secor.util.FileUtil.java

License: Apache License

public static long getModificationTimeMsRecursive(String path) throws IOException {
    FileSystem fs = getFileSystem(path);
    Path fsPath = new Path(path);
    FileStatus status = fs.getFileStatus(fsPath);
    long modificationTime = status.getModificationTime();
    FileStatus[] statuses = fs.listStatus(fsPath);
    if (statuses != null) {
        for (FileStatus fileStatus : statuses) {
            Path statusPath = fileStatus.getPath();
            String stringPath;
            if (path.startsWith("s3://") || path.startsWith("s3n://")) {
                stringPath = statusPath.toUri().toString();
            } else {
                stringPath = statusPath.toUri().getPath();
            }
            if (!stringPath.equals(path)) {
                modificationTime = Math.max(modificationTime, getModificationTimeMsRecursive(stringPath));
            }
        }
    }
    return modificationTime;
}

From source file: com.pivotal.hawq.mapreduce.ao.HAWQAOInputFormat.java

License: Apache License

/**
 * Generate the list of files and make them into FileSplits.
 *
 * @param job
 *            the job context
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (int i = 0; i < fileStatuses.length; ++i) {
        HAWQAOFileStatus aofilestatus = fileStatuses[i];
        String pathStr = aofilestatus.getFilePath();
        long fileLength = aofilestatus.getFileLength();
        if (fileLength == 0)
            continue;

        boolean checksum = aofilestatus.getChecksum();
        String compressType = aofilestatus.getCompressType();
        int blocksize = aofilestatus.getBlockSize();
        Path path = new Path(pathStr);
        if (fileLength != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(fs.getFileStatus(path), 0, fileLength);
            // not splittable
            splits.add(new HAWQAOSplit(path, 0, fileLength, blkLocations[0].getHosts(), checksum, compressType,
                    blocksize));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new HAWQAOSplit(path, 0, fileLength, new String[0], checksum, compressType, blocksize));
        }
    }
    job.getConfiguration().setLong(NUM_INPUT_FILES, splits.size());
    LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file: com.pivotal.hawq.mapreduce.parquet.HAWQParquetInputFormat.java

License: Apache License

@Override
protected List<FileStatus> listStatus(JobContext jobContext) throws IOException {
    List<FileStatus> result = Lists.newArrayList();
    for (HAWQFileStatus hawqFileStatus : hawqFileStatuses) {
        if (hawqFileStatus.getFileLength() == 0)
            continue; // skip empty file

        Path path = new Path(hawqFileStatus.getFilePath());
        FileSystem fs = path.getFileSystem(jobContext.getConfiguration());
        FileStatus dfsStat = fs.getFileStatus(path);
        // rewrite file length because HAWQ records the logicalEOF of file, which may
        // be smaller than the file's actual EOF
        FileStatus hawqStat = new FileStatus(hawqFileStatus.getFileLength(), // rewrite to logicalEOF
                dfsStat.isDirectory(), dfsStat.getReplication(), dfsStat.getBlockSize(),
                dfsStat.getModificationTime(), dfsStat.getAccessTime(), dfsStat.getPermission(),
                dfsStat.getOwner(), dfsStat.getGroup(), dfsStat.getPath());
        result.add(hawqStat);
    }

    return result;
}

From source file: com.practicalHadoop.outputformat.MultpleDirectories.FileOutputCommitter.java

License: Apache License

/**
 * Move all of the files from the work directory to the final output
 * @param context the task context
 * @param fs the output file system
 * @param jobOutputDir the final output directory
 * @param taskOutput the work path
 * @throws IOException
 */
private void moveTaskOutputs(TaskAttemptContext context, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    TaskAttemptID attemptId = context.getTaskAttemptID();
    context.progress();
    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + attemptId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + attemptId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, workPath);
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(context, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file: com.qubole.rubix.core.CachingInputStream.java

License: Apache License

public CachingInputStream(FSDataInputStream parentInputStream, FileSystem parentFs, Path backendPath,
        Configuration conf, CachingFileSystemStats statsMbean, long splitSize, ClusterType clusterType)
        throws IOException {
    this.remotePath = backendPath.toString();
    this.fileSize = parentFs.getLength(backendPath);
    lastModified = parentFs.getFileStatus(backendPath).getModificationTime();
    initialize(parentInputStream, conf);
    this.statsMbean = statsMbean;
    this.splitSize = splitSize;
    this.clusterType = clusterType;
}

From source file: com.redsqirl.workflow.server.connect.HDFSInterface.java

License: Open Source License

/**
 * Get the properties of a path
 * 
 * @param path
 * @return Map of properties
 * @throws RemoteException
 */
@Override
public Map<String, String> getProperties(String path) throws RemoteException {

    logger.debug("getProperties");

    Map<String, String> prop = new LinkedHashMap<String, String>();
    try {
        logger.debug(0);
        logger.debug("sys_namenode PathHDFS: " + NameNodeVar.get());
        FileSystem fs = NameNodeVar.getFS();
        FileStatus stat = fs.getFileStatus(new Path(path));
        prop = getProperties(path, stat);
        logger.debug(1);
    } catch (IOException e) {
        logger.error("Error in filesystem");
        logger.error(e, e);
    } catch (Exception e) {
        logger.error("Not expected exception: " + e);
        logger.error(e.getMessage(), e);
    }
    logger.debug("Properties of " + path + ": " + prop.toString());
    return prop;
}

From source file: com.redsqirl.workflow.server.connect.HDFSInterface.java

License: Open Source License

/**
 * Change Ownership of a Path
 * 
 * @param path
 * @param owner
 * @param group
 * @param recursive
 * @return Error Message
 */
protected String changeOwnership(Path path, String owner, String group, boolean recursive) {
    String error = null;
    try {
        FileSystem fs = NameNodeVar.getFS();
        FileStatus stat = fs.getFileStatus(path);
        if (stat.getOwner().equals(System.getProperty("user.name"))) {
            if (recursive) {
                FileStatus[] fsA = fs.listStatus(path);

                for (int i = 0; i < fsA.length && error == null; ++i) {
                    error = changeOwnership(fs, fsA[i].getPath(), owner, group, recursive);
                }
            }
            if (error == null) {
                fs.setOwner(path, owner, group);
            }
        } else {
            error = LanguageManagerWF.getText("HdfsInterface.changeprop.ownererror",
                    new Object[] { path.toString() });
        }
        // fs.close();
    } catch (IOException e) {
        logger.error("Cannot operate on the file or directory: " + path.toString());
        logger.error(e.getMessage());
        error = LanguageManagerWF.getText("HdfsInterface.changeprop.fileaccess", new Object[] { path });
    }

    if (error != null) {
        logger.debug(error);
    }
    return error;
}