Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
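
A minimal, self-contained sketch of a typical call (the path below is hypothetical): the caller obtains the FileSystem from the Path itself, then inspects the returned FileStatus.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        FileSystem fs = path.getFileSystem(new Configuration());

        // Throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length:      " + status.getLen());
        System.out.println("isDirectory: " + status.isDirectory());
        System.out.println("block size:  " + status.getBlockSize());
        System.out.println("modified:    " + status.getModificationTime());
    }
}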

Usage

From source file: com.linkedin.json.JsonSequenceFileInputFormat.java

License: Apache License

private List<FileStatus> getAllSubFileStatus(JobContext jobContext, Path filterMemberPath) throws IOException {
    List<FileStatus> list = new ArrayList<FileStatus>();

    FileSystem fs = filterMemberPath.getFileSystem(jobContext.getConfiguration());
    FileStatus[] subFiles = fs.listStatus(filterMemberPath);

    if (null != subFiles) {
        if (fs.getFileStatus(filterMemberPath).isDir()) {
            for (FileStatus subFile : subFiles) {
                if (!subFile.getPath().getName().startsWith("_")) {
                    list.addAll(getAllSubFileStatus(jobContext, subFile.getPath()));
                }
            }
        } else {
            if (subFiles.length > 0 && !subFiles[0].getPath().getName().startsWith("_")) {
                list.add(subFiles[0]);
            }
        }
    }

    return list;
}
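
Note that FileStatus.isDir(), used above, is deprecated in Hadoop 2.x in favor of isDirectory(). On those releases the traversal can also be written without explicit recursion via FileSystem#listFiles, as in this sketch; unlike the version above, it does not prune underscore-prefixed directories, only underscore-prefixed files.

// Sketch: recursive listing with listFiles (Hadoop 2.x+), returns files only.
RemoteIterator<LocatedFileStatus> it = fs.listFiles(filterMemberPath, true);
List<FileStatus> list = new ArrayList<FileStatus>();
while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    if (!status.getPath().getName().startsWith("_")) {
        list.add(status);
    }
}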

From source file: com.linkedin.pinot.hadoop.job.SegmentTarPushJob.java

License: Apache License

public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
    String fileName = path.getName();
    if (!fileName.endsWith(".tar.gz")) {
        return;
    }
    long length = fs.getFileStatus(path).getLen();
    for (String host : _hosts) {
        InputStream inputStream = null;
        try {
            inputStream = fs.open(path);
            fileName = fileName.split(".tar")[0];
            LOGGER.info("******** Upoading file: {} to Host: {} and Port: {} *******", fileName, host, _port);
            try {
                int responseCode = FileUploadUtils.sendSegmentFile(host, _port, fileName, inputStream, length);
                LOGGER.info("Response code: {}", responseCode);
            } catch (Exception e) {
                LOGGER.error("******** Error Upoading file: {} to Host: {} and Port: {}  *******", fileName,
                        host, _port);
                LOGGER.error("Caught exception during upload", e);
                throw new RuntimeException("Got Error during send tar files to push hosts!");
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}
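
Since FSDataInputStream is Closeable, the open/close pattern above is often written with try-with-resources (Java 7+), which also avoids calling close() on a null stream when fs.open fails. A sketch of the same per-host loop:

for (String host : _hosts) {
    // The stream is closed automatically, even if the upload throws.
    try (InputStream inputStream = fs.open(path)) {
        int responseCode = FileUploadUtils.sendSegmentFile(host, _port, fileName, inputStream, length);
        LOGGER.info("Response code: {}", responseCode);
    }
}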

From source file: com.linkedin.thirdeye.hadoop.push.SegmentPushPhase.java

License: Apache License

public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
    String fileName = path.getName();
    if (!fileName.endsWith(".tar.gz")) {
        return;
    }
    long length = fs.getFileStatus(path).getLen();
    for (String host : hosts) {
        InputStream inputStream = null;
        try {
            inputStream = fs.open(path);
            fileName = fileName.split(".tar")[0];
            if (fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER) != -1) {
                segmentName = fileName.substring(0, fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER));
            }
            LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", fileName, host, port);
            try {
                int responseCode = FileUploadUtils.sendSegmentFile(host, port, fileName, inputStream, length);
                LOGGER.info("Response code: {}", responseCode);

                if (uploadSuccess && responseCode != 200) {
                    uploadSuccess = false;
                }

            } catch (Exception e) {
                LOGGER.error("******** Error Uploading file: {} to Host: {} and Port: {}  *******", fileName,
                        host, port);
                LOGGER.error("Caught exception during upload", e);
                throw new RuntimeException("Got Error during send tar files to push hosts!");
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}

From source file: com.liveramp.cascading_ext.FileSystemHelper.java

License: Apache License

private static void printFiles(FileSystem fs, Path p, int indent) throws IOException {
    FileStatus stat = fs.getFileStatus(p);
    for (int i = 0; i < indent; i++) {
        System.out.print("\t");
    }
    System.out.println(p.toString());
    if (stat.isDir()) {
        for (FileStatus child : fs.listStatus(p)) {
            printFiles(fs, child.getPath(), indent + 1);
        }
    }
}
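
A hypothetical invocation that prints an entire tree from the root, shown as a sketch:

FileSystem fs = FileSystem.get(new Configuration());
printFiles(fs, new Path("/"), 0);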

From source file: com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java

License: Creative Commons License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit
 *            The CombineFileSplit that this will read from.
 * @param context
 *            The context for this task.
 * @param pathToProcess
 *            The path index from the CombineFileSplit to process in this
 *            record.
 */
public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context,
        Integer pathToProcess) {
    isProcessed = false;

    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);

    mConf = context.getConfiguration();

    /* never used in production, just for code integrity */
    assert 0 == fileSplit.getOffset(pathToProcess);

    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            FileSystem fs = FileSystem.get(mConf);

            /* never used in production, just for code integrity */
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            LOG.debug("Problem in file length");
        }
    }

    fileContent = new BytesWritable();
}

From source file: com.marklogic.contentpump.CombineDocumentInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
    CombineDocumentSplit split = null;
    for (InputSplit file : splits) {
        Path path = ((FileSplit) file).getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus status = fs.getFileStatus(path);
        long length = status.getLen();
        long blockSize = status.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        if (length != 0) {
            if (split == null) {
                split = new CombineDocumentSplit();
            }

            try {
                if (split.getLength() + length < splitSize || split.getLength() < minSize) {
                    split.addSplit((FileSplit) file);
                } else {
                    combinedSplits.add(split);
                    split = new CombineDocumentSplit();
                    split.addSplit((FileSplit) file);
                }
            } catch (InterruptedException e) {
                LOG.error(e);
                throw new RuntimeException(e);
            }
        }
    }
    if (split != null) {
        combinedSplits.add(split);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits: " + splits.size());
        LOG.debug("Total # of combined splits: " + combinedSplits.size());
    }

    return combinedSplits;
}
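
For context, computeSplitSize is inherited from FileInputFormat, where it clamps the file's block size into the configured [minSize, maxSize] range:

// FileInputFormat's split-size rule: blockSize clamped between minSize and maxSize.
protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}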

From source file: com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Configuration conf = job.getConfiguration();
    try {
        List<FileStatus> files = listStatus(job);

        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
        long maxSize = getMaxSplitSize(job);
        for (FileStatus child : files) {
            Path path = child.getPath();
            FileSystem fs = path.getFileSystem(conf);
            // length is 0 for dir according to FSDirectory.java in 0.20
            // however, w/ Hadoop2, dir in local fs has non-zero length
            long length = child.getLen();
            BlockLocation[] blkLocations = null;
            if (!child.isDirectory() || !(fs instanceof DistributedFileSystem)) {
                blkLocations = fs.getFileBlockLocations(child, 0, length);
            } else if (length != 0) {
                throw new IOException("non-zero length directory on HDFS:" + path.toUri().toString());
            }

            if ((length != 0) && isSplitable(job, path)) {
                long blockSize = child.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkLocations.length - 1].getHosts()));
                }
            } else if (length != 0) {
                splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
            } else {
                // Create empty hosts array for zero length files
                splits.add(new FileSplit(path, 0, length, new String[0]));
            }
        }
    } catch (InvalidInputException ex) {
        String inPath = conf.get(ConfigConstants.CONF_INPUT_DIRECTORY);
        String pattern = conf.get(ConfigConstants.CONF_INPUT_FILE_PATTERN, ".*");
        throw new IOException("No input files found with the specified input path " + inPath
                + " and input file pattern " + pattern, ex);
    }

    PathFilter jobFilter = getInputPathFilter(job);
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    // take a second pass of the splits generated to extract files from
    // directories
    int count = 0;
    // flatten directories until reaching SPLIT_COUNT_LIMIT
    while (count < splits.size() && splits.size() < SPLIT_COUNT_LIMIT) {
        FileSplit split = (FileSplit) splits.get(count);
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(file);
        if (status.isDirectory()) {
            FileStatus[] children = fs.listStatus(file, inputFilter);
            if (children.length + count < SPLIT_COUNT_LIMIT) {
                splits.remove(count);
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    splits.add(child);
                }
            } else {
                count++;
            }
        } else {
            count++;
        }
    }
    return splits;
}

From source file: com.marklogic.contentpump.utilities.FileIterator.java

License: Apache License

@Override
public FileSplit next() {
    while (iterator.hasNext() || !fileDirSplits.isEmpty()) {
        try {
            if (iterator.hasNext()) {
                FileSplit split = iterator.next();
                Path file = ((FileSplit) split).getPath();

                FileSystem fs = file.getFileSystem(conf);

                FileStatus status = fs.getFileStatus(file);
                if (status.isDirectory()) {
                    FileStatus[] children = fs.listStatus(status.getPath(), inputFilter);
                    for (FileStatus stat : children) {
                        FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                        fileDirSplits.add(child);
                    }
                } else {
                    return split;
                }

            } else if (!fileDirSplits.isEmpty()) {
                FileSplit split = (FileSplit) fileDirSplits.remove(0);
                Path file = split.getPath();
                FileSystem fs = file.getFileSystem(conf);
                FileStatus status = fs.getFileStatus(file);

                if (!status.isDirectory()) {
                    return split;
                }
                FileStatus[] children = fs.listStatus(status.getPath(), inputFilter);

                List<FileSplit> expdFileSpts = new LinkedList<FileSplit>();
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    expdFileSpts.add(child);
                }
                iterator = expdFileSpts.iterator();
                continue;
            }
        } catch (IOException e) {
            LOG.error("Invalid next file", e);
        }
    }
    return null;
}

From source file: com.marklogic.mapreduce.LargeBinaryDocument.java

License: Apache License

public byte[] getContentAsByteArray(int offset, int len) {
    FileSystem fs;
    FSDataInputStream is = null;
    try {
        fs = path.getFileSystem(conf);
        if (!fs.exists(path)) {
            throw new RuntimeException("File not found: " + path);
        }
        FileStatus status = fs.getFileStatus(path);
        if (status.getLen() < offset) {
            throw new RuntimeException("Reached end of file: " + path);
        }
        byte[] buf = new byte[len];
        is = fs.open(path);
        // Skip to the requested offset; skipBytes may skip fewer bytes per call.
        for (int toSkip = offset; toSkip > 0;) {
            toSkip -= is.skipBytes(toSkip);
        }
        for (int bytesRead = 0; bytesRead < len;) {
            bytesRead += is.read(buf, bytesRead, len - bytesRead);
        }
        return buf;
    } catch (IOException e) {
        throw new RuntimeException("Error accessing file: " + path, e);
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                // ignore failure to close the stream
            }
        }
    }
}
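
Because FSDataInputStream implements PositionedReadable, the manual skip and read loops above can be replaced with a single positioned readFully call. A minimal sketch of the same read, error handling omitted:

byte[] buf = new byte[len];
FSDataInputStream in = fs.open(path);
try {
    // Reads exactly buf.length bytes starting at offset, or throws EOFException.
    in.readFully(offset, buf);
} finally {
    in.close();
}
return buf;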

From source file: com.mellanox.r4h.DistributedFileSystem.java

License: Apache License

/**
 * Returns the stat information about the file.
 *
 * @throws FileNotFoundException
 *             if the file does not exist.
 */
@Override
public FileStatus getFileStatus(Path f) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FileStatus>() {
        @Override
        public FileStatus doCall(final Path p) throws IOException, UnresolvedLinkException {
            HdfsFileStatus fi = dfs.getFileInfo(getPathName(p));
            if (fi != null) {
                return fi.makeQualified(getUri(), p);
            } else {
                throw new FileNotFoundException("File does not exist: " + p);
            }
        }

        @Override
        public FileStatus next(final FileSystem fs, final Path p) throws IOException {
            return fs.getFileStatus(p);
        }
    }.resolve(this, absF);
}
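
Because getFileStatus signals a missing path with FileNotFoundException rather than a null return, a caller can combine an existence check with a metadata fetch in a single round trip. A sketch:

// One call instead of fs.exists(path) followed by fs.getFileStatus(path).
FileStatus status;
try {
    status = fs.getFileStatus(path);
} catch (FileNotFoundException e) {
    status = null; // path does not exist
}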