Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
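
A minimal, self-contained sketch of a typical call (the path below is hypothetical): the caller obtains the FileSystem from the Path itself, then inspects the returned FileStatus.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        FileSystem fs = path.getFileSystem(new Configuration());

        // Throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length:      " + status.getLen());
        System.out.println("isDirectory: " + status.isDirectory());
        System.out.println("block size:  " + status.getBlockSize());
        System.out.println("modified:    " + status.getModificationTime());
    }
}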

Usage

From source file: com.linkedin.json.JsonSequenceFileInputFormat.java

License: Apache License

private List<FileStatus> getAllSubFileStatus(JobContext jobContext, Path filterMemberPath) throws IOException {
    List<FileStatus> list = new ArrayList<FileStatus>();

    FileSystem fs = filterMemberPath.getFileSystem(jobContext.getConfiguration());
    FileStatus[] subFiles = fs.listStatus(filterMemberPath);

    if (null != subFiles) {
        if (fs.getFileStatus(filterMemberPath).isDir()) {
            for (FileStatus subFile : subFiles) {
                if (!subFile.getPath().getName().startsWith("_")) {
                    list.addAll(getAllSubFileStatus(jobContext, subFile.getPath()));
                }
            }
        } else {
            if (subFiles.length > 0 && !subFiles[0].getPath().getName().startsWith("_")) {
                list.add(subFiles[0]);
            }
        }
    }

    return list;
}
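
Note that FileStatus.isDir(), used above, is deprecated in Hadoop 2.x in favor of isDirectory(). On those releases the traversal can also be written without explicit recursion via FileSystem#listFiles, as in this sketch; unlike the version above, it does not prune underscore-prefixed directories, only underscore-prefixed files.

// Sketch: recursive listing with listFiles (Hadoop 2.x+), returns files only.
RemoteIterator<LocatedFileStatus> it = fs.listFiles(filterMemberPath, true);
List<FileStatus> list = new ArrayList<FileStatus>();
while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    if (!status.getPath().getName().startsWith("_")) {
        list.add(status);
    }
}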

From source file: com.linkedin.pinot.hadoop.job.SegmentTarPushJob.java

License: Apache License

public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
    String fileName = path.getName();
    if (!fileName.endsWith(".tar.gz")) {
        return;
    }
    long length = fs.getFileStatus(path).getLen();
    for (String host : _hosts) {
        InputStream inputStream = null;
        try {
            inputStream = fs.open(path);
            fileName = fileName.split(".tar")[0];
            LOGGER.info("******** Upoading file: {} to Host: {} and Port: {} *******", fileName, host, _port);
            try {
                int responseCode = FileUploadUtils.sendSegmentFile(host, _port, fileName, inputStream, length);
                LOGGER.info("Response code: {}", responseCode);
            } catch (Exception e) {
                LOGGER.error("******** Error Upoading file: {} to Host: {} and Port: {}  *******", fileName,
                        host, _port);
                LOGGER.error("Caught exception during upload", e);
                throw new RuntimeException("Got Error during send tar files to push hosts!");
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}
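
Since FSDataInputStream is Closeable, the open/close pattern above is often written with try-with-resources (Java 7+), which also avoids calling close() on a null stream when fs.open fails. A sketch of the same per-host loop:

for (String host : _hosts) {
    // The stream is closed automatically, even if the upload throws.
    try (InputStream inputStream = fs.open(path)) {
        int responseCode = FileUploadUtils.sendSegmentFile(host, _port, fileName, inputStream, length);
        LOGGER.info("Response code: {}", responseCode);
    }
}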

From source file: com.linkedin.thirdeye.hadoop.push.SegmentPushPhase.java

License: Apache License

public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
    String fileName = path.getName();
    if (!fileName.endsWith(".tar.gz")) {
        return;
    }
    long length = fs.getFileStatus(path).getLen();
    for (String host : hosts) {
        InputStream inputStream = null;
        try {
            inputStream = fs.open(path);
            fileName = fileName.split(".tar")[0];
            if (fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER) != -1) {
                segmentName = fileName.substring(0, fileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER));
            }
            LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", fileName, host, port);
            try {
                int responseCode = FileUploadUtils.sendSegmentFile(host, port, fileName, inputStream, length);
                LOGGER.info("Response code: {}", responseCode);

                if (uploadSuccess && responseCode != 200) {
                    uploadSuccess = false;
                }

            } catch (Exception e) {
                LOGGER.error("******** Error Uploading file: {} to Host: {} and Port: {}  *******", fileName,
                        host, port);
                LOGGER.error("Caught exception during upload", e);
                throw new RuntimeException("Got Error during send tar files to push hosts!");
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}

From source file: com.liveramp.cascading_ext.FileSystemHelper.java

License: Apache License

private static void printFiles(FileSystem fs, Path p, int indent) throws IOException {
    FileStatus stat = fs.getFileStatus(p);
    for (int i = 0; i < indent; i++) {
        System.out.print("\t");
    }
    System.out.println(p.toString());
    if (stat.isDir()) {
        for (FileStatus child : fs.listStatus(p)) {
            printFiles(fs, child.getPath(), indent + 1);
        }
    }
}
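
A hypothetical invocation that prints an entire tree from the root, shown as a sketch:

FileSystem fs = FileSystem.get(new Configuration());
printFiles(fs, new Path("/"), 0);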

From source file: com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java

License: Creative Commons License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit
 *            The CombineFileSplit that this will read from.
 * @param context
 *            The context for this task.
 * @param pathToProcess
 *            The path index from the CombineFileSplit to process in this
 *            record.
 */
public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context,
        Integer pathToProcess) {
    isProcessed = false;

    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);

    mConf = context.getConfiguration();

    /* never used in production, just for code integrity */
    assert 0 == fileSplit.getOffset(pathToProcess);

    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            FileSystem fs = FileSystem.get(mConf);

            /* never used in production, just for code integrity */
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            LOG.debug("Problem in file length");
        }
    }

    fileContent = new BytesWritable();
}

From source file: com.marklogic.contentpump.CombineDocumentInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
    CombineDocumentSplit split = null;
    for (InputSplit file : splits) {
        Path path = ((FileSplit) file).getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        FileStatus status = fs.getFileStatus(path);
        long length = status.getLen();
        long blockSize = status.getBlockSize();
        long splitSize = computeSplitSize(blockSize, minSize, maxSize);
        if (length != 0) {
            if (split == null) {
                split = new CombineDocumentSplit();
            }

            try {
                if (split.getLength() + length < splitSize || split.getLength() < minSize) {
                    split.addSplit((FileSplit) file);
                } else {
                    combinedSplits.add(split);
                    split = new CombineDocumentSplit();
                    split.addSplit((FileSplit) file);
                }
            } catch (InterruptedException e) {
                LOG.error(e);
                throw new RuntimeException(e);
            }
        }
    }
    if (split != null) {
        combinedSplits.add(split);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits: " + splits.size());
        LOG.debug("Total # of combined splits: " + combinedSplits.size());
    }

    return combinedSplits;
}
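
For context, computeSplitSize is inherited from FileInputFormat, where it clamps the file's block size into the configured [minSize, maxSize] range:

// FileInputFormat's split-size rule: blockSize clamped between minSize and maxSize.
protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}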

From source file: com.marklogic.contentpump.FileAndDirectoryInputFormat.java

License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = new ArrayList<InputSplit>();
    Configuration conf = job.getConfiguration();
    try {
        List<FileStatus> files = listStatus(job);

        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
        long maxSize = getMaxSplitSize(job);
        for (FileStatus child : files) {
            Path path = child.getPath();
            FileSystem fs = path.getFileSystem(conf);
            // length is 0 for dir according to FSDirectory.java in 0.20
            // however, w/ Hadoop2, dir in local fs has non-zero length
            long length = child.getLen();
            BlockLocation[] blkLocations = null;
            if (!child.isDirectory() || !(fs instanceof DistributedFileSystem)) {
                blkLocations = fs.getFileBlockLocations(child, 0, length);
            } else if (length != 0) {
                throw new IOException("non-zero length directory on HDFS:" + path.toUri().toString());
            }

            if ((length != 0) && isSplitable(job, path)) {
                long blockSize = child.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkLocations.length - 1].getHosts()));
                }
            } else if (length != 0) {
                splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
            } else {
                // Create empty hosts array for zero length files
                splits.add(new FileSplit(path, 0, length, new String[0]));
            }
        }
    } catch (InvalidInputException ex) {
        String inPath = conf.get(ConfigConstants.CONF_INPUT_DIRECTORY);
        String pattern = conf.get(ConfigConstants.CONF_INPUT_FILE_PATTERN, ".*");
        throw new IOException("No input files found with the specified input path " + inPath
                + " and input file pattern " + pattern, ex);
    }

    PathFilter jobFilter = getInputPathFilter(job);
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);
    if (jobFilter != null) {
        filters.add(jobFilter);
    }
    PathFilter inputFilter = new MultiPathFilter(filters);
    // take a second pass of the splits generated to extract files from
    // directories
    int count = 0;
    // flatten directories until reaching SPLIT_COUNT_LIMIT
    while (count < splits.size() && splits.size() < SPLIT_COUNT_LIMIT) {
        FileSplit split = (FileSplit) splits.get(count);
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FileStatus status = fs.getFileStatus(file);
        if (status.isDirectory()) {
            FileStatus[] children = fs.listStatus(file, inputFilter);
            if (children.length + count < SPLIT_COUNT_LIMIT) {
                splits.remove(count);
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    splits.add(child);
                }
            } else {
                count++;
            }
        } else {
            count++;
        }
    }
    return splits;
}

From source file: com.marklogic.contentpump.utilities.FileIterator.java

License: Apache License

@Override
public FileSplit next() {
    while (iterator.hasNext() || !fileDirSplits.isEmpty()) {
        try {
            if (iterator.hasNext()) {
                FileSplit split = iterator.next();
                Path file = ((FileSplit) split).getPath();

                FileSystem fs = file.getFileSystem(conf);

                FileStatus status = fs.getFileStatus(file);
                if (status.isDirectory()) {
                    FileStatus[] children = fs.listStatus(status.getPath(), inputFilter);
                    for (FileStatus stat : children) {
                        FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                        fileDirSplits.add(child);
                    }
                } else {
                    return split;
                }

            } else if (!fileDirSplits.isEmpty()) {
                FileSplit split = (FileSplit) fileDirSplits.remove(0);
                Path file = split.getPath();
                FileSystem fs = file.getFileSystem(conf);
                FileStatus status = fs.getFileStatus(file);

                if (!status.isDirectory()) {
                    return split;
                }
                FileStatus[] children = fs.listStatus(status.getPath(), inputFilter);

                List<FileSplit> expdFileSpts = new LinkedList<FileSplit>();
                for (FileStatus stat : children) {
                    FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null);
                    expdFileSpts.add(child);
                }
                iterator = expdFileSpts.iterator();
                continue;
            }
        } catch (IOException e) {
            LOG.error("Invalid next file", e);
        }
    }
    return null;
}

From source file: com.marklogic.mapreduce.LargeBinaryDocument.java

License: Apache License

public byte[] getContentAsByteArray(int offset, int len) {
    FileSystem fs;
    FSDataInputStream is = null;
    try {
        fs = path.getFileSystem(conf);
        if (!fs.exists(path)) {
            throw new RuntimeException("File not found: " + path);
        }
        FileStatus status = fs.getFileStatus(path);
        if (status.getLen() < offset) {
            throw new RuntimeException("Reached end of file: " + path);
        }
        byte[] buf = new byte[len];
        is = fs.open(path);
        // Skip to the requested offset; skipBytes may skip fewer bytes per call.
        for (int toSkip = offset; toSkip > 0;) {
            toSkip -= is.skipBytes(toSkip);
        }
        for (int bytesRead = 0; bytesRead < len;) {
            bytesRead += is.read(buf, bytesRead, len - bytesRead);
        }
        return buf;
    } catch (IOException e) {
        throw new RuntimeException("Error accessing file: " + path, e);
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                // ignore failure to close the stream
            }
        }
    }
}
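
Because FSDataInputStream implements PositionedReadable, the manual skip and read loops above can be replaced with a single positioned readFully call. A minimal sketch of the same read, error handling omitted:

byte[] buf = new byte[len];
FSDataInputStream in = fs.open(path);
try {
    // Reads exactly buf.length bytes starting at offset, or throws EOFException.
    in.readFully(offset, buf);
} finally {
    in.close();
}
return buf;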

From source file: com.mellanox.r4h.DistributedFileSystem.java

License: Apache License

/**
 * Returns the stat information about the file.
 *
 * @throws FileNotFoundException
 *             if the file does not exist.
 */
@Override
public FileStatus getFileStatus(Path f) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FileStatus>() {
        @Override
        public FileStatus doCall(final Path p) throws IOException, UnresolvedLinkException {
            HdfsFileStatus fi = dfs.getFileInfo(getPathName(p));
            if (fi != null) {
                return fi.makeQualified(getUri(), p);
            } else {
                throw new FileNotFoundException("File does not exist: " + p);
            }
        }

        @Override
        public FileStatus next(final FileSystem fs, final Path p) throws IOException {
            return fs.getFileStatus(p);
        }
    }.resolve(this, absF);
}
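
Because getFileStatus signals a missing path with FileNotFoundException rather than a null return, a caller can combine an existence check with a metadata fetch in a single round trip. A sketch:

// One call instead of fs.exists(path) followed by fs.getFileStatus(path).
FileStatus status;
try {
    status = fs.getFileStatus(path);
} catch (FileNotFoundException e) {
    status = null; // path does not exist
}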