List of usage examples for org.apache.hadoop.fs.FileSystem.getFileStatus
/**
 * Returns the {@link FileStatus} describing the given path.
 *
 * @param f the path to look up
 * @return the status object for {@code f}
 * @throws IOException declared by the signature; each implementation decides when it is raised
 */
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
private List<FileStatus> getAllSubFileStatus(JobContext jobContext, Path filterMemberPath) throws IOException { List<FileStatus> list = new ArrayList<FileStatus>(); FileSystem fs = filterMemberPath.getFileSystem(jobContext.getConfiguration()); FileStatus[] subFiles = fs.listStatus(filterMemberPath); if (null != subFiles) { if (fs.getFileStatus(filterMemberPath).isDir()) { for (FileStatus subFile : subFiles) { if (!subFile.getPath().getName().startsWith("_")) { list.addAll(getAllSubFileStatus(jobContext, subFile.getPath())); }//from w ww .j a va 2 s .c o m } } else { if (subFiles.length > 0 && !subFiles[0].getPath().getName().startsWith("_")) { list.add(subFiles[0]); } } } return list; }
From source file:com.linkedin.pinot.hadoop.job.SegmentTarPushJob.java
License:Apache License
public void pushOneTarFile(FileSystem fs, Path path) throws Exception { String fileName = path.getName(); if (!fileName.endsWith(".tar.gz")) { return;//from w ww.j a v a2 s.c om } long length = fs.getFileStatus(path).getLen(); for (String host : _hosts) { InputStream inputStream = null; try { inputStream = fs.open(path); fileName = fileName.split(".tar")[0]; LOGGER.info("******** Upoading file: {} to Host: {} and Port: {} *******", fileName, host, _port); try { int responseCode = FileUploadUtils.sendSegmentFile(host, _port, fileName, inputStream, length); LOGGER.info("Response code: {}", responseCode); } catch (Exception e) { LOGGER.error("******** Error Upoading file: {} to Host: {} and Port: {} *******", fileName, host, _port); LOGGER.error("Caught exception during upload", e); throw new RuntimeException("Got Error during send tar files to push hosts!"); } } finally { inputStream.close(); } } }
From source file:com.linkedin.thirdeye.hadoop.push.SegmentPushPhase.java
License:Apache License
/**
 * Uploads one segment tarball to every host in {@code hosts} on port {@code port}.
 *
 * <p>Files whose name does not end with {@code .tar.gz} are silently skipped. The name sent
 * to the hosts is the file name truncated at the first literal {@code ".tar"}. When that
 * name contains {@code ThirdEyeConstants.SEGMENT_JOINER}, {@code segmentName} is set to the
 * part before its last occurrence. {@code uploadSuccess} is cleared as soon as any host
 * answers with a response code other than 200.
 *
 * @param fs the file system holding the tarball
 * @param path the tarball to upload
 * @throws RuntimeException if sending to any host fails; the original failure is the cause
 * @throws Exception if reading the file status or opening/closing the stream fails
 */
public void pushOneTarFile(FileSystem fs, Path path) throws Exception {
    String fileName = path.getName();
    if (!fileName.endsWith(".tar.gz")) {
        return;
    }
    // Literal search instead of String.split(".tar"): split takes a regex, in which '.'
    // matches any character and would truncate names such as "xtar_1.tar.gz" too early.
    // The endsWith check above guarantees that ".tar" is present.
    String segmentFileName = fileName.substring(0, fileName.indexOf(".tar"));
    long length = fs.getFileStatus(path).getLen();

    for (String host : hosts) {
        InputStream inputStream = null;
        try {
            inputStream = fs.open(path);
            int joinerIndex = segmentFileName.lastIndexOf(ThirdEyeConstants.SEGMENT_JOINER);
            if (joinerIndex != -1) {
                segmentName = segmentFileName.substring(0, joinerIndex);
            }
            LOGGER.info("******** Uploading file: {} to Host: {} and Port: {} *******", segmentFileName, host, port);
            try {
                int responseCode = FileUploadUtils.sendSegmentFile(host, port, segmentFileName, inputStream, length);
                LOGGER.info("Response code: {}", responseCode);
                if (uploadSuccess && responseCode != 200) {
                    uploadSuccess = false;
                }
            } catch (Exception e) {
                LOGGER.error("******** Error Uploading file: {} to Host: {} and Port: {} *******", segmentFileName, host, port);
                LOGGER.error("Caught exception during upload", e);
                // Keep the cause so callers can see why the upload failed.
                throw new RuntimeException("Got Error during send tar files to push hosts!", e);
            }
        } finally {
            // fs.open may have thrown before the stream was assigned.
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }
}
From source file:com.liveramp.cascading_ext.FileSystemHelper.java
License:Apache License
private static void printFiles(FileSystem fs, Path p, int indent) throws IOException { FileStatus stat = fs.getFileStatus(p); for (int i = 0; i < indent; i++) { System.out.print("\t"); }//from w w w. ja va 2 s . c om System.out.println(p.toString()); if (stat.isDir()) { for (FileStatus child : fs.listStatus(p)) { printFiles(fs, child.getPath(), indent + 1); } } }
From source file:com.marcolotz.lung.io.inputFormat.MultipleFilesRecordReader.java
License:Creative Commons License
/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within CombineFileRecordReader.
 *
 * @param fileSplit
 *            The CombineFileSplit that this will read from.
 * @param context
 *            The context for this task.
 * @param pathToProcess
 *            The path index from the CombineFileSplit to process in this
 *            record.
 */
public MultipleFilesRecordReader(CombineFileSplit fileSplit, TaskAttemptContext context, Integer pathToProcess) {
    isProcessed = false;
    mFileToRead = fileSplit.getPath(pathToProcess);
    mFileLength = fileSplit.getLength(pathToProcess);
    mConf = context.getConfiguration();

    /* never used in production, just for code integrity */
    assert 0 == fileSplit.getOffset(pathToProcess);

    if (LOG.isDebugEnabled()) {
        LOG.debug("FileToRead is: " + mFileToRead.toString());
        LOG.debug("Processing path " + pathToProcess + " out of " + fileSplit.getNumPaths());

        try {
            // Resolve the file system from the path itself: FileSystem.get(conf) returns
            // the default file system, which need not be the one that owns this file.
            FileSystem fs = mFileToRead.getFileSystem(mConf);

            /* never used in production, just for code integrity */
            assert fs.getFileStatus(mFileToRead).getLen() == mFileLength;
        } catch (IOException ioe) {
            // Sanity check only: record the failure with its cause and carry on.
            LOG.debug("Problem in file length", ioe);
        }
    }

    fileContent = new BytesWritable();
}
From source file:com.marklogic.contentpump.CombineDocumentInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job)); long maxSize = getMaxSplitSize(job); // generate splits List<InputSplit> splits = super.getSplits(job); List<InputSplit> combinedSplits = new ArrayList<InputSplit>(); CombineDocumentSplit split = null;/*from w ww. j a va 2 s.c om*/ for (InputSplit file : splits) { Path path = ((FileSplit) file).getPath(); FileSystem fs = path.getFileSystem(job.getConfiguration()); FileStatus status = fs.getFileStatus(path); long length = status.getLen(); long blockSize = status.getBlockSize(); long splitSize = computeSplitSize(blockSize, minSize, maxSize); if (length != 0) { if (split == null) { split = new CombineDocumentSplit(); } try { if (split.getLength() + length < splitSize || split.getLength() < minSize) { split.addSplit((FileSplit) file); } else { combinedSplits.add(split); split = new CombineDocumentSplit(); split.addSplit((FileSplit) file); } } catch (InterruptedException e) { LOG.error(e); throw new RuntimeException(e); } } } if (split != null) { combinedSplits.add(split); } if (LOG.isDebugEnabled()) { LOG.debug("Total # of splits: " + splits.size()); LOG.debug("Total # of combined splits: " + combinedSplits.size()); } return combinedSplits; }
From source file:com.marklogic.contentpump.FileAndDirectoryInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext job) throws IOException { List<InputSplit> splits = new ArrayList<InputSplit>(); Configuration conf = job.getConfiguration(); try {/*from w w w . ja v a 2s . c o m*/ List<FileStatus> files = listStatus(job); long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job)); long maxSize = getMaxSplitSize(job); for (FileStatus child : files) { Path path = child.getPath(); FileSystem fs = path.getFileSystem(conf); // length is 0 for dir according to FSDirectory.java in 0.20 // however, w/ Hadoop2, dir in local fs has non-zero length long length = child.getLen(); BlockLocation[] blkLocations = null; if (!child.isDirectory() || fs instanceof DistributedFileSystem == false) { blkLocations = fs.getFileBlockLocations(child, 0, length); } else if (length != 0) { throw new IOException("non-zero length directory on HDFS:" + path.toUri().toString()); } if ((length != 0) && isSplitable(job, path)) { long blockSize = child.getBlockSize(); long splitSize = computeSplitSize(blockSize, minSize, maxSize); long bytesRemaining = length; while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(new FileSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())); } } else if (length != 0) { splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts())); } else { // Create empty hosts array for zero length files splits.add(new FileSplit(path, 0, length, new String[0])); } } } catch (InvalidInputException ex) { String inPath = conf.get(ConfigConstants.CONF_INPUT_DIRECTORY); String pattern = conf.get(ConfigConstants.CONF_INPUT_FILE_PATTERN, ".*"); throw new IOException("No input files found with the specified input path " + inPath + " and 
input file pattern " + pattern, ex); } PathFilter jobFilter = getInputPathFilter(job); List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); // take a second pass of the splits generated to extract files from // directories int count = 0; // flatten directories until reaching SPLIT_COUNT_LIMIT while (count < splits.size() && splits.size() < SPLIT_COUNT_LIMIT) { FileSplit split = (FileSplit) splits.get(count); Path file = split.getPath(); FileSystem fs = file.getFileSystem(conf); FileStatus status = fs.getFileStatus(file); if (status.isDirectory()) { FileStatus[] children = fs.listStatus(file, inputFilter); if (children.length + count < SPLIT_COUNT_LIMIT) { splits.remove(count); for (FileStatus stat : children) { FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null); splits.add(child); } } else { count++; } } else { count++; } } return splits; }
From source file:com.marklogic.contentpump.utilities.FileIterator.java
License:Apache License
@Override public FileSplit next() { while (iterator.hasNext() || !fileDirSplits.isEmpty()) { try {//from w w w.j a va 2 s . co m if (iterator.hasNext()) { FileSplit split = iterator.next(); Path file = ((FileSplit) split).getPath(); FileSystem fs = file.getFileSystem(conf); FileStatus status = fs.getFileStatus(file); if (status.isDirectory()) { FileStatus[] children = fs.listStatus(status.getPath(), inputFilter); for (FileStatus stat : children) { FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null); fileDirSplits.add(child); } } else return split; } else if (!fileDirSplits.isEmpty()) { FileSplit split = (FileSplit) fileDirSplits.remove(0); Path file = split.getPath(); FileSystem fs = file.getFileSystem(conf); FileStatus status = fs.getFileStatus(file); if (!status.isDirectory()) { return split; } FileStatus[] children = fs.listStatus(status.getPath(), inputFilter); List<FileSplit> expdFileSpts = new LinkedList<FileSplit>(); for (FileStatus stat : children) { FileSplit child = new FileSplit(stat.getPath(), 0, stat.getLen(), null); expdFileSpts.add(child); } iterator = expdFileSpts.iterator(); continue; } } catch (IOException e) { LOG.error("Invalid next file", e); } } return null; }
From source file:com.marklogic.mapreduce.LargeBinaryDocument.java
License:Apache License
public byte[] getContentAsByteArray(int offset, int len) { FileSystem fs; FSDataInputStream is = null;// w w w . j a v a2 s. c om try { fs = path.getFileSystem(conf); if (!fs.exists(path)) { throw new RuntimeException("File not found: " + path); } FileStatus status = fs.getFileStatus(path); if (status.getLen() < offset) { throw new RuntimeException("Reached end of file: " + path); } byte[] buf = new byte[len]; is = fs.open(path); for (int toSkip = offset, skipped = 0; toSkip < offset; toSkip -= skipped) { skipped = is.skipBytes(offset); } for (int bytesRead = 0; bytesRead < len;) { bytesRead += is.read(buf, bytesRead, len - bytesRead); } return buf; } catch (IOException e) { throw new RuntimeException("Error accessing file: " + path, e); } finally { if (is != null) { try { is.close(); } catch (IOException e) { } } } }
From source file:com.mellanox.r4h.DistributedFileSystem.java
License:Apache License
/** * Returns the stat information about the file. * /* w ww.ja v a2s .c o m*/ * @throws FileNotFoundException * if the file does not exist. */ @Override public FileStatus getFileStatus(Path f) throws IOException { statistics.incrementReadOps(1); Path absF = fixRelativePart(f); return new FileSystemLinkResolver<FileStatus>() { @Override public FileStatus doCall(final Path p) throws IOException, UnresolvedLinkException { HdfsFileStatus fi = dfs.getFileInfo(getPathName(p)); if (fi != null) { return fi.makeQualified(getUri(), p); } else { throw new FileNotFoundException("File does not exist: " + p); } } @Override public FileStatus next(final FileSystem fs, final Path p) throws IOException { return fs.getFileStatus(p); } }.resolve(this, absF); }