List of usage examples for org.apache.hadoop.fs.FileSystem.getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
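Before the project examples below, here is a minimal, self-contained sketch of the call (not taken from any of the listed projects; the path argument is whatever the caller supplies). It resolves a FileSystem from the Path, fetches the FileStatus, and handles the FileNotFoundException that getFileStatus throws when the path does not exist:

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);
        FileSystem fs = path.getFileSystem(conf);
        try {
            FileStatus status = fs.getFileStatus(path);
            System.out.println("length      = " + status.getLen());
            System.out.println("isDirectory = " + status.isDirectory());
            System.out.println("modified    = " + status.getModificationTime());
        } catch (FileNotFoundException e) {
            // getFileStatus throws FileNotFoundException for a missing path
            System.err.println("No such path: " + path);
        }
    }
}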
From source file:com.google.mr4c.content.S3ContentFactory.java
License:Open Source License
public long getContentLength(URI uri) throws IOException {
    Path path = toPath(uri);
    FileSystem fs = getFileSystem(uri);
    FileStatus file = fs.getFileStatus(path);
    return file.getLen();
}
From source file:com.google.mr4c.sources.URIDataFileSource.java
License:Open Source License
@Override
public BlockLocation[] getBlockLocation() throws IOException {
    URI uri = ContentFactories.scrubURI(m_uri);
    FileSystem fs = FileSystem.get(uri, s_config);
    Path path = new Path(uri);
    FileStatus status = fs.getFileStatus(path);
    return fs.getFileBlockLocations(status, 0, status.getBlockSize());
}
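Note that this example passes status.getBlockSize() as the length argument, so it retrieves locations for the first block only. A one-line variant (mine, not from the project) that covers the file's whole byte range instead:

// Variant: request block locations for the entire file rather than just the first block.
BlockLocation[] allBlocks = fs.getFileBlockLocations(status, 0, status.getLen());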
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
private InputStream forMagic(Path p, FileSystem srcFs) throws IOException {
    FSDataInputStream i = srcFs.open(p);
    switch (i.readShort()) {
    case 0x1f8b: // gzip magic, RFC 1952
        i.seek(0);
        return new GZIPInputStream(i);
    case 0x5345: // 'S' 'E'
        if (i.readByte() == 'Q') { // 'S' 'E' 'Q': SequenceFile magic
            i.close();
            return new TextRecordInputStream(srcFs.getFileStatus(p));
        }
        break;
    }
    i.seek(0);
    // fall back on the file extension for Snappy
    if (isSnappy(p.getName())) {
        return getSnappyCodec().createInputStream(i);
    }
    return i;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
int runCmdHandler(CmdHandler handler, String[] args, int startIndex, boolean recursive) throws IOException {
    int errors = 0;
    for (int i = startIndex; i < args.length; i++) {
        Path srcPath = new Path(args[i]);
        FileSystem srcFs = srcPath.getFileSystem(getConf());
        Path[] paths = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
        // if nothing matches the given glob pattern, increment the error count
        if (paths.length == 0) {
            System.err.println(handler.getName() + ": could not get status for '" + args[i] + "'");
            errors++;
        }
        for (Path path : paths) {
            try {
                FileStatus file = srcFs.getFileStatus(path);
                if (file == null) {
                    System.err.println(handler.getName() + ": could not get status for '" + path + "'");
                    errors++;
                } else {
                    errors += runCmdHandler(handler, file, srcFs, recursive);
                }
            } catch (IOException e) {
                String msg = (e.getMessage() != null ? e.getLocalizedMessage()
                        : (e.getCause().getMessage() != null ? e.getCause().getLocalizedMessage() : "null"));
                System.err.println(
                        handler.getName() + ": could not get status for '" + path + "': " + msg.split("\n")[0]);
                errors++;
            }
        }
    }
    return (errors > 0 || handler.getErrorCode() != 0) ? 1 : 0;
}
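The pattern above — expand a glob with globStatus, then stat each match — is common in shell-style tools. A condensed sketch of just that pattern (a hypothetical helper, not part of CustomShell) that sums the lengths of all matching files; note that globStatus already returns FileStatus objects, so a separate getFileStatus call per match is only needed if you want a fresh stat:

// Hypothetical helper illustrating the glob-then-stat pattern above.
static long totalLength(FileSystem fs, Path globPattern) throws IOException {
    long total = 0;
    FileStatus[] matches = fs.globStatus(globPattern);
    if (matches == null) {
        return 0; // globStatus may return null when the path does not exist
    }
    for (FileStatus match : matches) {
        total += match.getLen();
    }
    return total;
}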
From source file:com.hadoop.compression.fourmc.FourMcInputStream.java
License:BSD License
/**
 * Reads the blocks index at the tail of the file.
 *
 * @param fs   filesystem
 * @param file path to 4mc file
 * @return block index
 * @throws IOException
 */
public static FourMcBlockIndex readIndex(FileSystem fs, Path file) throws IOException {
    long fileSize = fs.getFileStatus(file).getLen();
    if (fileSize < (12 + 20)) { // file too small
        return new FourMcBlockIndex();
    }

    FSDataInputStream indexIn = fs.open(file);

    /* 4mc footer layout:
       Footer size:         4 bytes
       Footer version:      4 bytes (1)
       Block index offsets: 4 bytes per stored block, each the delta between the
                            previous block's file position and the next block
       Footer size:         4 bytes (repeated, so it can be read from the end of file)
       Magic signature:     4 bytes: "4MC\0"
       Footer checksum:     4 bytes (always XXHASH32)
     */

    // Jump to the file tail and read ahead the last 4 KB, which is enough in most cases.
    // Improvement: estimate a best-case compression factor of 10% and forecast, from the
    // file size and block size, whether it is better to read ahead more.
    int readTailSize = 4 * 1024;
    if (readTailSize > (fileSize - 12))
        readTailSize = (int) (fileSize - 12);

    indexIn.seek(fileSize - readTailSize);
    byte[] buf = new byte[readTailSize];
    readFully(indexIn, buf, 0, buf.length);
    int footerSize = getInt(buf, buf.length - 12);
    int magic = getInt(buf, buf.length - 8);
    int checksum = getInt(buf, buf.length - 4);

    if (magic != FourMcCodec.FOURMC_MAGIC) {
        throw new IOException("Invalid 4mc footer magic");
    }
    if (footerSize >= (fileSize - 12)) {
        throw new IOException("Invalid 4mc footer checksum");
    }

    // very rare case: the read-ahead was not enough! seek back and read it all
    if (footerSize > readTailSize) {
        readTailSize = footerSize;
        indexIn.seek(fileSize - readTailSize);
        buf = new byte[readTailSize];
        readFully(indexIn, buf, 0, buf.length);
    }
    indexIn.close();

    int startFooterOffset = readTailSize - footerSize;
    if (getInt(buf, startFooterOffset) != footerSize) { // size again
        throw new IOException("Invalid 4mc footer size");
    }
    if (getInt(buf, startFooterOffset + 4) != FourMcCodec.FOURMC_VERSION) { // version
        throw new IOException("Invalid 4mc footer version (" + getInt(buf, startFooterOffset + 4) + ")");
    }
    if (checksum != Lz4Decompressor.xxhash32(buf, startFooterOffset, footerSize - 4, 0)) {
        throw new IOException("Invalid 4mc footer checksum");
    }

    int totalBlocks = (footerSize - 20) / 4;
    FourMcBlockIndex index = new FourMcBlockIndex(totalBlocks);
    long curOffset = 0;
    for (int i = 0; i < totalBlocks; ++i) {
        curOffset += getInt(buf, startFooterOffset + 8 + (i * 4));
        index.set(i, curOffset);
    }
    return index;
}
From source file:com.hadoop.compression.fourmc.FourMzInputStream.java
License:BSD License
/**
 * Reads the blocks index at the tail of the file.
 *
 * @param fs   filesystem
 * @param file path to 4mc file
 * @return block index
 * @throws IOException
 */
public static FourMzBlockIndex readIndex(FileSystem fs, Path file) throws IOException {
    long fileSize = fs.getFileStatus(file).getLen();
    if (fileSize < (12 + 20)) { // file too small
        return new FourMzBlockIndex();
    }

    FSDataInputStream indexIn = fs.open(file);

    // Jump to the file tail and read ahead the last 4 KB, which is enough in most cases.
    // Improvement: estimate a best-case compression factor of 10% and forecast, from the
    // file size and block size, whether it is better to read ahead more.
    int readTailSize = 4 * 1024;
    if (readTailSize > (fileSize - 12))
        readTailSize = (int) (fileSize - 12);

    indexIn.seek(fileSize - readTailSize);
    byte[] buf = new byte[readTailSize];
    readFully(indexIn, buf, 0, buf.length);
    int footerSize = getInt(buf, buf.length - 12);
    int magic = getInt(buf, buf.length - 8);
    int checksum = getInt(buf, buf.length - 4);

    if (magic != FourMzCodec.FOURMZ_MAGIC) {
        throw new IOException("Invalid 4mc footer magic");
    }
    if (footerSize >= (fileSize - 12)) {
        throw new IOException("Invalid 4mc footer checksum");
    }

    // very rare case: the read-ahead was not enough! seek back and read it all
    if (footerSize > readTailSize) {
        readTailSize = footerSize;
        indexIn.seek(fileSize - readTailSize);
        buf = new byte[readTailSize];
        readFully(indexIn, buf, 0, buf.length);
    }
    indexIn.close();

    int startFooterOffset = readTailSize - footerSize;
    if (getInt(buf, startFooterOffset) != footerSize) { // size again
        throw new IOException("Invalid 4mc footer size");
    }
    if (getInt(buf, startFooterOffset + 4) != FourMzCodec.FOURMZ_VERSION) { // version
        throw new IOException("Invalid 4mc footer version (" + getInt(buf, startFooterOffset + 4) + ")");
    }
    if (checksum != ZstdDecompressor.xxhash32(buf, startFooterOffset, footerSize - 4, 0)) {
        throw new IOException("Invalid 4mc footer checksum");
    }

    int totalBlocks = (footerSize - 20) / 4;
    FourMzBlockIndex index = new FourMzBlockIndex(totalBlocks);
    long curOffset = 0;
    for (int i = 0; i < totalBlocks; ++i) {
        curOffset += getInt(buf, startFooterOffset + 8 + (i * 4));
        index.set(i, curOffset);
    }
    return index;
}
From source file:com.hadoop.compression.lzo.LzoIndexer.java
License:Open Source License
/**
 * LZO-indexes a given path, recursing to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath      the base path to index
 * @param nestingLevel the nesting level, for pretty printing
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();
            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s). Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}
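The FileStatus.isDir() call above is deprecated in current Hadoop releases in favor of isDirectory(). A condensed sketch of the same recursive walk against the newer API (my variant, not the LzoIndexer code):

// Condensed recursive walk using the non-deprecated isDirectory().
private void walk(FileSystem fs, Path path) throws IOException {
    FileStatus status = fs.getFileStatus(path);
    if (status.isDirectory()) {
        for (FileStatus child : fs.listStatus(path)) {
            walk(fs, child.getPath());
        }
    } else {
        System.out.println(path + " : " + status.getLen() + " bytes");
    }
}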
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits);
    // Find new starts/ends of the file split that align with the LZO blocks.
    List<FileSplit> result = new ArrayList<FileSplit>();

    for (FileSplit fileSplit : splits) {
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);
        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // non-LZO file, keep the input split as is.
            result.add(fileSplit);
            continue;
        }

        // LZO file: try to split if the .index file was found
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // Empty index, keep it as is.
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long lzoStart = index.alignSliceStartToIndex(start, end);
        long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
            result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
        }
    }

    return result.toArray(new FileSplit[result.size()]);
}
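This input format, like the 4mc/4mz formats below, calls fs.getFileStatus(file).getLen() once per split, so a file that yields many splits is stat-ed many times. A sketch of memoizing the length per path (an optimization idea of mine, not code from these projects; it assumes the surrounding getSplits context and a java.util.HashMap import):

// Sketch: cache per-file lengths so many splits of one file trigger only one stat.
// 'lengthCache' is a hypothetical local variable.
Map<Path, Long> lengthCache = new HashMap<Path, Long>();
for (FileSplit fileSplit : splits) {
    Path file = fileSplit.getPath();
    Long fileLen = lengthCache.get(file);
    if (fileLen == null) {
        fileLen = file.getFileSystem(conf).getFileStatus(file).getLen();
        lengthCache.put(file, fileLen);
    }
    // ... align lzoEnd against fileLen as in the loop above ...
}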
From source file:com.hadoop.mapreduce.FourMcInputFormat.java
License:BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);
    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();

    Path prevFile = null;
    FourMcBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMcBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMcBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for an empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMcBlockIndex.NOT_FOUND && fourMcEnd != FourMcBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mc split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }

    return result;
}
From source file:com.hadoop.mapreduce.FourMzInputFormat.java
License:BSD License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);
    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();

    Path prevFile = null;
    FourMzBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMzBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMzBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for an empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMzBlockIndex.NOT_FOUND && fourMcEnd != FourMzBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mz split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }
    }

    return result;
}