List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
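Before the project-specific examples below, a minimal self-contained sketch of a typical getFileStatus call (the command-line path argument and the printed fields are illustrative assumptions, not taken from any of the examples):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GetFileStatusSketch {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            Path path = new Path(args[0]); // illustrative: any local or HDFS path
            FileSystem fs = path.getFileSystem(conf);
            // getFileStatus throws FileNotFoundException if the path does not exist
            FileStatus status = fs.getFileStatus(path);
            System.out.println("directory?  " + status.isDirectory());
            System.out.println("length      " + status.getLen());
            System.out.println("modified at " + status.getModificationTime());
        }
    }

The examples that follow use the same pattern: obtain a FileSystem, call getFileStatus, and read the length, type, or modification time from the returned FileStatus.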
From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java
License: Open Source License

private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }
        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}
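Note: this example (and several below) calls the deprecated FileStatus.isDir(). A one-method sketch of the equivalent check with the non-deprecated accessor, assuming a FileSystem and Path are already in scope:

    import java.io.IOException;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    class DirCheck {
        // FileStatus.isDirectory() is the current replacement for the deprecated isDir()
        static boolean isDirectory(FileSystem fs, Path path) throws IOException {
            return fs.getFileStatus(path).isDirectory();
        }
    }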
From source file: cc.solr.lucene.store.hdfs.ConvertDirectory.java
License: Apache License

public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}
From source file: cc.solr.lucene.store.hdfs.HdfsFileReader.java
License: Apache License

public HdfsFileReader(FileSystem fileSystem, Path path, int bufferSize) throws IOException {
    if (!fileSystem.exists(path)) {
        throw new FileNotFoundException(path.toString());
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    _hdfsLength = fileStatus.getLen();
    _inputStream = fileSystem.open(path, bufferSize);

    // read meta blocks
    _inputStream.seek(_hdfsLength - 16);
    int numberOfBlocks = _inputStream.readInt();
    _length = _inputStream.readLong();
    int version = _inputStream.readInt();
    if (version != VERSION) {
        throw new RuntimeException("Version of file [" + version + "] does not match reader [" + VERSION + "]");
    }
    _inputStream.seek(_hdfsLength - 16 - (numberOfBlocks * 24)); // 3 longs per block
    _metaBlocks = new ArrayList<HdfsMetaBlock>(numberOfBlocks);
    for (int i = 0; i < numberOfBlocks; i++) {
        HdfsMetaBlock hdfsMetaBlock = new HdfsMetaBlock();
        hdfsMetaBlock.readFields(_inputStream);
        _metaBlocks.add(hdfsMetaBlock);
    }
    seek(0);
}
From source file: cc.solr.lucene.store.hdfs.HdfsFileReader.java
License: Apache License

public static long getLength(FileSystem fileSystem, Path path) throws IOException {
    FSDataInputStream inputStream = null;
    try {
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        inputStream = fileSystem.open(path);
        long hdfsLength = fileStatus.getLen();
        inputStream.seek(hdfsLength - 12);
        long length = inputStream.readLong();
        int version = inputStream.readInt();
        if (version != VERSION) {
            throw new RuntimeException(
                    "Version of file [" + version + "] does not match reader [" + VERSION + "]");
        }
        return length;
    } finally {
        if (inputStream != null) {
            inputStream.close();
        }
    }
}
From source file: cc.wikitools.lucene.hadoop.FileSystemDirectory.java
License: Apache License

/**
 * Constructor
 * @param fs
 * @param directory
 * @param create
 * @param conf
 * @throws IOException
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {
    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDir();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}
From source file: clone.ReadSequenceFile.java
License: Apache License

public static void main(String[] args) throws IOException {
    if (args.length < 1) {
        System.out.println("args: [path] [max-num-of-records-per-file]");
        System.exit(-1);
    }
    String f = args[0];

    int max = Integer.MAX_VALUE;
    if (args.length >= 2) {
        max = Integer.parseInt(args[1]);
    }

    boolean useLocal = args.length >= 3 && args[2].equals("local");
    if (useLocal) {
        System.out.println("Reading from local filesystem");
    }

    FileSystem fs = useLocal ? FileSystem.getLocal(new Configuration()) : FileSystem.get(new Configuration());
    Path p = new Path(f);

    if (fs.getFileStatus(p).isDir()) {
        readSequenceFilesInDir(p, fs, max);
    } else {
        readSequenceFile(p, fs, max);
    }
}
From source file: co.cask.cdap.common.io.Locations.java
License: Apache License

/**
 * Creates a {@link StreamSizeProvider} for determining the size of the given {@link FSDataInputStream}.
 */
private static StreamSizeProvider createDFSStreamSizeProvider(final FileSystem fs, final Path path,
        FSDataInputStream input) {

    // This is the default provider to use. It will try to determine if the file is closed and return the size of it.
    final StreamSizeProvider defaultSizeProvider = new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            if (fs instanceof DistributedFileSystem) {
                if (((DistributedFileSystem) fs).isFileClosed(path)) {
                    return fs.getFileStatus(path).getLen();
                } else {
                    return -1L;
                }
            }
            // If the underlying file system is not DistributedFileSystem, just assume the file length tells the size
            return fs.getFileStatus(path).getLen();
        }
    };

    // This supplier abstracts out the logic for getting the DFSInputStream#getFileLength method using reflection.
    // Reflection is used to avoid a ClassLoading error if the DFSInputStream class is moved or the method gets renamed.
    final InputStream wrappedStream = input.getWrappedStream();
    final Supplier<Method> getFileLengthMethodSupplier = Suppliers.memoize(new Supplier<Method>() {
        @Override
        public Method get() {
            try {
                // This is a hack to get to the underlying DFSInputStream.
                // Need to revisit it when we need to support a different distributed file system.
                Class<? extends InputStream> cls = wrappedStream.getClass();
                String expectedName = "org.apache.hadoop.hdfs.DFSInputStream";
                if (!cls.getName().equals(expectedName)) {
                    throw new Exception(
                            "Expected wrapper class be " + expectedName + ", but got " + cls.getName());
                }

                Method getFileLengthMethod = cls.getMethod("getFileLength");
                if (!getFileLengthMethod.isAccessible()) {
                    getFileLengthMethod.setAccessible(true);
                }
                return getFileLengthMethod;
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    });

    return new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            // Try to determine the size using the default provider
            long size = defaultSizeProvider.size();
            if (size >= 0) {
                return size;
            }
            try {
                // If the default provider cannot determine the length, use the DFSInputStream#getFileLength method
                return (Long) getFileLengthMethodSupplier.get().invoke(wrappedStream);
            } catch (Throwable t) {
                LOG.warn("Unable to get actual file length from DFS input.", t);
                return size;
            }
        }
    };
}
From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License: Apache License

/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the File System to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }
        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                // It is a directory, so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // It does not exist, just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}
From source file: coldstorage.io.Reader.java
License: Apache License

public static void main(String[] args) throws IOException {
    List<Long> idsToFind = new ArrayList<Long>();
    int maxId = 100000000;
    Random random = new Random(1);
    for (int i = 0; i < 1000; i++) {
        long id = (long) random.nextInt(maxId);
        // System.out.println(id);
        idsToFind.add(id);
    }
    // idsToFind.clear();
    // idsToFind.add(58998000L);
    // Path pathData = new Path("./out/data.avro");
    // Path pathIndex = new Path("./out/data.index");
    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");
    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);

    FileStatus indexFileStatus = fileSystem.getFileStatus(pathIndex);
    FileStatus dataFileStatus = fileSystem.getFileStatus(pathData);
    FSDataInputStream indexInputStream = fileSystem.open(pathIndex);
    FSDataInputStream dataInputStream = fileSystem.open(pathData);
    AvroFSInput fsInput = new AvroFSInput(dataInputStream, dataFileStatus.getLen());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(fsInput, gdr);
    List<IndexKey> list = getList(indexInputStream, indexFileStatus.getLen());

    for (Long idToFind : idsToFind) {
        long t1 = System.nanoTime();
        GenericRecord lookupRecord = lookupRecord(reader, list, idToFind);
        long t2 = System.nanoTime();
        System.out.println("Found [" + idToFind + "] in [" + (t2 - t1) / 1000000.0 + " ms]:" + lookupRecord);
    }
}
From source file: colossal.pipe.ColFile.java
License: Apache License

public long getTimestamp(JobConf conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.getFileStatus(dfsPath).getModificationTime();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}