Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#getFileStatus, drawn from open source projects.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
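
A minimal sketch of a typical call is shown below. The file system is resolved from the path itself, and the path (/tmp/example.txt) is a hypothetical placeholder; getFileStatus throws a FileNotFoundException if the path does not exist.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical path; replace with a path that exists on your file system.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(new Configuration());

        // Throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length=" + status.getLen()
                + " isDirectory=" + status.isDirectory()
                + " modificationTime=" + status.getModificationTime());
    }
}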

Usage

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);

            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }

        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}
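
Note: FileStatus#isDir(), used in this and several later examples, is deprecated in current Hadoop releases in favor of isDirectory(). A minimal sketch of the directory branch above rewritten against the newer API (assuming Hadoop 2.x or later):

    FileStatus status = fs.getFileStatus(taskOutput);
    if (status.isDirectory()) {
        // Recurse into the directory's children, as moveTaskOutputs() does above.
        for (FileStatus child : fs.listStatus(taskOutput)) {
            moveTaskOutputs(conf, fs, jobOutputDir, child.getPath());
        }
    }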

From source file:cc.solr.lucene.store.hdfs.ConvertDirectory.java

License:Apache License

public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}

From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java

License:Apache License

public HdfsFileReader(FileSystem fileSystem, Path path, int bufferSize) throws IOException {
    if (!fileSystem.exists(path)) {
        throw new FileNotFoundException(path.toString());
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    _hdfsLength = fileStatus.getLen();
    _inputStream = fileSystem.open(path, bufferSize);

    // read meta blocks
    _inputStream.seek(_hdfsLength - 16);
    int numberOfBlocks = _inputStream.readInt();
    _length = _inputStream.readLong();
    int version = _inputStream.readInt();
    if (version != VERSION) {
        throw new RuntimeException("Version of file [" + version + "] does not match reader [" + VERSION + "]");
    }
    _inputStream.seek(_hdfsLength - 16 - (numberOfBlocks * 24)); // 3 longs per block
    _metaBlocks = new ArrayList<HdfsMetaBlock>(numberOfBlocks);
    for (int i = 0; i < numberOfBlocks; i++) {
        HdfsMetaBlock hdfsMetaBlock = new HdfsMetaBlock();
        hdfsMetaBlock.readFields(_inputStream);
        _metaBlocks.add(hdfsMetaBlock);
    }
    seek(0);
}

From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java

License:Apache License

public static long getLength(FileSystem fileSystem, Path path) throws IOException {
    FSDataInputStream inputStream = null;
    try {
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        inputStream = fileSystem.open(path);
        long hdfsLength = fileStatus.getLen();
        inputStream.seek(hdfsLength - 12);
        long length = inputStream.readLong();
        int version = inputStream.readInt();
        if (version != VERSION) {
            throw new RuntimeException(
                    "Version of file [" + version + "] does not match reader [" + VERSION + "]");
        }
        return length;
    } finally {
        if (inputStream != null) {
            inputStream.close();
        }
    }
}

From source file:cc.wikitools.lucene.hadoop.FileSystemDirectory.java

License:Apache License

/**
 * Constructor
 * @param fs
 * @param directory
 * @param create
 * @param conf
 * @throws IOException
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDir();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}

From source file:clone.ReadSequenceFile.java

License:Apache License

public static void main(String[] args) throws IOException {
    if (args.length < 1) {
        System.out.println("args: [path] [max-num-of-records-per-file]");
        System.exit(-1);
    }

    String f = args[0];

    int max = Integer.MAX_VALUE;
    if (args.length >= 2) {
        max = Integer.parseInt(args[1]);
    }

    boolean useLocal = args.length >= 3 && args[2].equals("local");

    if (useLocal) {
        System.out.println("Reading from local filesystem");
    }

    FileSystem fs = useLocal ? FileSystem.getLocal(new Configuration()) : FileSystem.get(new Configuration());
    Path p = new Path(f);

    if (fs.getFileStatus(p).isDir()) {
        readSequenceFilesInDir(p, fs, max);
    } else {
        readSequenceFile(p, fs, max);
    }
}

From source file:co.cask.cdap.common.io.Locations.java

License:Apache License

/**
 * Creates a {@link StreamSizeProvider} for determining the size of the given {@link FSDataInputStream}.
 */
private static StreamSizeProvider createDFSStreamSizeProvider(final FileSystem fs, final Path path,
        FSDataInputStream input) {
    // This is the default provider to use. It will try to determine if the file is closed and return the size of it.
    final StreamSizeProvider defaultSizeProvider = new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            if (fs instanceof DistributedFileSystem) {
                if (((DistributedFileSystem) fs).isFileClosed(path)) {
                    return fs.getFileStatus(path).getLen();
                } else {
                    return -1L;
                }
            }
            // If the underlying file system is not DistributedFileSystem, just assume the file length is the size
            return fs.getFileStatus(path).getLen();
        }
    };

    // This supplier is to abstract out the logic for getting the DFSInputStream#getFileLength method using reflection
    // Reflection is used to avoid a ClassLoading error if the DFSInputStream class is moved or the method gets renamed
    final InputStream wrappedStream = input.getWrappedStream();
    final Supplier<Method> getFileLengthMethodSupplier = Suppliers.memoize(new Supplier<Method>() {
        @Override
        public Method get() {
            try {
                // This is a hack to get to the underlying DFSInputStream
                // Need to revisit this when we need to support other distributed file systems
                Class<? extends InputStream> cls = wrappedStream.getClass();
                String expectedName = "org.apache.hadoop.hdfs.DFSInputStream";
                if (!cls.getName().equals(expectedName)) {
                    throw new Exception(
                            "Expected wrapper class to be " + expectedName + ", but got " + cls.getName());
                }

                Method getFileLengthMethod = cls.getMethod("getFileLength");
                if (!getFileLengthMethod.isAccessible()) {
                    getFileLengthMethod.setAccessible(true);
                }
                return getFileLengthMethod;
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    });

    return new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            // Try to determine the size using default provider
            long size = defaultSizeProvider.size();
            if (size >= 0) {
                return size;
            }
            try {
                // If not able to get length from the default provider, use the DFSInputStream#getFileLength method
                return (Long) getFileLengthMethodSupplier.get().invoke(wrappedStream);
            } catch (Throwable t) {
                LOG.warn("Unable to get actual file length from DFS input.", t);
                return size;
            }
        }
    };
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the File System to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }

        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                //It is a directory so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // it does not exist, so just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}

From source file:coldstorage.io.Reader.java

License:Apache License

public static void main(String[] args) throws IOException {

    List<Long> idsToFind = new ArrayList<Long>();
    int maxId = 100000000;
    Random random = new Random(1);
    for (int i = 0; i < 1000; i++) {
        long id = (long) random.nextInt(maxId);
        //      System.out.println(id);
        idsToFind.add(id);
    }

    // idsToFind.clear();
    // idsToFind.add(58998000L);

    //    Path pathData = new Path("./out/data.avro");
    //    Path pathIndex = new Path("./out/data.index");

    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");

    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);
    FileStatus indexFileStatus = fileSystem.getFileStatus(pathIndex);
    FileStatus dataFileStatus = fileSystem.getFileStatus(pathData);
    FSDataInputStream indexInputStream = fileSystem.open(pathIndex);
    FSDataInputStream dataInputStream = fileSystem.open(pathData);

    AvroFSInput fsInput = new AvroFSInput(dataInputStream, dataFileStatus.getLen());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(fsInput, gdr);

    List<IndexKey> list = getList(indexInputStream, indexFileStatus.getLen());

    for (Long idToFind : idsToFind) {
        long t1 = System.nanoTime();
        GenericRecord lookupRecord = lookupRecord(reader, list, idToFind);
        long t2 = System.nanoTime();
        System.out.println("Found [" + idToFind + "] in [" + (t2 - t1) / 1000000.0 + " ms]:" + lookupRecord);
    }
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public long getTimestamp(JobConf conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.getFileStatus(dfsPath).getModificationTime();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}