Example usage for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#getFileStatus, drawn from open source projects.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
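
A minimal sketch of a typical call is shown below. The file system is resolved from the path itself, and the path (/tmp/example.txt) is a hypothetical placeholder; getFileStatus throws a FileNotFoundException if the path does not exist.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical path; replace with a path that exists on your file system.
        Path path = new Path("/tmp/example.txt");
        FileSystem fs = path.getFileSystem(new Configuration());

        // Throws FileNotFoundException if the path does not exist.
        FileStatus status = fs.getFileStatus(path);
        System.out.println("length=" + status.getLen()
                + " isDirectory=" + status.isDirectory()
                + " modificationTime=" + status.getModificationTime());
    }
}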

Usage

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);

            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }

        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}
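
Note: FileStatus#isDir(), used in this and several later examples, is deprecated in current Hadoop releases in favor of isDirectory(). A minimal sketch of the directory branch above rewritten against the newer API (assuming Hadoop 2.x or later):

    FileStatus status = fs.getFileStatus(taskOutput);
    if (status.isDirectory()) {
        // Recurse into the directory's children, as moveTaskOutputs() does above.
        for (FileStatus child : fs.listStatus(taskOutput)) {
            moveTaskOutputs(conf, fs, jobOutputDir, child.getPath());
        }
    }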

From source file:cc.solr.lucene.store.hdfs.ConvertDirectory.java

License:Apache License

public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}

From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java

License:Apache License

public HdfsFileReader(FileSystem fileSystem, Path path, int bufferSize) throws IOException {
    if (!fileSystem.exists(path)) {
        throw new FileNotFoundException(path.toString());
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    _hdfsLength = fileStatus.getLen();
    _inputStream = fileSystem.open(path, bufferSize);

    // read meta blocks
    _inputStream.seek(_hdfsLength - 16);
    int numberOfBlocks = _inputStream.readInt();
    _length = _inputStream.readLong();
    int version = _inputStream.readInt();
    if (version != VERSION) {
        throw new RuntimeException("Version of file [" + version + "] does not match reader [" + VERSION + "]");
    }
    _inputStream.seek(_hdfsLength - 16 - (numberOfBlocks * 24)); // 3 longs per block
    _metaBlocks = new ArrayList<HdfsMetaBlock>(numberOfBlocks);
    for (int i = 0; i < numberOfBlocks; i++) {
        HdfsMetaBlock hdfsMetaBlock = new HdfsMetaBlock();
        hdfsMetaBlock.readFields(_inputStream);
        _metaBlocks.add(hdfsMetaBlock);
    }
    seek(0);
}

From source file:cc.solr.lucene.store.hdfs.HdfsFileReader.java

License:Apache License

public static long getLength(FileSystem fileSystem, Path path) throws IOException {
    FSDataInputStream inputStream = null;
    try {
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        inputStream = fileSystem.open(path);
        long hdfsLength = fileStatus.getLen();
        inputStream.seek(hdfsLength - 12);
        long length = inputStream.readLong();
        int version = inputStream.readInt();
        if (version != VERSION) {
            throw new RuntimeException(
                    "Version of file [" + version + "] does not match reader [" + VERSION + "]");
        }
        return length;
    } finally {
        if (inputStream != null) {
            inputStream.close();
        }
    }
}

From source file:cc.wikitools.lucene.hadoop.FileSystemDirectory.java

License:Apache License

/**
 * Constructor
 * @param fs
 * @param directory
 * @param create
 * @param conf
 * @throws IOException
 */
public FileSystemDirectory(FileSystem fs, Path directory, boolean create, Configuration conf)
        throws IOException {

    this.fs = fs;
    this.directory = directory;
    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);

    if (create) {
        create();
    }

    boolean isDir = false;
    try {
        FileStatus status = fs.getFileStatus(directory);
        if (status != null) {
            isDir = status.isDir();
        }
    } catch (IOException e) {
        // file does not exist, isDir already set to false
    }
    if (!isDir) {
        throw new IOException(directory + " is not a directory");
    }
}

From source file:clone.ReadSequenceFile.java

License:Apache License

public static void main(String[] args) throws IOException {
    if (args.length < 1) {
        System.out.println("args: [path] [max-num-of-records-per-file]");
        System.exit(-1);
    }

    String f = args[0];

    int max = Integer.MAX_VALUE;
    if (args.length >= 2) {
        max = Integer.parseInt(args[1]);
    }

    boolean useLocal = args.length >= 3 && args[2].equals("local");

    if (useLocal) {
        System.out.println("Reading from local filesystem");
    }

    FileSystem fs = useLocal ? FileSystem.getLocal(new Configuration()) : FileSystem.get(new Configuration());
    Path p = new Path(f);

    if (fs.getFileStatus(p).isDir()) {
        readSequenceFilesInDir(p, fs, max);
    } else {
        readSequenceFile(p, fs, max);
    }
}

From source file:co.cask.cdap.common.io.Locations.java

License:Apache License

/**
 * Creates a {@link StreamSizeProvider} for determining the size of the given {@link FSDataInputStream}.
 */
private static StreamSizeProvider createDFSStreamSizeProvider(final FileSystem fs, final Path path,
        FSDataInputStream input) {
    // This is the default provider to use. It will try to determine if the file is closed and return the size of it.
    final StreamSizeProvider defaultSizeProvider = new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            if (fs instanceof DistributedFileSystem) {
                if (((DistributedFileSystem) fs).isFileClosed(path)) {
                    return fs.getFileStatus(path).getLen();
                } else {
                    return -1L;
                }
            }
            // If the underlying file system is not DistributedFileSystem, just assume the file length is the size
            return fs.getFileStatus(path).getLen();
        }
    };

    // This supplier is to abstract out the logic for getting the DFSInputStream#getFileLength method using reflection
    // Reflection is used to avoid a ClassLoading error if the DFSInputStream class is moved or the method gets renamed
    final InputStream wrappedStream = input.getWrappedStream();
    final Supplier<Method> getFileLengthMethodSupplier = Suppliers.memoize(new Supplier<Method>() {
        @Override
        public Method get() {
            try {
                // This is a hack to get to the underlying DFSInputStream
                // Need to revisit this when we need to support other distributed file systems
                Class<? extends InputStream> cls = wrappedStream.getClass();
                String expectedName = "org.apache.hadoop.hdfs.DFSInputStream";
                if (!cls.getName().equals(expectedName)) {
                    throw new Exception(
                            "Expected wrapper class to be " + expectedName + ", but got " + cls.getName());
                }

                Method getFileLengthMethod = cls.getMethod("getFileLength");
                if (!getFileLengthMethod.isAccessible()) {
                    getFileLengthMethod.setAccessible(true);
                }
                return getFileLengthMethod;
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }
    });

    return new StreamSizeProvider() {
        @Override
        public long size() throws IOException {
            // Try to determine the size using default provider
            long size = defaultSizeProvider.size();
            if (size >= 0) {
                return size;
            }
            try {
                // If not able to get length from the default provider, use the DFSInputStream#getFileLength method
                return (Long) getFileLengthMethodSupplier.get().invoke(wrappedStream);
            } catch (Throwable t) {
                LOG.warn("Unable to get actual file length from DFS input.", t);
                return size;
            }
        }
    };
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

/**
 * Merge two paths together. Anything in from will be moved into to; if there
 * are any name conflicts while merging, the files or directories in from win.
 * @param fs the File System to use
 * @param from the path data is coming from.
 * @param to the path data is going to.
 * @throws IOException on any error
 */
private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException {
    if (from.isFile()) {
        if (fs.exists(to)) {
            if (!fs.delete(to, true)) {
                throw new IOException("Failed to delete " + to);
            }
        }

        if (!fs.rename(from.getPath(), to)) {
            throw new IOException("Failed to rename " + from + " to " + to);
        }
    } else if (from.isDirectory()) {
        if (fs.exists(to)) {
            FileStatus toStat = fs.getFileStatus(to);
            if (!toStat.isDirectory()) {
                if (!fs.delete(to, true)) {
                    throw new IOException("Failed to delete " + to);
                }
                if (!fs.rename(from.getPath(), to)) {
                    throw new IOException("Failed to rename " + from + " to " + to);
                }
            } else {
                //It is a directory so merge everything in the directories
                for (FileStatus subFrom : fs.listStatus(from.getPath())) {
                    Path subTo = new Path(to, subFrom.getPath().getName());
                    mergePaths(fs, subFrom, subTo);
                }
            }
        } else {
            // it does not exist, so just rename
            if (!fs.rename(from.getPath(), to)) {
                throw new IOException("Failed to rename " + from + " to " + to);
            }
        }
    }
}

From source file:coldstorage.io.Reader.java

License:Apache License

public static void main(String[] args) throws IOException {

    List<Long> idsToFind = new ArrayList<Long>();
    int maxId = 100000000;
    Random random = new Random(1);
    for (int i = 0; i < 1000; i++) {
        long id = (long) random.nextInt(maxId);
        //      System.out.println(id);
        idsToFind.add(id);
    }

    // idsToFind.clear();
    // idsToFind.add(58998000L);

    //    Path pathData = new Path("./out/data.avro");
    //    Path pathIndex = new Path("./out/data.index");

    Path pathData = new Path("hdfs://localhost:9000/avro/out/data.avro");
    Path pathIndex = new Path("hdfs://localhost:9000/avro/out/data.index");

    Configuration configuration = new Configuration();
    FileSystem fileSystem = pathData.getFileSystem(configuration);
    FileStatus indexFileStatus = fileSystem.getFileStatus(pathIndex);
    FileStatus dataFileStatus = fileSystem.getFileStatus(pathData);
    FSDataInputStream indexInputStream = fileSystem.open(pathIndex);
    FSDataInputStream dataInputStream = fileSystem.open(pathData);

    AvroFSInput fsInput = new AvroFSInput(dataInputStream, dataFileStatus.getLen());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(fsInput, gdr);

    List<IndexKey> list = getList(indexInputStream, indexFileStatus.getLen());

    for (Long idToFind : idsToFind) {
        long t1 = System.nanoTime();
        GenericRecord lookupRecord = lookupRecord(reader, list, idToFind);
        long t2 = System.nanoTime();
        System.out.println("Found [" + idToFind + "] in [" + (t2 - t1) / 1000000.0 + " ms]:" + lookupRecord);
    }
}

From source file:colossal.pipe.ColFile.java

License:Apache License

public long getTimestamp(JobConf conf) {
    try {
        Path dfsPath = new Path(path);
        FileSystem fs = dfsPath.getFileSystem(conf);
        return fs.getFileStatus(dfsPath).getModificationTime();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}