Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getFileStatus.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Source Link

Document

Return a file status object that represents the path.

Usage

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

/**
 * Exercises {@code CopyMapper} on a single source file whose target file (of
 * the same name) already exists.
 *
 * <ul>
 *   <li>With the target-final-path set to the target's parent directory, the
 *       target's modification time must be unchanged after mapping (the copy
 *       is skipped).</li>
 *   <li>With the target-final-path set to the target file itself, the target's
 *       modification time must increase after mapping (the file is
 *       overwritten).</li>
 * </ul>
 */
@Test
public void testSingleFileCopy() {
    try {
        deleteState();
        touchFile(SOURCE_PATH + "/1.gz");
        Path sourceFilePath = pathList.get(0);
        Path targetFilePath = new Path(sourceFilePath.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
        touchFile(targetFilePath.toString());

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);

        context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
                targetFilePath.getParent().toString()); // Parent directory.
        copyMapper.setup(context);

        final FileStatus sourceFileStatus = fs.getFileStatus(sourceFilePath);

        long before = fs.getFileStatus(targetFilePath).getModificationTime();
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)),
                sourceFileStatus, context);
        long after = fs.getFileStatus(targetFilePath).getModificationTime();

        Assert.assertTrue("File should have been skipped", before == after);

        context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetFilePath.toString()); // Specify the file path.
        copyMapper.setup(context);

        before = fs.getFileStatus(targetFilePath).getModificationTime();
        try {
            // Let the clock advance so that an overwrite yields a strictly later
            // modification time than the one sampled above.
            Thread.sleep(2);
        } catch (InterruptedException interrupted) {
            // Keep going as before, but do not lose the interrupt request.
            Thread.currentThread().interrupt();
        }
        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)),
                sourceFileStatus, context);
        after = fs.getFileStatus(targetFilePath).getModificationTime();

        Assert.assertTrue("File should have been overwritten.", before < after);

    } catch (Exception exception) {
        // Print the trace first: Assert.fail always throws, so anything placed
        // after it would never run.
        exception.printStackTrace();
        Assert.fail("Unexpected exception: " + exception.getMessage());
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

/**
 * Runs {@code CopyMapper} over every path in {@code pathList} and checks the
 * owner, group and permission of each copied (non-directory) target against
 * its source.
 *
 * @param preserve when {@code true}, USER, GROUP and PERMISSION are packed into
 *                 the preserve-status configuration and the target attributes
 *                 must equal the source's; when {@code false}, nothing is
 *                 requested and the target attributes must differ
 */
private void testPreserveUserGroupImpl(boolean preserve) {
    try {

        deleteState();
        createSourceData();
        // Presumably changes the owner/group of the source data so that it
        // differs from the default of copied files -- confirm against the helper.
        changeUserGroup("Michael", "Corleone");

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);

        Configuration configuration = context.getConfiguration();
        EnumSet<DistCpOptions.FileAttribute> fileAttributes = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
        if (preserve) {
            fileAttributes.add(DistCpOptions.FileAttribute.USER);
            fileAttributes.add(DistCpOptions.FileAttribute.GROUP);
            fileAttributes.add(DistCpOptions.FileAttribute.PERMISSION);
        }

        configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
                DistCpUtils.packAttributes(fileAttributes));
        copyMapper.setup(context);

        for (Path path : pathList) {
            final FileStatus fileStatus = fs.getFileStatus(path);
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)), fileStatus,
                    context);
        }

        // Check that the user/group attributes are preserved
        // (only) as necessary.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            final FileStatus source = fs.getFileStatus(path);
            final FileStatus target = fs.getFileStatus(targetPath);
            if (!source.isDir()) {
                Assert.assertTrue(!preserve || source.getOwner().equals(target.getOwner()));
                Assert.assertTrue(!preserve || source.getGroup().equals(target.getGroup()));
                Assert.assertTrue(!preserve || source.getPermission().equals(target.getPermission()));
                Assert.assertTrue(preserve || !source.getOwner().equals(target.getOwner()));
                Assert.assertTrue(preserve || !source.getGroup().equals(target.getGroup()));
                Assert.assertTrue(preserve || !source.getPermission().equals(target.getPermission()));
                // Replication is never in the preserved set here, so it is
                // expected to differ. (The directory case is already excluded
                // by the enclosing if.)
                Assert.assertTrue(source.getReplication() != target.getReplication());
            }
        }
    } catch (Exception e) {
        // Print the trace first: the failing assertion always throws, so
        // anything placed after it would never run.
        e.printStackTrace();
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}

From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * <p>For every source path in {@code options}, the immediate children are
 * written to the listing file and each non-empty child directory is traversed.
 * When a source path has no children, the status of the path itself is
 * written instead.
 */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
    SequenceFile.Writer listingWriter = null;

    try {
        listingWriter = getWriter(pathToListingFile);

        for (Path sourcePath : options.getSourcePaths()) {
            // The file system is resolved from the path as supplied, before it
            // is qualified.
            final FileSystem sourceFS = sourcePath.getFileSystem(getConf());
            final Path qualifiedPath = makeQualified(sourcePath);

            final FileStatus rootStatus = sourceFS.getFileStatus(qualifiedPath);
            final Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
            // True whenever the returned status is a subclass of FileStatus
            // rather than FileStatus itself.
            final boolean localFile = rootStatus.getClass() != FileStatus.class;

            final FileStatus[] children = sourceFS.listStatus(qualifiedPath);
            if (children == null || children.length == 0) {
                // Nothing underneath: record the source path itself.
                writeToFileListing(listingWriter, rootStatus, sourcePathRoot, localFile, options);
                continue;
            }

            for (FileStatus child : children) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Recording source-path: " + child.getPath() + " for copy.");
                }
                writeToFileListing(listingWriter, child, sourcePathRoot, localFile, options);

                if (!isDirectoryAndNotEmpty(sourceFS, child)) {
                    continue;
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Traversing non-empty source dir: " + child.getPath());
                }
                traverseNonEmptyDirectory(listingWriter, child, sourcePathRoot, localFile, options);
            }
        }
    } finally {
        if (listingWriter != null) {
            try {
                listingWriter.close();
            } catch (IOException exception) {
                LOG.error("Could not close output-steam to the file-list: ", exception);
                throw exception;
            }
        }
    }
}

From source file:com.inmobi.conduit.distcp.tools.util.DistCpUtils.java

License:Apache License

/**
 * Applies selected attributes of a source file status to a target path.
 *
 * <p>Permission, replication, group and owner are each updated only when the
 * corresponding {@link FileAttribute} is requested and the target's current
 * value differs from the source's. Replication is never changed on a target
 * directory. Block size is not handled here.
 *
 * @param targetFS - File system holding the target path
 * @param path - Target path whose attributes are brought in line with the source
 * @param srcFileStatus - Status of the original (source) file
 * @param attributes - Attributes that must be preserved
 * @throws IOException - If reading the target status or changing any attribute
 *                       fails
 */
public static void preserve(FileSystem targetFS, Path path, FileStatus srcFileStatus,
        EnumSet<FileAttribute> attributes) throws IOException {

    final FileStatus current = targetFS.getFileStatus(path);

    if (attributes.contains(FileAttribute.PERMISSION)) {
        if (!srcFileStatus.getPermission().equals(current.getPermission())) {
            targetFS.setPermission(path, srcFileStatus.getPermission());
        }
    }

    if (attributes.contains(FileAttribute.REPLICATION) && !current.isDir()) {
        if (srcFileStatus.getReplication() != current.getReplication()) {
            targetFS.setReplication(path, srcFileStatus.getReplication());
        }
    }

    // Owner and group are changed together in a single call, keeping the
    // target's current value for whichever of the two needs no change.
    final boolean groupDiffers = attributes.contains(FileAttribute.GROUP)
            && !current.getGroup().equals(srcFileStatus.getGroup());
    final boolean ownerDiffers = attributes.contains(FileAttribute.USER)
            && !current.getOwner().equals(srcFileStatus.getOwner());

    if (groupDiffers || ownerDiffers) {
        final String newOwner = ownerDiffers ? srcFileStatus.getOwner() : current.getOwner();
        final String newGroup = groupDiffers ? srcFileStatus.getGroup() : current.getGroup();
        targetFS.setOwner(path, newOwner, newGroup);
    }
}

From source file:com.inmobi.conduit.distcp.tools.util.TestDistCpUtils.java

License:Apache License

/**
 * Checks {@code DistCpUtils.preserve} in three steps on a target directory
 * whose permission and owner/group were changed beforehand: with no attributes
 * requested nothing changes; with PERMISSION only the permission follows the
 * source; with GROUP and USER added the owner and group follow as well.
 */
@Test
public void testPreserve() {
    final Path path = new Path("/tmp/abc");
    final Path src = new Path("/tmp/src");
    FileSystem fs = null;
    try {
        fs = FileSystem.get(config);
        EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);

        fs.mkdirs(path);
        fs.mkdirs(src);
        FileStatus srcStatus = fs.getFileStatus(src);

        FsPermission noPerm = new FsPermission((short) 0);
        fs.setPermission(path, noPerm);
        fs.setOwner(path, "nobody", "nobody");

        // Nothing requested: the target must be left untouched.
        DistCpUtils.preserve(fs, path, srcStatus, attributes);
        FileStatus target = fs.getFileStatus(path);
        Assert.assertEquals(noPerm, target.getPermission());
        Assert.assertEquals("nobody", target.getOwner());
        Assert.assertEquals("nobody", target.getGroup());

        // Permission only: owner and group must still be untouched.
        attributes.add(FileAttribute.PERMISSION);
        DistCpUtils.preserve(fs, path, srcStatus, attributes);
        target = fs.getFileStatus(path);
        Assert.assertEquals(srcStatus.getPermission(), target.getPermission());
        Assert.assertEquals("nobody", target.getOwner());
        Assert.assertEquals("nobody", target.getGroup());

        // Permission, group and user: everything must match the source.
        attributes.add(FileAttribute.GROUP);
        attributes.add(FileAttribute.USER);
        DistCpUtils.preserve(fs, path, srcStatus, attributes);
        target = fs.getFileStatus(path);
        Assert.assertEquals(srcStatus.getPermission(), target.getPermission());
        Assert.assertEquals(srcStatus.getOwner(), target.getOwner());
        Assert.assertEquals(srcStatus.getGroup(), target.getGroup());
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Preserve test failure");
    } finally {
        // Clean up even when an assertion failed, so that later runs do not
        // start from leftover directories. A cleanup failure is only logged so
        // that it cannot mask the primary test failure.
        if (fs != null) {
            try {
                fs.delete(path, true);
                fs.delete(src, true);
            } catch (IOException cleanupFailure) {
                LOG.error("Exception encountered ", cleanupFailure);
            }
        }
    }
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Builds the file listing via {@code createListing} and writes it to
 * {@code inputPath} as a sequence file of (Text, FileStatus) records, one per
 * listed file, with a sync point after every record.
 *
 * @param inputPath       location of the sequence file to create
 * @param fileListing     listing populated by {@code createListing}; each value
 *                        becomes the record key and each key supplies the
 *                        record's file status
 * @param trashSet        passed through to {@code createListing}
 * @param checkpointPaths passed through to {@code createListing}
 * @return sum of {@code getLen()} over all listed files, or 0 when the listing
 *         is empty (in which case no file is created)
 * @throws IOException if listing or writing fails
 */
protected long createMRInput(Path inputPath, Map<FileStatus, String> fileListing, Set<FileStatus> trashSet,
        Table<String, String, String> checkpointPaths) throws IOException {
    final FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());
    createListing(fs, fs.getFileStatus(srcCluster.getDataDir()), fileListing, trashSet, checkpointPaths);

    // Nothing to process: return before the writer is created.
    if (fileListing.isEmpty()) {
        return 0;
    }

    long listedBytes = 0;
    final SequenceFile.Writer listingWriter = SequenceFile.createWriter(fs, srcCluster.getHadoopConf(),
            inputPath, Text.class, FileStatus.class);
    try {
        for (Entry<FileStatus, String> listed : fileListing.entrySet()) {
            final FileStatus listedFile = listed.getKey();
            final FileStatus status = FileUtil.getFileStatus(listedFile, buffer, in);
            listingWriter.append(new Text(listed.getValue()), status);

            // A sync point after each record lets a SequenceFile reader work
            // at single-entry granularity, since it reads from the start of a
            // sync point.
            listingWriter.sync();

            listedBytes += listedFile.getLen();
        }
    } finally {
        listingWriter.close();
    }

    return listedBytes;
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Walks every stream in {@code streamsToProcess} under the source cluster's
 * data directory and, per collector directory of each stream, hands the
 * collector's files (in {@code FileTimeStampComparator} order) to
 * {@code processFile}.
 *
 * <p>For each stream, the smallest of the per-collector "latest file
 * timestamps" is stored in {@code lastProcessedFile}; collectors for which no
 * timestamp was produced are ignored for that minimum.
 *
 * @param fs              file system used to list stream and collector directories
 * @param fileStatus      not referenced by this method
 * @param results         passed through to {@code processFile}
 * @param trashSet        passed to {@code populateTrash} for every collector
 * @param checkpointPaths passed to {@code populateCheckpointPathForCollector}
 *                        for every collector
 * @throws IOException if a file-system call fails (a missing stream or
 *                     collector directory during listing is tolerated)
 */
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results,
        Set<FileStatus> trashSet, Table<String, String, String> checkpointPaths) throws IOException {
    List<FileStatus> streamsFileStatus = new ArrayList<FileStatus>();
    // NOTE(review): the stream directories are resolved through a file system
    // obtained here, while the listings below use the fs argument -- confirm
    // both always refer to the same file system.
    FileSystem srcFs = FileSystem.get(srcCluster.getHadoopConf());
    for (String stream : streamsToProcess) {
        streamsFileStatus.add(srcFs.getFileStatus(new Path(srcCluster.getDataDir(), stream)));
    }
    for (FileStatus stream : streamsFileStatus) {
        String streamName = stream.getPath().getName();
        LOG.debug("createListing working on Stream [" + streamName + "]");
        FileStatus[] collectors;
        try {
            collectors = fs.listStatus(stream.getPath());
        } catch (FileNotFoundException ex) {
            // Stream directory is missing: treat it as having no collectors.
            collectors = new FileStatus[0];
        }
        // -1 means "no collector of this stream has produced a timestamp yet".
        long minOfLatestCollectorTimeStamp = -1;
        for (FileStatus collector : collectors) {
            TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>();
            // check point for this collector
            String collectorName = collector.getPath().getName();
            String checkPointKey = getCheckPointKey(this.getClass().getSimpleName(), streamName, collectorName);

            String checkPointValue = null;
            byte[] value = checkpointProvider.read(checkPointKey);
            if (value == null) {
                // In case the checkpoint key with the newer name format is
                // absent, read the old checkpoint key
                String oldCheckPointKey = streamName + collectorName;
                value = checkpointProvider.read(oldCheckPointKey);
            }
            // NOTE(review): new String(byte[]) decodes with the platform
            // default charset.
            if (value != null)
                checkPointValue = new String(value);
            LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]");
            FileStatus[] files = null;
            try {
                files = fs.listStatus(collector.getPath(), new CollectorPathFilter());
            } catch (FileNotFoundException e) {
                // Deliberately ignored: files stays null and the collector is
                // skipped by the check below.
            }

            if (files == null) {
                LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory");
                continue;
            }
            // sortedFiles is handed to getCurrentFile empty and iterated
            // afterwards, so getCurrentFile is presumably what fills it --
            // confirm against the helper.
            TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator());
            String currentFile = getCurrentFile(fs, files, sortedFiles);
            LOG.debug("last file " + currentFile + " in the collector directory " + collector.getPath());

            Iterator<FileStatus> it = sortedFiles.iterator();
            // The counter is reset per collector but never incremented in this
            // method; presumably processFile advances it -- confirm.
            numberOfFilesProcessed = 0;
            long latestCollectorFileTimeStamp = -1;
            while (it.hasNext() && numberOfFilesProcessed < filesPerCollector) {
                FileStatus file = it.next();
                LOG.debug("Processing " + file.getPath());
                /*
                 * fileTimeStamp value will be -1 for the files which are already processed
                 */
                long fileTimeStamp = processFile(file, currentFile, checkPointValue, fs, results,
                        collectorPaths, streamName);
                // Keep the largest timestamp seen for this collector.
                if (fileTimeStamp > latestCollectorFileTimeStamp) {
                    latestCollectorFileTimeStamp = fileTimeStamp;
                }
            }
            populateTrash(collectorPaths, trashSet);
            populateCheckpointPathForCollector(checkpointPaths, collectorPaths);

            // Track the minimum, across collectors, of each collector's latest
            // timestamp; a collector whose latest timestamp is still -1 does
            // not take part.
            if ((latestCollectorFileTimeStamp < minOfLatestCollectorTimeStamp
                    || minOfLatestCollectorTimeStamp == -1) && latestCollectorFileTimeStamp != -1) {
                minOfLatestCollectorTimeStamp = latestCollectorFileTimeStamp;
            }
        } // all files in a collector
        if (minOfLatestCollectorTimeStamp != -1) {
            lastProcessedFile.put(streamName, minOfLatestCollectorTimeStamp);
        } else {
            LOG.warn("No new files in " + streamName + " stream");
        }
    }
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Builds the file listing via {@code createListing} and writes it to
 * {@code inputPath} as text, one line per listed file in the form
 * {@code <file path>\t<listing value>\n}.
 *
 * <p>The text is written as UTF-8. {@code DataOutputStream.writeBytes} is
 * avoided because it keeps only the low eight bits of every character, which
 * silently corrupts any non-ASCII path or value; for pure ASCII content the
 * bytes written are the same as before.
 *
 * @param inputPath       location of the file to create
 * @param fileListing     listing populated by {@code createListing}
 * @param trashSet        passed through to {@code createListing}
 * @param checkpointPaths passed through to {@code createListing}
 * @throws IOException if listing or writing fails
 */
private void createMRInput(Path inputPath, Map<FileStatus, String> fileListing, Set<FileStatus> trashSet,
        Map<String, FileStatus> checkpointPaths) throws IOException {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());

    createListing(fs, fs.getFileStatus(cluster.getDataDir()), fileListing, trashSet, checkpointPaths);

    FSDataOutputStream out = fs.create(inputPath);
    try {
        for (Entry<FileStatus, String> entry : fileListing.entrySet()) {
            out.write(entry.getKey().getPath().toString().getBytes("UTF-8"));
            out.write('\t');
            out.write(entry.getValue().getBytes("UTF-8"));
            out.write('\n');
        }
    } finally {
        out.close();
    }
}

From source file:com.inmobi.messaging.consumer.util.HadoopUtil.java

License:Apache License

/**
 * Creates a stream file that sorts before the given databus file: it uses the
 * minute directory derived from the databus file's parent directory, the fixed
 * name {@code "myfile"}, and the databus file's timestamp reduced by 36000.
 *
 * @param streamDirPrefix stream directory prefix used to parse and rebuild the
 *                        minute directory
 * @param fs              file system used to look up {@code databusFile}
 * @param databusFile     existing file the result is derived from
 * @return the derived stream file
 * @throws IOException if the status of {@code databusFile} cannot be read
 */
public static HadoopStreamFile getOlderFile(Path streamDirPrefix, FileSystem fs, Path databusFile)
        throws IOException {
    final HadoopStreamFile reference = HadoopStreamFile.create(fs.getFileStatus(databusFile));

    final Calendar minute = Calendar.getInstance();
    minute.setTime(DatabusStreamWaitingReader.getDateFromStreamDir(streamDirPrefix, reference.getParent()));

    return new HadoopStreamFile(
            DatabusStreamWaitingReader.getMinuteDirPath(streamDirPrefix, minute.getTime()),
            "myfile",
            reference.getTimestamp() - 36000);
}

From source file:com.inmobi.messaging.consumer.util.HadoopUtil.java

License:Apache License

/**
 * Creates a stream file that sorts after the given databus file: it uses the
 * minute directory one minute later than the one derived from the databus
 * file's parent directory, the fixed name {@code "myfile"}, and the databus
 * file's timestamp increased by 36000.
 *
 * @param streamDirPrefix stream directory prefix used to parse and rebuild the
 *                        minute directory
 * @param fs              file system used to look up {@code databusFile}
 * @param databusFile     existing file the result is derived from
 * @return the derived stream file
 * @throws IOException if the status of {@code databusFile} cannot be read
 */
public static HadoopStreamFile getHigherFile(Path streamDirPrefix, FileSystem fs, Path databusFile)
        throws IOException {
    final HadoopStreamFile reference = HadoopStreamFile.create(fs.getFileStatus(databusFile));

    final Calendar nextMinute = Calendar.getInstance();
    nextMinute.setTime(DatabusStreamWaitingReader.getDateFromStreamDir(streamDirPrefix, reference.getParent()));
    nextMinute.add(Calendar.MINUTE, 1);

    return new HadoopStreamFile(
            DatabusStreamWaitingReader.getMinuteDirPath(streamDirPrefix, nextMinute.getTime()),
            "myfile",
            reference.getTimestamp() + 36000);
}