Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page collects usage examples of the getFileStatus method of org.apache.hadoop.fs.FileSystem.

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Source Link

Document

Return a file status object that represents the path.

Usage

From source file:gobblin.util.filesystem.ThrottledFileSystemTest.java

License:Apache License

/**
 * Checks that a {@link ThrottledFileSystem} built around a {@link CountBasedLimiter} created with a count
 * of 2 answers two {@code getFileStatus} calls and fails the third with a {@link NotEnoughPermitsException}.
 */
@Test
public void testSimpleCalls() throws Exception {
    // The wrapped file system is a mock that answers every getFileStatus call with the same status.
    FileStatus stubbedStatus = new FileStatus(0, false, 0, 0, 0, new Path("/"));
    FileSystem underlying = Mockito.mock(FileSystem.class);
    Mockito.when(underlying.getFileStatus(Mockito.any(Path.class))).thenReturn(stubbedStatus);

    ThrottledFileSystem throttled = new ThrottledFileSystem(underlying, new CountBasedLimiter(2));
    Path probe = new Path("/myFile");

    // The first two calls are expected to go through.
    for (int attempt = 0; attempt < 2; attempt++) {
        Assert.assertNotNull(throttled.getFileStatus(probe));
    }

    // The third call must be rejected.
    try {
        throttled.getFileStatus(probe);
        Assert.fail();
    } catch (NotEnoughPermitsException expected) {
        // Expected
    }
}

From source file:gobblin.util.HadoopUtils.java

License:Apache License

/**
 * This method is an additive implementation of the {@link FileSystem#rename(Path, Path)} method. It moves all the
 * files/directories under 'from' path to the 'to' path without overwriting existing directories in the 'to' path.
 *
 * <p>
 * The rename operation happens at the first non-existent sub-directory. If a directory at destination path already
 * exists, it recursively tries to move sub-directories. If all the sub-directories also exist at the destination,
 * a file level move is done
 * </p>
 *
 * <p>
 * The work is submitted to a dedicated thread pool that is shut down before this method returns. The calling
 * thread blocks until every submitted task has completed.
 * </p>
 *
 * @param fileSystem on which the data needs to be moved
 * @param from path of the data to be moved
 * @param to destination path the data is moved to
 * @throws IOException if {@code from} does not exist, if any rename task fails, or if the calling thread is
 *                     interrupted while waiting for the rename tasks (the interrupt flag is restored)
 */
public static void renameRecursively(FileSystem fileSystem, Path from, Path to) throws IOException {

    log.info(String.format("Recursively renaming %s in %s to %s.", from, fileSystem.getUri(), to));

    // NOTE(review): 10000 is presumably the throttling limit applied to the wrapped file system -- confirm
    // against getOptionallyThrottledFileSystem.
    FileSystem throttledFS = getOptionallyThrottledFileSystem(fileSystem, 10000);

    ExecutorService executorService = ScalingThreadPoolExecutor.newScalingThreadPool(1, 100, 100,
            ExecutorsUtils.newThreadFactory(Optional.of(log), Optional.of("rename-thread-%d")));
    Queue<Future<?>> futures = Queues.newConcurrentLinkedQueue();

    try {
        if (!fileSystem.exists(from)) {
            throw new IOException("Trying to rename a path that does not exist! " + from);
        }

        // The root task receives the executor and the queue so that it can enqueue follow-up tasks;
        // the loop below therefore drains the queue until it is empty.
        futures.add(executorService.submit(new RenameRecursively(throttledFS, fileSystem.getFileStatus(from),
                to, executorService, futures)));
        int futuresUsed = 0;
        while (!futures.isEmpty()) {
            try {
                futures.poll().get();
                futuresUsed++;
            } catch (ExecutionException ee) {
                // Surface the task's own failure rather than the ExecutionException wrapper.
                throw new IOException(ee.getCause());
            } catch (InterruptedException ie) {
                // An InterruptedException normally has no cause, so wrapping getCause() would lose all
                // context. Restore the interrupt flag and report the interruption itself.
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while recursively renaming " + from + " to " + to, ie);
            }
        }

        log.info(String.format("Recursive renaming of %s to %s. (details: used %d futures)", from, to,
                futuresUsed));

    } finally {
        ExecutorsUtils.shutdownExecutorService(executorService, Optional.of(log), 1, TimeUnit.SECONDS);
    }
}

From source file:gobblin.util.HadoopUtils.java

License:Apache License

/**
 * Set the group associated with a given path.
 *
 * @param fs the {@link FileSystem} instance used to perform the file operation
 * @param path the given path/*from  w  ww .j  av  a2s. c om*/
 * @param group the group associated with the path
 * @throws IOException
 */
public static void setGroup(FileSystem fs, Path path, String group) throws IOException {
    fs.setOwner(path, fs.getFileStatus(path).getOwner(), group);
}

From source file:gobblin.util.HadoopUtilsTest.java

License:Apache License

/**
 * Performance-group test that runs {@link HadoopUtils#renameRecursively} against a mocked
 * {@link FileSystem} whose source directory {@code /source/d1} lists 10000 entries.
 *
 * <p>
 * Every stubbed call answers through {@code getDelayedAnswer}; presumably that helper adds latency to
 * simulate a remote file system (confirm against the helper). The test makes no assertions and only
 * requires the rename to complete.
 * </p>
 */
@Test(groups = { "performance" })
public void testRenamePerformance() throws Exception {

    FileSystem mockFs = Mockito.mock(FileSystem.class);

    Path srcRoot = new Path("/source");
    Path srcDir = new Path(srcRoot, "d1");

    FileStatus[] srcChildren = new FileStatus[10000];
    FileStatus[] dstChildren = new FileStatus[1000];

    for (int idx = 0; idx < srcChildren.length; idx++) {
        srcChildren[idx] = getFileStatus(new Path(srcDir, "path" + idx), false);
    }
    // NOTE(review): this array is populated but never handed to a stub below.
    for (int idx = 0; idx < dstChildren.length; idx++) {
        dstChildren[idx] = getFileStatus(new Path(srcDir, "path" + idx), false);
    }

    // Source side: /source exists and contains d1, which in turn lists the 10000 entries.
    Mockito.when(mockFs.getUri()).thenReturn(new URI("file:///"));
    Mockito.when(mockFs.getFileStatus(srcRoot)).thenAnswer(getDelayedAnswer(getFileStatus(srcRoot, true)));
    Mockito.when(mockFs.exists(srcRoot)).thenAnswer(getDelayedAnswer(true));
    Mockito.when(mockFs.listStatus(srcRoot))
            .thenAnswer(getDelayedAnswer(new FileStatus[] { getFileStatus(srcDir, true) }));
    Mockito.when(mockFs.exists(srcDir)).thenAnswer(getDelayedAnswer(true));
    Mockito.when(mockFs.listStatus(srcDir)).thenAnswer(getDelayedAnswer(srcChildren));

    // Target side: both /target and /target/d1 are reported as already existing.
    Path dstRoot = new Path("/target");
    Path dstDir = new Path(dstRoot, "d1");
    Mockito.when(mockFs.exists(dstRoot)).thenAnswer(getDelayedAnswer(true));
    Mockito.when(mockFs.exists(dstDir)).thenAnswer(getDelayedAnswer(true));

    // Any mkdirs / rename call succeeds.
    Mockito.when(mockFs.mkdirs(Mockito.any(Path.class))).thenAnswer(getDelayedAnswer(true));
    Mockito.when(mockFs.rename(Mockito.any(Path.class), Mockito.any(Path.class)))
            .thenAnswer(getDelayedAnswer(true));

    HadoopUtils.renameRecursively(mockFs, srcRoot, dstRoot);
}

From source file:gobblin.util.io.StreamUtils.java

License:Apache License

/**
 * Similiar to {@link #tar(FileSystem, Path, Path)} except the source and destination {@link FileSystem} can be different.
 *
 * @see #tar(FileSystem, Path, Path)/*from  ww  w  .  j  a  va2 s  .  com*/
 */
public static void tar(FileSystem sourceFs, FileSystem destFs, Path sourcePath, Path destPath)
        throws IOException {
    try (FSDataOutputStream fsDataOutputStream = destFs.create(destPath);
            TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream(
                    new GzipCompressorOutputStream(fsDataOutputStream),
                    ConfigurationKeys.DEFAULT_CHARSET_ENCODING.name())) {

        FileStatus fileStatus = sourceFs.getFileStatus(sourcePath);

        if (sourceFs.isDirectory(sourcePath)) {
            dirToTarArchiveOutputStreamRecursive(fileStatus, sourceFs, Optional.<Path>absent(),
                    tarArchiveOutputStream);
        } else {
            try (FSDataInputStream fsDataInputStream = sourceFs.open(sourcePath)) {
                fileToTarArchiveOutputStream(fileStatus, fsDataInputStream, new Path(sourcePath.getName()),
                        tarArchiveOutputStream);
            }
        }
    }
}

From source file:gobblin.util.ParallelRunnerTest.java

License:Apache License

/**
 * Moves {@code /src/file.txt} from one mocked {@link FileSystem} to {@code /dst/file.txt} on another through
 * {@link ParallelRunner#movePath} and checks that the bytes written to the destination equal the source
 * content.
 */
@Test
public void testMovePath() throws IOException, URISyntaxException {
    String payload = "test";
    ByteArrayOutputStream written = new ByteArrayOutputStream();

    Path src = new Path("/src/file.txt");
    Path dst = new Path("/dst/file.txt");

    // Source file system: the file exists, is a regular file, and yields the payload when opened.
    FileSystem srcFs = Mockito.mock(FileSystem.class);
    Mockito.when(srcFs.exists(src)).thenReturn(true);
    Mockito.when(srcFs.isFile(src)).thenReturn(true);
    Mockito.when(srcFs.getUri()).thenReturn(new URI("fs1:////"));
    Mockito.when(srcFs.getFileStatus(src)).thenReturn(new FileStatus(1, false, 1, 1, 1, src));
    Mockito.when(srcFs.open(src)).thenReturn(
            new FSDataInputStream(new SeekableFSInputStream(new ByteArrayInputStream(payload.getBytes()))));
    Mockito.when(srcFs.delete(src, true)).thenReturn(true);

    // Destination file system: the target does not exist yet and create() writes into the in-memory buffer.
    FileSystem dstFs = Mockito.mock(FileSystem.class);
    Mockito.when(dstFs.exists(dst)).thenReturn(false);
    Mockito.when(dstFs.getUri()).thenReturn(new URI("fs2:////"));
    Mockito.when(dstFs.getConf()).thenReturn(new Configuration());
    Mockito.when(dstFs.create(dst, false)).thenReturn(new FSDataOutputStream(written, null));

    // The assertion runs only after the runner has been closed by try-with-resources.
    try (ParallelRunner runner = new ParallelRunner(1, srcFs)) {
        runner.movePath(src, dstFs, dst, Optional.<String>absent());
    }

    Assert.assertEquals(written.toString(), payload);
}

From source file:gobblin.util.WriterUtils.java

License:Apache License

/**
 * Create the given dir as well as all missing ancestor dirs. All created dirs will have the given permission.
 * This should be used instead of {@link FileSystem#mkdirs(Path, FsPermission)}, since that method only sets
 * the permission for the given dir, and not recursively for the ancestor dirs.
 *
 * <p>
 * Directories that already exist are left untouched, including their permissions.
 * </p>
 *
 * @param fs FileSystem
 * @param path The dir to be created
 * @param perm The permission to be set
 * @throws IOException if failing to create dir or set permission.
 */
public static void mkdirsWithRecursivePermission(FileSystem fs, Path path, FsPermission perm)
        throws IOException {
    if (fs.exists(path)) {
        return;
    }

    // Create missing ancestors first. The recursive call starts with the same exists() check and returns
    // immediately for an existing parent, so probing the parent here as well would only add a redundant
    // file system round trip per missing level.
    Path parent = path.getParent();
    if (parent != null) {
        mkdirsWithRecursivePermission(fs, parent, perm);
    }

    if (!fs.mkdirs(path, perm)) {
        throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm));
    }

    // Double check permission, since fs.mkdirs() may not guarantee to set the permission correctly
    if (!fs.getFileStatus(path).getPermission().equals(perm)) {
        fs.setPermission(path, perm);
    }
}

From source file:gobblin.yarn.YarnHelixUtils.java

License:Apache License

/**
 * Register a file as a Yarn {@link org.apache.hadoop.yarn.api.records.LocalResource}.
 *
 * <p>
 * The resource's size and timestamp are taken from the file's {@link FileStatus}; its visibility is always
 * {@link LocalResourceVisibility#APPLICATION}. The entry is stored in {@code resourceMap} under the file
 * name (last path component) of {@code destFilePath}.
 * </p>
 *
 * @param fs a {@link FileSystem} instance
 * @param destFilePath the destination file path
 * @param resourceType the {@link org.apache.hadoop.yarn.api.records.LocalResourceType} of the file
 * @param resourceMap a {@link Map} of file names to their corresponding
 *                    {@link org.apache.hadoop.yarn.api.records.LocalResource}s
 * @throws IOException if there's something wrong adding the file as a
 *                     {@link org.apache.hadoop.yarn.api.records.LocalResource}
 */
public static void addFileAsLocalResource(FileSystem fs, Path destFilePath, LocalResourceType resourceType,
        Map<String, LocalResource> resourceMap) throws IOException {
    LocalResource resource = Records.newRecord(LocalResource.class);
    FileStatus status = fs.getFileStatus(destFilePath);

    // Fixed attributes of the resource.
    resource.setType(resourceType);
    resource.setVisibility(LocalResourceVisibility.APPLICATION);

    // Attributes derived from the file itself.
    resource.setResource(ConverterUtils.getYarnUrlFromPath(destFilePath));
    resource.setTimestamp(status.getModificationTime());
    resource.setSize(status.getLen());

    String resourceName = destFilePath.getName();
    resourceMap.put(resourceName, resource);
}

From source file:gov.jgi.meta.MetaUtils.java

License:Open Source License

/**
 * Find the files located at a given root.
 *
 * <p>
 * If {@code p} is a directory, the result contains the non-directory entries that are direct children of
 * {@code p}; sub-directories are not descended into. Otherwise the result contains {@code p} itself.
 * </p>
 *
 * @param p is the root on which to search
 * @return a set of file paths
 * @throws IOException if filesystem can't find file or has other io problems
 */
public static Set<Path> findAllPaths(Path p) throws IOException {
    Configuration conf = new Configuration();
    // Resolve the file system from the path itself. FileSystem.get(conf) always yields the default file
    // system, which does not match a path that names a different scheme or authority.
    FileSystem fs = p.getFileSystem(conf);

    Set<Path> files = new HashSet<Path>();

    if (fs.getFileStatus(p).isDir()) {
        for (FileStatus child : fs.listStatus(p)) {
            if (!child.isDir()) {
                files.add(child.getPath());
            }
        }
    } else {
        files.add(p);
    }

    return files;
}

From source file:gov.jgi.meta.MetaUtils.java

License:Open Source License

/**
 * Count the sequences in a file, or in every file found for a directory. See
 * {@link #countSequences(String)}.
 *
 * <p>
 * Each file is opened, read through a {@code FastaBlockLineReader} into a temporary map, and the size of
 * that map is added to the total.
 * </p>
 *
 * @param contigFileName the file or directory name
 * @param conf the hadoop configuration object specifying filesystem
 * @return an integer count of number of sequences found
 * @throws IOException if the path does not exist or the filesystem has an error
 */
public static int countSequences(String contigFileName, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path filenamePath = new Path(contigFileName);
    int count = 0;

    if (!fs.exists(filenamePath)) {
        throw new IOException("file not found: " + contigFileName);
    }

    // NOTE(review): findAllPaths(Path) builds its own Configuration, so the listing may not honour the
    // conf passed to this method -- confirm whether that is intended.
    for (Path f : findAllPaths(filenamePath)) {
        FSDataInputStream in = fs.open(f);
        try {
            FastaBlockLineReader fblr = new FastaBlockLineReader(in);

            Text key = new Text();
            long length = fs.getFileStatus(f).getLen();
            // A plain (int) cast wraps around for files larger than Integer.MAX_VALUE bytes, so clamp.
            int maxBytes = (int) Math.min(length, (long) Integer.MAX_VALUE);
            HashMap<String, String> tmpcontigs = new HashMap<String, String>();
            fblr.readLine(key, tmpcontigs, Integer.MAX_VALUE, maxBytes);
            count += tmpcontigs.size();
        } finally {
            // Close the stream even when reading fails, so a bad file does not leak the handle.
            in.close();
        }
    }

    return count;
}