List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus(Path)
/**
 * Returns the {@link FileStatus} describing the given path.
 *
 * <p>NOTE(review): Hadoop implementations are documented to signal a missing path with a
 * {@code FileNotFoundException} (a subtype of {@link IOException}) -- confirm against the Hadoop version in use.
 *
 * @param f the path whose status is requested
 * @return the status object for {@code f}
 * @throws IOException if the status cannot be obtained
 */
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:gobblin.util.filesystem.ThrottledFileSystemTest.java
License:Apache License
@Test public void testSimpleCalls() throws Exception { FileSystem fs = Mockito.mock(FileSystem.class); Mockito.when(fs.getFileStatus(Mockito.any(Path.class))) .thenReturn(new FileStatus(0, false, 0, 0, 0, new Path("/"))); Limiter limiter = new CountBasedLimiter(2); ThrottledFileSystem throttledFileSystem = new ThrottledFileSystem(fs, limiter); Assert.assertNotNull(throttledFileSystem.getFileStatus(new Path("/myFile"))); Assert.assertNotNull(throttledFileSystem.getFileStatus(new Path("/myFile"))); try {/* w ww. jav a 2s . c om*/ throttledFileSystem.getFileStatus(new Path("/myFile")); Assert.fail(); } catch (NotEnoughPermitsException expected) { // Expected } }
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/**
 * This method is an additive implementation of the {@link FileSystem#rename(Path, Path)} method. It moves all the
 * files/directories under 'from' path to the 'to' path without overwriting existing directories in the 'to' path.
 *
 * <p>
 * The rename operation happens at the first non-existent sub-directory. If a directory at destination path already
 * exists, it recursively tries to move sub-directories. If all the sub-directories also exist at the destination,
 * a file level move is done
 * </p>
 *
 * <p>
 * The work is submitted to a scaling thread pool as {@code RenameRecursively} tasks; each task receives the pool and
 * the shared futures queue, and this method blocks until that queue has been drained.
 * </p>
 *
 * @param fileSystem on which the data needs to be moved
 * @param from path of the data to be moved
 * @param to path the data is moved to
 * @throws IOException if {@code from} does not exist, if a rename task fails (the task's failure is the cause), or
 *         if the calling thread is interrupted while waiting (the interrupt flag is restored before throwing)
 */
public static void renameRecursively(FileSystem fileSystem, Path from, Path to) throws IOException {

  log.info(String.format("Recursively renaming %s in %s to %s.", from, fileSystem.getUri(), to));

  FileSystem throttledFS = getOptionallyThrottledFileSystem(fileSystem, 10000);

  ExecutorService executorService = ScalingThreadPoolExecutor.newScalingThreadPool(1, 100, 100,
      ExecutorsUtils.newThreadFactory(Optional.of(log), Optional.of("rename-thread-%d")));
  Queue<Future<?>> futures = Queues.newConcurrentLinkedQueue();

  try {
    if (!fileSystem.exists(from)) {
      throw new IOException("Trying to rename a path that does not exist! " + from);
    }

    futures.add(executorService
        .submit(new RenameRecursively(throttledFS, fileSystem.getFileStatus(from), to, executorService, futures)));
    int futuresUsed = 0;
    while (!futures.isEmpty()) {
      try {
        futures.poll().get();
        futuresUsed++;
      } catch (ExecutionException ee) {
        // Surface the task's own failure rather than the ExecutionException wrapper.
        throw new IOException(ee.getCause());
      } catch (InterruptedException ie) {
        // An InterruptedException normally has no cause, so wrap the exception itself, and restore the
        // interrupt flag so callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new IOException(ie);
      }
    }

    log.info(String.format("Recursive renaming of %s to %s. (details: used %d futures)", from, to, futuresUsed));

  } finally {
    ExecutorsUtils.shutdownExecutorService(executorService, Optional.of(log), 1, TimeUnit.SECONDS);
  }
}
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/** * Set the group associated with a given path. * * @param fs the {@link FileSystem} instance used to perform the file operation * @param path the given path/*from w ww .j av a2s. c om*/ * @param group the group associated with the path * @throws IOException */ public static void setGroup(FileSystem fs, Path path, String group) throws IOException { fs.setOwner(path, fs.getFileStatus(path).getOwner(), group); }
From source file:gobblin.util.HadoopUtilsTest.java
License:Apache License
@Test(groups = { "performance" }) public void testRenamePerformance() throws Exception { FileSystem fs = Mockito.mock(FileSystem.class); Path sourcePath = new Path("/source"); Path s1 = new Path(sourcePath, "d1"); FileStatus[] sourceStatuses = new FileStatus[10000]; FileStatus[] targetStatuses = new FileStatus[1000]; for (int i = 0; i < sourceStatuses.length; i++) { sourceStatuses[i] = getFileStatus(new Path(s1, "path" + i), false); }//from w ww.j ava2 s . com for (int i = 0; i < targetStatuses.length; i++) { targetStatuses[i] = getFileStatus(new Path(s1, "path" + i), false); } Mockito.when(fs.getUri()).thenReturn(new URI("file:///")); Mockito.when(fs.getFileStatus(sourcePath)).thenAnswer(getDelayedAnswer(getFileStatus(sourcePath, true))); Mockito.when(fs.exists(sourcePath)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.listStatus(sourcePath)) .thenAnswer(getDelayedAnswer(new FileStatus[] { getFileStatus(s1, true) })); Mockito.when(fs.exists(s1)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.listStatus(s1)).thenAnswer(getDelayedAnswer(sourceStatuses)); Path target = new Path("/target"); Path s1Target = new Path(target, "d1"); Mockito.when(fs.exists(target)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.exists(s1Target)).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.mkdirs(Mockito.any(Path.class))).thenAnswer(getDelayedAnswer(true)); Mockito.when(fs.rename(Mockito.any(Path.class), Mockito.any(Path.class))) .thenAnswer(getDelayedAnswer(true)); HadoopUtils.renameRecursively(fs, sourcePath, target); }
From source file:gobblin.util.io.StreamUtils.java
License:Apache License
/** * Similiar to {@link #tar(FileSystem, Path, Path)} except the source and destination {@link FileSystem} can be different. * * @see #tar(FileSystem, Path, Path)/*from ww w . j a va2 s . com*/ */ public static void tar(FileSystem sourceFs, FileSystem destFs, Path sourcePath, Path destPath) throws IOException { try (FSDataOutputStream fsDataOutputStream = destFs.create(destPath); TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream( new GzipCompressorOutputStream(fsDataOutputStream), ConfigurationKeys.DEFAULT_CHARSET_ENCODING.name())) { FileStatus fileStatus = sourceFs.getFileStatus(sourcePath); if (sourceFs.isDirectory(sourcePath)) { dirToTarArchiveOutputStreamRecursive(fileStatus, sourceFs, Optional.<Path>absent(), tarArchiveOutputStream); } else { try (FSDataInputStream fsDataInputStream = sourceFs.open(sourcePath)) { fileToTarArchiveOutputStream(fileStatus, fsDataInputStream, new Path(sourcePath.getName()), tarArchiveOutputStream); } } } }
From source file:gobblin.util.ParallelRunnerTest.java
License:Apache License
@Test public void testMovePath() throws IOException, URISyntaxException { String expected = "test"; ByteArrayOutputStream actual = new ByteArrayOutputStream(); Path src = new Path("/src/file.txt"); Path dst = new Path("/dst/file.txt"); FileSystem fs1 = Mockito.mock(FileSystem.class); Mockito.when(fs1.exists(src)).thenReturn(true); Mockito.when(fs1.isFile(src)).thenReturn(true); Mockito.when(fs1.getUri()).thenReturn(new URI("fs1:////")); Mockito.when(fs1.getFileStatus(src)).thenReturn(new FileStatus(1, false, 1, 1, 1, src)); Mockito.when(fs1.open(src)).thenReturn( new FSDataInputStream(new SeekableFSInputStream(new ByteArrayInputStream(expected.getBytes())))); Mockito.when(fs1.delete(src, true)).thenReturn(true); FileSystem fs2 = Mockito.mock(FileSystem.class); Mockito.when(fs2.exists(dst)).thenReturn(false); Mockito.when(fs2.getUri()).thenReturn(new URI("fs2:////")); Mockito.when(fs2.getConf()).thenReturn(new Configuration()); Mockito.when(fs2.create(dst, false)).thenReturn(new FSDataOutputStream(actual, null)); try (ParallelRunner parallelRunner = new ParallelRunner(1, fs1)) { parallelRunner.movePath(src, fs2, dst, Optional.<String>absent()); }//from w ww . j a va 2s . c om Assert.assertEquals(actual.toString(), expected); }
From source file:gobblin.util.WriterUtils.java
License:Apache License
/** * Create the given dir as well as all missing ancestor dirs. All created dirs will have the given permission. * This should be used instead of {@link FileSystem#mkdirs(Path, FsPermission)}, since that method only sets * the permission for the given dir, and not recursively for the ancestor dirs. * * @param fs FileSystem//from w w w .j ava 2s . c o m * @param path The dir to be created * @param perm The permission to be set * @throws IOException if failing to create dir or set permission. */ public static void mkdirsWithRecursivePermission(FileSystem fs, Path path, FsPermission perm) throws IOException { if (fs.exists(path)) { return; } if (path.getParent() != null && !fs.exists(path.getParent())) { mkdirsWithRecursivePermission(fs, path.getParent(), perm); } if (!fs.mkdirs(path, perm)) { throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm)); } // Double check permission, since fs.mkdirs() may not guarantee to set the permission correctly if (!fs.getFileStatus(path).getPermission().equals(perm)) { fs.setPermission(path, perm); } }
From source file:gobblin.yarn.YarnHelixUtils.java
License:Apache License
/** * Add a file as a Yarn {@link org.apache.hadoop.yarn.api.records.LocalResource}. * * @param fs a {@link FileSystem} instance * @param destFilePath the destination file path * @param resourceType the {@link org.apache.hadoop.yarn.api.records.LocalResourceType} of the file * @param resourceMap a {@link Map} of file names to their corresponding * {@link org.apache.hadoop.yarn.api.records.LocalResource}s * @throws IOException if there's something wrong adding the file as a * {@link org.apache.hadoop.yarn.api.records.LocalResource} *///from w w w .j a v a 2s. c o m public static void addFileAsLocalResource(FileSystem fs, Path destFilePath, LocalResourceType resourceType, Map<String, LocalResource> resourceMap) throws IOException { LocalResource fileResource = Records.newRecord(LocalResource.class); FileStatus fileStatus = fs.getFileStatus(destFilePath); fileResource.setResource(ConverterUtils.getYarnUrlFromPath(destFilePath)); fileResource.setSize(fileStatus.getLen()); fileResource.setTimestamp(fileStatus.getModificationTime()); fileResource.setType(resourceType); fileResource.setVisibility(LocalResourceVisibility.APPLICATION); resourceMap.put(destFilePath.getName(), fileResource); }
From source file:gov.jgi.meta.MetaUtils.java
License:Open Source License
/**
 * Collects the file paths found at a given root.
 *
 * <p>If {@code p} is a directory, the result holds its immediate non-directory children only (sub-directories are
 * not descended into). Otherwise the result holds {@code p} itself. The file system is obtained from a freshly
 * created default {@link Configuration}.
 *
 * @param p is the root on which to search
 * @return a set of file paths
 * @throws IOException if filesystem can't find file or has other io problems
 */
public static Set<Path> findAllPaths(Path p) throws IOException {
  FileSystem fs = FileSystem.get(new Configuration());
  HashSet<Path> found = new HashSet<Path>();

  if (!fs.getFileStatus(p).isDir()) {
    // A plain file is its own (only) result.
    found.add(p);
    return found;
  }

  for (FileStatus child : fs.listStatus(p)) {
    if (child.isDir()) {
      continue;
    }
    found.add(child.getPath());
  }
  return found;
}
From source file:gov.jgi.meta.MetaUtils.java
License:Open Source License
/**
 * Counts the sequences found in a file, or in the files returned by {@code findAllPaths} for a directory.
 * See {@link #countSequences(String) countSequences(String)}.
 *
 * <p>Each file is read with a {@code FastaBlockLineReader} in a single {@code readLine} call and the number of
 * entries it produced is added to the total. The input stream of each file is closed even when reading fails.
 *
 * @param contigFileName the file or directory name
 * @param conf the hadoop configuration object specifying the filesystem
 * @return an integer count of number of sequences found
 * @throws IOException if the path does not exist or the filesystem has an error
 */
public static int countSequences(String contigFileName, Configuration conf) throws IOException {
  FileSystem fs = FileSystem.get(conf);
  Path filenamePath = new Path(contigFileName);
  int count = 0;

  if (!fs.exists(filenamePath)) {
    throw new IOException("file not found: " + contigFileName);
  }

  for (Path f : findAllPaths(filenamePath)) {
    FSDataInputStream in = fs.open(f);
    try {
      FastaBlockLineReader fblr = new FastaBlockLineReader(in);
      Text key = new Text();

      // Clamp instead of casting blindly: a length of 2 GiB or more would otherwise wrap to a bogus
      // (possibly negative) int. NOTE(review): presumably the last readLine argument is a byte limit -- confirm
      // against FastaBlockLineReader.
      long length = fs.getFileStatus(f).getLen();
      int maxBytes = (int) Math.min(length, (long) Integer.MAX_VALUE);

      HashMap<String, String> tmpcontigs = new HashMap<String, String>();
      fblr.readLine(key, tmpcontigs, Integer.MAX_VALUE, maxBytes);
      count += tmpcontigs.size();
    } finally {
      // Release the stream on every path, including failures while reading.
      in.close();
    }
  }

  return (count);
}