List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus
/**
 * Returns the {@link FileStatus} for the given path.
 *
 * @param f the path whose status is requested
 * @return the status object for {@code f}
 * @throws IOException declared by the signature; the exact failure conditions
 *         are defined by the concrete implementation
 */
public abstract FileStatus getFileStatus(Path f) throws IOException;
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
/** * If a single file is being copied to a location where the file (of the same * name) already exists, then the file shouldn't be skipped. */// w w w . j a va2 s.c o m @Test public void testSingleFileCopy() { try { deleteState(); touchFile(SOURCE_PATH + "/1.gz"); Path sourceFilePath = pathList.get(0); Path targetFilePath = new Path(sourceFilePath.toString().replaceAll(SOURCE_PATH, TARGET_PATH)); touchFile(targetFilePath.toString()); FileSystem fs = cluster.getFileSystem(); CopyMapper copyMapper = new CopyMapper(); StatusReporter reporter = new StubStatusReporter(); InMemoryWriter writer = new InMemoryWriter(); Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter, writer); context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetFilePath.getParent().toString()); // Parent directory. copyMapper.setup(context); final FileStatus sourceFileStatus = fs.getFileStatus(sourceFilePath); long before = fs.getFileStatus(targetFilePath).getModificationTime(); copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context); long after = fs.getFileStatus(targetFilePath).getModificationTime(); Assert.assertTrue("File should have been skipped", before == after); context.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetFilePath.toString()); // Specify the file path. copyMapper.setup(context); before = fs.getFileStatus(targetFilePath).getModificationTime(); try { Thread.sleep(2); } catch (Throwable ignore) { } copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context); after = fs.getFileStatus(targetFilePath).getModificationTime(); Assert.assertTrue("File should have been overwritten.", before < after); } catch (Exception exception) { Assert.fail("Unexpected exception: " + exception.getMessage()); exception.printStackTrace(); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
/**
 * Copies every path in {@code pathList} through a {@link CopyMapper} and then
 * checks, for each non-directory source, whether owner, group and permission
 * on the target match the source.
 *
 * @param preserve when {@code true}, USER, GROUP and PERMISSION are packed
 *                 into the preserve-status configuration and the target
 *                 attributes are asserted to equal the source's; when
 *                 {@code false}, no attributes are requested and the target
 *                 attributes are asserted to differ from the source's
 */
private void testPreserveUserGroupImpl(boolean preserve) {
    try {
        deleteState();
        createSourceData();
        changeUserGroup("Michael", "Corleone");

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context =
                getMapperContext(copyMapper, reporter, writer);

        Configuration configuration = context.getConfiguration();
        EnumSet<DistCpOptions.FileAttribute> fileAttributes =
                EnumSet.noneOf(DistCpOptions.FileAttribute.class);
        if (preserve) {
            fileAttributes.add(DistCpOptions.FileAttribute.USER);
            fileAttributes.add(DistCpOptions.FileAttribute.GROUP);
            fileAttributes.add(DistCpOptions.FileAttribute.PERMISSION);
        }

        configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
                DistCpUtils.packAttributes(fileAttributes));

        copyMapper.setup(context);

        for (Path path : pathList) {
            final FileStatus fileStatus = fs.getFileStatus(path);
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fileStatus, context);
        }

        // Check that the user/group attributes are preserved
        // (only) as necessary.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            final FileStatus source = fs.getFileStatus(path);
            final FileStatus target = fs.getFileStatus(targetPath);
            if (!source.isDir()) {
                Assert.assertTrue(!preserve || source.getOwner().equals(target.getOwner()));
                Assert.assertTrue(!preserve || source.getGroup().equals(target.getGroup()));
                Assert.assertTrue(!preserve || source.getPermission().equals(target.getPermission()));
                Assert.assertTrue(preserve || !source.getOwner().equals(target.getOwner()));
                Assert.assertTrue(preserve || !source.getGroup().equals(target.getGroup()));
                Assert.assertTrue(preserve || !source.getPermission().equals(target.getPermission()));
                Assert.assertTrue(source.isDir() || source.getReplication() != target.getReplication());
            }
        }
    } catch (Exception e) {
        // Print the trace before failing: the failing assertion throws, so a
        // statement placed after it would never run.
        e.printStackTrace();
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}
From source file:com.inmobi.conduit.distcp.tools.SimpleCopyListing.java
License:Apache License
/** {@inheritDoc} */ @Override//from ww w .j a va 2 s . co m public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException { SequenceFile.Writer fileListWriter = null; try { fileListWriter = getWriter(pathToListingFile); for (Path path : options.getSourcePaths()) { FileSystem sourceFS = path.getFileSystem(getConf()); path = makeQualified(path); FileStatus rootStatus = sourceFS.getFileStatus(path); Path sourcePathRoot = computeSourceRootPath(rootStatus, options); boolean localFile = (rootStatus.getClass() != FileStatus.class); FileStatus[] sourceFiles = sourceFS.listStatus(path); if (sourceFiles != null && sourceFiles.length > 0) { for (FileStatus sourceStatus : sourceFiles) { if (LOG.isDebugEnabled()) { LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy."); } writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile, options); if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) { if (LOG.isDebugEnabled()) { LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath()); } traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile, options); } } } else { writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile, options); } } } finally { try { if (fileListWriter != null) fileListWriter.close(); } catch (IOException exception) { LOG.error("Could not close output-steam to the file-list: ", exception); throw exception; } } }
From source file:com.inmobi.conduit.distcp.tools.util.DistCpUtils.java
License:Apache License
/**
 * Makes the requested attributes of {@code path} match those of
 * {@code srcFileStatus}. Only PERMISSION, REPLICATION, GROUP and USER are
 * handled here, and each is changed only when it is both requested and
 * currently different on the target. Replication is never changed on a
 * directory. Owner and group are applied together in a single
 * {@code setOwner} call.
 *
 * @param targetFS - File system
 * @param path - Path that needs to preserve original file status
 * @param srcFileStatus - Original file status
 * @param attributes - Attribute set that need to be preserved
 * @throws IOException - Exception if any (particularly relating to group/owner
 *                       change or any transient error)
 */
public static void preserve(FileSystem targetFS, Path path, FileStatus srcFileStatus,
        EnumSet<FileAttribute> attributes) throws IOException {
    final FileStatus current = targetFS.getFileStatus(path);
    final String currentGroup = current.getGroup();
    final String currentOwner = current.getOwner();

    if (attributes.contains(FileAttribute.PERMISSION)) {
        if (!srcFileStatus.getPermission().equals(current.getPermission())) {
            targetFS.setPermission(path, srcFileStatus.getPermission());
        }
    }

    if (attributes.contains(FileAttribute.REPLICATION) && !current.isDir()) {
        if (srcFileStatus.getReplication() != current.getReplication()) {
            targetFS.setReplication(path, srcFileStatus.getReplication());
        }
    }

    final boolean groupDiffers = attributes.contains(FileAttribute.GROUP)
            && !currentGroup.equals(srcFileStatus.getGroup());
    final boolean ownerDiffers = attributes.contains(FileAttribute.USER)
            && !currentOwner.equals(srcFileStatus.getOwner());

    if (groupDiffers || ownerDiffers) {
        // Whichever side did not change keeps the target's current value.
        final String newOwner = ownerDiffers ? srcFileStatus.getOwner() : currentOwner;
        final String newGroup = groupDiffers ? srcFileStatus.getGroup() : currentGroup;
        targetFS.setOwner(path, newOwner, newGroup);
    }
}
From source file:com.inmobi.conduit.distcp.tools.util.TestDistCpUtils.java
License:Apache License
/**
 * Exercises {@code DistCpUtils.preserve} in three steps on a directory whose
 * permission is set to 0 and whose owner/group are set to "nobody":
 * <ol>
 *   <li>with an empty attribute set, nothing on the target changes;</li>
 *   <li>with PERMISSION added, only the permission follows the source;</li>
 *   <li>with GROUP and USER added, owner and group follow the source too.</li>
 * </ol>
 * The two directories are removed in a {@code finally} block so a failed
 * assertion does not leave them behind.
 */
@Test
public void testPreserve() {
    final Path path = new Path("/tmp/abc");
    final Path src = new Path("/tmp/src");
    FileSystem fs = null;
    try {
        fs = FileSystem.get(config);
        EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);

        fs.mkdirs(path);
        fs.mkdirs(src);
        FileStatus srcStatus = fs.getFileStatus(src);

        FsPermission noPerm = new FsPermission((short) 0);
        fs.setPermission(path, noPerm);
        fs.setOwner(path, "nobody", "nobody");

        // Step 1: nothing requested, nothing changes.
        DistCpUtils.preserve(fs, path, srcStatus, attributes);
        FileStatus target = fs.getFileStatus(path);
        Assert.assertEquals(noPerm, target.getPermission());
        Assert.assertEquals("nobody", target.getOwner());
        Assert.assertEquals("nobody", target.getGroup());

        // Step 2: only the permission is requested.
        attributes.add(FileAttribute.PERMISSION);
        DistCpUtils.preserve(fs, path, srcStatus, attributes);
        target = fs.getFileStatus(path);
        Assert.assertEquals(srcStatus.getPermission(), target.getPermission());
        Assert.assertEquals("nobody", target.getOwner());
        Assert.assertEquals("nobody", target.getGroup());

        // Step 3: owner and group are requested as well.
        attributes.add(FileAttribute.GROUP);
        attributes.add(FileAttribute.USER);
        DistCpUtils.preserve(fs, path, srcStatus, attributes);
        target = fs.getFileStatus(path);
        Assert.assertEquals(srcStatus.getPermission(), target.getPermission());
        Assert.assertEquals(srcStatus.getOwner(), target.getOwner());
        Assert.assertEquals(srcStatus.getGroup(), target.getGroup());
    } catch (IOException e) {
        LOG.error("Exception encountered ", e);
        Assert.fail("Preserve test failure");
    } finally {
        if (fs != null) {
            try {
                fs.delete(path, true);
                fs.delete(src, true);
            } catch (IOException e) {
                // Logged rather than rethrown so a cleanup problem cannot mask
                // the assertion failure that is already propagating.
                LOG.error("Exception encountered ", e);
            }
        }
    }
}
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
protected long createMRInput(Path inputPath, Map<FileStatus, String> fileListing, Set<FileStatus> trashSet, Table<String, String, String> checkpointPaths) throws IOException { FileSystem fs = FileSystem.get(srcCluster.getHadoopConf()); createListing(fs, fs.getFileStatus(srcCluster.getDataDir()), fileListing, trashSet, checkpointPaths); // the total size of data present in all files long totalSize = 0; // if file listing is empty, simply return if (fileListing.isEmpty()) { return 0; }/*from ww w .j av a 2s . c o m*/ SequenceFile.Writer out = SequenceFile.createWriter(fs, srcCluster.getHadoopConf(), inputPath, Text.class, FileStatus.class); try { Iterator<Entry<FileStatus, String>> it = fileListing.entrySet().iterator(); while (it.hasNext()) { Entry<FileStatus, String> entry = it.next(); FileStatus status = FileUtil.getFileStatus(entry.getKey(), buffer, in); out.append(new Text(entry.getValue()), status); // Create a sync point after each entry. This will ensure that // SequenceFile // Reader can work at file entry level granularity, given that // SequenceFile // Reader reads from the starting of sync point. out.sync(); totalSize += entry.getKey().getLen(); } } finally { out.close(); } return totalSize; }
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results, Set<FileStatus> trashSet, Table<String, String, String> checkpointPaths) throws IOException { List<FileStatus> streamsFileStatus = new ArrayList<FileStatus>(); FileSystem srcFs = FileSystem.get(srcCluster.getHadoopConf()); for (String stream : streamsToProcess) { streamsFileStatus.add(srcFs.getFileStatus(new Path(srcCluster.getDataDir(), stream))); }/*from www . j av a 2 s . c o m*/ for (FileStatus stream : streamsFileStatus) { String streamName = stream.getPath().getName(); LOG.debug("createListing working on Stream [" + streamName + "]"); FileStatus[] collectors; try { collectors = fs.listStatus(stream.getPath()); } catch (FileNotFoundException ex) { collectors = new FileStatus[0]; } long minOfLatestCollectorTimeStamp = -1; for (FileStatus collector : collectors) { TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>(); // check point for this collector String collectorName = collector.getPath().getName(); String checkPointKey = getCheckPointKey(this.getClass().getSimpleName(), streamName, collectorName); String checkPointValue = null; byte[] value = checkpointProvider.read(checkPointKey); if (value == null) { // In case checkpointKey with newer name format is absent,read old // checkpoint key String oldCheckPointKey = streamName + collectorName; value = checkpointProvider.read(oldCheckPointKey); } if (value != null) checkPointValue = new String(value); LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]"); FileStatus[] files = null; try { files = fs.listStatus(collector.getPath(), new CollectorPathFilter()); } catch (FileNotFoundException e) { } if (files == null) { LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory"); continue; } TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator()); String currentFile = getCurrentFile(fs, files, 
sortedFiles); LOG.debug("last file " + currentFile + " in the collector directory " + collector.getPath()); Iterator<FileStatus> it = sortedFiles.iterator(); numberOfFilesProcessed = 0; long latestCollectorFileTimeStamp = -1; while (it.hasNext() && numberOfFilesProcessed < filesPerCollector) { FileStatus file = it.next(); LOG.debug("Processing " + file.getPath()); /* * fileTimeStamp value will be -1 for the files which are already processed */ long fileTimeStamp = processFile(file, currentFile, checkPointValue, fs, results, collectorPaths, streamName); if (fileTimeStamp > latestCollectorFileTimeStamp) { latestCollectorFileTimeStamp = fileTimeStamp; } } populateTrash(collectorPaths, trashSet); populateCheckpointPathForCollector(checkpointPaths, collectorPaths); if ((latestCollectorFileTimeStamp < minOfLatestCollectorTimeStamp || minOfLatestCollectorTimeStamp == -1) && latestCollectorFileTimeStamp != -1) { minOfLatestCollectorTimeStamp = latestCollectorFileTimeStamp; } } // all files in a collector if (minOfLatestCollectorTimeStamp != -1) { lastProcessedFile.put(streamName, minOfLatestCollectorTimeStamp); } else { LOG.warn("No new files in " + streamName + " stream"); } } }
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
/**
 * Populates {@code fileListing} through {@code createListing} and writes it to
 * {@code inputPath} as text, one line per entry in the form
 * {@code <file path>\t<listing value>\n}.
 *
 * @param inputPath       file to create
 * @param fileListing     listing filled by {@code createListing}
 * @param trashSet        passed through to {@code createListing}
 * @param checkpointPaths passed through to {@code createListing}
 * @throws IOException if listing or writing fails
 */
private void createMRInput(Path inputPath, Map<FileStatus, String> fileListing,
        Set<FileStatus> trashSet, Map<String, FileStatus> checkpointPaths) throws IOException {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());
    createListing(fs, fs.getFileStatus(cluster.getDataDir()), fileListing, trashSet, checkpointPaths);

    FSDataOutputStream out = fs.create(inputPath);
    try {
        for (Entry<FileStatus, String> listed : fileListing.entrySet()) {
            String sourcePath = listed.getKey().getPath().toString();
            out.writeBytes(sourcePath);
            out.writeBytes("\t");
            out.writeBytes(listed.getValue());
            out.writeBytes("\n");
        }
    } finally {
        out.close();
    }
}
From source file:com.inmobi.messaging.consumer.util.HadoopUtil.java
License:Apache License
public static HadoopStreamFile getOlderFile(Path streamDirPrefix, FileSystem fs, Path databusFile) throws IOException { FileStatus stat = fs.getFileStatus(databusFile); HadoopStreamFile hs = HadoopStreamFile.create(stat); Calendar cal = Calendar.getInstance(); Date date = DatabusStreamWaitingReader.getDateFromStreamDir(streamDirPrefix, hs.getParent()); cal.setTime(date);// w ww.jav a 2s . co m return new HadoopStreamFile(DatabusStreamWaitingReader.getMinuteDirPath(streamDirPrefix, cal.getTime()), "myfile", hs.getTimestamp() - 36000); }
From source file:com.inmobi.messaging.consumer.util.HadoopUtil.java
License:Apache License
/**
 * Builds a {@code HadoopStreamFile} named "myfile" whose timestamp is that of
 * {@code databusFile} plus 36000, placed in the minute directory one minute
 * after the date of {@code databusFile}'s parent directory.
 *
 * @param streamDirPrefix stream directory prefix used to derive the date and
 *                        the minute directory path
 * @param fs              file system used to stat {@code databusFile}
 * @param databusFile     the reference file
 * @return the constructed stream file
 * @throws IOException if the file status cannot be obtained
 */
public static HadoopStreamFile getHigherFile(Path streamDirPrefix, FileSystem fs, Path databusFile)
        throws IOException {
    HadoopStreamFile reference = HadoopStreamFile.create(fs.getFileStatus(databusFile));

    Calendar calendar = Calendar.getInstance();
    Date parentDirDate =
            DatabusStreamWaitingReader.getDateFromStreamDir(streamDirPrefix, reference.getParent());
    calendar.setTime(parentDirDate);
    // Move to the following minute directory.
    calendar.add(Calendar.MINUTE, 1);

    return new HadoopStreamFile(
            DatabusStreamWaitingReader.getMinuteDirPath(streamDirPrefix, calendar.getTime()),
            "myfile",
            reference.getTimestamp() + 36000);
}