List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
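Before the project-specific examples below, a minimal standalone sketch of calling this overload directly. The directory names used here are illustrative assumptions, not taken from any of the source files listed on this page.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Hypothetical input directories; replace with real paths.
    Path[] inputs = new Path[] { new Path("/data/streamA"), new Path("/data/streamB") };
    try {
      // Lists every path in the array and returns the concatenated statuses.
      FileStatus[] statuses = fs.listStatus(inputs);
      for (FileStatus status : statuses) {
        System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
      }
    } catch (FileNotFoundException e) {
      // Thrown when one of the listed paths does not exist.
      System.err.println("Missing path: " + e.getMessage());
    }
  }
}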
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
Map<Path, Path> prepareForCommit(long commitTime) throws Exception {
  FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());
  // find final destination paths
  Map<Path, Path> mvPaths = new LinkedHashMap<Path, Path>();
  FileStatus[] categories;
  try {
    categories = fs.listStatus(tmpJobOutputPath);
  } catch (FileNotFoundException e) {
    categories = new FileStatus[0];
  }
  for (FileStatus categoryDir : categories) {
    String categoryName = categoryDir.getPath().getName();
    Path destDir = new Path(srcCluster.getLocalDestDir(categoryName, commitTime));
    FileStatus[] files;
    try {
      files = fs.listStatus(categoryDir.getPath());
    } catch (FileNotFoundException e) {
      files = new FileStatus[0];
    }
    for (FileStatus file : files) {
      Path destPath = new Path(destDir, file.getPath().getName());
      LOG.debug("Moving [" + file.getPath() + "] to [" + destPath + "]");
      mvPaths.put(file.getPath(), destPath);
    }
  }
  publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess);
  return mvPaths;
}
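Note the defensive pattern above: each listStatus call is wrapped in a try/catch so that a directory deleted between job stages is treated as an empty listing instead of failing the commit. Several examples below repeat the same idiom. A sketch of that pattern factored into a helper; the class and method names are ours, not from the source:

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class ListingUtil {
  // A missing directory is treated as an empty listing, mirroring the try/catch blocks above.
  static FileStatus[] listStatusOrEmpty(FileSystem fs, Path dir) throws IOException {
    try {
      return fs.listStatus(dir);
    } catch (FileNotFoundException e) {
      return new FileStatus[0];
    }
  }
}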
From source file:com.inmobi.conduit.local.LocalStreamService.java
License:Apache License
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results,
    Set<FileStatus> trashSet, Table<String, String, String> checkpointPaths) throws IOException {
  List<FileStatus> streamsFileStatus = new ArrayList<FileStatus>();
  FileSystem srcFs = FileSystem.get(srcCluster.getHadoopConf());
  for (String stream : streamsToProcess) {
    streamsFileStatus.add(srcFs.getFileStatus(new Path(srcCluster.getDataDir(), stream)));
  }
  for (FileStatus stream : streamsFileStatus) {
    String streamName = stream.getPath().getName();
    LOG.debug("createListing working on Stream [" + streamName + "]");
    FileStatus[] collectors;
    try {
      collectors = fs.listStatus(stream.getPath());
    } catch (FileNotFoundException ex) {
      collectors = new FileStatus[0];
    }
    long minOfLatestCollectorTimeStamp = -1;
    for (FileStatus collector : collectors) {
      TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>();
      // check point for this collector
      String collectorName = collector.getPath().getName();
      String checkPointKey = getCheckPointKey(this.getClass().getSimpleName(), streamName, collectorName);
      String checkPointValue = null;
      byte[] value = checkpointProvider.read(checkPointKey);
      if (value == null) {
        // In case checkpointKey with newer name format is absent, read old
        // checkpoint key
        String oldCheckPointKey = streamName + collectorName;
        value = checkpointProvider.read(oldCheckPointKey);
      }
      if (value != null)
        checkPointValue = new String(value);
      LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]");
      FileStatus[] files = null;
      try {
        files = fs.listStatus(collector.getPath(), new CollectorPathFilter());
      } catch (FileNotFoundException e) {
      }
      if (files == null) {
        LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory");
        continue;
      }
      TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator());
      String currentFile = getCurrentFile(fs, files, sortedFiles);
      LOG.debug("last file " + currentFile + " in the collector directory " + collector.getPath());
      Iterator<FileStatus> it = sortedFiles.iterator();
      numberOfFilesProcessed = 0;
      long latestCollectorFileTimeStamp = -1;
      while (it.hasNext() && numberOfFilesProcessed < filesPerCollector) {
        FileStatus file = it.next();
        LOG.debug("Processing " + file.getPath());
        /*
         * fileTimeStamp value will be -1 for the files which are already processed
         */
        long fileTimeStamp = processFile(file, currentFile, checkPointValue, fs, results,
            collectorPaths, streamName);
        if (fileTimeStamp > latestCollectorFileTimeStamp) {
          latestCollectorFileTimeStamp = fileTimeStamp;
        }
      }
      populateTrash(collectorPaths, trashSet);
      populateCheckpointPathForCollector(checkpointPaths, collectorPaths);
      if ((latestCollectorFileTimeStamp < minOfLatestCollectorTimeStamp
          || minOfLatestCollectorTimeStamp == -1) && latestCollectorFileTimeStamp != -1) {
        minOfLatestCollectorTimeStamp = latestCollectorFileTimeStamp;
      }
    } // all files in a collector
    if (minOfLatestCollectorTimeStamp != -1) {
      lastProcessedFile.put(streamName, minOfLatestCollectorTimeStamp);
    } else {
      LOG.warn("No new files in " + streamName + " stream");
    }
  }
}
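This example also uses the two-argument overload listStatus(Path, PathFilter); the CollectorPathFilter implementation is not shown on this page. As a rough illustration of how a filter plugs into that overload, here is a minimal PathFilter; the class name and its suffix rule are assumptions, not the real CollectorPathFilter:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Illustrative filter only; the real CollectorPathFilter in conduit may apply different rules.
class SuffixPathFilter implements PathFilter {
  private final String suffix;

  SuffixPathFilter(String suffix) {
    this.suffix = suffix;
  }

  @Override
  public boolean accept(Path path) {
    // Keep only paths whose final component ends with the given suffix.
    return path.getName().endsWith(suffix);
  }
}

// Usage: FileStatus[] files = fs.listStatus(collectorDir, new SuffixPathFilter(".gz"));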
From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java
License:Apache License
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
  FileStatus[] files = createTestData(2, "/conduit/data/stream", true);
  FileStatus[] stream1 = createTestData(2, "/conduit/data/stream1/collector", true);
  FileStatus[] stream3 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector1/file", true);
  FileStatus[] stream4 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream1/collector2/file", true);
  FileStatus[] stream2 = createTestData(2, "/conduit/data/stream2/collector", true);
  FileStatus[] stream5 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector1/file", true);
  FileStatus[] stream6 = createTestData(NUMBER_OF_FILES, "/conduit/data/stream2/collector2/file", true);
  when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
  when(fs.getUri()).thenReturn(new URI("localhost"));
  when(fs.listStatus(cluster.getDataDir())).thenReturn(files);
  when(fs.listStatus(new Path("/conduit/data/stream1"))).thenReturn(stream1);
  when(fs.listStatus(new Path("/conduit/data/stream1/collector1"),
      any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream3);
  when(fs.listStatus(new Path("/conduit/data/stream2"))).thenReturn(stream2);
  when(fs.listStatus(new Path("/conduit/data/stream1/collector2"),
      any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream4);
  when(fs.listStatus(new Path("/conduit/data/stream2/collector1"),
      any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream5);
  when(fs.listStatus(new Path("/conduit/data/stream2/collector2"),
      any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream6);
  Path file = mock(Path.class);
  when(file.makeQualified(any(FileSystem.class))).thenReturn(new Path("/conduit/data/stream1/collector1/"));
}
From source file:com.inmobi.conduit.purge.DataPurgerService.java
License:Apache License
private FileStatus[] getAllFilesInDir(Path dir, FileSystem fs) throws Exception {
  FileStatus[] files = null;
  try {
    files = fs.listStatus(dir);
  } catch (FileNotFoundException e) {
  }
  return files;
}
From source file:com.inmobi.conduit.utils.CollapseFilesInDir.java
License:Apache License
public static void main(String[] args) throws Exception {
  Configuration configuration = new Configuration();
  configuration.set("fs.default.name", args[0]);
  String dir = args[1];
  FileSystem fs = FileSystem.get(configuration);
  FileStatus[] fileList;
  try {
    fileList = fs.listStatus(new Path(dir));
  } catch (FileNotFoundException fe) {
    fileList = null;
  }
  if (fileList != null) {
    if (fileList.length > 1) {
      Set<Path> sourceFiles = new HashSet<Path>();
      Set<String> consumePaths = new HashSet<String>();
      // inputPath can have multiple files due to backlog
      // read all and create a tmp file
      for (int i = 0; i < fileList.length; i++) {
        Path consumeFilePath = fileList[i].getPath().makeQualified(fs);
        sourceFiles.add(consumeFilePath);
        FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
        try {
          while (fsDataInputStream.available() > 0) {
            String fileName = fsDataInputStream.readLine();
            if (fileName != null) {
              consumePaths.add(fileName.trim());
              System.out.println("Adding [" + fileName + "] to pull");
            }
          }
        } finally {
          fsDataInputStream.close();
        }
      }
      Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString());
      FSDataOutputStream out = fs.create(finalPath);
      BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
      try {
        for (String consumePath : consumePaths) {
          System.out.println("Adding sourceFile [" + consumePath + "] to distcp FinalList");
          writer.write(consumePath);
          writer.write("\n");
        }
      } finally {
        writer.close();
      }
      LOG.warn("Final File - [" + finalPath + "]");
      for (Path deletePath : sourceFiles) {
        System.out.println("Deleting - [" + deletePath + "]");
        fs.delete(deletePath);
      }
    }
  }
}
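The read loop above relies on FSDataInputStream.available() and the deprecated DataInputStream.readLine(), which can stop before end-of-file and mishandle non-ASCII bytes. A safer line-reading sketch of the same step; the class and method names are ours, not from the source:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class ConsumePathReader {
  // Reads one path name per line from an HDFS file, trimming whitespace.
  static Set<String> readConsumePaths(FileSystem fs, Path file) throws IOException {
    Set<String> paths = new HashSet<String>();
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(file)));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        paths.add(line.trim());
      }
    } finally {
      reader.close();
    }
    return paths;
  }
}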
From source file:com.inmobi.databus.AbstractService.java
License:Apache License
private Path getLatestDir(FileSystem fs, Path Dir) throws Exception {
  FileStatus[] fileStatus = fs.listStatus(Dir);
  if (fileStatus != null && fileStatus.length > 0) {
    FileStatus latestfile = fileStatus[0];
    for (FileStatus currentfile : fileStatus) {
      if (currentfile.getPath().getName().compareTo(latestfile.getPath().getName()) > 0)
        latestfile = currentfile;
    }
    return latestfile.getPath();
  }
  return null;
}
From source file:com.inmobi.databus.AbstractService.java
License:Apache License
protected Map<String, Set<Path>> publishMissingPaths(FileSystem fs, String destDir) throws Exception {
  Map<String, Set<Path>> missingDirectories = new HashMap<String, Set<Path>>();
  Set<Path> missingdirsinstream = null;
  FileStatus[] fileStatus = fs.listStatus(new Path(destDir));
  LOG.info("Create All the Missing Paths in " + destDir);
  if (fileStatus != null) {
    for (FileStatus file : fileStatus) {
      missingdirsinstream = publishMissingPaths(fs, destDir, System.currentTimeMillis(),
          file.getPath().getName());
      if (missingdirsinstream.size() > 0)
        missingDirectories.put(file.getPath().getName(), missingdirsinstream);
    }
  }
  LOG.info("Done Creating All the Missing Paths in " + destDir);
  return missingDirectories;
}
From source file:com.inmobi.databus.distcp.MirrorStreamService.java
License:Apache License
void createListing(FileSystem fs, FileStatus fileStatus, List<FileStatus> results) throws IOException {
  if (fileStatus.isDir()) {
    FileStatus[] stats = fs.listStatus(fileStatus.getPath());
    if (stats.length == 0) {
      results.add(fileStatus);
      LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
    }
    for (FileStatus stat : stats) {
      createListing(fs, stat, results);
    }
  } else {
    LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
    results.add(fileStatus);
  }
}
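This createListing walks the tree by calling listStatus recursively and deliberately keeps empty directories in the results. For comparison, FileSystem.listFiles(path, true) offers built-in recursion but yields only files, so it would drop the empty-directory case handled above. A rough sketch of that alternative; the class and method names are ours:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

final class RecursiveFileLister {
  // Collects every file (but not empty directories) under root.
  static List<Path> listAllFiles(FileSystem fs, Path root) throws IOException {
    List<Path> result = new ArrayList<Path>();
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true);
    while (it.hasNext()) {
      result.add(it.next().getPath());
    }
    return result;
  }
}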
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
private Map<Path, Path> prepareForCommit(long commitTime, Map<FileStatus, String> fileListing)
    throws Exception {
  FileSystem fs = FileSystem.get(cluster.getHadoopConf());

  // find final destination paths
  Map<Path, Path> mvPaths = new LinkedHashMap<Path, Path>();
  FileStatus[] categories = fs.listStatus(tmpJobOutputPath);
  for (FileStatus categoryDir : categories) {
    String categoryName = categoryDir.getPath().getName();
    Path destDir = new Path(cluster.getLocalDestDir(categoryName, commitTime));
    FileStatus[] files = fs.listStatus(categoryDir.getPath());
    for (FileStatus file : files) {
      Path destPath = new Path(destDir, file.getPath().getName());
      LOG.debug("Moving [" + file.getPath() + "] to [" + destPath + "]");
      mvPaths.put(file.getPath(), destPath);
    }
    publishMissingPaths(fs, cluster.getLocalFinalDestDirRoot(), commitTime, categoryName);
  }

  // find input files for consumer
  Map<Path, Path> consumerCommitPaths = new HashMap<Path, Path>();
  for (Cluster clusterEntry : getConfig().getClusters().values()) {
    Set<String> destStreams = clusterEntry.getDestinationStreams().keySet();
    boolean consumeCluster = false;
    for (String destStream : destStreams) {
      if (clusterEntry.getPrimaryDestinationStreams().contains(destStream)
          && cluster.getSourceStreams().contains(destStream)) {
        consumeCluster = true;
      }
    }
    if (consumeCluster) {
      Path tmpConsumerPath = new Path(tmpPath, clusterEntry.getName());
      boolean isFileOpened = false;
      FSDataOutputStream out = null;
      try {
        for (Path destPath : mvPaths.values()) {
          String category = getCategoryFromDestPath(destPath);
          if (clusterEntry.getDestinationStreams().containsKey(category)) {
            if (!isFileOpened) {
              out = fs.create(tmpConsumerPath);
              isFileOpened = true;
            }
            out.writeBytes(destPath.toString());
            LOG.debug("Adding [" + destPath + "] for consumer [" + clusterEntry.getName()
                + "] to commit Paths in [" + tmpConsumerPath + "]");
            out.writeBytes("\n");
          }
        }
      } finally {
        if (isFileOpened) {
          out.close();
          Path finalConsumerPath = new Path(cluster.getConsumePath(clusterEntry),
              Long.toString(System.currentTimeMillis()));
          LOG.debug("Moving [" + tmpConsumerPath + "] to [ " + finalConsumerPath + "]");
          consumerCommitPaths.put(tmpConsumerPath, finalConsumerPath);
        }
      }
    }
  }

  Map<Path, Path> commitPaths = new LinkedHashMap<Path, Path>();
  commitPaths.putAll(mvPaths);
  commitPaths.putAll(consumerCommitPaths);
  return commitPaths;
}
From source file:com.inmobi.databus.local.LocalStreamService.java
License:Apache License
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results,
    Set<FileStatus> trashSet, Map<String, FileStatus> checkpointPaths, long lastFileTimeout)
    throws IOException {
  FileStatus[] streams = fs.listStatus(fileStatus.getPath());
  for (FileStatus stream : streams) {
    String streamName = stream.getPath().getName();
    LOG.debug("createListing working on Stream [" + streamName + "]");
    FileStatus[] collectors = fs.listStatus(stream.getPath());
    for (FileStatus collector : collectors) {
      TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>();
      // check point for this collector
      String collectorName = collector.getPath().getName();
      String checkPointKey = streamName + collectorName;
      String checkPointValue = null;
      byte[] value = checkpointProvider.read(checkPointKey);
      if (value != null)
        checkPointValue = new String(value);
      LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]");
      FileStatus[] files = fs.listStatus(collector.getPath(), new CollectorPathFilter());
      if (files == null) {
        LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory");
        continue;
      }
      String currentFile = getCurrentFile(fs, files, lastFileTimeout);
      for (FileStatus file : files) {
        processFile(file, currentFile, checkPointValue, fs, results, collectorPaths);
      }
      populateTrash(collectorPaths, trashSet);
      populateCheckpointPathForCollector(checkpointPaths, collectorPaths, checkPointKey);
    } // all files in a collector
  }
}