Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Builds the source-to-destination map for committing the job output.
 *
 * <p>Each entry listed under {@code tmpJobOutputPath} is treated as a category directory;
 * every file listed inside it is mapped to a path of the same name under the directory
 * returned by {@code srcCluster.getLocalDestDir(categoryName, commitTime)}. A listing that
 * fails with {@link FileNotFoundException} is treated as empty. After the map is built,
 * {@code publishMissingPaths} is invoked for {@code streamsToProcess}.
 *
 * @param commitTime commit timestamp handed to {@code getLocalDestDir} and
 *                   {@code publishMissingPaths}
 * @return the moves to perform, in the order the files were listed
 * @throws Exception if a file system call or {@code publishMissingPaths} fails
 */
Map<Path, Path> prepareForCommit(long commitTime) throws Exception {
    final FileSystem fs = FileSystem.get(srcCluster.getHadoopConf());
    final Map<Path, Path> moves = new LinkedHashMap<Path, Path>();

    FileStatus[] categoryDirs;
    try {
        categoryDirs = fs.listStatus(tmpJobOutputPath);
    } catch (FileNotFoundException missingOutputDir) {
        // No job output directory: nothing to move.
        categoryDirs = new FileStatus[0];
    }

    for (int c = 0; c < categoryDirs.length; c++) {
        final Path categoryPath = categoryDirs[c].getPath();
        final Path destDir = new Path(srcCluster.getLocalDestDir(categoryPath.getName(), commitTime));

        FileStatus[] outputs;
        try {
            outputs = fs.listStatus(categoryPath);
        } catch (FileNotFoundException missingCategoryDir) {
            // The category directory is gone: treat it as empty.
            outputs = new FileStatus[0];
        }

        for (int f = 0; f < outputs.length; f++) {
            final Path source = outputs[f].getPath();
            final Path target = new Path(destDir, source.getName());
            LOG.debug("Moving [" + source + "] to [" + target + "]");
            moves.put(source, target);
        }
    }

    publishMissingPaths(fs, srcCluster.getLocalFinalDestDirRoot(), commitTime, streamsToProcess);
    return moves;
}

From source file:com.inmobi.conduit.local.LocalStreamService.java

License:Apache License

/**
 * Walks every stream in {@code streamsToProcess}, and every collector directory below it,
 * handing the collector's files to {@code processFile} and recording trash and checkpoint
 * information.
 *
 * <p>For each stream the smallest "latest file timestamp" seen across its collectors is
 * stored in {@code lastProcessedFile}; a warning is logged when no collector produced one.
 *
 * @param fs              file system used to list collectors and collector files
 * @param fileStatus      not referenced by this implementation
 * @param results         passed through to {@code processFile}
 * @param trashSet        passed to {@code populateTrash}
 * @param checkpointPaths passed to {@code populateCheckpointPathForCollector}
 * @throws IOException if a file system or checkpoint call fails
 */
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results,
        Set<FileStatus> trashSet, Table<String, String, String> checkpointPaths) throws IOException {
    List<FileStatus> streamsFileStatus = new ArrayList<FileStatus>();
    // Stream directories are resolved through the source cluster's own file system,
    // not through the fs argument.
    FileSystem srcFs = FileSystem.get(srcCluster.getHadoopConf());
    for (String stream : streamsToProcess) {
        streamsFileStatus.add(srcFs.getFileStatus(new Path(srcCluster.getDataDir(), stream)));
    }
    for (FileStatus stream : streamsFileStatus) {
        String streamName = stream.getPath().getName();
        LOG.debug("createListing working on Stream [" + streamName + "]");
        FileStatus[] collectors;
        try {
            collectors = fs.listStatus(stream.getPath());
        } catch (FileNotFoundException ex) {
            // A missing stream directory is treated as having no collectors.
            collectors = new FileStatus[0];
        }
        // -1 means "no collector of this stream has reported a timestamp yet".
        long minOfLatestCollectorTimeStamp = -1;
        for (FileStatus collector : collectors) {
            TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>();
            // check point for this collector
            String collectorName = collector.getPath().getName();
            String checkPointKey = getCheckPointKey(this.getClass().getSimpleName(), streamName, collectorName);

            String checkPointValue = null;
            byte[] value = checkpointProvider.read(checkPointKey);
            if (value == null) {
                // In case checkpointKey with newer name format is absent,read old
                // checkpoint key
                String oldCheckPointKey = streamName + collectorName;
                value = checkpointProvider.read(oldCheckPointKey);
            }
            if (value != null)
                checkPointValue = new String(value);
            LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]");
            FileStatus[] files = null;
            try {
                files = fs.listStatus(collector.getPath(), new CollectorPathFilter());
            } catch (FileNotFoundException e) {
                // Deliberately ignored: files stays null and the collector is skipped below.
            }

            if (files == null) {
                LOG.warn("No Files Found in the Collector " + collector.getPath() + " Skipping Directory");
                continue;
            }
            // NOTE(review): sortedFiles is created empty and only iterated here, so
            // getCurrentFile presumably fills it from files - confirm against that helper.
            TreeSet<FileStatus> sortedFiles = new TreeSet<FileStatus>(new FileTimeStampComparator());
            String currentFile = getCurrentFile(fs, files, sortedFiles);
            LOG.debug("last file " + currentFile + " in the collector directory " + collector.getPath());

            Iterator<FileStatus> it = sortedFiles.iterator();
            // The counter is reset per collector but never incremented in this method;
            // presumably processFile advances it - TODO confirm.
            numberOfFilesProcessed = 0;
            long latestCollectorFileTimeStamp = -1;
            while (it.hasNext() && numberOfFilesProcessed < filesPerCollector) {
                FileStatus file = it.next();
                LOG.debug("Processing " + file.getPath());
                /*
                 * fileTimeStamp value will be -1 for the files which are already processed
                 */
                long fileTimeStamp = processFile(file, currentFile, checkPointValue, fs, results,
                        collectorPaths, streamName);
                // Keep the largest timestamp returned for this collector.
                if (fileTimeStamp > latestCollectorFileTimeStamp) {
                    latestCollectorFileTimeStamp = fileTimeStamp;
                }
            }
            populateTrash(collectorPaths, trashSet);
            populateCheckpointPathForCollector(checkpointPaths, collectorPaths);

            // Track the minimum, across collectors, of each collector's latest timestamp;
            // collectors that reported nothing (-1) do not take part.
            if ((latestCollectorFileTimeStamp < minOfLatestCollectorTimeStamp
                    || minOfLatestCollectorTimeStamp == -1) && latestCollectorFileTimeStamp != -1) {
                minOfLatestCollectorTimeStamp = latestCollectorFileTimeStamp;
            }
        } // all files in a collector
        if (minOfLatestCollectorTimeStamp != -1) {
            lastProcessedFile.put(streamName, minOfLatestCollectorTimeStamp);
        } else {
            LOG.warn("No new files in " + streamName + " stream");
        }
    }
}

From source file:com.inmobi.conduit.local.LocalStreamServiceTest.java

License:Apache License

/**
 * Stubs the mocked {@code fs} so that it presents a small directory tree: two streams under
 * the cluster data directory, two collectors under each stream, and {@code NUMBER_OF_FILES}
 * files under each collector.
 *
 * <p>The two-argument {@code listStatus(Path, PathFilter)} stubs pass the path through
 * {@code eq(...)}. Mockito requires that once one argument of a stubbed call is a matcher,
 * every argument is a matcher; combining a raw {@code Path} with {@code any(...)} fails with
 * {@code InvalidUseOfMatchersException}.
 *
 * @param fs      Mockito mock of the file system to stub
 * @param cluster cluster whose data directory is stubbed as the listing root
 * @throws Exception if building the test data or the URI fails
 */
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
    FileStatus[] streams = createTestData(2, "/conduit/data/stream", true);

    FileStatus[] stream1Collectors = createTestData(2, "/conduit/data/stream1/collector", true);

    FileStatus[] stream1Collector1Files = createTestData(NUMBER_OF_FILES,
            "/conduit/data/stream1/collector1/file", true);

    FileStatus[] stream1Collector2Files = createTestData(NUMBER_OF_FILES,
            "/conduit/data/stream1/collector2/file", true);

    FileStatus[] stream2Collectors = createTestData(2, "/conduit/data/stream2/collector", true);

    FileStatus[] stream2Collector1Files = createTestData(NUMBER_OF_FILES,
            "/conduit/data/stream2/collector1/file", true);

    FileStatus[] stream2Collector2Files = createTestData(NUMBER_OF_FILES,
            "/conduit/data/stream2/collector2/file", true);

    when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
    when(fs.getUri()).thenReturn(new URI("localhost"));

    // Single-argument listings: raw values are fine because no matcher is involved.
    when(fs.listStatus(cluster.getDataDir())).thenReturn(streams);
    when(fs.listStatus(new Path("/conduit/data/stream1"))).thenReturn(stream1Collectors);
    when(fs.listStatus(new Path("/conduit/data/stream2"))).thenReturn(stream2Collectors);

    // Filtered listings: the path must be wrapped in eq(...) because the filter is a matcher.
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/conduit/data/stream1/collector1")),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream1Collector1Files);
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/conduit/data/stream1/collector2")),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream1Collector2Files);
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/conduit/data/stream2/collector1")),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream2Collector1Files);
    when(fs.listStatus(org.mockito.Mockito.eq(new Path("/conduit/data/stream2/collector2")),
            any(LocalStreamService.CollectorPathFilter.class))).thenReturn(stream2Collector2Files);

    // NOTE(review): this mocked Path is never handed to fs or returned, so the stub below
    // has no visible effect within this method - confirm whether it is still needed.
    Path file = mock(Path.class);
    when(file.makeQualified(any(FileSystem.class))).thenReturn(new Path("/conduit/data/stream1/collector1/"));
}

From source file:com.inmobi.conduit.purge.DataPurgerService.java

License:Apache License

/**
 * Lists the entries of {@code dir} on {@code fs}.
 *
 * @param dir directory to list
 * @param fs  file system to query
 * @return whatever {@code fs.listStatus(dir)} returns, or {@code null} when that call
 *         throws {@link FileNotFoundException}
 * @throws Exception if listing fails for any other reason
 */
private FileStatus[] getAllFilesInDir(Path dir, FileSystem fs) throws Exception {
    try {
        return fs.listStatus(dir);
    } catch (FileNotFoundException missingDir) {
        // A missing directory is reported to the caller as null rather than as an error.
        return null;
    }
}

From source file:com.inmobi.conduit.utils.CollapseFilesInDir.java

License:Apache License

/**
 * Collapses all files in a directory into a single file.
 *
 * <p>Every file in the directory is read line by line; the distinct, trimmed lines are
 * written (one per line) to a new file in the same directory named after the current time
 * in milliseconds, and the original files are then deleted. Nothing happens when the
 * directory does not exist or holds at most one entry.
 *
 * <p>Input is decoded and output is encoded as UTF-8, so non-ASCII lines survive the round
 * trip regardless of the platform default charset.
 *
 * @param args {@code args[0]} is the value for {@code fs.default.name};
 *             {@code args[1]} is the directory to collapse
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if any file system operation fails
 */
public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        throw new IllegalArgumentException("Usage: CollapseFilesInDir <fs.default.name> <dir>");
    }
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", args[0]);
    String dir = args[1];
    FileSystem fs = FileSystem.get(configuration);

    FileStatus[] fileList;
    try {
        fileList = fs.listStatus(new Path(dir));
    } catch (FileNotFoundException fe) {
        fileList = null;
    }
    // Collapsing only makes sense when there is more than one file.
    if (fileList == null || fileList.length <= 1) {
        return;
    }

    Set<Path> sourceFiles = new HashSet<Path>();
    Set<String> consumePaths = new HashSet<String>();
    //inputPath has have multiple files due to backlog
    //read all and create a tmp file
    for (FileStatus status : fileList) {
        Path consumeFilePath = status.getPath().makeQualified(fs);
        sourceFiles.add(consumeFilePath);
        // Read through a Reader until end of stream instead of polling available() and
        // calling the deprecated DataInputStream.readLine(), which mis-decodes bytes.
        java.io.BufferedReader reader = new java.io.BufferedReader(
                new java.io.InputStreamReader(fs.open(consumeFilePath), "UTF-8"));
        try {
            String fileName;
            while ((fileName = reader.readLine()) != null) {
                consumePaths.add(fileName.trim());
                System.out.println("Adding [" + fileName + "] to pull");
            }
        } finally {
            reader.close();
        }
    }

    Path finalPath = new Path(dir, Long.toString(System.currentTimeMillis()));
    FSDataOutputStream out = fs.create(finalPath);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
    try {
        for (String consumePath : consumePaths) {
            System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
            writer.write(consumePath);
            writer.write("\n");
        }
    } finally {
        writer.close();
    }
    LOG.warn("Final File - [" + finalPath + "]");

    // The sources are removed only after the collapsed file has been written and closed.
    for (Path deletePath : sourceFiles) {
        System.out.println("Deleting - [" + deletePath + "]");
        fs.delete(deletePath);
    }
}

From source file:com.inmobi.databus.AbstractService.java

License:Apache License

/**
 * Returns the entry of {@code Dir} whose name is greatest in {@link String#compareTo}
 * order; when several entries share that name the first one listed wins.
 *
 * @param fs  file system to query
 * @param Dir directory whose entries are compared
 * @return path of the selected entry, or {@code null} when the listing is {@code null}
 *         or empty
 * @throws Exception if listing the directory fails
 */
private Path getLatestDir(FileSystem fs, Path Dir) throws Exception {
    FileStatus[] entries = fs.listStatus(Dir);
    if (entries == null || entries.length == 0) {
        return null;
    }

    FileStatus newest = entries[0];
    for (int i = 1; i < entries.length; i++) {
        String candidateName = entries[i].getPath().getName();
        if (candidateName.compareTo(newest.getPath().getName()) > 0) {
            newest = entries[i];
        }
    }
    return newest.getPath();
}

From source file:com.inmobi.databus.AbstractService.java

License:Apache License

/**
 * Invokes the four-argument {@code publishMissingPaths} for every entry listed under
 * {@code destDir}, using the current time, and collects the non-empty results.
 *
 * @param fs      file system to operate on
 * @param destDir directory whose entries are treated as stream names
 * @return map from entry name to the set returned for it; entries with an empty set are
 *         left out
 * @throws Exception if listing or a per-entry call fails
 */
protected Map<String, Set<Path>> publishMissingPaths(FileSystem fs, String destDir) throws Exception {
    Map<String, Set<Path>> missingByStream = new HashMap<String, Set<Path>>();
    FileStatus[] streamDirs = fs.listStatus(new Path(destDir));
    LOG.info("Create All the Missing Paths in " + destDir);

    if (streamDirs != null) {
        for (int i = 0; i < streamDirs.length; i++) {
            String streamName = streamDirs[i].getPath().getName();
            Set<Path> created = publishMissingPaths(fs, destDir, System.currentTimeMillis(), streamName);
            if (!created.isEmpty()) {
                missingByStream.put(streamName, created);
            }
        }
    }

    LOG.info("Done Creating All the Missing Paths in " + destDir);
    return missingByStream;
}

From source file:com.inmobi.databus.distcp.MirrorStreamService.java

License:Apache License

/**
 * Recursively collects the leaves of the tree rooted at {@code fileStatus} into
 * {@code results}: every non-directory entry, plus every directory that has no children.
 *
 * @param fs         file system to list directories on
 * @param fileStatus entry to start from
 * @param results    list the collected entries are appended to
 * @throws IOException if listing a directory fails
 */
void createListing(FileSystem fs, FileStatus fileStatus, List<FileStatus> results) throws IOException {
    if (!fileStatus.isDir()) {
        // Plain file: it is a leaf by definition.
        LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
        results.add(fileStatus);
        return;
    }

    FileStatus[] children = fs.listStatus(fileStatus.getPath());
    if (children.length == 0) {
        // An empty directory is recorded itself.
        results.add(fileStatus);
        LOG.debug("createListing :: Adding [" + fileStatus.getPath() + "]");
    }
    for (int i = 0; i < children.length; i++) {
        createListing(fs, children[i], results);
    }
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Builds the full set of moves needed to commit the job output.
 *
 * <p>First, every file under each category directory of {@code tmpJobOutputPath} is mapped
 * to a same-named path under {@code cluster.getLocalDestDir(categoryName, commitTime)}, and
 * {@code publishMissingPaths} is invoked for that category. Then, for every configured
 * cluster that has a primary destination stream which is also a source stream of
 * {@code cluster}, the destination paths belonging to that cluster's destination streams
 * are written, one per line, to a temporary file under {@code tmpPath}; that file is mapped
 * to a time-stamped name under {@code cluster.getConsumePath(...)}.
 *
 * @param commitTime  commit timestamp handed to {@code getLocalDestDir} and
 *                    {@code publishMissingPaths}
 * @param fileListing not referenced by this implementation
 * @return the data-file moves in listing order, followed by the consumer-file moves
 * @throws Exception if a file system call or helper fails
 */
private Map<Path, Path> prepareForCommit(long commitTime, Map<FileStatus, String> fileListing)
        throws Exception {
    final FileSystem fs = FileSystem.get(cluster.getHadoopConf());

    // Step 1: final destination of every produced file.
    final Map<Path, Path> moves = new LinkedHashMap<Path, Path>();
    for (FileStatus categoryDir : fs.listStatus(tmpJobOutputPath)) {
        final String categoryName = categoryDir.getPath().getName();
        final Path destDir = new Path(cluster.getLocalDestDir(categoryName, commitTime));
        for (FileStatus produced : fs.listStatus(categoryDir.getPath())) {
            final Path source = produced.getPath();
            final Path target = new Path(destDir, source.getName());
            LOG.debug("Moving [" + source + "] to [" + target + "]");
            moves.put(source, target);
        }
        publishMissingPaths(fs, cluster.getLocalFinalDestDirRoot(), commitTime, categoryName);
    }

    // Step 2: one input file per consuming cluster.
    final Map<Path, Path> consumerMoves = new HashMap<Path, Path>();
    for (Cluster consumer : getConfig().getClusters().values()) {
        boolean consumes = false;
        for (String stream : consumer.getDestinationStreams().keySet()) {
            if (consumer.getPrimaryDestinationStreams().contains(stream)
                    && cluster.getSourceStreams().contains(stream)) {
                consumes = true;
            }
        }
        if (!consumes) {
            continue;
        }

        final Path tmpConsumerPath = new Path(tmpPath, consumer.getName());
        FSDataOutputStream out = null;
        boolean opened = false;
        try {
            for (Path target : moves.values()) {
                final String category = getCategoryFromDestPath(target);
                if (!consumer.getDestinationStreams().containsKey(category)) {
                    continue;
                }
                // The file is created lazily, on the first path that belongs to this consumer.
                if (!opened) {
                    out = fs.create(tmpConsumerPath);
                    opened = true;
                }
                out.writeBytes(target.toString());
                LOG.debug("Adding [" + target + "]  for consumer [" + consumer.getName()
                        + "] to commit Paths in [" + tmpConsumerPath + "]");

                out.writeBytes("\n");
            }
        } finally {
            if (opened) {
                out.close();
                final Path finalConsumerPath = new Path(cluster.getConsumePath(consumer),
                        Long.toString(System.currentTimeMillis()));
                LOG.debug("Moving [" + tmpConsumerPath + "] to [ " + finalConsumerPath + "]");
                consumerMoves.put(tmpConsumerPath, finalConsumerPath);
            }
        }
    }

    // Data files first (in listing order), consumer files afterwards.
    final Map<Path, Path> commitPaths = new LinkedHashMap<Path, Path>(moves);
    commitPaths.putAll(consumerMoves);
    return commitPaths;
}

From source file:com.inmobi.databus.local.LocalStreamService.java

License:Apache License

/**
 * Walks the stream directories under {@code fileStatus} and the collector directories under
 * each stream, handing every file that passes {@code CollectorPathFilter} to
 * {@code processFile}, then recording trash and checkpoint information per collector.
 *
 * <p>The checkpoint key of a collector is the stream name immediately followed by the
 * collector name. A collector whose filtered listing is {@code null} is skipped with a
 * warning.
 *
 * @param fs              file system to list on
 * @param fileStatus      root directory whose children are the streams
 * @param results         passed through to {@code processFile}
 * @param trashSet        passed to {@code populateTrash}
 * @param checkpointPaths passed to {@code populateCheckpointPathForCollector}
 * @param lastFileTimeout passed to {@code getCurrentFile}
 * @throws IOException if a file system or checkpoint call fails
 */
public void createListing(FileSystem fs, FileStatus fileStatus, Map<FileStatus, String> results,
        Set<FileStatus> trashSet, Map<String, FileStatus> checkpointPaths, long lastFileTimeout)
        throws IOException {
    for (FileStatus streamDir : fs.listStatus(fileStatus.getPath())) {
        final String streamName = streamDir.getPath().getName();
        LOG.debug("createListing working on Stream [" + streamName + "]");

        for (FileStatus collectorDir : fs.listStatus(streamDir.getPath())) {
            // Look up the checkpoint stored for this collector, if any.
            final String collectorName = collectorDir.getPath().getName();
            final String checkPointKey = streamName + collectorName;
            final byte[] rawCheckpoint = checkpointProvider.read(checkPointKey);
            final String checkPointValue = (rawCheckpoint == null) ? null : new String(rawCheckpoint);
            LOG.debug("CheckPoint Key [" + checkPointKey + "] value [ " + checkPointValue + "]");

            final FileStatus[] candidates = fs.listStatus(collectorDir.getPath(), new CollectorPathFilter());
            if (candidates == null) {
                LOG.warn("No Files Found in the Collector " + collectorDir.getPath() + " Skipping Directory");
                continue;
            }

            final String currentFile = getCurrentFile(fs, candidates, lastFileTimeout);
            final TreeMap<String, FileStatus> collectorPaths = new TreeMap<String, FileStatus>();
            for (int i = 0; i < candidates.length; i++) {
                processFile(candidates[i], currentFile, checkPointValue, fs, results, collectorPaths);
            }
            populateTrash(collectorPaths, trashSet);
            populateCheckpointPathForCollector(checkpointPaths, collectorPaths, checkPointKey);
        }
    }
}