Example usage for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs FileSystem listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using the default path filter.
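
For reference, here is a minimal, self-contained sketch of calling this overload directly. The paths /data/in and /data/out and the default Configuration are assumptions made for this example, not taken from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Obtain a FileSystem from the default configuration;
        // fs.defaultFS determines whether this is local or HDFS.
        FileSystem fs = FileSystem.get(new Configuration());

        // listStatus(Path[]) lists all given paths in one call, filtering
        // the entries through the default path filter. Per the prototype
        // above, it throws FileNotFoundException if a path does not exist.
        Path[] inputs = new Path[] { new Path("/data/in"), new Path("/data/out") };
        FileStatus[] statuses = fs.listStatus(inputs);

        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + " len=" + status.getLen()
                    + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}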

Usage

From source file:com.turn.camino.render.functions.FileSystemFunctionsTest.java

License:Open Source License

/**
 * Set up environment
 */
@BeforeClass
public void setUp() throws IOException {
    // mock environment
    FileSystem fileSystem = mock(FileSystem.class);
    FileStatus[] fss = new FileStatus[] {
            new FileStatus(1200000L, false, 3, 1000L, 1409302856296L,
                    new org.apache.hadoop.fs.Path("/a/b/1.dat")),
            new FileStatus(1400000L, false, 3, 1000L, 1409302867303L,
                    new org.apache.hadoop.fs.Path("/a/b/2.dat")),
            new FileStatus(1060000L, false, 3, 1000L, 1409302844187L,
                    new org.apache.hadoop.fs.Path("/a/b/3.dat")) };
    org.apache.hadoop.fs.Path dir = new org.apache.hadoop.fs.Path("/a/b");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(true);
    when(fileSystem.listStatus(dir)).thenReturn(fss);

    when(fileSystem.exists(new org.apache.hadoop.fs.Path("/x/y"))).thenReturn(false);

    dir = new org.apache.hadoop.fs.Path("/u/v");
    when(fileSystem.exists(dir)).thenReturn(true);
    when(fileSystem.isDirectory(dir)).thenReturn(false);

    doThrow(new IOException()).when(fileSystem).listStatus(new org.apache.hadoop.fs.Path("/foo"));

    context = mock(Context.class);
    Env env = mock(Env.class);
    when(context.getEnv()).thenReturn(env);
    when(env.getCurrentTime()).thenReturn(1409389256296L);
    when(env.getTimeZone()).thenReturn(TimeZone.getTimeZone("GMT"));
    when(env.getFileSystem()).thenReturn(fileSystem);
}

From source file:com.twitter.elephanttwin.util.HdfsFsWalker.java

License:Apache License

private void walkInternal(FileStatus fileStatus, FileSystem fs,
        Functional.F2<Boolean, FileStatus, FileSystem> evalFunc, int nestingLevel) throws IOException {
    if (pathFilter != null && !pathFilter.accept(fileStatus.getPath())) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Path Filter did not accept " + fileStatus.getPath() + ", skipping.");
        }
        return;
    }

    // Recursively walk subdirectories
    if (fileStatus.isDir()) {
        FileStatus[] statuses = fs.listStatus(fileStatus.getPath());
        if (statuses != null) {
            for (FileStatus childStatus : statuses) {
                walkInternal(childStatus, fs, evalFunc, nestingLevel + 1);
            }
        }
    }

    // Finally, evaluate the current directory.
    try {
        evalFunc.eval(fileStatus, fs);
    } catch (RuntimeException e) {
        throw new IOException(e);
    }
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

public static boolean isValidFile(final FileSystem hdfs, final String path) throws IOException {
    FileStatus[] statuses = hdfs.listStatus(new Path(path));
    return (statuses.length == 1 && !statuses[0].isDir() && statuses[0].getBlockSize() > 0L);
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

/**
 * @param result receives the FileStatus entries that pass the filtering conditions
 * @param fs the file system to traverse
 * @param path the starting path
 * @param dirFilter filter applied to directories only
 * @param fileFilter filter applied to files only
 * @throws IOException
 */
public static void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter dirFilter, PathFilter fileFilter) throws IOException {
    FileStatus[] stats = fs.listStatus(path);
    if (stats != null) {
        for (FileStatus stat : stats) {
            if (stat.isDir() && dirFilter.accept(stat.getPath())) {
                addInputPathRecursively(result, fs, stat.getPath(), dirFilter, fileFilter);
            } else {
                if (fileFilter.accept(stat.getPath())) {
                    result.add(stat);
                }
            }
        }
    }
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {

    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }
    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }
    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);

    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }

    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}

From source file:com.twitter.hraven.etl.FileLister.java

License:Apache License

/**
 * Recursively traverses the directories under the given path to collect
 * the list of files accepted by the input path range filter.
 */
private static void traverseDirs(List<FileStatus> fileStatusesList, FileSystem hdfs, Path inputPath,
        JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter) throws IOException {
    // get all the files and dirs in the current dir
    FileStatus allFiles[] = hdfs.listStatus(inputPath);
    for (FileStatus aFile : allFiles) {
        if (aFile.isDir()) {
            //recurse here
            traverseDirs(fileStatusesList, hdfs, aFile.getPath(), jobFileModifiedRangePathFilter);
        } else {
            // check if the pathFilter is accepted for this file
            if (jobFileModifiedRangePathFilter.accept(aFile.getPath())) {
                fileStatusesList.add(aFile);
            }
        }
    }
}

From source file:com.twitter.pig.backend.hadoop.executionengine.tez.TezJobControlCompiler.java

License:Apache License

/**
 * Walks the temporary directory structure to move (rename) files
 * to their final location.
 */
private void moveResults(Path p, String rem, FileSystem fs) throws IOException {
    for (FileStatus fstat : fs.listStatus(p)) {
        Path src = fstat.getPath();
        if (fstat.isDir()) {
            log.info("mkdir: " + src);
            fs.mkdirs(removePart(src, rem));
            moveResults(fstat.getPath(), rem, fs);
        } else {
            Path dst = removePart(src, rem);
            log.info("mv: " + src + " " + dst);
            fs.rename(src, dst);
        }
    }
}

From source file:com.uber.hoodie.cli.commands.HoodieLogFileCommand.java

License:Apache License

@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
public String showLogFileCommits(
        @CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final String logFilePathPattern,
        @CliOption(key = {
                "limit" }, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
        @CliOption(key = {
                "sortBy" }, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
        @CliOption(key = {
                "desc" }, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
        @CliOption(key = {
                "headeronly" }, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
        throws IOException {

    FileSystem fs = HoodieCLI.tableMetadata.getFs();
    List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
            .map(status -> status.getPath().toString()).collect(Collectors.toList());
    Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata = Maps
            .newHashMap();
    int totalEntries = 0;
    int numCorruptBlocks = 0;
    int dummyInstantTimeCount = 0;

    for (String logFilePath : logFilePaths) {
        FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
        Schema writerSchema = new AvroSchemaConverter().convert(
                SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath)));
        HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()),
                writerSchema);

        // read the avro blocks
        while (reader.hasNext()) {
            HoodieLogBlock n = reader.next();
            String instantTime;
            int recordCount = 0;
            if (n instanceof HoodieCorruptBlock) {
                try {
                    instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                    if (instantTime == null) {
                        throw new Exception("Invalid instant time " + instantTime);
                    }
                } catch (Exception e) {
                    numCorruptBlocks++;
                    instantTime = "corrupt_block_" + numCorruptBlocks;
                    // could not read metadata for corrupt block
                }
            } else {
                instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME);
                if (instantTime == null) {
                    // This can happen when reading archived commit files since they were written without any instant time
                    dummyInstantTimeCount++;
                    instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
                }
                if (n instanceof HoodieAvroDataBlock) {
                    recordCount = ((HoodieAvroDataBlock) n).getRecords().size();
                }
            }
            if (commitCountAndMetadata.containsKey(instantTime)) {
                commitCountAndMetadata.get(instantTime).add(new Tuple3<>(n.getBlockType(),
                        new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
                totalEntries++;
            } else {
                List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list = new ArrayList<>();
                list.add(new Tuple3<>(n.getBlockType(),
                        new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
                commitCountAndMetadata.put(instantTime, list);
                totalEntries++;
            }
        }
        reader.close();
    }
    List<Comparable[]> rows = new ArrayList<>();
    int i = 0;
    ObjectMapper objectMapper = new ObjectMapper();
    for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata
            .entrySet()) {
        String instantTime = entry.getKey().toString();
        for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry
                .getValue()) {
            Comparable[] output = new Comparable[5];
            output[0] = instantTime;
            output[1] = tuple3._3();
            output[2] = tuple3._1().toString();
            output[3] = objectMapper.writeValueAsString(tuple3._2()._1());
            output[4] = objectMapper.writeValueAsString(tuple3._2()._2());
            rows.add(output);
            i++;
        }
    }

    TableHeader header = new TableHeader().addTableHeaderField("InstantTime").addTableHeaderField("RecordCount")
            .addTableHeaderField("BlockType").addTableHeaderField("HeaderMetadata")
            .addTableHeaderField("FooterMetadata");

    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}

From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java

License:Apache License

public String getFilenameForRecord(FileSystem fs, final HoodieRecord record, String fileId) {
    try {
        FileStatus[] files = fs.listStatus(new Path(basePath, record.getPartitionPath()));
        Map<String, List<FileStatus>> fileIdToVersions = groupFilesByFileId(files, commits.lastCommit());
        // If the record is not found
        if (!fileIdToVersions.containsKey(fileId)) {
            throw new FileNotFoundException("Cannot find valid versions for fileId " + fileId);
        }

        List<FileStatus> statuses = fileIdToVersions.get(fileId);
        return statuses.get(0).getPath().getName();
    } catch (IOException e) {
        throw new HoodieIOException("Could not get Filename for record " + record, e);
    }
}

From source file:com.uber.hoodie.common.model.HoodieTableMetadata.java

License:Apache License

/**
 * Get only the latest file in the partition, with the precondition
 * commitTime(file) <= maxCommitTime.
 *
 * @param fs the file system
 * @param partitionPathStr relative path of the partition
 * @param maxCommitTime upper bound (inclusive) on the commit time
 * @return the latest valid file for each fileId in the partition
 */
public FileStatus[] getLatestVersionInPartition(FileSystem fs, String partitionPathStr, String maxCommitTime) {
    try {
        Path partitionPath = new Path(basePath, partitionPathStr);
        if (!fs.exists(partitionPath)) {
            return new FileStatus[0];
        }
        FileStatus[] files = fs.listStatus(partitionPath);
        Map<String, List<FileStatus>> fileIdToVersions = groupFilesByFileId(files, commits.lastCommit());
        HashMap<String, FileStatus> validFiles = new HashMap<>();
        for (String fileId : fileIdToVersions.keySet()) {
            List<FileStatus> versions = fileIdToVersions.get(fileId);
            for (FileStatus file : versions) {
                String filename = file.getPath().getName();
                String commitTime = FSUtils.getCommitTime(filename);
                if (HoodieCommits.isCommit1BeforeOrOn(commitTime, maxCommitTime)) {
                    validFiles.put(fileId, file);
                    break;
                }
            }
        }
        return validFiles.values().toArray(new FileStatus[validFiles.size()]);
    } catch (IOException e) {
        throw new HoodieIOException("Could not get latest versions in Partition " + partitionPathStr, e);
    }
}