List of usage examples for org.apache.hadoop.fs.FileSystem listStatus

public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException

listStatus returns the FileStatus of each entry directly under the given path(s); it does not recurse into sub-directories, which is why most of the examples below walk directory trees themselves. All of the examples call the single-Path overload.
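For orientation, here is a minimal sketch of a basic call before the collected examples. The directory /tmp/data and the default Configuration are assumptions made purely for illustration; they do not come from any of the source files listed below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical directory; replace with a path that exists on your file system.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listStatus returns one FileStatus per entry directly under the directory.
        FileStatus[] entries = fs.listStatus(dir);
        for (FileStatus entry : entries) {
            // Distinguish files from sub-directories; recurse manually if needed.
            if (entry.isDirectory()) {
                System.out.println("dir:  " + entry.getPath());
            } else {
                System.out.println("file: " + entry.getPath() + " (" + entry.getLen() + " bytes)");
            }
        }
    }
}

Note that listStatus(Path) throws FileNotFoundException when the path does not exist, so callers that cannot guarantee existence typically check fs.exists(path) first or catch the exception, as some of the examples below do.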
From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory,
        List<String> filePrefixes, boolean deleteEmptyDirectories) {
    FileStatus[] allFiles;
    try {
        allFiles = fileSystem.listStatus(directory);
    }
    catch (IOException e) {
        ImmutableList.Builder<String> notDeletedItems = ImmutableList.builder();
        notDeletedItems.add(directory.toString() + "/**");
        return new RecursiveDeleteResult(false, notDeletedItems.build());
    }

    boolean allDescendentsDeleted = true;
    ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder();
    for (FileStatus fileStatus : allFiles) {
        if (HadoopFileStatus.isFile(fileStatus)) {
            Path filePath = fileStatus.getPath();
            String fileName = filePath.getName();
            boolean eligible = false;
            for (String filePrefix : filePrefixes) {
                if (fileName.startsWith(filePrefix)) {
                    eligible = true;
                    break;
                }
            }
            if (eligible) {
                if (!deleteIfExists(fileSystem, filePath, false)) {
                    allDescendentsDeleted = false;
                    notDeletedEligibleItems.add(filePath.toString());
                }
            }
            else {
                allDescendentsDeleted = false;
            }
        }
        else if (HadoopFileStatus.isDirectory(fileStatus)) {
            RecursiveDeleteResult subResult = doRecursiveDeleteFiles(fileSystem, fileStatus.getPath(),
                    filePrefixes, deleteEmptyDirectories);
            if (!subResult.isDirectoryNoLongerExists()) {
                allDescendentsDeleted = false;
            }
            if (!subResult.getNotDeletedEligibleItems().isEmpty()) {
                notDeletedEligibleItems.addAll(subResult.getNotDeletedEligibleItems());
            }
        }
        else {
            allDescendentsDeleted = false;
            notDeletedEligibleItems.add(fileStatus.getPath().toString());
        }
    }
    if (allDescendentsDeleted && deleteEmptyDirectories) {
        verify(notDeletedEligibleItems.build().isEmpty());
        if (!deleteIfExists(fileSystem, directory, false)) {
            return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/"));
        }
        return new RecursiveDeleteResult(true, ImmutableList.of());
    }
    return new RecursiveDeleteResult(false, notDeletedEligibleItems.build());
}
From source file:com.flipkart.fdp.migration.distcp.utils.FileCountDriver.java
License:Apache License
public List<String> getFileStatusRecursive(Path path, FileSystem fs, String destBasePath) throws IOException {
    List<String> response = new ArrayList<String>();
    FileStatus file = fs.getFileStatus(path);
    if (file != null && file.isFile()) {
        response.add(trimExtension(file.getPath().toUri().getPath(), destBasePath));
        return response;
    }

    FileStatus[] fstats = fs.listStatus(path);
    if (fstats != null && fstats.length > 0) {
        for (FileStatus fstat : fstats) {
            if (fstat.isDirectory()) {
                response.addAll(getFileStatusRecursive(fstat.getPath(), fs, destBasePath));
            } else {
                response.add(trimExtension(fstat.getPath().toUri().getPath(), destBasePath));
            }
        }
    }
    return response;
}
From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java
License:Apache License
/**
 * If we have a directory, recursively gather the files we care about for this job.
 *
 * @param file Root file/directory.
 * @param job Job context.
 * @return All files we care about.
 * @throws IOException
 */
private Collection<FileStatus> handleFile(final FileStatus file, final JobContext job) throws IOException {
    final List<FileStatus> results = Lists.newArrayList();

    if (file.isDir()) {
        final Path p = file.getPath();
        LOG.debug("Expanding {}", p);
        final FileSystem fs = p.getFileSystem(job.getConfiguration());
        final FileStatus[] children = fs.listStatus(p);
        for (FileStatus child : children) {
            results.addAll(handleFile(child, job));
        }
    } else {
        results.add(file);
    }

    return results;
}
From source file:com.fullcontact.sstable.index.SSTableIndexIndexer.java
License:Apache License
public void index(final Path sstablePath) throws IOException {
    final FileSystem fileSystem = FileSystem.get(URI.create(sstablePath.toString()), configuration);
    final FileStatus fileStatus = fileSystem.getFileStatus(sstablePath);

    if (fileStatus.isDir()) {
        LOG.info("SSTable Indexing directory {}", sstablePath);
        final FileStatus[] statuses = fileSystem.listStatus(sstablePath);
        for (final FileStatus childStatus : statuses) {
            index(childStatus.getPath());
        }
    } else if (sstablePath.toString().endsWith(SST_EXTENSION)) {
        final Path sstableIndexPath = new Path(sstablePath.toString() + SSTableIndexIndex.SSTABLE_INDEX_SUFFIX);
        if (fileSystem.exists(sstableIndexPath)) {
            LOG.info("Skipping as SSTable index file already exists for {}", sstablePath);
        } else {
            // Kick a thread for the index.
            final ListenableFuture<IndexRequest> indexFuture = service.submit(new Callable<IndexRequest>() {
                @Override
                public IndexRequest call() throws Exception {
                    final long startTime = System.currentTimeMillis();
                    final long fileSize = fileStatus.getLen();

                    LOG.info("Indexing SSTABLE Indexing file {}, size {} GB...", sstablePath,
                            decimalFormat.format(fileSize / (1024.0 * 1024.0 * 1024.0)));
                    indexSingleFile(fileSystem, sstablePath);

                    return new IndexRequest(sstableIndexPath, startTime, fileSize);
                }
            });

            Futures.addCallback(indexFuture, new FutureCallback<IndexRequest>() {
                public void onSuccess(final IndexRequest indexRequest) {
                    long indexSize = 0;
                    try {
                        indexSize = fileSystem.getFileStatus(indexRequest.getIndexPath()).getLen();
                    } catch (IOException e) {
                        LOG.error("Error getting file status for index path: {}", indexRequest.getIndexPath());
                    }

                    final double elapsed = (System.currentTimeMillis() - indexRequest.getStartTime()) / 1000.0;

                    LOG.info("Completed SSTABLE Indexing in {} seconds ({} MB/s). Index size is {} KB.",
                            decimalFormat.format(elapsed),
                            decimalFormat.format(indexRequest.getFileSize() / (1024.0 * 1024.0 * elapsed)),
                            decimalFormat.format(indexSize / 1024.0));
                }

                public void onFailure(Throwable e) {
                    LOG.error("Failed to index.", e);
                }
            });
        }
    }
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java
License:Apache License
/**
 * Returns a list of hoplogs present in the bucket's directory, expected to be called during
 * hoplog set initialization
 */
List<Hoplog> identifyAndLoadSortedOplogs(boolean countSize) throws IOException {
    FileSystem fs = store.getFileSystem();
    if (!fs.exists(bucketPath)) {
        return new ArrayList<Hoplog>();
    }

    FileStatus allFiles[] = fs.listStatus(bucketPath);
    ArrayList<FileStatus> validFiles = new ArrayList<FileStatus>();
    for (FileStatus file : allFiles) {
        // All hoplog files contribute to disk usage
        Matcher matcher = HOPLOG_NAME_PATTERN.matcher(file.getPath().getName());
        if (!matcher.matches()) {
            // not a hoplog
            continue;
        }

        // account for the disk used by this file
        if (countSize) {
            incrementDiskUsage(file.getLen());
        }

        // All valid hoplog files must match the regex
        matcher = SORTED_HOPLOG_PATTERN.matcher(file.getPath().getName());
        if (matcher.matches()) {
            validFiles.add(file);
        }
    }

    FileStatus[] markers = getExpiryMarkers();
    FileStatus[] validHoplogs = filterValidHoplogs(validFiles.toArray(new FileStatus[validFiles.size()]), markers);

    ArrayList<Hoplog> results = new ArrayList<Hoplog>();
    if (validHoplogs == null || validHoplogs.length == 0) {
        return results;
    }

    for (int i = 0; i < validHoplogs.length; i++) {
        // Skip directories
        if (validHoplogs[i].isDirectory()) {
            continue;
        }

        final Path p = validHoplogs[i].getPath();
        // skip empty file
        if (fs.getFileStatus(p).getLen() <= 0) {
            continue;
        }

        Hoplog hoplog = new HFileSortedOplog(store, p, store.getBlockCache(), stats, store.getStats());
        results.add(hoplog);
    }
    return results;
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
public static Collection<Collection<FileStatus>> getBucketHoplogs(Path regionPath, FileSystem fs, String type,
        long start, long end) throws IOException {
    Collection<Collection<FileStatus>> allBuckets = new ArrayList<Collection<FileStatus>>();

    // hoplog file names follow this pattern
    String HOPLOG_NAME_REGEX = AbstractHoplogOrganizer.HOPLOG_NAME_REGEX + type;
    String EXPIRED_HOPLOG_NAME_REGEX = HOPLOG_NAME_REGEX + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
    final Pattern pattern = Pattern.compile(HOPLOG_NAME_REGEX);
    final Pattern expiredPattern = Pattern.compile(EXPIRED_HOPLOG_NAME_REGEX);

    Path cleanUpIntervalPath = new Path(regionPath.getParent(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME);
    long intervalDurationMillis = readCleanUpIntervalMillis(fs, cleanUpIntervalPath);

    // a region directory contains directories for individual buckets. A bucket
    // has an integer name.
    FileStatus[] bucketDirs = fs.listStatus(regionPath);
    for (FileStatus bucket : bucketDirs) {
        if (!bucket.isDirectory()) {
            continue;
        }
        try {
            Integer.valueOf(bucket.getPath().getName());
        } catch (NumberFormatException e) {
            continue;
        }

        ArrayList<FileStatus> bucketHoplogs = new ArrayList<FileStatus>();

        // identify all the flush hoplogs and seq hoplogs by visiting all the
        // bucket directories
        FileStatus[] bucketFiles = fs.listStatus(bucket.getPath());

        Map<String, Long> expiredHoplogs = getExpiredHoplogs(fs, bucketFiles, expiredPattern);

        FileStatus oldestHopAfterEndTS = null;
        long oldestHopTS = Long.MAX_VALUE;
        long currentTimeStamp = System.currentTimeMillis();
        for (FileStatus file : bucketFiles) {
            if (!file.isFile()) {
                continue;
            }

            Matcher match = pattern.matcher(file.getPath().getName());
            if (!match.matches()) {
                continue;
            }

            long timeStamp = AbstractHoplogOrganizer.getHoplogTimestamp(match);
            if (start > 0 && timeStamp < start) {
                // this hoplog contains records less than the start time stamp
                continue;
            }

            if (end > 0 && timeStamp > end) {
                // this hoplog contains records mutated after end time stamp. Ignore
                // this hoplog if it is not the oldest.
                if (oldestHopTS > timeStamp) {
                    oldestHopTS = timeStamp;
                    oldestHopAfterEndTS = file;
                }
                continue;
            }

            long expiredTimeStamp = expiredTime(file, expiredHoplogs);
            if (expiredTimeStamp > 0 && intervalDurationMillis > 0) {
                if ((currentTimeStamp - expiredTimeStamp) > 0.8 * intervalDurationMillis) {
                    continue;
                }
            }
            bucketHoplogs.add(file);
        }

        if (oldestHopAfterEndTS != null) {
            long expiredTimeStamp = expiredTime(oldestHopAfterEndTS, expiredHoplogs);
            if (expiredTimeStamp <= 0 || intervalDurationMillis <= 0
                    || (currentTimeStamp - expiredTimeStamp) <= 0.8 * intervalDurationMillis) {
                bucketHoplogs.add(oldestHopAfterEndTS);
            }
        }

        if (bucketHoplogs.size() > 0) {
            allBuckets.add(bucketHoplogs);
        }
    }

    return allBuckets;
}
From source file:com.github.joshelser.accumulo.DelimitedIngest.java
License:Apache License
private List<Path> convertInputToPaths() throws IOException {
    List<String> inputs = args.getInput();
    List<Path> paths = new ArrayList<>(inputs.size());
    for (String input : inputs) {
        Path p = new Path(input);
        FileSystem fs = p.getFileSystem(conf);
        FileStatus fstat = fs.getFileStatus(p);
        if (fstat.isFile()) {
            paths.add(p);
        } else if (fstat.isDirectory()) {
            for (FileStatus child : fs.listStatus(p)) {
                if (child.isFile()) {
                    paths.add(child.getPath());
                }
            }
        } else {
            throw new IllegalStateException("Unable to handle that which is not file nor directory: " + p);
        }
    }
    return paths;
}
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            FSDataInputStream streaming = fs.open(status.getPath());
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(streaming));
            re.append(bufferedReader.readLine() + System.lineSeparator());
        }
    }
    return re.toString();
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/** helper returns listStatus() */
private static FileStatus[] shellListStatus(String cmd, FileSystem srcFs, FileStatus src) {
    if (!src.isDir()) {
        FileStatus[] files = { src };
        return files;
    }
    Path path = src.getPath();
    try {
        FileStatus[] files = srcFs.listStatus(path);
        if (files == null) {
            System.err.println(cmd + ": could not get listing for '" + path + "'");
        }
        return files;
    } catch (IOException e) {
        System.err.println(
                cmd + ": could not get listing for '" + path + "' : " + e.getMessage().split("\n")[0]);
    }
    return null;
}
From source file:com.hadoop.compression.lzo.LzoIndexer.java
License:Open Source License
/**
 * Lzo index a given path, calling recursively to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();

            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s). Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}