Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileSystem.listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Document

Filter files/directories in the given list of paths using default path filter.
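
Below is a minimal, self-contained sketch of this overload (the directory paths are hypothetical): listStatus(Path[]) combines the entries of all given paths, applying the default path filter, which accepts every entry.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical input directories; the returned array holds the
        // combined FileStatus entries for both paths.
        Path[] inputs = { new Path("/data/in"), new Path("/data/archive") };
        FileStatus[] statuses = fs.listStatus(inputs);

        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}

Overloads that take an explicit PathFilter are also available for callers that need to narrow the listing.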

Usage

From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java

License:Apache License

private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory,
        List<String> filePrefixes, boolean deleteEmptyDirectories) {
    FileStatus[] allFiles;
    try {
        allFiles = fileSystem.listStatus(directory);
    } catch (IOException e) {
        ImmutableList.Builder<String> notDeletedItems = ImmutableList.builder();
        notDeletedItems.add(directory.toString() + "/**");
        return new RecursiveDeleteResult(false, notDeletedItems.build());
    }

    boolean allDescendentsDeleted = true;
    ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder();
    for (FileStatus fileStatus : allFiles) {
        if (HadoopFileStatus.isFile(fileStatus)) {
            Path filePath = fileStatus.getPath();
            String fileName = filePath.getName();
            boolean eligible = false;
            for (String filePrefix : filePrefixes) {
                if (fileName.startsWith(filePrefix)) {
                    eligible = true;
                    break;
                }
            }
            if (eligible) {
                if (!deleteIfExists(fileSystem, filePath, false)) {
                    allDescendentsDeleted = false;
                    notDeletedEligibleItems.add(filePath.toString());
                }
            } else {
                allDescendentsDeleted = false;
            }
        } else if (HadoopFileStatus.isDirectory(fileStatus)) {
            RecursiveDeleteResult subResult = doRecursiveDeleteFiles(fileSystem, fileStatus.getPath(),
                    filePrefixes, deleteEmptyDirectories);
            if (!subResult.isDirectoryNoLongerExists()) {
                allDescendentsDeleted = false;
            }
            if (!subResult.getNotDeletedEligibleItems().isEmpty()) {
                notDeletedEligibleItems.addAll(subResult.getNotDeletedEligibleItems());
            }
        } else {
            allDescendentsDeleted = false;
            notDeletedEligibleItems.add(fileStatus.getPath().toString());
        }
    }
    if (allDescendentsDeleted && deleteEmptyDirectories) {
        verify(notDeletedEligibleItems.build().isEmpty());
        if (!deleteIfExists(fileSystem, directory, false)) {
            return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/"));
        }
        return new RecursiveDeleteResult(true, ImmutableList.of());
    }
    return new RecursiveDeleteResult(false, notDeletedEligibleItems.build());
}

From source file:com.flipkart.fdp.migration.distcp.utils.FileCountDriver.java

License:Apache License

public List<String> getFileStatusRecursive(Path path, FileSystem fs, String destBasePath) throws IOException {
    List<String> response = new ArrayList<String>();
    FileStatus file = fs.getFileStatus(path);
    if (file != null && file.isFile()) {
        response.add(trimExtension(file.getPath().toUri().getPath(), destBasePath));
        return response;
    }

    FileStatus[] fstats = fs.listStatus(path);

    if (fstats != null && fstats.length > 0) {

        for (FileStatus fstat : fstats) {

            if (fstat.isDirectory()) {
                response.addAll(getFileStatusRecursive(fstat.getPath(), fs, destBasePath));
            } else {
                response.add(trimExtension(fstat.getPath().toUri().getPath(), destBasePath));
            }
        }
    }
    return response;
}

From source file:com.fullcontact.sstable.hadoop.mapreduce.SSTableInputFormat.java

License:Apache License

/**
 * If we have a directory recursively gather the files we care about for this job.
 *
 * @param file Root file/directory.
 * @param job Job context.
 * @return All files we care about.
 * @throws IOException
 */
private Collection<FileStatus> handleFile(final FileStatus file, final JobContext job) throws IOException {
    final List<FileStatus> results = Lists.newArrayList();

    if (file.isDir()) {
        final Path p = file.getPath();
        LOG.debug("Expanding {}", p);
        final FileSystem fs = p.getFileSystem(job.getConfiguration());
        final FileStatus[] children = fs.listStatus(p);
        for (FileStatus child : children) {
            results.addAll(handleFile(child, job));
        }
    } else {
        results.add(file);
    }

    return results;
}

From source file:com.fullcontact.sstable.index.SSTableIndexIndexer.java

License:Apache License

public void index(final Path sstablePath) throws IOException {

    final FileSystem fileSystem = FileSystem.get(URI.create(sstablePath.toString()), configuration);
    final FileStatus fileStatus = fileSystem.getFileStatus(sstablePath);

    if (fileStatus.isDir()) {
        LOG.info("SSTable Indexing directory {}", sstablePath);
        final FileStatus[] statuses = fileSystem.listStatus(sstablePath);
        for (final FileStatus childStatus : statuses) {
            index(childStatus.getPath());
        }
    } else if (sstablePath.toString().endsWith(SST_EXTENSION)) {
        final Path sstableIndexPath = new Path(sstablePath.toString() + SSTableIndexIndex.SSTABLE_INDEX_SUFFIX);
        if (fileSystem.exists(sstableIndexPath)) {
            LOG.info("Skipping as SSTable index file already exists for {}", sstablePath);
        } else {
            // Kick a thread for the index.
            final ListenableFuture<IndexRequest> indexFuture = service.submit(new Callable<IndexRequest>() {
                @Override
                public IndexRequest call() throws Exception {
                    final long startTime = System.currentTimeMillis();
                    final long fileSize = fileStatus.getLen();

                    LOG.info("Indexing SSTABLE Indexing file {}, size {} GB...", sstablePath,
                            decimalFormat.format(fileSize / (1024.0 * 1024.0 * 1024.0)));

                    indexSingleFile(fileSystem, sstablePath);

                    return new IndexRequest(sstableIndexPath, startTime, fileSize);
                }
            });

            Futures.addCallback(indexFuture, new FutureCallback<IndexRequest>() {
                public void onSuccess(final IndexRequest indexRequest) {
                    long indexSize = 0;

                    try {
                        indexSize = fileSystem.getFileStatus(indexRequest.getIndexPath()).getLen();
                    } catch (IOException e) {
                        LOG.error("Error getting file status for index path: {}", indexRequest.getIndexPath());
                    }

                    final double elapsed = (System.currentTimeMillis() - indexRequest.getStartTime()) / 1000.0;

                    LOG.info("Completed SSTABLE Indexing in {} seconds ({} MB/s).  Index size is {} KB.",
                            decimalFormat.format(elapsed),
                            decimalFormat.format(indexRequest.getFileSize() / (1024.0 * 1024.0 * elapsed)),
                            decimalFormat.format(indexSize / 1024.0));
                }

                public void onFailure(Throwable e) {
                    LOG.error("Failed to index.", e);
                }
            });

        }
    }
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.HdfsSortedOplogOrganizer.java

License:Apache License

/**
 * Returns a list of hoplogs present in the bucket's directory; expected to be called during
 * hoplog set initialization.
 */
List<Hoplog> identifyAndLoadSortedOplogs(boolean countSize) throws IOException {
    FileSystem fs = store.getFileSystem();
    if (!fs.exists(bucketPath)) {
        return new ArrayList<Hoplog>();
    }

    FileStatus allFiles[] = fs.listStatus(bucketPath);
    ArrayList<FileStatus> validFiles = new ArrayList<FileStatus>();
    for (FileStatus file : allFiles) {
        // All hoplog files contribute to disk usage
        Matcher matcher = HOPLOG_NAME_PATTERN.matcher(file.getPath().getName());
        if (!matcher.matches()) {
            // not a hoplog
            continue;
        }

        // account for the disk used by this file
        if (countSize) {
            incrementDiskUsage(file.getLen());
        }

        // All valid hoplog files must match the regex
        matcher = SORTED_HOPLOG_PATTERN.matcher(file.getPath().getName());
        if (matcher.matches()) {
            validFiles.add(file);
        }
    }

    FileStatus[] markers = getExpiryMarkers();
    FileStatus[] validHoplogs = filterValidHoplogs(validFiles.toArray(new FileStatus[validFiles.size()]),
            markers);

    ArrayList<Hoplog> results = new ArrayList<Hoplog>();
    if (validHoplogs == null || validHoplogs.length == 0) {
        return results;
    }

    for (int i = 0; i < validHoplogs.length; i++) {
        // Skip directories
        if (validHoplogs[i].isDirectory()) {
            continue;
        }

        final Path p = validHoplogs[i].getPath();
        // skip empty file
        if (fs.getFileStatus(p).getLen() <= 0) {
            continue;
        }

        Hoplog hoplog = new HFileSortedOplog(store, p, store.getBlockCache(), stats, store.getStats());
        results.add(hoplog);
    }

    return results;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java

License:Apache License

public static Collection<Collection<FileStatus>> getBucketHoplogs(Path regionPath, FileSystem fs, String type,
        long start, long end) throws IOException {
    Collection<Collection<FileStatus>> allBuckets = new ArrayList<Collection<FileStatus>>();

    // hoplog file names follow this pattern
    String HOPLOG_NAME_REGEX = AbstractHoplogOrganizer.HOPLOG_NAME_REGEX + type;
    String EXPIRED_HOPLOG_NAME_REGEX = HOPLOG_NAME_REGEX + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
    final Pattern pattern = Pattern.compile(HOPLOG_NAME_REGEX);
    final Pattern expiredPattern = Pattern.compile(EXPIRED_HOPLOG_NAME_REGEX);

    Path cleanUpIntervalPath = new Path(regionPath.getParent(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME);
    long intervalDurationMillis = readCleanUpIntervalMillis(fs, cleanUpIntervalPath);

    // a region directory contains directories for individual buckets. A bucket
    // has an integer name.
    FileStatus[] bucketDirs = fs.listStatus(regionPath);

    for (FileStatus bucket : bucketDirs) {
        if (!bucket.isDirectory()) {
            continue;
        }
        try {
            Integer.valueOf(bucket.getPath().getName());
        } catch (NumberFormatException e) {
            continue;
        }

        ArrayList<FileStatus> bucketHoplogs = new ArrayList<FileStatus>();

        // identify all the flush hoplogs and seq hoplogs by visiting all the
        // bucket directories
        FileStatus[] bucketFiles = fs.listStatus(bucket.getPath());

        Map<String, Long> expiredHoplogs = getExpiredHoplogs(fs, bucketFiles, expiredPattern);

        FileStatus oldestHopAfterEndTS = null;
        long oldestHopTS = Long.MAX_VALUE;
        long currentTimeStamp = System.currentTimeMillis();
        for (FileStatus file : bucketFiles) {
            if (!file.isFile()) {
                continue;
            }

            Matcher match = pattern.matcher(file.getPath().getName());
            if (!match.matches()) {
                continue;
            }

            long timeStamp = AbstractHoplogOrganizer.getHoplogTimestamp(match);
            if (start > 0 && timeStamp < start) {
                // this hoplog contains records less than the start time stamp
                continue;
            }

            if (end > 0 && timeStamp > end) {
                // this hoplog contains records mutated after end time stamp. Ignore
                // this hoplog if it is not the oldest.
                if (oldestHopTS > timeStamp) {
                    oldestHopTS = timeStamp;
                    oldestHopAfterEndTS = file;
                }
                continue;
            }
            long expiredTimeStamp = expiredTime(file, expiredHoplogs);
            if (expiredTimeStamp > 0 && intervalDurationMillis > 0) {
                if ((currentTimeStamp - expiredTimeStamp) > 0.8 * intervalDurationMillis) {
                    continue;
                }
            }
            bucketHoplogs.add(file);
        }

        if (oldestHopAfterEndTS != null) {
            long expiredTimeStamp = expiredTime(oldestHopAfterEndTS, expiredHoplogs);
            if (expiredTimeStamp <= 0 || intervalDurationMillis <= 0
                    || (currentTimeStamp - expiredTimeStamp) <= 0.8 * intervalDurationMillis) {
                bucketHoplogs.add(oldestHopAfterEndTS);
            }
        }

        if (bucketHoplogs.size() > 0) {
            allBuckets.add(bucketHoplogs);
        }
    }

    return allBuckets;
}

From source file:com.github.joshelser.accumulo.DelimitedIngest.java

License:Apache License

private List<Path> convertInputToPaths() throws IOException {
    List<String> inputs = args.getInput();
    List<Path> paths = new ArrayList<>(inputs.size());
    for (String input : inputs) {
        Path p = new Path(input);
        FileSystem fs = p.getFileSystem(conf);
        FileStatus fstat = fs.getFileStatus(p);
        if (fstat.isFile()) {
            paths.add(p);
        } else if (fstat.isDirectory()) {
            for (FileStatus child : fs.listStatus(p)) {
                if (child.isFile()) {
                    paths.add(child.getPath());
                }
            }
        } else {
            throw new IllegalStateException("Unable to handle that which is not file nor directory: " + p);
        }
    }
    return paths;
}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            FSDataInputStream streaming = fs.open(status.getPath());
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(streaming));
            re.append(bufferedReader.readLine() + System.lineSeparator());
        }
    }
    return re.toString();
}

From source file:com.gruter.hadoop.customShell.CustomShell.java

License:Apache License

/** helper returns listStatus() */
private static FileStatus[] shellListStatus(String cmd, FileSystem srcFs, FileStatus src) {
    if (!src.isDir()) {
        FileStatus[] files = { src };
        return files;
    }
    Path path = src.getPath();
    try {
        FileStatus[] files = srcFs.listStatus(path);
        if (files == null) {
            System.err.println(cmd + ": could not get listing for '" + path + "'");
        }
        return files;
    } catch (IOException e) {
        System.err.println(
                cmd + ": could not get listing for '" + path + "' : " + e.getMessage().split("\n")[0]);
    }
    return null;
}
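
Note: the null check above is defensive; as the prototype shows, listStatus declares FileNotFoundException, and modern FileSystem implementations throw it for a missing path rather than returning null, so the null branch mainly guards older or non-standard implementations.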

From source file:com.hadoop.compression.lzo.LzoIndexer.java

License:Open Source License

/**
 * Lzo index a given path, calling recursively to index directories when encountered.
 * Files are only indexed if they end in .lzo and have no existing .lzo.index file.
 *
 * @param lzoPath The base path to index.
 * @param nestingLevel For pretty printing, the nesting level.
 * @throws IOException
 */
private void indexInternal(Path lzoPath, int nestingLevel) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(lzoPath.toString()), conf_);
    FileStatus fileStatus = fs.getFileStatus(lzoPath);

    // Recursively walk
    if (fileStatus.isDir()) {
        LOG.info(getNesting(nestingLevel) + "LZO Indexing directory " + lzoPath + "...");
        FileStatus[] statuses = fs.listStatus(lzoPath);
        for (FileStatus childStatus : statuses) {
            indexInternal(childStatus.getPath(), nestingLevel + 1);
        }
    } else if (lzoPath.toString().endsWith(LZO_EXTENSION)) {
        Path lzoIndexPath = new Path(lzoPath.toString() + LzoIndex.LZO_INDEX_SUFFIX);
        if (fs.exists(lzoIndexPath)) {
            LOG.info(getNesting(nestingLevel) + "[SKIP] LZO index file already exists for " + lzoPath + "\n");
        } else {
            long startTime = System.currentTimeMillis();
            long fileSize = fileStatus.getLen();

            LOG.info(getNesting(nestingLevel) + "[INDEX] LZO Indexing file " + lzoPath + ", size "
                    + df_.format(fileSize / (1024.0 * 1024.0 * 1024.0)) + " GB...");
            if (indexSingleFile(fs, lzoPath)) {
                long indexSize = fs.getFileStatus(lzoIndexPath).getLen();
                double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
                LOG.info(getNesting(nestingLevel) + "Completed LZO Indexing in " + df_.format(elapsed)
                        + " seconds (" + df_.format(fileSize / (1024.0 * 1024.0 * elapsed))
                        + " MB/s).  Index size is " + df_.format(indexSize / 1024.0) + " KB.\n");
            }
        }
    }
}