Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using default path filter.

Usage

From source file:com.lightboxtechnologies.spectrum.HDFSArchiver.java

License:Apache License

/**
 * Writes a directory entry into the archive and then archives each child of the directory.
 *
 * @param relpath archive-relative name of the directory, without a trailing slash
 * @param fs file system used to list the directory
 * @param path directory whose children are listed
 * @param zout zip stream that receives the entries
 * @param buf buffer passed through unchanged to {@code traverse}
 * @throws IOException if writing the entry or listing the directory fails
 */
protected static void handleDirectory(String relpath, FileSystem fs, Path path, ZipOutputStream zout,
        byte[] buf) throws IOException {
    // Directory entries must carry a trailing '/'. Emitting the entry
    // explicitly means that directories without any files in them are
    // still recreated when the archive is unzipped.
    zout.putNextEntry(new ZipEntry(relpath + '/'));
    zout.closeEntry();

    final FileStatus[] children = fs.listStatus(path);
    for (int i = 0; i < children.length; i++) {
        traverse(fs, children[i].getPath(), zout, buf);
    }
}

From source file:com.linkedin.cubert.examples.ListFiles.java

License:Open Source License

/**
 * Recursively collects the path of every non-directory entry found beneath {@code path}.
 *
 * @param fs file system to list
 * @param path location whose entries are listed
 * @param files output list; the URI path component of each non-directory entry is appended
 * @throws IOException if a listing fails
 */
private void listFiles(FileSystem fs, Path path, List<String> files) throws IOException {
    final FileStatus[] entries = fs.listStatus(path);
    if (entries != null) {
        for (final FileStatus entry : entries) {
            if (!entry.isDir()) {
                files.add(entry.getPath().toUri().getPath());
                continue;
            }
            // Directories are descended into rather than recorded.
            listFiles(fs, entry.getPath(), files);
        }
    }
}

From source file:com.linkedin.cubert.utils.FileSystemUtils.java

License:Open Source License

/**
 * Walks a {@code root/<year>/<month>/<day>} layout and returns the day-level paths whose
 * date falls inside the requested range.
 *
 * <p>Each date is compared as the number {@code year * 10000 + month * 100 + day}, so the
 * bounds are expected in that same encoding. Both bounds are inclusive.
 *
 * @param fs file system to list
 * @param root directory whose children are named after years
 * @param startDate inclusive lower bound
 * @param endDate inclusive upper bound
 * @return the matching day-level paths, in listing order
 * @throws IOException if a listing fails
 * @throws NumberFormatException if any entry name at the year, month or day level is not an integer
 */
public static List<Path> getDailyDurationPaths(FileSystem fs, Path root, long startDate, long endDate)
        throws IOException {
    final List<Path> selected = new ArrayList<Path>();

    for (FileStatus yearEntry : fs.listStatus(root)) {
        final Path yearPath = yearEntry.getPath();
        final int year = Integer.parseInt(yearPath.getName());

        for (FileStatus monthEntry : fs.listStatus(yearPath)) {
            final Path monthPath = monthEntry.getPath();
            final int month = Integer.parseInt(monthPath.getName());

            for (FileStatus dayEntry : fs.listStatus(monthPath)) {
                final Path dayPath = dayEntry.getPath();
                final int day = Integer.parseInt(dayPath.getName());

                final long encodedDate = 10000L * year + 100L * month + day;
                if (encodedDate < startDate || encodedDate > endDate) {
                    continue;
                }
                selected.add(dayPath);
            }
        }
    }

    return selected;
}

From source file:com.linkedin.cubert.utils.FileSystemUtils.java

License:Open Source License

/**
 * Walks a {@code root/<year>/<month>/<day>/<hour>} layout and returns the hour-level paths
 * whose date-hour falls inside the requested range.
 *
 * <p>Each date-hour is compared as the number
 * {@code year * 1000000 + month * 10000 + day * 100 + hour}, so the bounds are expected in
 * that same encoding. Both bounds are inclusive.
 *
 * @param fs file system to list
 * @param root directory whose children are named after years
 * @param startDateHour inclusive lower bound
 * @param endDateHour inclusive upper bound
 * @return the matching hour-level paths, in listing order
 * @throws IOException if a listing fails
 * @throws NumberFormatException if any entry name at the year, month, day or hour level is
 *         not an integer
 */
public static List<Path> getHourlyDurationPaths(FileSystem fs, Path root, long startDateHour, long endDateHour)
        throws IOException {
    final List<Path> selected = new ArrayList<Path>();

    for (FileStatus yearEntry : fs.listStatus(root)) {
        final Path yearPath = yearEntry.getPath();
        final int year = Integer.parseInt(yearPath.getName());

        for (FileStatus monthEntry : fs.listStatus(yearPath)) {
            final Path monthPath = monthEntry.getPath();
            final int month = Integer.parseInt(monthPath.getName());

            for (FileStatus dayEntry : fs.listStatus(monthPath)) {
                final Path dayPath = dayEntry.getPath();
                final int day = Integer.parseInt(dayPath.getName());

                for (FileStatus hourEntry : fs.listStatus(dayPath)) {
                    final Path hourPath = hourEntry.getPath();
                    final int hour = Integer.parseInt(hourPath.getName());

                    final long encodedDateHour = 1000000L * year + 10000L * month + 100L * day + hour;
                    if (encodedDateHour < startDateHour || encodedDateHour > endDateHour) {
                        continue;
                    }
                    selected.add(hourPath);
                }
            }
        }
    }

    return selected;
}

From source file:com.linkedin.hadoop.jobs.HdfsWaitJob.java

License:Apache License

/**
 * Polls HDFS once for {@code dirPath}.
 *
 * <p>When {@code checkExactPath} is true, the method only reports whether {@code dirPath}
 * exists. Otherwise it lists the entries directly under {@code dirPath} and succeeds as soon
 * as one of them is a directory whose modification time is at most {@code freshness}
 * milliseconds before the current system time.
 *
 * @param dirPath The path to the directory we are searching for fresh folders
 * @param freshness The maximum age, in milliseconds, of a folder's last modification
 * @param checkExactPath The boolean that decides if we only check for the existence of dirPath in HDFS
 * @throws IOException If there is an HDFS exception, or the listing of dirPath comes back null
 * @throws NullPointerException If no file system could be obtained from the configuration
 * @return A boolean value corresponding to whether a fresh folder (or the exact path) was found
 */
public boolean checkDirectory(String dirPath, long freshness, boolean checkExactPath)
        throws IOException, NullPointerException {
    final FileSystem hdfs = FileSystem.get(getConf());
    if (hdfs == null) {
        final String message = "ERROR: The file system trying to be accessed does not exist. JOB TERMINATED.";
        log.info(message);
        throw new NullPointerException(message);
    }

    final Path target = new Path(dirPath);

    // Exact-path mode: existence alone decides the outcome.
    if (checkExactPath) {
        final boolean found = hdfs.exists(target);
        if (found) {
            log.info("SUCCESS: The exact path: " + dirPath + " was found in HDFS. Program now quitting.");
        } else {
            log.info("STATUS: The exact path: " + dirPath + " was not found during latest polling.");
        }
        return found;
    }

    final FileStatus[] children = hdfs.listStatus(target);
    if (children == null) {
        final String message = "ERROR: dirPath -> " + dirPath + " is empty or does not exist. JOB TERMINATED.";
        log.info(message);
        throw new IOException(message);
    }

    for (final FileStatus child : children) {
        if (!child.isDirectory()) {
            continue;
        }

        final long modifiedAt = child.getModificationTime();
        final long age = System.currentTimeMillis() - modifiedAt;
        if (age > freshness) {
            continue;
        }

        // Log only the last path segment of the fresh folder.
        final String fullName = child.getPath().toString();
        final String shortName = fullName.substring(fullName.lastIndexOf("/") + 1);
        log.info("We found this fresh folder in the filePath: " + shortName);
        log.info("SUCCESS: Program now quitting after successfully finding a fresh folder.");
        return true;
    }

    log.info("STATUS: No fresh folders found during latest polling.");
    return false;
}

From source file:com.linkedin.json.JsonSequenceFileInputFormat.java

License:Apache License

/**
 * Recursively gathers the statuses of the files beneath {@code filterMemberPath}, skipping
 * every entry whose name starts with an underscore.
 *
 * @param jobContext job context supplying the configuration used to resolve the file system
 * @param filterMemberPath file or directory to expand
 * @return the collected statuses; empty when the listing is null or nothing qualifies
 * @throws IOException if the file system cannot be resolved or queried
 */
private List<FileStatus> getAllSubFileStatus(JobContext jobContext, Path filterMemberPath) throws IOException {
    final List<FileStatus> collected = new ArrayList<FileStatus>();

    final FileSystem fileSystem = filterMemberPath.getFileSystem(jobContext.getConfiguration());
    final FileStatus[] listing = fileSystem.listStatus(filterMemberPath);
    if (listing == null) {
        return collected;
    }

    if (!fileSystem.getFileStatus(filterMemberPath).isDir()) {
        // Not a directory: only the first element of the listing is considered.
        if (listing.length > 0 && !listing[0].getPath().getName().startsWith("_")) {
            collected.add(listing[0]);
        }
        return collected;
    }

    for (final FileStatus child : listing) {
        if (child.getPath().getName().startsWith("_")) {
            continue;
        }
        collected.addAll(getAllSubFileStatus(jobContext, child.getPath()));
    }

    return collected;
}

From source file:com.linkedin.mapred.AvroHdfsFileReader.java

License:Open Source License

/**
 * Recursively collects the Avro paths found beneath {@code filePath}.
 *
 * <p>A directory is descended into unless {@code AvroUtils.shouldPathBeIgnored} rejects it.
 * Every other entry, including a rejected directory, is kept only when {@code isAvro}
 * accepts its path.
 *
 * @param filePath location to list
 * @return the accepted paths, in listing order
 * @throws IOException if the file system cannot be resolved or listed
 */
@Override
protected List<Path> getPaths(String filePath) throws IOException {
    final Path root = new Path(filePath);
    final FileSystem fileSystem = root.getFileSystem(getConf());
    final List<Path> found = new ArrayList<Path>();

    for (final FileStatus entry : fileSystem.listStatus(root)) {
        final Path entryPath = entry.getPath();
        final boolean descend = entry.isDir() && !AvroUtils.shouldPathBeIgnored(entryPath);

        if (descend) {
            found.addAll(getPaths(entryPath.toString()));
        } else if (isAvro(entryPath)) {
            found.add(entryPath);
        }
    }

    return found;
}

From source file:com.linkedin.mapred.AvroUtils.java

License:Open Source License

/**
 * Adds all subdirectories under a root path to the input format.
 * /*from  w w w. j a va 2s  .  com*/
 * @param conf The JobConf.
 * @param path The root path.
 * @throws IOException
 */
public static void addAllSubPaths(JobConf conf, Path path) throws IOException {
    if (shouldPathBeIgnored(path)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", path));
    }

    final FileSystem fs = path.getFileSystem(conf);

    if (fs.exists(path)) {
        for (FileStatus status : fs.listStatus(path)) {
            if (!shouldPathBeIgnored(status.getPath())) {
                if (status.isDir()) {
                    addAllSubPaths(conf, status.getPath());
                } else {
                    AvroInputFormat.addInputPath(conf, status.getPath());
                }
            }
        }
    }
}

From source file:com.linkedin.mapred.AvroUtils.java

License:Open Source License

/**
 * Enumerates all the files under a given path.
 * /*  ww  w .  j av  a2 s  .c  o m*/
 * @param conf The JobConf.
 * @param basePath The base path.
 * @return A list of files found under the base path.
 * @throws IOException
 */
public static List<Path> enumerateFiles(JobConf conf, Path basePath) throws IOException {
    if (shouldPathBeIgnored(basePath)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", basePath));
    }
    List<Path> paths = new ArrayList<Path>();
    FileSystem fs = basePath.getFileSystem(conf);

    if (!fs.exists(basePath)) {
        return Collections.emptyList();
    }

    for (FileStatus s : fs.listStatus(basePath)) {
        if (!shouldPathBeIgnored(s.getPath())) {
            if (s.isDir()) {
                paths.addAll(enumerateFiles(conf, s.getPath()));
            } else {
                paths.add(s.getPath());
            }
        }
    }
    return paths;
}

From source file:com.linkedin.mlease.utils.Util.java

License:Open Source License

/**
 * Recursively collects the path of every non-directory entry beneath {@code root}.
 *
 * @param conf job configuration used to resolve the file system of {@code root}; when
 *        {@code null}, a default {@code JobConf} is used instead
 * @param root directory to search
 * @return the paths of all non-directory entries found, in listing order
 * @throws IOException if the file system cannot be resolved or listed
 */
public static List<Path> findPartFiles(JobConf conf, Path root) throws IOException {
    // Resolve the file system with the caller's configuration so that its
    // settings are honoured. A null conf falls back to a default JobConf so
    // callers that pass null keep working.
    final JobConf effectiveConf = (conf != null) ? conf : new JobConf();
    FileSystem fs = root.getFileSystem(effectiveConf);
    List<Path> files = new ArrayList<Path>();

    for (FileStatus status : fs.listStatus(root)) {
        if (status.isDir()) {
            files.addAll(findPartFiles(effectiveConf, status.getPath()));
        } else {
            files.add(status.getPath());
        }
    }
    return files;
}