List of usage examples for org.apache.hadoop.fs.FileSystem#getFileStatus
public abstract FileStatus getFileStatus(Path f) throws IOException;
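Before the project examples below, here is a minimal sketch of the call itself: getFileStatus resolves a single Path to a FileStatus carrying length, modification time, owner, group, permissions, and file/directory type, and throws FileNotFoundException (an IOException subclass) if the path does not exist. The class name and path used here are hypothetical, and a default Configuration is assumed to be on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // default filesystem as configured in core-site.xml
        FileSystem fs = FileSystem.get(conf);
        // hypothetical path for illustration only
        Path p = new Path("/tmp/example.txt");
        // throws FileNotFoundException if p does not exist
        FileStatus stat = fs.getFileStatus(p);
        System.out.println(p + " len=" + stat.getLen()
                + " mtime=" + stat.getModificationTime()
                + " owner=" + stat.getOwner()
                + " dir=" + stat.isDirectory());
    }
}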
From source file:com.toy.Client.java
License:Apache License
private static void registerLocalResource(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, Path src) throws IOException {
    String pathSuffix = Constants.TOY_PREFIX + appId.toString() + "/" + src.getName();
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    LOG.info("Copy {} from local filesystem to {} and add to local environment", src.getName(), dst.toUri());
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(src.getName(), amJarRsrc);
}
From source file:com.toy.Client.java
License:Apache License
private void uploadDepAndRegister(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, String depname) throws IOException {
    File dep = new File(depname);
    if (!dep.exists())
        throw new IOException(dep.getAbsolutePath() + " does not exist");
    Path dst = new Path(fs.getHomeDirectory(), Constants.TOY_PREFIX + appId.toString() + "/" + dep.getName());
    LOG.info("Copy {} from local filesystem to {} and add to local environment", dep.getName(), dst.toUri());
    FileInputStream input = new FileInputStream(dep);
    final FSDataOutputStream outputStream = fs.create(dst, true);
    ByteStreams.copy(input, outputStream);
    input.close();
    outputStream.close();
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    FileStatus destStatus = fs.getFileStatus(dst);
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(dep.getName(), amJarRsrc);
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String localPath = null;
    String preservePath = null;
    String sIgnoreTablesFilename = null;
    String sNoPreserveFilename = null;
    String sDateString = null;
    long size = 0;

    // UNIX dates for right now
    long now = new java.util.Date().getTime() / 1000;
    long maxDate = now;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            localPath = args[++i];
            continue;
        }
        if (args[i].equals("--preserve-path")) {
            preservePath = args[++i];
            continue;
        }
        if (args[i].equals("--no-preserve")) {
            sNoPreserveFilename = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--sleep")) {
            try {
                m_nSleepSeconds = Integer.parseInt(args[++i]);
            } catch (Exception e) {
                System.err.println("ERROR: " + e.toString() + "\n");
                usage();
            }
            continue;
        }
        if (args[i].equals("--dry-run")) {
            m_bDryRun = true;
            continue;
        }
        if (args[i].equals("--date")) {
            sDateString = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            maxDate = Long.parseLong(args[++i]);
            continue;
        }
        if (args[i].equals("--max-bytes")) {
            size = Long.parseLong(args[++i]);
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || localPath == null || preservePath == null || sDateString == null) {
        usage();
    }

    long minDate;

    if ("yesterday".equals(sDateString)) {
        // figure out yesterday's dates
        Calendar cal = Calendar.getInstance();
        cal.roll(Calendar.DAY_OF_YEAR, -1);

        // yesterday midnight
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);
        minDate = cal.getTimeInMillis() / 1000;

        // yesterday end of day
        cal.set(Calendar.HOUR_OF_DAY, 23);
        cal.set(Calendar.MINUTE, 59);
        cal.set(Calendar.SECOND, 59);
        cal.set(Calendar.MILLISECOND, 999);
        maxDate = cal.getTimeInMillis() / 1000;
    } else if ("last-week".equals(sDateString)) {
        minDate = maxDate - (7 * 24 * 60 * 60);
    } else if ("last-day".equals(sDateString)) {
        minDate = maxDate - (24 * 60 * 60);
    } else {
        // UNIX date since epoch of last backup
        minDate = Long.parseLong(sDateString);
    }

    long tmpDate = 0;
    BackupHdfs bak = new BackupHdfs();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    // initialize list of files to not preserve
    if (sNoPreserveFilename != null) {
        bak.initializeNoPreserve(sNoPreserveFilename);
    }

    ArrayList<Path> pathList = new ArrayList<Path>(2000);
    HashMap<Path, Long> hmTimestamps = new HashMap<Path, Long>();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // If the HDFS path is a dir continue
    if (fs.getFileStatus(baseDir).isDir()) {
        Calendar cal = Calendar.getInstance();

        System.err.println("");
        cal.setTimeInMillis(minDate * 1000);
        System.err.println("min date = " + cal.getTime().toString());

        cal.setTimeInMillis(maxDate * 1000);
        System.err.println("max date = " + cal.getTime().toString());

        System.err.println("");
        System.err.println("Searching filesystem: " + baseDir.toUri().getPath());

        bak.checkDir(fs, minDate, maxDate, baseDir, pathList, hmTimestamps);

        System.err.println("");
        System.err.println("Skipped " + m_nIgnoredTables + " files due to ignored tables");

        System.err.println("");
        System.err.println("Number of files to backup = " + pathList.size());
        System.err.println("Total bytes to backup = " + prettyPrintBytes(m_nTotalBytes));

        System.err.println("");
        System.err.println("sorting list of files...");
        Collections.sort(pathList, new DateComparator(hmTimestamps));
        System.err.println("done");

        System.err.println("");
        System.err.println("starting backup...");
        tmpDate = bak.backupFiles(localPath, preservePath, fs, pathList, size);

        bak.closeFiles();

        System.err.println("");
        System.err.println("backup completed...");
    }

    if (tmpDate == 0) {
        // If not size limit reached print out date for right now
        System.out.println(maxDate);
    } else {
        // Print out date for last file backed up
        System.err.println("Size limit reached.");
        System.out.println(tmpDate);
    }

    System.exit(0);
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to move files from HDFS to the local filesystem.
 *
 * localPath: path on the machine's local filesystem
 * fs: FileSystem object from HDFS
 * pathList: list of paths for files that might need to be backed up
 * size: max size in bytes to be backed up
 *
 * Returns the date of the last file backed up if the size limit was
 * reached; else, zero.
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime()) {
                //     System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                //     continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err.println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println("Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);
                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);
            return 0;
        }
    }

    return 0;
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a depth-first search to
 * find all files.
 *
 * fs: FileSystem object from HDFS
 * minDate: oldest date for files to be backed up
 * maxDate: newest date for files to be backed up
 * p: path in HDFS to look for files
 * pathList: will be filled with all files in p
 * hmTimestamps: hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName())
                    || "jobtracker".equals(p.getName()) || sPath.startsWith("/mapred")
                    || "ops".equals(p.getName()) || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                Short sh = new Short(stat.getPermission().toShort());
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables. We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates. is the file too old?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                // System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);
            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to backup
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1) ;
    }
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/** Compare the checksums of the HDFS file and the locally copied file.
 *
 * @author tpalka@tripadvisor.com
 * @date   Fri Jan 27 06:06:00 2012
 */
boolean compareChecksums(FileSystem fs, Path p, String sFsPath) {
    try {
        // get hdfs file info
        FileStatus stat = fs.getFileStatus(p);

        // get HDFS checksum
        FileChecksum ck = fs.getFileChecksum(p);
        String sCk, sCkShort;
        if (ck == null) {
            sCk = sCkShort = "<null>";
        } else {
            sCk = ck.toString();
            sCkShort = sCk.replaceAll("^.*:", "");
        }

        // System.out.println(p.toUri().getPath() + " len=" + stat.getLen()
        //         + " " + stat.getOwner() + "/" + stat.getGroup()
        //         + " checksum=" + sCk);

        // find the local file
        File fLocal = new File(sFsPath);
        if (!fLocal.exists()) {
            System.out.println("CHECKSUM-ERROR: file does not exist: " + sFsPath);
            return false;
        }
        if (!fLocal.isFile()) {
            System.out.println("CHECKSUM-ERROR: path is not a file: " + sFsPath);
            return false;
        }

        if (stat.getLen() != fLocal.length()) {
            System.out.println("CHECKSUM-ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                    + " fslen=" + fLocal.length());
            return false;
        }

        // get local fs checksum
        FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
        if (ckLocal == null) {
            System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
            return false;
        }

        // compare checksums as a string, after stripping the
        // algorithm name from the beginning
        String sCkLocal = ckLocal.toString();
        String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

        if (false == sCkShort.equals(sCkLocalShort)) {
            System.out.println("CHECKSUM-ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk
                    + "\nlocal= " + sCkLocal);
            return false;
        }

        return true;
    } catch (IOException e) {
        System.out.println("CHECKSUM-ERROR: " + sFsPath + " exception " + e.toString());
    }
    return false;
}
From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java
License:Apache License
public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String sLocalPathRoot = null;
    String sIgnoreTablesFilename = null;
    String sMaxDateString = null;
    String sFromFilename = null;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            sLocalPathRoot = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            sMaxDateString = args[++i];
            continue;
        }
        if (args[i].equals("--from-file")) {
            sFromFilename = args[++i];
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || sLocalPathRoot == null) {
        usage();
    }

    // UNIX date for right now
    long maxDate = new java.util.Date().getTime() / 1000;

    if (sMaxDateString != null) {
        // UNIX date since epoch of last backup
        maxDate = Long.parseLong(sMaxDateString);
    }

    VerifyHdfsBackup bak = new VerifyHdfsBackup();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (sFromFilename != null) {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(sFromFilename));
            String sFile;
            while ((sFile = in.readLine()) != null) {
                bak.checkDir(fs, new Path(sFile), sLocalPathRoot, maxDate);
            }
        } catch (Exception e) {
            System.out.println("ERROR: Failed to read from-file " + sFromFilename + ": " + e);
        } finally {
            try {
                in.close();
            } catch (Exception e2) {
            }
        }
    } else {
        // If the HDFS path is a dir continue
        if (fs.getFileStatus(baseDir).isDir()) {
            System.out.println("Searching filesystem: " + baseDir.toUri().getPath());
            bak.checkDir(fs, baseDir, sLocalPathRoot, maxDate);
        }
    }

    System.exit(0);
}
From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a depth-first search to
 * find all files.
 *
 * fs: FileSystem object from HDFS
 * maxDate: newest date for files to be backed up
 * p: path in HDFS to look for files
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName())
                    || "jobtracker".equals(p.getName()) || sPath.startsWith("/mapred")
                    || "ops".equals(p.getName()) || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables. We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size. The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/"
                    + stat.getGroup() + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());
                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }

            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println("ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= "
                        + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1) ;
    }
}
From source file:com.twitter.elephanttwin.util.HdfsFsWalker.java
License:Apache License
/**
 * Walk recursively (depth-first) through the file system beneath path, calling the
 * passed in function on each path.
 *
 * @param path the path at which to start walking
 * @param evalFunc a functional representing the desired action to take on each path
 * @throws IOException
 */
public void walk(Path path, Functional.F2<Boolean, FileStatus, FileSystem> evalFunc) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(path.toString()), conf);
    if (fs.exists(path) && (pathFilter == null || pathFilter.accept(path))) {
        walkInternal(fs.getFileStatus(path), fs, evalFunc, 0);
    } else {
        LOG.info("Refusing to walk. fs.exists? " + fs.exists(path) + "pathFilter accepts? "
                + pathFilter.accept(path));
    }
}
From source file:com.twitter.elephanttwin.util.HdfsUtils.java
License:Apache License
public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {
    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }

    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }

    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);

    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }

    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}