Example usage for org.apache.hadoop.fs FileSystem getFileStatus

List of usage examples for org.apache.hadoop.fs FileSystem getFileStatus

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem.getFileStatus(Path).

Prototype

public abstract FileStatus getFileStatus(Path f) throws IOException;

Document

Return a file status object that represents the path.
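
A minimal, self-contained sketch of a direct call. The configuration and the path /tmp/example.txt are hypothetical placeholders; getFileStatus throws FileNotFoundException (a subclass of IOException) if the path does not exist.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileStatusExample {
    public static void main(String[] args) throws Exception {
        // Uses the default (or classpath) Hadoop configuration
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path; replace with a file that exists on your cluster
        Path p = new Path("/tmp/example.txt");

        FileStatus status = fs.getFileStatus(p);
        System.out.println("path   = " + status.getPath());
        System.out.println("length = " + status.getLen());
        System.out.println("owner  = " + status.getOwner() + ":" + status.getGroup());
        System.out.println("perms  = " + status.getPermission());
        System.out.println("mtime  = " + status.getModificationTime());
        System.out.println("isDir  = " + status.isDirectory());
    }
}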

Usage

From source file:com.toy.Client.java

License:Apache License

private static void registerLocalResource(Map<String, LocalResource> localResources, ApplicationId appId,
        FileSystem fs, Path src) throws IOException {
    String pathSuffix = Constants.TOY_PREFIX + appId.toString() + "/" + src.getName();
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    LOG.info("Copy {} from local filesystem to {} and add to local environment", src.getName(), dst.toUri());
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(src.getName(), amJarRsrc);
}

From source file:com.toy.Client.java

License:Apache License

private void uploadDepAndRegister(Map<String, LocalResource> localResources, ApplicationId appId, FileSystem fs,
        String depname) throws IOException {
    File dep = new File(depname);
    if (!dep.exists())
        throw new IOException(dep.getAbsolutePath() + " does not exist");
    Path dst = new Path(fs.getHomeDirectory(), Constants.TOY_PREFIX + appId.toString() + "/" + dep.getName());
    LOG.info("Copy {} from local filesystem to {} and add to local environment", dep.getName(), dst.toUri());
    FileInputStream input = new FileInputStream(dep);
    final FSDataOutputStream outputStream = fs.create(dst, true);
    ByteStreams.copy(input, outputStream);
    input.close();
    outputStream.close();
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
    amJarRsrc.setType(LocalResourceType.FILE);
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    FileStatus destStatus = fs.getFileStatus(dst);
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put(dep.getName(), amJarRsrc);

}
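
Both Client examples above follow the same pattern: copy a file into HDFS, then call getFileStatus on the destination so the YARN LocalResource carries the exact size and modification time of the uploaded copy, which YARN checks during localization. A minimal sketch of that pattern as a standalone helper, using only the calls already shown above (the class name and import packages are assumptions for a Hadoop 2.x classpath):

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;

public final class LocalResourceHelper {

    /** Registers an HDFS file that has already been uploaded as an application-visible LocalResource. */
    public static void addFileResource(Map<String, LocalResource> localResources, FileSystem fs, Path dst)
            throws IOException {
        // getFileStatus supplies the size and modification time of the uploaded file
        FileStatus status = fs.getFileStatus(dst);
        LocalResource resource = Records.newRecord(LocalResource.class);
        resource.setType(LocalResourceType.FILE);
        resource.setVisibility(LocalResourceVisibility.APPLICATION);
        resource.setResource(ConverterUtils.getYarnUrlFromPath(dst));
        resource.setTimestamp(status.getModificationTime());
        resource.setSize(status.getLen());
        localResources.put(dst.getName(), resource);
    }
}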

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String localPath = null;
    String preservePath = null;
    String sIgnoreTablesFilename = null;
    String sNoPreserveFilename = null;
    String sDateString = null;
    long size = 0;

    // UNIX dates for right now
    long now = new java.util.Date().getTime() / 1000;
    long maxDate = now;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            localPath = args[++i];
            continue;
        }
        if (args[i].equals("--preserve-path")) {
            preservePath = args[++i];
            continue;
        }
        if (args[i].equals("--no-preserve")) {
            sNoPreserveFilename = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--sleep")) {
            try {
                m_nSleepSeconds = Integer.parseInt(args[++i]);
            } catch (Exception e) {
                System.err.println("ERROR: " + e.toString() + "\n");
                usage();
            }
            continue;
        }
        if (args[i].equals("--dry-run")) {
            m_bDryRun = true;
            continue;
        }
        if (args[i].equals("--date")) {
            sDateString = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            maxDate = Long.parseLong(args[++i]);
            continue;
        }
        if (args[i].equals("--max-bytes")) {
            size = Long.parseLong(args[++i]);
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || localPath == null || preservePath == null || sDateString == null) {
        usage();
    }

    long minDate;

    if ("yesterday".equals(sDateString)) {
        // figure out yesterday's dates
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_YEAR, -1); // add(), not roll(), so the date rolls back correctly across a year boundary

        // yesterday midnight
        cal.set(Calendar.HOUR_OF_DAY, 0);
        cal.set(Calendar.MINUTE, 0);
        cal.set(Calendar.SECOND, 0);
        cal.set(Calendar.MILLISECOND, 0);

        minDate = cal.getTimeInMillis() / 1000;

        // yesterday end of day
        cal.set(Calendar.HOUR_OF_DAY, 23);
        cal.set(Calendar.MINUTE, 59);
        cal.set(Calendar.SECOND, 59);
        cal.set(Calendar.MILLISECOND, 999);

        maxDate = cal.getTimeInMillis() / 1000;
    } else if ("last-week".equals(sDateString)) {
        minDate = maxDate - (7 * 24 * 60 * 60);
    } else if ("last-day".equals(sDateString)) {
        minDate = maxDate - (24 * 60 * 60);
    } else {
        // UNIX date since epoch of last backup
        minDate = Long.parseLong(sDateString);
    }

    long tmpDate = 0;
    BackupHdfs bak = new BackupHdfs();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    // initialize list of files to not preserve
    if (sNoPreserveFilename != null) {
        bak.initializeNoPreserve(sNoPreserveFilename);
    }

    ArrayList<Path> pathList = new ArrayList<Path>(2000);
    HashMap<Path, Long> hmTimestamps = new HashMap<Path, Long>();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // If the HDFS path is a dir continue
    if (fs.getFileStatus(baseDir).isDir()) {
        Calendar cal = Calendar.getInstance();

        System.err.println("");
        cal.setTimeInMillis(minDate * 1000);
        System.err.println("min date = " + cal.getTime().toString());

        cal.setTimeInMillis(maxDate * 1000);
        System.err.println("max date = " + cal.getTime().toString());

        System.err.println("");
        System.err.println("Searching filesystem: " + baseDir.toUri().getPath());

        bak.checkDir(fs, minDate, maxDate, baseDir, pathList, hmTimestamps);

        System.err.println("");
        System.err.println("Skipped " + m_nIgnoredTables + " files due to ignored tables");

        System.err.println("");
        System.err.println("Number of files to backup = " + pathList.size());

        System.err.println("Total bytes to backup = " + prettyPrintBytes(m_nTotalBytes));

        System.err.println("");
        System.err.println("sorting list of files...");
        Collections.sort(pathList, new DateComparator(hmTimestamps));
        System.err.println("done");

        System.err.println("");
        System.err.println("starting backup...");
        tmpDate = bak.backupFiles(localPath, preservePath, fs, pathList, size);

        bak.closeFiles();

        System.err.println("");
        System.err.println("backup completed...");
    }

    if (tmpDate == 0) {
        // Size limit not reached: print the max date used for this run
        System.out.println(maxDate);
    } else {
        // Print out date for last file backed up
        System.err.println("Size limit reached.");
        System.out.println(tmpDate);
    }

    System.exit(0);
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/**
 * Method to move files from HDFS to the local filesystem.
 *
 * @param localPath    path on the machine's local filesystem
 * @param preservePath path under which existing local copies are moved before being overwritten
 * @param fs           FileSystem object from HDFS
 * @param pathList     list of paths for files that might need to be backed up
 * @param size         max size in bytes to be backed up
 * @return date of the last file backed up if the size limit was reached, else zero
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
                // {
                // System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                // continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err
                                            .println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println(
                                        "Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);

                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);

            return 0;
        }
    }

    return 0;
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/**
 * Method to walk the HDFS filesystem depth-first and find all files.
 *
 * @param fs           FileSystem object from HDFS
 * @param minDate      oldest date for files to be backed up
 * @param maxDate      newest date for files to be backed up
 * @param p            path in HDFS to look for files
 * @param pathList     will be filled with all files under p
 * @param hmTimestamps hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                m_wrChmods.println(
                        "hadoop fs -chmod " + Long.toOctalString(stat.getPermission().toShort()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables.  We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates.  is the file too old?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                //System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);

            hmTimestamps.put(p, tmpDate);

            // store info about total bytes needed to back up
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/** Compare the checksum of the HDFS file with that of the local copy.
 *
 * @author tpalka@tripadvisor.com
 * @date   Fri Jan 27 06:06:00 2012
 */
boolean compareChecksums(FileSystem fs, Path p, String sFsPath) {
    try {
        // get hdfs file info
        FileStatus stat = fs.getFileStatus(p);

        // get HDFS checksum
        FileChecksum ck = fs.getFileChecksum(p);
        String sCk, sCkShort;
        if (ck == null) {
            sCk = sCkShort = "<null>";
        } else {
            sCk = ck.toString();
            sCkShort = sCk.replaceAll("^.*:", "");
        }

        // System.out.println(p.toUri().getPath() + " len=" + stat.getLen()
        // + " " + stat.getOwner() + "/" + stat.getGroup()
        // + " checksum=" + sCk);

        // find the local file
        File fLocal = new File(sFsPath);
        if (!fLocal.exists()) {
            System.out.println("CHECKSUM-ERROR: file does not exist: " + sFsPath);
            return false;
        }
        if (!fLocal.isFile()) {
            System.out.println("CHECKSUM-ERROR: path is not a file: " + sFsPath);
            return false;
        }
        if (stat.getLen() != fLocal.length()) {
            System.out.println("CHECKSUM-ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                    + " fslen=" + fLocal.length());
            return false;
        }

        // get local fs checksum
        FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
        if (ckLocal == null) {
            System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
            return false;
        }

        // compare checksums as a string, after stripping the
        // algorithm name from the beginning
        String sCkLocal = ckLocal.toString();
        String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

        if (false == sCkShort.equals(sCkLocalShort)) {
            System.out.println("CHECKSUM-ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= "
                    + sCkLocal);
            return false;
        }

        return true;
    } catch (IOException e) {
        System.out.println("CHECKSUM-ERROR: " + sFsPath + " exception " + e.toString());
    }

    return false;
}

From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java

License:Apache License

public static void main(String[] args) throws IOException {
    Path baseDir = null;
    String sLocalPathRoot = null;
    String sIgnoreTablesFilename = null;
    String sMaxDateString = null;
    String sFromFilename = null;

    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("--hdfs-path")) {
            baseDir = new Path(args[++i]);
            continue;
        }
        if (args[i].equals("--local-path")) {
            sLocalPathRoot = args[++i];
            continue;
        }
        if (args[i].equals("--ignore-tables")) {
            sIgnoreTablesFilename = args[++i];
            continue;
        }
        if (args[i].equals("--max-date")) {
            sMaxDateString = args[++i];
            continue;
        }
        if (args[i].equals("--from-file")) {
            sFromFilename = args[++i];
            continue;
        }

        System.err.println("ERROR: unknown arg " + args[i]);
        usage();
    }

    if (baseDir == null || sLocalPathRoot == null) {
        usage();
    }

    // UNIX date for right now
    long maxDate = new java.util.Date().getTime() / 1000;

    if (sMaxDateString != null) {
        // UNIX date since epoch of last backup
        maxDate = Long.parseLong(sMaxDateString);
    }

    VerifyHdfsBackup bak = new VerifyHdfsBackup();

    // initialize the list of tables to ignore
    if (sIgnoreTablesFilename != null) {
        bak.initializeTablesToIgnore(sIgnoreTablesFilename);
    }

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    if (sFromFilename != null) {
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(sFromFilename));
            String sFile;
            while ((sFile = in.readLine()) != null) {
                bak.checkDir(fs, new Path(sFile), sLocalPathRoot, maxDate);
            }
        } catch (Exception e) {
            System.out.println("ERROR: Failed to read from-file " + sFromFilename + ": " + e);
        } finally {
            try {
                in.close();
            } catch (Exception e2) {
            }
        }
    } else {
        // If the HDFS path is a dir continue
        if (fs.getFileStatus(baseDir).isDir()) {
            System.out.println("Searching filesystem: " + baseDir.toUri().getPath());

            bak.checkDir(fs, baseDir, sLocalPathRoot, maxDate);
        }
    }

    System.exit(0);
}

From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java

License:Apache License

/**
 * Method to walk the HDFS filesystem depth-first and check all files against the local backup.
 *
 * @param fs             FileSystem object from HDFS
 * @param p              path in HDFS to look for files
 * @param sLocalPathRoot root of the local backup copy to verify against
 * @param maxDate        newest date for files to be backed up
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables.  We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size.  The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());

                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println(
                        "ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= " + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}

From source file:com.twitter.elephanttwin.util.HdfsFsWalker.java

License:Apache License

/**
 * Walk recursively (depth-first) through the file system beneath path, calling the
 * passed-in function on each path.
 *
 * @param path the path at which to start walking
 * @param evalFunc a functional representing the desired action to take on each path
 * @throws IOException
 */
public void walk(Path path, Functional.F2<Boolean, FileStatus, FileSystem> evalFunc) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(path.toString()), conf);
    if (fs.exists(path) && (pathFilter == null || pathFilter.accept(path))) {
        walkInternal(fs.getFileStatus(path), fs, evalFunc, 0);
    } else {
        LOG.info("Refusing to walk. fs.exists? " + fs.exists(path) + "pathFilter accepts? "
                + pathFilter.accept(path));
    }
}

From source file:com.twitter.elephanttwin.util.HdfsUtils.java

License:Apache License

public static Iterable<Path> getSubdirectories(final boolean recursive, final String baseDirectory,
        final FileSystem hdfs) throws IOException {

    FileStatus[] fileStat;
    Path basePath = new Path(baseDirectory);
    if (!hdfs.exists(basePath)) {
        throw new IOException(
                hdfs.getWorkingDirectory() + baseDirectory + " does not exist, cannot getSubdirectories");
    }
    FileStatus status = hdfs.getFileStatus(basePath);
    if (!status.isDir()) {
        LOG.warning("tried to find subdirectories of " + status.getPath() + ", but it is a file");
        return Lists.newArrayList(status.getPath());
    }
    // get the stat on all files in the source directory
    fileStat = hdfs.listStatus(basePath);

    if (fileStat == null) {
        throw new IOException(
                "FileSystem.listStatus(" + basePath + ") returned null, cannot getSubdirectories");
    }

    // get paths to the files in the source directory
    return Arrays.asList(FileUtil.stat2Paths(fileStat));
}