List of usage examples for org.apache.hadoop.fs FileSystem listStatus
public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException
public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException
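Before the project-specific examples below, here is a minimal, self-contained sketch of how listStatus is typically called with the single-Path overload used by most of the sources on this page. The path /tmp/data and the printed output are illustrative assumptions only, not taken from any of the listed projects.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {

    public static void main(String[] args) throws IOException {
        // Hypothetical directory used only for illustration.
        Path dir = new Path("/tmp/data");

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // listStatus returns one FileStatus per entry directly under the path;
        // it does not recurse into subdirectories.
        FileStatus[] entries = fs.listStatus(dir);
        for (FileStatus status : entries) {
            System.out.println((status.isDirectory() ? "dir  " : "file ")
                    + status.getPath() + " len=" + status.getLen());
        }
    }
}

Note that listStatus throws FileNotFoundException if the path does not exist; callers that may race with deletions usually catch it rather than pre-checking with exists().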
From source file:com.TCG.Nutch_DNS.Generator.java
License:Apache License
/**
 * Generate fetchlists in one or more segments. Whether to filter URLs or not
 * is read from the crawl.generate.filter property in the configuration files.
 * If the property is not found, the URLs are filtered. Same for the
 * normalisation.
 *
 * @param dbDir
 *          Crawl database directory
 * @param segments
 *          Segments directory
 * @param numLists
 *          Number of reduce tasks
 * @param topN
 *          Number of top URLs to be selected
 * @param curTime
 *          Current time in milliseconds
 *
 * @return Path to generated segment or null if no entries were selected
 *
 * @throws IOException
 *           When an I/O error occurs
 */
public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime, boolean filter,
        boolean norm, boolean force, int maxNumSegments) throws IOException {

    Path tempDir = new Path(
            getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + UUID.randomUUID().toString());

    Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
    FileSystem fs = FileSystem.get(getConf());
    LockUtil.createLockFile(fs, lock, force);

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Generator: starting at " + sdf.format(start));
    LOG.info("Generator: Selecting best-scoring urls due for fetch.");
    LOG.info("Generator: filtering: " + filter);
    LOG.info("Generator: normalizing: " + norm);
    if (topN != Long.MAX_VALUE) {
        LOG.info("Generator: topN: " + topN);
    }

    // map to inverted subset due for fetch, sort by score
    JobConf job = new NutchJob(getConf());
    job.setJobName("generate: select from " + dbDir);

    if (numLists == -1) { // for politeness make
        numLists = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }
    job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.setInt(GENERATOR_MAX_NUM_SEGMENTS, maxNumSegments);

    FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(Selector.class);
    job.setPartitionerClass(Selector.class);
    job.setReducerClass(Selector.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputKeyComparatorClass(DecreasingFloatComparator.class);
    job.setOutputValueClass(SelectorEntry.class);
    job.setOutputFormat(GeneratorOutputFormat.class);

    try {
        JobClient.runJob(job);
    } catch (IOException e) {
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        throw e;
    }

    // read the subdirectories generated in the temp
    // output and turn them into segments
    List<Path> generatedSegments = new ArrayList<Path>();

    FileStatus[] status = fs.listStatus(tempDir);
    try {
        for (FileStatus stat : status) {
            Path subfetchlist = stat.getPath();
            if (!subfetchlist.getName().startsWith("fetchlist-"))
                continue;
            // start a new partition job for this segment
            Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);
            generatedSegments.add(newSeg);
        }
    } catch (Exception e) {
        LOG.warn("Generator: exception while partitioning segments, exiting ...");
        fs.delete(tempDir, true);
        return null;
    }

    if (generatedSegments.size() == 0) {
        LOG.warn("Generator: 0 records selected for fetching, exiting ...");
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        return null;
    }

    if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
        // update the db from tempDir
        Path tempDir2 = new Path(
                getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + UUID.randomUUID().toString());

        job = new NutchJob(getConf());
        job.setJobName("generate: updatedb " + dbDir);
        job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
        for (Path segmpaths : generatedSegments) {
            Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
            FileInputFormat.addInputPath(job, subGenDir);
        }
        FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(CrawlDbUpdater.class);
        job.setReducerClass(CrawlDbUpdater.class);
        job.setOutputFormat(MapFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CrawlDatum.class);
        FileOutputFormat.setOutputPath(job, tempDir2);
        try {
            JobClient.runJob(job);
            CrawlDb.install(job, dbDir);
        } catch (IOException e) {
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            fs.delete(tempDir2, true);
            throw e;
        }
        fs.delete(tempDir2, true);
    }

    LockUtil.removeLockFile(fs, lock);
    fs.delete(tempDir, true);

    long end = System.currentTimeMillis();
    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));

    Path[] patharray = new Path[generatedSegments.size()];
    return generatedSegments.toArray(patharray);
}
From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java
License:Apache License
/**
 * This method can be called to execute a {@link PlumeWorkflow} by using the Hadoop Map-Reduce implementation.
 * It will build the execution tree, optimize it and convert each MSCR step into a MapRed job.
 * It will launch MSCR jobs in parallel when it is allowable to do so by using a ThreadPool. If one MSCR fails,
 * all the work flow is canceled. Because it stores the result in a temporary folder, it will only flush the final
 * result to the API parameter if the work flow has been executed successfully.
 *
 * @param workFlow The {@link PlumeWorkflow} to execute
 * @param outputTo Output folder where the result of the work flow will be stored if executed successfully
 *
 * @throws IOException If the work flow had to be canceled
 * @throws InterruptedException
 */
public void execute(PlumeWorkflow workFlow, String outputTo) throws IOException, InterruptedException {
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(workFlow);
    int nStep = 0;
    final String workFlowId = workFlow.getClass().getName() + "-" + System.currentTimeMillis();
    do {
        nStep++;
        log.info("Begin execution step " + nStep + " for workflow " + workFlow.getClass().getName());
        // Create a latch to mark the end of a concurrent step where all MSCRs can be executed in parallel
        final CountDownLatch latch = new CountDownLatch(step.mscrSteps.size());
        // Create a signal that can be flagged if one of the MSCRs fail, to abort all the workFlow
        // - I have chosen an AtomicBoolean in case this flag can be re-set to false under some circumstance -
        final AtomicBoolean abort = new AtomicBoolean(false);
        // For each MSCR that can be executed concurrently...
        for (final MSCR mscr : step.mscrSteps) {
            final String workFlowOutputPath = tmpOutputFolder + "/" + workFlowId;
            final String jobId = workFlowId + "/" + mscr.getId();
            final String jobOutputPath = tmpOutputFolder + "/" + jobId;
            log.info("Triggering execution of jobId " + jobId + ". Its output will be saved to "
                    + jobOutputPath);
            // ... Get its MapRed Job
            final Job job = getMapRed(mscr, workFlow, workFlowOutputPath, jobOutputPath);
            final FileSystem fS = FileSystem.getLocal(job.getConfiguration());
            // ... Submit it to the ThreadPool
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        job.waitForCompletion(true);
                        // job completed successfully - materialize outputs
                        log.info("jobId " + jobId + " completed successfully, now materializing outputs.");
                        for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
                            LazyCollection<?> oCol = (LazyCollection<?>) mscr.getOutputChannels()
                                    .get(entry.getKey()).output;
                            // Move this output to somewhere recognizable - this executor's tmp folder + this PCollection's Plume Id
                            // This way, mappers that read unmaterialized collections will know where to find intermediate states.
                            FileStatus[] files = fS.listStatus(new Path(jobOutputPath));
                            Path materializedPath = new Path(workFlowOutputPath + "/" + oCol.getPlumeId());
                            fS.mkdirs(materializedPath);
                            for (FileStatus file : files) {
                                if (file.getPath().getName().startsWith(entry.getValue() + "-r-")) {
                                    FileUtil.copy(fS, file.getPath(), fS, materializedPath, false,
                                            job.getConfiguration());
                                    oCol.setFile(materializedPath.toString());
                                }
                            }
                            log.info("Materialized plume output " + oCol.getPlumeId() + " to " + oCol.getFile());
                        }
                    } catch (IOException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (InterruptedException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (ClassNotFoundException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } finally {
                        latch.countDown(); // Count down under any circumstance
                    }
                }
            });
        }
        latch.await(); // wait until all MSCRs from this step are completed
        if (abort.get()) {
            throw new IOException("Current Workflow was aborted");
        }
        step = step.nextStep;
    } while (step != null);
    log.info("Workflow ended correctly.");
    // Move temporary result to where the API user wants it. WARN: Local-specific implementation
    Files.move(new File(tmpOutputFolder + "/" + workFlowId), new File(outputTo));
}
From source file:com.teradata.compaction.mapreduce.MergeParquetFilesMR.java
License:Apache License
private static Schema getBaseSchema(final Path pathToParqetFiles, Configuration conf) throws IOException {
    fileSchema = null;
    FileSystem fsystem = pathToParqetFiles.getFileSystem(conf);
    FileStatus fstatus = fsystem.getFileStatus(pathToParqetFiles);

    if (fstatus.isDir()) {
        FileStatus[] files = fsystem.listStatus(fstatus.getPath());
        for (FileStatus file : files) {
            if (!file.isDir()) {
                if (file.getPath().toString().toLowerCase().endsWith(".parquet")) {
                    ParquetReader<GenericRecord> reader_schema = new AvroParquetReader<GenericRecord>(
                            file.getPath());
                    GenericRecord tmp_schema = reader_schema.read();
                    fileSchema = tmp_schema.getSchema();
                    reader_schema.close();
                    break;
                }
            }
        }
    }

    // Print the Schema of one of the parquet files, which will be used as
    // schema for the final file!
    // System.out.println(fileSchema.toString());

    return fileSchema;
}
From source file:com.thinkbiganalytics.datalake.authorization.hdfs.HDFSUtil.java
License:Apache License
private void listAllDirAndFlushPolicy(FileSystem fileSystem, Path path) throws FileNotFoundException, IOException {
    FileStatus[] fileStatus = fileSystem.listStatus(path);

    for (FileStatus status : fileStatus) {
        // Apply ACL recursively on each file/directory.
        if (status.isDirectory()) {
            // Flush ACL before creating new one.
            flushAcl(fileSystem, status.getPath());
            listAllDirAndFlushPolicy(fileSystem, status.getPath());
        } else {
            // Flush ACL before creating new one.
            flushAcl(fileSystem, status.getPath());
        }
    }
}
From source file:com.thinkbiganalytics.datalake.authorization.hdfs.HDFSUtil.java
License:Apache License
/**
 * @param fileSystem : HDFS fileSystem object
 * @param path : Path on which ACL needs to be created
 * @param groups : List of groups to which permission needs to be granted.
 */
public void listAllDirAndApplyPolicy(FileSystem fileSystem, Path path, String groups, String hdfsPermission)
        throws FileNotFoundException, IOException {
    FsAction fsActionObject = getFinalPermission(hdfsPermission);
    FileStatus[] fileStatus = fileSystem.listStatus(path);

    for (FileStatus status : fileStatus) {
        // Flush ACL before creating new one.
        flushAcl(fileSystem, status.getPath());

        // Apply ACL recursively on each file/directory.
        if (status.isDirectory()) {
            String[] groupListForPermission = groups.split(",");
            for (int groupCounter = 0; groupCounter < groupListForPermission.length; groupCounter++) {
                // Create an HDFS ACL entry for each group for each Path on HDFS
                AclEntry aclEntryOwner = new AclEntry.Builder().setName(groupListForPermission[groupCounter])
                        .setPermission(fsActionObject).setScope(AclEntryScope.ACCESS)
                        .setType(AclEntryType.GROUP).build();

                AclEntry aclEntryOther = new AclEntry.Builder().setPermission(FsAction.NONE)
                        .setScope(AclEntryScope.ACCESS).setType(AclEntryType.OTHER).build();

                // Apply ACL on Path
                applyAcl(fileSystem, status.getPath(), aclEntryOwner);
                applyAcl(fileSystem, status.getPath(), aclEntryOther);
            }

            // Recursive call made to apply acl on each sub directory
            listAllDirAndApplyPolicy(fileSystem, status.getPath(), groups, hdfsPermission);
        } else {
            String[] groupListForPermission = groups.split(",");
            for (int groupCounter = 0; groupCounter < groupListForPermission.length; groupCounter++) {
                // Create an HDFS ACL entry for each group for each Path on HDFS
                AclEntry aclEntryOwner = new AclEntry.Builder().setName(groupListForPermission[groupCounter])
                        .setPermission(fsActionObject).setScope(AclEntryScope.ACCESS)
                        .setType(AclEntryType.GROUP).build();

                AclEntry aclEntryOther = new AclEntry.Builder().setPermission(FsAction.NONE)
                        .setScope(AclEntryScope.ACCESS).setType(AclEntryType.OTHER).build();

                // Apply ACL on Path
                applyAcl(fileSystem, status.getPath(), aclEntryOwner);
                applyAcl(fileSystem, status.getPath(), aclEntryOther);
            }
        }
    }
}
From source file:com.thinkbiganalytics.kerberos.TestKerberosKinit.java
License:Apache License
private void searchHDFS(Configuration configuration, final String environment, String hdfsPath, String hdfsUrl)
        throws Exception {
    configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    FileSystem fs = FileSystem.get(configuration);
    if (environment.equalsIgnoreCase(ENVIRONMENT_CLOUDERA)) {
        FileStatus[] status = fs.listStatus(new Path(hdfsUrl + hdfsPath));
        System.out.println("File Count: " + status.length);
    } else {
        if (environment.equalsIgnoreCase(ENVIRONMENT_HDP)) {
            FileStatus[] status = fs.listStatus(new Path(hdfsUrl + hdfsPath));
            System.out.println("File Count: " + status.length);
        }
    }
}
From source file:com.thinkbiganalytics.kylo.catalog.file.DefaultCatalogFileManager.java
License:Apache License
/**
 * Lists the files at the specified path.
 */
@Nonnull
private List<DataSetFile> listFiles(@Nonnull final FileSystem fs, @Nonnull final Path path) throws IOException {
    return Arrays.stream(fs.listStatus(path)).map(status -> {
        final DataSetFile file = new DataSetFile();
        file.setDirectory(status.isDirectory());
        file.setLength(status.getLen());
        file.setModificationTime(status.getModificationTime());
        file.setName(status.getPath().getName());
        file.setPath(status.getPath().toString());
        return file;
    }).collect(Collectors.toList());
}
From source file:com.trace.hadoop.TestDFSRename.java
License:Apache License
void list(FileSystem fs, String name) throws IOException {
    FileSystem.LOG.info("\n\n" + name);
    for (FileStatus s : fs.listStatus(dir)) {
        FileSystem.LOG.info("" + s.getPath());
    }
}
From source file:com.tripadvisor.hadoop.BackupHdfs.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a DFS to find all
 * files
 *
 * fs: FileSystem object from HDFS
 * minDate: Oldest date for files to be backed up
 * maxDate: Newest date for files to be backed up
 * p: Path in HDFS to look for files
 * pathList: Will be filled with all files in p
 * hmTimestamps: hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                Short sh = new Short(stat.getPermission().toShort());
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables. We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates. is it too young?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                //System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);
            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to backup
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1) ;
    }
}
From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java
License:Apache License
/**
 * Method to go through the HDFS filesystem in a DFS to find all
 * files
 *
 * fs: FileSystem object from HDFS
 * maxDate: Newest date for files to be backed up
 * p: Path in HDFS to look for files
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables. We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size. The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());

                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println(
                        "ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= " + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1) ;
    }
}