Example usage for org.apache.hadoop.fs FileSystem listStatus

List of usage examples for org.apache.hadoop.fs FileSystem listStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.listStatus.

Prototype

public FileStatus[] listStatus(Path[] files) throws FileNotFoundException, IOException 

Source Link

Document

Filter files/directories in the given list of paths using the default path filter.
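
Below is a minimal sketch of calling this overload, assuming a FileSystem obtained from the current Configuration; the paths /data/in and /data/logs are placeholders used only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder paths, for illustration only.
        Path[] inputs = new Path[] { new Path("/data/in"), new Path("/data/logs") };

        // listStatus(Path[]) returns the combined listing of all given paths,
        // filtered with the FileSystem's default path filter.
        FileStatus[] statuses = fs.listStatus(inputs);
        for (FileStatus status : statuses) {
            System.out.println(status.getPath()
                    + (status.isDirectory() ? " (dir)" : " (" + status.getLen() + " bytes)"));
        }
        fs.close();
    }
}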

Usage

From source file:com.TCG.Nutch_DNS.Generator.java

License:Apache License

/**
 * Generate fetchlists in one or more segments. Whether to filter URLs or not
 * is read from the crawl.generate.filter property in the configuration files.
 * If the property is not found, the URLs are filtered. Same for the
 * normalisation.
 * 
 * @param dbDir
 *          Crawl database directory
 * @param segments
 *          Segments directory
 * @param numLists
 *          Number of reduce tasks
 * @param topN
 *          Number of top URLs to be selected
 * @param curTime
 *          Current time in milliseconds
 * 
 * @return Path to generated segment or null if no entries were selected
 * 
 * @throws IOException
 *           When an I/O error occurs
 */
public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime, boolean filter,
        boolean norm, boolean force, int maxNumSegments) throws IOException {

    Path tempDir = new Path(
            getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + UUID.randomUUID().toString());

    Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
    FileSystem fs = FileSystem.get(getConf());
    LockUtil.createLockFile(fs, lock, force);

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Generator: starting at " + sdf.format(start));
    LOG.info("Generator: Selecting best-scoring urls due for fetch.");
    LOG.info("Generator: filtering: " + filter);
    LOG.info("Generator: normalizing: " + norm);
    if (topN != Long.MAX_VALUE) {
        LOG.info("Generator: topN: " + topN);
    }

    // map to inverted subset due for fetch, sort by score
    JobConf job = new NutchJob(getConf());
    job.setJobName("generate: select from " + dbDir);

    if (numLists == -1) { // for politeness make
        numLists = job.getNumMapTasks(); // a partition per fetch task
    }
    if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
        // override
        LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
        numLists = 1;
    }
    job.setLong(GENERATOR_CUR_TIME, curTime);
    // record real generation time
    long generateTime = System.currentTimeMillis();
    job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
    job.setLong(GENERATOR_TOP_N, topN);
    job.setBoolean(GENERATOR_FILTER, filter);
    job.setBoolean(GENERATOR_NORMALISE, norm);
    job.setInt(GENERATOR_MAX_NUM_SEGMENTS, maxNumSegments);

    FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(Selector.class);
    job.setPartitionerClass(Selector.class);
    job.setReducerClass(Selector.class);

    FileOutputFormat.setOutputPath(job, tempDir);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputKeyComparatorClass(DecreasingFloatComparator.class);
    job.setOutputValueClass(SelectorEntry.class);
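    // Note: setting GeneratorOutputFormat below replaces the SequenceFileOutputFormat configured above.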
    job.setOutputFormat(GeneratorOutputFormat.class);

    try {
        JobClient.runJob(job);
    } catch (IOException e) {
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        throw e;
    }

    // read the subdirectories generated in the temp
    // output and turn them into segments
    List<Path> generatedSegments = new ArrayList<Path>();

    FileStatus[] status = fs.listStatus(tempDir);
    try {
        for (FileStatus stat : status) {
            Path subfetchlist = stat.getPath();
            if (!subfetchlist.getName().startsWith("fetchlist-"))
                continue;
            // start a new partition job for this segment
            Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);
            generatedSegments.add(newSeg);
        }
    } catch (Exception e) {
        LOG.warn("Generator: exception while partitioning segments, exiting ...");
        fs.delete(tempDir, true);
        return null;
    }

    if (generatedSegments.size() == 0) {
        LOG.warn("Generator: 0 records selected for fetching, exiting ...");
        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        return null;
    }

    if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
        // update the db from tempDir
        Path tempDir2 = new Path(
                getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + UUID.randomUUID().toString());

        job = new NutchJob(getConf());
        job.setJobName("generate: updatedb " + dbDir);
        job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
        for (Path segmpaths : generatedSegments) {
            Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
            FileInputFormat.addInputPath(job, subGenDir);
        }
        FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(CrawlDbUpdater.class);
        job.setReducerClass(CrawlDbUpdater.class);
        job.setOutputFormat(MapFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CrawlDatum.class);
        FileOutputFormat.setOutputPath(job, tempDir2);
        try {
            JobClient.runJob(job);
            CrawlDb.install(job, dbDir);
        } catch (IOException e) {
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            fs.delete(tempDir2, true);
            throw e;
        }
        fs.delete(tempDir2, true);
    }

    LockUtil.removeLockFile(fs, lock);
    fs.delete(tempDir, true);

    long end = System.currentTimeMillis();
    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));

    Path[] patharray = new Path[generatedSegments.size()];
    return generatedSegments.toArray(patharray);
}

From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java

License:Apache License

/**
 * This method can be called to execute a {@link PlumeWorkflow} using the Hadoop MapReduce implementation.
 * It will build the execution tree, optimize it and convert each MSCR step into a MapRed job.
 * It will launch MSCR jobs in parallel, via a ThreadPool, when it is allowable to do so. If one MSCR fails,
 * the whole workflow is canceled. Because it stores the result in a temporary folder, it only flushes the
 * final result to the caller's output folder if the workflow has been executed successfully.
 *
 * @param workFlow The {@link PlumeWorkflow} to execute 
 * @param outputTo Output folder where the result of the work flow will be stored if executed successfully
 * 
 * @throws IOException If the work flow had to be canceled
 * @throws InterruptedException 
 */
public void execute(PlumeWorkflow workFlow, String outputTo) throws IOException, InterruptedException {
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(workFlow);
    int nStep = 0;
    final String workFlowId = workFlow.getClass().getName() + "-" + System.currentTimeMillis();
    do {
        nStep++;
        log.info("Begin execution step " + nStep + " for workflow " + workFlow.getClass().getName());
        // Create a latch to mark the end of a concurrent step where all MSCRs can be executed in parallel
        final CountDownLatch latch = new CountDownLatch(step.mscrSteps.size());
        // Create a signal that can be flagged if one of the MSCRs fail to abort all the workFlow
        // - I have chosen an AtomicBoolean in case this flag can be re-set to false under some circumstance -
        final AtomicBoolean abort = new AtomicBoolean(false);
        // For each MSCR that can be executed concurrently...
        for (final MSCR mscr : step.mscrSteps) {
            final String workFlowOutputPath = tmpOutputFolder + "/" + workFlowId;
            final String jobId = workFlowId + "/" + mscr.getId();
            final String jobOutputPath = tmpOutputFolder + "/" + jobId;
            log.info("Triggering execution of jobId " + jobId + ". Its output will be saved to "
                    + jobOutputPath);
            // ... Get its MapRed Job
            final Job job = getMapRed(mscr, workFlow, workFlowOutputPath, jobOutputPath);
            final FileSystem fS = FileSystem.getLocal(job.getConfiguration());
            // ... Submit it to the ThreadPool
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        job.waitForCompletion(true);
                        // job completed successfully - materialize outputs
                        log.info("jobId " + jobId + " completed successfully, now materializing outputs.");
                        for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
                            LazyCollection<?> oCol = (LazyCollection<?>) mscr.getOutputChannels()
                                    .get(entry.getKey()).output;
                            // Move this output to somewhere recognizable - this executor's tmp folder + this PCollection's Plume Id
                            // This way, mappers that read unmaterialized collections will know where to find intermediate states.
                            FileStatus[] files = fS.listStatus(new Path(jobOutputPath));
                            Path materializedPath = new Path(workFlowOutputPath + "/" + oCol.getPlumeId());
                            fS.mkdirs(materializedPath);
                            for (FileStatus file : files) {
                                if (file.getPath().getName().startsWith(entry.getValue() + "-r-")) {
                                    FileUtil.copy(fS, file.getPath(), fS, materializedPath, false,
                                            job.getConfiguration());
                                    oCol.setFile(materializedPath.toString());
                                }
                            }
                            log.info(
                                    "Materialized plume output " + oCol.getPlumeId() + " to " + oCol.getFile());
                        }
                    } catch (IOException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (InterruptedException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (ClassNotFoundException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } finally {
                        latch.countDown(); // Count down under any circumstance
                    }
                }
            });
        }
        latch.await(); // wait until all MSCRs from this step are completed
        if (abort.get()) {
            throw new IOException("Current Workflow was aborted");
        }
        step = step.nextStep;
    } while (step != null);
    log.info("Workflow ended correctly.");
    // Move temporary result to where API user wants to: WARN: Local-specific implementation
    Files.move(new File(tmpOutputFolder + "/" + workFlowId), new File(outputTo));
}

From source file:com.teradata.compaction.mapreduce.MergeParquetFilesMR.java

License:Apache License

private static Schema getBaseSchema(final Path pathToParqetFiles, Configuration conf) throws IOException {
    fileSchema = null;
    FileSystem fsystem = pathToParqetFiles.getFileSystem(conf);
    FileStatus fstatus = fsystem.getFileStatus(pathToParqetFiles);

    if (fstatus.isDir()) {
        FileStatus[] files = fsystem.listStatus(fstatus.getPath());
        for (FileStatus file : files) {
            if (!file.isDir()) {
                if (file.getPath().toString().toLowerCase().endsWith(".parquet")) {
                    ParquetReader<GenericRecord> reader_schema = new AvroParquetReader<GenericRecord>(
                            file.getPath());
                    GenericRecord tmp_schema = reader_schema.read();
                    fileSchema = tmp_schema.getSchema();
                    reader_schema.close();
                    break;
                }
            }
        }
    }
    // Print the Schema of one of the parquet files, which will be used as
    // schema for the final file!
    // System.out.println(fileSchema.toString());
    return fileSchema;
}

From source file:com.thinkbiganalytics.datalake.authorization.hdfs.HDFSUtil.java

License:Apache License

private void listAllDirAndFlushPolicy(FileSystem fileSystem, Path path)
        throws FileNotFoundException, IOException {

    FileStatus[] fileStatus = fileSystem.listStatus(path);

    for (FileStatus status : fileStatus) {

        // Apply ACL recursively on each file/directory.
        if (status.isDirectory()) {

            // Flush ACL before creating new one.
            flushAcl(fileSystem, status.getPath());

            listAllDirAndFlushPolicy(fileSystem, status.getPath());
        } else {

            // Flush ACL before creating new one.
            flushAcl(fileSystem, status.getPath());
        }
    }
}

From source file:com.thinkbiganalytics.datalake.authorization.hdfs.HDFSUtil.java

License:Apache License

/**
 * @param fileSystem : HDFS fileSystem object
 * @param path       : Path on which ACL needs to be created
 * @param groups     : List of groups to which permission needs to be granted.
 */

public void listAllDirAndApplyPolicy(FileSystem fileSystem, Path path, String groups, String hdfsPermission)
        throws FileNotFoundException, IOException {
    FsAction fsActionObject = getFinalPermission(hdfsPermission);
    FileStatus[] fileStatus = fileSystem.listStatus(path);

    for (FileStatus status : fileStatus) {

        // Flush ACL before creating new one.
        flushAcl(fileSystem, status.getPath());

        // Apply ACL recursively on each file/directory.
        if (status.isDirectory()) {
            String[] groupListForPermission = groups.split(",");
            for (int groupCounter = 0; groupCounter < groupListForPermission.length; groupCounter++) {

                // Create an HDFS ACL entry for this group on the current Path
                AclEntry aclEntryOwner = new AclEntry.Builder().setName(groupListForPermission[groupCounter])
                        .setPermission(fsActionObject).setScope(AclEntryScope.ACCESS)
                        .setType(AclEntryType.GROUP).build();

                AclEntry aclEntryOther = new AclEntry.Builder().setPermission(FsAction.NONE)
                        .setScope(AclEntryScope.ACCESS).setType(AclEntryType.OTHER).build();

                // Apply ACL on Path
                applyAcl(fileSystem, status.getPath(), aclEntryOwner);
                applyAcl(fileSystem, status.getPath(), aclEntryOther);

            }

            // Recursive call made to apply acl on each sub directory
            listAllDirAndApplyPolicy(fileSystem, status.getPath(), groups, hdfsPermission);
        } else {
            String[] groupListForPermission = groups.split(",");
            for (int groupCounter = 0; groupCounter < groupListForPermission.length; groupCounter++) {

                // Create an HDFS ACL entry for this group on the current Path
                AclEntry aclEntryOwner = new AclEntry.Builder().setName(groupListForPermission[groupCounter])
                        .setPermission(fsActionObject).setScope(AclEntryScope.ACCESS)
                        .setType(AclEntryType.GROUP).build();

                AclEntry aclEntryOther = new AclEntry.Builder().setPermission(FsAction.NONE)
                        .setScope(AclEntryScope.ACCESS).setType(AclEntryType.OTHER).build();

                // Apply ACL on Path
                applyAcl(fileSystem, status.getPath(), aclEntryOwner);
                applyAcl(fileSystem, status.getPath(), aclEntryOther);

            }
        }
    }
}

From source file:com.thinkbiganalytics.kerberos.TestKerberosKinit.java

License:Apache License

private void searchHDFS(Configuration configuration, final String environment, String hdfsPath, String hdfsUrl)
        throws Exception {
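    // Explicitly bind the file:// and hdfs:// schemes to their FileSystem implementations.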
    configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    FileSystem fs = FileSystem.get(configuration);

    if (environment.equalsIgnoreCase(ENVIRONMENT_CLOUDERA)) {
        FileStatus[] status = fs.listStatus(new Path(hdfsUrl + hdfsPath));
        System.out.println("File Count: " + status.length);
    } else {
        if (environment.equalsIgnoreCase(ENVIRONMENT_HDP)) {
            FileStatus[] status = fs.listStatus(new Path(hdfsUrl + hdfsPath));
            System.out.println("File Count: " + status.length);
        }
    }
}

From source file:com.thinkbiganalytics.kylo.catalog.file.DefaultCatalogFileManager.java

License:Apache License

/**
 * Lists the files at the specified path.
 */
@Nonnull
private List<DataSetFile> listFiles(@Nonnull final FileSystem fs, @Nonnull final Path path) throws IOException {
    return Arrays.stream(fs.listStatus(path)).map(status -> {
        final DataSetFile file = new DataSetFile();
        file.setDirectory(status.isDirectory());
        file.setLength(status.getLen());
        file.setModificationTime(status.getModificationTime());
        file.setName(status.getPath().getName());
        file.setPath(status.getPath().toString());
        return file;
    }).collect(Collectors.toList());
}

From source file:com.trace.hadoop.TestDFSRename.java

License:Apache License

void list(FileSystem fs, String name) throws IOException {
    FileSystem.LOG.info("\n\n" + name);
    for (FileStatus s : fs.listStatus(dir)) {
        FileSystem.LOG.info("" + s.getPath());
    }
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/**
 * Method to go through the HDFS filesystem to find all
 * files
 *
 * fs:           FileSystem object from HDFS
 * minDate:      Oldest date for files to be backed up
 * maxDate:      Newest date for files to be backed up
 * p:            Path in HDFS to look for files
 * pathList:     Will be filled with all files in p
 * hmTimestamps: hashmap of timestamps for later sorting
 **/
public void checkDir(FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList,
        HashMap<Path, Long> hmTimestamps) {
    long tmpDate;
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            // dump the mkdir and chmod commands for this
            // directory -- skip root directory only
            {
                FileStatus stat = fs.getFileStatus(p);

                if (!sPath.equals("/")) {
                    m_wrMkdirs.println("hadoop fs -mkdir " + sPath);
                }

                m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

                Short sh = new Short(stat.getPermission().toShort());
                m_wrChmods.println("hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath);
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // another database to regular hive tables to
            // partitioned hive tables.  We use table names to
            // both exclude some from the backup, and for the rest
            // to dump out the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                m_nIgnoredTables++;

                if (m_nIgnoredTables < 5) {
                    System.out.println("Skipping ignore-table file: " + sPath);
                } else if (m_nIgnoredTables == 5) {
                    System.out.println("(...not showing other skipped tables...)");
                }
                return;
            }

            FileStatus stat = fs.getFileStatus(p);

            tmpDate = stat.getModificationTime() / 1000;

            // store the chmods/chowns for all files
            m_wrChmods.println("hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath);

            m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath);

            // check dates.  is it too young?
            if (tmpDate < minDate) {
                return;
            }

            // is the file too recent?
            if (tmpDate > maxDate) {
                //System.out.println("file too recent: " + sPath);
                return;
            }

            // file timestamp is ok
            pathList.add(p);

            hmTimestamps.put(p, new Long(tmpDate));

            // store info about total bytes needed to backup
            m_nTotalBytes += fs.getContentSummary(p).getLength();
        }
    } catch (IOException e) {
        System.err.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}

From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java

License:Apache License

/**
 * Method to go through the HDFS filesystem to find all
 * files
 *
 * fs:      FileSystem object from HDFS
 * maxDate: Newest date for files to be backed up
 * p:       Path in HDFS to look for files
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables.  We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size.  The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());

                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println(
                        "ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= " + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}