Example usage for org.apache.hadoop.fs FileSystem getFileChecksum

List of usage examples for org.apache.hadoop.fs FileSystem getFileChecksum

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem getFileChecksum.

Prototype

public FileChecksum getFileChecksum(Path f) throws IOException 

Document

Get the checksum of a file, if the FS supports checksums.
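
Before the full examples, here is a minimal, hedged sketch of a typical call: obtain a FileSystem, ask for the checksum of a path, and guard against a null result, since not every FileSystem implementation supports checksums. The configuration and the path "/tmp/example.txt" are hypothetical placeholders, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileChecksumExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path; replace with a file that exists on your cluster.
        Path file = new Path("/tmp/example.txt");

        FileChecksum checksum = fs.getFileChecksum(file);
        if (checksum == null) {
            // Implementations that do not support checksums return null.
            System.out.println("Checksum not available for " + file);
        } else {
            // FileChecksum exposes the algorithm name and the raw checksum bytes.
            System.out.println(checksum.getAlgorithmName() + ": " + checksum);
        }
    }
}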

Usage

From source file:co.cask.cdap.data.tools.ReplicationStatusTool.java

License:Apache License

private static SortedMap<String, String> getClusterChecksumMap() throws IOException {
    FileSystem fileSystem = FileSystem.get(hConf);
    List<String> fileList = addAllFiles(fileSystem);
    SortedMap<String, String> checksumMap = new TreeMap<String, String>();
    for (String file : fileList) {
        FileChecksum fileChecksum = fileSystem.getFileChecksum(new Path(file));
        checksumMap.put(normalizedFileName(file), fileChecksum.toString());
    }
    LOG.info("Added " + checksumMap.size() + " checksums for snapshot files.");
    return checksumMap;
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License:Apache License

@Test
public void testRun() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context = getMapperContext(copyMapper, reporter,
                writer);
        copyMapper.setup(context);

        for (Path path : pathList) {
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fs.getFileStatus(path), context);
        }
        // Check that the maps worked.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
            Assert.assertEquals(fs.getFileStatus(path).getReplication(),
                    fs.getFileStatus(targetPath).getReplication());
            Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
                    fs.getFileStatus(targetPath).getBlockSize());
            Assert.assertTrue(
                    !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
        }

        Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue());
        // Here the file is a compressed file, so we compare the file length
        // with the number of bytes read.
        long totalSize = 0;
        for (Path path : pathList) {
            totalSize += fs.getFileStatus(path).getLen();
        }
        Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue());
        long totalCounterValue = 0;
        for (Text value : writer.values()) {
            String tmp[] = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
            Assert.assertEquals(4, tmp.length);
            Long numOfMsgs = Long.parseLong(tmp[3]);
            totalCounterValue += numOfMsgs;
        }
        Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue);
        testCopyingExistingFiles(fs, copyMapper, context);
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}

From source file:com.inmobi.conduit.distcp.tools.util.DistCpUtils.java

License:Apache License

/**
 * Utility to compare checksums for the paths specified.
 *
 * If checksums can't be retrieved, the comparison is not treated as a failure.
 * The only time the comparison fails is when checksums are
 * available and they don't match.
 *
 * @param sourceFS FileSystem for the source path.
 * @param source The source path.
 * @param targetFS FileSystem for the target path.
 * @param target The target path.
 * @return If either checksum couldn't be retrieved, the function returns
 * false. If checksums are retrieved, the function returns true if they match,
 * and false otherwise.
 * @throws IOException if there's an exception while retrieving checksums.
 */
public static boolean checksumsAreEqual(FileSystem sourceFS, Path source, FileSystem targetFS, Path target)
        throws IOException {
    try {
        FileChecksum sourceChecksum = sourceFS.getFileChecksum(source);
        if (sourceChecksum == null) {
            LOG.warn("Checksum for " + source + " is null. Checksum match skipped.");
            return true;
        }

        FileChecksum targetChecksum = targetFS.getFileChecksum(target);
        if (targetChecksum == null) {
            LOG.warn("Checksum for " + source + " is null. Checksum match skipped.");
            return true;
        }

        return (sourceChecksum.equals(targetChecksum));

    } catch (IOException e) {
        LOG.error("Unable to retrieve checksum for " + source + " or " + target, e);
        return true;
    }
}
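
For context, the hedged fragment below sketches how a copy job might consult this helper before re-copying a file. The copyIfChanged method and the copyFile call are hypothetical; only checksumsAreEqual comes from the class above.

private static void copyIfChanged(FileSystem sourceFS, Path source, FileSystem targetFS, Path target)
        throws IOException {
    // If the target exists and the checksums compare as equal (or could not be
    // retrieved), skip the copy; otherwise re-copy the file.
    if (targetFS.exists(target) && DistCpUtils.checksumsAreEqual(sourceFS, source, targetFS, target)) {
        return;
    }
    copyFile(sourceFS, source, targetFS, target); // hypothetical copy helper
}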

From source file:com.mellanox.r4h.DistributedFileSystem.java

License:Apache License

@Override
public FileChecksum getFileChecksum(Path f) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FileChecksum>() {
        @Override
        public FileChecksum doCall(final Path p) throws IOException, UnresolvedLinkException {
            return dfs.getFileChecksum(getPathName(p), Long.MAX_VALUE);
        }

        @Override
        public FileChecksum next(final FileSystem fs, final Path p) throws IOException {
            return fs.getFileChecksum(p);
        }
    }.resolve(this, absF);
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Check whether the contents of src and dst are the same.
 *
 * Return false if dstpath does not exist
 *
 * If the files have different sizes, return false.
 *
 * If the files have the same sizes, the file checksums will be compared.
 *
 * When file checksums are not supported by either file system,
 * two files are considered the same if they have the same size.
 */
static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath)
        throws IOException {
    FileStatus dststatus;
    try {
        dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    //same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
        return false;
    }

    //compare checksums
    try {
        final FileChecksum srccs = srcfs.getFileChecksum(srcstatus.getPath());
        final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
        //return true if checksum is not supported
        //(i.e. one of the checksums is null)
        return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
        return false;
    }
}

From source file:com.scaleunlimited.cascading.DistCp.java

License:Apache License

/**
 * Check whether the contents of src and dst are the same.
 *
 * Return false if dstpath does not exist
 * 
 * If the files have different sizes, return false.
 * 
 * If the files have the same sizes, the file checksums will be compared.
 * 
 * When file checksums are not supported by either file system,
 * two files are considered the same if they have the same size.
 */
static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath)
        throws IOException {
    FileStatus dststatus;
    try {
        dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    //same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
        return false;
    }

    //get src checksum
    final FileChecksum srccs;
    try {
        srccs = srcfs.getFileChecksum(srcstatus.getPath());
    } catch (FileNotFoundException fnfe) {
        /*
         * Two possible cases:
         * (1) src existed once but was deleted between the time period that
         *     srcstatus was obtained and the try block above.
         * (2) srcfs does not support file checksum and (incorrectly) throws
         *     FNFE, e.g. some previous versions of HftpFileSystem.
         * For case (1), it is okay to return true since src was already deleted.
         * For case (2), true should be returned.  
         */
        return true;
    }

    //compare checksums
    try {
        final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
        //return true if checksum is not supported
        //(i.e. one of the checksums is null)
        return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
        return false;
    }
}

From source file:com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksums.java

License:Apache License

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session)
        throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem fs = getFileSystem(context);
    if (fs == null) {
        getLog().error("Couldn't initialize HDFS");
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
    String absolutePath = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    Boolean failIfWrongChecksum = context.getProperty(FAIL_IF_INCORRECT_CHECKSUM)
            .evaluateAttributeExpressions(flowFile).asBoolean();
    Gson jsonParser = new Gson();
    File[] filesList;
    try {
        filesList = jsonParser.fromJson(filesJSON, File[].class);
        if (filesList == null) {
            filesList = new File[0];
        }

        for (File f : filesList) {
            String name = f.getName();
            Path filePath;
            if (absolutePath == null || absolutePath.isEmpty()) {
                filePath = new Path(name);
            } else {
                filePath = new Path(absolutePath, name);
            }
            FileChecksum computed_checksum = fs.getFileChecksum(filePath);
            String b64_checksum = Base64.getEncoder().encodeToString(computed_checksum.getBytes());
            f.setComputedChecksum(
                    new Checksum(b64_checksum.length(), b64_checksum, computed_checksum.getAlgorithmName()));
            if (failIfWrongChecksum && !Objects.equals(b64_checksum, f.getChecksum().getValue())) {
                getLog().error("Checksums don't match! File: " + filePath.toString() + " checksum provided: "
                        + f.getChecksum().getValue() + " checksum computed: " + b64_checksum);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
    } catch (JsonSyntaxException e) {
        getLog().error("Files list attribute does not contain a proper JSON array");
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (FileNotFoundException e) {
        getLog().error("One of the provided files not found.\n" + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (IOException e) {
        throw new ProcessException(e);
    }
    flowFile = session.putAttribute(flowFile, FILES.getName(), jsonParser.toJson(filesList));
    session.transfer(flowFile, REL_SUCCESS);
}

From source file:com.tripadvisor.hadoop.BackupHdfs.java

License:Apache License

/** Compare the checksums of the HDFS file and the locally
 * copied file.
 *
 * @author tpalka@tripadvisor.com
 * @date   Fri Jan 27 06:06:00 2012
 */
boolean compareChecksums(FileSystem fs, Path p, String sFsPath) {
    try {
        // get hdfs file info
        FileStatus stat = fs.getFileStatus(p);

        // get HDFS checksum
        FileChecksum ck = fs.getFileChecksum(p);
        String sCk, sCkShort;
        if (ck == null) {
            sCk = sCkShort = "<null>";
        } else {
            sCk = ck.toString();
            sCkShort = sCk.replaceAll("^.*:", "");
        }

        // System.out.println(p.toUri().getPath() + " len=" + stat.getLen()
        // + " " + stat.getOwner() + "/" + stat.getGroup()
        // + " checksum=" + sCk);

        // find the local file
        File fLocal = new File(sFsPath);
        if (!fLocal.exists()) {
            System.out.println("CHECKSUM-ERROR: file does not exist: " + sFsPath);
            return false;
        }
        if (!fLocal.isFile()) {
            System.out.println("CHECKSUM-ERROR: path is not a file: " + sFsPath);
            return false;
        }
        if (stat.getLen() != fLocal.length()) {
            System.out.println("CHECKSUM-ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                    + " fslen=" + fLocal.length());
            return false;
        }

        // get local fs checksum
        FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
        if (ckLocal == null) {
            System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
            return false;
        }

        // compare checksums as a string, after stripping the
        // algorithm name from the beginning
        String sCkLocal = ckLocal.toString();
        String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

        if (!sCkShort.equals(sCkLocalShort)) {
            System.out.println("CHECKSUM-ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= "
                    + sCkLocal);
            return false;
        }

        return true;
    } catch (IOException e) {
        System.out.println("CHECKSUM-ERROR: " + sFsPath + " exception " + e.toString());
    }

    return false;
}

From source file:com.tripadvisor.hadoop.VerifyHdfsBackup.java

License:Apache License

/**
 * Method to go through the HDFS filesystem to find all
 * files.
 *
 * fs:FileSystem object from HDFS
 * maxDate:Newest date for files to be backed up
 * p:Path in HDFS to look for files
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables.  We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size.  The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());

                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (!sCkShort.equals(sCkLocalShort)) {
                System.out.println(
                        "ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= " + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);

        // System.exit(1) ;
    }
}

From source file:com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java

License:Open Source License

/**
 * Create a FileIndexDescriptor to describe which columns have been indexed.
 * @param inputFile
 *          the input file whose index files are stored under the index
 *          directory
 * @param fs
 *          the FileSystem used to read the checksum of the input file and
 *          to write the descriptor
 * @throws IOException
 */

protected void createIndexDescriptors(FileStatus inputFile, FileSystem fs) throws IOException {
    Path indexFilePath = new Path(getIndex() + inputFile.getPath().toUri().getRawPath());

    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(inputFile.getPath().toString());
    fid.setDocType(getExpectedDocType());
    LOG.info("getting checksum from:" + inputFile.getPath());
    FileChecksum cksum = fs.getFileChecksum(inputFile.getPath());
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null)
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(cksum.getAlgorithmName(),
                ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());

    Path idxPath = new Path(indexFilePath + "/" + BlockIndexedFileInputFormat.INDEXMETAFILENAME);
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<FileIndexDescriptor> writable = (ThriftWritable<FileIndexDescriptor>) ThriftWritable
            .newInstance(fid.getClass());
    writable.set(fid);
    writable.write(os);
    os.close();
}