List of usage examples for org.apache.hadoop.fs.FileSystem.getFileChecksum
public FileChecksum getFileChecksum(Path f) throws IOException
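Before the project examples below, here is a minimal, self-contained sketch of the basic call pattern. The path and configuration are placeholders (not taken from any of the source files that follow); note that getFileChecksum may return null when the underlying file system does not provide checksums (for example the local file system), so callers should handle that case.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileChecksumExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder path; replace with a file that exists on your cluster.
        Path file = new Path("/tmp/example.txt");

        FileChecksum checksum = fs.getFileChecksum(file);
        if (checksum == null) {
            // Some file systems do not support checksums and return null.
            System.out.println("Checksum not supported for " + file);
        } else {
            System.out.println("Algorithm: " + checksum.getAlgorithmName());
            System.out.println("Length:    " + checksum.getLength());
            System.out.println("Checksum:  " + checksum);
        }
    }
}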
From source file: co.cask.cdap.data.tools.ReplicationStatusTool.java

License: Apache License

private static SortedMap<String, String> getClusterChecksumMap() throws IOException {
    FileSystem fileSystem = FileSystem.get(hConf);
    List<String> fileList = addAllFiles(fileSystem);
    SortedMap<String, String> checksumMap = new TreeMap<String, String>();
    for (String file : fileList) {
        FileChecksum fileChecksum = fileSystem.getFileChecksum(new Path(file));
        checksumMap.put(normalizedFileName(file), fileChecksum.toString());
    }
    LOG.info("Added " + checksumMap.size() + " checksums for snapshot files.");
    return checksumMap;
}
From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License: Apache License

@Test
public void testRun() {
    try {
        deleteState();
        createSourceData();

        FileSystem fs = cluster.getFileSystem();
        CopyMapper copyMapper = new CopyMapper();
        StatusReporter reporter = new StubStatusReporter();
        InMemoryWriter writer = new InMemoryWriter();
        Mapper<Text, FileStatus, NullWritable, Text>.Context context =
                getMapperContext(copyMapper, reporter, writer);
        copyMapper.setup(context);

        for (Path path : pathList) {
            copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
                    fs.getFileStatus(path), context);
        }

        // Check that the maps worked.
        for (Path path : pathList) {
            final Path targetPath = new Path(path.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
            Assert.assertTrue(fs.exists(targetPath));
            Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
            Assert.assertEquals(fs.getFileStatus(path).getReplication(),
                    fs.getFileStatus(targetPath).getReplication());
            Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
                    fs.getFileStatus(targetPath).getBlockSize());
            Assert.assertTrue(
                    !fs.isFile(targetPath) || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
        }

        Assert.assertEquals(pathList.size(), reporter.getCounter(CopyMapper.Counter.PATHS_COPIED).getValue());

        // Here file is compressed file. So, we should compare the file length
        // with the number of bytes read
        long totalSize = 0;
        for (Path path : pathList) {
            totalSize += fs.getFileStatus(path).getLen();
        }
        Assert.assertEquals(totalSize, reporter.getCounter(CopyMapper.Counter.BYTES_COPIED).getValue());

        long totalCounterValue = 0;
        for (Text value : writer.values()) {
            String[] tmp = value.toString().split(ConduitConstants.AUDIT_COUNTER_NAME_DELIMITER);
            Assert.assertEquals(4, tmp.length);
            Long numOfMsgs = Long.parseLong(tmp[3]);
            totalCounterValue += numOfMsgs;
        }
        Assert.assertEquals(nFiles * NUMBER_OF_MESSAGES_PER_FILE, totalCounterValue);

        testCopyingExistingFiles(fs, copyMapper, context);
    } catch (Exception e) {
        LOG.error("Unexpected exception: ", e);
        Assert.assertTrue(false);
    }
}
From source file: com.inmobi.conduit.distcp.tools.util.DistCpUtils.java

License: Apache License

/**
 * Utility to compare checksums for the paths specified.
 *
 * If a checksum can't be retrieved, the comparison does not fail; the only
 * time it fails is when both checksums are available and they don't match.
 *
 * @param sourceFS FileSystem for the source path.
 * @param source The source path.
 * @param targetFS FileSystem for the target path.
 * @param target The target path.
 * @return true if the checksums match, or if either checksum could not be
 *         retrieved (in which case the comparison is skipped); false only
 *         when both checksums are available and they differ.
 * @throws IOException declared on the signature; in practice retrieval
 *         failures are caught, logged, and treated as a skipped comparison.
 */
public static boolean checksumsAreEqual(FileSystem sourceFS, Path source, FileSystem targetFS, Path target)
        throws IOException {
    try {
        FileChecksum sourceChecksum = sourceFS.getFileChecksum(source);
        if (sourceChecksum == null) {
            LOG.warn("Checksum for " + source + " is null. Checksum match skipped.");
            return true;
        }
        FileChecksum targetChecksum = targetFS.getFileChecksum(target);
        if (targetChecksum == null) {
            LOG.warn("Checksum for " + target + " is null. Checksum match skipped.");
            return true;
        }
        return (sourceChecksum.equals(targetChecksum));
    } catch (IOException e) {
        LOG.error("Unable to retrieve checksum for " + source + " or " + target, e);
        return true;
    }
}
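A hypothetical call site for the helper above (not part of DistCpUtils.java; the method name, paths, and error handling are illustrative only) might use it to validate a copied file and treat only a confirmed mismatch as an error:

// Hypothetical helper showing one way the method above might be called;
// everything here is a placeholder, not code from the source file.
static void verifyCopy(Configuration conf, Path source, Path target) throws IOException {
    FileSystem sourceFS = source.getFileSystem(conf);
    FileSystem targetFS = target.getFileSystem(conf);
    if (!DistCpUtils.checksumsAreEqual(sourceFS, source, targetFS, target)) {
        // false only when both checksums were available and they differ
        throw new IOException("Checksum mismatch between " + source + " and " + target);
    }
}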
From source file: com.mellanox.r4h.DistributedFileSystem.java

License: Apache License

@Override
public FileChecksum getFileChecksum(Path f) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FileChecksum>() {
        @Override
        public FileChecksum doCall(final Path p) throws IOException, UnresolvedLinkException {
            return dfs.getFileChecksum(getPathName(p), Long.MAX_VALUE);
        }

        @Override
        public FileChecksum next(final FileSystem fs, final Path p) throws IOException {
            return fs.getFileChecksum(p);
        }
    }.resolve(this, absF);
}
From source file: com.pinterest.hdfsbackup.distcp.DistCp.java

License: Apache License

/**
 * Check whether the contents of src and dst are the same.
 *
 * Return false if dstpath does not exist.
 *
 * If the files have different sizes, return false.
 *
 * If the files have the same sizes, the file checksums will be compared.
 *
 * When file checksum is not supported in any of the file systems,
 * two files are considered as the same if they have the same size.
 */
static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath)
        throws IOException {
    FileStatus dststatus;
    try {
        dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    // same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
        return false;
    }

    // compare checksums
    try {
        final FileChecksum srccs = srcfs.getFileChecksum(srcstatus.getPath());
        final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
        // return true if checksum is not supported
        // (i.e. some of the checksums is null)
        return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
        return false;
    }
}
From source file: com.scaleunlimited.cascading.DistCp.java

License: Apache License

/**
 * Check whether the contents of src and dst are the same.
 *
 * Return false if dstpath does not exist.
 *
 * If the files have different sizes, return false.
 *
 * If the files have the same sizes, the file checksums will be compared.
 *
 * When file checksum is not supported in any of the file systems,
 * two files are considered as the same if they have the same size.
 */
static private boolean sameFile(FileSystem srcfs, FileStatus srcstatus, FileSystem dstfs, Path dstpath)
        throws IOException {
    FileStatus dststatus;
    try {
        dststatus = dstfs.getFileStatus(dstpath);
    } catch (FileNotFoundException fnfe) {
        return false;
    }

    // same length?
    if (srcstatus.getLen() != dststatus.getLen()) {
        return false;
    }

    // get src checksum
    final FileChecksum srccs;
    try {
        srccs = srcfs.getFileChecksum(srcstatus.getPath());
    } catch (FileNotFoundException fnfe) {
        /*
         * Two possible cases:
         * (1) src existed once but was deleted between the time period that
         *     srcstatus was obtained and the try block above.
         * (2) srcfs does not support file checksum and (incorrectly) throws
         *     FNFE, e.g. some previous versions of HftpFileSystem.
         * For case (1), it is okay to return true since src was already deleted.
         * For case (2), true should be returned.
         */
        return true;
    }

    // compare checksums
    try {
        final FileChecksum dstcs = dstfs.getFileChecksum(dststatus.getPath());
        // return true if checksum is not supported
        // (i.e. some of the checksums is null)
        return srccs == null || dstcs == null || srccs.equals(dstcs);
    } catch (FileNotFoundException fnfe) {
        return false;
    }
}
From source file: com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksums.java

License: Apache License

@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session)
        throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }

    final FileSystem fs = getFileSystem(context);
    if (fs == null) {
        getLog().error("Couldn't initialize HDFS");
        session.transfer(flowFile, REL_FAILURE);
        return;
    }

    String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
    String absolutePath = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    Boolean failIfWrongChecksum = context.getProperty(FAIL_IF_INCORRECT_CHECKSUM)
            .evaluateAttributeExpressions(flowFile).asBoolean();

    Gson jsonParser = new Gson();
    File[] filesList;
    try {
        filesList = jsonParser.fromJson(filesJSON, File[].class);
        if (filesList == null) {
            filesList = new File[0];
        }
        for (File f : filesList) {
            String name = f.getName();
            Path filePath;
            if (absolutePath == null || absolutePath.isEmpty()) {
                filePath = new Path(name);
            } else {
                filePath = new Path(absolutePath, name);
            }
            FileChecksum computed_checksum = fs.getFileChecksum(filePath);
            String b64_checksum = Base64.getEncoder().encodeToString(computed_checksum.getBytes());
            f.setComputedChecksum(
                    new Checksum(b64_checksum.length(), b64_checksum, computed_checksum.getAlgorithmName()));
            if (failIfWrongChecksum && !Objects.equals(b64_checksum, f.getChecksum().getValue())) {
                getLog().error("Checksums don't match! File: " + filePath.toString() + " checksum provided: "
                        + f.getChecksum().getValue() + " checksum computed: " + b64_checksum);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
    } catch (JsonSyntaxException e) {
        getLog().error("Files list attribute does not contain a proper JSON array");
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (FileNotFoundException e) {
        getLog().error("One of the provided files not found.\n" + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (IOException e) {
        throw new ProcessException(e);
    }

    flowFile = session.putAttribute(flowFile, FILES.getName(), jsonParser.toJson(filesList));
    session.transfer(flowFile, REL_SUCCESS);
}
From source file: com.tripadvisor.hadoop.BackupHdfs.java

License: Apache License

/**
 * Compare the checksums of the hdfs file as well as the local
 * copied file.
 *
 * @author tpalka@tripadvisor.com
 * @date   Fri Jan 27 06:06:00 2012
 */
boolean compareChecksums(FileSystem fs, Path p, String sFsPath) {
    try {
        // get hdfs file info
        FileStatus stat = fs.getFileStatus(p);

        // get HDFS checksum
        FileChecksum ck = fs.getFileChecksum(p);
        String sCk, sCkShort;
        if (ck == null) {
            sCk = sCkShort = "<null>";
        } else {
            sCk = ck.toString();
            sCkShort = sCk.replaceAll("^.*:", "");
        }

        // System.out.println(p.toUri().getPath() + " len=" + stat.getLen()
        //     + " " + stat.getOwner() + "/" + stat.getGroup()
        //     + " checksum=" + sCk);

        // find the local file
        File fLocal = new File(sFsPath);
        if (!fLocal.exists()) {
            System.out.println("CHECKSUM-ERROR: file does not exist: " + sFsPath);
            return false;
        }
        if (!fLocal.isFile()) {
            System.out.println("CHECKSUM-ERROR: path is not a file: " + sFsPath);
            return false;
        }
        if (stat.getLen() != fLocal.length()) {
            System.out.println("CHECKSUM-ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                    + " fslen=" + fLocal.length());
            return false;
        }

        // get local fs checksum
        FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
        if (ckLocal == null) {
            System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
            return false;
        }

        // compare checksums as a string, after stripping the
        // algorithm name from the beginning
        String sCkLocal = ckLocal.toString();
        String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

        if (false == sCkShort.equals(sCkLocalShort)) {
            System.out.println("CHECKSUM-ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk
                    + "\nlocal= " + sCkLocal);
            return false;
        }

        return true;
    } catch (IOException e) {
        System.out.println("CHECKSUM-ERROR: " + sFsPath + " exception " + e.toString());
    }
    return false;
}
From source file: com.tripadvisor.hadoop.VerifyHdfsBackup.java

License: Apache License

/**
 * Method to go through the HDFS filesystem in a DFS to find all
 * files.
 *
 * fs: FileSystem object from HDFS
 * maxDate: Newest date for files to be backed up
 * p: Path in HDFS to look for files
 **/
public void checkDir(FileSystem fs, Path p, String sLocalPathRoot, long maxDate) {
    FileStatus[] fStat;

    try {
        String sPath = p.toUri().getPath();

        // If this is a directory
        if (fs.getFileStatus(p).isDir()) {
            // ignore certain directories
            if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName())
                    || sPath.startsWith("/mapred") || "ops".equals(p.getName())
                    || p.getName().startsWith("_distcp_logs")) {
                return;
            }

            fStat = fs.listStatus(p);

            // Do a recursive call to all elements
            for (int i = 0; i < fStat.length; i++) {
                checkDir(fs, fStat[i].getPath(), sLocalPathRoot, maxDate);
            }
        } else {
            // If not a directory then we've found a file

            // ignore crc files
            if (p.getName().endsWith(".crc")) {
                return;
            }

            // ignore other files
            if (sPath.startsWith("/user/oozie/etl/workflows/")) {
                return;
            }

            // try to get the table name from the path. There are
            // various types of tables, from those replicated from
            // tripmonster to regular hive tables to partitioned
            // hive tables. We use table names to both exclude
            // some from the backup, and for the rest to dump out
            // the schema and partition name.
            if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) {
                return;
            }

            // check the file
            FileStatus stat = fs.getFileStatus(p);

            // ignore files that are too new
            if ((stat.getModificationTime() / 1000) > maxDate) {
                System.out.println("IGNORING: " + sPath + " too new");
                return;
            }

            // warn about files that have a mis-matching block
            // size. The checksum check will fail for them
            // anyways, so just catch it here.
            if (stat.getBlockSize() != N_BLOCK_SIZE) {
                System.out.println("ERROR: non-default block size (" + (stat.getBlockSize() / (1024 * 1024))
                        + "M) would fail checksum: " + sPath);
                return;
            }

            // get HDFS checksum
            FileChecksum ck = fs.getFileChecksum(p);
            String sCk, sCkShort;
            if (ck == null) {
                sCk = sCkShort = "<null>";
            } else {
                sCk = ck.toString();
                sCkShort = sCk.replaceAll("^.*:", "");
            }

            System.out.println(sPath + " len=" + stat.getLen() + " " + stat.getOwner() + "/" + stat.getGroup()
                    + " checksum=" + sCk);

            // find the local file
            String sFsPath = sLocalPathRoot + p.toUri().getPath();
            File fLocal = new File(sFsPath);
            if (!fLocal.exists()) {
                Calendar cal = Calendar.getInstance();
                cal.setTimeInMillis(stat.getModificationTime());
                System.out.println("ERROR: file does not exist: " + sFsPath + " hdfs-last-mtime="
                        + cal.getTime().toString());
                return;
            }
            if (!fLocal.isFile()) {
                System.out.println("ERROR: path is not a file: " + sFsPath);
                return;
            }
            if (stat.getLen() != fLocal.length()) {
                System.out.println("ERROR: length mismatch: " + sFsPath + " hdfslen=" + stat.getLen()
                        + " fslen=" + fLocal.length());
                return;
            }

            // get local fs checksum
            FileChecksum ckLocal = getLocalFileChecksum(sFsPath);
            if (ckLocal == null) {
                System.out.println("ERROR Failed to get checksum for local file " + sFsPath);
                return;
            }

            // compare checksums as a string, to strip the
            // algorithm name from the beginning
            String sCkLocal = ckLocal.toString();
            String sCkLocalShort = sCkLocal.replaceAll("^.*:", "");

            if (false == sCkShort.equals(sCkLocalShort)) {
                System.out.println("ERROR: checksum mismatch: " + sFsPath + "\nhdfs = " + sCk + "\nlocal= "
                        + sCkLocal);
                return;
            }
        }
    } catch (IOException e) {
        System.out.println("ERROR: could not open " + p + ": " + e);
        // System.exit(1);
    }
}
From source file: com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java

License: Open Source License

/**
 * Create a FileIndexDescriptor describing what columns have been indexed
 * for the given input file, and write it to the directory where the index
 * files for that file are stored.
 *
 * @param inputFile the input file whose index is being described
 * @param fs the FileSystem used to read the input file's checksum and to
 *           write the index descriptor
 * @throws IOException
 */
protected void createIndexDescriptors(FileStatus inputFile, FileSystem fs) throws IOException {
    Path indexFilePath = new Path(getIndex() + inputFile.getPath().toUri().getRawPath());

    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(inputFile.getPath().toString());
    fid.setDocType(getExpectedDocType());
    LOG.info("getting checksum from:" + inputFile.getPath());
    FileChecksum cksum = fs.getFileChecksum(inputFile.getPath());
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null)
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(cksum.getAlgorithmName(),
                ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());

    Path idxPath = new Path(indexFilePath + "/" + BlockIndexedFileInputFormat.INDEXMETAFILENAME);
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<FileIndexDescriptor> writable = (ThriftWritable<FileIndexDescriptor>) ThriftWritable
            .newInstance(fid.getClass());
    writable.set(fid);
    writable.write(os);
    os.close();
}