List of usage examples for java.util.zip.CRC32
public CRC32() — creates a new CRC32 checksum object.
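For orientation before the full examples below, here is a minimal sketch of the life cycle they all share (construct, update with bytes, read the value, optionally reset); the input string is illustrative only:

import java.util.zip.CRC32;

public class Crc32Demo {
    public static void main(String[] args) {
        byte[] data = "hello world".getBytes();  // illustrative input

        CRC32 crc = new CRC32();                 // the no-arg constructor shown above
        crc.update(data, 0, data.length);        // feed bytes into the checksum
        long value = crc.getValue();             // CRC-32 as an unsigned 32-bit value in a long
        System.out.println("CRC32: " + value);

        crc.reset();                             // reuse the same instance for the next input
    }
}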
From source file:org.commoncrawl.util.MultiFileMergeUtils.java
public static void main(String[] args) {
    Path testPath = new Path(args[0]);

    LOG.info("Initializing Hadoop Config");

    Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("mapred-site.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    conf.setClass(MultiFileInputReader.MULTIFILE_COMPARATOR_CLASS, URLFPV2RawComparator.class,
            RawComparator.class);
    conf.setClass(MultiFileInputReader.MULTIFILE_KEY_CLASS, URLFPV2.class, WritableComparable.class);

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    try {
        FileSystem fs = CrawlEnvironment.getDefaultFileSystem();

        Vector<Path> paths = new Vector<Path>();
        paths.add(new Path(testPath, "part-00000"));
        // paths.add(new Path(testPath,"part-00000"));
        paths.add(new Path(testPath, "part-00001"));

        TreeSet<URLFPV2> directReadSet = new TreeSet<URLFPV2>();
        TreeSet<URLFPV2> multiFileReadSet = new TreeSet<URLFPV2>();

        MultiFileInputReader<URLFPV2> inputReader = new MultiFileInputReader<URLFPV2>(fs, paths, conf);

        KeyAndValueData<URLFPV2> keyValueData = null;
        int multiFileKeyCount = 0;
        while ((keyValueData = inputReader.readNextItem()) != null) {
            LOG.info("Got Key Domain:" + keyValueData._keyObject.getDomainHash() + " URLHash:"
                    + keyValueData._keyObject.getUrlHash() + " Item Count:" + keyValueData._values.size()
                    + " Path[0]:" + keyValueData._values.get(0).source);

            if (keyValueData._values.size() > 1) {
                LOG.error("Got more than one item");
                for (int i = 0; i < keyValueData._values.size(); ++i) {
                    CRC32 crc = new CRC32();
                    crc.update(keyValueData._keyData.getData(), 0, keyValueData._keyData.getLength());
                    LOG.error("Item at[" + i + "] Path:" + keyValueData._values.get(i).source + " CRC:"
                            + crc.getValue());
                }
            }
            if (multiFileKeyCount++ < 1000)
                multiFileReadSet.add((URLFPV2) keyValueData._keyObject.clone());
        }
        inputReader.close();

        addFirstNFPItemsToSet(fs, new Path(testPath, "part-00000"), conf, directReadSet, 1000);
        addFirstNFPItemsToSet(fs, new Path(testPath, "part-00001"), conf, directReadSet, 1000);

        Iterator<URLFPV2> directReadIterator = directReadSet.iterator();
        Iterator<URLFPV2> multiFileReadIterator = multiFileReadSet.iterator();

        for (int i = 0; i < 1000; ++i) {
            URLFPV2 directReadFP = directReadIterator.next();
            URLFPV2 multiFileReadFP = multiFileReadIterator.next();

            if (directReadFP.compareTo(multiFileReadFP) != 0) {
                LOG.info("Mismatch at Index:" + i);
            }
        }
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    } catch (CloneNotSupportedException e) {
        e.printStackTrace();
    }
}
From source file:org.commoncrawl.service.crawler.CrawlLog.java
private static void transferLocalCheckpointLog(File crawlLogPath, HDFSCrawlURLWriter writer, long checkpointId)
        throws IOException {

    // open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        // read a crawl url from the stream...
        while (inputStream.getFilePointer() < header._fileSize) {
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                try {
                    lastReadPosition = inputStream.getFilePointer();

                    // skip sync
                    inputStream.skipBytes(SYNC_BYTES_SIZE);

                    // read length ...
                    int urlDataLen = reader.readInt();
                    long urlDataCRC = reader.readLong();

                    if (urlDataLen > buffer.getBuffer().length) {
                        buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                    }
                    reader.read(buffer.getBuffer(), 0, urlDataLen);
                    crc.reset();
                    crc.update(buffer.getBuffer(), 0, urlDataLen);

                    long computedValue = crc.getValue();

                    // validate crc values ...
                    if (computedValue != urlDataCRC) {
                        LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                                + crawlLogPath.getAbsolutePath() + " Checkpoint Id:" + checkpointId
                                + " FilePosition:" + lastReadPosition);
                        inputStream.seek(lastReadPosition + 1);
                    } else {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        try {
                            // and write out appropriate sequence file entries ...
                            writer.writeCrawlURLItem(new Text(url.getUrl()), url);
                        } catch (IOException e) {
                            LOG.error("Failed to write CrawlURL to SequenceFileWriter with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            throw new URLWriterException();
                        }
                    }
                } catch (URLWriterException e) {
                    LOG.error("Caught URLWriter Exception! - Throwing to outer layer!");
                    throw e;
                } catch (Exception e) {
                    LOG.error("Ignoring Error Processing CrawlLog Entry at Position:" + lastReadPosition
                            + " Exception:" + CCStringUtils.stringifyException(e));
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " Checkpoint Id:" + checkpointId + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
From source file:com.cisco.dvbu.ps.deploytool.services.RegressionManagerUtils.java
/**
 * Appends the checksum value for the entire query to the end of the resource URL and
 * eliminates any double quote (") characters from the URL.
 *
 * Example:
 *   incoming from clause          outgoing result
 *   -----------------------       ----------------
 *   CAT1.SCH1.ViewSales     -->   CAT1.SCH1.ViewSales_1717783081
 *
 * @param query
 * @param resourceURL
 * @return resourceURL
 */
public static String appendUrlChecksum(String query, String resourceURL) {
    /* 2015-07-06 mtinius - Adding a checksum to the URL allows for unique identification of
     *                      queries that invoke the same table.
     * 2015-10-13 mtinius - Moved this code to a separate method from getTableUrl() as it was
     *                      interfering with the FUNCTIONAL test.
     */
    // Calculate the CRC for the string to produce a unique identifier.
    Checksum checksum = new CRC32();
    long currentLineCheckSumValue = 0L;

    // Make sure there are no line feeds, carriage returns or double spaces in the query.
    // (The double-space-to-single-space pattern below is reconstructed from this comment;
    // the scraped original had collapsed the whitespace inside the string literal.)
    String queryTmp = query.replace("\n", " ").replaceAll("\r", " ").trim().replaceAll("  ", " ");

    byte bytes[] = queryTmp.getBytes();
    checksum.reset();
    checksum.update(bytes, 0, bytes.length);
    currentLineCheckSumValue = checksum.getValue();

    // Rewrite the resource URL to include the query checksum value and make sure
    // there are no double quote (") characters present.
    resourceURL = resourceURL.replaceAll("\"", "") + "_" + currentLineCheckSumValue;

    return resourceURL;
}
From source file:com.blackducksoftware.tools.commonframework.core.config.ConfigurationFileTest.java
@Test
public void testLegacyPasswordBase64IsplaintextFalse() throws Exception {
    final File sourceConfigFile = new File("src/test/resources/psw_encryption/legacy_base64_set.properties");
    final File configFile = File.createTempFile(
            "com.blackducksoftware.tools.commonframework.core.config.ConfigurationFileTest", "test4");
    filesToDelete.add(configFile);
    configFile.deleteOnExit();
    FileUtils.copyFile(sourceConfigFile, configFile);

    final ConfigurationFile cf = new ConfigurationFile(configFile.getAbsolutePath());
    List<String> updatedLines = null;
    if (cf.isInNeedOfUpdate()) {
        updatedLines = cf.saveWithEncryptedPasswords();
    }
    assertTrue(updatedLines.size() > 0);

    final Iterator<String> updatedLinesIter = updatedLines.iterator();
    while (updatedLinesIter.hasNext()) {
        String updatedLine = updatedLinesIter.next();
        // make sure obsolete properties didn't sneak in somehow
        assertFalse(updatedLine.matches("^.*\\.password\\.isplaintext=.*$"));
        // If this is a password, verify that it was encoded, and that the
        // isencrypted=true line was inserted after it
        if (updatedLine.startsWith("cc.password=")) {
            assertEquals(
                    "cc.password=,\\(f9b^6ck-Sr-A2!jWeRlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_P",
                    updatedLine);
            updatedLine = updatedLinesIter.next();
            assertEquals("cc.password.isencrypted=true", updatedLine);
        } else if (updatedLine.startsWith("protex.password=")) {
            assertEquals(
                    "protex.password=DQp'L-+/0Fq0jsi2f'\\\\OlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_P",
                    updatedLine);
            updatedLine = updatedLinesIter.next();
            assertEquals("protex.password.isencrypted=true", updatedLine);
        } else if (updatedLine.startsWith("connector.0.password=")) {
            assertEquals(
                    "connector.0.password=6'ND2^gdVX/0\\$fYH7TeH04Sh8FAG<\\[lI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_P",
                    updatedLine);
            updatedLine = updatedLinesIter.next();
            assertEquals("connector.0.password.isencrypted=true", updatedLine);
        }
    }

    final File testGeneratedUpdatedFile = File.createTempFile(
            "com.blackducksoftware.tools.commonframework.core.config.ConfigurationFileTest",
            "test4_testGeneratedUpdatedFile");
    filesToDelete.add(testGeneratedUpdatedFile);
    testGeneratedUpdatedFile.deleteOnExit();
    FileUtils.writeLines(testGeneratedUpdatedFile, updatedLines);

    final long csumTestGeneratedFile = FileUtils.checksum(testGeneratedUpdatedFile, new CRC32()).getValue();
    final long csumActualFile = FileUtils.checksum(configFile, new CRC32()).getValue();
    assertEquals(csumTestGeneratedFile, csumActualFile);
}
From source file:brut.androlib.res.AndrolibResources.java
public void installFramework(File frameFile, String tag) throws AndrolibException {
    InputStream in = null;
    ZipOutputStream out = null;
    try {
        ZipFile zip = new ZipFile(frameFile);
        ZipEntry entry = zip.getEntry("resources.arsc");

        if (entry == null) {
            throw new AndrolibException("Can't find resources.arsc file");
        }

        in = zip.getInputStream(entry);
        byte[] data = IOUtils.toByteArray(in);

        ARSCData arsc = ARSCDecoder.decode(new ByteArrayInputStream(data), true, true);
        publicizeResources(data, arsc.getFlagsOffsets());

        File outFile = new File(getFrameworkDir(),
                String.valueOf(arsc.getOnePackage().getId()) + (tag == null ? "" : '-' + tag) + ".apk");

        out = new ZipOutputStream(new FileOutputStream(outFile));
        out.setMethod(ZipOutputStream.STORED);

        // STORED (uncompressed) entries require the size and CRC-32 to be set
        // before the entry is written, so compute the CRC over the data first.
        CRC32 crc = new CRC32();
        crc.update(data);
        entry = new ZipEntry("resources.arsc");
        entry.setSize(data.length);
        entry.setCrc(crc.getValue());

        out.putNextEntry(entry);
        out.write(data);

        zip.close();
        LOGGER.info("Framework installed to: " + outFile);
    } catch (IOException ex) {
        throw new AndrolibException(ex);
    } finally {
        IOUtils.closeQuietly(in);
        IOUtils.closeQuietly(out);
    }
}
From source file:de.mpg.escidoc.services.dataacquisition.DataHandlerBean.java
/**
 * Operation for fetching data of type FILE.
 *
 * @param identifier identifier of the record to fetch
 * @param formats the formats in which to fetch the record
 * @return byte[] of the fetched file; a zip file if more than one record was fetched
 * @throws RuntimeException
 * @throws SourceNotAvailableException
 */
private byte[] fetchData(String identifier, Format[] formats)
        throws SourceNotAvailableException, RuntimeException, FormatNotAvailableException {
    byte[] in = null;
    FullTextVO fulltext = new FullTextVO();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ZipOutputStream zos = new ZipOutputStream(baos);

    try {
        // Call fetch file for every given format
        for (int i = 0; i < formats.length; i++) {
            Format format = formats[i];
            fulltext = this.util.getFtObjectToFetch(this.currentSource, format.getName(), format.getType(),
                    format.getEncoding());

            // Replace regex with identifier
            String decoded = java.net.URLDecoder.decode(fulltext.getFtUrl().toString(),
                    this.currentSource.getEncoding());
            fulltext.setFtUrl(new URL(decoded));
            fulltext.setFtUrl(
                    new URL(fulltext.getFtUrl().toString().replaceAll(this.regex, identifier.trim())));

            this.logger.debug("Fetch file from URL: " + fulltext.getFtUrl());

            // escidoc file
            if (this.currentSource.getHarvestProtocol().equalsIgnoreCase("ejb")) {
                in = this.fetchEjbFile(fulltext, identifier);
            }
            // other file
            else {
                in = this.fetchFile(fulltext);
            }

            this.setFileProperties(fulltext);

            // If only one file => return it in fetched format
            if (formats.length == 1) {
                return in;
            }
            // If more than one file => add it to the zip
            else {
                // If the cone service is not available (we do not get a fileEnding),
                // we have to make sure that the zip entries differ in name.
                String fileName = identifier;
                if (this.getFileEnding().equals("")) {
                    fileName = fileName + "_" + i;
                }
                ZipEntry ze = new ZipEntry(fileName + this.getFileEnding());
                ze.setSize(in.length);
                ze.setTime(this.currentDate());
                CRC32 crc321 = new CRC32();
                crc321.update(in);
                ze.setCrc(crc321.getValue());
                zos.putNextEntry(ze);
                zos.write(in);
                zos.flush();
                zos.closeEntry();
            }
        }
        this.setContentType("application/zip");
        this.setFileEnding(".zip");
        zos.close();
    } catch (SourceNotAvailableException e) {
        this.logger.error("Import Source " + this.currentSource + " not available.", e);
        throw new SourceNotAvailableException(e);
    } catch (FormatNotAvailableException e) {
        throw new FormatNotAvailableException(e.getMessage());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return baos.toByteArray();
}
From source file:org.apache.hadoop.hdfs.server.datanode.BlockReceiver.java
/**
 * Reads in the partial crc chunk and computes the checksum
 * of pre-existing data in the partial chunk.
 */
private void computePartialChunkCrc(long blkoff, long ckoff, int bytesPerChecksum) throws IOException {

    // find offset of the beginning of partial chunk.
    int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
    int checksumSize = checksum.getChecksumSize();
    blkoff = blkoff - sizePartialChunk;
    LOG.info("computePartialChunkCrc sizePartialChunk " + sizePartialChunk + " block " + block
            + " offset in block " + blkoff + " offset in metafile " + ckoff);

    // create an input stream from the block file
    // and read in partial crc chunk into temporary buffer
    byte[] buf = new byte[sizePartialChunk];
    byte[] crcbuf = new byte[checksumSize];
    FSDataset.BlockInputStreams instr = null;
    try {
        instr = datanode.data.getTmpInputStreams(block, blkoff, ckoff);
        IOUtils.readFully(instr.dataIn, buf, 0, sizePartialChunk);

        // open meta file and read in crc value computed earlier
        IOUtils.readFully(instr.checksumIn, crcbuf, 0, crcbuf.length);
    } finally {
        IOUtils.closeStream(instr);
    }

    // compute crc of partial chunk from data read in the block file.
    partialCrc = new CRC32();
    partialCrc.update(buf, 0, sizePartialChunk);
    LOG.info("Read in partial CRC chunk from disk for block " + block);

    // paranoia! verify that the pre-computed crc matches what we
    // recalculated just now
    if (partialCrc.getValue() != FSInputChecker.checksum2long(crcbuf)) {
        String msg = "Partial CRC " + partialCrc.getValue() + " does not match value computed the "
                + " last time file was closed " + FSInputChecker.checksum2long(crcbuf);
        throw new IOException(msg);
    }
    // LOG.debug("Partial CRC matches 0x" + Long.toHexString(partialCrc.getValue()));
}
From source file:org.apache.hadoop.raid.Encoder.java
/**
 * Recovers a corrupt block in a parity file to a local file.
 *
 * The encoder generates codec.parityLength parity blocks for a source file stripe.
 * Since we want only one of the parity blocks, this function creates
 * null outputs for the blocks to be discarded.
 *
 * @param fs The filesystem in which both srcFile and parityFile reside.
 * @param srcStat FileStatus of the source file.
 * @param blockSize The block size for the parity files.
 * @param corruptOffset The location of corruption in the parity file.
 * @param out The destination for the recovered block.
 * @param progress A reporter for progress.
 */
public CRC32 recoverParityBlockToStream(FileSystem fs, FileStatus srcStat, long blockSize, Path parityFile,
        long corruptOffset, OutputStream out, Progressable progress) throws IOException {
    LOG.info("Recovering parity block " + parityFile + ":" + corruptOffset);
    Path srcFile = srcStat.getPath();

    // Get the start offset of the corrupt block.
    corruptOffset = (corruptOffset / blockSize) * blockSize;

    // Output streams to each block in the parity file stripe.
    OutputStream[] outs = new OutputStream[codec.parityLength];
    long indexOfCorruptBlockInParityStripe = (corruptOffset / blockSize) % codec.parityLength;
    LOG.info("Index of corrupt block in parity stripe: " + indexOfCorruptBlockInParityStripe);

    CRC32[] crcOuts = null;
    if (checksumStore != null) {
        crcOuts = new CRC32[codec.parityLength];
    }

    // Create a real output stream for the block we want to recover,
    // and create null streams for the rest.
    for (int i = 0; i < codec.parityLength; i++) {
        if (indexOfCorruptBlockInParityStripe == i) {
            outs[i] = out;
            if (checksumStore != null) {
                crcOuts[i] = new CRC32();
            }
        } else {
            outs[i] = new NullOutputStream();
        }
    }

    // Get the stripe index and start offset of stripe.
    long stripeIdx = corruptOffset / (codec.parityLength * blockSize);
    StripeReader sReader = StripeReader.getStripeReader(codec, conf, blockSize, fs, stripeIdx, srcStat);

    // Get input streams to each block in the source file stripe.
    assert sReader.hasNext() == true;
    InputStream[] blocks = sReader.getNextStripeInputs().getInputs();
    LOG.info("Starting recovery by using source stripe " + srcFile + ": stripe " + stripeIdx);
    try {
        // Read the data from the blocks and write to the parity file.
        encodeStripe(blocks, blockSize, outs, crcOuts, progress, false, null);
        if (checksumStore != null) {
            return crcOuts[(int) indexOfCorruptBlockInParityStripe];
        } else {
            return null;
        }
    } finally {
        RaidUtils.closeStreams(blocks);
    }
}
From source file:com.blackducksoftware.tools.commonframework.core.config.ConfigurationFileTest.java
@Test
public void testModernPasswordPlainTextIsEncryptedNotSet() throws Exception {
    final File sourceConfigFile = new File("src/test/resources/psw_encryption/modern_plain_notset.properties");
    final File configFile = File.createTempFile(
            "com.blackducksoftware.tools.commonframework.core.config.ConfigurationFileTest", "test5");
    filesToDelete.add(configFile);
    configFile.deleteOnExit();
    FileUtils.copyFile(sourceConfigFile, configFile);

    final ConfigurationFile cf = new ConfigurationFile(configFile.getAbsolutePath());
    List<String> updatedLines = null;
    if (cf.isInNeedOfUpdate()) {
        updatedLines = cf.saveWithEncryptedPasswords();
    }
    assertTrue(updatedLines.size() > 0);

    final Iterator<String> updatedLinesIter = updatedLines.iterator();
    while (updatedLinesIter.hasNext()) {
        String updatedLine = updatedLinesIter.next();
        // make sure obsolete properties didn't sneak in somehow
        assertFalse(updatedLine.matches("^.*\\.password\\.isplaintext=.*$"));
        // If this is a password, verify that it was encoded, and that the
        // isencrypted=true line was inserted after it
        if (updatedLine.startsWith("cc.password=")) {
            assertEquals(
                    "cc.password=,\\(f9b^6ck-Sr-A2!jWeRlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_P",
                    updatedLine);
            updatedLine = updatedLinesIter.next();
            assertEquals("cc.password.isencrypted=true", updatedLine);
        } else if (updatedLine.startsWith("protex.password=")) {
            assertEquals(
                    "protex.password=DQp'L-+/0Fq0jsi2f'\\\\OlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_P",
                    updatedLine);
            updatedLine = updatedLinesIter.next();
            assertEquals("protex.password.isencrypted=true", updatedLine);
        } else if (updatedLine.startsWith("connector.0.password=")) {
            assertEquals(
                    "connector.0.password=6'ND2^gdVX/0\\$fYH7TeH04Sh8FAG<\\[lI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_PlI'nKT:u_P",
                    updatedLine);
            updatedLine = updatedLinesIter.next();
            assertEquals("connector.0.password.isencrypted=true", updatedLine);
        }
    }

    final File testGeneratedUpdatedFile = File.createTempFile(
            "com.blackducksoftware.tools.commonframework.core.config.ConfigurationFileTest",
            "test5_testGeneratedUpdatedFile");
    filesToDelete.add(testGeneratedUpdatedFile);
    testGeneratedUpdatedFile.deleteOnExit();
    FileUtils.writeLines(testGeneratedUpdatedFile, updatedLines);

    final long csumTestGeneratedFile = FileUtils.checksum(testGeneratedUpdatedFile, new CRC32()).getValue();
    final long csumActualFile = FileUtils.checksum(configFile, new CRC32()).getValue();
    assertEquals(csumTestGeneratedFile, csumActualFile);
}
From source file:net.librec.util.FileUtil.java
/**
 * Zip a given folder.
 *
 * @param dirPath a given folder: must contain only files (no sub-folders)
 * @param filePath zipped file
 * @throws Exception if an error occurs
 */
public static void zipFolder(String dirPath, String filePath) throws Exception {
    File outFile = new File(filePath);
    ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(outFile));

    int bytesRead;
    byte[] buffer = new byte[1024];
    CRC32 crc = new CRC32();

    for (File file : listFiles(dirPath)) {
        // First pass: compute the CRC-32 of the file, since STORED entries
        // need size and checksum set before the entry data is written.
        BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));
        crc.reset();
        while ((bytesRead = bis.read(buffer)) != -1) {
            crc.update(buffer, 0, bytesRead);
        }
        bis.close();

        // Reset to beginning of input stream
        bis = new BufferedInputStream(new FileInputStream(file));
        ZipEntry entry = new ZipEntry(file.getName());
        entry.setMethod(ZipEntry.STORED);
        entry.setCompressedSize(file.length());
        entry.setSize(file.length());
        entry.setCrc(crc.getValue());
        zos.putNextEntry(entry);

        // Second pass: copy the file contents into the archive.
        while ((bytesRead = bis.read(buffer)) != -1) {
            zos.write(buffer, 0, bytesRead);
        }
        bis.close();
    }
    zos.close();

    LOG.debug("A zip-file is created to: " + outFile.getPath());
}