List of usage examples for java.util.zip.CRC32.update(byte[], int, int)
@Override public void update(byte[] b, int off, int len)
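Every example below follows the same basic pattern: read into a buffer, pass only the bytes actually read to update(byte[], int, int), then take getValue(). A minimal self-contained sketch of that pattern (the file name is a placeholder):

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.CRC32;

public class Crc32StreamExample {
    public static void main(String[] args) throws IOException {
        CRC32 crc32 = new CRC32();
        byte[] buffer = new byte[8192];
        // "data.bin" is a placeholder path
        try (InputStream in = new FileInputStream("data.bin")) {
            int len;
            while ((len = in.read(buffer)) != -1) {
                // pass only the bytes actually read, never the full buffer length
                crc32.update(buffer, 0, len);
            }
        }
        System.out.println(Long.toHexString(crc32.getValue()));
    }
}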
From source file:cn.sinobest.jzpt.framework.utils.string.StringUtils.java
/**
 * Computes the CRC-32 checksum of the stream, returned as a hexadecimal
 * string of at most 8 digits. The stream is closed before returning.
 */
public static String getCRC(InputStream in) {
    CRC32 crc32 = new CRC32();
    byte[] b = new byte[4096];
    int len = 0;
    try {
        while ((len = in.read(b)) != -1) {
            crc32.update(b, 0, len);
        }
        return Long.toHexString(crc32.getValue());
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeQuietly(in);
    }
}
From source file:jef.tools.StringUtils.java
/**
 * Computes the CRC-32 checksum of the stream, returned as a hexadecimal
 * string of at most 8 digits. The stream is closed before returning.
 */
public static String getCRC(InputStream in) {
    CRC32 crc32 = new CRC32();
    byte[] b = new byte[65536];
    int len = 0;
    try {
        while ((len = in.read(b)) != -1) {
            crc32.update(b, 0, len);
        }
        return Long.toHexString(crc32.getValue());
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeQuietly(in);
    }
}
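The two helpers above are identical apart from the read-buffer size (4 KB vs. 64 KB), which trades memory for fewer read calls on large streams. A minimal usage sketch (the path is a placeholder; the helper closes the stream itself):

String checksum = StringUtils.getCRC(new FileInputStream("/tmp/archive.zip"));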
From source file:org.commoncrawl.service.listcrawler.CrawlHistoryManager.java
private void cacheCrawlHistoryLog(File localCacheDir, long timestamp) throws IOException {
    SequenceFile.Reader reader = null;
    Path mapFilePath = new Path(_remoteDataDirectory, CRAWL_HISTORY_HDFS_LOGFILE_PREFIX + timestamp);
    Path indexFilePath = new Path(mapFilePath, "index");
    Path dataFilePath = new Path(mapFilePath, "data");
    File cacheFilePath = new File(localCacheDir, CRAWL_HISTORY_HDFS_LOGFILE_PREFIX + timestamp);

    SequenceFile.Reader indexReader = new SequenceFile.Reader(_remoteFileSystem, dataFilePath,
            CrawlEnvironment.getHadoopConfig());

    ValueBytes valueBytes = indexReader.createValueBytes();
    DataOutputBuffer keyBytes = new DataOutputBuffer();
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataOutputBuffer finalOutputStream = new DataOutputBuffer();
    DataOutputBuffer uncompressedValueBytes = new DataOutputBuffer();
    URLFP fp = new URLFP();

    try {
        while (indexReader.nextRaw(keyBytes, valueBytes) != -1) {
            keyBuffer.reset(keyBytes.getData(), 0, keyBytes.getLength());
            // read fingerprint ...
            fp.readFields(keyBuffer);
            // write hash only
            finalOutputStream.writeLong(fp.getUrlHash());
            uncompressedValueBytes.reset();
            // write value bytes to intermediate buffer ...
            valueBytes.writeUncompressedBytes(uncompressedValueBytes);
            // write out uncompressed length
            WritableUtils.writeVInt(finalOutputStream, uncompressedValueBytes.getLength());
            // write out bytes
            finalOutputStream.write(uncompressedValueBytes.getData(), 0, uncompressedValueBytes.getLength());
        }
        // delete existing ...
        cacheFilePath.delete();
        // compute crc over the entire rebuilt buffer ...
        CRC32 crc = new CRC32();
        crc.update(finalOutputStream.getData(), 0, finalOutputStream.getLength());
        // open final output stream
        DataOutputStream fileOutputStream = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(cacheFilePath)));
        try {
            fileOutputStream.writeLong(crc.getValue());
            fileOutputStream.write(finalOutputStream.getData(), 0, finalOutputStream.getLength());
            fileOutputStream.flush();
        } catch (IOException e) {
            LOG.error(CCStringUtils.stringifyException(e));
            fileOutputStream.close();
            fileOutputStream = null;
            cacheFilePath.delete();
            throw e;
        } finally {
            if (fileOutputStream != null) {
                fileOutputStream.close();
            }
        }
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }
}
From source file:com.cardio3g.MainActivity.java
/**
 * Go through each of the Expansion APK files and open each as a zip file.
 * Calculate the CRC for each file and return false if any fail to match.
 *
 * @return true if XAPKZipFile is successful
 */
void validateXAPKZipFiles() {
    AsyncTask<Object, DownloadProgressInfo, Boolean> validationTask = new AsyncTask<Object, DownloadProgressInfo, Boolean>() {

        @Override
        protected void onPreExecute() {
            mDownloadViewGroup.setVisibility(View.VISIBLE);
            super.onPreExecute();
        }

        @Override
        protected Boolean doInBackground(Object... params) {
            for (XAPKFile xf : xAPKS) {
                String fileName = Helpers.getExpansionAPKFileName(MainActivity.this, xf.mIsMain,
                        xf.mFileVersion);
                if (!Helpers.doesFileExist(MainActivity.this, fileName, xf.mFileSize, false))
                    return false;
                fileName = Helpers.generateSaveFileName(MainActivity.this, fileName);
                ZipResourceFile zrf;
                byte[] buf = new byte[1024 * 256];
                try {
                    zrf = new ZipResourceFile(fileName);
                    ZipResourceFile.ZipEntryRO[] entries = zrf.getAllEntries();
                    // First calculate the total compressed length
                    long totalCompressedLength = 0;
                    for (ZipResourceFile.ZipEntryRO entry : entries) {
                        totalCompressedLength += entry.mCompressedLength;
                    }
                    float averageVerifySpeed = 0;
                    long totalBytesRemaining = totalCompressedLength;
                    long timeRemaining;
                    /*
                     * Then calculate a CRC for every file in the Zip file,
                     * comparing it to what is stored in the Zip directory.
                     * Note that for compressed Zip files we must extract
                     * the contents to do this comparison.
                     */
                    for (ZipResourceFile.ZipEntryRO entry : entries) {
                        if (-1 != entry.mCRC32) {
                            long length = entry.mUncompressedLength;
                            CRC32 crc = new CRC32();
                            DataInputStream dis = null;
                            try {
                                dis = new DataInputStream(zrf.getInputStream(entry.mFileName));
                                long startTime = SystemClock.uptimeMillis();
                                while (length > 0) {
                                    int seek = (int) (length > buf.length ? buf.length : length);
                                    dis.readFully(buf, 0, seek);
                                    crc.update(buf, 0, seek);
                                    length -= seek;
                                    long currentTime = SystemClock.uptimeMillis();
                                    long timePassed = currentTime - startTime;
                                    if (timePassed > 0) {
                                        float currentSpeedSample = (float) seek / (float) timePassed;
                                        if (0 != averageVerifySpeed) {
                                            averageVerifySpeed = SMOOTHING_FACTOR * currentSpeedSample
                                                    + (1 - SMOOTHING_FACTOR) * averageVerifySpeed;
                                        } else {
                                            averageVerifySpeed = currentSpeedSample;
                                        }
                                        totalBytesRemaining -= seek;
                                        timeRemaining = (long) (totalBytesRemaining / averageVerifySpeed);
                                        this.publishProgress(new DownloadProgressInfo(totalCompressedLength,
                                                totalCompressedLength - totalBytesRemaining, timeRemaining,
                                                averageVerifySpeed));
                                    }
                                    startTime = currentTime;
                                    if (mCancelValidation)
                                        return true;
                                }
                                if (crc.getValue() != entry.mCRC32) {
                                    Log.e("EXPANSION ERROR",
                                            "CRC does not match for entry: " + entry.mFileName);
                                    Log.e("EXPANSION ERROR", "In file: " + entry.getZipFileName());
                                    return false;
                                }
                            } finally {
                                if (null != dis) {
                                    dis.close();
                                }
                            }
                        }
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                    return false;
                }
            }
            return true;
        }

        @Override
        protected void onProgressUpdate(DownloadProgressInfo... values) {
            onDownloadProgress(values[0]);
            super.onProgressUpdate(values);
        }

        @Override
        protected void onPostExecute(Boolean result) {
            if (result) {
                mDownloadViewGroup.setVisibility(View.GONE);
            } else {
                mDownloadViewGroup.setVisibility(View.VISIBLE);
            }
            super.onPostExecute(result);
        }
    };
    validationTask.execute(new Object());
}
From source file:org.commoncrawl.service.crawler.CrawlList.java
private static void appendTargetsToLogFile(File logFileName, IntrusiveList<CrawlTarget> list)
        throws IOException {
    LogFileHeader header = new LogFileHeader();
    boolean preExistingHeader = logFileName.exists();
    RandomAccessFile file = new RandomAccessFile(logFileName, "rw");
    try {
        long headerOffset = 0;
        if (preExistingHeader) {
            headerOffset = readLogFileHeader(file, header);
            if (header._writePos == 0) {
                file.seek(headerOffset);
            } else {
                // seek to the appropriate write position
                file.seek(header._writePos);
            }
        } else {
            headerOffset = writeLogFileHeader(file, header);
        }

        CustomByteArrayOutputStream bufferOutputStream = new CustomByteArrayOutputStream(1 << 17);
        DataOutputStream dataOutputStream = new DataOutputStream(bufferOutputStream);
        CRC32 crc = new CRC32();

        for (CrawlTarget target : list) {
            PersistentCrawlTarget persistentTarget = target.createPersistentTarget();
            bufferOutputStream.reset();
            // write to intermediate stream ...
            persistentTarget.write(dataOutputStream);
            // and crc the data ...
            crc.reset();
            crc.update(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());
            // write out length first
            file.writeInt(bufferOutputStream.size());
            // crc next
            long computedValue = crc.getValue();
            // TODO: waste of space - write 32 bit values as long because having
            // problems with java sign promotion rules during read...
            file.writeLong(computedValue);
            // and then the data
            file.write(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());
        }

        // now update header ...
        header._itemCount += list.size();
        header._writePos = file.getFilePointer();
        // now write out header anew ...
        writeLogFileHeader(file, header);
    } finally {
        if (file != null) {
            file.close();
        }
    }
}
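The record framing written above (a 4-byte length, the CRC-32 widened to 8 bytes, then the payload) is read back and verified by readTargetsFromLogFile below. A minimal self-contained sketch of that round trip, with hypothetical class and method names:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.zip.CRC32;

public class CrcFramedRecord {
    // write one framed record: length, CRC-32 (as a long), payload
    static void writeRecord(DataOutput out, byte[] payload) throws IOException {
        CRC32 crc = new CRC32();
        crc.update(payload, 0, payload.length);
        out.writeInt(payload.length);
        out.writeLong(crc.getValue()); // 32-bit value stored as a long, as in the source above
        out.write(payload, 0, payload.length);
    }

    // read one framed record back, failing if the stored CRC does not match
    static byte[] readRecord(DataInput in) throws IOException {
        int len = in.readInt();
        long storedCrc = in.readLong();
        byte[] payload = new byte[len];
        in.readFully(payload, 0, len);
        CRC32 crc = new CRC32();
        crc.update(payload, 0, len);
        if (crc.getValue() != storedCrc) {
            throw new IOException("record corrupt: CRC mismatch");
        }
        return payload;
    }
}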
From source file:io.hops.erasure_coding.ReedSolomonDecoder.java
/**
 * Decode the inputs provided and write to the output.
 *
 * @param inputs array of inputs.
 * @param erasedLocations indexes in the inputs which are known to be erased.
 * @param erasedLocationToFix index in the inputs which needs to be fixed.
 * @param limit maximum number of bytes to be written.
 * @param out the output.
 * @return size of recovered bytes
 * @throws java.io.IOException
 */
long writeFixedBlock(FSDataInputStream[] inputs, int[] erasedLocations, int erasedLocationToFix, long limit,
        OutputStream out, Progressable reporter, ParallelStreamReader parallelReader, CRC32 crc)
        throws IOException {
    LOG.info("Need to write " + limit + " bytes for erased location index " + erasedLocationToFix);
    if (crc != null) {
        crc.reset();
    }
    int[] tmp = new int[inputs.length];
    int[] decoded = new int[erasedLocations.length];
    // Loop while the number of written bytes is less than the max.
    long written;
    for (written = 0; written < limit;) {
        erasedLocations = readFromInputs(inputs, erasedLocations, limit, reporter, parallelReader);
        if (decoded.length != erasedLocations.length) {
            decoded = new int[erasedLocations.length];
        }
        int toWrite = (int) Math.min((long) bufSize, limit - written);
        int partSize = (int) Math.ceil(bufSize * 1.0 / parallelism);
        try {
            long startTime = System.currentTimeMillis();
            for (int i = 0; i < parallelism; i++) {
                decodeOps.acquire(1);
                int start = i * partSize;
                int count = Math.min(bufSize - start, partSize);
                parallelDecoder.execute(
                        new DecodeOp(readBufs, writeBufs, start, count, erasedLocations, reedSolomonCode[i]));
            }
            decodeOps.acquire(parallelism);
            decodeOps.release(parallelism);
            decodeTime += (System.currentTimeMillis() - startTime);
        } catch (InterruptedException e) {
            throw new IOException("Interrupted while waiting for read result");
        }
        for (int i = 0; i < erasedLocations.length; i++) {
            if (erasedLocations[i] == erasedLocationToFix) {
                out.write(writeBufs[i], 0, toWrite);
                if (crc != null) {
                    crc.update(writeBufs[i], 0, toWrite);
                }
                written += toWrite;
                break;
            }
        }
    }
    return written;
}
From source file:org.apache.hadoop.raid.ReedSolomonDecoder.java
(The code is identical to the io.hops.erasure_coding.ReedSolomonDecoder example above; the duplicate listing is omitted.)
From source file:org.commoncrawl.service.crawler.CrawlList.java
private static int readTargetsFromLogFile(CrawlList domain, File logFileName, int desiredReadAmount,
        IntrusiveList<CrawlTarget> targetsOut) throws IOException {
    int itemsRead = 0;
    if (logFileName.exists()) {
        RandomAccessFile file = new RandomAccessFile(logFileName, "rw");
        LogFileHeader header = new LogFileHeader();
        try {
            long headerOffset = readLogFileHeader(file, header);
            // seek to the appropriate read position
            if (header._readPos != 0)
                file.seek(header._readPos);
            int itemsToRead = Math.min(desiredReadAmount, header._itemCount);
            PersistentCrawlTarget persistentTarget = new PersistentCrawlTarget();
            CRC32 crc = new CRC32();
            CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 16);
            for (int i = 0; i < itemsToRead; ++i) {
                // read length ...
                int urlDataLen = file.readInt();
                long urlDataCRC = file.readLong();
                buffer.reset();
                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                // readFully guards against short reads from the underlying file
                file.readFully(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);
                long computedValue = crc.getValue();
                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    throw new IOException("Crawl Target Log File Corrupt");
                } else {
                    // populate a persistentTarget from the (in memory) data stream
                    DataInputStream bufferReader = new DataInputStream(
                            new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                    persistentTarget.clear();
                    persistentTarget.readFields(bufferReader);
                    // populate a new crawl target structure ...
                    CrawlTarget newTarget = new CrawlTarget(domain, persistentTarget);
                    targetsOut.addTail(newTarget);
                }
            }
            itemsRead = itemsToRead;
            // now update header ...
            header._itemCount -= itemsRead;
            // now if item count is non zero ...
            if (header._itemCount != 0) {
                // set read cursor to next record location
                header._readPos = file.getFilePointer();
            }
            // otherwise ...
            else {
                // reset both cursors ...
                header._readPos = 0;
                header._writePos = 0;
            }
            // now write out header anew ...
            writeLogFileHeader(file, header);
        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return itemsRead;
}
From source file:org.commoncrawl.service.crawler.CrawlLog.java
private static void transferLocalCheckpointLog(File crawlLogPath, HDFSCrawlURLWriter writer, long checkpointId)
        throws IOException {
    // and open the crawl log file ...
    RandomAccessFile inputStream = null;
    IOException exception = null;
    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];
    // save position for potential debug output.
    long lastReadPosition = 0;
    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);
        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);
        // read a crawl url from the stream...
        while (inputStream.getFilePointer() < header._fileSize) {
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                try {
                    lastReadPosition = inputStream.getFilePointer();
                    // skip sync
                    inputStream.skipBytes(SYNC_BYTES_SIZE);
                    // read length ...
                    int urlDataLen = reader.readInt();
                    long urlDataCRC = reader.readLong();
                    if (urlDataLen > buffer.getBuffer().length) {
                        buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                    }
                    // readFully guards against short reads from the underlying file
                    reader.readFully(buffer.getBuffer(), 0, urlDataLen);
                    crc.reset();
                    crc.update(buffer.getBuffer(), 0, urlDataLen);
                    long computedValue = crc.getValue();
                    // validate crc values ...
                    if (computedValue != urlDataCRC) {
                        LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                                + crawlLogPath.getAbsolutePath() + " Checkpoint Id:" + checkpointId
                                + " FilePosition:" + lastReadPosition);
                        inputStream.seek(lastReadPosition + 1);
                    } else {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        try {
                            // and write out appropriate sequence file entries ...
                            writer.writeCrawlURLItem(new Text(url.getUrl()), url);
                        } catch (IOException e) {
                            LOG.error("Failed to write CrawlURL to SequenceFileWriter with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            throw new URLWriterException();
                        }
                    }
                } catch (URLWriterException e) {
                    LOG.error("Caught URLWriter Exception! - Throwing to outer layer!");
                    throw e;
                } catch (Exception e) {
                    LOG.error("Ignoring Error Processing CrawlLog Entry at Position:" + lastReadPosition
                            + " Exception:" + CCStringUtils.stringifyException(e));
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " Checkpoint Id:" + checkpointId + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
From source file:org.commoncrawl.service.crawler.CrawlLog.java
public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {
    // and open the crawl log file ...
    RandomAccessFile inputStream = null;
    IOException exception = null;
    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];
    // save position for potential debug output.
    long lastReadPosition = 0;
    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");
        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);
        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);
        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);
        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }
        Configuration conf = new Configuration();
        // read a crawl url from the stream...
        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {
            // System.out.println("PRE-SYNC SeekPos:" + inputStream.getFilePointer());
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                // System.out.println("POST-SYNC SeekPos:" + inputStream.getFilePointer());
                lastReadPosition = inputStream.getFilePointer();
                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);
                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();
                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                // readFully guards against short reads from the underlying file
                reader.readFully(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);
                long computedValue = crc.getValue();
                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);
                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition + " URL:"
                                + url.getUrl() + " BuffSize:" + urlDataLen + " ContentLen:"
                                + url.getContentRaw().getCount() + " LastModified:"
                                + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
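Both CrawlLog methods above recover from corruption the same way: when a record's stored CRC does not match the recomputed one, they seek back to one byte past the record's start and rescan for the next sync marker. The project's seekToNextSyncBytesPos is not shown in this listing, so the following is a deliberately naive sketch of what such a scan could look like, not the actual implementation:

import java.io.IOException;
import java.io.RandomAccessFile;

public class SyncScan {
    /**
     * Scan forward from the current position for the given sync marker.
     * On success the file cursor is left at the first byte of the marker.
     * This is an O(n * m) scan with one seek per candidate offset; a real
     * implementation would buffer its reads.
     */
    static boolean seekToNextSync(RandomAccessFile file, byte[] sync, long fileSize) throws IOException {
        for (long pos = file.getFilePointer(); pos + sync.length <= fileSize; pos++) {
            file.seek(pos);
            boolean match = true;
            for (int i = 0; i < sync.length && match; i++) {
                match = (file.read() == (sync[i] & 0xFF));
            }
            if (match) {
                file.seek(pos); // rewind so the caller can skip the marker itself
                return true;
            }
        }
        return false;
    }
}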