Usage examples for java.util.zip.CRC32.reset()

public void reset()

Resets the CRC-32 checksum to its initial value.
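Before the real-world examples, here is a minimal, self-contained sketch (names are illustrative, not from any of the sources below) of the core pattern they all share: one CRC32 instance is reused across records, with reset() returning it to its initial state between updates.

import java.util.zip.CRC32;

public class Crc32ResetDemo {
    public static void main(String[] args) {
        CRC32 crc = new CRC32();
        for (String record : new String[] { "alpha", "beta", "gamma" }) {
            crc.reset();                   // start a fresh checksum for this record
            crc.update(record.getBytes()); // feed this record's bytes
            System.out.printf("%s -> %08x%n", record, crc.getValue());
        }
    }
}

Note that a freshly constructed CRC32 is already in its reset state, so the first reset() on a new instance is a no-op; the call matters when the instance is reused, as in the examples that follow.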
From source file:com.nridge.core.base.field.data.DataBag.java
/**
 * Convenience method that calculates a unique type id property for
 * the bag, based on each field name, using a CRC32 algorithm.
 */
public void setTypeIdByNames() {
    CRC32 crc32 = new CRC32();
    crc32.reset();
    if (StringUtils.isNotEmpty(mName))
        crc32.update(mName.getBytes());
    else {
        for (DataField dataField : mFields)
            crc32.update(dataField.getName().getBytes());
    }
    setTypeId(crc32.getValue());
}
From source file:srebrinb.compress.sevenzip.SevenZOutputFile.java
/**
 * Finishes the addition of entries to this archive, without closing it.
 *
 * @throws IOException if archive is already closed.
 */
public void finish() throws IOException {
    if (finished) {
        throw new IOException("This archive has already been finished");
    }
    finished = true;

    final long headerPosition = channel.position();
    final ByteArrayOutputStream headerBaos = new ByteArrayOutputStream();
    final DataOutputStream header = new DataOutputStream(headerBaos);
    writeHeader(header);
    header.flush();
    final byte[] headerBytes = headerBaos.toByteArray();
    channel.write(ByteBuffer.wrap(headerBytes));

    final CRC32 crc32 = new CRC32();
    crc32.update(headerBytes);

    ByteBuffer bb = ByteBuffer.allocate(SevenZFile.sevenZSignature.length
            + 2 /* version */
            + 4 /* start header CRC */
            + 8 /* next header position */
            + 8 /* next header length */
            + 4 /* next header CRC */).order(ByteOrder.LITTLE_ENDIAN);
    // signature header
    channel.position(0);
    bb.put(SevenZFile.sevenZSignature);
    // version
    bb.put((byte) 0).put((byte) 2);
    // placeholder for start header CRC
    bb.putInt(0);
    // start header
    bb.putLong(headerPosition - SevenZFile.SIGNATURE_HEADER_SIZE)
      .putLong(0xffffFFFFL & headerBytes.length)
      .putInt((int) crc32.getValue());
    crc32.reset();
    crc32.update(bb.array(), SevenZFile.sevenZSignature.length + 6, 20);
    bb.putInt(SevenZFile.sevenZSignature.length + 2, (int) crc32.getValue());
    bb.flip();
    channel.write(bb);
}
From source file:org.apache.hadoop.raid.TestBlockCopier.java
private long[] createRandomFileDispersed(Path file, int numBlocks,
        DatanodeDescriptor primaryNode, DatanodeDescriptor altNode)
        throws IOException, InterruptedException {

    BlockPlacementPolicyFakeData bp = BlockPlacementPolicyFakeData.lastInstance;
    DatanodeDescriptor tmp = bp.overridingDatanode;
    final int repl = 1;
    long[] crcs = new long[numBlocks];
    CRC32 crc = new CRC32();
    Random rand = new Random();

    FSDataOutputStream stm = fileSys.create(file, true,
            fileSys.getConf().getInt("io.file.buffer.size", 4096),
            (short) repl, BLOCK_SIZE);

    // Create the first block on the alt node
    bp.overridingDatanode = altNode;

    // fill random data into file
    final byte[] b = new byte[(int) BLOCK_SIZE];
    LOG.info("Writing first block (alt. host)");
    rand.nextBytes(b);
    stm.write(b);
    crc.update(b);
    crcs[0] = crc.getValue();

    stm.flush();
    Thread.sleep(1000); // What a hack. Le sigh.

    // Now we want to write on the primary node
    bp.overridingDatanode = primaryNode;

    // Write the rest of the blocks on primaryNode
    for (int i = 1; i < numBlocks; i++) {
        LOG.info("Writing block number " + i + " (primary host)");
        rand.nextBytes(b);
        stm.write(b);
        crc.reset();
        crc.update(b);
        crcs[i] = crc.getValue();
    }
    stm.close();
    Thread.sleep(1000);

    // Reset this guy
    bp.overridingDatanode = tmp;

    return crcs;
}
From source file:org.commoncrawl.service.crawler.CrawlList.java
private static void appendTargetsToLogFile(File logFileName, IntrusiveList<CrawlTarget> list)
        throws IOException {

    LogFileHeader header = new LogFileHeader();

    boolean preExistingHeader = logFileName.exists();

    RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

    try {
        long headerOffset = 0;

        if (preExistingHeader) {
            headerOffset = readLogFileHeader(file, header);

            if (header._writePos == 0) {
                file.seek(headerOffset);
            } else {
                // seek to appropriate write position
                file.seek(header._writePos);
            }
        } else {
            headerOffset = writeLogFileHeader(file, header);
        }

        CustomByteArrayOutputStream bufferOutputStream = new CustomByteArrayOutputStream(1 << 17);
        DataOutputStream dataOutputStream = new DataOutputStream(bufferOutputStream);
        CRC32 crc = new CRC32();

        for (CrawlTarget target : list) {
            PersistentCrawlTarget persistentTarget = target.createPersistentTarget();

            bufferOutputStream.reset();
            // write to intermediate stream ...
            persistentTarget.write(dataOutputStream);
            // and crc the data ...
            crc.reset();
            crc.update(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());

            // write out length first
            file.writeInt(bufferOutputStream.size());
            // crc next
            long computedValue = crc.getValue();
            // TODO: waste of space - write 32 bit values as long because having
            // problems with java sign promotion rules during read ...
            file.writeLong(computedValue);
            // and then the data
            file.write(bufferOutputStream.getBuffer(), 0, bufferOutputStream.size());
        }

        // now update header ...
        header._itemCount += list.size();
        header._writePos = file.getFilePointer();

        // now write out header anew ...
        writeLogFileHeader(file, header);

    } finally {
        if (file != null) {
            file.close();
        }
    }
}
From source file:org.commoncrawl.service.crawler.CrawlList.java
private static int readTargetsFromLogFile(CrawlList domain, File logFileName,
        int desiredReadAmount, IntrusiveList<CrawlTarget> targetsOut) throws IOException {

    int itemsRead = 0;

    if (logFileName.exists()) {

        RandomAccessFile file = new RandomAccessFile(logFileName, "rw");

        LogFileHeader header = new LogFileHeader();

        try {
            long headerOffset = readLogFileHeader(file, header);

            // seek to appropriate read position
            if (header._readPos != 0)
                file.seek(header._readPos);

            int itemsToRead = Math.min(desiredReadAmount, header._itemCount);

            PersistentCrawlTarget persistentTarget = new PersistentCrawlTarget();
            CRC32 crc = new CRC32();
            CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 16);

            for (int i = 0; i < itemsToRead; ++i) {
                // read length ...
                int urlDataLen = file.readInt();
                long urlDataCRC = file.readLong();

                buffer.reset();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                file.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    throw new IOException("Crawl Target Log File Corrupt");
                } else {
                    // populate a persistentTarget from the (in memory) data stream
                    DataInputStream bufferReader = new DataInputStream(
                            new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));

                    persistentTarget.clear();
                    persistentTarget.readFields(bufferReader);

                    // populate a new crawl target structure ...
                    CrawlTarget newTarget = new CrawlTarget(domain, persistentTarget);

                    targetsOut.addTail(newTarget);
                }
            }

            itemsRead = itemsToRead;

            // now update header ...
            header._itemCount -= itemsRead;
            // now if item count is non zero ...
            if (header._itemCount != 0) {
                // set read cursor to next record location
                header._readPos = file.getFilePointer();
            }
            // otherwise ...
            else {
                // reset both cursors ...
                header._readPos = 0;
                header._writePos = 0;
            }

            // now write out header anew ...
            writeLogFileHeader(file, header);

        } finally {
            if (file != null) {
                file.close();
            }
        }
    }
    return itemsRead;
}
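The two CrawlList methods above are mirror images: the writer frames each record as [int length][long crc32][payload], and the reader recomputes the CRC over the payload and rejects the record on mismatch. A minimal stand-alone sketch of that framing (hypothetical helper methods, not the CommonCrawl API):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.zip.CRC32;

// Each record: [int length][long crc32][payload bytes]
static void writeRecord(DataOutput out, byte[] payload) throws IOException {
    CRC32 crc = new CRC32();
    crc.reset(); // harmless on a fresh instance, essential on a reused one
    crc.update(payload, 0, payload.length);
    out.writeInt(payload.length);
    out.writeLong(crc.getValue()); // stored as long, as the original does
    out.write(payload);
}

static byte[] readRecord(DataInput in) throws IOException {
    int len = in.readInt();
    long expected = in.readLong();
    byte[] payload = new byte[len];
    in.readFully(payload);
    CRC32 crc = new CRC32();
    crc.update(payload, 0, len);
    if (crc.getValue() != expected) {
        throw new IOException("Record CRC mismatch");
    }
    return payload;
}

Storing the 32-bit CRC as a long, as the original's TODO comment notes, trades four bytes per record for freedom from Java sign-promotion surprises when the value is read back.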
From source file:org.commoncrawl.service.crawler.CrawlLog.java
private static void transferLocalCheckpointLog(File crawlLogPath, HDFSCrawlURLWriter writer,
        long checkpointId) throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");

        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        // read a crawl url from the stream ...
        while (inputStream.getFilePointer() < header._fileSize) {

            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {

                try {
                    lastReadPosition = inputStream.getFilePointer();

                    // skip sync
                    inputStream.skipBytes(SYNC_BYTES_SIZE);

                    // read length ...
                    int urlDataLen = reader.readInt();
                    long urlDataCRC = reader.readLong();

                    if (urlDataLen > buffer.getBuffer().length) {
                        buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                    }
                    reader.read(buffer.getBuffer(), 0, urlDataLen);
                    crc.reset();
                    crc.update(buffer.getBuffer(), 0, urlDataLen);

                    long computedValue = crc.getValue();

                    // validate crc values ...
                    if (computedValue != urlDataCRC) {
                        LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                                + crawlLogPath.getAbsolutePath() + " Checkpoint Id:" + checkpointId
                                + " FilePosition:" + lastReadPosition);
                        inputStream.seek(lastReadPosition + 1);
                    } else {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);

                        try {
                            // and write out appropriate sequence file entries ...
                            writer.writeCrawlURLItem(new Text(url.getUrl()), url);
                        } catch (IOException e) {
                            LOG.error("Failed to write CrawlURL to SequenceFileWriter with Exception:"
                                    + CCStringUtils.stringifyException(e));
                            throw new URLWriterException();
                        }
                    }
                } catch (URLWriterException e) {
                    LOG.error("Caught URLRewriter Exception! - Throwing to outer layer!");
                    throw e;
                } catch (Exception e) {
                    LOG.error("Ignoring Error Processing CrawlLog Entry at Position:" + lastReadPosition
                            + " Exception:" + CCStringUtils.stringifyException(e));
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " Checkpoint Id:" + checkpointId + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
From source file:org.commoncrawl.service.crawler.CrawlLog.java
public static void walkCrawlLogFile(File crawlLogPath, long startOffset) throws IOException {

    // and open the crawl log file ...
    RandomAccessFile inputStream = null;

    IOException exception = null;

    CRC32 crc = new CRC32();
    CustomByteArrayOutputStream buffer = new CustomByteArrayOutputStream(1 << 17);
    byte[] syncBytesBuffer = new byte[SYNC_BYTES_SIZE];

    // save position for potential debug output.
    long lastReadPosition = 0;

    try {
        inputStream = new RandomAccessFile(crawlLogPath, "rw");

        // and a data input stream ...
        RandomAccessFile reader = inputStream;
        // seek to zero
        reader.seek(0L);

        // read the header ...
        LogFileHeader header = readLogFileHeader(reader);

        System.out.println("Header ItemCount:" + header._itemCount + " FileSize:" + header._fileSize);

        if (startOffset != 0L) {
            System.out.println("Preseeking to:" + startOffset);
            reader.seek(startOffset);
        }

        Configuration conf = new Configuration();

        // read a crawl url from the stream ...
        long recordCount = 0;
        while (inputStream.getFilePointer() < header._fileSize) {

            // System.out.println("PRE-SYNC SeekPos:" + inputStream.getFilePointer());
            if (seekToNextSyncBytesPos(syncBytesBuffer, reader, header._fileSize)) {
                // System.out.println("POST-SYNC SeekPos:" + inputStream.getFilePointer());

                lastReadPosition = inputStream.getFilePointer();

                // skip sync
                inputStream.skipBytes(SYNC_BYTES_SIZE);

                // read length ...
                int urlDataLen = reader.readInt();
                long urlDataCRC = reader.readLong();

                if (urlDataLen > buffer.getBuffer().length) {
                    buffer = new CustomByteArrayOutputStream(((urlDataLen / 65536) + 1) * 65536);
                }
                reader.read(buffer.getBuffer(), 0, urlDataLen);
                crc.reset();
                crc.update(buffer.getBuffer(), 0, urlDataLen);

                long computedValue = crc.getValue();

                // validate crc values ...
                if (computedValue != urlDataCRC) {
                    LOG.error("CRC Mismatch Detected during HDFS transfer in CrawlLog:"
                            + crawlLogPath.getAbsolutePath() + " FilePosition:" + lastReadPosition);
                    inputStream.seek(lastReadPosition + 1);
                } else {
                    if (recordCount++ % 10000 == 0) {
                        // allocate a crawl url data structure
                        CrawlURL url = new CrawlURL();
                        DataInputStream bufferReader = new DataInputStream(
                                new ByteArrayInputStream(buffer.getBuffer(), 0, urlDataLen));
                        // populate it from the (in memory) data stream
                        url.readFields(bufferReader);

                        System.out.println("Record:" + recordCount + " At:" + lastReadPosition
                                + " URL:" + url.getUrl() + " BuffSize:" + urlDataLen
                                + " ContentLen:" + url.getContentRaw().getCount()
                                + " LastModified:" + new Date(url.getLastAttemptTime()).toString());
                    }
                }
            } else {
                break;
            }
        }
    } catch (EOFException e) {
        LOG.error("Caught EOF Exception during read of local CrawlLog:" + crawlLogPath.getAbsolutePath()
                + " FilePosition:" + lastReadPosition);
    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
        exception = e;
        throw e;
    } finally {
        if (inputStream != null)
            inputStream.close();
    }
}
From source file:org.anarres.lzo.LzopInputStream.java
/**
 * Read and verify an lzo header, setting relevant block checksum options
 * and ignoring most everything else.
 */
protected int readHeader() throws IOException {
    byte[] buf = new byte[9];
    readBytes(buf, 0, 9);
    if (!Arrays.equals(buf, LzopConstants.LZOP_MAGIC))
        throw new IOException("Invalid LZO header");
    Arrays.fill(buf, (byte) 0);
    Adler32 adler = new Adler32();
    CRC32 crc32 = new CRC32();
    int hitem = readHeaderItem(buf, 2, adler, crc32); // lzop version
    if (hitem > LzopConstants.LZOP_VERSION) {
        LOG.debug("Compressed with later version of lzop: " + Integer.toHexString(hitem)
                + " (expected 0x" + Integer.toHexString(LzopConstants.LZOP_VERSION) + ")");
    }
    hitem = readHeaderItem(buf, 2, adler, crc32); // lzo library version
    if (hitem > LzoVersion.LZO_LIBRARY_VERSION) {
        throw new IOException("Compressed with incompatible lzo version: 0x"
                + Integer.toHexString(hitem) + " (expected 0x"
                + Integer.toHexString(LzoVersion.LZO_LIBRARY_VERSION) + ")");
    }
    hitem = readHeaderItem(buf, 2, adler, crc32); // lzop extract version
    if (hitem > LzopConstants.LZOP_VERSION) {
        throw new IOException("Compressed with incompatible lzop version: 0x"
                + Integer.toHexString(hitem) + " (expected 0x"
                + Integer.toHexString(LzopConstants.LZOP_VERSION) + ")");
    }
    hitem = readHeaderItem(buf, 1, adler, crc32); // method
    switch (hitem) {
    case LzopConstants.M_LZO1X_1:
    case LzopConstants.M_LZO1X_1_15:
    case LzopConstants.M_LZO1X_999:
        break;
    default:
        throw new IOException("Invalid strategy " + Integer.toHexString(hitem));
    }
    readHeaderItem(buf, 1, adler, crc32); // ignore level

    // flags
    int flags = readHeaderItem(buf, 4, adler, crc32);
    boolean useCRC32 = (flags & LzopConstants.F_H_CRC32) != 0;
    boolean extraField = (flags & LzopConstants.F_H_EXTRA_FIELD) != 0;
    if ((flags & LzopConstants.F_MULTIPART) != 0)
        throw new IOException("Multipart lzop not supported");
    if ((flags & LzopConstants.F_H_FILTER) != 0)
        throw new IOException("lzop filter not supported");
    if ((flags & LzopConstants.F_RESERVED) != 0)
        throw new IOException("Unknown flags in header");
    // known !F_H_FILTER, so no optional block

    readHeaderItem(buf, 4, adler, crc32); // ignore mode
    readHeaderItem(buf, 4, adler, crc32); // ignore mtime
    readHeaderItem(buf, 4, adler, crc32); // ignore gmtdiff
    hitem = readHeaderItem(buf, 1, adler, crc32); // fn len
    if (hitem > 0) {
        byte[] tmp = (hitem > buf.length) ? new byte[hitem] : buf;
        readHeaderItem(tmp, hitem, adler, crc32); // skip filename
    }
    int checksum = (int) (useCRC32 ? crc32.getValue() : adler.getValue());
    hitem = readHeaderItem(buf, 4, adler, crc32); // read checksum
    if (hitem != checksum) {
        throw new IOException("Invalid header checksum: " + Long.toHexString(checksum)
                + " (expected 0x" + Integer.toHexString(hitem) + ")");
    }
    if (extraField) { // lzop 1.08 ultimately ignores this
        LOG.debug("Extra header field not processed");
        adler.reset();
        crc32.reset();
        hitem = readHeaderItem(buf, 4, adler, crc32);
        readHeaderItem(new byte[hitem], hitem, adler, crc32);
        checksum = (int) (useCRC32 ? crc32.getValue() : adler.getValue());
        if (checksum != readHeaderItem(buf, 4, adler, crc32)) {
            throw new IOException("Invalid checksum for extra header field");
        }
    }
    return flags;
}
From source file:org.getlantern.firetweet.util.Utils.java
public static boolean hasAccountSignedWithOfficialKeys(final Context context) {
    if (context == null) return false;
    final Cursor cur = ContentResolverUtils.query(context.getContentResolver(),
            Accounts.CONTENT_URI, Accounts.COLUMNS, null, null, null);
    if (cur == null) return false;
    final String[] keySecrets = context.getResources()
            .getStringArray(R.array.values_official_consumer_secret_crc32);
    final ParcelableAccount.Indices indices = new ParcelableAccount.Indices(cur);
    cur.moveToFirst();
    final CRC32 crc32 = new CRC32();
    try {
        while (!cur.isAfterLast()) {
            final String consumerSecret = cur.getString(indices.consumer_secret);
            if (consumerSecret != null) {
                final byte[] consumerSecretBytes = consumerSecret.getBytes(Charset.forName("UTF-8"));
                crc32.update(consumerSecretBytes, 0, consumerSecretBytes.length);
                final long value = crc32.getValue();
                crc32.reset();
                for (final String keySecret : keySecrets) {
                    if (Long.parseLong(keySecret, 16) == value) return true;
                }
            }
            cur.moveToNext();
        }
    } finally {
        cur.close();
    }
    return false;
}
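A minimal sketch of the same idea (illustrative names and values, not the FireTweet API): checksum a candidate secret once, reset the shared CRC32 so it is clean for the next candidate, and compare the result against a whitelist of hex-encoded CRC32 values.

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

static boolean matchesKnownCrc(String secret, String[] knownHexCrcs) {
    CRC32 crc32 = new CRC32();
    crc32.update(secret.getBytes(StandardCharsets.UTF_8));
    long value = crc32.getValue();
    crc32.reset(); // leave the instance ready for the next secret
    for (String hex : knownHexCrcs) {
        if (Long.parseLong(hex, 16) == value) return true;
    }
    return false;
}

Comparing CRC32 values rather than the secrets themselves avoids shipping the official consumer secrets in the app's resources, at the cost of CRC32's weak collision resistance.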
From source file:com.hadoop.compression.lzo.LzopInputStream.java
/**
 * Read and verify an lzo header, setting relevant block checksum options
 * and ignoring most everything else.
 * @param in InputStream
 * @throws IOException if there is an error in the lzo header
 */
protected void readHeader(InputStream in) throws IOException {
    readFully(in, buf, 0, 9);
    if (!Arrays.equals(buf, LzopCodec.LZO_MAGIC)) {
        throw new IOException("Invalid LZO header");
    }
    Arrays.fill(buf, (byte) 0);
    Adler32 adler = new Adler32();
    CRC32 crc32 = new CRC32();
    int hitem = readHeaderItem(in, buf, 2, adler, crc32); // lzop version
    if (hitem > LzopCodec.LZOP_VERSION) {
        LOG.debug("Compressed with later version of lzop: " + Integer.toHexString(hitem)
                + " (expected 0x" + Integer.toHexString(LzopCodec.LZOP_VERSION) + ")");
    }
    hitem = readHeaderItem(in, buf, 2, adler, crc32); // lzo library version
    if (hitem < LzoDecompressor.MINIMUM_LZO_VERSION) {
        throw new IOException("Compressed with incompatible lzo version: 0x"
                + Integer.toHexString(hitem) + " (expected at least 0x"
                + Integer.toHexString(LzoDecompressor.MINIMUM_LZO_VERSION) + ")");
    }
    hitem = readHeaderItem(in, buf, 2, adler, crc32); // lzop extract version
    if (hitem > LzopCodec.LZOP_VERSION) {
        throw new IOException("Compressed with incompatible lzop version: 0x"
                + Integer.toHexString(hitem) + " (expected 0x"
                + Integer.toHexString(LzopCodec.LZOP_VERSION) + ")");
    }
    hitem = readHeaderItem(in, buf, 1, adler, crc32); // method
    if (hitem < 1 || hitem > 3) {
        throw new IOException("Invalid strategy: " + Integer.toHexString(hitem));
    }
    readHeaderItem(in, buf, 1, adler, crc32); // ignore level

    // flags
    hitem = readHeaderItem(in, buf, 4, adler, crc32);
    try {
        for (DChecksum f : dflags) {
            if (0 == (f.getHeaderMask() & hitem)) {
                dflags.remove(f);
            } else {
                dcheck.put(f, (int) f.getChecksumClass().newInstance().getValue());
            }
        }
        for (CChecksum f : cflags) {
            if (0 == (f.getHeaderMask() & hitem)) {
                cflags.remove(f);
            } else {
                ccheck.put(f, (int) f.getChecksumClass().newInstance().getValue());
            }
        }
    } catch (InstantiationException e) {
        throw new RuntimeException("Internal error", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Internal error", e);
    }
    ((LzopDecompressor) decompressor).initHeaderFlags(dflags, cflags);
    boolean useCRC32 = 0 != (hitem & 0x00001000);   // F_H_CRC32
    boolean extraField = 0 != (hitem & 0x00000040); // F_H_EXTRA_FIELD
    if (0 != (hitem & 0x400)) { // F_MULTIPART
        throw new IOException("Multipart lzop not supported");
    }
    if (0 != (hitem & 0x800)) { // F_H_FILTER
        throw new IOException("lzop filter not supported");
    }
    if (0 != (hitem & 0x000FC000)) { // F_RESERVED
        throw new IOException("Unknown flags in header");
    }
    // known !F_H_FILTER, so no optional block

    readHeaderItem(in, buf, 4, adler, crc32); // ignore mode
    readHeaderItem(in, buf, 4, adler, crc32); // ignore mtime
    readHeaderItem(in, buf, 4, adler, crc32); // ignore gmtdiff
    hitem = readHeaderItem(in, buf, 1, adler, crc32); // fn len
    if (hitem > 0) {
        // skip filename
        int filenameLen = Math.max(4, hitem); // buffer must be at least 4 bytes for readHeaderItem to work.
        readHeaderItem(in, new byte[filenameLen], hitem, adler, crc32);
    }
    int checksum = (int) (useCRC32 ? crc32.getValue() : adler.getValue());
    hitem = readHeaderItem(in, buf, 4, adler, crc32); // read checksum
    if (hitem != checksum) {
        throw new IOException("Invalid header checksum: " + Long.toHexString(checksum)
                + " (expected 0x" + Integer.toHexString(hitem) + ")");
    }
    if (extraField) { // lzop 1.08 ultimately ignores this
        LOG.debug("Extra header field not processed");
        adler.reset();
        crc32.reset();
        hitem = readHeaderItem(in, buf, 4, adler, crc32);
        readHeaderItem(in, new byte[hitem], hitem, adler, crc32);
        checksum = (int) (useCRC32 ? crc32.getValue() : adler.getValue());
        if (checksum != readHeaderItem(in, buf, 4, adler, crc32)) {
            throw new IOException("Invalid checksum for extra header field");
        }
    }
}
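Both lzop readers above drive an Adler32 and a CRC32 in parallel through readHeaderItem, and only afterwards decide from the header flags which value to compare against the stored checksum; reset() on both instances then lets the same pair checksum the optional extra field. A simplified stand-alone sketch of that selection idiom (an assumption-level distillation, not the real lzop wire format):

import java.io.IOException;
import java.util.zip.Adler32;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

// Feed the same bytes to both checksums, then compare whichever
// one the (already parsed) header flags selected.
static void verifyHeader(byte[] headerBytes, boolean useCRC32, int storedChecksum)
        throws IOException {
    Adler32 adler = new Adler32();
    CRC32 crc32 = new CRC32();
    adler.update(headerBytes, 0, headerBytes.length);
    crc32.update(headerBytes, 0, headerBytes.length);
    Checksum selected = useCRC32 ? crc32 : adler;
    if ((int) selected.getValue() != storedChecksum) {
        throw new IOException("Invalid header checksum");
    }
    // An optional trailing field reuses the same instances:
    adler.reset();
    crc32.reset();
}

Updating both checksums unconditionally is cheaper than re-reading the header once the flag is known, which is why both readers keep the two instances in lock-step.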