List of usage examples for the java.util.zip.CRC32 constructor CRC32()
public CRC32()
From source file:org.apache.nifi.processors.standard.TailFile.java
private void processTailFile(final ProcessContext context, final ProcessSession session, final String tailFile) { // If user changes the file that is being tailed, we need to consume the already-rolled-over data according // to the Initial Start Position property boolean rolloverOccurred; TailFileObject tfo = states.get(tailFile); if (tfo.isTailFileChanged()) { rolloverOccurred = false;//from w w w. j a v a 2 s . c om final String recoverPosition = context.getProperty(START_POSITION).getValue(); if (START_BEGINNING_OF_TIME.getValue().equals(recoverPosition)) { recoverRolledFiles(context, session, tailFile, tfo.getExpectedRecoveryChecksum(), tfo.getState().getTimestamp(), tfo.getState().getPosition()); } else if (START_CURRENT_FILE.getValue().equals(recoverPosition)) { cleanup(); tfo.setState(new TailFileState(tailFile, null, null, 0L, 0L, 0L, null, tfo.getState().getBuffer())); } else { final String filename = tailFile; final File file = new File(filename); try { final FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ); getLogger().debug("Created FileChannel {} for {}", new Object[] { fileChannel, file }); final Checksum checksum = new CRC32(); final long position = file.length(); final long timestamp = file.lastModified(); try (final InputStream fis = new FileInputStream(file); final CheckedInputStream in = new CheckedInputStream(fis, checksum)) { StreamUtils.copy(in, new NullOutputStream(), position); } fileChannel.position(position); cleanup(); tfo.setState(new TailFileState(filename, file, fileChannel, position, timestamp, file.length(), checksum, tfo.getState().getBuffer())); } catch (final IOException ioe) { getLogger().error( "Attempted to position Reader at current position in file {} but failed to do so due to {}", new Object[] { file, ioe.toString() }, ioe); context.yield(); return; } } tfo.setTailFileChanged(false); } else { // Recover any data that may have rolled over since the last time that this processor ran. 
// If expectedRecoveryChecksum != null, that indicates that this is the first iteration since processor was started, so use whatever checksum value // was present when the state was last persisted. In this case, we must then null out the value so that the next iteration won't keep using the "recovered" // value. If the value is null, then we know that either the processor has already recovered that data, or there was no state persisted. In either case, // use whatever checksum value is currently in the state. Long expectedChecksumValue = tfo.getExpectedRecoveryChecksum(); if (expectedChecksumValue == null) { expectedChecksumValue = tfo.getState().getChecksum() == null ? null : tfo.getState().getChecksum().getValue(); } rolloverOccurred = recoverRolledFiles(context, session, tailFile, expectedChecksumValue, tfo.getState().getTimestamp(), tfo.getState().getPosition()); tfo.setExpectedRecoveryChecksum(null); } // initialize local variables from state object; this is done so that we can easily change the values throughout // the onTrigger method and then create a new state object after we finish processing the files. TailFileState state = tfo.getState(); File file = state.getFile(); FileChannel reader = state.getReader(); Checksum checksum = state.getChecksum(); if (checksum == null) { checksum = new CRC32(); } long position = state.getPosition(); long timestamp = state.getTimestamp(); long length = state.getLength(); // Create a reader if necessary. if (file == null || reader == null) { file = new File(tailFile); reader = createReader(file, position); if (reader == null) { context.yield(); return; } } final long startNanos = System.nanoTime(); // Check if file has rotated if (rolloverOccurred || (timestamp <= file.lastModified() && length > file.length()) || (timestamp < file.lastModified() && length >= file.length())) { // Since file has rotated, we close the reader, create a new one, and then reset our state. 
try { reader.close(); getLogger().debug("Closed FileChannel {}", new Object[] { reader, reader }); } catch (final IOException ioe) { getLogger().warn("Failed to close reader for {} due to {}", new Object[] { file, ioe }); } reader = createReader(file, 0L); position = 0L; checksum.reset(); } if (file.length() == position || !file.exists()) { // no data to consume so rather than continually running, yield to allow other processors to use the thread. getLogger().debug("No data to consume; created no FlowFiles"); tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum, state.getBuffer())); persistState(tfo, context); context.yield(); return; } // If there is data to consume, read as much as we can. final TailFileState currentState = state; final Checksum chksum = checksum; // data has been written to file. Stream it to a new FlowFile. FlowFile flowFile = session.create(); final FileChannel fileReader = reader; final AtomicLong positionHolder = new AtomicLong(position); flowFile = session.write(flowFile, new OutputStreamCallback() { @Override public void process(final OutputStream rawOut) throws IOException { try (final OutputStream out = new BufferedOutputStream(rawOut)) { positionHolder.set(readLines(fileReader, currentState.getBuffer(), out, chksum)); } } }); // If there ended up being no data, just remove the FlowFile if (flowFile.getSize() == 0) { session.remove(flowFile); getLogger().debug("No data to consume; removed created FlowFile"); } else { // determine filename for FlowFile by using <base filename of log file>.<initial offset>-<final offset>.<extension> final String tailFilename = file.getName(); final String baseName = StringUtils.substringBeforeLast(tailFilename, "."); final String flowFileName; if (baseName.length() < tailFilename.length()) { flowFileName = baseName + "." + position + "-" + positionHolder.get() + "." + StringUtils.substringAfterLast(tailFilename, "."); } else { flowFileName = baseName + "." 
+ position + "-" + positionHolder.get(); } final Map<String, String> attributes = new HashMap<>(3); attributes.put(CoreAttributes.FILENAME.key(), flowFileName); attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain"); attributes.put("tailfile.original.path", tailFile); flowFile = session.putAllAttributes(flowFile, attributes); session.getProvenanceReporter().receive(flowFile, file.toURI().toString(), "FlowFile contains bytes " + position + " through " + positionHolder.get() + " of source file", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); session.transfer(flowFile, REL_SUCCESS); position = positionHolder.get(); // Set timestamp to the latest of when the file was modified and the current timestamp stored in the state. // We do this because when we read a file that has been rolled over, we set the state to 1 millisecond later than the last mod date // in order to avoid ingesting that file again. If we then read from this file during the same second (or millisecond, depending on the // operating system file last mod precision), then we could set the timestamp to a smaller value, which could result in reading in the // rotated file a second time. timestamp = Math.max(state.getTimestamp(), file.lastModified()); length = file.length(); getLogger().debug("Created {} and routed to success", new Object[] { flowFile }); } // Create a new state object to represent our current position, timestamp, etc. tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum, state.getBuffer())); // We must commit session before persisting state in order to avoid data loss on restart session.commit(); persistState(tfo, context); }
From source file:jef.tools.StringUtils.java
/** * CRC?,8???/*from w w w . j a v a 2 s. co m*/ */ public static String getCRC(InputStream in) { CRC32 crc32 = new CRC32(); byte[] b = new byte[65536]; int len = 0; try { while ((len = in.read(b)) != -1) { crc32.update(b, 0, len); } return Long.toHexString(crc32.getValue()); } catch (IOException e) { throw new RuntimeException(e); } finally { IOUtils.closeQuietly(in); } }
From source file:org.commoncrawl.service.listcrawler.CacheManager.java
/** * loadCacheItemFromDisk - load a single cache item from disk * // w w w .j a v a 2 s .com * @param file * @param optTargetURL * @param location * @return * @throws IOException */ private CacheItem loadCacheItemFromDisk(FileInputStream file, String optTargetURL, long location) throws IOException { long timeStart = System.currentTimeMillis(); // and read out the Item Header ... CacheItemHeader itemHeader = new CacheItemHeader(); itemHeader.readHeader(new DataInputStream(file)); // see if it is valid ... if (!Arrays.equals(itemHeader._sync, _header._sync)) { LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location + " failed - corrupt sync bytes detected!!!"); } else { CRC32 crc32 = new CRC32(); // ok deserialize the bytes ... CacheItem item = new CacheItem(); CheckedInputStream checkedStream = new CheckedInputStream(file, crc32); DataInputStream itemStream = new DataInputStream(checkedStream); item.readFields(itemStream); // read the content buffer length int contentBufferLen = itemStream.readInt(); if (contentBufferLen != 0) { byte data[] = new byte[contentBufferLen]; itemStream.read(data); item.setContent(new Buffer(data)); } // cache crc long crcValueComputed = crc32.getValue(); // read disk crc long crcValueOnDisk = itemStream.readLong(); // validate if (crcValueComputed == crcValueOnDisk) { String canonicalURL = URLUtils.canonicalizeURL(item.getUrl(), true); if (optTargetURL.length() == 0 || optTargetURL.equals(canonicalURL)) { if (isValidCacheItem(item)) { LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location + " completed in:" + (System.currentTimeMillis() - timeStart)); return item; } else { LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location + " failed with invalid result code"); } } else { LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location + " failed with url mismatch. 
record url:" + item.getUrl()); } } else { LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location + " failed - crc mismatch!!!"); } } return null; }
From source file:org.apache.jackrabbit.oak.segment.file.TarReader.java
/** * Loads the optional pre-compiled graph entry from the given tar file. * * @return graph buffer, or {@code null} if one was not found * @throws IOException if the tar file could not be read *///from ww w . j a v a 2s. c o m private ByteBuffer loadGraph() throws IOException { // read the graph metadata just before the tar index entry int pos = access.length() - 2 * BLOCK_SIZE - getEntrySize(index.remaining() + 16); ByteBuffer meta = access.read(pos - 16, 16); int crc32 = meta.getInt(); int count = meta.getInt(); int bytes = meta.getInt(); int magic = meta.getInt(); if (magic != GRAPH_MAGIC) { return null; // magic byte mismatch } if (count < 0 || bytes < count * 16 + 16 || BLOCK_SIZE + bytes > pos) { log.warn("Invalid graph metadata in tar file {}", file); return null; // impossible uuid and/or byte counts } // this involves seeking backwards in the file, which might not // perform well, but that's OK since we only do this once per file ByteBuffer graph = access.read(pos - bytes, bytes); byte[] b = new byte[bytes - 16]; graph.mark(); graph.get(b); graph.reset(); CRC32 checksum = new CRC32(); checksum.update(b); if (crc32 != (int) checksum.getValue()) { log.warn("Invalid graph checksum in tar file {}", file); return null; // checksum mismatch } hasGraph = true; return graph; }
From source file:org.openmrs.module.sync.SyncUtil.java
/**
 * Compresses the given text into a complete GZIP byte stream.
 *
 * <p>The text is encoded with the platform default charset, which is the charset
 * {@code decompress} in this file decodes with.
 *
 * @param content the text to compress
 * @return the GZIP-compressed bytes, including the GZIP trailer
 * @throws IOException if compression fails
 */
public static byte[] compress(String content) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    CheckedOutputStream cos = new CheckedOutputStream(baos, new CRC32());
    // The GZIP stream must be closed before the bytes are collected: closing writes the
    // deflater's remaining output and the trailer, and flushes the buffered stream. Without
    // it the result is empty or truncated.
    try (GZIPOutputStream zos = new GZIPOutputStream(new BufferedOutputStream(cos))) {
        zos.write(content.getBytes());
    }
    return baos.toByteArray();
}
From source file:org.openmrs.module.sync.SyncUtil.java
public static String decompress(byte[] data) throws IOException { ByteArrayInputStream bais2 = new ByteArrayInputStream(data); CheckedInputStream cis = new CheckedInputStream(bais2, new CRC32()); GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(cis)); InputStreamReader reader = new InputStreamReader(zis); BufferedReader br = new BufferedReader(reader); StringBuffer buffer = new StringBuffer(); String line = ""; while ((line = br.readLine()) != null) { buffer.append(line);//from ww w. jav a2 s.com } return buffer.toString(); }
From source file:org.talend.core.model.metadata.builder.database.ExtractMetaDataUtils.java
public boolean checkFileCRCCode(File targetFile, File sourceFile) throws Exception { // Cyclic Redundancy Check(CRC) if (!targetFile.exists() || !sourceFile.exists()) { return true; }/* www . j av a 2 s . com*/ FileInputStream tagetFilestream = new FileInputStream(targetFile); CRC32 targertCrc32 = new CRC32(); for (CheckedInputStream checkedinputstream = new CheckedInputStream(tagetFilestream, targertCrc32); checkedinputstream.read() != -1;) { // } FileInputStream sourceFilestream = new FileInputStream(sourceFile); CRC32 sourceCrc32 = new CRC32(); for (CheckedInputStream checkedinputstream = new CheckedInputStream(sourceFilestream, sourceCrc32); checkedinputstream.read() != -1;) { // } tagetFilestream.close(); sourceFilestream.close(); return Long.toHexString(targertCrc32.getValue()).equals(Long.toHexString(sourceCrc32.getValue())); }
From source file:com.cisco.dvbu.ps.common.util.CommonUtils.java
/** * Returns a CRC32 checksum of a file as a whole (as opposed to sum of checksums of all lines/rows). * //ww w. j a va 2 s.co m * @param filePath file name with full path * @return checksum value for the input file * @throws IOException */ public static long fileChecksum(String filePath) throws IOException { long checkSumValue = 0L; FileInputStream file = new FileInputStream(filePath); CheckedInputStream check = new CheckedInputStream(file, new CRC32()); BufferedInputStream in = new BufferedInputStream(check); while (in.read() != -1) { // Read file in completely } checkSumValue = check.getChecksum().getValue(); // System.out.println("fileChecksum(): checkSumValue = " + checkSumValue); return checkSumValue; }
From source file:com.zimbra.cs.zimlet.ZimletUtil.java
private static long computeCRC32(File file) throws IOException { byte buf[] = new byte[32 * 1024]; CRC32 crc = new CRC32(); crc.reset();//from ww w . j ava2 s . c om FileInputStream fis = null; try { fis = new FileInputStream(file); int bytesRead; while ((bytesRead = fis.read(buf)) != -1) { crc.update(buf, 0, bytesRead); } return crc.getValue(); } finally { if (fis != null) { try { fis.close(); } catch (IOException e) { } } } }
From source file:com.cisco.dvbu.ps.common.util.CommonUtils.java
/** * Returns a sum of CRC32 checksums of all lines/rows in a file. * This method is used to compare files with the same lines/rows, which may be in different order, in which case we * still want to consider them equal (from the point of view of containing the same data) * In such case this method will return the same result. * * This is useful when the file contains results of a database query and we need to compare * results of two queries that may return the same data but in different order. * //from w ww . j ava2 s. c om * @author SST * @param filePath file name with full path * @return sum of checksums of each line(row) from the input file * The type of this value could be long for files up to probably several GB in size. * BigInteger was chosen in case even bigger files are used. * @throws IOException */ public static BigInteger fileChecksumByRow(String filePath) throws IOException { BigInteger sumOfcheckSumValues = new BigInteger("0"); long currentLineCheckSumValue = 0L; Checksum checksum = new CRC32(); BufferedReader br = new BufferedReader(new FileReader(filePath)); String line; // System.out.println("currentLineCheckSumValue: "); while ((line = br.readLine()) != null) { // Read one line at a time byte bytes[] = line.getBytes(); checksum.reset(); checksum.update(bytes, 0, bytes.length); currentLineCheckSumValue = checksum.getValue(); // System.out.println(currentLineCheckSumValue); sumOfcheckSumValues = sumOfcheckSumValues.add(BigInteger.valueOf(currentLineCheckSumValue)); } br.close(); // System.out.println("fileChecksumByRow(): sumOfcheckSumValues = " + sumOfcheckSumValues); return sumOfcheckSumValues; }