Example usage for java.util.zip CRC32 CRC32

Introduction

In this page you can find the example usage for java.util.zip CRC32 CRC32.

Prototype

public CRC32()

Source Link

Document

Creates a new CRC32 object.

Usage

From source file:org.apache.nifi.processors.standard.TailFile.java

private void processTailFile(final ProcessContext context, final ProcessSession session,
        final String tailFile) {
    // If user changes the file that is being tailed, we need to consume the already-rolled-over data according
    // to the Initial Start Position property
    boolean rolloverOccurred;
    TailFileObject tfo = states.get(tailFile);

    if (tfo.isTailFileChanged()) {
        rolloverOccurred = false;//from  w w  w. j a  v a  2 s . c  om
        final String recoverPosition = context.getProperty(START_POSITION).getValue();

        if (START_BEGINNING_OF_TIME.getValue().equals(recoverPosition)) {
            recoverRolledFiles(context, session, tailFile, tfo.getExpectedRecoveryChecksum(),
                    tfo.getState().getTimestamp(), tfo.getState().getPosition());
        } else if (START_CURRENT_FILE.getValue().equals(recoverPosition)) {
            cleanup();
            tfo.setState(new TailFileState(tailFile, null, null, 0L, 0L, 0L, null, tfo.getState().getBuffer()));
        } else {
            final String filename = tailFile;
            final File file = new File(filename);

            try {
                final FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
                getLogger().debug("Created FileChannel {} for {}", new Object[] { fileChannel, file });

                final Checksum checksum = new CRC32();
                final long position = file.length();
                final long timestamp = file.lastModified();

                try (final InputStream fis = new FileInputStream(file);
                        final CheckedInputStream in = new CheckedInputStream(fis, checksum)) {
                    StreamUtils.copy(in, new NullOutputStream(), position);
                }

                fileChannel.position(position);
                cleanup();
                tfo.setState(new TailFileState(filename, file, fileChannel, position, timestamp, file.length(),
                        checksum, tfo.getState().getBuffer()));
            } catch (final IOException ioe) {
                getLogger().error(
                        "Attempted to position Reader at current position in file {} but failed to do so due to {}",
                        new Object[] { file, ioe.toString() }, ioe);
                context.yield();
                return;
            }
        }

        tfo.setTailFileChanged(false);
    } else {
        // Recover any data that may have rolled over since the last time that this processor ran.
        // If expectedRecoveryChecksum != null, that indicates that this is the first iteration since processor was started, so use whatever checksum value
        // was present when the state was last persisted. In this case, we must then null out the value so that the next iteration won't keep using the "recovered"
        // value. If the value is null, then we know that either the processor has already recovered that data, or there was no state persisted. In either case,
        // use whatever checksum value is currently in the state.
        Long expectedChecksumValue = tfo.getExpectedRecoveryChecksum();
        if (expectedChecksumValue == null) {
            expectedChecksumValue = tfo.getState().getChecksum() == null ? null
                    : tfo.getState().getChecksum().getValue();
        }

        rolloverOccurred = recoverRolledFiles(context, session, tailFile, expectedChecksumValue,
                tfo.getState().getTimestamp(), tfo.getState().getPosition());
        tfo.setExpectedRecoveryChecksum(null);
    }

    // initialize local variables from state object; this is done so that we can easily change the values throughout
    // the onTrigger method and then create a new state object after we finish processing the files.
    TailFileState state = tfo.getState();
    File file = state.getFile();
    FileChannel reader = state.getReader();
    Checksum checksum = state.getChecksum();
    if (checksum == null) {
        checksum = new CRC32();
    }
    long position = state.getPosition();
    long timestamp = state.getTimestamp();
    long length = state.getLength();

    // Create a reader if necessary.
    if (file == null || reader == null) {
        file = new File(tailFile);
        reader = createReader(file, position);
        if (reader == null) {
            context.yield();
            return;
        }
    }

    final long startNanos = System.nanoTime();

    // Check if file has rotated
    if (rolloverOccurred || (timestamp <= file.lastModified() && length > file.length())
            || (timestamp < file.lastModified() && length >= file.length())) {

        // Since file has rotated, we close the reader, create a new one, and then reset our state.
        try {
            reader.close();
            getLogger().debug("Closed FileChannel {}", new Object[] { reader, reader });
        } catch (final IOException ioe) {
            getLogger().warn("Failed to close reader for {} due to {}", new Object[] { file, ioe });
        }

        reader = createReader(file, 0L);
        position = 0L;
        checksum.reset();
    }

    if (file.length() == position || !file.exists()) {
        // no data to consume so rather than continually running, yield to allow other processors to use the thread.
        getLogger().debug("No data to consume; created no FlowFiles");
        tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum,
                state.getBuffer()));
        persistState(tfo, context);
        context.yield();
        return;
    }

    // If there is data to consume, read as much as we can.
    final TailFileState currentState = state;
    final Checksum chksum = checksum;
    // data has been written to file. Stream it to a new FlowFile.
    FlowFile flowFile = session.create();

    final FileChannel fileReader = reader;
    final AtomicLong positionHolder = new AtomicLong(position);
    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream rawOut) throws IOException {
            try (final OutputStream out = new BufferedOutputStream(rawOut)) {
                positionHolder.set(readLines(fileReader, currentState.getBuffer(), out, chksum));
            }
        }
    });

    // If there ended up being no data, just remove the FlowFile
    if (flowFile.getSize() == 0) {
        session.remove(flowFile);
        getLogger().debug("No data to consume; removed created FlowFile");
    } else {
        // determine filename for FlowFile by using <base filename of log file>.<initial offset>-<final offset>.<extension>
        final String tailFilename = file.getName();
        final String baseName = StringUtils.substringBeforeLast(tailFilename, ".");
        final String flowFileName;
        if (baseName.length() < tailFilename.length()) {
            flowFileName = baseName + "." + position + "-" + positionHolder.get() + "."
                    + StringUtils.substringAfterLast(tailFilename, ".");
        } else {
            flowFileName = baseName + "." + position + "-" + positionHolder.get();
        }

        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), flowFileName);
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tailFile);
        flowFile = session.putAllAttributes(flowFile, attributes);

        session.getProvenanceReporter().receive(flowFile, file.toURI().toString(),
                "FlowFile contains bytes " + position + " through " + positionHolder.get() + " of source file",
                TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
        session.transfer(flowFile, REL_SUCCESS);
        position = positionHolder.get();

        // Set timestamp to the latest of when the file was modified and the current timestamp stored in the state.
        // We do this because when we read a file that has been rolled over, we set the state to 1 millisecond later than the last mod date
        // in order to avoid ingesting that file again. If we then read from this file during the same second (or millisecond, depending on the
        // operating system file last mod precision), then we could set the timestamp to a smaller value, which could result in reading in the
        // rotated file a second time.
        timestamp = Math.max(state.getTimestamp(), file.lastModified());
        length = file.length();
        getLogger().debug("Created {} and routed to success", new Object[] { flowFile });
    }

    // Create a new state object to represent our current position, timestamp, etc.
    tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum,
            state.getBuffer()));

    // We must commit session before persisting state in order to avoid data loss on restart
    session.commit();
    persistState(tfo, context);
}

From source file:jef.tools.StringUtils.java

/**
 * CRC?,8???/*from w w  w  . j a  v  a 2 s. co m*/
 */
public static String getCRC(InputStream in) {
    CRC32 crc32 = new CRC32();
    byte[] b = new byte[65536];
    int len = 0;
    try {
        while ((len = in.read(b)) != -1) {
            crc32.update(b, 0, len);
        }
        return Long.toHexString(crc32.getValue());
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeQuietly(in);
    }
}

From source file:org.commoncrawl.service.listcrawler.CacheManager.java

/**
 * loadCacheItemFromDisk - load a single cache item from disk 
 * //  w w  w .j  a  v a 2  s .com
 * @param file
 * @param optTargetURL
 * @param location
 * @return
 * @throws IOException
 */
private CacheItem loadCacheItemFromDisk(FileInputStream file, String optTargetURL, long location)
        throws IOException {

    long timeStart = System.currentTimeMillis();

    // and read out the Item Header ...  
    CacheItemHeader itemHeader = new CacheItemHeader();
    itemHeader.readHeader(new DataInputStream(file));
    // see if it is valid ... 
    if (!Arrays.equals(itemHeader._sync, _header._sync)) {
        LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                + " failed - corrupt sync bytes detected!!!");
    } else {
        CRC32 crc32 = new CRC32();
        // ok deserialize the bytes ... 
        CacheItem item = new CacheItem();
        CheckedInputStream checkedStream = new CheckedInputStream(file, crc32);
        DataInputStream itemStream = new DataInputStream(checkedStream);
        item.readFields(itemStream);
        // read the content buffer length 
        int contentBufferLen = itemStream.readInt();
        if (contentBufferLen != 0) {
            byte data[] = new byte[contentBufferLen];
            itemStream.read(data);
            item.setContent(new Buffer(data));
        }

        // cache crc 
        long crcValueComputed = crc32.getValue();
        // read disk crc 
        long crcValueOnDisk = itemStream.readLong();
        // validate 
        if (crcValueComputed == crcValueOnDisk) {
            String canonicalURL = URLUtils.canonicalizeURL(item.getUrl(), true);
            if (optTargetURL.length() == 0 || optTargetURL.equals(canonicalURL)) {
                if (isValidCacheItem(item)) {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " completed in:" + (System.currentTimeMillis() - timeStart));
                    return item;
                } else {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " failed with invalid result code");
                }

            } else {
                LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                        + " failed with url mismatch. record url:" + item.getUrl());
            }
        } else {
            LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                    + " failed - crc mismatch!!!");
        }
    }
    return null;
}

From source file:org.apache.jackrabbit.oak.segment.file.TarReader.java

/**
 * Loads the optional pre-compiled graph entry from the given tar file.
 *
 * @return graph buffer, or {@code null} if one was not found
 * @throws IOException if the tar file could not be read
 *///from ww w  .  j  a  v  a  2s. c o m
private ByteBuffer loadGraph() throws IOException {
    // read the graph metadata just before the tar index entry
    int pos = access.length() - 2 * BLOCK_SIZE - getEntrySize(index.remaining() + 16);
    ByteBuffer meta = access.read(pos - 16, 16);
    int crc32 = meta.getInt();
    int count = meta.getInt();
    int bytes = meta.getInt();
    int magic = meta.getInt();

    if (magic != GRAPH_MAGIC) {
        return null; // magic byte mismatch
    }

    if (count < 0 || bytes < count * 16 + 16 || BLOCK_SIZE + bytes > pos) {
        log.warn("Invalid graph metadata in tar file {}", file);
        return null; // impossible uuid and/or byte counts
    }

    // this involves seeking backwards in the file, which might not
    // perform well, but that's OK since we only do this once per file
    ByteBuffer graph = access.read(pos - bytes, bytes);

    byte[] b = new byte[bytes - 16];
    graph.mark();
    graph.get(b);
    graph.reset();

    CRC32 checksum = new CRC32();
    checksum.update(b);
    if (crc32 != (int) checksum.getValue()) {
        log.warn("Invalid graph checksum in tar file {}", file);
        return null; // checksum mismatch
    }

    hasGraph = true;
    return graph;
}

From source file:org.openmrs.module.sync.SyncUtil.java

public static byte[] compress(String content) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    CheckedOutputStream cos = new CheckedOutputStream(baos, new CRC32());
    GZIPOutputStream zos = new GZIPOutputStream(new BufferedOutputStream(cos));
    IOUtils.copy(new ByteArrayInputStream(content.getBytes()), zos);
    return baos.toByteArray();
}

From source file:org.openmrs.module.sync.SyncUtil.java

public static String decompress(byte[] data) throws IOException {
    ByteArrayInputStream bais2 = new ByteArrayInputStream(data);
    CheckedInputStream cis = new CheckedInputStream(bais2, new CRC32());
    GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(cis));
    InputStreamReader reader = new InputStreamReader(zis);
    BufferedReader br = new BufferedReader(reader);
    StringBuffer buffer = new StringBuffer();
    String line = "";
    while ((line = br.readLine()) != null) {
        buffer.append(line);//from   ww  w.  jav a2  s.com
    }
    return buffer.toString();
}

From source file:org.talend.core.model.metadata.builder.database.ExtractMetaDataUtils.java

public boolean checkFileCRCCode(File targetFile, File sourceFile) throws Exception {
    // Cyclic Redundancy Check(CRC)
    if (!targetFile.exists() || !sourceFile.exists()) {
        return true;
    }/* www  .  j av a 2  s  .  com*/
    FileInputStream tagetFilestream = new FileInputStream(targetFile);
    CRC32 targertCrc32 = new CRC32();
    for (CheckedInputStream checkedinputstream = new CheckedInputStream(tagetFilestream,
            targertCrc32); checkedinputstream.read() != -1;) {
        //
    }
    FileInputStream sourceFilestream = new FileInputStream(sourceFile);
    CRC32 sourceCrc32 = new CRC32();
    for (CheckedInputStream checkedinputstream = new CheckedInputStream(sourceFilestream,
            sourceCrc32); checkedinputstream.read() != -1;) {
        //
    }
    tagetFilestream.close();
    sourceFilestream.close();
    return Long.toHexString(targertCrc32.getValue()).equals(Long.toHexString(sourceCrc32.getValue()));

}

From source file:com.cisco.dvbu.ps.common.util.CommonUtils.java

/**
 * Returns a CRC32 checksum of a file as a whole (as opposed to sum of checksums of all lines/rows).
 * //ww  w. j  a va 2 s.co  m
 * @param filePath      file name with full path
 * @return            checksum value for the input file
 * @throws IOException
 */
public static long fileChecksum(String filePath) throws IOException {

    long checkSumValue = 0L;

    FileInputStream file = new FileInputStream(filePath);

    CheckedInputStream check = new CheckedInputStream(file, new CRC32());

    BufferedInputStream in = new BufferedInputStream(check);

    while (in.read() != -1) {
        // Read file in completely
    }
    checkSumValue = check.getChecksum().getValue();
    //      System.out.println("fileChecksum(): checkSumValue = " + checkSumValue);
    return checkSumValue;
}

From source file:com.zimbra.cs.zimlet.ZimletUtil.java

private static long computeCRC32(File file) throws IOException {
    byte buf[] = new byte[32 * 1024];
    CRC32 crc = new CRC32();
    crc.reset();//from   ww w  . j  ava2 s  . c  om
    FileInputStream fis = null;
    try {
        fis = new FileInputStream(file);
        int bytesRead;
        while ((bytesRead = fis.read(buf)) != -1) {
            crc.update(buf, 0, bytesRead);
        }
        return crc.getValue();
    } finally {
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
            }
        }
    }
}

From source file:com.cisco.dvbu.ps.common.util.CommonUtils.java

/**
* Returns a sum of CRC32 checksums of all lines/rows in a file.
* This method is used to compare files with the same lines/rows, which may be in different order, in which case we
* still want to consider them equal (from the point of view of containing the same data)
* In such case this method will return the same result.
* 
* This is useful when the file contains results of a database query and we need to compare
* results of two queries that may return the same data but in different order.
*      //from  w  ww . j ava2  s. c om
* @author             SST
* @param filePath      file name with full path
* @return            sum of checksums of each line(row) from the input file
*                   The type of this value could be long for files up to probably several GB in size.
*                   BigInteger was chosen in case even bigger files are used.
* @throws IOException
*/
public static BigInteger fileChecksumByRow(String filePath) throws IOException {

    BigInteger sumOfcheckSumValues = new BigInteger("0");
    long currentLineCheckSumValue = 0L;
    Checksum checksum = new CRC32();

    BufferedReader br = new BufferedReader(new FileReader(filePath));
    String line;

    //       System.out.println("currentLineCheckSumValue: ");
    while ((line = br.readLine()) != null) {
        // Read one line at a time

        byte bytes[] = line.getBytes();
        checksum.reset();
        checksum.update(bytes, 0, bytes.length);

        currentLineCheckSumValue = checksum.getValue();
        //          System.out.println(currentLineCheckSumValue);

        sumOfcheckSumValues = sumOfcheckSumValues.add(BigInteger.valueOf(currentLineCheckSumValue));
    }
    br.close();
    //       System.out.println("fileChecksumByRow(): sumOfcheckSumValues = " + sumOfcheckSumValues);      
    return sumOfcheckSumValues;
}