Example usage for java.util.zip CRC32 CRC32

List of usage examples for java.util.zip CRC32 CRC32

Introduction

On this page you can find example usage of the java.util.zip CRC32() constructor.

Prototype

public CRC32() 

Source Link

Document

Creates a new CRC32 object.

Usage

From source file:org.apache.nifi.processors.standard.TailFile.java

private void processTailFile(final ProcessContext context, final ProcessSession session,
        final String tailFile) {
    // If user changes the file that is being tailed, we need to consume the already-rolled-over data according
    // to the Initial Start Position property
    boolean rolloverOccurred;
    TailFileObject tfo = states.get(tailFile);

    if (tfo.isTailFileChanged()) {
        rolloverOccurred = false;//from  w w  w. j a  v a  2 s . c  om
        final String recoverPosition = context.getProperty(START_POSITION).getValue();

        if (START_BEGINNING_OF_TIME.getValue().equals(recoverPosition)) {
            recoverRolledFiles(context, session, tailFile, tfo.getExpectedRecoveryChecksum(),
                    tfo.getState().getTimestamp(), tfo.getState().getPosition());
        } else if (START_CURRENT_FILE.getValue().equals(recoverPosition)) {
            cleanup();
            tfo.setState(new TailFileState(tailFile, null, null, 0L, 0L, 0L, null, tfo.getState().getBuffer()));
        } else {
            final String filename = tailFile;
            final File file = new File(filename);

            try {
                final FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
                getLogger().debug("Created FileChannel {} for {}", new Object[] { fileChannel, file });

                final Checksum checksum = new CRC32();
                final long position = file.length();
                final long timestamp = file.lastModified();

                try (final InputStream fis = new FileInputStream(file);
                        final CheckedInputStream in = new CheckedInputStream(fis, checksum)) {
                    StreamUtils.copy(in, new NullOutputStream(), position);
                }

                fileChannel.position(position);
                cleanup();
                tfo.setState(new TailFileState(filename, file, fileChannel, position, timestamp, file.length(),
                        checksum, tfo.getState().getBuffer()));
            } catch (final IOException ioe) {
                getLogger().error(
                        "Attempted to position Reader at current position in file {} but failed to do so due to {}",
                        new Object[] { file, ioe.toString() }, ioe);
                context.yield();
                return;
            }
        }

        tfo.setTailFileChanged(false);
    } else {
        // Recover any data that may have rolled over since the last time that this processor ran.
        // If expectedRecoveryChecksum != null, that indicates that this is the first iteration since processor was started, so use whatever checksum value
        // was present when the state was last persisted. In this case, we must then null out the value so that the next iteration won't keep using the "recovered"
        // value. If the value is null, then we know that either the processor has already recovered that data, or there was no state persisted. In either case,
        // use whatever checksum value is currently in the state.
        Long expectedChecksumValue = tfo.getExpectedRecoveryChecksum();
        if (expectedChecksumValue == null) {
            expectedChecksumValue = tfo.getState().getChecksum() == null ? null
                    : tfo.getState().getChecksum().getValue();
        }

        rolloverOccurred = recoverRolledFiles(context, session, tailFile, expectedChecksumValue,
                tfo.getState().getTimestamp(), tfo.getState().getPosition());
        tfo.setExpectedRecoveryChecksum(null);
    }

    // initialize local variables from state object; this is done so that we can easily change the values throughout
    // the onTrigger method and then create a new state object after we finish processing the files.
    TailFileState state = tfo.getState();
    File file = state.getFile();
    FileChannel reader = state.getReader();
    Checksum checksum = state.getChecksum();
    if (checksum == null) {
        checksum = new CRC32();
    }
    long position = state.getPosition();
    long timestamp = state.getTimestamp();
    long length = state.getLength();

    // Create a reader if necessary.
    if (file == null || reader == null) {
        file = new File(tailFile);
        reader = createReader(file, position);
        if (reader == null) {
            context.yield();
            return;
        }
    }

    final long startNanos = System.nanoTime();

    // Check if file has rotated
    if (rolloverOccurred || (timestamp <= file.lastModified() && length > file.length())
            || (timestamp < file.lastModified() && length >= file.length())) {

        // Since file has rotated, we close the reader, create a new one, and then reset our state.
        try {
            reader.close();
            getLogger().debug("Closed FileChannel {}", new Object[] { reader, reader });
        } catch (final IOException ioe) {
            getLogger().warn("Failed to close reader for {} due to {}", new Object[] { file, ioe });
        }

        reader = createReader(file, 0L);
        position = 0L;
        checksum.reset();
    }

    if (file.length() == position || !file.exists()) {
        // no data to consume so rather than continually running, yield to allow other processors to use the thread.
        getLogger().debug("No data to consume; created no FlowFiles");
        tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum,
                state.getBuffer()));
        persistState(tfo, context);
        context.yield();
        return;
    }

    // If there is data to consume, read as much as we can.
    final TailFileState currentState = state;
    final Checksum chksum = checksum;
    // data has been written to file. Stream it to a new FlowFile.
    FlowFile flowFile = session.create();

    final FileChannel fileReader = reader;
    final AtomicLong positionHolder = new AtomicLong(position);
    flowFile = session.write(flowFile, new OutputStreamCallback() {
        @Override
        public void process(final OutputStream rawOut) throws IOException {
            try (final OutputStream out = new BufferedOutputStream(rawOut)) {
                positionHolder.set(readLines(fileReader, currentState.getBuffer(), out, chksum));
            }
        }
    });

    // If there ended up being no data, just remove the FlowFile
    if (flowFile.getSize() == 0) {
        session.remove(flowFile);
        getLogger().debug("No data to consume; removed created FlowFile");
    } else {
        // determine filename for FlowFile by using <base filename of log file>.<initial offset>-<final offset>.<extension>
        final String tailFilename = file.getName();
        final String baseName = StringUtils.substringBeforeLast(tailFilename, ".");
        final String flowFileName;
        if (baseName.length() < tailFilename.length()) {
            flowFileName = baseName + "." + position + "-" + positionHolder.get() + "."
                    + StringUtils.substringAfterLast(tailFilename, ".");
        } else {
            flowFileName = baseName + "." + position + "-" + positionHolder.get();
        }

        final Map<String, String> attributes = new HashMap<>(3);
        attributes.put(CoreAttributes.FILENAME.key(), flowFileName);
        attributes.put(CoreAttributes.MIME_TYPE.key(), "text/plain");
        attributes.put("tailfile.original.path", tailFile);
        flowFile = session.putAllAttributes(flowFile, attributes);

        session.getProvenanceReporter().receive(flowFile, file.toURI().toString(),
                "FlowFile contains bytes " + position + " through " + positionHolder.get() + " of source file",
                TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos));
        session.transfer(flowFile, REL_SUCCESS);
        position = positionHolder.get();

        // Set timestamp to the latest of when the file was modified and the current timestamp stored in the state.
        // We do this because when we read a file that has been rolled over, we set the state to 1 millisecond later than the last mod date
        // in order to avoid ingesting that file again. If we then read from this file during the same second (or millisecond, depending on the
        // operating system file last mod precision), then we could set the timestamp to a smaller value, which could result in reading in the
        // rotated file a second time.
        timestamp = Math.max(state.getTimestamp(), file.lastModified());
        length = file.length();
        getLogger().debug("Created {} and routed to success", new Object[] { flowFile });
    }

    // Create a new state object to represent our current position, timestamp, etc.
    tfo.setState(new TailFileState(tailFile, file, reader, position, timestamp, length, checksum,
            state.getBuffer()));

    // We must commit session before persisting state in order to avoid data loss on restart
    session.commit();
    persistState(tfo, context);
}

From source file:jef.tools.StringUtils.java

/**
 * CRC?,8???/*from w w  w  . j a  v  a 2 s. co m*/
 */
public static String getCRC(InputStream in) {
    CRC32 crc32 = new CRC32();
    byte[] b = new byte[65536];
    int len = 0;
    try {
        while ((len = in.read(b)) != -1) {
            crc32.update(b, 0, len);
        }
        return Long.toHexString(crc32.getValue());
    } catch (IOException e) {
        throw new RuntimeException(e);
    } finally {
        IOUtils.closeQuietly(in);
    }
}

From source file:org.commoncrawl.service.listcrawler.CacheManager.java

/**
 * loadCacheItemFromDisk - load a single cache item from disk 
 * //  w w  w .j  a  v a 2  s .com
 * @param file
 * @param optTargetURL
 * @param location
 * @return
 * @throws IOException
 */
private CacheItem loadCacheItemFromDisk(FileInputStream file, String optTargetURL, long location)
        throws IOException {

    long timeStart = System.currentTimeMillis();

    // and read out the Item Header ...  
    CacheItemHeader itemHeader = new CacheItemHeader();
    itemHeader.readHeader(new DataInputStream(file));
    // see if it is valid ... 
    if (!Arrays.equals(itemHeader._sync, _header._sync)) {
        LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                + " failed - corrupt sync bytes detected!!!");
    } else {
        CRC32 crc32 = new CRC32();
        // ok deserialize the bytes ... 
        CacheItem item = new CacheItem();
        CheckedInputStream checkedStream = new CheckedInputStream(file, crc32);
        DataInputStream itemStream = new DataInputStream(checkedStream);
        item.readFields(itemStream);
        // read the content buffer length 
        int contentBufferLen = itemStream.readInt();
        if (contentBufferLen != 0) {
            byte data[] = new byte[contentBufferLen];
            itemStream.read(data);
            item.setContent(new Buffer(data));
        }

        // cache crc 
        long crcValueComputed = crc32.getValue();
        // read disk crc 
        long crcValueOnDisk = itemStream.readLong();
        // validate 
        if (crcValueComputed == crcValueOnDisk) {
            String canonicalURL = URLUtils.canonicalizeURL(item.getUrl(), true);
            if (optTargetURL.length() == 0 || optTargetURL.equals(canonicalURL)) {
                if (isValidCacheItem(item)) {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " completed in:" + (System.currentTimeMillis() - timeStart));
                    return item;
                } else {
                    LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                            + " failed with invalid result code");
                }

            } else {
                LOG.info("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                        + " failed with url mismatch. record url:" + item.getUrl());
            }
        } else {
            LOG.error("### Item Lookup for URL:" + optTargetURL + " Record at:" + location
                    + " failed - crc mismatch!!!");
        }
    }
    return null;
}

From source file:org.apache.jackrabbit.oak.segment.file.TarReader.java

/**
 * Loads the optional pre-compiled graph entry from the given tar file.
 *
 * @return graph buffer, or {@code null} if one was not found
 * @throws IOException if the tar file could not be read
 *///from ww w  .  j  a  v  a  2s. c o m
private ByteBuffer loadGraph() throws IOException {
    // read the graph metadata just before the tar index entry
    int pos = access.length() - 2 * BLOCK_SIZE - getEntrySize(index.remaining() + 16);
    ByteBuffer meta = access.read(pos - 16, 16);
    int crc32 = meta.getInt();
    int count = meta.getInt();
    int bytes = meta.getInt();
    int magic = meta.getInt();

    if (magic != GRAPH_MAGIC) {
        return null; // magic byte mismatch
    }

    if (count < 0 || bytes < count * 16 + 16 || BLOCK_SIZE + bytes > pos) {
        log.warn("Invalid graph metadata in tar file {}", file);
        return null; // impossible uuid and/or byte counts
    }

    // this involves seeking backwards in the file, which might not
    // perform well, but that's OK since we only do this once per file
    ByteBuffer graph = access.read(pos - bytes, bytes);

    byte[] b = new byte[bytes - 16];
    graph.mark();
    graph.get(b);
    graph.reset();

    CRC32 checksum = new CRC32();
    checksum.update(b);
    if (crc32 != (int) checksum.getValue()) {
        log.warn("Invalid graph checksum in tar file {}", file);
        return null; // checksum mismatch
    }

    hasGraph = true;
    return graph;
}

From source file:org.openmrs.module.sync.SyncUtil.java

public static byte[] compress(String content) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    CheckedOutputStream cos = new CheckedOutputStream(baos, new CRC32());
    GZIPOutputStream zos = new GZIPOutputStream(new BufferedOutputStream(cos));
    IOUtils.copy(new ByteArrayInputStream(content.getBytes()), zos);
    return baos.toByteArray();
}

From source file:org.openmrs.module.sync.SyncUtil.java

public static String decompress(byte[] data) throws IOException {
    ByteArrayInputStream bais2 = new ByteArrayInputStream(data);
    CheckedInputStream cis = new CheckedInputStream(bais2, new CRC32());
    GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(cis));
    InputStreamReader reader = new InputStreamReader(zis);
    BufferedReader br = new BufferedReader(reader);
    StringBuffer buffer = new StringBuffer();
    String line = "";
    while ((line = br.readLine()) != null) {
        buffer.append(line);//from   ww  w.  jav a2  s.com
    }
    return buffer.toString();
}

From source file:org.talend.core.model.metadata.builder.database.ExtractMetaDataUtils.java

public boolean checkFileCRCCode(File targetFile, File sourceFile) throws Exception {
    // Cyclic Redundancy Check(CRC)
    if (!targetFile.exists() || !sourceFile.exists()) {
        return true;
    }/* www  .  j av a 2  s  .  com*/
    FileInputStream tagetFilestream = new FileInputStream(targetFile);
    CRC32 targertCrc32 = new CRC32();
    for (CheckedInputStream checkedinputstream = new CheckedInputStream(tagetFilestream,
            targertCrc32); checkedinputstream.read() != -1;) {
        //
    }
    FileInputStream sourceFilestream = new FileInputStream(sourceFile);
    CRC32 sourceCrc32 = new CRC32();
    for (CheckedInputStream checkedinputstream = new CheckedInputStream(sourceFilestream,
            sourceCrc32); checkedinputstream.read() != -1;) {
        //
    }
    tagetFilestream.close();
    sourceFilestream.close();
    return Long.toHexString(targertCrc32.getValue()).equals(Long.toHexString(sourceCrc32.getValue()));

}

From source file:com.cisco.dvbu.ps.common.util.CommonUtils.java

/**
 * Returns a CRC32 checksum of a file as a whole (as opposed to sum of checksums of all lines/rows).
 * //ww  w. j  a va 2 s.co  m
 * @param filePath      file name with full path
 * @return            checksum value for the input file
 * @throws IOException
 */
public static long fileChecksum(String filePath) throws IOException {

    long checkSumValue = 0L;

    FileInputStream file = new FileInputStream(filePath);

    CheckedInputStream check = new CheckedInputStream(file, new CRC32());

    BufferedInputStream in = new BufferedInputStream(check);

    while (in.read() != -1) {
        // Read file in completely
    }
    checkSumValue = check.getChecksum().getValue();
    //      System.out.println("fileChecksum(): checkSumValue = " + checkSumValue);
    return checkSumValue;
}

From source file:com.zimbra.cs.zimlet.ZimletUtil.java

private static long computeCRC32(File file) throws IOException {
    byte buf[] = new byte[32 * 1024];
    CRC32 crc = new CRC32();
    crc.reset();//from   ww w  . j  ava2 s  . c  om
    FileInputStream fis = null;
    try {
        fis = new FileInputStream(file);
        int bytesRead;
        while ((bytesRead = fis.read(buf)) != -1) {
            crc.update(buf, 0, bytesRead);
        }
        return crc.getValue();
    } finally {
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
            }
        }
    }
}

From source file:com.cisco.dvbu.ps.common.util.CommonUtils.java

/**
* Returns a sum of CRC32 checksums of all lines/rows in a file.
* This method is used to compare files with the same lines/rows, which may be in different order, in which case we
* still want to consider them equal (from the point of view of containing the same data)
* In such case this method will return the same result.
* 
* This is useful when the file contains results of a database query and we need to compare
* results of two queries that may return the same data but in different order.
*      //from  w  ww . j ava2  s. c om
* @author             SST
* @param filePath      file name with full path
* @return            sum of checksums of each line(row) from the input file
*                   The type of this value could be long for files up to probably several GB in size.
*                   BigInteger was chosen in case even bigger files are used.
* @throws IOException
*/
public static BigInteger fileChecksumByRow(String filePath) throws IOException {

    BigInteger sumOfcheckSumValues = new BigInteger("0");
    long currentLineCheckSumValue = 0L;
    Checksum checksum = new CRC32();

    BufferedReader br = new BufferedReader(new FileReader(filePath));
    String line;

    //       System.out.println("currentLineCheckSumValue: ");
    while ((line = br.readLine()) != null) {
        // Read one line at a time

        byte bytes[] = line.getBytes();
        checksum.reset();
        checksum.update(bytes, 0, bytes.length);

        currentLineCheckSumValue = checksum.getValue();
        //          System.out.println(currentLineCheckSumValue);

        sumOfcheckSumValues = sumOfcheckSumValues.add(BigInteger.valueOf(currentLineCheckSumValue));
    }
    br.close();
    //       System.out.println("fileChecksumByRow(): sumOfcheckSumValues = " + sumOfcheckSumValues);      
    return sumOfcheckSumValues;
}