org.apache.hadoop.util.DataChecksum.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.util.DataChecksum.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.util;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.ChecksumException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;

/**
 * This class provides interface and utilities for processing checksums for
 * DFS data transfers.
 */
@InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" })
@InterfaceStability.Evolving
public class DataChecksum implements Checksum {

    // Numeric ids of the checksum types. Each one is passed as the id of the
    // Type constant with the matching name.
    public static final int CHECKSUM_NULL = 0;
    public static final int CHECKSUM_CRC32 = 1;
    public static final int CHECKSUM_CRC32C = 2;
    public static final int CHECKSUM_DEFAULT = 3;
    public static final int CHECKSUM_MIXED = 4;

    private static final Logger LOG = LoggerFactory.getLogger(DataChecksum.class);
    // Initialised from Shell.isJavaVersionAtLeast(9); newCrc32C() sets it to
    // false when creating the Java 9 CRC32C implementation fails.
    private static volatile boolean useJava9Crc32C = Shell.isJavaVersionAtLeast(9);

    /**
     * The checksum types. Each constant carries its numeric {@link #id} and the
     * {@link #size} in bytes of one checksum value of that type.
     */
    public enum Type {
        NULL(CHECKSUM_NULL, 0),
        CRC32(CHECKSUM_CRC32, 4),
        CRC32C(CHECKSUM_CRC32C, 4),
        /** This cannot be used to create DataChecksum. */
        DEFAULT(CHECKSUM_DEFAULT, 0),
        /** This cannot be used to create DataChecksum. */
        MIXED(CHECKSUM_MIXED, 0);

        // values() clones its array on every call; keep one private copy so
        // that valueOf(int) does not allocate on each lookup.
        private static final Type[] VALUES = values();

        public final int id;
        public final int size;

        Type(int id, int size) {
            this.id = id;
            this.size = size;
        }

        /**
         * Looks a type up by id. The id is used as an index into the
         * declaration order of the constants.
         *
         * @param id the numeric type id
         * @return the type corresponding to the id.
         * @throws IllegalArgumentException if {@code id} is negative or not
         *     smaller than the number of constants
         */
        public static Type valueOf(int id) {
            if (id < 0 || id >= VALUES.length) {
                throw new IllegalArgumentException("id=" + id + " out of range [0, " + VALUES.length + ")");
            }
            return VALUES[id];
        }
    }

    /**
     * Create a Crc32 Checksum object.
     *
     * @return a fresh {@link java.util.zip.CRC32} instance
     */
    public static Checksum newCrc32() {
        final Checksum crc32 = new CRC32();
        return crc32;
    }

    /**
     * Creates a CRC32C checksum: the Java 9 implementation while
     * {@code useJava9Crc32C} is set, otherwise {@code PureJavaCrc32C}.
     *
     * The flag is volatile to avoid synchronization here.
     * Re-entrancy is unlikely except in failure mode (and inexpensive).
     *
     * @return a new CRC32C checksum instance
     */
    static Checksum newCrc32C() {
        try {
            return useJava9Crc32C ? Java9Crc32CFactory.createChecksum() : new PureJavaCrc32C();
        } catch (LinkageError | RuntimeException e) {
            // should not happen
            // LinkageError covers both ExceptionInInitializerError (the first
            // failed initialisation of the factory class) and the
            // NoClassDefFoundError that any later or concurrent access to the
            // already-failed class receives, so every caller falls back.
            LOG.error("CRC32C creation failed, switching to PureJavaCrc32C", e);
            useJava9Crc32C = false;
            return new PureJavaCrc32C();
        }
    }

    /**
     * Maps a CRC checksum type to its polynomial.
     *
     * @param type the checksum type to look up
     * @return the int representation of the polynomial associated with the
     *     CRC {@code type}, suitable for use with further CRC arithmetic.
     * @throws IOException if there is no CRC polynomial applicable
     *     to the given {@code type}.
     */
    public static int getCrcPolynomialForType(Type type) throws IOException {
        final int polynomial;
        switch (type) {
        case CRC32:
            polynomial = CrcUtil.GZIP_POLYNOMIAL;
            break;
        case CRC32C:
            polynomial = CrcUtil.CASTAGNOLI_POLYNOMIAL;
            break;
        default:
            throw new IOException("No CRC polynomial could be associated with type: " + type);
        }
        return polynomial;
    }

    /**
     * Creates a DataChecksum of the given type and chunk size.
     *
     * @param type the checksum type; only NULL, CRC32 and CRC32C are supported
     * @param bytesPerChecksum number of data bytes covered by one checksum
     * @return the new DataChecksum, or null if {@code bytesPerChecksum} is not
     *     positive or the type is not one of the supported ones
     */
    public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum) {
        if (bytesPerChecksum <= 0) {
            return null;
        }

        final Checksum algorithm;
        switch (type) {
        case NULL:
            algorithm = new ChecksumNull();
            break;
        case CRC32:
            algorithm = newCrc32();
            break;
        case CRC32C:
            algorithm = newCrc32C();
            break;
        default:
            return null;
        }
        return new DataChecksum(type, algorithm, bytesPerChecksum);
    }

    /**
     * Creates a DataChecksum from the checksum header stored at
     * {@code bytes[offset]}: one type byte followed by a big-endian int holding
     * bytesPerChecksum.
     *
     * @param bytes the array containing the header
     * @param offset index of the first header byte
     * @return DataChecksum of the type in the array, or null if the header does
     *     not fit in the array at {@code offset}, bytesPerChecksum is not
     *     positive, or the type cannot be used to create a DataChecksum.
     * @throws IllegalArgumentException if the type byte is not a valid type id
     */
    public static DataChecksum newDataChecksum(byte[] bytes, int offset) {
        final int headerSize = getChecksumHeaderSize();
        // Compare against length - headerSize instead of offset + headerSize:
        // the latter overflows for offsets close to Integer.MAX_VALUE, which
        // let the check pass and ended in an ArrayIndexOutOfBoundsException.
        if (offset < 0 || offset > bytes.length - headerSize) {
            return null;
        }

        // like readInt():
        int bytesPerChecksum = ((bytes[offset + 1] & 0xff) << 24) | ((bytes[offset + 2] & 0xff) << 16)
                | ((bytes[offset + 3] & 0xff) << 8) | ((bytes[offset + 4] & 0xff));
        return newDataChecksum(Type.valueOf(bytes[offset]), bytesPerChecksum);
    }

    /**
     * Builds a DataChecksum from a checksum header read off the input stream
     * <i>in</i>: one type byte followed by an int holding bytesPerChecksum.
     *
     * @param in the stream positioned at the start of the header
     * @return the DataChecksum described by the header, never null
     * @throws IOException if reading fails or no DataChecksum can be created
     *     for the type and bytesPerChecksum that were read
     */
    public static DataChecksum newDataChecksum(DataInputStream in) throws IOException {
        final int typeId = in.readByte();
        final int chunkSize = in.readInt();
        final DataChecksum result = newDataChecksum(Type.valueOf(typeId), chunkSize);
        if (result != null) {
            return result;
        }
        throw new InvalidChecksumSizeException(
                "Could not create DataChecksum of type " + typeId + " with bytesPerChecksum " + chunkSize);
    }

    /**
     * Writes the checksum header to the output stream <i>out</i>: the type id
     * as a single byte, then bytesPerChecksum as an int.
     *
     * @param out the stream to write the header to
     * @throws IOException if writing to the stream fails
     */
    public void writeHeader(DataOutputStream out) throws IOException {
        final int typeId = this.type.id;
        out.writeByte(typeId);
        out.writeInt(this.bytesPerChecksum);
    }

    /**
     * Builds the checksum header as a byte array: the type id in the first
     * byte, followed by bytesPerChecksum as a big-endian int.
     *
     * @return a new array of {@link #getChecksumHeaderSize()} bytes
     */
    public byte[] getHeader() {
        // A freshly allocated heap ByteBuffer is big-endian, which is the same
        // layout DataOutput.writeInt() produces.
        final ByteBuffer header = ByteBuffer.allocate(getChecksumHeaderSize());
        header.put((byte) type.id);
        header.putInt(bytesPerChecksum);
        return header.array();
    }

    /**
     * Writes the current checksum to the stream.
     * If <i>reset</i> is true, then resets the checksum.
     *
     * @param out the stream receiving the checksum value
     * @param reset whether to reset this checksum after writing
     * @return number of bytes written. Will be equal to getChecksumSize();
     * @throws IOException if the checksum size is neither 0 nor 4, or if
     *     writing to the stream fails
     */
    public int writeValue(DataOutputStream out, boolean reset) throws IOException {
        final int width = type.size;
        if (width <= 0) {
            // Nothing to write for types without a checksum value.
            return 0;
        }
        if (width != 4) {
            throw new IOException("Unknown Checksum " + type);
        }

        out.writeInt((int) summer.getValue());
        if (reset) {
            reset();
        }
        return width;
    }

    /**
     * Writes the current checksum to a buffer, most significant byte first.
     * If <i>reset</i> is true, then resets the checksum.
     *
     * @param buf the destination array
     * @param offset index in {@code buf} of the first byte to write
     * @param reset whether to reset this checksum after writing
     * @return number of bytes written. Will be equal to getChecksumSize();
     * @throws IOException if the checksum size is neither 0 nor 4
     */
    public int writeValue(byte[] buf, int offset, boolean reset) throws IOException {
        final int width = type.size;
        if (width <= 0) {
            return 0;
        }
        if (width != 4) {
            throw new IOException("Unknown Checksum " + type);
        }

        final int crc = (int) summer.getValue();
        for (int k = 0; k < 4; k++) {
            // k = 0 stores bits 31..24, k = 3 stores bits 7..0.
            buf[offset + k] = (byte) (crc >>> (24 - 8 * k));
        }

        if (reset) {
            reset();
        }
        return width;
    }

    /**
     * Compares the checksum located at buf[offset] with the current checksum.
     * For 4-byte checksum types the stored value is read as a big-endian int;
     * for other types the result only says whether the checksum size is 0.
     *
     * @param buf the array holding the stored checksum
     * @param offset index in {@code buf} of the first checksum byte
     * @return true if the checksum matches and false otherwise.
     */
    public boolean compare(byte[] buf, int offset) {
        if (type.size != 4) {
            return type.size == 0;
        }

        int stored = 0;
        for (int k = 0; k < 4; k++) {
            stored = (stored << 8) | (buf[offset + k] & 0xff);
        }
        return stored == (int) summer.getValue();
    }

    // The checksum type this instance was created for.
    private final Type type;
    // The underlying checksum implementation that all Checksum calls delegate to.
    private final Checksum summer;
    // Number of data bytes covered by one checksum value.
    private final int bytesPerChecksum;
    // Number of bytes fed through update() since creation or the last reset().
    private int inSum = 0;

    /**
     * Private constructor; instances are created through the static
     * newDataChecksum factory methods.
     *
     * @param type the checksum type
     * @param checksum the checksum implementation to delegate to
     * @param chunkSize the number of data bytes per checksum
     */
    private DataChecksum(Type type, Checksum checksum, int chunkSize) {
        this.type = type;
        summer = checksum;
        bytesPerChecksum = chunkSize;
    }

    /**
     * Returns the type this DataChecksum was created with.
     *
     * @return the checksum algorithm type.
     */
    public Type getChecksumType() {
        return this.type;
    }

    /**
     * Returns the size of a single checksum value of this type.
     *
     * @return the size for a checksum.
     */
    public int getChecksumSize() {
        return this.type.size;
    }

    /**
     * Computes how many checksum bytes are needed for {@code dataSize} bytes of
     * data, i.e. one checksum per started chunk of bytesPerChecksum bytes.
     *
     * Note that, because integer division truncates toward zero, a
     * {@code dataSize} of 0 yields one checksum when bytesPerChecksum is
     * greater than 1.
     *
     * @param dataSize the length of the data in bytes
     * @return the required checksum size given the data length.
     */
    public int getChecksumSize(int dataSize) {
        final int chunks = (dataSize - 1) / getBytesPerChecksum() + 1;
        return chunks * getChecksumSize();
    }

    /** @return the number of data bytes covered by one checksum. */
    public int getBytesPerChecksum() {
        return this.bytesPerChecksum;
    }

    /** @return the number of bytes added through update() since the last reset. */
    public int getNumBytesInSum() {
        return this.inSum;
    }

    /** Number of bytes occupied by an {@code int}. */
    public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;

    /**
     * @return the size in bytes of a checksum header: one type byte followed
     *     by the bytesPerChecksum int.
     */
    public static int getChecksumHeaderSize() {
        final int typeFieldSize = 1;
        return typeFieldSize + SIZE_OF_INTEGER;
    }

    // Checksum interface: the methods below delegate to the wrapped summer.

    /** @return the current value of the underlying checksum. */
    @Override
    public long getValue() {
        return this.summer.getValue();
    }

    /** Resets the underlying checksum and clears the count of bytes summed. */
    @Override
    public void reset() {
        this.summer.reset();
        this.inSum = 0;
    }

    /**
     * Adds {@code len} bytes of {@code b}, starting at {@code off}, to the
     * checksum and to the count of bytes summed. Does nothing when
     * {@code len} is not positive.
     */
    @Override
    public void update(byte[] b, int off, int len) {
        if (len <= 0) {
            return;
        }
        summer.update(b, off, len);
        inSum += len;
    }

    /** Adds a single byte to the checksum and to the count of bytes summed. */
    @Override
    public void update(int b) {
        this.summer.update(b);
        this.inSum++;
    }

    /**
     * Verify that the given checksums match the given data.
     *
     * The 'mark' of the ByteBuffer parameters may be modified by this function,
     * but the position is maintained.
     *
     * When both buffers are array-backed the check runs directly on the
     * backing arrays; otherwise it runs on the buffers themselves. In either
     * case the native implementation is used when it is available (and, for
     * the buffer variant, when {@code data} is a direct buffer).
     *
     * @param data the buffer pointing to the data to verify.
     * @param checksums the buffer pointing to a series of stored checksums
     * @param fileName the name of the file being read, for error-reporting
     * @param basePos the file position to which the start of 'data' corresponds
     * @throws ChecksumException if the checksums do not match
     */
    public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums, String fileName, long basePos)
            throws ChecksumException {
        if (type.size == 0) {
            // Types without a checksum value have nothing to verify.
            return;
        }

        final boolean arrayBacked = data.hasArray() && checksums.hasArray();
        if (!arrayBacked) {
            if (NativeCrc32.isAvailable() && data.isDirect()) {
                NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data, fileName, basePos);
            } else {
                verifyChunked(type, summer, data, bytesPerChecksum, checksums, fileName, basePos);
            }
            return;
        }

        // Work on the backing arrays to avoid copying the data.
        final byte[] dataArray = data.array();
        final int dataStart = data.arrayOffset() + data.position();
        final int dataLength = data.remaining();
        final byte[] sumsArray = checksums.array();
        final int sumsStart = checksums.arrayOffset() + checksums.position();

        if (NativeCrc32.isAvailable()) {
            NativeCrc32.verifyChunkedSumsByteArray(bytesPerChecksum, type.id, sumsArray, sumsStart,
                    dataArray, dataStart, dataLength, fileName, basePos);
        } else {
            verifyChunked(type, summer, dataArray, dataStart, dataLength, bytesPerChecksum,
                    sumsArray, sumsStart, fileName, basePos);
        }
    }

    /**
     * Chunked verification on ByteBuffers: copies the data chunk by chunk into
     * a scratch array, checksums each chunk with {@code algorithm} and compares
     * it with the next int read from {@code crcs}. Both buffers are marked
     * before reading and reset afterwards, so their positions are restored.
     *
     * @param type the checksum type, used for error reporting
     * @param algorithm the checksum implementation; it is reset for every chunk
     * @param data the data to verify, from its position to its limit
     * @param bytesPerCrc the number of data bytes covered by one checksum
     * @param crcs the stored checksums, one int per chunk
     * @param filename the name of the file being read, for error reporting
     * @param basePos the file position to which the start of 'data' corresponds
     * @throws ChecksumException if a computed checksum differs from the stored one
     */
    static void verifyChunked(final Type type, final Checksum algorithm, final ByteBuffer data,
            final int bytesPerCrc, final ByteBuffer crcs, final String filename, final long basePos)
            throws ChecksumException {
        final byte[] chunk = new byte[bytesPerCrc];
        final int total = data.remaining();
        data.mark();
        crcs.mark();

        try {
            // Full chunks: continue while a whole chunk still fits.
            final int fullChunkLimit = total - bytesPerCrc + 1;
            int consumed = 0;
            while (consumed < fullChunkLimit) {
                data.get(chunk);
                algorithm.reset();
                algorithm.update(chunk, 0, bytesPerCrc);
                final int actual = (int) algorithm.getValue();
                final int stored = crcs.getInt();

                if (actual != stored) {
                    // 'consumed' is the offset of this chunk from the start of the data.
                    throwChecksumException(type, algorithm, filename, basePos + consumed, stored, actual);
                }
                consumed += bytesPerCrc;
            }

            // Trailing partial chunk, if any.
            final int tail = total - consumed;
            if (tail > 0) {
                data.get(chunk, 0, tail);
                algorithm.reset();
                algorithm.update(chunk, 0, tail);
                final int actual = (int) algorithm.getValue();
                final int stored = crcs.getInt();

                if (actual != stored) {
                    throwChecksumException(type, algorithm, filename, basePos + consumed, stored, actual);
                }
            }
        } finally {
            data.reset();
            crcs.reset();
        }
    }

    /**
     * Implementation of chunked verification specifically on byte arrays. This
     * is to avoid the copy when dealing with ByteBuffers that have array backing.
     *
     * Each chunk of {@code bytesPerCrc} data bytes (plus a shorter trailing
     * chunk, if any) is checksummed with {@code algorithm} and compared with
     * the next big-endian int in {@code crcs}.
     *
     * @param type the checksum type, used for error reporting
     * @param algorithm the checksum implementation; it is reset for every chunk
     * @param data the array holding the data to verify
     * @param dataOffset index of the first data byte
     * @param dataLength number of data bytes to verify
     * @param bytesPerCrc the number of data bytes covered by one checksum
     * @param crcs the array holding the stored checksums
     * @param crcsOffset index of the first stored checksum byte
     * @param filename the name of the file being read, for error reporting
     * @param basePos the file position to which {@code dataOffset} corresponds
     * @throws ChecksumException if a computed checksum differs from the stored one
     */
    static void verifyChunked(final Type type, final Checksum algorithm, final byte[] data, final int dataOffset,
            final int dataLength, final int bytesPerCrc, final byte[] crcs, final int crcsOffset,
            final String filename, final long basePos) throws ChecksumException {
        final int end = dataOffset + dataLength;
        final int fullChunkLimit = end - bytesPerCrc + 1;
        int pos = dataOffset;
        int sumPos = crcsOffset;

        // Full chunks: continue while a whole chunk still fits before 'end'.
        while (pos < fullChunkLimit) {
            algorithm.reset();
            algorithm.update(data, pos, bytesPerCrc);
            final int actual = (int) algorithm.getValue();
            final int stored = ((crcs[sumPos] & 0xff) << 24) | ((crcs[sumPos + 1] & 0xff) << 16)
                    | ((crcs[sumPos + 2] & 0xff) << 8) | (crcs[sumPos + 3] & 0xff);

            if (actual != stored) {
                throwChecksumException(type, algorithm, filename, basePos + pos - dataOffset, stored, actual);
            }
            pos += bytesPerCrc;
            sumPos += 4;
        }

        // Trailing partial chunk, if any.
        final int tail = end - pos;
        if (tail > 0) {
            algorithm.reset();
            algorithm.update(data, pos, tail);
            final int actual = (int) algorithm.getValue();
            final int stored = ((crcs[sumPos] & 0xff) << 24) | ((crcs[sumPos + 1] & 0xff) << 16)
                    | ((crcs[sumPos + 2] & 0xff) << 8) | (crcs[sumPos + 3] & 0xff);

            if (actual != stored) {
                throwChecksumException(type, algorithm, filename, basePos + pos - dataOffset, stored, actual);
            }
        }
    }

    /**
     * Always throws a ChecksumException describing a mismatch between the
     * stored and the computed checksum.
     *
     * @param type the checksum type
     * @param algorithm the checksum implementation; its simple class name is reported
     * @param filename the name of the file being read
     * @param errPos the position reported for the mismatch
     * @param expected the stored checksum
     * @param computed the checksum computed from the data
     * @throws ChecksumException always
     */
    private static void throwChecksumException(Type type, Checksum algorithm, String filename, long errPos,
            int expected, int computed) throws ChecksumException {
        final StringBuilder message = new StringBuilder();
        message.append("Checksum ").append(type);
        message.append(" not matched for file ").append(filename);
        message.append(" at position ").append(errPos);
        message.append(String.format(": expected=%X but computed=%X", expected, computed));
        message.append(", algorithm=").append(algorithm.getClass().getSimpleName());
        throw new ChecksumException(message.toString(), errPos);
    }

    /**
     * Calculate checksums for the given data.
     *
     * The 'mark' of the ByteBuffer parameters may be modified by this function,
     * but the position is maintained.
     *
     * Array-backed buffers are handled by the byte-array overload. Otherwise
     * the native implementation is used when available, and as a last resort
     * the data is copied chunk by chunk and checksummed in Java.
     *
     * @param data the buffer pointing to the data to checksum.
     * @param checksums the buffer into which checksums will be
     *                  stored. Enough space must be available in this
     *                  buffer to put the checksums.
     */
    public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
        if (type.size == 0) {
            return;
        }

        if (data.hasArray() && checksums.hasArray()) {
            final int dataStart = data.arrayOffset() + data.position();
            final int sumsStart = checksums.arrayOffset() + checksums.position();
            calculateChunkedSums(data.array(), dataStart, data.remaining(), checksums.array(), sumsStart);
            return;
        }

        if (NativeCrc32.isAvailable()) {
            NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id, checksums, data);
            return;
        }

        // Pure-Java fallback: mark both buffers so the positions can be
        // restored once all chunks have been processed.
        data.mark();
        checksums.mark();
        try {
            final byte[] chunk = new byte[bytesPerChecksum];
            while (data.hasRemaining()) {
                final int len = Math.min(data.remaining(), bytesPerChecksum);
                data.get(chunk, 0, len);
                summer.reset();
                summer.update(chunk, 0, len);
                checksums.putInt((int) summer.getValue());
            }
        } finally {
            data.reset();
            checksums.reset();
        }
    }

    /**
     * Implementation of chunked calculation specifically on byte arrays. This
     * is to avoid the copy when dealing with ByteBuffers that have array backing.
     *
     * One checksum is stored, most significant byte first, for every chunk of
     * bytesPerChecksum data bytes, including a shorter trailing chunk.
     *
     * @param data the array holding the data to checksum
     * @param dataOffset index of the first data byte
     * @param dataLength number of data bytes to checksum
     * @param sums the array receiving the checksums
     * @param sumsOffset index in {@code sums} of the first byte to write
     */
    public void calculateChunkedSums(byte[] data, int dataOffset, int dataLength, byte[] sums, int sumsOffset) {
        if (type.size == 0) {
            return;
        }

        if (NativeCrc32.isAvailable()) {
            NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id, sums, sumsOffset, data, dataOffset,
                    dataLength);
            return;
        }

        int readPos = dataOffset;
        int writePos = sumsOffset;
        for (int left = dataLength; left > 0;) {
            final int len = Math.min(left, bytesPerChecksum);
            summer.reset();
            summer.update(data, readPos, len);
            readPos += len;
            left -= len;

            // Only the low 32 bits of the checksum value are stored.
            final int crc = (int) summer.getValue();
            sums[writePos++] = (byte) (crc >>> 24);
            sums[writePos++] = (byte) (crc >>> 16);
            sums[writePos++] = (byte) (crc >>> 8);
            sums[writePos++] = (byte) crc;
        }
    }

    /**
     * Two DataChecksums are equal when they have the same type and the same
     * bytesPerChecksum; the running checksum state is not compared.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof DataChecksum)) {
            return false;
        }
        final DataChecksum that = (DataChecksum) other;
        return this.type == that.type && this.bytesPerChecksum == that.bytesPerChecksum;
    }

    /** Hash code derived from the type id and bytesPerChecksum, the fields used by equals. */
    @Override
    public int hashCode() {
        final int typePart = 31 + type.id;
        return typePart * bytesPerChecksum;
    }

    /** @return a description containing the type and the chunk size. */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder("DataChecksum(type=");
        text.append(type);
        text.append(", chunkSize=").append(bytesPerChecksum);
        text.append(")");
        return text.toString();
    }

    /**
     * A dummy implementation of the Checksum interface, used when no checksum
     * is available or required for the data: updates are ignored and the
     * value is always 0.
     */
    static class ChecksumNull implements Checksum {

        public ChecksumNull() {
            // no state to initialise
        }

        /** @return always 0 */
        @Override
        public long getValue() {
            return 0L;
        }

        @Override
        public void reset() {
            // nothing to reset
        }

        @Override
        public void update(byte[] b, int off, int len) {
            // input is ignored
        }

        @Override
        public void update(int b) {
            // input is ignored
        }
    }

    /**
     * Holds the handle of the no-argument {@code java.util.zip.CRC32C}
     * constructor in its own class, so that the lookup only runs when this
     * class is first used.
     */
    private static class Java9Crc32CFactory {
        private static final MethodHandle NEW_CRC32C_MH = lookupConstructor();

        /** Looks up the public no-argument constructor of java.util.zip.CRC32C. */
        private static MethodHandle lookupConstructor() {
            try {
                final Class<?> crc32cClass = Class.forName("java.util.zip.CRC32C");
                final MethodType noArgConstructor = MethodType.methodType(void.class);
                return MethodHandles.publicLookup().findConstructor(crc32cClass, noArgConstructor);
            } catch (ReflectiveOperationException e) {
                // Should not reach here.
                throw new RuntimeException(e);
            }
        }

        /**
         * Creates a new CRC32C checksum through the constructor handle.
         *
         * @return the new checksum instance
         * @throws RuntimeException if the constructor fails; anything that is
         *     not already a RuntimeException is wrapped in one
         */
        public static Checksum createChecksum() {
            try {
                // Should throw nothing
                return (Checksum) NEW_CRC32C_MH.invoke();
            } catch (RuntimeException e) {
                throw e;
            } catch (Throwable t) {
                throw new RuntimeException(t);
            }
        }
    }
}