org.apache.hadoop.hdfs.server.datanode.CachingBlockSender.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hdfs.server.datanode.CachingBlockSender.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.server.datanode;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileDescriptor;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.SocketException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.ReadaheadPool;
import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.net.SocketOutputStream;
import org.apache.hadoop.util.DataChecksum;

/**
 * Reads a block from the disk and sends it to a recipient.
 * Data is sent from the BlockSender in the following format: <br>
 * <b>Data format:</b>
 * 
 * <pre>
 *    +--------------------------------------------------+
 *    | ChecksumHeader | Sequence of data PACKETS...     |
 *    +--------------------------------------------------+
 * </pre>
 * 
 * <b>ChecksumHeader format:</b>
 * 
 * <pre>
 *    +--------------------------------------------------+
 *    | 1 byte CHECKSUM_TYPE | 4 byte BYTES_PER_CHECKSUM |
 *    +--------------------------------------------------+
 * </pre>
 * 
 * An empty packet is sent to mark the end of block and read completion.
 * PACKET Contains a packet header, checksum and data. Amount of data
 * carried is set by BUFFER_SIZE.
 * 
 * <pre>
 *    +-----------------------------------------------------+
 *    | 4 byte packet length (excluding packet header)      |
 *    +-----------------------------------------------------+
 *    | 8 byte offset in the block | 8 byte sequence number |
 *    +-----------------------------------------------------+
 *    | 1 byte isLastPacketInBlock                          |
 *    +-----------------------------------------------------+
 *    | 4 byte Length of actual data                        |
 *    +-----------------------------------------------------+
 *    | x byte checksum data. x is defined below            |
 *    +-----------------------------------------------------+
 *    | actual data ......                                  |
 *    +-----------------------------------------------------+
 *    
 *    Data is made of Chunks. Each chunk is of length <= BYTES_PER_CHECKSUM.
 *    A checksum is calculated for each chunk.
 *    
 *    x = (length of data + BYTES_PER_CHECKSUM - 1)/BYTES_PER_CHECKSUM *
 *        CHECKSUM_SIZE
 *        
 *    CHECKSUM_SIZE depends on CHECKSUM_TYPE (usually, 4 for CRC32)
 * </pre>
 * 
 * The client reads data until it receives a packet with
 * "LastPacketInBlock" set to true or with a zero length. If there is
 * no checksum error, it replies to DataNode with OP_STATUS_CHECKSUM_OK:
 * 
 * <pre>
 *    +------------------------------+
 *    | 2 byte OP_STATUS_CHECKSUM_OK |
 *    +------------------------------+
 * </pre>
 */
class CachingBlockSender implements java.io.Closeable {

    /**
     * Log used by this class; it is the log of the caching datanode.
     */
    static final Log LOG = CachingDataNode.LOG;

    /**
     * Log that receives the client trace messages.
     */
    static final Log ClientTraceLog = DataNode.ClientTraceLog;

    /**
     * True if the JVM reports a 32 bit data model. The literal is the receiver of
     * equals() because the "sun.arch.data.model" property is not guaranteed to be
     * defined; a missing property must not break class initialization.
     */
    private static final boolean is32Bit = "32".equals(System.getProperty("sun.arch.data.model"));

    /**
     * Minimum buffer used while sending data to clients. Used only if
     * transferTo() is enabled. 64KB is not that large. It could be larger, but
     * not sure if there will be much more improvement.
     */
    private static final int MIN_BUFFER_WITH_TRANSFERTO = 64 * 1024;

    /**
     * Buffer size used to compute the number of chunks per packet when
     * transferTo() is used: the configured I/O buffer size, but never less
     * than {@link #MIN_BUFFER_WITH_TRANSFERTO}.
     */
    private static final int TRANSFERTO_BUFFER_SIZE = Math.max(HdfsConstants.IO_FILE_BUFFER_SIZE,
            MIN_BUFFER_WITH_TRANSFERTO);

    /**
     * Cache that is asked for the block's data before the block file is opened
     */
    private final BlockCache blockCache;

    /**
     * The block to read from
     */
    private final ExtendedBlock block;

    /**
     * Stream to read block data from
     */
    private InputStream blockIn;

    /**
     * Updated while using transferTo()
     */
    private long blockInPosition = -1L;

    /**
     * Stream to read checksum
     */
    private DataInputStream checksumIn;

    /**
     * Checksum utility
     */
    private final DataChecksum checksum;

    /**
     * Initial position to read
     */
    private long initialOffset;

    /**
     * Current position of read
     */
    private long offset;

    /**
     * Position of last byte to read from block file
     */
    private final long endOffset;

    /**
     * Number of bytes in chunk used for computing checksum
     */
    private final int chunkSize;

    /**
     * Number bytes of checksum computed for a chunk
     */
    private final int checksumSize;

    /**
     * If true, failure to read checksum is ignored
     */
    private final boolean corruptChecksumOk;

    /**
     * Sequence number of packet being sent
     */
    private long seqno;

    /**
     * Set to true if transferTo is allowed for sending data to the client
     */
    private final boolean transferToAllowed;

    /**
     * Set to true once entire requested byte range has been sent to the client
     */
    private boolean sentEntireByteRange;

    /**
     * When true, verify checksum while reading from checksum file
     */
    private final boolean verifyChecksum;

    /**
     * Format used to print client trace log messages
     */
    private final String clientTraceFmt;

    /**
     * In-memory checksum of the last chunk. Only set when the replica is still
     * being written and the requested range reaches the last chunk; it then
     * replaces the on-disk checksum of the last chunk in the final data packet.
     */
    private volatile ChunkChecksum lastChunkChecksum = null;

    /**
     * The file descriptor of the block being sent
     */
    private FileDescriptor blockInFd;

    /**
     * Cache-management related fields
     */
    // Length handed to the readahead pool; readahead is only requested if it is > 0
    private final long readaheadLength;

    // Whether data that has been sent is dropped from the OS buffer cache again
    private boolean shouldDropCacheBehindRead;

    // Most recent readahead request; cancelled when the sender is closed
    private ReadaheadRequest curReadahead;

    // Start offset of the next range to drop from the OS buffer cache
    private long lastCacheDropOffset;

    // Minimum number of bytes that must have been sent before the cache is dropped again
    private static final long CACHE_DROP_INTERVAL_BYTES = 1024 * 1024; // 1MB

    /**
     * Minimum length of read below which management of the OS
     * buffer cache is disabled.
     */
    private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;

    // Pool that executes the readahead requests; shared by all senders
    private static ReadaheadPool readaheadPool = ReadaheadPool.getInstance();

    /**
     * Constructor. Validates the requested byte range against the replica, opens the
     * checksum stream (if a metadata file is available) and opens the block data either
     * from the block cache or, if the block is not cached, from the block file.
     * 
     * @param blockCache
     *        cache that is asked for the block's data before the block file is opened
     * @param block
     *        Block that is being read
     * @param startOffset
     *        starting offset to read from
     * @param length
     *        length of data to read; a negative value means the visible length of the replica
     * @param corruptChecksumOk
     *        if true, a missing metadata file is tolerated and the block is sent without
     *        usable checksums
     * @param verifyChecksum
     *        verify checksum while reading the data
     * @param datanode
     *        datanode from which the block is being read
     * @param clientTraceFmt
     *        format string used to print client trace logs
     * @throws IOException
     *         if the replica is not readable, the requested range does not match the replica,
     *         the required metadata file is missing, or one of the streams cannot be opened
     */
    CachingBlockSender(final BlockCache blockCache, final ExtendedBlock block, final long startOffset, long length,
            final boolean corruptChecksumOk, final boolean verifyChecksum, final DataNode datanode,
            final String clientTraceFmt) throws IOException {

        this.blockCache = blockCache;
        this.block = block;
        this.corruptChecksumOk = corruptChecksumOk;
        this.verifyChecksum = verifyChecksum;
        this.clientTraceFmt = clientTraceFmt;
        this.readaheadLength = datanode.getDnConf().readaheadLength;
        this.shouldDropCacheBehindRead = datanode.getDnConf().dropCacheBehindReads;

        try {

            final Replica replica;
            final long replicaVisibleLength;
            synchronized (datanode.data) {
                replica = getReplica(block, datanode);
                replicaVisibleLength = replica.getVisibleLength();
            }
            // if there is a write in progress
            ChunkChecksum chunkChecksum = null;
            if (replica instanceof ReplicaBeingWritten) {
                final ReplicaBeingWritten rbw = (ReplicaBeingWritten) replica;
                waitForMinLength(rbw, startOffset + length);
                chunkChecksum = rbw.getLastChecksumAndDataLen();
            }

            if (replica.getGenerationStamp() < block.getGenerationStamp()) {
                throw new IOException(
                        "Replica gen stamp < block genstamp, block=" + block + ", replica=" + replica);
            }
            if (replicaVisibleLength < 0L) {
                throw new IOException("Replica is not readable, block=" + block + ", replica=" + replica);
            }
            if (DataNode.LOG.isDebugEnabled()) {
                DataNode.LOG.debug("block=" + block + ", replica=" + replica);
            }

            // transferToFully() fails on 32 bit platforms for block sizes >= 2GB, use normal transfer in those cases
            this.transferToAllowed = datanode.getDnConf().transferToAllowed
                    && (!is32Bit || length <= Integer.MAX_VALUE);

            /*
             * (corruptChecksumOK, meta_file_exist): operation
             * True, True: will verify checksum
             * True, False: No verify, e.g., need to read data from a corrupted file
             * False, True: will verify checksum
             * False, False: throws IOException file not found
             */
            DataChecksum csum;
            final InputStream metaIn = datanode.data.getMetaDataInputStream(block);
            if (!corruptChecksumOk || metaIn != null) {
                if (metaIn == null) {
                    // need checksum but meta-data not found
                    throw new FileNotFoundException("Meta-data not found for " + block);
                }

                this.checksumIn = new DataInputStream(
                        new BufferedInputStream(metaIn, HdfsConstants.IO_FILE_BUFFER_SIZE));

                // read and handle the common header here. For now just a version
                final BlockMetadataHeader header = BlockMetadataHeader.readHeader(this.checksumIn);
                short version = header.getVersion();
                if (version != BlockMetadataHeader.VERSION) {
                    LOG.warn("Wrong version (" + version + ") for metadata file for " + block + " ignoring ...");
                }
                csum = header.getChecksum();
            } else {
                LOG.warn("Could not find metadata file for " + block);
                // This only decides the buffer size. Use BUFFER_SIZE?
                csum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_NULL, 16 * 1024);
            }

            /*
             * If chunkSize is very large, then the metadata file is mostly
             * corrupted. For now just truncate bytesPerchecksum to blockLength.
             */
            int size = csum.getBytesPerChecksum();
            if (size > 10 * 1024 * 1024 && size > replicaVisibleLength) {
                csum = DataChecksum.newDataChecksum(csum.getChecksumType(),
                        Math.max((int) replicaVisibleLength, 10 * 1024 * 1024));
                size = csum.getBytesPerChecksum();
            }
            this.chunkSize = size;
            this.checksum = csum;
            this.checksumSize = checksum.getChecksumSize();
            length = length < 0 ? replicaVisibleLength : length;

            // end is either last byte on disk or the length for which we have a checksum
            long end = chunkChecksum != null ? chunkChecksum.getDataLength() : replica.getBytesOnDisk();
            if (startOffset < 0 || startOffset > end || (length + startOffset) > end) {
                final String msg = " Offset " + startOffset + " and length " + length + " don't match block "
                        + block + " ( blockLen " + end + " )";
                LOG.warn(datanode.getDNRegistrationForBP(block.getBlockPoolId()) + ":sendBlock() : " + msg);
                throw new IOException(msg);
            }

            // Ensure read offset is position at the beginning of chunk
            this.offset = startOffset - (startOffset % this.chunkSize);
            if (length >= 0) {
                // Ensure endOffset points to end of chunk.
                long tmpLen = startOffset + length;
                if (tmpLen % this.chunkSize != 0) {
                    tmpLen += (this.chunkSize - tmpLen % this.chunkSize);
                }
                if (tmpLen < end) {
                    // will use on-disk checksum here since the end is a stable chunk
                    end = tmpLen;
                } else if (chunkChecksum != null) {
                    // last chunk is changing. flag that we need to use in-memory checksum
                    this.lastChunkChecksum = chunkChecksum;
                }
            }
            this.endOffset = end;

            // seek to the right offsets
            if (this.offset > 0) {
                long checksumSkip = (this.offset / this.chunkSize) * this.checksumSize;
                // note blockInStream is seeked when created below
                if (checksumSkip > 0) {
                    // Should we use seek() for checksum file as well?
                    IOUtils.skipFully(this.checksumIn, checksumSkip);
                }
            }
            this.seqno = 0;

            if (DataNode.LOG.isDebugEnabled()) {
                DataNode.LOG.debug("replica=" + replica);
            }

            final CachedBlock cachedBlock = this.blockCache.lock(block);
            if (cachedBlock != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(block + " read from cache");
                }
                this.blockIn = new CachedInputStream(block, this.blockCache, cachedBlock);
                // InputStream.skip() may skip fewer bytes than requested. skipFully() either
                // positions the stream exactly at the chunk-aligned offset or fails, so data
                // and checksums cannot get out of step silently.
                IOUtils.skipFully(this.blockIn, this.offset);
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(block + " read from disk");
                }
                this.blockIn = new CachingInputStream(block, this.blockCache,
                        datanode.data.getBlockInputStream(block, this.offset), this.offset == 0L);
            }

            if (this.blockIn instanceof FileInputStream) {
                this.blockInFd = ((FileInputStream) blockIn).getFD();
            } else {
                this.blockInFd = null;
            }
        } catch (IOException ioe) {
            // release whatever has been opened so far before reporting the failure
            IOUtils.closeStream(this);
            IOUtils.closeStream(this.blockIn);
            throw ioe;
        }
    }

    /**
     * Releases everything this sender holds: optionally drops the tail of the block
     * file from the OS buffer cache, cancels a pending readahead request and closes
     * the checksum and the data stream.
     * 
     * @throws IOException
     *         if closing a stream failed; if both streams fail, the failure of the
     *         data stream is the one reported
     */
    public void close() throws IOException {

        final boolean dropTail = this.blockInFd != null && this.shouldDropCacheBehindRead && isLongRead();
        if (dropTail) {
            // drop the last few MB of the file from cache
            final long tailLength = this.offset - this.lastCacheDropOffset;
            try {
                NativeIO.posixFadviseIfPossible(this.blockInFd, this.lastCacheDropOffset, tailLength,
                        NativeIO.POSIX_FADV_DONTNEED);
            } catch (Exception e) {
                // dropping the cache is best effort only
                LOG.warn("Unable to drop cache on file close", e);
            }
        }

        if (this.curReadahead != null) {
            this.curReadahead.cancel();
        }

        IOException failure = null;

        // close checksum file
        final DataInputStream metaStream = this.checksumIn;
        if (metaStream != null) {
            try {
                metaStream.close();
            } catch (IOException e) {
                failure = e;
            }
            this.checksumIn = null;
        }

        // close data file
        final InputStream dataStream = this.blockIn;
        if (dataStream != null) {
            try {
                dataStream.close();
            } catch (IOException e) {
                failure = e;
            }
            this.blockIn = null;
            this.blockInFd = null;
        }

        // report a failure only after both streams have been handled
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Looks up the replica of the given block at the datanode.
     * 
     * @param block
     *        block whose replica is requested
     * @param datanode
     *        datanode whose data set is searched
     * @return the replica, never <code>null</code>
     * @throws ReplicaNotFoundException
     *         if the datanode has no replica for the block
     */
    private static Replica getReplica(final ExtendedBlock block, final DataNode datanode)
            throws ReplicaNotFoundException {

        final Replica found = datanode.data.getReplica(block.getBlockPoolId(), block.getBlockId());
        if (found != null) {
            return found;
        }
        throw new ReplicaNotFoundException(block);
    }

    /**
     * Wait for rbw replica to reach the length. The replica is polled every 100 ms,
     * at most 30 times.
     * 
     * @param rbw
     *        replica that is being written to
     * @param len
     *        minimum length to reach
     * @throws IOException
     *         on failing to reach the len in given wait time, or if the waiting
     *         thread is interrupted (the interrupt status is restored in that case)
     */
    private static void waitForMinLength(final ReplicaBeingWritten rbw, final long len) throws IOException {

        // Wait for 3 seconds for rbw replica to reach the minimum length
        for (int i = 0; i < 30 && rbw.getBytesOnDisk() < len; i++) {
            try {
                Thread.sleep(100);
            } catch (InterruptedException ie) {
                // keep the interrupt visible to the callers; wrapping the exception alone would clear it
                Thread.currentThread().interrupt();
                throw new IOException(ie);
            }
        }
        final long bytesOnDisk = rbw.getBytesOnDisk();
        if (bytesOnDisk < len) {
            throw new IOException(String.format("Need %d bytes, but only %d bytes available", len, bytesOnDisk));
        }
    }

    /**
     * Converts an IOException (not subclasses) to SocketException.
     * Upper layers can then tell that the failure was a socket error and not
     * one of the often more serious errors such as a disk error.
     * 
     * @param ioe
     *        exception to convert
     * @return a SocketException that carries <code>ioe</code> as cause and has its
     *         stack trace, or <code>ioe</code> itself if it is an instance of a
     *         subclass of IOException
     */
    private static IOException ioeToSocketException(IOException ioe) {

        // subclasses are passed through unchanged
        if (!IOException.class.equals(ioe.getClass())) {
            return ioe;
        }

        // the wrapper could be a new class instead of SocketException
        final SocketException wrapper = new SocketException("Original Exception : " + ioe);
        wrapper.initCause(ioe);
        // take over the stack trace so that the original trace is not truncated when printed
        wrapper.setStackTrace(ioe.getStackTrace());
        return wrapper;
    }

    /**
     * Computes how many chunks are needed to hold the given amount of data;
     * a partially filled chunk counts as a whole chunk.
     * 
     * @param datalen
     *        Length of data
     * @return number of chunks for data of given size
     */
    private int numberOfChunks(final long datalen) {
        final long roundedUp = datalen + this.chunkSize - 1;
        return (int) (roundedUp / this.chunkSize);
    }

    /**
     * Sends a packet with up to maxChunks chunks of data. The packet consists of the
     * packet header, the checksums of the chunks and the chunk data. If no data is
     * left between the current offset and the end offset, an empty packet is sent.
     * 
     * @param pkt
     *        buffer used for writing packet data
     * @param maxChunks
     *        maximum number of chunks to send
     * @param out
     *        stream to send data to
     * @param transferTo
     *        use transferTo to send data
     * @param throttler
     *        used for throttling data transfer bandwidth
     * @return number of data bytes sent with the packet, not counting header and checksums
     * @throws IOException
     *         if reading the block or its checksums fails, if checksum verification fails,
     *         or if writing to <code>out</code> fails; a plain IOException raised while
     *         writing is reported as SocketException
     */
    private int sendPacket(final ByteBuffer pkt, final int maxChunks, final OutputStream out,
            final boolean transferTo, final DataTransferThrottler throttler) throws IOException {

        final int dataLen = (int) Math.min(this.endOffset - this.offset, (this.chunkSize * (long) maxChunks));

        final int numChunks = numberOfChunks(dataLen); // Number of chunks be sent in the packet
        final int checksumDataLen = numChunks * checksumSize;
        final int packetLen = dataLen + checksumDataLen + 4;
        boolean lastDataPacket = offset + dataLen == endOffset && dataLen > 0;

        writePacketHeader(pkt, dataLen, packetLen);

        // checksums are written directly behind the header, the data directly behind the checksums
        final int checksumOff = pkt.position();
        final byte[] buf = pkt.array();

        if (this.checksumSize > 0 && this.checksumIn != null) {
            readChecksum(buf, checksumOff, checksumDataLen);

            // write in progress that we need to use to get last checksum
            if (lastDataPacket && this.lastChunkChecksum != null) {
                final int start = checksumOff + checksumDataLen - checksumSize;
                final byte[] updatedChecksum = this.lastChunkChecksum.getChecksum();

                if (updatedChecksum != null) {
                    System.arraycopy(updatedChecksum, 0, buf, start, checksumSize);
                }
            }
        }

        final int dataOff = checksumOff + checksumDataLen;
        if (!transferTo) { // normal transfer
            IOUtils.readFully(this.blockIn, buf, dataOff, dataLen);

            if (this.verifyChecksum) {
                verifyChecksum(buf, dataOff, dataLen, numChunks, checksumOff);
            }
        }

        try {
            if (transferTo) {
                final SocketOutputStream sockOut = (SocketOutputStream) out;
                sockOut.write(buf, 0, dataOff); // First write checksum

                // no need to flush. since we know out is not a buffered stream.
                sockOut.transferToFully(((FileInputStream) this.blockIn).getChannel(), this.blockInPosition,
                        dataLen);
                this.blockInPosition += dataLen;
            } else {
                // normal transfer
                out.write(buf, 0, dataOff + dataLen);
            }
        } catch (IOException e) {
            /*
             * Exception while writing to the client. Connection closure from
             * the other end is mostly the case and we do not care much about
             * it. But other things can go wrong, especially in transferTo(),
             * which we do not want to ignore.
             * The message parsing below should not be considered as a good
             * coding example. NEVER do it to drive a program logic. NEVER.
             * It was done here because the NIO throws an IOException for EPIPE.
             */
            final String ioem = e.getMessage();
            // An exception without a message is not one of the expected connection
            // failures, so it is logged; the null check also keeps a
            // NullPointerException from replacing the actual I/O failure.
            if (ioem == null || (!ioem.startsWith("Broken pipe") && !ioem.startsWith("Connection reset"))) {
                LOG.error("BlockSender.sendChunks() exception: ", e);
            }
            throw ioeToSocketException(e);
        }

        if (throttler != null) { // rebalancing so throttle
            throttler.throttle(packetLen);
        }

        return dataLen;
    }

    /**
     * Read checksum into given buffer. Nothing is read if the checksum type has no
     * checksum bytes or if no checksum stream is open.
     * <p>
     * If reading fails, the checksum stream is closed and discarded. With
     * <code>corruptChecksumOk</code> set, the checksum range of the buffer is then
     * filled with zeros; otherwise the failure is rethrown.
     * 
     * @param buf
     *        buffer to read the checksum into
     * @param checksumOffset
     *        offset at which to write the checksum into buf
     * @param checksumLen
     *        length of checksum to write
     * @throws IOException
     *         on error
     */
    private void readChecksum(byte[] buf, final int checksumOffset, final int checksumLen) throws IOException {
        // either condition alone means that there is nothing to read from
        if (this.checksumSize <= 0 || this.checksumIn == null) {
            return;
        }
        try {
            this.checksumIn.readFully(buf, checksumOffset, checksumLen);
        } catch (IOException e) {
            LOG.warn(" Could not read or failed to veirfy checksum for data" + " at offset " + offset
                    + " for block " + block, e);
            IOUtils.closeStream(this.checksumIn);
            this.checksumIn = null;
            if (this.corruptChecksumOk) {
                if (checksumLen > 0) {
                    // Just fill the checksum range with zeros. Arrays.fill() expects an
                    // exclusive end index, not a length.
                    Arrays.fill(buf, checksumOffset, checksumOffset + checksumLen, (byte) 0);
                }
            } else {
                throw e;
            }
        }
    }

    /**
     * Recomputes the checksum of every chunk in the buffer and compares it with the
     * checksum stored in the buffer for that chunk.
     * 
     * @param buf
     *        buffer that has checksum and data
     * @param dataOffset
     *        position where data is written in the buf
     * @param datalen
     *        length of data
     * @param numChunks
     *        number of chunks corresponding to data
     * @param checksumOffset
     *        offset where checksum is written in the buf
     * @throws ChecksumException
     *         on failed checksum verification; the reported position is the block
     *         offset of the first chunk that failed
     */
    public void verifyChecksum(final byte[] buf, final int dataOffset, final int datalen, final int numChunks,
            final int checksumOffset) throws ChecksumException {

        int remaining = datalen;
        int dataPos = dataOffset;
        int checksumPos = checksumOffset;

        int chunk = 0;
        while (chunk < numChunks) {
            // the last chunk may be shorter than a full chunk
            final int chunkLen = Math.min(remaining, this.chunkSize);

            this.checksum.reset();
            this.checksum.update(buf, dataPos, chunkLen);
            if (!this.checksum.compare(buf, checksumPos)) {
                final long failedPos = this.offset + datalen - remaining;
                throw new ChecksumException("Checksum failed at " + failedPos, failedPos);
            }

            remaining -= chunkLen;
            dataPos += chunkLen;
            checksumPos += this.checksumSize;
            chunk++;
        }
    }

    /**
     * sendBlock() is used to read block and its metadata and stream the data to
     * either a client or to another datanode.
     * <p>
     * The data is sent packet by packet until the end offset is reached, followed by
     * an empty packet that marks the end of the block. The sender is closed before
     * this method returns, whether sending succeeded or not.
     * 
     * @param out
     *        stream to which the block is written to
     * @param baseStream
     *        optional. if non-null, <code>out</code> is assumed to
     *        be a wrapper over this stream. This enables optimizations for
     *        sending the data, e.g. {@link SocketOutputStream#transferToFully(FileChannel, long, int)}.
     * @param throttler
     *        for sending data.
     * @return total bytes read, including checksum data.
     * @throws IOException
     *         if <code>out</code> is null, or if reading the block or writing to the
     *         stream fails
     */
    long sendBlock(final DataOutputStream out, final OutputStream baseStream, final DataTransferThrottler throttler)
            throws IOException {

        if (out == null) {
            throw new IOException("out stream is null");
        }

        this.initialOffset = this.offset;
        long totalRead = 0;
        OutputStream streamForSendChunks = out;

        // nothing has been dropped from the OS buffer cache yet
        this.lastCacheDropOffset = this.initialOffset;

        if (isLongRead() && this.blockInFd != null) {
            // Advise that this file descriptor will be accessed sequentially.
            NativeIO.posixFadviseIfPossible(this.blockInFd, 0, 0, NativeIO.POSIX_FADV_SEQUENTIAL);
        }

        // Trigger readahead of beginning of file if configured.
        manageOsCache(); // TODO: Take a closer look at this

        // the start time is only needed for the client trace message
        final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
        try {
            int maxChunksPerPacket;
            int pktSize = PacketHeader.PKT_HEADER_LEN;
            // transferTo bypasses the packet buffer, so it is only possible if the data need not be
            // verified, the base stream is a socket stream and the block data comes from a file stream
            final boolean transferTo = this.transferToAllowed && !this.verifyChecksum
                    && baseStream instanceof SocketOutputStream && this.blockIn instanceof FileInputStream;
            if (transferTo) {
                final FileChannel fileChannel = ((FileInputStream) this.blockIn).getChannel();
                this.blockInPosition = fileChannel.position();
                // write to the base stream directly instead of the wrapping stream
                streamForSendChunks = baseStream;
                maxChunksPerPacket = numberOfChunks(TRANSFERTO_BUFFER_SIZE);

                // Smaller packet size to only hold checksum when doing transferTo
                pktSize += this.checksumSize * maxChunksPerPacket;
            } else {
                maxChunksPerPacket = Math.max(1, numberOfChunks(HdfsConstants.IO_FILE_BUFFER_SIZE));
                // Packet size includes both checksum and data
                pktSize += (this.chunkSize + this.checksumSize) * maxChunksPerPacket;
            }

            // the same buffer is reused for all packets of the block
            ByteBuffer pktBuf = ByteBuffer.allocate(pktSize);

            while (this.endOffset > this.offset) {
                manageOsCache();
                long len = sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks, transferTo, throttler);
                this.offset += len;
                totalRead += len + (numberOfChunks(len) * this.checksumSize);
                this.seqno++;
            }
            try {
                // send an empty packet to mark the end of the block
                sendPacket(pktBuf, maxChunksPerPacket, streamForSendChunks, transferTo, throttler);
                out.flush();
            } catch (IOException e) { // socket error
                throw ioeToSocketException(e);
            }

            this.sentEntireByteRange = true;
        } finally {
            if (this.clientTraceFmt != null) {
                final long endTime = System.nanoTime();
                ClientTraceLog.info(
                        String.format(this.clientTraceFmt, totalRead, this.initialOffset, endTime - startTime));
            }
            // the streams are released even if sending failed
            close();
        }
        return totalRead;
    }

    /**
     * Manage the OS buffer cache by performing read-ahead
     * and drop-behind. Nothing is done for short reads or if the file descriptor
     * of the block file is not known.
     * 
     * @throws IOException
     *         if advising the operating system fails
     */
    private void manageOsCache() throws IOException {
        if (!isLongRead() || blockInFd == null) {
            // don't manage cache manually for short-reads, like
            // HBase random read workloads.
            return;
        }

        // Perform readahead if necessary
        if (readaheadLength > 0 && readaheadPool != null) {
            curReadahead = readaheadPool.readaheadStream(clientTraceFmt, blockInFd, offset, readaheadLength,
                    Long.MAX_VALUE, curReadahead);
        }

        // Drop what we've just read from cache, since we aren't
        // likely to need it again
        long nextCacheDropOffset = lastCacheDropOffset + CACHE_DROP_INTERVAL_BYTES;
        if (shouldDropCacheBehindRead && offset >= nextCacheDropOffset) {
            long dropLength = offset - lastCacheDropOffset;
            if (dropLength >= 1024) {
                NativeIO.posixFadviseIfPossible(blockInFd, lastCacheDropOffset, dropLength,
                        NativeIO.POSIX_FADV_DONTNEED);
            }
            // Everything up to the current offset has been handled. Continuing from the
            // offset keeps the next drop from covering the same range a second time.
            lastCacheDropOffset = offset;
        }
    }

    /**
     * @return true if more bytes than {@link #LONG_READ_THRESHOLD_BYTES} are left
     *         between the current offset and the end offset
     */
    private boolean isLongRead() {
        final long remaining = this.endOffset - this.offset;
        return remaining > LONG_READ_THRESHOLD_BYTES;
    }

    /**
     * Resets {@code pkt} and writes the header of the next packet into it. The header
     * carries the current offset and sequence number; a packet without data is
     * flagged as the last packet in the block.
     * 
     * @param pkt
     *        buffer that receives the header
     * @param dataLen
     *        number of data bytes of the packet
     * @param packetLen
     *        packet length to announce in the header
     */
    private void writePacketHeader(ByteBuffer pkt, int dataLen, int packetLen) {
        final boolean lastPacketInBlock = dataLen == 0;
        pkt.clear();
        new PacketHeader(packetLen, this.offset, this.seqno, lastPacketInBlock, dataLen).putInBuffer(pkt);
    }

    /**
     * @return true once the entire requested byte range, including the empty packet
     *         that ends the block, has been sent
     */
    boolean didSendEntireByteRange() {
        return this.sentEntireByteRange;
    }

    /**
     * Returns the checksum utility of this sender.
     * 
     * @return the checksum type that will be used with this block transfer.
     */
    DataChecksum getChecksum() {
        return this.checksum;
    }

    /**
     * Returns the current read position.
     * 
     * @return the offset into the block file where the sender is currently
     *         reading.
     */
    long getOffset() {
        return this.offset;
    }
}