org.apache.bookkeeper.proto.PerChannelBookieClient.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.bookkeeper.proto.PerChannelBookieClient.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.bookkeeper.proto;

import static org.apache.bookkeeper.client.LedgerHandle.INVALID_ENTRY_ID;

import com.google.common.base.Joiner;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import com.google.protobuf.ExtensionRegistry;
import com.google.protobuf.UnsafeByteOperations;

import io.netty.bootstrap.Bootstrap;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.Unpooled;
import io.netty.buffer.UnpooledByteBufAllocator;
import io.netty.channel.Channel;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelFutureListener;
import io.netty.channel.ChannelHandler.Sharable;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelOption;
import io.netty.channel.ChannelPipeline;
import io.netty.channel.ChannelPromise;
import io.netty.channel.DefaultEventLoopGroup;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.WriteBufferWaterMark;
import io.netty.channel.epoll.EpollEventLoopGroup;
import io.netty.channel.epoll.EpollSocketChannel;
import io.netty.channel.local.LocalChannel;
import io.netty.channel.socket.nio.NioSocketChannel;
import io.netty.channel.unix.Errors.NativeIoException;
import io.netty.handler.codec.CorruptedFrameException;
import io.netty.handler.codec.DecoderException;
import io.netty.handler.codec.LengthFieldBasedFrameDecoder;
import io.netty.handler.codec.LengthFieldPrepender;
import io.netty.handler.codec.TooLongFrameException;
import io.netty.handler.ssl.SslHandler;
import io.netty.util.Recycler;
import io.netty.util.Recycler.Handle;
import io.netty.util.concurrent.Future;
import io.netty.util.concurrent.GenericFutureListener;

import java.io.IOException;
import java.net.SocketAddress;
import java.net.UnknownHostException;
import java.security.cert.Certificate;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.BiPredicate;

import javax.net.ssl.SSLHandshakeException;
import javax.net.ssl.SSLPeerUnverifiedException;

import org.apache.bookkeeper.auth.BookKeeperPrincipal;
import org.apache.bookkeeper.auth.ClientAuthProvider;
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.client.BookKeeperClientStats;
import org.apache.bookkeeper.client.BookieInfoReader.BookieInfo;
import org.apache.bookkeeper.client.api.WriteFlag;
import org.apache.bookkeeper.common.util.MdcUtils;
import org.apache.bookkeeper.common.util.OrderedExecutor;
import org.apache.bookkeeper.conf.ClientConfiguration;
import org.apache.bookkeeper.net.BookieSocketAddress;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ForceLedgerCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetBookieInfoCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GetListOfEntriesOfLedgerCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadEntryCallbackCtx;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.ReadLacCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.StartTLSCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback;
import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteLacCallback;
import org.apache.bookkeeper.proto.BookkeeperProtocol.AddRequest;
import org.apache.bookkeeper.proto.BookkeeperProtocol.AddResponse;
import org.apache.bookkeeper.proto.BookkeeperProtocol.BKPacketHeader;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ForceLedgerRequest;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ForceLedgerResponse;
import org.apache.bookkeeper.proto.BookkeeperProtocol.GetBookieInfoRequest;
import org.apache.bookkeeper.proto.BookkeeperProtocol.GetBookieInfoResponse;
import org.apache.bookkeeper.proto.BookkeeperProtocol.GetListOfEntriesOfLedgerRequest;
import org.apache.bookkeeper.proto.BookkeeperProtocol.GetListOfEntriesOfLedgerResponse;
import org.apache.bookkeeper.proto.BookkeeperProtocol.OperationType;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ProtocolVersion;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadLacRequest;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadLacResponse;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadRequest;
import org.apache.bookkeeper.proto.BookkeeperProtocol.ReadResponse;
import org.apache.bookkeeper.proto.BookkeeperProtocol.Request;
import org.apache.bookkeeper.proto.BookkeeperProtocol.Response;
import org.apache.bookkeeper.proto.BookkeeperProtocol.StatusCode;
import org.apache.bookkeeper.proto.BookkeeperProtocol.WriteLacRequest;
import org.apache.bookkeeper.proto.BookkeeperProtocol.WriteLacResponse;
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.NullStatsLogger;
import org.apache.bookkeeper.stats.OpStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.stats.annotations.StatsDoc;
import org.apache.bookkeeper.tls.SecurityException;
import org.apache.bookkeeper.tls.SecurityHandlerFactory;
import org.apache.bookkeeper.tls.SecurityHandlerFactory.NodeType;
import org.apache.bookkeeper.util.AvailabilityOfEntriesOfLedger;
import org.apache.bookkeeper.util.ByteBufList;
import org.apache.bookkeeper.util.MathUtils;
import org.apache.bookkeeper.util.SafeRunnable;
import org.apache.bookkeeper.util.StringUtils;
import org.apache.bookkeeper.util.collections.ConcurrentOpenHashMap;
import org.apache.bookkeeper.util.collections.SynchronizedHashMultiMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

/**
 * This class manages all details of connection to a particular bookie. It also
 * has reconnect logic if a connection to a bookie fails.
 */
@StatsDoc(name = BookKeeperClientStats.CHANNEL_SCOPE, help = "Per channel bookie client stats")
@Sharable
public class PerChannelBookieClient extends ChannelInboundHandlerAdapter {

    // Class-wide SLF4J logger.
    static final Logger LOG = LoggerFactory.getLogger(PerChannelBookieClient.class);

    // this set contains the bookie error return codes that we do not consider for a bookie to be "faulty"
    // (wrapped in an unmodifiable view, so it cannot be altered after class initialization)
    private static final Set<Integer> expectedBkOperationErrors = Collections
            .unmodifiableSet(Sets.newHashSet(BKException.Code.BookieHandleNotAvailableException,
                    BKException.Code.NoSuchEntryException, BKException.Code.NoSuchLedgerExistsException,
                    BKException.Code.LedgerFencedException, BKException.Code.LedgerExistException,
                    BKException.Code.DuplicateEntryIdException, BKException.Code.WriteOnReadOnlyBookieException));
    // Priority written into the v3 packet header by addEntry when FLAG_HIGH_PRIORITY is set in its options.
    private static final int DEFAULT_HIGH_PRIORITY_VALUE = 100; // We may add finer grained priority later.
    // Static, hence shared by every PerChannelBookieClient instance in the JVM; also handed to the
    // client-side auth handler when the channel pipeline is built in connect().
    private static final AtomicLong txnIdGenerator = new AtomicLong(0);

    // Address of the bookie this client talks to.
    final BookieSocketAddress addr;
    // Event loop group used by connect(); the constructor substitutes a DefaultEventLoopGroup for local bookies.
    final EventLoopGroup eventLoopGroup;
    // Buffer allocator installed on the channel (ChannelOption.ALLOCATOR) and passed to the security handler factory.
    final ByteBufAllocator allocator;
    // Executor used to run callbacks off the calling thread (e.g. the early-failure paths of forceLedger/addEntry).
    final OrderedExecutor executor;
    // conf.getAddEntryTimeout(), converted from seconds to nanoseconds.
    final long addEntryTimeoutNanos;
    // conf.getReadEntryTimeout(), converted from seconds to nanoseconds.
    final long readEntryTimeoutNanos;
    // conf.getNettyMaxFrameSizeBytes(); upper bound given to the LengthFieldBasedFrameDecoder in connect().
    final int maxFrameSize;
    // conf.getBookieInfoTimeout(), stored as returned (unit not visible here — TODO confirm).
    final int getBookieInfoTimeout;
    // conf.getStartTLSTimeout(), stored as returned (unit not visible here — TODO confirm).
    final int startTLSTimeout;

    // Outstanding requests keyed by their completion key. writeLac/forceLedger register their completion here
    // before the request is handed to writeAndFlush.
    private final ConcurrentOpenHashMap<CompletionKey, CompletionValue> completionObjects = new ConcurrentOpenHashMap<CompletionKey, CompletionValue>();

    // Map that hold duplicated read requests. The idea is to only use this map (synchronized) when there is a duplicate
    // read request for the same ledgerId/entryId
    private final SynchronizedHashMultiMap<CompletionKey, CompletionValue> completionObjectsV2Conflicts = new SynchronizedHashMultiMap<>();

    // Per-bookie stats scope: parent logger -> CHANNEL_SCOPE -> "<host>_<port>" (built in the constructor).
    // Every logger and counter below is obtained from this scoped logger.
    private final StatsLogger statsLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_READ_OP, help = "channel stats of read entries requests")
    private final OpStatsLogger readEntryOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_TIMEOUT_READ, help = "timeout stats of read entries requests")
    private final OpStatsLogger readTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_ADD_OP, help = "channel stats of add entries requests")
    private final OpStatsLogger addEntryOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_WRITE_LAC_OP, help = "channel stats of write_lac requests")
    private final OpStatsLogger writeLacOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_FORCE_OP, help = "channel stats of force requests")
    private final OpStatsLogger forceLedgerOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_READ_LAC_OP, help = "channel stats of read_lac requests")
    private final OpStatsLogger readLacOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_TIMEOUT_ADD, help = "timeout stats of add entries requests")
    private final OpStatsLogger addTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_TIMEOUT_WRITE_LAC, help = "timeout stats of write_lac requests")
    private final OpStatsLogger writeLacTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_TIMEOUT_FORCE, help = "timeout stats of force requests")
    private final OpStatsLogger forceLedgerTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_TIMEOUT_READ_LAC, help = "timeout stats of read_lac requests")
    private final OpStatsLogger readLacTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.GET_BOOKIE_INFO_OP, help = "channel stats of get_bookie_info requests")
    private final OpStatsLogger getBookieInfoOpLogger;
    @StatsDoc(name = BookKeeperClientStats.TIMEOUT_GET_BOOKIE_INFO, help = "timeout stats of get_bookie_info requests")
    private final OpStatsLogger getBookieInfoTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_START_TLS_OP, help = "channel stats of start_tls requests")
    private final OpStatsLogger startTLSOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CHANNEL_TIMEOUT_START_TLS_OP, help = "timeout stats of start_tls requests")
    private final OpStatsLogger startTLSTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.CLIENT_CONNECT_TIMER, help = "channel stats of connect requests")
    private final OpStatsLogger connectTimer;
    // Registered in the constructor under GET_LIST_OF_ENTRIES_OF_LEDGER_OP.
    // NOTE(review): unlike its siblings this logger carries no @StatsDoc annotation.
    private final OpStatsLogger getListOfEntriesOfLedgerCompletionOpLogger;
    // Registered in the constructor under TIMEOUT_GET_LIST_OF_ENTRIES_OF_LEDGER.
    // NOTE(review): unlike its siblings this logger carries no @StatsDoc annotation.
    private final OpStatsLogger getListOfEntriesOfLedgerCompletionTimeoutOpLogger;
    @StatsDoc(name = BookKeeperClientStats.NETTY_EXCEPTION_CNT, help = "the number of exceptions received from this channel")
    private final Counter exceptionCounter;
    @StatsDoc(name = BookKeeperClientStats.ADD_OP_OUTSTANDING, help = "the number of outstanding add_entry requests")
    private final Counter addEntryOutstanding;
    // Counter registered under READ_OP_OUTSTANDING; the help text previously described add_entry by mistake.
    @StatsDoc(name = BookKeeperClientStats.READ_OP_OUTSTANDING, help = "the number of outstanding read_entry requests")
    private final Counter readEntryOutstanding;
    /* collect stats on all Ops that flows through netty pipeline */
    @StatsDoc(name = BookKeeperClientStats.NETTY_OPS, help = "channel stats for all operations flowing through netty pipeline")
    private final OpStatsLogger nettyOpLogger;
    @StatsDoc(name = BookKeeperClientStats.ACTIVE_NON_TLS_CHANNEL_COUNTER, help = "the number of active non-tls channels")
    private final Counter activeNonTlsChannelCounter;
    @StatsDoc(name = BookKeeperClientStats.ACTIVE_TLS_CHANNEL_COUNTER, help = "the number of active tls channels")
    private final Counter activeTlsChannelCounter;
    @StatsDoc(name = BookKeeperClientStats.FAILED_CONNECTION_COUNTER, help = "the number of failed connections")
    private final Counter failedConnectionCounter;
    @StatsDoc(name = BookKeeperClientStats.FAILED_TLS_HANDSHAKE_COUNTER, help = "the number of failed tls handshakes")
    private final Counter failedTlsHandshakeCounter;

    // conf.getUseV2WireProtocol(): selects the v2 request path in addEntry and makes forceLedger fail with
    // IllegalOpException; also passed to the response decoder and auth handler in connect().
    private final boolean useV2WireProtocol;
    // conf.getPreserveMdcForTaskExecution(), captured at construction time.
    private final boolean preserveMdcForTaskExecution;

    /**
     * Connection-related state shared between threads. These fields are declared
     * volatile so that they can be read without holding a lock (for example the
     * unlocked fast path in connectIfNeededAndDoOp reads {@code channel}). Within the
     * code visible here, {@code pendingOps} is only added to while synchronized on this
     * instance. NOTE(review): confirm that all other writers also hold that lock.
     */
    private volatile Queue<GenericCallback<PerChannelBookieClient>> pendingOps = new ArrayDeque<GenericCallback<PerChannelBookieClient>>();
    // Current netty channel to the bookie; null until a channel has been assigned.
    volatile Channel channel = null;
    // View of this connection handed to the client-side auth handler (anonymous implementation built in the constructor).
    private final ClientConnectionPeer connectionPeer;
    // Principal set through connectionPeer.setAuthorizedId(); ANONYMOUS until then.
    private volatile BookKeeperPrincipal authorizedId = BookKeeperPrincipal.ANONYMOUS;

    /**
     * States a {@link PerChannelBookieClient} connection can be in. A new client starts out
     * as {@code DISCONNECTED}.
     */
    enum ConnectionState {
        DISCONNECTED,
        CONNECTING,
        CONNECTED,
        CLOSED,
        START_TLS
    }

    // Current connection state; set to DISCONNECTED by the constructor and read without a lock on the
    // fast path of connectIfNeededAndDoOp, hence volatile.
    volatile ConnectionState state;
    // completeOperation holds the read lock while invoking callbacks.
    // NOTE(review): presumably close() takes the write lock — confirm against the rest of the class.
    final ReentrantReadWriteLock closeLock = new ReentrantReadWriteLock();
    // Client configuration; also consulted in connect() for socket and buffer options.
    private final ClientConfiguration conf;

    // Pool this client belongs to; null when created through the convenience constructors.
    private final PerChannelBookieClientPool pcbcPool;
    // Factory given to the client-side auth handler installed in the pipeline.
    private final ClientAuthProvider.Factory authProviderFactory;
    // Protobuf extension registry given to the request encoder and response decoder.
    private final ExtensionRegistry extRegistry;
    // Optional security handler factory; when non-null it is initialised in the constructor.
    private final SecurityHandlerFactory shFactory;
    // Writability flag exposed through isWritable()/setWritable(); starts out true.
    private volatile boolean isWritable = true;

    /**
     * Creates a client for the bookie at {@code addr} with a default {@link ClientConfiguration},
     * {@link NullStatsLogger#INSTANCE} as stats logger, and no auth provider factory, extension
     * registry or client pool.
     *
     * @param executor
     *          executor handed to the delegated constructor
     * @param eventLoopGroup
     *          netty event loop group to connect with
     * @param addr
     *          address of the bookie
     * @throws SecurityException
     *          if the delegated constructor throws it
     */
    public PerChannelBookieClient(OrderedExecutor executor, EventLoopGroup eventLoopGroup, BookieSocketAddress addr)
            throws SecurityException {
        this(new ClientConfiguration(),
                executor,
                eventLoopGroup,
                addr,
                NullStatsLogger.INSTANCE,
                null /* authProviderFactory */,
                null /* extRegistry */,
                null /* pcbcPool */);
    }

    /**
     * Creates a client for the bookie at {@code addr} with a default {@link ClientConfiguration},
     * {@link NullStatsLogger#INSTANCE} as stats logger and no client pool, using the supplied
     * authentication factory and protobuf extension registry.
     *
     * @param executor
     *          executor handed to the delegated constructor
     * @param eventLoopGroup
     *          netty event loop group to connect with
     * @param addr
     *          address of the bookie
     * @param authProviderFactory
     *          factory for the client-side auth provider
     * @param extRegistry
     *          protobuf extension registry
     * @throws SecurityException
     *          if the delegated constructor throws it
     */
    public PerChannelBookieClient(OrderedExecutor executor, EventLoopGroup eventLoopGroup, BookieSocketAddress addr,
            ClientAuthProvider.Factory authProviderFactory, ExtensionRegistry extRegistry)
            throws SecurityException {
        this(new ClientConfiguration(),
                executor,
                eventLoopGroup,
                addr,
                NullStatsLogger.INSTANCE,
                authProviderFactory,
                extRegistry,
                null /* pcbcPool */);
    }

    /**
     * Creates a client that uses {@link UnpooledByteBufAllocator#DEFAULT} and no security handler
     * factory.
     *
     * @param conf
     *          client configuration
     * @param executor
     *          executor handed to the delegated constructor
     * @param eventLoopGroup
     *          netty event loop group to connect with
     * @param addr
     *          address of the bookie
     * @param parentStatsLogger
     *          stats logger under which the per-channel stats scope is created; {@code null} is
     *          treated as {@link NullStatsLogger#INSTANCE}
     * @param authProviderFactory
     *          factory for the client-side auth provider
     * @param extRegistry
     *          protobuf extension registry
     * @param pcbcPool
     *          pool this client belongs to, may be {@code null}
     * @throws SecurityException
     *          if the delegated constructor throws it
     */
    public PerChannelBookieClient(ClientConfiguration conf, OrderedExecutor executor, EventLoopGroup eventLoopGroup,
            BookieSocketAddress addr, StatsLogger parentStatsLogger, ClientAuthProvider.Factory authProviderFactory,
            ExtensionRegistry extRegistry, PerChannelBookieClientPool pcbcPool) throws SecurityException {
        // Forward the caller's stats logger. It used to be dropped in favour of NullStatsLogger.INSTANCE,
        // which silently disabled all channel stats for clients built through this constructor.
        // A null logger was harmless before (it was ignored), so keep accepting it.
        this(conf, executor, eventLoopGroup, UnpooledByteBufAllocator.DEFAULT, addr,
                parentStatsLogger != null ? parentStatsLogger : NullStatsLogger.INSTANCE,
                authProviderFactory, extRegistry, pcbcPool, null);
    }

    /**
     * Fully parameterised constructor. Captures the configuration, registers all per-channel
     * stats under {@code CHANNEL_SCOPE/<host>_<port>} of {@code parentStatsLogger}, and builds the
     * {@link ClientConnectionPeer} exposed to the auth handler. No connection is opened here; the
     * client starts in {@link ConnectionState#DISCONNECTED}.
     *
     * @param conf
     *          client configuration
     * @param executor
     *          executor stored on the client for running callbacks
     * @param eventLoopGroup
     *          netty event loop group; replaced by a fresh {@link DefaultEventLoopGroup} when
     *          {@code addr} is a local bookie
     * @param allocator
     *          buffer allocator for the channel
     * @param addr
     *          address of the bookie
     * @param parentStatsLogger
     *          stats logger the per-channel scope is derived from
     * @param authProviderFactory
     *          factory for the client-side auth provider
     * @param extRegistry
     *          protobuf extension registry
     * @param pcbcPool
     *          pool this client belongs to, may be {@code null}
     * @param shFactory
     *          security handler factory, may be {@code null}; initialised here when present
     * @throws SecurityException
     *          declared on this constructor; presumably raised by {@code shFactory.init} — confirm
     */
    public PerChannelBookieClient(ClientConfiguration conf, OrderedExecutor executor, EventLoopGroup eventLoopGroup,
            ByteBufAllocator allocator, BookieSocketAddress addr, StatsLogger parentStatsLogger,
            ClientAuthProvider.Factory authProviderFactory, ExtensionRegistry extRegistry,
            PerChannelBookieClientPool pcbcPool, SecurityHandlerFactory shFactory) throws SecurityException {
        this.maxFrameSize = conf.getNettyMaxFrameSizeBytes();
        this.conf = conf;
        this.addr = addr;
        this.executor = executor;
        this.allocator = allocator;
        this.pcbcPool = pcbcPool;
        this.authProviderFactory = authProviderFactory;
        this.extRegistry = extRegistry;
        this.shFactory = shFactory;
        this.state = ConnectionState.DISCONNECTED;

        // Local bookies get their own in-VM event loop group instead of the shared one.
        this.eventLoopGroup = LocalBookiesRegistry.isLocalBookie(addr)
                ? new DefaultEventLoopGroup()
                : eventLoopGroup;

        // Timeouts and protocol switches taken from the configuration.
        this.addEntryTimeoutNanos = TimeUnit.SECONDS.toNanos(conf.getAddEntryTimeout());
        this.readEntryTimeoutNanos = TimeUnit.SECONDS.toNanos(conf.getReadEntryTimeout());
        this.getBookieInfoTimeout = conf.getBookieInfoTimeout();
        this.startTLSTimeout = conf.getStartTLSTimeout();
        this.useV2WireProtocol = conf.getUseV2WireProtocol();
        this.preserveMdcForTaskExecution = conf.getPreserveMdcForTaskExecution();

        if (shFactory != null) {
            shFactory.init(NodeType.Client, conf, allocator);
        }

        // Stats scope name: host with '.' and '-' replaced by '_', then "_<port>".
        final String scopeName = addr.getHostName().replace('.', '_').replace('-', '_') + "_" + addr.getPort();
        final StatsLogger scoped = parentStatsLogger.scope(BookKeeperClientStats.CHANNEL_SCOPE).scope(scopeName);
        this.statsLogger = scoped;

        this.readEntryOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_READ_OP);
        this.addEntryOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_ADD_OP);
        this.writeLacOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_WRITE_LAC_OP);
        this.forceLedgerOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_FORCE_OP);
        this.readLacOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_READ_LAC_OP);
        this.getBookieInfoOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.GET_BOOKIE_INFO_OP);
        this.getListOfEntriesOfLedgerCompletionOpLogger =
                scoped.getOpStatsLogger(BookKeeperClientStats.GET_LIST_OF_ENTRIES_OF_LEDGER_OP);
        this.readTimeoutOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_READ);
        this.addTimeoutOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_ADD);
        this.writeLacTimeoutOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_WRITE_LAC);
        this.forceLedgerTimeoutOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_FORCE);
        this.readLacTimeoutOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_READ_LAC);
        this.getBookieInfoTimeoutOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.TIMEOUT_GET_BOOKIE_INFO);
        this.startTLSOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_START_TLS_OP);
        this.startTLSTimeoutOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.CHANNEL_TIMEOUT_START_TLS_OP);
        this.getListOfEntriesOfLedgerCompletionTimeoutOpLogger =
                scoped.getOpStatsLogger(BookKeeperClientStats.TIMEOUT_GET_LIST_OF_ENTRIES_OF_LEDGER);
        this.exceptionCounter = scoped.getCounter(BookKeeperClientStats.NETTY_EXCEPTION_CNT);
        this.connectTimer = scoped.getOpStatsLogger(BookKeeperClientStats.CLIENT_CONNECT_TIMER);
        this.addEntryOutstanding = scoped.getCounter(BookKeeperClientStats.ADD_OP_OUTSTANDING);
        this.readEntryOutstanding = scoped.getCounter(BookKeeperClientStats.READ_OP_OUTSTANDING);
        this.nettyOpLogger = scoped.getOpStatsLogger(BookKeeperClientStats.NETTY_OPS);
        this.activeNonTlsChannelCounter = scoped.getCounter(BookKeeperClientStats.ACTIVE_NON_TLS_CHANNEL_COUNTER);
        this.activeTlsChannelCounter = scoped.getCounter(BookKeeperClientStats.ACTIVE_TLS_CHANNEL_COUNTER);
        this.failedConnectionCounter = scoped.getCounter(BookKeeperClientStats.FAILED_CONNECTION_COUNTER);
        this.failedTlsHandshakeCounter = scoped.getCounter(BookKeeperClientStats.FAILED_TLS_HANDSHAKE_COUNTER);

        // Each accessor snapshots the volatile channel field once so a concurrent swap cannot be
        // observed half-way through a method.
        this.connectionPeer = new ClientConnectionPeer() {

            @Override
            public SocketAddress getRemoteAddr() {
                final Channel current = channel;
                return current == null ? null : current.remoteAddress();
            }

            @Override
            public Collection<Object> getProtocolPrincipals() {
                final Channel current = channel;
                if (current == null) {
                    return Collections.emptyList();
                }
                final SslHandler sslHandler = current.pipeline().get(SslHandler.class);
                if (sslHandler == null) {
                    return Collections.emptyList();
                }
                final Certificate[] peerCerts;
                try {
                    peerCerts = sslHandler.engine().getSession().getPeerCertificates();
                } catch (SSLPeerUnverifiedException err) {
                    // An unverified peer simply has no principals to report.
                    return Collections.emptyList();
                }
                if (peerCerts == null) {
                    return Collections.emptyList();
                }
                final List<Object> principals = new ArrayList<>(Arrays.asList(peerCerts));
                return principals;
            }

            @Override
            public void disconnect() {
                final Channel current = channel;
                if (current != null) {
                    current.close().addListener(closed -> makeWritable());
                }
                LOG.info("authplugin disconnected channel {}", channel);
            }

            @Override
            public void setAuthorizedId(BookKeeperPrincipal principal) {
                authorizedId = principal;
                LOG.info("connection {} authenticated as {}", channel, principal);
            }

            @Override
            public BookKeeperPrincipal getAuthorizedId() {
                return authorizedId;
            }

            @Override
            public boolean isSecure() {
                final Channel current = channel;
                return current != null && current.pipeline().get(SslHandler.class) != null;
            }

        };
    }

    /**
     * Invokes {@code op} while holding the read side of {@code closeLock}. If the client is
     * already {@link ConnectionState#CLOSED}, the callback receives
     * {@code ClientClosedException} instead of {@code rc}.
     *
     * @param op
     *          callback to complete
     * @param rc
     *          result code to report when the client is not closed
     */
    private void completeOperation(GenericCallback<PerChannelBookieClient> op, int rc) {
        final ReentrantReadWriteLock.ReadLock guard = closeLock.readLock();
        guard.lock();
        try {
            final int reportedRc = (state == ConnectionState.CLOSED)
                    ? BKException.Code.ClientClosedException
                    : rc;
            op.operationComplete(reportedRc, this);
        } finally {
            guard.unlock();
        }
    }

    /**
     * Reports how many completions are currently registered in {@code completionObjects}.
     * Entries held in {@code completionObjectsV2Conflicts} are not included.
     *
     * @return number of entries in the main completion map
     */
    protected long getNumPendingCompletionRequests() {
        final long pending = completionObjects.size();
        return pending;
    }

    /**
     * Starts an asynchronous connection attempt to the bookie. A fresh {@link Bootstrap} is
     * configured with the channel type matching the event loop group, the socket options from
     * the client configuration, and the request/response pipeline ending in this handler.
     *
     * <p>Two listeners are attached to the connect future, in this order: a
     * {@code ConnectionFutureListener} (wrapped by {@code contextPreservingListener}) and one
     * that calls {@link #makeWritable()}.
     *
     * @return the future of the connect attempt
     */
    protected ChannelFuture connect() {
        final long connectStartNanos = MathUtils.nowInNano();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Connecting to bookie: {}", addr);
        }

        // A DefaultEventLoopGroup means an in-VM (local) bookie: no real socket is involved.
        final boolean localTransport = eventLoopGroup instanceof DefaultEventLoopGroup;

        // Set up the Bootstrap used to create a new Channel connection to the bookie.
        final Bootstrap bootstrap = new Bootstrap();
        bootstrap.group(eventLoopGroup);
        if (localTransport) {
            bootstrap.channel(LocalChannel.class);
        } else if (eventLoopGroup instanceof EpollEventLoopGroup) {
            bootstrap.channel(EpollSocketChannel.class);
        } else {
            bootstrap.channel(NioSocketChannel.class);
        }

        final WriteBufferWaterMark waterMark = new WriteBufferWaterMark(
                conf.getClientWriteBufferLowWaterMark(), conf.getClientWriteBufferHighWaterMark());
        bootstrap.option(ChannelOption.ALLOCATOR, this.allocator)
                .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.getClientConnectTimeoutMillis())
                .option(ChannelOption.WRITE_BUFFER_WATER_MARK, waterMark);

        // TCP-level options only make sense for real sockets.
        if (!localTransport) {
            bootstrap.option(ChannelOption.TCP_NODELAY, conf.getClientTcpNoDelay());
            bootstrap.option(ChannelOption.SO_KEEPALIVE, conf.getClientSockKeepalive());

            // A buffer size of 0 leaves the OS free to auto-tune it.
            if (conf.getClientSendBufferSize() > 0) {
                bootstrap.option(ChannelOption.SO_SNDBUF, conf.getClientSendBufferSize());
            }
            if (conf.getClientReceiveBufferSize() > 0) {
                bootstrap.option(ChannelOption.SO_RCVBUF, conf.getClientReceiveBufferSize());
            }
        }

        // Packets are split on their length prefix by a {@link LengthFieldBasedFrameDecoder}
        // and outgoing packets get the length prepended. Everything else (interpreting the
        // received messages) is carried out by this class, installed last as "mainhandler".
        bootstrap.handler(new ChannelInitializer<Channel>() {
            @Override
            protected void initChannel(Channel ch) throws Exception {
                ch.pipeline()
                        .addLast("bytebufList", ByteBufList.ENCODER_WITH_SIZE)
                        .addLast("lengthbasedframedecoder",
                                new LengthFieldBasedFrameDecoder(maxFrameSize, 0, 4, 0, 4))
                        .addLast("lengthprepender", new LengthFieldPrepender(4))
                        .addLast("bookieProtoEncoder", new BookieProtoEncoding.RequestEncoder(extRegistry))
                        .addLast("bookieProtoDecoder",
                                new BookieProtoEncoding.ResponseDecoder(extRegistry, useV2WireProtocol))
                        .addLast("authHandler", new AuthHandler.ClientSideHandler(authProviderFactory,
                                txnIdGenerator, connectionPeer, useV2WireProtocol))
                        .addLast("mainhandler", PerChannelBookieClient.this);
            }
        });

        SocketAddress target = addr.getSocketAddress();
        if (localTransport) {
            target = addr.getLocalAddress();
        }

        final ChannelFuture connectFuture = bootstrap.connect(target);
        connectFuture.addListener(contextPreservingListener(new ConnectionFutureListener(connectStartNanos)));
        connectFuture.addListener(done -> makeWritable());
        return connectFuture;
    }

    /**
     * Disconnects from the bookie and then closes this client, in that order.
     */
    void cleanDisconnectAndClose() {
        this.disconnect();
        this.close();
    }

    /**
     * Returns the current value of the writability flag.
     *
     * @return {@code true} if this PCBC is writable
     */
    public boolean isWritable() {
        return this.isWritable;
    }

    /**
     * Sets the writability flag returned by {@link #isWritable()}.
     *
     * @param val
     *          new value of the flag
     */
    public void setWritable(boolean val) {
        this.isWritable = val;
    }

    /**
     * Marks this client as writable again; used as a listener body after connect and
     * channel-close events.
     */
    private void makeWritable() {
        this.setWritable(true);
    }

    /**
     * Runs {@code op} once a usable connection exists, starting a connection attempt first
     * if none is in progress.
     *
     * <ul>
     * <li>Connected: {@code op} is completed immediately with {@code OK}.</li>
     * <li>Closed: {@code op} is completed immediately with
     * {@code BookieHandleNotAvailableException}.</li>
     * <li>Otherwise: {@code op} is queued in {@code pendingOps}; unless a connect or
     * START_TLS exchange is already under way, the state moves to {@code CONNECTING} and
     * {@link #connect()} is called.</li>
     * </ul>
     *
     * <p>Callbacks and {@code connect()} are always invoked outside the monitor.
     *
     * @param op
     *          operation to run against this client
     */
    void connectIfNeededAndDoOp(GenericCallback<PerChannelBookieClient> op) {
        // Common case: already connected, no lock needed.
        if (channel != null && state == ConnectionState.CONNECTED) {
            completeOperation(op, BKException.Code.OK);
            return;
        }

        boolean startConnecting = false;
        int resultCode = BKException.Code.OK;
        synchronized (this) {
            // Re-check under the lock: the connection may have come up in the meantime.
            if (channel != null && state == ConnectionState.CONNECTED) {
                resultCode = BKException.Code.OK;
            } else if (state == ConnectionState.CLOSED) {
                resultCode = BKException.Code.BookieHandleNotAvailableException;
            } else {
                // Either no channel yet (first attempt) or the channel is disconnected.
                // Park the op; it is run when the connection attempt succeeds or fails.
                pendingOps.add(op);

                final boolean attemptInFlight =
                        state == ConnectionState.CONNECTING || state == ConnectionState.START_TLS;
                if (attemptInFlight) {
                    // A connection request is already waiting for its response.
                    return;
                }
                state = ConnectionState.CONNECTING;
                startConnecting = true;
            }
        }

        if (startConnecting) {
            // Start the connection attempt to the bookie.
            connect();
            return;
        }
        completeOperation(op, resultCode);
    }

    /**
     * Sends a v3 WRITE_LAC request for the given ledger. The completion is registered in
     * {@code completionObjects} before the request is built and written to the current channel.
     *
     * @param ledgerId
     *          Ledger Id
     * @param masterKey
     *          Master Key
     * @param lac
     *          last-add-confirmed value to write
     * @param toSend
     *          Buffer to send as the request body
     * @param cb
     *          Write-LAC callback
     * @param ctx
     *          Write-LAC callback context
     */
    void writeLac(final long ledgerId, final byte[] masterKey, final long lac, ByteBufList toSend,
            WriteLacCallback cb, Object ctx) {
        final long txnId = getTxnId();
        final CompletionKey key = new V3CompletionKey(txnId, OperationType.WRITE_LAC);
        // Original author's note: writeLac is mostly like addEntry hence uses addEntryTimeout.
        final WriteLacCompletion completion = new WriteLacCompletion(key, cb, ctx, lac);
        completionObjects.put(key, completion);

        // Wrap the payload without copying where the buffer layout allows it.
        final ByteString payload;
        if (toSend.hasArray()) {
            payload = UnsafeByteOperations.unsafeWrap(toSend.array(), toSend.arrayOffset(),
                    toSend.readableBytes());
        } else if (toSend.size() == 1) {
            payload = UnsafeByteOperations.unsafeWrap(toSend.getBuffer(0).nioBuffer());
        } else {
            payload = UnsafeByteOperations.unsafeWrap(toSend.toArray());
        }

        // Assemble header and body of the request.
        final BKPacketHeader.Builder header = BKPacketHeader.newBuilder()
                .setVersion(ProtocolVersion.VERSION_THREE)
                .setOperation(OperationType.WRITE_LAC)
                .setTxnId(txnId);
        final WriteLacRequest.Builder writeLac = WriteLacRequest.newBuilder()
                .setLedgerId(ledgerId)
                .setLac(lac)
                .setMasterKey(UnsafeByteOperations.unsafeWrap(masterKey))
                .setBody(payload);
        final Request request = withRequestContext(Request.newBuilder())
                .setHeader(header)
                .setWriteLacRequest(writeLac)
                .build();

        writeAndFlush(channel, key, request);
    }

    /**
     * Sends a v3 FORCE_LEDGER request for the given ledger. With the v2 wire protocol the
     * operation is rejected: the callback is completed with {@code IllegalOpException} on the
     * ordered executor and nothing is sent.
     *
     * @param ledgerId
     *          Ledger Id
     * @param cb
     *          Force-ledger callback
     * @param ctx
     *          Force-ledger callback context
     */
    void forceLedger(final long ledgerId, ForceLedgerCallback cb, Object ctx) {
        if (useV2WireProtocol) {
            LOG.error("force is not allowed with v2 protocol");
            executor.executeOrdered(ledgerId,
                    () -> cb.forceLedgerComplete(BKException.Code.IllegalOpException, ledgerId, addr, ctx));
            return;
        }

        final long txnId = getTxnId();
        final CompletionKey key = new V3CompletionKey(txnId, OperationType.FORCE_LEDGER);
        // Original author's note: force is mostly like addEntry hence uses addEntryTimeout.
        final ForceLedgerCompletion completion = new ForceLedgerCompletion(key, cb, ctx, ledgerId);
        completionObjects.put(key, completion);

        // Assemble header and body of the request.
        final BKPacketHeader.Builder header = BKPacketHeader.newBuilder()
                .setVersion(ProtocolVersion.VERSION_THREE)
                .setOperation(OperationType.FORCE_LEDGER)
                .setTxnId(txnId);
        final ForceLedgerRequest.Builder forceLedger = ForceLedgerRequest.newBuilder().setLedgerId(ledgerId);
        final Request request = withRequestContext(Request.newBuilder())
                .setHeader(header)
                .setForceLedgerRequest(forceLedger)
                .build();

        writeAndFlush(channel, key, request);
    }

    /**
     * Builds an add-entry request (v2 or v3 wire format, depending on {@code useV2WireProtocol}),
     * registers its completion and writes it to the current channel.
     *
     * <p>This method should be called only after connection has been checked for
     * {@link #connectIfNeededAndDoOp(GenericCallback)}.
     *
     * <p>With the v2 wire protocol, write flags containing {@code DEFERRED_SYNC} are rejected: the callback
     * is completed with {@code IllegalOpException} on the ordered executor and nothing is sent. If the
     * channel is {@code null} by the time the request is ready, the completion is errored out and
     * {@code toSend} is released here.
     *
     * @param ledgerId
     *          Ledger Id
     * @param masterKey
     *          Master Key
     * @param entryId
     *          Entry Id
     * @param toSend
     *          Buffer to send
     * @param cb
     *          Write callback
     * @param ctx
     *          Write callback context
     * @param options
     *          Protocol option bits; passed through (as a short) in v2 requests, and checked for
     *          {@code FLAG_HIGH_PRIORITY} and {@code FLAG_RECOVERY_ADD} when building v3 requests
     * @param allowFastFail
     *          allowFastFail flag
     * @param writeFlags
     *          WriteFlags
     */
    void addEntry(final long ledgerId, byte[] masterKey, final long entryId, ByteBufList toSend, WriteCallback cb,
            Object ctx, final int options, boolean allowFastFail, final EnumSet<WriteFlag> writeFlags) {
        Object request = null;
        CompletionKey completionKey = null;
        if (useV2WireProtocol) {
            // The v2 request built below carries no write flags, so DEFERRED_SYNC is rejected up front.
            if (writeFlags.contains(WriteFlag.DEFERRED_SYNC)) {
                LOG.error("invalid writeflags {} for v2 protocol", writeFlags);
                executor.executeOrdered(ledgerId, () -> {
                    cb.writeComplete(BKException.Code.IllegalOpException, ledgerId, entryId, addr, ctx);
                });
                return;
            }
            // v2 completions are keyed by (ledgerId, entryId, operation type).
            completionKey = acquireV2Key(ledgerId, entryId, OperationType.ADD_ENTRY);
            request = BookieProtocol.AddRequest.create(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, entryId,
                    (short) options, masterKey, toSend);
        } else {
            // v3 completions are keyed by (txnId, operation type).
            final long txnId = getTxnId();
            completionKey = new V3CompletionKey(txnId, OperationType.ADD_ENTRY);

            // Build the request and calculate the total size to be included in the packet.
            BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder()
                    .setVersion(ProtocolVersion.VERSION_THREE).setOperation(OperationType.ADD_ENTRY)
                    .setTxnId(txnId);
            if (((short) options & BookieProtocol.FLAG_HIGH_PRIORITY) == BookieProtocol.FLAG_HIGH_PRIORITY) {
                headerBuilder.setPriority(DEFAULT_HIGH_PRIORITY_VALUE);
            }

            // Wrap the payload without copying: either the single backing array, or the concatenation
            // of the individual buffers' NIO views.
            ByteString body = null;
            if (toSend.hasArray()) {
                body = UnsafeByteOperations.unsafeWrap(toSend.array(), toSend.arrayOffset(),
                        toSend.readableBytes());
            } else {
                for (int i = 0; i < toSend.size(); i++) {
                    ByteString piece = UnsafeByteOperations.unsafeWrap(toSend.getBuffer(i).nioBuffer());
                    // use ByteString.concat to avoid byte[] allocation when toSend has multiple ByteBufs
                    body = (body == null) ? piece : body.concat(piece);
                }
            }
            AddRequest.Builder addBuilder = AddRequest.newBuilder().setLedgerId(ledgerId).setEntryId(entryId)
                    .setMasterKey(UnsafeByteOperations.unsafeWrap(masterKey)).setBody(body);

            if (((short) options & BookieProtocol.FLAG_RECOVERY_ADD) == BookieProtocol.FLAG_RECOVERY_ADD) {
                addBuilder.setFlag(AddRequest.Flag.RECOVERY_ADD);
            }

            if (!writeFlags.isEmpty()) {
                // add flags only if needed, in order to be able to talk with old bookies
                addBuilder.setWriteFlags(WriteFlag.getWriteFlagsValue(writeFlags));
            }

            request = withRequestContext(Request.newBuilder()).setHeader(headerBuilder).setAddRequest(addBuilder)
                    .build();
        }

        // Register the completion before writing so a response can be matched to it.
        putCompletionKeyValue(completionKey, acquireAddCompletion(completionKey, cb, ctx, ledgerId, entryId));
        // Snapshot the channel field once so the null check and the write use the same reference.
        final Channel c = channel;
        if (c == null) {
            // usually checked in writeAndFlush, but we have extra check
            // because we need to release toSend.
            errorOut(completionKey);
            toSend.release();
            return;
        } else {
            // addEntry times out on backpressure
            writeAndFlush(c, completionKey, request, allowFastFail);
        }
    }

    /**
     * Sends a read-LAC request for the given ledger, using the v2 or v3 wire format depending on
     * {@code useV2WireProtocol}, and registers a completion that will receive the response.
     *
     * @param ledgerId
     *          Ledger Id
     * @param cb
     *          Read-LAC callback
     * @param ctx
     *          Read-LAC callback context
     */
    public void readLac(final long ledgerId, ReadLacCallback cb, Object ctx) {
        final Object lacRequest;
        final CompletionKey lacKey;
        if (!useV2WireProtocol) {
            final long txnId = getTxnId();
            lacKey = new V3CompletionKey(txnId, OperationType.READ_LAC);

            // v3: packet header plus the read-LAC payload.
            final BKPacketHeader.Builder header = BKPacketHeader.newBuilder();
            header.setVersion(ProtocolVersion.VERSION_THREE);
            header.setOperation(OperationType.READ_LAC);
            header.setTxnId(txnId);

            final ReadLacRequest.Builder payload = ReadLacRequest.newBuilder();
            payload.setLedgerId(ledgerId);

            lacRequest = withRequestContext(Request.newBuilder()).setHeader(header)
                    .setReadLacRequest(payload).build();
        } else {
            // v2: a plain read request with entry id 0, no flags and no master key.
            lacRequest = new BookieProtocol.ReadRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, 0,
                    (short) 0, null);
            lacKey = acquireV2Key(ledgerId, 0, OperationType.READ_LAC);
        }

        putCompletionKeyValue(lacKey, new ReadLacCompletion(lacKey, cb, ctx, ledgerId));
        writeAndFlush(channel, lacKey, lacRequest);
    }

    /**
     * Sends a v3 request asking the bookie for the list of entries it stores for the given ledger.
     *
     * <p>The completion is registered under a fresh transaction id before the request is written. The
     * request envelope is created through {@code withRequestContext(...)}, like the other v3 requests
     * built in this class, so this operation is treated the same way as its siblings.
     *
     * @param ledgerId
     *          Ledger Id
     * @param cb
     *          Callback that receives the result
     */
    public void getListOfEntriesOfLedger(final long ledgerId, GetListOfEntriesOfLedgerCallback cb) {
        final long txnId = getTxnId();
        final CompletionKey completionKey = new V3CompletionKey(txnId, OperationType.GET_LIST_OF_ENTRIES_OF_LEDGER);
        completionObjects.put(completionKey, new GetListOfEntriesOfLedgerCompletion(completionKey, cb, ledgerId));

        // Build the request.
        BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder().setVersion(ProtocolVersion.VERSION_THREE)
                .setOperation(OperationType.GET_LIST_OF_ENTRIES_OF_LEDGER).setTxnId(txnId);

        GetListOfEntriesOfLedgerRequest.Builder getListOfEntriesOfLedgerRequestBuilder = GetListOfEntriesOfLedgerRequest
                .newBuilder().setLedgerId(ledgerId);

        // Use the shared envelope helper instead of a bare Request.newBuilder(), for consistency with
        // every other v3 request in this class.
        final Request getListOfEntriesOfLedgerRequest = withRequestContext(Request.newBuilder())
                .setHeader(headerBuilder)
                .setGetListOfEntriesOfLedgerRequest(getListOfEntriesOfLedgerRequestBuilder).build();

        writeAndFlush(channel, completionKey, getListOfEntriesOfLedgerRequest);
    }

    /**
     * Long Poll Reads.
     *
     * <p>Delegates to {@code readEntryInternal} with no protocol flags, no master key and fast-fail
     * disabled.
     */
    public void readEntryWaitForLACUpdate(final long ledgerId, final long entryId, final long previousLAC,
            final long timeOutInMillis, final boolean piggyBackEntry, ReadEntryCallback cb, Object ctx) {
        final short noFlags = (short) 0;
        final byte[] noMasterKey = null;
        final boolean noFastFail = false;
        readEntryInternal(ledgerId, entryId, previousLAC, timeOutInMillis, piggyBackEntry, cb, ctx, noFlags,
                noMasterKey, noFastFail);
    }

    /**
     * Normal Reads.
     *
     * <p>Delegates to {@code readEntryInternal} without any long-poll parameters (no previous LAC, no
     * timeout, no piggy-backed entry).
     */
    public void readEntry(final long ledgerId, final long entryId, ReadEntryCallback cb, Object ctx, int flags,
            byte[] masterKey, boolean allowFastFail) {
        final Long noPreviousLac = null;
        final Long noTimeout = null;
        final boolean noPiggyBack = false;
        readEntryInternal(ledgerId, entryId, noPreviousLac, noTimeout, noPiggyBack, cb, ctx, (short) flags,
                masterKey, allowFastFail);
    }

    /**
     * Shared implementation of normal and long-poll reads.
     *
     * <p>Builds a v2 or v3 read request depending on {@code useV2WireProtocol}, registers a
     * {@code ReadCompletion} for it and writes it to the current channel. On the v3 path, invalid parameter
     * combinations complete the callback directly with {@code IncorrectParameterException} before any
     * completion is registered. The v2 path does not use {@code previousLAC}, {@code timeOutInMillis} or
     * {@code piggyBackEntry}.
     *
     * @param ledgerId
     *          Ledger Id
     * @param entryId
     *          Entry Id
     * @param previousLAC
     *          Previous LAC for long-poll reads, or {@code null}
     * @param timeOutInMillis
     *          Long-poll timeout, or {@code null}; requires {@code previousLAC} on the v3 path
     * @param piggyBackEntry
     *          Whether to set the {@code ENTRY_PIGGYBACK} flag; requires {@code previousLAC} on the v3 path
     * @param cb
     *          Read callback
     * @param ctx
     *          Read callback context
     * @param flags
     *          Protocol flag bits ({@code FLAG_HIGH_PRIORITY} and {@code FLAG_DO_FENCING} are inspected for v3)
     * @param masterKey
     *          Master key; required on the v3 path when {@code FLAG_DO_FENCING} is set
     * @param allowFastFail
     *          allowFastFail flag, forwarded to {@code writeAndFlush}
     */
    private void readEntryInternal(final long ledgerId, final long entryId, final Long previousLAC,
            final Long timeOutInMillis, final boolean piggyBackEntry, final ReadEntryCallback cb, final Object ctx,
            int flags, byte[] masterKey, boolean allowFastFail) {
        Object request = null;
        CompletionKey completionKey = null;
        if (useV2WireProtocol) {
            request = new BookieProtocol.ReadRequest(BookieProtocol.CURRENT_PROTOCOL_VERSION, ledgerId, entryId,
                    (short) flags, masterKey);
            // v2 completions are keyed by (ledgerId, entryId, operation type).
            completionKey = acquireV2Key(ledgerId, entryId, OperationType.READ_ENTRY);
        } else {
            // v3 completions are keyed by (txnId, operation type).
            final long txnId = getTxnId();
            completionKey = new V3CompletionKey(txnId, OperationType.READ_ENTRY);

            // Build the request and calculate the total size to be included in the packet.
            BKPacketHeader.Builder headerBuilder = BKPacketHeader.newBuilder()
                    .setVersion(ProtocolVersion.VERSION_THREE).setOperation(OperationType.READ_ENTRY)
                    .setTxnId(txnId);
            if (((short) flags & BookieProtocol.FLAG_HIGH_PRIORITY) == BookieProtocol.FLAG_HIGH_PRIORITY) {
                headerBuilder.setPriority(DEFAULT_HIGH_PRIORITY_VALUE);
            }

            ReadRequest.Builder readBuilder = ReadRequest.newBuilder().setLedgerId(ledgerId).setEntryId(entryId);

            if (null != previousLAC) {
                readBuilder = readBuilder.setPreviousLAC(previousLAC);
            }

            if (null != timeOutInMillis) {
                // Long poll requires previousLAC
                if (null == previousLAC) {
                    cb.readEntryComplete(BKException.Code.IncorrectParameterException, ledgerId, entryId, null,
                            ctx);
                    return;
                }
                readBuilder = readBuilder.setTimeOut(timeOutInMillis);
            }

            if (piggyBackEntry) {
                // Piggy-backing an entry also requires previousLAC
                if (null == previousLAC) {
                    cb.readEntryComplete(BKException.Code.IncorrectParameterException, ledgerId, entryId, null,
                            ctx);
                    return;
                }
                readBuilder = readBuilder.setFlag(ReadRequest.Flag.ENTRY_PIGGYBACK);
            }

            // Only one flag can be set on the read requests
            // (so FENCE_LEDGER set here replaces ENTRY_PIGGYBACK if both were requested).
            if (((short) flags & BookieProtocol.FLAG_DO_FENCING) == BookieProtocol.FLAG_DO_FENCING) {
                readBuilder.setFlag(ReadRequest.Flag.FENCE_LEDGER);
                // Fencing needs the master key; reject the call when it is missing.
                if (masterKey == null) {
                    cb.readEntryComplete(BKException.Code.IncorrectParameterException, ledgerId, entryId, null,
                            ctx);
                    return;
                }
                readBuilder.setMasterKey(ByteString.copyFrom(masterKey));
            }

            request = withRequestContext(Request.newBuilder()).setHeader(headerBuilder).setReadRequest(readBuilder)
                    .build();
        }

        // Register the completion before writing so a response can be matched to it.
        ReadCompletion readCompletion = new ReadCompletion(completionKey, cb, ctx, ledgerId, entryId);
        putCompletionKeyValue(completionKey, readCompletion);

        writeAndFlush(channel, completionKey, request, allowFastFail);
    }

    /**
     * Sends a v3 get-bookie-info request and registers a completion for its response.
     *
     * @param requested
     *          Value placed in the request's {@code requested} field
     * @param cb
     *          Callback that receives the bookie info
     * @param ctx
     *          Callback context
     */
    public void getBookieInfo(final long requested, GetBookieInfoCallback cb, Object ctx) {
        final long txnId = getTxnId();
        final CompletionKey infoKey = new V3CompletionKey(txnId, OperationType.GET_BOOKIE_INFO);
        completionObjects.put(infoKey, new GetBookieInfoCompletion(infoKey, cb, ctx));

        // Packet header for the v3 envelope.
        final BKPacketHeader.Builder header = BKPacketHeader.newBuilder();
        header.setVersion(ProtocolVersion.VERSION_THREE);
        header.setOperation(OperationType.GET_BOOKIE_INFO);
        header.setTxnId(txnId);

        // Operation-specific payload.
        final GetBookieInfoRequest.Builder payload = GetBookieInfoRequest.newBuilder();
        payload.setRequested(requested);

        final Request infoRequest = withRequestContext(Request.newBuilder()).setHeader(header)
                .setGetBookieInfoRequest(payload).build();

        writeAndFlush(channel, infoKey, infoRequest);
    }

    // Removal predicate for pending completions: delegates to the completion's own maybeTimeout().
    private static final BiPredicate<CompletionKey, CompletionValue> timeoutCheck =
            (key, value) -> value.maybeTimeout();

    /**
     * Removes every pending completion for which {@code timeoutCheck} returns true, from both the primary
     * completion map and the v2 conflicts multimap, and logs the total when any were removed.
     */
    public void checkTimeoutOnPendingOperations() {
        int timedOutOperations = completionObjects.removeIf(timeoutCheck);
        timedOutOperations += completionObjectsV2Conflicts.removeIf(timeoutCheck);

        if (timedOutOperations <= 0) {
            return;
        }
        LOG.info("Timed-out {} operations to channel {} for {}", timedOutOperations, channel, addr);
    }

    /**
     * Disconnects the bookie client. It can be reused.
     *
     * <p>Equivalent to {@code disconnect(true)}.
     */
    public void disconnect() {
        final boolean waitForClose = true;
        disconnect(waitForClose);
    }

    /**
     * Disconnects the bookie client without closing it permanently.
     *
     * @param wait
     *          whether to block until the channel close has completed
     */
    public void disconnect(boolean wait) {
        LOG.info("Disconnecting the per channel bookie client for {}", addr);
        final boolean permanent = false;
        closeInternal(permanent, wait);
    }

    /**
     * Closes the bookie client permanently. It cannot be reused.
     *
     * <p>Equivalent to {@code close(true)}.
     */
    public void close() {
        final boolean waitForClose = true;
        close(waitForClose);
    }

    /**
     * Closes the bookie client permanently.
     *
     * <p>Under the close write-lock the state is switched to {@code CLOSED} and all outstanding entries
     * are errored out with {@code ClientClosedException}; a second call returns immediately. Afterwards the
     * matching active-channel counter is decremented and the channel is closed.
     *
     * @param wait
     *          whether to block until the channel close has completed
     */
    public void close(boolean wait) {
        LOG.info("Closing the per channel bookie client for {}", addr);
        closeLock.writeLock().lock();
        try {
            if (ConnectionState.CLOSED == state) {
                return;
            }
            state = ConnectionState.CLOSED;
            errorOutOutstandingEntries(BKException.Code.ClientClosedException);
        } finally {
            closeLock.writeLock().unlock();
        }

        // Snapshot the field once: it is nulled by closeInternal() under a different lock, so reading it
        // twice (null check, then pipeline()) could hit a NullPointerException.
        final Channel c = channel;
        if (c != null && c.pipeline().get(SslHandler.class) != null) {
            activeTlsChannelCounter.dec();
        } else {
            activeNonTlsChannelCounter.dec();
        }

        closeInternal(true, wait);
    }

    /**
     * Detaches the current channel from this client and closes it.
     *
     * <p>While holding this object's monitor, the state becomes {@code CLOSED} for a permanent close, or
     * {@code DISCONNECTED} otherwise (unless already {@code CLOSED}); the channel field is cleared and
     * {@code makeWritable()} is invoked. The actual channel close happens outside the monitor.
     *
     * @param permanent
     *          whether the client is being closed for good
     * @param wait
     *          whether to block until the channel close has completed
     */
    private void closeInternal(boolean permanent, boolean wait) {
        final Channel detached;
        synchronized (this) {
            if (permanent) {
                state = ConnectionState.CLOSED;
            } else if (state != ConnectionState.CLOSED) {
                state = ConnectionState.DISCONNECTED;
            }
            detached = channel;
            channel = null;
            makeWritable();
        }

        if (detached == null) {
            return;
        }
        final ChannelFuture closeFuture = closeChannel(detached);
        if (wait) {
            closeFuture.awaitUninterruptibly();
        }
    }

    /**
     * Closes the given channel and invokes {@code makeWritable()} once the close future completes.
     *
     * @param c
     *          channel to close
     * @return the future returned by the close call, with the listener attached
     */
    private ChannelFuture closeChannel(Channel c) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Closing channel {}", c);
        }
        final ChannelFuture closing = c.close();
        return closing.addListener(x -> makeWritable());
    }

    /**
     * Invokes {@code makeWritable()} when there is no current channel or the current channel reports
     * itself writable, then forwards the event to the superclass.
     */
    @Override
    public void channelWritabilityChanged(ChannelHandlerContext ctx) throws Exception {
        final Channel current = channel;
        final boolean stillBlocked = current != null && !current.isWritable();
        if (!stillBlocked) {
            makeWritable();
        }
        super.channelWritabilityChanged(ctx);
    }

    /**
     * Writes and flushes a request with fast-fail disabled.
     */
    private void writeAndFlush(final Channel channel, final CompletionKey key, final Object request) {
        final boolean allowFastFail = false;
        writeAndFlush(channel, key, request, allowFastFail);
    }

    /**
     * Writes and flushes a request on the given channel, erroring out the completion registered under
     * {@code key} when the request cannot be sent.
     *
     * <p>The completion is errored out when the channel is {@code null}, when {@code allowFastFail} is set
     * and the channel is not writable ({@code TooManyRequestsException}), or when the write call itself
     * throws. On a successful write the completion found in {@code completionObjects}, if any, is marked
     * outstanding.
     *
     * @param channel
     *          channel to write to, may be {@code null}
     * @param key
     *          completion key registered for this request
     * @param request
     *          request object to send
     * @param allowFastFail
     *          whether to fail immediately instead of writing to a non-writable channel
     */
    private void writeAndFlush(final Channel channel, final CompletionKey key, final Object request,
            final boolean allowFastFail) {
        if (channel == null) {
            LOG.warn("Operation {} failed: channel == null", StringUtils.requestToString(request));
            errorOut(key);
            return;
        }

        // isWritable is volatile, so it is only written when the value actually changed.
        final boolean writableNow = channel.isWritable();
        if (isWritable != writableNow) {
            isWritable = writableNow;
        }

        if (allowFastFail && !isWritable) {
            LOG.warn("Operation {} failed: TooManyRequestsException", StringUtils.requestToString(request));

            errorOut(key, BKException.Code.TooManyRequestsException);
            return;
        }

        try {
            final long startTime = MathUtils.nowInNano();

            final ChannelPromise writePromise = channel.newPromise();
            writePromise.addListener(future -> {
                if (!future.isSuccess()) {
                    nettyOpLogger.registerFailedEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS);
                    return;
                }
                nettyOpLogger.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime), TimeUnit.NANOSECONDS);
                final CompletionValue pending = completionObjects.get(key);
                if (pending != null) {
                    pending.setOutstanding();
                }
            });

            channel.writeAndFlush(request, writePromise);
        } catch (Throwable e) {
            LOG.warn("Operation {} failed", StringUtils.requestToString(request), e);
            errorOut(key);
        }
    }

    /**
     * Removes the completion registered under {@code key} and errors it out with its default error.
     *
     * <p>The primary completion map is tried first; only when it holds nothing for the key is one entry
     * taken from the v2 conflicts multimap.
     */
    void errorOut(final CompletionKey key) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Removing completion key: {}", key);
        }
        final CompletionValue primary = completionObjects.remove(key);
        if (primary == null) {
            // Nothing in the primary map, so fall back to the multimap.
            completionObjectsV2Conflicts.removeAny(key).ifPresent(c -> c.errorOut());
            return;
        }
        primary.errorOut();
    }

    /**
     * Removes the completion registered under {@code key} and errors it out with the given return code.
     *
     * <p>The primary completion map is tried first; only when it holds nothing for the key is one entry
     * taken from the v2 conflicts multimap.
     */
    void errorOut(final CompletionKey key, final int rc) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Removing completion key: {}", key);
        }
        final CompletionValue primary = completionObjects.remove(key);
        if (primary == null) {
            // Nothing in the primary map, so fall back to the multimap.
            completionObjectsV2Conflicts.removeAny(key).ifPresent(c -> c.errorOut(rc));
            return;
        }
        primary.errorOut(rc);
    }

    /**
     * Errors out pending ops from per channel bookie client. As the channel
     * is being closed, all the operations waiting on the connection
     * will be sent to completion with error.
     *
     * <p>The pending queue is swapped for a fresh one while holding this object's monitor; the callbacks
     * themselves run outside of it.
     */
    void errorOutPendingOps(int rc) {
        final Queue<GenericCallback<PerChannelBookieClient>> drained;
        synchronized (this) {
            drained = pendingOps;
            pendingOps = new ArrayDeque<>();
        }

        drained.forEach(op -> op.operationComplete(rc, PerChannelBookieClient.this));
    }

    /**
     * Errors out pending entries. We call this method from one thread to avoid
     * concurrent executions to QuorumOpMonitor (implements callbacks). It seems
     * simpler to call it from BookieHandle instead of calling directly from
     * here.
     *
     * <p>The v2 conflicts multimap is drained first, one key at a time, followed by every key currently
     * in the primary completion map.
     */
    void errorOutOutstandingEntries(int rc) {
        for (Optional<CompletionKey> conflict = completionObjectsV2Conflicts.getAnyKey(); conflict
                .isPresent(); conflict = completionObjectsV2Conflicts.getAnyKey()) {
            errorOut(conflict.get(), rc);
        }

        for (CompletionKey pendingKey : completionObjects.keys()) {
            errorOut(pendingKey, rc);
        }
    }

    /**
     * Forwards an error notification to the owning client pool, when one is set.
     */
    void recordError() {
        if (pcbcPool == null) {
            return;
        }
        pcbcPool.recordError();
    }

    /**
     * If our channel has disconnected, we just error out the pending entries.
     *
     * <p>The inactive channel is closed and the matching active-channel counter is decremented; then all
     * outstanding entries and pending ops are failed with {@code BookieHandleNotAvailableException}.
     * The state only moves to {@code DISCONNECTED} when the inactive channel is still this client's
     * current channel and the client is not already closed.
     */
    @Override
    public void channelInactive(ChannelHandlerContext ctx) throws Exception {
        final Channel inactive = ctx.channel();
        LOG.info("Disconnected from bookie channel {}", inactive);
        if (inactive != null) {
            closeChannel(inactive);
            final boolean usedTls = inactive.pipeline().get(SslHandler.class) != null;
            if (usedTls) {
                activeTlsChannelCounter.dec();
            } else {
                activeNonTlsChannelCounter.dec();
            }
        }

        final int rc = BKException.Code.BookieHandleNotAvailableException;
        errorOutOutstandingEntries(rc);
        errorOutPendingOps(rc);

        synchronized (this) {
            final boolean isCurrentChannel = this.channel == inactive;
            if (isCurrentChannel && state != ConnectionState.CLOSED) {
                state = ConnectionState.DISCONNECTED;
            }
        }

        // we don't want to reconnect right away. If someone sends a request to
        // this address, we will reconnect.
    }

    /**
     * Called by netty when an exception happens in one of the netty threads
     * (mostly due to what we do in the netty threads).
     *
     * <p>Each recognised failure is handled once and then returns:
     * <ul>
     * <li>corrupted or over-long frames: log and close the context;</li>
     * <li>authentication failures: error out outstanding entries with
     * {@code UnauthorizedAccessException} and close the channel;</li>
     * <li>TLS handshake failures: error out pending ops with {@code SecurityException} and close the
     * channel;</li>
     * <li>I/O exceptions: log a warning and close the context.</li>
     * </ul>
     * Anything else is logged as unexpected (at debug level when the client is already closed) and the
     * context is closed.
     */
    @Override
    public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
        exceptionCounter.inc();
        if (cause instanceof CorruptedFrameException || cause instanceof TooLongFrameException) {
            LOG.error("Corrupted frame received from bookie: {}", ctx.channel().remoteAddress());
            ctx.close();
            return;
        }

        if (cause instanceof AuthHandler.AuthenticationException) {
            LOG.error("Error authenticating connection", cause);
            errorOutOutstandingEntries(BKException.Code.UnauthorizedAccessException);
            Channel c = ctx.channel();
            if (c != null) {
                closeChannel(c);
            }
            return;
        }

        if (cause instanceof DecoderException && cause.getCause() instanceof SSLHandshakeException) {
            LOG.error("TLS handshake failed", cause);
            errorOutPendingOps(BKException.Code.SecurityException);
            Channel c = ctx.channel();
            if (c != null) {
                closeChannel(c);
            }
            // Fully handled: without this return the same failure would fall through and be logged a
            // second time as an "unexpected exception" below.
            return;
        }

        if (cause instanceof IOException) {
            if (cause instanceof NativeIoException) {
                // Stack trace is not very interesting for native IO exception, the important part is in
                // the exception message
                LOG.warn("Exception caught on:{} cause: {}", ctx.channel(), cause.getMessage());
            } else {
                LOG.warn("Exception caught on:{} cause:", ctx.channel(), cause);
            }
            ctx.close();
            return;
        }

        synchronized (this) {
            if (state == ConnectionState.CLOSED) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Unexpected exception caught by bookie client channel handler, "
                            + "but the client is closed, so it isn't important", cause);
                }
            } else {
                LOG.error("Unexpected exception caught by bookie client channel handler", cause);
            }
        }

        // Since we are a library, cant terminate App here, can we?
        ctx.close();
    }

    /**
     * Called by netty when a message is received on a channel.
     *
     * <p>v2 responses and v3 responses are dispatched to their respective readers; any other message is
     * passed on to the next handler in the pipeline.
     */
    @Override
    public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
        if (msg instanceof BookieProtocol.Response) {
            readV2Response((BookieProtocol.Response) msg);
            return;
        }
        if (msg instanceof Response) {
            readV3Response((Response) msg);
            return;
        }
        ctx.fireChannelRead(msg);
    }

    /**
     * Matches a v2 response to its pending completion and schedules the completion's handling on the
     * ordered executor, keyed by the completion's ledger id.
     *
     * <p>A response with no matching completion is released here, with a debug log entry.
     */
    private void readV2Response(final BookieProtocol.Response response) {
        final OperationType operationType = getOperationType(response.getOpCode());
        final StatusCode status = getStatusCodeFromErrorCode(response.errorCode);

        // The lookup key is only needed for the lookup itself and is released right after it.
        final CompletionKey lookupKey = acquireV2Key(response.ledgerId, response.entryId, operationType);
        final CompletionValue pending = getCompletionValue(lookupKey);
        lookupKey.release();

        if (pending != null) {
            final long orderingKey = pending.ledgerId;
            final ReadV2ResponseCallback task = ReadV2ResponseCallback.create(pending, response.ledgerId,
                    response.entryId, status, response);
            executor.executeOrdered(orderingKey, task);
            return;
        }

        // Unexpected response, so log it. The txnId should have been present.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Unexpected response received from bookie : " + addr + " for type : " + operationType
                    + " and ledger:entry : " + response.ledgerId + ":" + response.entryId);
        }
        response.release();
    }

    /**
     * Recyclable task that delivers a v2 response to its completion on the ordered executor.
     *
     * <p>Instances are obtained through {@link #create} and hand themselves back to the recycler at the
     * end of {@link #safeRun()}. The response is released and recycled even when the completion handler
     * throws, so a failing callback cannot leak the response or this task object.
     */
    private static class ReadV2ResponseCallback extends SafeRunnable {
        CompletionValue completionValue;
        long ledgerId;
        long entryId;
        StatusCode status;
        BookieProtocol.Response response;

        /**
         * Takes an instance from the recycler and fills in all fields for one response delivery.
         */
        static ReadV2ResponseCallback create(CompletionValue completionValue, long ledgerId, long entryId,
                StatusCode status, BookieProtocol.Response response) {
            ReadV2ResponseCallback callback = RECYCLER.get();
            callback.completionValue = completionValue;
            callback.ledgerId = ledgerId;
            callback.entryId = entryId;
            callback.status = status;
            callback.response = response;
            return callback;
        }

        @Override
        public void safeRun() {
            try {
                completionValue.handleV2Response(ledgerId, entryId, status, response);
            } finally {
                // Always give the response and this task back, even if the handler threw.
                response.release();
                response.recycle();
                recycle();
            }
        }

        /**
         * Clears all fields and returns this instance to the recycler.
         */
        void recycle() {
            completionValue = null;
            ledgerId = -1;
            entryId = -1;
            status = null;
            response = null;
            recyclerHandle.recycle(this);
        }

        private final Handle<ReadV2ResponseCallback> recyclerHandle;

        private ReadV2ResponseCallback(Handle<ReadV2ResponseCallback> recyclerHandle) {
            this.recyclerHandle = recyclerHandle;
        }

        private static final Recycler<ReadV2ResponseCallback> RECYCLER = new Recycler<ReadV2ResponseCallback>() {
            @Override
            protected ReadV2ResponseCallback newObject(Handle<ReadV2ResponseCallback> handle) {
                return new ReadV2ResponseCallback(handle);
            }
        };
    }

    /**
     * Maps a v2 protocol opcode to the corresponding {@code OperationType}.
     *
     * @param opCode
     *          v2 opcode
     * @return the matching operation type
     * @throws IllegalArgumentException
     *           if the opcode is not one of the handled values
     */
    private static OperationType getOperationType(byte opCode) {
        final OperationType type;
        switch (opCode) {
        case BookieProtocol.ADDENTRY:
            type = OperationType.ADD_ENTRY;
            break;
        case BookieProtocol.READENTRY:
            type = OperationType.READ_ENTRY;
            break;
        case BookieProtocol.AUTH:
            type = OperationType.AUTH;
            break;
        case BookieProtocol.READ_LAC:
            type = OperationType.READ_LAC;
            break;
        case BookieProtocol.WRITE_LAC:
            type = OperationType.WRITE_LAC;
            break;
        case BookieProtocol.GET_BOOKIE_INFO:
            type = OperationType.GET_BOOKIE_INFO;
            break;
        default:
            throw new IllegalArgumentException("Invalid operation type " + opCode);
        }
        return type;
    }

    /**
     * Maps a v2 protocol error code to the corresponding {@code StatusCode}.
     *
     * @param errorCode
     *          v2 error code
     * @return the matching status code
     * @throws IllegalArgumentException
     *           if the error code is not one of the handled values
     */
    private static StatusCode getStatusCodeFromErrorCode(int errorCode) {
        final StatusCode status;
        switch (errorCode) {
        case BookieProtocol.EOK:
            status = StatusCode.EOK;
            break;
        case BookieProtocol.ENOLEDGER:
            status = StatusCode.ENOLEDGER;
            break;
        case BookieProtocol.ENOENTRY:
            status = StatusCode.ENOENTRY;
            break;
        case BookieProtocol.EBADREQ:
            status = StatusCode.EBADREQ;
            break;
        case BookieProtocol.EIO:
            status = StatusCode.EIO;
            break;
        case BookieProtocol.EUA:
            status = StatusCode.EUA;
            break;
        case BookieProtocol.EBADVERSION:
            status = StatusCode.EBADVERSION;
            break;
        case BookieProtocol.EFENCED:
            status = StatusCode.EFENCED;
            break;
        case BookieProtocol.EREADONLY:
            status = StatusCode.EREADONLY;
            break;
        case BookieProtocol.ETOOMANYREQUESTS:
            status = StatusCode.ETOOMANYREQUESTS;
            break;
        default:
            throw new IllegalArgumentException("Invalid error code: " + errorCode);
        }
        return status;
    }

    /**
     * Matches a v3 response to its pending completion by (txnId, operation) and schedules the completion's
     * handling on the ordered executor, keyed by the completion's ledger id.
     *
     * <p>The key is removed from the completion map afterwards whether or not a completion was found.
     */
    private void readV3Response(final Response response) {
        final BKPacketHeader header = response.getHeader();
        final CompletionKey key = newCompletionKey(header.getTxnId(), header.getOperation());
        final CompletionValue completionValue = completionObjects.get(key);

        if (completionValue != null) {
            final SafeRunnable handleResponse = new SafeRunnable() {
                @Override
                public void safeRun() {
                    completionValue.restoreMdcContext();
                    completionValue.handleV3Response(response);
                }

                @Override
                public String toString() {
                    return String.format("HandleResponse(Txn=%d, Type=%s, Entry=(%d, %d))", header.getTxnId(),
                            header.getOperation(), completionValue.ledgerId, completionValue.entryId);
                }
            };
            executor.executeOrdered(completionValue.ledgerId, handleResponse);
        } else if (LOG.isDebugEnabled()) {
            // Unexpected response, so log it. The txnId should have been present.
            LOG.debug("Unexpected response received from bookie : " + addr + " for type : "
                    + header.getOperation() + " and txnId : " + header.getTxnId());
        }

        completionObjects.remove(key);
    }

    /**
     * Installs a TLS handler at the front of the current channel's pipeline and registers a listener on
     * its handshake future.
     *
     * <p>When the handshake finishes, the listener decides the outcome under this object's monitor based on
     * the handshake result and the current connection state, then (outside the monitor) calls
     * {@code makeWritable()} and completes all queued pending ops with the resulting return code.
     */
    void initTLSHandshake() {
        // create TLS handler
        PerChannelBookieClient parentObj = PerChannelBookieClient.this;
        SslHandler handler = parentObj.shFactory.newTLSHandler();
        channel.pipeline().addFirst(parentObj.shFactory.getHandlerName(), handler);
        handler.handshakeFuture().addListener(new GenericFutureListener<Future<Channel>>() {
            @Override
            public void operationComplete(Future<Channel> future) throws Exception {
                int rc;
                Queue<GenericCallback<PerChannelBookieClient>> oldPendingOps;

                synchronized (PerChannelBookieClient.this) {
                    if (future.isSuccess() && state == ConnectionState.CONNECTING) {
                        // Handshake succeeded but the state is CONNECTING rather than START_TLS:
                        // drop the channel and report the bookie handle as unavailable.
                        LOG.error("Connection state changed before TLS handshake completed {}/{}", addr, state);
                        rc = BKException.Code.BookieHandleNotAvailableException;
                        closeChannel(channel);
                        channel = null;
                        if (state != ConnectionState.CLOSED) {
                            state = ConnectionState.DISCONNECTED;
                        }
                    } else if (future.isSuccess() && state == ConnectionState.START_TLS) {
                        // Expected path: the TLS upgrade completed while we were waiting for it.
                        rc = BKException.Code.OK;
                        LOG.info("Successfully connected to bookie using TLS: " + addr);

                        state = ConnectionState.CONNECTED;
                        // Tell the client-side auth provider that the protocol has been upgraded.
                        AuthHandler.ClientSideHandler authHandler = future.get().pipeline()
                                .get(AuthHandler.ClientSideHandler.class);
                        authHandler.authProvider.onProtocolUpgrade();
                        activeTlsChannelCounter.inc();
                    } else if (future.isSuccess()
                            && (state == ConnectionState.CLOSED || state == ConnectionState.DISCONNECTED)) {
                        // The client was closed or disconnected while the handshake was in flight.
                        LOG.warn("Closed before TLS handshake completed, clean up: {}, current state {}", channel,
                                state);
                        closeChannel(channel);
                        rc = BKException.Code.BookieHandleNotAvailableException;
                        channel = null;
                    } else if (future.isSuccess() && state == ConnectionState.CONNECTED) {
                        // NOTE(review): both placeholders are given the same `channel` field, and that
                        // field is also what gets closed here - confirm this is the intended channel.
                        LOG.debug("Already connected with another channel({}), so close the new channel({})",
                                channel, channel);
                        closeChannel(channel);
                        return; // pendingOps should have been completed when other channel connected
                    } else {
                        // Remaining cases, including a failed handshake: report a security error.
                        LOG.error("TLS handshake failed with bookie: {}/{}, current state {} : ", channel, addr,
                                state, future.cause());
                        rc = BKException.Code.SecurityException;
                        closeChannel(channel);
                        channel = null;
                        if (state != ConnectionState.CLOSED) {
                            state = ConnectionState.DISCONNECTED;
                        }
                        failedTlsHandshakeCounter.inc();
                    }

                    // trick to not do operations under the lock, take the list
                    // of pending ops and assign it to a new variable, while
                    // emptying the pending ops by just assigning it to a new
                    // list
                    oldPendingOps = pendingOps;
                    pendingOps = new ArrayDeque<>();
                }

                makeWritable();

                // Complete every op that was queued while connecting, with the outcome decided above.
                for (GenericCallback<PerChannelBookieClient> pendingOp : oldPendingOps) {
                    pendingOp.operationComplete(rc, PerChannelBookieClient.this);
                }
            }
        });
    }

    /**
     * Boiler-plate wrapper classes follow.
     *
     */

    // visible for testing
    /**
     * Base class for the state kept for one outstanding request on this channel.
     *
     * <p>An instance records the operation name, the caller-supplied context, the
     * ledger/entry the request refers to, the time the request was started and the
     * stats loggers used to report its outcome. Subclasses provide the response
     * handling and the error-out behaviour for a concrete request type.
     */
    abstract class CompletionValue {
        private final OpStatsLogger opLogger;
        private final OpStatsLogger timeoutOpLogger;
        private final String operationName;
        // MDC snapshot taken in the constructor; null when MDC preservation is disabled.
        private final Map<String, String> mdcContextMap;
        protected Object ctx;
        protected long ledgerId;
        protected long entryId;
        protected long startTime;

        /**
         * Creates the completion and stamps it with the current time.
         *
         * @param operationName   name used in log messages
         * @param ctx             caller context handed back to the callback
         * @param ledgerId        ledger the request refers to
         * @param entryId         entry the request refers to
         * @param opLogger        stats logger that receives the success/failure latency
         * @param timeoutOpLogger stats logger that receives the latency of timed-out requests
         */
        public CompletionValue(String operationName, Object ctx, long ledgerId, long entryId,
                OpStatsLogger opLogger, OpStatsLogger timeoutOpLogger) {
            this.operationName = operationName;
            this.ctx = ctx;
            this.ledgerId = ledgerId;
            this.entryId = entryId;
            this.startTime = MathUtils.nowInNano();
            this.opLogger = opLogger;
            this.timeoutOpLogger = timeoutOpLogger;
            this.mdcContextMap = preserveMdcForTaskExecution ? MDC.getCopyOfContextMap() : null;
        }

        /** Returns the nanoseconds elapsed since {@code startTime}. */
        private long latency() {
            return MathUtils.elapsedNanos(startTime);
        }

        /**
         * Reports the outcome of the operation to {@code opLogger} and records an error
         * for any non-OK code that is not listed in {@code expectedBkOperationErrors}.
         *
         * @param rc the {@link BKException.Code} result of the operation
         */
        void logOpResult(int rc) {
            if (rc != BKException.Code.OK) {
                opLogger.registerFailedEvent(latency(), TimeUnit.NANOSECONDS);
            } else {
                opLogger.registerSuccessfulEvent(latency(), TimeUnit.NANOSECONDS);
            }

            if (rc != BKException.Code.OK && !expectedBkOperationErrors.contains(rc)) {
                recordError();
            }
        }

        /**
         * Times the request out if it has been outstanding for at least
         * {@code readEntryTimeoutNanos}.
         *
         * @return {@code true} if the request was timed out by this call
         */
        boolean maybeTimeout() {
            if (MathUtils.elapsedNanos(startTime) >= readEntryTimeoutNanos) {
                timeout();
                return true;
            } else {
                return false;
            }
        }

        /**
         * Fails the request with {@code TimeoutException} and reports its latency to
         * {@code timeoutOpLogger}.
         */
        void timeout() {
            // Sample the latency before erroring out: errorOut() hands this object to the
            // callback path, and a recycled completion (see AddCompletion) may have its
            // startTime overwritten for a new request before it would be read here.
            final long elapsedNanos = latency();
            errorOut(BKException.Code.TimeoutException);
            timeoutOpLogger.registerSuccessfulEvent(elapsedNanos, TimeUnit.NANOSECONDS);
        }

        /**
         * Logs the received response at debug level.
         *
         * @param status    status carried by the response
         * @param extraInfo additional values, joined with ':' in the log line
         */
        protected void logResponse(StatusCode status, Object... extraInfo) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Got {} response from bookie:{} rc:{}, {}", operationName, addr, status,
                        Joiner.on(":").join(extraInfo));
            }
        }

        /**
         * Converts a protocol status into a {@link BKException.Code} value.
         *
         * @param status        status carried by the response
         * @param defaultStatus code returned (after logging an error) when the status has
         *                      no known mapping
         * @return the mapped code, or {@code defaultStatus} for unknown statuses
         */
        protected int convertStatus(StatusCode status, int defaultStatus) {
            // convert to BKException code
            int rcToRet = statusCodeToExceptionCode(status);
            if (rcToRet == BKException.Code.UNINITIALIZED) {
                LOG.error("{} for failed on bookie {} code {}", operationName, addr, status);
                return defaultStatus;
            } else {
                return rcToRet;
            }
        }

        /** Restores the MDC snapshot captured when this completion was constructed. */
        public void restoreMdcContext() {
            MdcUtils.restoreContext(mdcContextMap);
        }

        /** Fails the request with the subclass's default error code. */
        public abstract void errorOut();

        /**
         * Fails the request with the given code.
         *
         * @param rc the {@link BKException.Code} to report to the callback
         */
        public abstract void errorOut(int rc);

        /** Hook invoked to account for an outstanding request; does nothing by default. */
        public void setOutstanding() {
            // no-op
        }

        /**
         * Submits {@code callback} to the executor, ordered by {@code ledgerId}, after
         * emitting a debug line naming the remote address of the current channel
         * (or "null" when there is none).
         *
         * @param callback the action that delivers the failure to the caller
         */
        protected void errorOutAndRunCallback(final Runnable callback) {
            executor.executeOrdered(ledgerId, new SafeRunnable() {
                @Override
                public void safeRun() {
                    String bAddress = "null";
                    // read the channel field once so the null check and the use agree
                    Channel c = channel;
                    if (c != null && c.remoteAddress() != null) {
                        bAddress = c.remoteAddress().toString();
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Could not write {} request to bookie {} for ledger {}, entry {}", operationName,
                                bAddress, ledgerId, entryId);
                    }
                    callback.run();
                }
            });
        }

        /**
         * Handles a V2 protocol response. The default implementation only logs a
         * warning; request types that support V2 override it.
         */
        public void handleV2Response(long ledgerId, long entryId, StatusCode status,
                BookieProtocol.Response response) {
            LOG.warn("Unhandled V2 response {}", response);
        }

        /**
         * Handles a V3 protocol response.
         *
         * @param response the response received for this request
         */
        public abstract void handleV3Response(BookkeeperProtocol.Response response);
    }

    // visible for testing
    /**
     * Completion for a WriteLAC request. The caller's callback is wrapped so that the
     * operation result is logged before it is invoked and the completion key is
     * released afterwards.
     */
    class WriteLacCompletion extends CompletionValue {
        final WriteLacCallback cb;

        public WriteLacCompletion(final CompletionKey key, final WriteLacCallback originalCallback,
                final Object originalCtx, final long ledgerId) {
            super("WriteLAC", originalCtx, ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, writeLacOpLogger,
                    writeLacTimeoutOpLogger);
            this.cb = new WriteLacCallback() {
                @Override
                public void writeLacComplete(int resultCode, long lid, BookieSocketAddress bookie,
                        Object ignoredCtx) {
                    // the original context is always handed back, whatever the caller passed in
                    logOpResult(resultCode);
                    originalCallback.writeLacComplete(resultCode, lid, bookie, originalCtx);
                    key.release();
                }
            };
        }

        /** Fails the request with {@code BookieHandleNotAvailableException}. */
        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        /** Fails the request with {@code rc} through the wrapped callback. */
        @Override
        public void errorOut(final int rc) {
            final Runnable failure = () -> cb.writeLacComplete(rc, ledgerId, addr, ctx);
            errorOutAndRunCallback(failure);
        }

        /**
         * Completes the request from a V3 response. The status of the WriteLAC body is
         * used only when the outer response status is EOK.
         */
        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            final WriteLacResponse body = response.getWriteLacResponse();
            final StatusCode effectiveStatus;
            if (response.getStatus() == StatusCode.EOK) {
                effectiveStatus = body.getStatus();
            } else {
                effectiveStatus = response.getStatus();
            }
            final long respLedgerId = body.getLedgerId();

            if (LOG.isDebugEnabled()) {
                logResponse(effectiveStatus, "ledger", respLedgerId);
            }
            final int resultCode = convertStatus(effectiveStatus, BKException.Code.WriteException);
            cb.writeLacComplete(resultCode, respLedgerId, addr, ctx);
        }
    }

    /**
     * Completion for a ForceLedger request. The caller's callback is wrapped so that
     * the operation result is logged before it is invoked and the completion key is
     * released afterwards.
     */
    class ForceLedgerCompletion extends CompletionValue {
        final ForceLedgerCallback cb;

        public ForceLedgerCompletion(final CompletionKey key, final ForceLedgerCallback originalCallback,
                final Object originalCtx, final long ledgerId) {
            super("ForceLedger", originalCtx, ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, forceLedgerOpLogger,
                    forceLedgerTimeoutOpLogger);
            this.cb = new ForceLedgerCallback() {
                @Override
                public void forceLedgerComplete(int resultCode, long lid, BookieSocketAddress bookie,
                        Object ignoredCtx) {
                    // the original context is always handed back, whatever the caller passed in
                    logOpResult(resultCode);
                    originalCallback.forceLedgerComplete(resultCode, lid, bookie, originalCtx);
                    key.release();
                }
            };
        }

        /** Fails the request with {@code BookieHandleNotAvailableException}. */
        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        /** Fails the request with {@code rc} through the wrapped callback. */
        @Override
        public void errorOut(final int rc) {
            final Runnable failure = () -> cb.forceLedgerComplete(rc, ledgerId, addr, ctx);
            errorOutAndRunCallback(failure);
        }

        /**
         * Completes the request from a V3 response. The status of the ForceLedger body
         * is used only when the outer response status is EOK.
         */
        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            final ForceLedgerResponse body = response.getForceLedgerResponse();
            final StatusCode effectiveStatus;
            if (response.getStatus() == StatusCode.EOK) {
                effectiveStatus = body.getStatus();
            } else {
                effectiveStatus = response.getStatus();
            }
            final long respLedgerId = body.getLedgerId();

            if (LOG.isDebugEnabled()) {
                logResponse(effectiveStatus, "ledger", respLedgerId);
            }
            final int resultCode = convertStatus(effectiveStatus, BKException.Code.WriteException);
            cb.forceLedgerComplete(resultCode, respLedgerId, addr, ctx);
        }
    }

    // visible for testing
    /**
     * Completion for a ReadLAC request. The caller's callback is wrapped so that the
     * operation result is logged before it is invoked and the completion key is
     * released afterwards.
     */
    class ReadLacCompletion extends CompletionValue {
        final ReadLacCallback cb;

        public ReadLacCompletion(final CompletionKey key, ReadLacCallback originalCallback, final Object ctx,
                final long ledgerId) {
            super("ReadLAC", ctx, ledgerId, BookieProtocol.LAST_ADD_CONFIRMED, readLacOpLogger,
                    readLacTimeoutOpLogger);
            this.cb = new ReadLacCallback() {
                @Override
                public void readLacComplete(int resultCode, long lid, ByteBuf lac, ByteBuf lastEntry,
                        Object callbackCtx) {
                    // unlike the other completions, the context passed to this call is forwarded
                    logOpResult(resultCode);
                    originalCallback.readLacComplete(resultCode, lid, lac, lastEntry, callbackCtx);
                    key.release();
                }
            };
        }

        /** Fails the request with {@code BookieHandleNotAvailableException}. */
        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        /** Fails the request with {@code rc}; both buffers are reported as {@code null}. */
        @Override
        public void errorOut(final int rc) {
            final Runnable failure = () -> cb.readLacComplete(rc, ledgerId, null, null, ctx);
            errorOutAndRunCallback(failure);
        }

        /**
         * Completes the request from a V3 response. Absent LAC / last-entry bodies are
         * reported as empty buffers; the callback receives slices of the wrapped data.
         */
        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            final ReadLacResponse body = response.getReadLacResponse();

            final StatusCode effectiveStatus;
            if (response.getStatus() == StatusCode.EOK) {
                effectiveStatus = body.getStatus();
            } else {
                effectiveStatus = response.getStatus();
            }

            final ByteBuf lac = body.hasLacBody()
                    ? Unpooled.wrappedBuffer(body.getLacBody().asReadOnlyByteBuffer())
                    : Unpooled.EMPTY_BUFFER;
            final ByteBuf lastEntry = body.hasLastEntryBody()
                    ? Unpooled.wrappedBuffer(body.getLastEntryBody().asReadOnlyByteBuffer())
                    : Unpooled.EMPTY_BUFFER;

            if (LOG.isDebugEnabled()) {
                logResponse(effectiveStatus, "ledgerId", ledgerId);
            }

            final int resultCode = convertStatus(effectiveStatus, BKException.Code.ReadException);
            cb.readLacComplete(resultCode, ledgerId, lac.slice(), lastEntry.slice(), ctx);
        }
    }

    // visible for testing
    class ReadCompletion extends CompletionValue {
        final ReadEntryCallback cb;

        public ReadCompletion(final CompletionKey key, final ReadEntryCallback originalCallback,
                final Object originalCtx, long ledgerId, final long entryId) {
            super("Read", originalCtx, ledgerId, entryId, readEntryOpLogger, readTimeoutOpLogger);

            this.cb = new ReadEntryCallback() {
                @Override
                public void readEntryComplete(int rc, long ledgerId, long entryId, ByteBuf buffer, Object ctx) {
                    logOpResult(rc);
                    originalCallback.readEntryComplete(rc, ledgerId, entryId, buffer, originalCtx);
                    key.release();
                }
            };
        }

        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        @Override
        public void errorOut(final int rc) {
            errorOutAndRunCallback(() -> cb.readEntryComplete(rc, ledgerId, entryId, null, ctx));
        }

        @Override
        public void setOutstanding() {
            readEntryOutstanding.inc();
        }

        @Override
        public void handleV2Response(long ledgerId, long entryId, StatusCode status,
                BookieProtocol.Response response) {
            readEntryOutstanding.dec();
            if (!(response instanceof BookieProtocol.ReadResponse)) {
                return;
            }
            BookieProtocol.ReadResponse readResponse = (BookieProtocol.ReadResponse) response;
            handleReadResponse(ledgerId, entryId, status, readResponse.getData(), INVALID_ENTRY_ID, -1L);
        }

        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            readEntryOutstanding.dec();
            ReadResponse readResponse = response.getReadResponse();
            StatusCode status = response.getStatus() == StatusCode.EOK ? readResponse.getStatus()
                    : response.getStatus();
            ByteBuf buffer = Unpooled.EMPTY_BUFFER;
            if (readResponse.hasBody()) {
                buffer = Unpooled.wrappedBuffer(readResponse.getBody().asReadOnlyByteBuffer());
            }
            long maxLAC = INVALID_ENTRY_ID;
            if (readResponse.hasMaxLAC()) {
                maxLAC = readResponse.getMaxLAC();
            }
            long lacUpdateTimestamp = -1L;
            if (readResponse.hasLacUpdateTimestamp()) {
                lacUpdateTimestamp = readResponse.getLacUpdateTimestamp();
            }
            handleReadResponse(readResponse.getLedgerId(), readResponse.getEntryId(), status, buffer, maxLAC,
                    lacUpdateTimestamp);
            buffer.release(); // meaningless using unpooled, but client may expect to hold the last reference
        }

        private void handleReadResponse(long ledgerId, long entryId, StatusCode status, ByteBuf buffer, long maxLAC, // max known lac piggy-back from bookies
                long lacUpdateTimestamp) { // the timestamp when the lac is updated.
            int readableBytes = buffer.readableBytes();
            if (LOG.isDebugEnabled()) {
                logResponse(status, "ledger", ledgerId, "entry", entryId, "entryLength", readableBytes);
            }

            int rc = convertStatus(status, BKException.Code.ReadException);

            if (maxLAC > INVALID_ENTRY_ID && (ctx instanceof ReadEntryCallbackCtx)) {
                ((ReadEntryCallbackCtx) ctx).setLastAddConfirmed(maxLAC);
            }
            if (lacUpdateTimestamp > -1L && (ctx instanceof ReadLastConfirmedAndEntryContext)) {
                ((ReadLastConfirmedAndEntryContext) ctx).setLacUpdateTimestamp(lacUpdateTimestamp);
            }
            cb.readEntryComplete(rc, ledgerId, entryId, buffer.slice(), ctx);
        }
    }

    /**
     * Completion for the StartTLS request. Its internal callback only logs the
     * operation result and releases the completion key; the outcome of the
     * negotiation is acted on directly in {@link #handleV3Response}.
     */
    class StartTLSCompletion extends CompletionValue {
        final StartTLSCallback cb;

        public StartTLSCompletion(final CompletionKey key) {
            super("StartTLS", null, -1, -1, startTLSOpLogger, startTLSTimeoutOpLogger);
            this.cb = new StartTLSCallback() {
                @Override
                public void startTLSComplete(int resultCode, Object ignoredCtx) {
                    logOpResult(resultCode);
                    key.release();
                }
            };
        }

        /** Fails the request with {@code BookieHandleNotAvailableException}. */
        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        /** Fails the TLS setup with {@code rc}. */
        @Override
        public void errorOut(final int rc) {
            failTLS(rc);
        }

        /**
         * Handles the StartTLS response: completes the internal callback, then either
         * fails TLS (state changed, or non-EOK status) or starts the TLS handshake.
         */
        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            final StatusCode status = response.getStatus();

            if (LOG.isDebugEnabled()) {
                logResponse(status);
            }

            final int resultCode = convertStatus(status, BKException.Code.SecurityException);

            // Cancel START_TLS request timeout
            cb.startTLSComplete(resultCode, null);

            if (state != ConnectionState.START_TLS) {
                LOG.error("Connection state changed before TLS response received");
                failTLS(BKException.Code.BookieHandleNotAvailableException);
                return;
            }
            if (status != StatusCode.EOK) {
                LOG.error("Client received error {} during TLS negotiation", status);
                failTLS(BKException.Code.SecurityException);
                return;
            }
            initTLSHandshake();
        }

    }

    // visible for testing
    /**
     * Completion for a GetBookieInfo request. The caller's callback is wrapped so
     * that the operation result is logged before it is invoked and the completion
     * key is released afterwards.
     */
    class GetBookieInfoCompletion extends CompletionValue {
        final GetBookieInfoCallback cb;

        public GetBookieInfoCompletion(final CompletionKey key, final GetBookieInfoCallback origCallback,
                final Object origCtx) {
            super("GetBookieInfo", origCtx, 0L, 0L, getBookieInfoOpLogger, getBookieInfoTimeoutOpLogger);
            this.cb = new GetBookieInfoCallback() {
                @Override
                public void getBookieInfoComplete(int resultCode, BookieInfo info, Object ignoredCtx) {
                    // the original context is always handed back, whatever the caller passed in
                    logOpResult(resultCode);
                    origCallback.getBookieInfoComplete(resultCode, info, origCtx);
                    key.release();
                }
            };
        }

        /** Fails the request with {@code BookieHandleNotAvailableException}. */
        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        /** Fails the request with {@code rc}, reporting a default-constructed {@code BookieInfo}. */
        @Override
        public void errorOut(final int rc) {
            final Runnable failure = () -> cb.getBookieInfoComplete(rc, new BookieInfo(), ctx);
            errorOutAndRunCallback(failure);
        }

        /**
         * Completes the request from a V3 response. The status of the GetBookieInfo
         * body is used only when the outer response status is EOK.
         */
        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            final GetBookieInfoResponse body = response.getGetBookieInfoResponse();
            final StatusCode effectiveStatus;
            if (response.getStatus() == StatusCode.EOK) {
                effectiveStatus = body.getStatus();
            } else {
                effectiveStatus = response.getStatus();
            }

            final long freeBytes = body.getFreeDiskSpace();
            final long totalBytes = body.getTotalDiskCapacity();

            if (LOG.isDebugEnabled()) {
                logResponse(effectiveStatus, "freeDisk", freeBytes, "totalDisk", totalBytes);
            }

            final int resultCode = convertStatus(effectiveStatus, BKException.Code.ReadException);
            cb.getBookieInfoComplete(resultCode, new BookieInfo(totalBytes, freeBytes), ctx);
        }
    }

    /**
     * Completion for a GetListOfEntriesOfLedger request. The caller's callback is
     * wrapped so that the operation result is logged before it is invoked and the
     * completion key is released afterwards.
     */
    class GetListOfEntriesOfLedgerCompletion extends CompletionValue {
        final GetListOfEntriesOfLedgerCallback cb;

        public GetListOfEntriesOfLedgerCompletion(final CompletionKey key,
                final GetListOfEntriesOfLedgerCallback origCallback, final long ledgerId) {
            super("GetListOfEntriesOfLedger", null, ledgerId, 0L, getListOfEntriesOfLedgerCompletionOpLogger,
                    getListOfEntriesOfLedgerCompletionTimeoutOpLogger);
            this.cb = new GetListOfEntriesOfLedgerCallback() {
                @Override
                public void getListOfEntriesOfLedgerComplete(int resultCode, long lid,
                        AvailabilityOfEntriesOfLedger availability) {
                    logOpResult(resultCode);
                    origCallback.getListOfEntriesOfLedgerComplete(resultCode, lid, availability);
                    key.release();
                }
            };
        }

        /** Fails the request with {@code BookieHandleNotAvailableException}. */
        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        /** Fails the request with {@code rc}; the availability is reported as {@code null}. */
        @Override
        public void errorOut(final int rc) {
            final Runnable failure = () -> cb.getListOfEntriesOfLedgerComplete(rc, ledgerId, null);
            errorOutAndRunCallback(failure);
        }

        /**
         * Completes the request from a V3 response. The availability object is built
         * only when the converted result code is OK; otherwise {@code null} is reported.
         */
        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            final GetListOfEntriesOfLedgerResponse body = response.getGetListOfEntriesOfLedgerResponse();

            final StatusCode effectiveStatus;
            if (response.getStatus() == StatusCode.EOK) {
                effectiveStatus = body.getStatus();
            } else {
                effectiveStatus = response.getStatus();
            }

            final ByteBuf availabilityBytes = body.hasAvailabilityOfEntriesOfLedger()
                    ? Unpooled.wrappedBuffer(body.getAvailabilityOfEntriesOfLedger().asReadOnlyByteBuffer())
                    : Unpooled.EMPTY_BUFFER;

            if (LOG.isDebugEnabled()) {
                logResponse(effectiveStatus, "ledgerId", ledgerId);
            }

            final int resultCode = convertStatus(effectiveStatus, BKException.Code.ReadException);
            final AvailabilityOfEntriesOfLedger availability = (resultCode == BKException.Code.OK)
                    ? new AvailabilityOfEntriesOfLedger(availabilityBytes.slice())
                    : null;
            cb.getListOfEntriesOfLedgerComplete(resultCode, ledgerId, availability);
        }
    }

    /** Pool of {@link AddCompletion} instances; each new instance is bound to its recycler handle. */
    private final Recycler<AddCompletion> addCompletionRecycler = new Recycler<AddCompletion>() {
        @Override
        protected AddCompletion newObject(Recycler.Handle<AddCompletion> recyclerHandle) {
            final AddCompletion fresh = new AddCompletion(recyclerHandle);
            return fresh;
        }
    };

    /**
     * Obtains an {@link AddCompletion} from the recycler and arms it for a new request.
     *
     * @param key              completion key released when the request completes
     * @param originalCallback callback to notify on completion
     * @param originalCtx      context handed back to the callback
     * @param ledgerId         ledger being written to
     * @param entryId          entry being written
     * @return the armed completion
     */
    AddCompletion acquireAddCompletion(final CompletionKey key, final WriteCallback originalCallback,
            final Object originalCtx, final long ledgerId, final long entryId) {
        final AddCompletion pooled = addCompletionRecycler.get();
        pooled.reset(key, originalCallback, originalCtx, ledgerId, entryId);
        return pooled;
    }

    // visible for testing
    class AddCompletion extends CompletionValue implements WriteCallback {
        final Recycler.Handle<AddCompletion> handle;

        CompletionKey key = null;
        WriteCallback originalCallback = null;

        AddCompletion(Recycler.Handle<AddCompletion> handle) {
            super("Add", null, -1, -1, addEntryOpLogger, addTimeoutOpLogger);
            this.handle = handle;
        }

        void reset(final CompletionKey key, final WriteCallback originalCallback, final Object originalCtx,
                final long ledgerId, final long entryId) {
            this.key = key;
            this.originalCallback = originalCallback;
            this.ctx = originalCtx;
            this.ledgerId = ledgerId;
            this.entryId = entryId;
            this.startTime = MathUtils.nowInNano();
        }

        @Override
        public void writeComplete(int rc, long ledgerId, long entryId, BookieSocketAddress addr, Object ctx) {
            logOpResult(rc);
            originalCallback.writeComplete(rc, ledgerId, entryId, addr, ctx);
            key.release();
            handle.recycle(this);
        }

        @Override
        boolean maybeTimeout() {
            if (MathUtils.elapsedNanos(startTime) >= addEntryTimeoutNanos) {
                timeout();
                return true;
            } else {
                return false;
            }
        }

        @Override
        public void errorOut() {
            errorOut(BKException.Code.BookieHandleNotAvailableException);
        }

        @Override
        public void errorOut(final int rc) {
            errorOutAndRunCallback(() -> writeComplete(rc, ledgerId, entryId, addr, ctx));
        }

        @Override
        public void setOutstanding() {
            addEntryOutstanding.inc();
        }

        @Override
        public void handleV2Response(long ledgerId, long entryId, StatusCode status,
                BookieProtocol.Response response) {
            addEntryOutstanding.dec();
            handleResponse(ledgerId, entryId, status);
        }

        @Override
        public void handleV3Response(BookkeeperProtocol.Response response) {
            addEntryOutstanding.dec();
            AddResponse addResponse = response.getAddResponse();
            StatusCode status = response.getStatus() == StatusCode.EOK ? addResponse.getStatus()
                    : response.getStatus();
            handleResponse(addResponse.getLedgerId(), addResponse.getEntryId(), status);
        }

        private void handleResponse(long ledgerId, long entryId, StatusCode status) {
            if (LOG.isDebugEnabled()) {
                logResponse(status, "ledger", ledgerId, "entry", entryId);
            }

            int rc = convertStatus(status, BKException.Code.WriteException);
            writeComplete(rc, ledgerId, entryId, addr, ctx);
        }
    }

    // visible for testing
    /**
     * Creates a completion key for a V3 request.
     *
     * @param txnId         transaction id of the request
     * @param operationType type of the request
     * @return a new {@link V3CompletionKey}
     */
    CompletionKey newCompletionKey(long txnId, OperationType operationType) {
        final V3CompletionKey v3Key = new V3CompletionKey(txnId, operationType);
        return v3Key;
    }

    /**
     * Completion key for V3 requests, identified by transaction id and operation type.
     */
    class V3CompletionKey extends CompletionKey {

        public V3CompletionKey(long txnId, OperationType operationType) {
            super(txnId, operationType);
        }

        /** Two V3 keys are equal when both the transaction id and the operation type match. */
        @Override
        public boolean equals(Object obj) {
            if (obj instanceof V3CompletionKey) {
                final V3CompletionKey other = (V3CompletionKey) obj;
                return txnId == other.txnId && operationType == other.operationType;
            }
            return false;
        }

        /** Hashes on the transaction id only (narrowed to an int). */
        @Override
        public int hashCode() {
            final int narrowedTxnId = (int) txnId;
            return narrowedTxnId;
        }

        @Override
        public String toString() {
            return String.format("TxnId(%d), OperationType(%s)", txnId, operationType);
        }

    }

    /**
     * Base type of the keys under which outstanding requests are tracked. It carries a
     * transaction id and an operation type; the operation type is not final so that
     * reusable subclasses can reassign it.
     */
    abstract class CompletionKey {
        final long txnId;
        OperationType operationType;

        CompletionKey(long txnId, OperationType operationType) {
            this.operationType = operationType;
            this.txnId = txnId;
        }

        /** Called when the request completes; does nothing unless a subclass overrides it. */
        public void release() {
            // nothing to release by default
        }
    }

    /**
     * Note : Helper functions follow
     */

    /**
     * Translates a protocol {@link StatusCode} into the corresponding
     * {@link BKException.Code} value.
     *
     * @param status the protocol status code to translate
     * @return the matching exception code, or {@link BKException.Code#UNINITIALIZED} if the
     *         status code is unknown.
     */
    private int statusCodeToExceptionCode(StatusCode status) {
        final int exceptionCode;
        switch (status) {
        case EOK:
            exceptionCode = BKException.Code.OK;
            break;
        case ENOENTRY:
            exceptionCode = BKException.Code.NoSuchEntryException;
            break;
        case ENOLEDGER:
            exceptionCode = BKException.Code.NoSuchLedgerExistsException;
            break;
        case EBADVERSION:
            exceptionCode = BKException.Code.ProtocolVersionException;
            break;
        case EUA:
            exceptionCode = BKException.Code.UnauthorizedAccessException;
            break;
        case EFENCED:
            exceptionCode = BKException.Code.LedgerFencedException;
            break;
        case EREADONLY:
            exceptionCode = BKException.Code.WriteOnReadOnlyBookieException;
            break;
        case ETOOMANYREQUESTS:
            exceptionCode = BKException.Code.TooManyRequestsException;
            break;
        default:
            // no known mapping; callers substitute their own default
            exceptionCode = BKException.Code.UNINITIALIZED;
            break;
        }
        return exceptionCode;
    }

    /**
     * Registers {@code value} under {@code key}. If the key is already present in the
     * primary map, the value is stored in the conflicts multimap instead.
     */
    private void putCompletionKeyValue(CompletionKey key, CompletionValue value) {
        // will only happen for V2 keys, as V3 have unique txnid
        final boolean keyAlreadyTracked = completionObjects.putIfAbsent(key, value) != null;
        if (keyAlreadyTracked) {
            // There's a pending read request on same ledger/entry. Use the multimap to track all of them
            completionObjectsV2Conflicts.put(key, value);
        }
    }

    /**
     * Removes and returns a completion registered under {@code key}, looking in the
     * primary map first and in the conflicts multimap second.
     *
     * @return the removed completion, or {@code null} if none is registered
     */
    private CompletionValue getCompletionValue(CompletionKey key) {
        final CompletionValue fromPrimary = completionObjects.remove(key);
        if (fromPrimary != null) {
            return fromPrimary;
        }
        // If there's no completion object here, try in the multimap
        return completionObjectsV2Conflicts.removeAny(key).orElse(null);
    }

    /** Returns the next transaction id by incrementing {@code txnIdGenerator}. */
    private long getTxnId() {
        final long nextTxnId = txnIdGenerator.incrementAndGet();
        return nextTxnId;
    }

    /** Pool of {@link V2CompletionKey} instances; each new key is bound to its recycler handle. */
    private final Recycler<V2CompletionKey> v2KeyRecycler = new Recycler<V2CompletionKey>() {
        @Override
        protected V2CompletionKey newObject(Recycler.Handle<V2CompletionKey> recyclerHandle) {
            final V2CompletionKey fresh = new V2CompletionKey(recyclerHandle);
            return fresh;
        }
    };

    /**
     * Obtains a {@link V2CompletionKey} from the recycler and initialises it.
     *
     * @param ledgerId      ledger the request refers to
     * @param entryId       entry the request refers to
     * @param operationType type of the request
     * @return the initialised key
     */
    V2CompletionKey acquireV2Key(long ledgerId, long entryId, OperationType operationType) {
        final V2CompletionKey pooledKey = v2KeyRecycler.get();
        pooledKey.reset(ledgerId, entryId, operationType);
        return pooledKey;
    }

    /**
     * Recyclable completion key for V2 requests, identified by ledger id, entry id
     * and operation type. Releasing the key returns it to its recycler.
     */
    private class V2CompletionKey extends CompletionKey {
        private final Handle<V2CompletionKey> recyclerHandle;
        long ledgerId;
        long entryId;

        private V2CompletionKey(Handle<V2CompletionKey> handle) {
            // the transaction id is fixed at -1 and the operation type is assigned in reset()
            super(-1, null);
            this.recyclerHandle = handle;
        }

        /** Re-initialises this key for a new request. */
        void reset(long ledgerId, long entryId, OperationType operationType) {
            this.operationType = operationType;
            this.ledgerId = ledgerId;
            this.entryId = entryId;
        }

        /** Two V2 keys are equal when ledger id, entry id and operation type all match. */
        @Override
        public boolean equals(Object object) {
            if (object instanceof V2CompletionKey) {
                final V2CompletionKey other = (V2CompletionKey) object;
                return entryId == other.entryId
                        && ledgerId == other.ledgerId
                        && operationType == other.operationType;
            }
            return false;
        }

        /** Hashes on ledger id and entry id; the operation type is not part of the hash. */
        @Override
        public int hashCode() {
            final int ledgerHash = Long.hashCode(ledgerId);
            final int entryHash = Long.hashCode(entryId);
            return ledgerHash * 31 + entryHash;
        }

        @Override
        public String toString() {
            return String.format("%d:%d %s", ledgerId, entryId, operationType);
        }

        /** Returns this key to the recycler. */
        @Override
        public void release() {
            recyclerHandle.recycle(this);
        }
    }

    /**
     * Adds the current MDC entries to the request when MDC preservation is enabled;
     * otherwise returns the builder untouched.
     *
     * @param builder the request builder to decorate
     * @return the same builder
     */
    Request.Builder withRequestContext(Request.Builder builder) {
        return preserveMdcForTaskExecution ? appendRequestContext(builder) : builder;
    }

    /**
     * Copies every entry of the current MDC context map into the request as a
     * key/value context pair. Does nothing when the MDC map is absent or empty.
     *
     * @param builder the request builder to add the context pairs to
     * @return the same builder
     */
    static Request.Builder appendRequestContext(Request.Builder builder) {
        final Map<String, String> mdcSnapshot = MDC.getCopyOfContextMap();
        if (mdcSnapshot != null && !mdcSnapshot.isEmpty()) {
            mdcSnapshot.forEach((mdcKey, mdcValue) -> builder.addRequestContext(
                    BookkeeperProtocol.ContextPair.newBuilder().setKey(mdcKey).setValue(mdcValue).build()));
        }
        return builder;
    }

    /**
     * Wraps the listener in a {@link ContextPreservingFutureListener} when MDC
     * preservation is enabled; otherwise returns the listener itself.
     */
    ChannelFutureListener contextPreservingListener(ChannelFutureListener listener) {
        if (preserveMdcForTaskExecution) {
            return new ContextPreservingFutureListener(listener);
        }
        return listener;
    }

    /**
     * Decorator to preserve MDC for connection listener.
     *
     * <p>The MDC context map is copied when the decorator is created, restored right
     * before the wrapped listener runs, and the MDC is cleared once it returns or throws.
     */
    static class ContextPreservingFutureListener implements ChannelFutureListener {
        private final ChannelFutureListener listener;
        private final Map<String, String> mdcContextMap;

        ContextPreservingFutureListener(ChannelFutureListener listener) {
            // snapshot first: this runs on the thread that registers the listener
            this.mdcContextMap = MDC.getCopyOfContextMap();
            this.listener = listener;
        }

        @Override
        public void operationComplete(ChannelFuture future) throws Exception {
            MdcUtils.restoreContext(this.mdcContextMap);
            try {
                this.listener.operationComplete(future);
            } finally {
                // leave no MDC state behind on the thread that ran the listener
                MDC.clear();
            }
        }
    }

    /**
     * Connection listener.
     */
    class ConnectionFutureListener implements ChannelFutureListener {
        private final long startTime;

        ConnectionFutureListener(long startTime) {
            this.startTime = startTime;
        }

        /**
         * Reacts to the completion of the observed channel future and advances the client's
         * connection state accordingly.
         *
         * <p>The connect timer is always updated from the raw outcome of the future. The state
         * transition is then decided while holding the {@code PerChannelBookieClient} monitor:
         * <ul>
         *   <li>success while {@code CONNECTING} on an active channel: adopt the channel, then either
         *       start the TLS exchange (when {@code shFactory} is set; pending ops stay queued and
         *       the method returns) or become {@code CONNECTED};</li>
         *   <li>success while {@code START_TLS}: become {@code CONNECTED} and notify the auth provider
         *       of the protocol upgrade;</li>
         *   <li>success while {@code CLOSED} or {@code DISCONNECTED}: close the late channel and fail
         *       the pending ops;</li>
         *   <li>success while already {@code CONNECTED}: close the redundant channel and return;</li>
         *   <li>anything else: treat as a failed connect, close the channel and move to
         *       {@code DISCONNECTED} unless the client is already {@code CLOSED}.</li>
         * </ul>
         * Pending callbacks are swapped out under the lock and invoked after it is released.
         *
         * @param future the completed channel future
         * @throws Exception as declared by the overridden listener method
         */
        @Override
        public void operationComplete(ChannelFuture future) throws Exception {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Channel connected ({}) {}", future.isSuccess(), future.channel());
            }
            int rc;
            Queue<GenericCallback<PerChannelBookieClient>> oldPendingOps;

            /* We fill in the timer based on whether the connect operation itself succeeded regardless of
             * whether there was a race */
            if (future.isSuccess()) {
                PerChannelBookieClient.this.connectTimer.registerSuccessfulEvent(MathUtils.elapsedNanos(startTime),
                        TimeUnit.NANOSECONDS);
            } else {
                PerChannelBookieClient.this.connectTimer.registerFailedEvent(MathUtils.elapsedNanos(startTime),
                        TimeUnit.NANOSECONDS);
            }

            synchronized (PerChannelBookieClient.this) {
                if (future.isSuccess() && state == ConnectionState.CONNECTING && future.channel().isActive()) {
                    rc = BKException.Code.OK;
                    channel = future.channel();
                    if (shFactory != null) {
                        // TLS is configured: the connection is not usable yet, so the pending ops stay
                        // queued and the state is advanced by initiateTLS() instead.
                        LOG.info("Successfully connected to bookie: {}", future.channel());
                        makeWritable();
                        initiateTLS();
                        return;
                    } else {
                        // Single log line carrying both the bookie address and the channel.
                        LOG.info("Successfully connected to bookie: {} {}", addr, future.channel());
                        state = ConnectionState.CONNECTED;
                        activeNonTlsChannelCounter.inc();
                    }
                } else if (future.isSuccess() && state == ConnectionState.START_TLS) {
                    rc = BKException.Code.OK;
                    LOG.info("Successfully connected to bookie using TLS: {}", addr);

                    state = ConnectionState.CONNECTED;
                    AuthHandler.ClientSideHandler authHandler = future.channel().pipeline()
                            .get(AuthHandler.ClientSideHandler.class);
                    authHandler.authProvider.onProtocolUpgrade();
                    activeTlsChannelCounter.inc();
                } else if (future.isSuccess()
                        && (state == ConnectionState.CLOSED || state == ConnectionState.DISCONNECTED)) {
                    LOG.warn("Closed before connection completed, clean up: {}, current state {}", future.channel(),
                            state);
                    closeChannel(future.channel());
                    rc = BKException.Code.BookieHandleNotAvailableException;
                    channel = null;
                } else if (future.isSuccess() && state == ConnectionState.CONNECTED) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Already connected with another channel({}), so close the new channel({})",
                                channel, future.channel());
                    }
                    closeChannel(future.channel());
                    return; // pendingOps should have been completed when other channel connected
                } else {
                    Throwable cause = future.cause();
                    if (cause instanceof UnknownHostException || cause instanceof NativeIoException) {
                        // Don't log stack trace for common errors
                        LOG.warn("Could not connect to bookie: {}/{}, current state {} : {}", future.channel(),
                                addr, state, cause.getMessage());
                    } else {
                        // Regular exceptions, include stack trace
                        LOG.error("Could not connect to bookie: {}/{}, current state {} : ", future.channel(), addr,
                                state, cause);
                    }

                    rc = BKException.Code.BookieHandleNotAvailableException;
                    closeChannel(future.channel());
                    channel = null;
                    // A CLOSED client must stay closed; every other state falls back to DISCONNECTED.
                    if (state != ConnectionState.CLOSED) {
                        state = ConnectionState.DISCONNECTED;
                    }
                    failedConnectionCounter.inc();
                }

                // trick to not do operations under the lock, take the list
                // of pending ops and assign it to a new variable, while
                // emptying the pending ops by just assigning it to a new
                // list
                oldPendingOps = pendingOps;
                pendingOps = new ArrayDeque<>();
            }

            // Callbacks run outside the monitor with the result code chosen above.
            for (GenericCallback<PerChannelBookieClient> pendingOp : oldPendingOps) {
                pendingOp.operationComplete(rc, PerChannelBookieClient.this);
            }

            makeWritable();
        }
    }

    /**
     * Sends a {@code START_TLS} request on the current channel and moves the client into the
     * {@code START_TLS} state.
     *
     * <p>Expects the client to be in the {@code CONNECTING} state (checked by assertion only). A
     * {@code StartTLSCompletion} is registered under the request's completion key before the
     * request is written.
     */
    private void initiateTLS() {
        LOG.info("Initializing TLS to {}", channel);
        assert state == ConnectionState.CONNECTING;

        final long txnId = getTxnId();
        final CompletionKey tlsKey = new V3CompletionKey(txnId, OperationType.START_TLS);
        // Register the completion first so it is already present when the request goes out.
        completionObjects.put(tlsKey, new StartTLSCompletion(tlsKey));

        final BKPacketHeader header = BKPacketHeader.newBuilder()
                .setVersion(ProtocolVersion.VERSION_THREE)
                .setOperation(OperationType.START_TLS)
                .setTxnId(txnId)
                .build();
        final BookkeeperProtocol.Request startTlsRequest =
                withRequestContext(BookkeeperProtocol.Request.newBuilder())
                        .setHeader(header)
                        .setStartTLSRequest(BookkeeperProtocol.StartTLSRequest.newBuilder().build())
                        .build();

        // The state is switched before the write is issued.
        state = ConnectionState.START_TLS;
        writeAndFlush(channel, tlsKey, startTlsRequest);
    }

    /**
     * Handles a failed TLS setup: disconnects, fails every queued callback and bumps the
     * failed-handshake counter.
     *
     * <p>The disconnect and the swap of the pending-ops queue happen under this object's monitor;
     * the callbacks themselves are invoked after the monitor is released, each receiving
     * {@code rc} and a {@code null} client.
     *
     * @param rc result code passed to every pending callback
     */
    private void failTLS(int rc) {
        LOG.error("TLS failure on: {}, rc: {}", channel, rc);

        final Queue<GenericCallback<PerChannelBookieClient>> drainedOps;
        synchronized (this) {
            disconnect();
            // Take ownership of the current queue and leave a fresh, empty one behind.
            drainedOps = pendingOps;
            pendingOps = new ArrayDeque<>();
        }

        for (GenericCallback<PerChannelBookieClient> waiter : drainedOps) {
            waiter.operationComplete(rc, null);
        }
        failedTlsHandshakeCounter.inc();
    }
}