com.pinterest.terrapin.client.TerrapinClient.java Source code

Java tutorial

Introduction

Here is the source code for com.pinterest.terrapin.client.TerrapinClient.java

Source

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.pinterest.terrapin.client;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.pinterest.terrapin.Constants;
import com.pinterest.terrapin.TerrapinUtil;
import com.pinterest.terrapin.thrift.generated.MultiKey;
import com.pinterest.terrapin.thrift.generated.PartitionerType;
import com.pinterest.terrapin.thrift.generated.TerrapinGetErrorCode;
import com.pinterest.terrapin.thrift.generated.TerrapinGetException;
import com.pinterest.terrapin.thrift.generated.TerrapinInternalGetRequest;
import com.pinterest.terrapin.thrift.generated.TerrapinResponse;
import com.pinterest.terrapin.thrift.generated.TerrapinServerInternal;
import com.pinterest.terrapin.thrift.generated.TerrapinSingleResponse;
import com.pinterest.terrapin.zookeeper.FileSetInfo;
import com.pinterest.terrapin.zookeeper.ViewInfo;
import com.twitter.finagle.Service;
import com.twitter.finagle.builder.ClientBuilder;
import com.twitter.finagle.thrift.ClientId;
import com.twitter.finagle.thrift.ThriftClientFramedCodecFactory;
import com.twitter.finagle.thrift.ThriftClientRequest;
import com.twitter.ostrich.stats.Stats;
import com.twitter.util.Duration;
import com.twitter.util.ExceptionalFunction;
import com.twitter.util.ExecutorServiceFuturePool;
import com.twitter.util.Function;
import com.twitter.util.Function0;
import com.twitter.util.Future;
import com.twitter.util.FuturePool;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;

import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * A client for communicating with the terrapin cluster. The client's job is to batch keys
 * by partitions within a resource and determining the right set of instances to issue the
 * requests against. It then collates the responses and returns back to the client.
 *
 * The public API(s) are getMany, getManyNoRetries, getOne and getOneNoRetries.
 *
 * TODO(varun): Add speculative execution for full shard failures. This would give us lower
 * latency.
 */
public class TerrapinClient {
    private static final Logger LOG = LoggerFactory.getLogger(TerrapinClient.class);

    // Of the format, terrapin-client-<cluster_name>-
    private String statsPrefix;
    private FileSetViewManager fileSetViewManager;

    private Cache<String, Pair<Service<ThriftClientRequest, byte[]>, TerrapinServerInternal.ServiceIface>> thriftClientCache;
    private int targetPort;
    private int connectTimeoutMs;
    private int timeoutMs;

    // Future pool establishing connections.
    private FuturePool connectionfuturePool;

    public TerrapinClient(PropertiesConfiguration configuration, int targetPort, int connectTimeoutMs,
            int timeoutMs) throws Exception {
        String zkQuorum = configuration.getString(Constants.ZOOKEEPER_QUORUM);
        String clusterName = configuration.getString(Constants.HELIX_CLUSTER);
        FileSetViewManager fileSetViewManager = new FileSetViewManager(
                TerrapinUtil.getZooKeeperClient(zkQuorum, 30), clusterName);
        init(fileSetViewManager, clusterName, targetPort, connectTimeoutMs, timeoutMs);
    }

    public TerrapinClient(FileSetViewManager fileSetViewManager, String clusterName, int targetPort,
            int connectTimeoutMs, int timeoutMs) throws Exception {
        init(fileSetViewManager, clusterName, targetPort, connectTimeoutMs, timeoutMs);
    }

    private void init(FileSetViewManager fileSetViewManager, String clusterName, int targetPort,
            int connectTimeoutMs, int timeoutMs) throws Exception {
        this.statsPrefix = "terrapin-client-" + clusterName + "-";
        this.fileSetViewManager = fileSetViewManager;
        this.thriftClientCache = CacheBuilder.newBuilder().maximumSize(5000).expireAfterAccess(60, TimeUnit.MINUTES)
                .removalListener(
                        new RemovalListener<String, Pair<Service<ThriftClientRequest, byte[]>, TerrapinServerInternal.ServiceIface>>() {
                            @Override
                            public void onRemoval(
                                    RemovalNotification<String, Pair<Service<ThriftClientRequest, byte[]>, TerrapinServerInternal.ServiceIface>> removalNotification) {
                                removalNotification.getValue().getLeft().release();
                                LOG.info("Closing client connections to " + removalNotification.getKey());
                            }
                        })
                .build();
        this.targetPort = targetPort;
        this.connectTimeoutMs = connectTimeoutMs;
        this.timeoutMs = timeoutMs;
        this.connectionfuturePool = new ExecutorServiceFuturePool(Executors.newFixedThreadPool(10));
    }

    protected Future<TerrapinServerInternal.ServiceIface> getClientFuture(final String hostName) {
        return connectionfuturePool.apply(new Function0<TerrapinServerInternal.ServiceIface>() {
            @Override
            public TerrapinServerInternal.ServiceIface apply() {
                Pair<Service<ThriftClientRequest, byte[]>, TerrapinServerInternal.ServiceIface> client = thriftClientCache
                        .getIfPresent(hostName);
                if (client == null) {
                    Service<ThriftClientRequest, byte[]> service = ClientBuilder
                            .safeBuild(ClientBuilder.get().hosts(new InetSocketAddress(hostName, targetPort))
                                    .codec(new ThriftClientFramedCodecFactory(Option.<ClientId>empty())).retries(1)
                                    .connectTimeout(Duration.fromMilliseconds(connectTimeoutMs))
                                    .requestTimeout(Duration.fromMilliseconds(timeoutMs)).hostConnectionLimit(100)
                                    .failFast(false));

                    client = new ImmutablePair(service,
                            new TerrapinServerInternal.ServiceToClient(service, new TBinaryProtocol.Factory()));
                    // A release is automatically called when an element is kicked out as part of the
                    // removal listener. Doing a double release causes other issues.
                    thriftClientCache.asMap().putIfAbsent(hostName, client);
                }
                return client.getRight();
            }
        });
    }

    /**
     * Issues multiple reads against a fileset but does not retry against replicas. Note
     * that partial failures are communicated through an errorCode inside the map
     * in the TerrapinResponse object.
     *
     * @param fileSet The file set to query.
     * @param keyList The list of keys to query.
     * @return a TerrapinResponse future.
     */
    public Future<TerrapinResponse> getManyNoRetries(String fileSet, Set<ByteBuffer> keyList) {
        return getManyHelper(fileSet, keyList, false);
    }

    /**
     * Issues multiple reads against a fileset with retries against replicas. Note
     * that partial failures are communicated through an errorCode inside the map
     * in the TerrapinResponse object.
     *
     * @param fileSet The file set to query.
     * @param keyList The list of keys to query.
     * @return a TerrapinResponse future.
     */
    public Future<TerrapinResponse> getMany(String fileSet, Set<ByteBuffer> keyList) {
        return getManyHelper(fileSet, keyList, true);
    }

    private ExceptionalFunction<TerrapinResponse, TerrapinSingleResponse> getManyToGetOneFunction(
            final ByteBuffer key) {
        return new ExceptionalFunction<TerrapinResponse, TerrapinSingleResponse>() {
            @Override
            public TerrapinSingleResponse applyE(TerrapinResponse response) throws Throwable {
                TerrapinSingleResponse singleResponse = response.getResponseMap().get(key);
                if (singleResponse == null) {
                    return new TerrapinSingleResponse();
                } else {
                    if (singleResponse.isSetErrorCode()) {
                        throw new TerrapinGetException().setErrorCode(singleResponse.getErrorCode());
                    } else {
                        return singleResponse;
                    }
                }
            }
        };
    }

    /**
     * Get one API without any retries on a terrapin cluster. This will throw an exception if
     * an error occurs. If the value is not found, the value field in the response will not be
     * set.
     *
     * @param fileSet The fileset to use query against.
     * @param key The key to query against.
     * @return The TerrapinSingleResponse object.
     */
    public Future<TerrapinSingleResponse> getOneNoRetries(String fileSet, ByteBuffer key) {
        return getManyNoRetries(fileSet, Sets.newHashSet(key)).map(getManyToGetOneFunction(key));
    }

    /**
     * Get one API with retries against replicas.
     */
    public Future<TerrapinSingleResponse> getOne(String fileSet, ByteBuffer key) {
        return getMany(fileSet, Sets.newHashSet(key)).map(getManyToGetOneFunction(key));
    }

    /**
     * Issues a call for retrieving a response for multiple keys. Does appropriate batching.
     * Uses a 2 pass approach:
     * 1) Pass 1 - Batches the keys by partition, issues the requests to each relevant replica
     *             and tracks failed keys and replicas with failures.
     * 2) Pass 2 - Batches the failed keys from pass 1 by host, issues the request to
     *             relevant replicas, excluding the replicas with failures in pass 1.
     *
     * TODO(varun): Think about using speculative execution for the single key lookup use case.
     *              It could provide better latency.
     *
     * @param fileSet The file set against which the request should be issued.
     * @param keyList The list of keys.
     * @param retry Whether to perform a second try on the cluster.
     * @return A future wrapping a TerrapinResponse object.
     */
    protected Future<TerrapinResponse> getManyHelper(final String fileSet, final Set<ByteBuffer> keyList,
            final boolean retry) {
        Pair<FileSetInfo, ViewInfo> pair = null;
        try {
            pair = fileSetViewManager.getFileSetViewInfo(fileSet);
        } catch (TerrapinGetException e) {
            return Future.exception(e);
        }
        final FileSetInfo info = pair.getLeft();
        final ViewInfo viewInfo = pair.getRight();

        // This runs in two passes. In the first pass, we send a query to all the hosts
        // containing the keys in @keyList. We collect the list of failed keys in the first
        // pass and also the set of hosts which had errors. We send out a second query
        // with the failed keys to the respective set of hosts and attempt to exclude
        // the initial set of failed hosts.
        final Set<String> failedHostsFirstPass = Collections.synchronizedSet(Sets.<String>newHashSet());
        final Map<ByteBuffer, TerrapinSingleResponse> failedKeysFirstPass = Collections
                .synchronizedMap(Maps.<ByteBuffer, TerrapinSingleResponse>newHashMap());
        Map<String, Future<TerrapinResponse>> responseFutureMapFirstPass = getManyHelper(fileSet,
                info.servingInfo.helixResource, info.servingInfo.numPartitions, viewInfo, keyList,
                info.servingInfo.partitionerType, failedHostsFirstPass, failedKeysFirstPass,
                (Set) Sets.newHashSet(), 1);
        List<Future<TerrapinResponse>> responseFutureListFirstPass = Lists
                .newArrayListWithCapacity(responseFutureMapFirstPass.size());
        for (Map.Entry<String, Future<TerrapinResponse>> entry : responseFutureMapFirstPass.entrySet()) {
            responseFutureListFirstPass.add(entry.getValue());
        }
        // For the failed keys.
        return Stats.timeFutureMillis(statsPrefix + fileSet + "-latency",
                Future.<TerrapinResponse>collect(responseFutureListFirstPass)
                        .flatMap(new Function<List<TerrapinResponse>, Future<TerrapinResponse>>() {
                            @Override
                            public Future<TerrapinResponse> apply(final List<TerrapinResponse> responseListPass1) {
                                // At this point, we have a black list of hosts and we also have a list of keys
                                // which did not succeed in the first run.
                                // If the first pass fully succeeded or we have explicitly disabled retries,
                                // then don't perform a retry.
                                if (failedKeysFirstPass.isEmpty() || !retry) {
                                    TerrapinResponse aggResponse = new TerrapinResponse();
                                    aggResponse
                                            .setResponseMap((Map) Maps.newHashMapWithExpectedSize(keyList.size()));
                                    for (TerrapinResponse response : responseListPass1) {
                                        aggResponse.getResponseMap().putAll(response.getResponseMap());
                                    }
                                    aggResponse.getResponseMap().putAll(failedKeysFirstPass);
                                    return Future.value(aggResponse);
                                }
                                // Otherwise, we fire off a second set of futures.
                                Map<String, Future<TerrapinResponse>> responseFutureMapSecondPass = getManyHelper(
                                        fileSet, info.servingInfo.helixResource, info.servingInfo.numPartitions,
                                        viewInfo, failedKeysFirstPass.keySet(), info.servingInfo.partitionerType,
                                        null, null, failedHostsFirstPass, 2);
                                List<Future<TerrapinResponse>> responseFutureListSecondPass = Lists
                                        .newArrayListWithCapacity(responseFutureMapSecondPass.size());
                                responseFutureListSecondPass.addAll(responseFutureMapSecondPass.values());
                                return Future.collect(responseFutureListSecondPass)
                                        .map(new Function<List<TerrapinResponse>, TerrapinResponse>() {
                                            @Override
                                            public TerrapinResponse apply(
                                                    List<TerrapinResponse> responseListPass2) {
                                                // The two responses (first pass and second pass) should be disjoint
                                                // in the set of keys they return, so we can safely merge them.
                                                TerrapinResponse aggResponse = new TerrapinResponse();
                                                aggResponse.setResponseMap(
                                                        (Map) Maps.newHashMapWithExpectedSize(keyList.size()));
                                                for (TerrapinResponse response : responseListPass1) {
                                                    aggResponse.getResponseMap().putAll(response.getResponseMap());
                                                }
                                                for (TerrapinResponse response : responseListPass2) {
                                                    aggResponse.getResponseMap().putAll(response.getResponseMap());
                                                }
                                                return aggResponse;
                                            }
                                        });
                            }
                        }));
    }

    /**
     * Helper function for the getMany call above. Responsible for batching the request
     * by host, tracking hosts/keys with failures and excluding any blacklisted hosts.
     *
     * @param fileSet The original fileset useful for metrics tracking.
     * @param resource The helix resource against which the lookups are being issued.
     * @param numPartitions The number of partitions in the resource.
     * @param keyList The list of keys to be looked up.
     * @param partitionerType The partitioner type (MODULUS etc.)
     * @param failedHostsContainer The container in which failed hosts need to be accumulated.
     * @param failedKeysContainer The container in which failed keys need to be accumulated.
     *                            If null, then we return the failed keys are part of the
     *                            response futures.
     * @param blacklistedHosts The list of hosts which should be excluded from issuing
     *                         rpc(s) against.
     * @param rpcPassNum The rpc pass we are on. The first pass is 1, second is 2 and so on.
     * @return A mapping from host name to a future wrapping a TerrapinResponse object.
     */
    protected Map<String, Future<TerrapinResponse>> getManyHelper(final String fileSet, String resource,
            int numPartitions, final ViewInfo viewInfo, Set<ByteBuffer> keyList, PartitionerType partitionerType,
            final Set<String> failedHostsContainer,
            final Map<ByteBuffer, TerrapinSingleResponse> failedKeysContainer, Set<String> blacklistedHosts,
            final int rpcPassNum) {
        Map<String, Future<TerrapinResponse>> hostResponseMap = Maps.newHashMap();
        TerrapinResponse offlinePartitionResponse = new TerrapinResponse();
        offlinePartitionResponse.setResponseMap((Map) Maps.newHashMap());
        Map<String, MultiKey> partitionKeyMap = Maps.newHashMap();
        for (ByteBuffer key : keyList) {
            String partitionName = TerrapinUtil.getPartitionName(key, partitionerType, numPartitions);
            MultiKey existingMultiKey = partitionKeyMap.get(partitionName);
            if (existingMultiKey != null) {
                existingMultiKey.addToKey(key);
            } else {
                MultiKey multiKey = new MultiKey();
                multiKey.setKey(Lists.newArrayList(key));
                multiKey.setResource(resource);
                multiKey.setPartition(partitionName);
                partitionKeyMap.put(partitionName, multiKey);
            }
        }
        // Batch the requests by hostname.
        Map<String, TerrapinInternalGetRequest> hostRequestMap = Maps.newHashMap();
        for (Map.Entry<String, MultiKey> entry : partitionKeyMap.entrySet()) {
            List<String> instanceList = viewInfo.getInstancesForPartition(resource + "$" + entry.getKey());
            // Do a deterministic hash using the partitionName. The partitionName acts as the randomizing
            // function so that we don't always choose the alphabetically smallest instance otherwise
            // we will have some replicas (alphabetically largest) which never get any traffic.
            if (instanceList == null || instanceList.size() == 0) {
                for (ByteBuffer key : entry.getValue().getKey()) {
                    offlinePartitionResponse.getResponseMap().put(key,
                            new TerrapinSingleResponse().setErrorCode(TerrapinGetErrorCode.PARTITION_OFFLINE));
                }
                continue;
            }
            int index = entry.getKey().hashCode() % instanceList.size();
            int oldIndex = index;
            while (blacklistedHosts.contains(instanceList.get(index))) {
                if ((index + 1) % instanceList.size() == oldIndex) {
                    break;
                } else {
                    index = (index + 1) % instanceList.size();
                }
            }
            String hostName = instanceList.get(index);
            TerrapinInternalGetRequest request = hostRequestMap.get(hostName);
            if (request != null) {
                request.getKeyList().add(entry.getValue());
            } else {
                request = new TerrapinInternalGetRequest().setKeyList(Lists.newArrayList(entry.getValue()));
                hostRequestMap.put(hostName, request);
            }
        }
        // Grab the clients for each host and issue requests.
        for (final Map.Entry<String, TerrapinInternalGetRequest> entry : hostRequestMap.entrySet()) {
            final String hostName = entry.getKey();
            String latencyStatsPrefix = statsPrefix + fileSet + "-host-" + rpcPassNum + "-latency";
            hostResponseMap.put(hostName, Stats.timeFutureMillis(latencyStatsPrefix, getClientFuture(entry.getKey())
                    .flatMap(new Function<TerrapinServerInternal.ServiceIface, Future<TerrapinResponse>>() {
                        @Override
                        public Future<TerrapinResponse> apply(TerrapinServerInternal.ServiceIface c) {
                            return c.get(entry.getValue());
                        }
                    }).map(new Function<TerrapinResponse, TerrapinResponse>() {
                        @Override
                        public TerrapinResponse apply(TerrapinResponse response) {
                            if (failedKeysContainer == null || failedHostsContainer == null) {
                                return response;
                            }
                            Set<ByteBuffer> keysToRemove = Sets.newHashSet();
                            for (Map.Entry<ByteBuffer, TerrapinSingleResponse> entry : response.getResponseMap()
                                    .entrySet()) {
                                if (entry.getValue().isSetErrorCode()) {
                                    TerrapinGetErrorCode errorCode = entry.getValue().getErrorCode();
                                    if (errorCode == TerrapinGetErrorCode.OTHER
                                            || errorCode == TerrapinGetErrorCode.READ_ERROR
                                            || errorCode == TerrapinGetErrorCode.NOT_SERVING_PARTITION
                                            || errorCode == TerrapinGetErrorCode.NOT_SERVING_RESOURCE) {
                                        // Do not add the keys to the response since we will follow up
                                        // with a retry on the failed keys.
                                        keysToRemove.add(entry.getKey());
                                        failedKeysContainer.put(entry.getKey(), entry.getValue());
                                        // This host did have a partial failure.
                                        failedHostsContainer.add(hostName);
                                    }
                                }
                            }
                            for (ByteBuffer key : keysToRemove) {
                                response.getResponseMap().remove(key);
                            }
                            return response;
                        }
                    }).rescue(new Function<Throwable, Future<TerrapinResponse>>() {
                        @Override
                        public Future<TerrapinResponse> apply(Throwable t) {
                            // Track the number of failing rpc(s) for each fileset.
                            Stats.incr(statsPrefix + fileSet + "-host-" + rpcPassNum + "-failures");
                            Stats.incr(statsPrefix + hostName + "-" + rpcPassNum + "-failures");
                            TerrapinGetErrorCode errorCode = TerrapinGetErrorCode.READ_ERROR;
                            if (t instanceof TerrapinGetException) {
                                errorCode = ((TerrapinGetException) t).getErrorCode();
                            }
                            TerrapinResponse response = new TerrapinResponse();
                            response.setResponseMap((Map) Maps.newHashMap());
                            if (failedHostsContainer == null || failedKeysContainer == null) {
                                // Populate the error code for the keys in the request.
                                for (MultiKey multiKey : entry.getValue().getKeyList()) {
                                    for (ByteBuffer key : multiKey.getKey()) {
                                        response.getResponseMap().put(key,
                                                new TerrapinSingleResponse().setErrorCode(errorCode));
                                    }
                                }
                                return Future.value(response);
                            } else {
                                failedHostsContainer.add(hostName);
                                for (MultiKey multiKey : entry.getValue().getKeyList()) {
                                    for (ByteBuffer key : multiKey.getKey()) {
                                        failedKeysContainer.put(key,
                                                new TerrapinSingleResponse().setErrorCode(errorCode));
                                    }
                                }
                                // Return an empty response since the keys failed and we will
                                // do second retry.
                                return Future.value(response);
                            }
                        }
                    })));
        }
        hostResponseMap.put("", Future.value(offlinePartitionResponse));
        return hostResponseMap;
    }
}