com.facebook.infrastructure.service.StorageService.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.infrastructure.service.StorageService.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.infrastructure.service;

import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TimerTask;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import javax.management.MBeanServer;
import javax.management.ObjectName;

import org.apache.commons.math.linear.RealMatrix;
import org.apache.commons.math.linear.RealMatrixImpl;
import org.apache.log4j.Logger;

import com.facebook.infrastructure.analytics.AnalyticsContext;
import com.facebook.infrastructure.concurrent.DebuggableThreadPoolExecutor;
import com.facebook.infrastructure.concurrent.MultiThreadedStage;
import com.facebook.infrastructure.concurrent.SingleThreadedStage;
import com.facebook.infrastructure.concurrent.StageManager;
import com.facebook.infrastructure.concurrent.ThreadFactoryImpl;
import com.facebook.infrastructure.config.DatabaseDescriptor;
import com.facebook.infrastructure.db.BinaryVerbHandler;
import com.facebook.infrastructure.db.DBManager;
import com.facebook.infrastructure.db.FileUtils;
import com.facebook.infrastructure.db.HintedHandOffManager;
import com.facebook.infrastructure.db.LoadVerbHandler;
import com.facebook.infrastructure.db.Memtable;
import com.facebook.infrastructure.db.ReadRepairVerbHandler;
import com.facebook.infrastructure.db.ReadVerbHandler;
import com.facebook.infrastructure.db.Row;
import com.facebook.infrastructure.db.RowMutationVerbHandler;
import com.facebook.infrastructure.db.SystemTable;
import com.facebook.infrastructure.db.Table;
import com.facebook.infrastructure.dht.BootStrapper;
import com.facebook.infrastructure.dht.BootstrapInitiateMessage;
import com.facebook.infrastructure.dht.BootstrapMetadataVerbHandler;
import com.facebook.infrastructure.dht.Range;
import com.facebook.infrastructure.gms.ApplicationState;
import com.facebook.infrastructure.gms.EndPointState;
import com.facebook.infrastructure.gms.FailureDetector;
import com.facebook.infrastructure.gms.Gossiper;
import com.facebook.infrastructure.gms.IEndPointStateChangeSubscriber;
import com.facebook.infrastructure.locator.EndPointSnitch;
import com.facebook.infrastructure.locator.IEndPointSnitch;
import com.facebook.infrastructure.locator.IReplicaPlacementStrategy;
import com.facebook.infrastructure.locator.RackAwareStrategy;
import com.facebook.infrastructure.locator.RackUnawareStrategy;
import com.facebook.infrastructure.locator.TokenMetadata;
import com.facebook.infrastructure.net.EndPoint;
import com.facebook.infrastructure.net.IVerbHandler;
import com.facebook.infrastructure.net.Message;
import com.facebook.infrastructure.net.MessagingService;
import com.facebook.infrastructure.net.http.HttpConnection;
import com.facebook.infrastructure.net.io.StreamContextManager;
import com.facebook.infrastructure.tools.MembershipCleanerVerbHandler;
import com.facebook.infrastructure.tools.TokenUpdateVerbHandler;
import com.facebook.infrastructure.utils.LogUtil;
import com.yahoo.zookeeper.KeeperException;
import com.yahoo.zookeeper.Watcher;
import com.yahoo.zookeeper.ZooKeeper;
import com.yahoo.zookeeper.ZooDefs.Ids;
import com.yahoo.zookeeper.data.Stat;
import com.yahoo.zookeeper.proto.WatcherEvent;

/*
 * This abstraction contains the token/identifier of this node
 * on the identifier space. This token gets gossiped around.
 * This class will also maintain histograms of the load information
 * of other nodes in the cluster.
 * Author : Avinash Lakshman ( alakshman@facebook.com) & Prashant Malik ( pmalik@facebook.com )
 */
public final class StorageService implements IEndPointStateChangeSubscriber, StorageServiceMBean {
    private static Logger logger_ = Logger.getLogger(StorageService.class);
    /* Multiplier used by hash() when folding key characters together. */
    private static final BigInteger prime_ = BigInteger.valueOf(31);
    /* Number of characters of a key that hash() consumes; shorter keys are padded. */
    private final static int maxKeyHashLength_ = 24;
    /* Gossip application-state key under which this node's token is published. */
    private final static String nodeId_ = "NODE-IDENTIFIER";
    /* Gossip application-state key that triggers a full bootstrap (see onChange). */
    private final static String loadAll_ = "LOAD-ALL";
    /* Stage names registered with the StageManager in the constructor. */
    public final static String mutationStage_ = "ROW-MUTATION-STAGE";
    public final static String readStage_ = "ROW-READ-STAGE";
    /* Verb-handler names registered with the MessagingService in the constructor. */
    public final static String mutationVerbHandler_ = "ROW-MUTATION-VERB-HANDLER";
    public final static String tokenVerbHandler_ = "TOKEN-VERB-HANDLER";
    public final static String loadVerbHandler_ = "LOAD-VERB-HANDLER";
    public final static String binaryVerbHandler_ = "BINARY-VERB-HANDLER";
    public final static String readRepairVerbHandler_ = "READ-REPAIR-VERB-HANDLER";
    public final static String readVerbHandler_ = "ROW-READ-VERB-HANDLER";
    public final static String bootStrapInitiateVerbHandler_ = "BOOTSTRAP-INITIATE-VERB-HANDLER";
    public final static String bootStrapInitiateDoneVerbHandler_ = "BOOTSTRAP-INITIATE-DONE-VERB-HANDLER";
    public final static String bootStrapTerminateVerbHandler_ = "BOOTSTRAP-TERMINATE-VERB-HANDLER";
    public final static String tokenInfoVerbHandler_ = "TOKENINFO-VERB-HANDLER";
    public final static String mbrshipCleanerVerbHandler_ = "MBRSHIP-CLEANER-VERB-HANDLER";
    public final static String bsMetadataVerbHandler_ = "BS-METADATA-VERB-HANDLER";

    /* Consistency guarantee requested by a client operation. */
    public static enum ConsistencyLevel {
        WEAK, STRONG
    };

    /* Singleton instance, lazily created by instance(). */
    private static StorageService instance_;
    /* Used to lock the factory for creation of StorageService instance */
    private static Lock createLock_ = new ReentrantLock();
    /* TCP endpoint for storage (application) messages; set in start(). */
    private static EndPoint tcpAddr_;
    /* UDP endpoint for control/gossip messages; set in start(). */
    private static EndPoint udpAddr_;

    /** Returns the local TCP (storage) endpoint; null until start() has run. */
    public static EndPoint getLocalStorageEndPoint() {
        return tcpAddr_;
    }

    /** Returns the local UDP (control/gossip) endpoint; null until start() has run. */
    public static EndPoint getLocalControlEndPoint() {
        return udpAddr_;
    }

    /**
     * Builds the HTTP URL for this node from the storage endpoint's host and
     * the configured HTTP port, e.g. "http://host:port".
     */
    public static String getHostUrl() {
        StringBuilder url = new StringBuilder("http://");
        url.append(tcpAddr_.getHost());
        url.append(":");
        url.append(DatabaseDescriptor.getHttpPort());
        return url.toString();
    }

    /*
     * Order preserving hash for the specified key. Folds exactly
     * maxKeyHashLength_ characters into a BigInteger, base-prime_;
     * keys shorter than that are padded with prime_ itself so the
     * resulting ordering tracks the key ordering.
     */
    public static BigInteger hash(String key) {
        BigInteger result = BigInteger.ZERO;
        final int keyLength = key.length();
        for (int i = 0; i < StorageService.maxKeyHashLength_; ++i) {
            BigInteger digit = (i < keyLength)
                    ? BigInteger.valueOf(key.charAt(i))
                    : StorageService.prime_;
            result = StorageService.prime_.multiply(result).add(digit);
        }
        return result;
    }

    /* How data is bootstrapped to a node: HINT delivers hinted-handoff data
     * only, FULL streams complete data (see onChange/doBootstrap callers). */
    public static enum BootstrapMode {
        HINT, FULL
    };

    /**
     * Handles the "bootstrap initiate done" message from a peer: kicks off the
     * StreamManager associated with the sending endpoint.
     */
    public static class BootstrapInitiateDoneVerbHandler implements IVerbHandler {
        private static Logger logger_ = Logger.getLogger(BootstrapInitiateDoneVerbHandler.class);

        public void doVerb(Message message) {
            logger_.debug("Received a bootstrap initiate done message ...");
            /* Let the Stream Manager do his thing. */
            StreamManager.instance(message.getFrom()).start();
        }
    }

    /** TimerTask that shuts down the singleton StorageService when it fires. */
    private class ShutdownTimerTask extends TimerTask {
        public void run() {
            StorageService.instance().shutdown();
        }
    }

    /*
     * Factory method that gets an instance of the StorageService class.
     *
     * NOTE: the previous implementation used double-checked locking, but
     * instance_ is not declared volatile, so the unsynchronized fast-path
     * read could observe a partially constructed object under the Java
     * Memory Model. We therefore always take the lock and perform the
     * null check under it; construction happens only once, so the lock
     * cost on subsequent calls is negligible for this usage.
     */
    public static StorageService instance() {
        StorageService.createLock_.lock();
        try {
            if (instance_ == null) {
                try {
                    instance_ = new StorageService();
                } catch (Throwable th) {
                    /* The node cannot run without storage; log and abort. */
                    logger_.error(LogUtil.throwableToString(th));
                    System.exit(1);
                }
            }
        } finally {
            createLock_.unlock();
        }
        return instance_;
    }

    /*
     * This is the endpoint snitch which depends on the network architecture. We
     * need to keep this information for each endpoint so that we make decisions
     * while doing things like replication etc.
     */
    private IEndPointSnitch endPointSnitch_;
    /*
     * Uptime of this node - we use this to determine if a bootstrap can be
     * performed by this node
     */
    private long uptime_ = 0L;

    /* This abstraction maintains the token/endpoint metadata information */
    private TokenMetadata tokenMetadata_ = new TokenMetadata();
    /* System metadata (generation, storage id/token) loaded by DBManager in start(). */
    private DBManager.StorageMetadata storageMetadata_;

    /*
     * Maintains a list of all components that need to be shutdown for a clean
     * exit.
     */
    private Set<IComponentShutdown> components_ = new HashSet<IComponentShutdown>();
    /*
     * This boolean indicates if we are in loading state. If we are then we do not
     * want any distributed algorithms w.r.t change in token state to kick in.
     */
    private boolean isLoadState_ = false;

    /*
     * This variable indicates if the local storage instance has been shutdown.
     */
    private AtomicBoolean isShutdown_ = new AtomicBoolean(false);

    /* This thread pool is used to do the bootstrap for a new node */
    private ExecutorService bootStrapper_ = new DebuggableThreadPoolExecutor(1, 1, Integer.MAX_VALUE,
            TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new ThreadFactoryImpl("BOOT-STRAPPER"));

    /*
     * This thread pool does consistency checks when the client doesn't care about
     * consistency
     */
    private ExecutorService consistencyManager_;

    /* Helps determine number of keys processed in a time interval */
    private RequestCountSampler sampler_;

    /* This is the entity that tracks load information of all nodes in the cluster */
    private StorageLoadBalancer storageLoadBalancer_;
    /* We use this interface to determine where replicas need to be placed */
    private IReplicaPlacementStrategy nodePicker_;
    /* Handle to a ZooKeeper instance */
    private ZooKeeper zk_;

    /*
     * Registers with Management Server
     */
    /*
     * Registers this instance with the platform MBean server under
     * "com.facebook.infrastructure.service:type=StorageService".
     * Registration failure is logged but not fatal.
     */
    private void init() {
        // Register this instance with JMX
        try {
            MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
            mbs.registerMBean(this, new ObjectName("com.facebook.infrastructure.service:type=StorageService"));
        } catch (Exception e) {
            logger_.error(LogUtil.throwableToString(e));
        }
    }

    /**
     * Wires up the node: registers the JMX MBean, all verb handlers with the
     * messaging service, the mutation/read/HTTP stages, and picks the replica
     * placement strategy from configuration.
     *
     * @throws Throwable if any component fails to initialize.
     */
    public StorageService() throws Throwable {
        init();
        uptime_ = System.currentTimeMillis();
        storageLoadBalancer_ = new StorageLoadBalancer(this);
        endPointSnitch_ = new EndPointSnitch();

        /*
         * Register the verb handlers. The messaging singleton lookup is
         * hoisted into a local instead of being repeated for every handler.
         */
        MessagingService messagingService = MessagingService.getMessagingInstance();
        messagingService.registerVerbHandlers(StorageService.tokenVerbHandler_, new TokenUpdateVerbHandler());
        messagingService.registerVerbHandlers(StorageService.binaryVerbHandler_, new BinaryVerbHandler());
        messagingService.registerVerbHandlers(StorageService.loadVerbHandler_, new LoadVerbHandler());
        messagingService.registerVerbHandlers(StorageService.mutationVerbHandler_, new RowMutationVerbHandler());
        messagingService.registerVerbHandlers(StorageService.readRepairVerbHandler_, new ReadRepairVerbHandler());
        messagingService.registerVerbHandlers(StorageService.readVerbHandler_, new ReadVerbHandler());
        messagingService.registerVerbHandlers(StorageService.bootStrapInitiateVerbHandler_,
                new Table.BootStrapInitiateVerbHandler());
        messagingService.registerVerbHandlers(StorageService.bootStrapInitiateDoneVerbHandler_,
                new StorageService.BootstrapInitiateDoneVerbHandler());
        messagingService.registerVerbHandlers(StorageService.bootStrapTerminateVerbHandler_,
                new StreamManager.BootstrapTerminateVerbHandler());
        messagingService.registerVerbHandlers(HttpConnection.httpRequestVerbHandler_,
                new HttpRequestVerbHandler(this));
        messagingService.registerVerbHandlers(StorageService.tokenInfoVerbHandler_, new TokenInfoVerbHandler());
        messagingService.registerVerbHandlers(StorageService.mbrshipCleanerVerbHandler_,
                new MembershipCleanerVerbHandler());
        messagingService.registerVerbHandlers(StorageService.bsMetadataVerbHandler_,
                new BootstrapMetadataVerbHandler());

        /* register the stage for the mutations */
        int threadCount = DatabaseDescriptor.getThreadsPerPool();
        consistencyManager_ = new DebuggableThreadPoolExecutor(threadCount, threadCount, Integer.MAX_VALUE,
                TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(),
                new ThreadFactoryImpl("CONSISTENCY-MANAGER"));

        StageManager.registerStage(StorageService.mutationStage_,
                new MultiThreadedStage("ROW-MUTATION", threadCount));
        StageManager.registerStage(StorageService.readStage_, new MultiThreadedStage("ROW-READ", threadCount));
        /* Stage for handling the HTTP messages. */
        StageManager.registerStage(HttpConnection.httpStage_, new SingleThreadedStage("HTTP-REQUEST"));

        /* Replica placement strategy is chosen once from configuration. */
        if (DatabaseDescriptor.isRackAware())
            nodePicker_ = new RackAwareStrategy(tokenMetadata_);
        else
            nodePicker_ = new RackUnawareStrategy(tokenMetadata_);
    }

    /*
     * Connects to ZooKeeper and ensures the znodes this cluster relies on
     * (/Cassandra/<cluster> and its Leader, Locks and Misc children) exist,
     * creating any that are missing. The watcher signals the LeaderElector
     * whenever an event touches the Leader path.
     *
     * Fixes over the previous version: the KeeperException was formatted but
     * never logged (the string was discarded), and the retry was done via
     * unbounded recursion which could overflow the stack; we now log the
     * failure and retry with a loop instead.
     */
    private void reportToZookeeper() throws Throwable {
        while (true) {
            try {
                zk_ = new ZooKeeper(DatabaseDescriptor.getZkAddress(), DatabaseDescriptor.getZkSessionTimeout(),
                        new Watcher() {
                            public void process(WatcherEvent we) {
                                String path = "/Cassandra/" + DatabaseDescriptor.getClusterName() + "/Leader";
                                String eventPath = we.getPath();
                                logger_.debug("PROCESS EVENT : " + eventPath);
                                if (eventPath != null && (eventPath.indexOf(path) != -1)) {
                                    logger_.debug("Signalling the leader instance ...");
                                    LeaderElector.instance().signal();
                                }
                            }
                        });

                Stat stat = zk_.exists("/", false);
                if (stat != null) {
                    stat = zk_.exists("/Cassandra", false);
                    if (stat == null) {
                        logger_.debug("Creating the Cassandra znode ...");
                        zk_.create("/Cassandra", new byte[0], Ids.OPEN_ACL_UNSAFE, 0);
                    }

                    String path = "/Cassandra/" + DatabaseDescriptor.getClusterName();
                    stat = zk_.exists(path, false);
                    if (stat == null) {
                        logger_.debug("Creating the cluster znode " + path);
                        zk_.create(path, new byte[0], Ids.OPEN_ACL_UNSAFE, 0);
                    }

                    /* Create the Leader, Locks and Misc znode */
                    stat = zk_.exists(path + "/Leader", false);
                    if (stat == null) {
                        logger_.debug("Creating the leader znode " + path);
                        zk_.create(path + "/Leader", new byte[0], Ids.OPEN_ACL_UNSAFE, 0);
                    }

                    stat = zk_.exists(path + "/Locks", false);
                    if (stat == null) {
                        logger_.debug("Creating the locks znode " + path);
                        zk_.create(path + "/Locks", new byte[0], Ids.OPEN_ACL_UNSAFE, 0);
                    }

                    stat = zk_.exists(path + "/Misc", false);
                    if (stat == null) {
                        logger_.debug("Creating the misc znode " + path);
                        zk_.create(path + "/Misc", new byte[0], Ids.OPEN_ACL_UNSAFE, 0);
                    }
                }
                return;
            } catch (KeeperException ke) {
                /* Previously this string was built and thrown away; actually log it. */
                logger_.error(LogUtil.throwableToString(ke));
                /* do the re-initialize again (loop, not recursion). */
            }
        }
    }

    /** Returns the ZooKeeper handle; null unless reportToZookeeper() has run. */
    protected ZooKeeper getZooKeeperHandle() {
        return zk_;
    }

    /**
     * Returns true if the given endpoint is the currently elected leader.
     * Guards against a null leader (no election completed yet) instead of
     * throwing a NullPointerException as the previous version would.
     */
    public boolean isLeader(EndPoint endpoint) {
        EndPoint leader = getLeader();
        return leader != null && leader.equals(endpoint);
    }

    /** Returns the cluster leader as reported by the LeaderElector. */
    public EndPoint getLeader() {
        return LeaderElector.instance().getLeader();
    }

    /** Adds a component to the set shut down by shutdown(). */
    public void registerComponentForShutdown(IComponentShutdown component) {
        components_.add(component);
    }

    /** Registers an externally supplied verb handler with the messaging service. */
    public void registerExternalVerbHandler(String verb, IVerbHandler verbHandler) {
        MessagingService.getMessagingInstance().registerVerbHandlers(verb, verbHandler);
    }

    /**
     * Brings the node up: recovers system metadata via DBManager, opens the
     * TCP/UDP/HTTP listeners, starts the analytics context and load balancer,
     * starts gossip with the persisted generation, and publishes this node's
     * token so it gets gossiped around.
     *
     * @throws Throwable if any startup step fails.
     */
    public void start() throws Throwable {
        storageMetadata_ = DBManager.instance().start();

        /* Set up TCP endpoint */
        tcpAddr_ = new EndPoint(DatabaseDescriptor.getStoragePort());
        /* Set up UDP endpoint */
        udpAddr_ = new EndPoint(DatabaseDescriptor.getControlPort());
        /* Listen for application messages */
        MessagingService.getMessagingInstance().listen(tcpAddr_, false);
        /* Listen for control messages */
        MessagingService.getMessagingInstance().listenUDP(udpAddr_);
        /* Listen for HTTP messages */
        MessagingService.getMessagingInstance().listen(new EndPoint(DatabaseDescriptor.getHttpPort()), true);
        /* start the analytics context package */
        AnalyticsContext.instance().start();
        /*
         * report our existence to ZooKeeper instance and start the leader election
         * service
         */
        // reportToZookeeper();
        // LeaderElector.instance().start();
        /* Start the storage load balancer */
        storageLoadBalancer_.start();
        /* Register with the Gossiper for EndPointState notifications */
        Gossiper.instance().register(this);
        /*
         * Start the gossiper with the generation # retrieved from the System table
         */
        Gossiper.instance().start(udpAddr_, storageMetadata_.getGeneration());
        /* Set up the request sampler */
        sampler_ = new RequestCountSampler();
        /* Make sure this token gets gossiped around. */
        tokenMetadata_.update(storageMetadata_.getStorageId(), StorageService.tcpAddr_);
        Gossiper.instance().addApplicationState(StorageService.nodeId_,
                new ApplicationState(storageMetadata_.getStorageId().toString()));
    }

    /**
     * Orderly suicide: stop gossiping so peers mark this node dead and start
     * hinting, wait for that detection, flush the first table, wait again for
     * the flush, then shut everything down.
     *
     * NOTE(review): only the first configured table is flushed here — confirm
     * whether all tables should be flushed.
     *
     * @throws Throwable on flush failure or interruption during the sleeps.
     */
    public void killMe() throws Throwable {
        isShutdown_.set(true);
        /*
         * Shutdown the Gossiper to stop responding/sending Gossip messages. This
         * causes other nodes to detect you as dead and starting hinting data for
         * the local endpoint.
         */
        Gossiper.instance().shutdown();
        final long nodeDeadDetectionTime = 25000L;
        Thread.sleep(nodeDeadDetectionTime);
        /* Now perform a force flush of the table */
        String table = DatabaseDescriptor.getTables().get(0);
        Table.open(table).flush(false);
        /* Now wait for the flush to complete */
        Thread.sleep(nodeDeadDetectionTime);
        /* Shutdown all other components */
        StorageService.instance().shutdown();
    }

    /** Returns true once killMe() has marked this instance as shut down. */
    public boolean isShutdown() {
        return isShutdown_.get();
    }

    /**
     * Stops all subsystems: the bootstrap pool, stages, messaging, memtables,
     * the request sampler, the FileUtils cleaner, and finally every component
     * registered via registerComponentForShutdown().
     * NOTE(review): this does not set isShutdown_; callers go through killMe()
     * for that — confirm direct callers (e.g. ShutdownTimerTask) are fine.
     */
    public void shutdown() {
        bootStrapper_.shutdownNow();
        /* shut down all stages */
        StageManager.shutdown();
        /* shut down the messaging service */
        MessagingService.shutdown();
        /* shut down all memtables */
        Memtable.shutdown();
        /* shut down the request count sampler */
        RequestCountSampler.shutdown();
        /* shut down the cleaner thread in FileUtils */
        FileUtils.shutdown();

        /* shut down all registered components */
        for (IComponentShutdown component : components_) {
            component.shutdown();
        }
    }

    /** Returns a clone of the token metadata, so callers cannot mutate ours. */
    public TokenMetadata getTokenMetadata() {
        return tokenMetadata_.cloneMe();
    }

    /* TODO: remove later */
    /** Records the given token/endpoint pair in the local token metadata. */
    public void updateTokenMetadata(BigInteger token, EndPoint endpoint) {
        tokenMetadata_.update(token, endpoint);
    }

    /** Returns the snitch used for data-center/rack placement decisions. */
    public IEndPointSnitch getEndPointSnitch() {
        return endPointSnitch_;
    }

    /*
     * Given an EndPoint this method will report if the endpoint is in the same
     * data center as the local storage endpoint.
     *
     * @throws IOException if the snitch fails to resolve the endpoints.
     */
    public boolean isInSameDataCenter(EndPoint endpoint) throws IOException {
        return endPointSnitch_.isInSameDataCenter(StorageService.tcpAddr_, endpoint);
    }

    /*
     * This method performs the requisite operations to make sure that the N
     * replicas are in sync. We do this in the background when we do not care much
     * about consistency. This overload checks a column-family slice given a
     * start index and count.
     */
    public void doConsistencyCheck(Row row, List<EndPoint> endpoints, String columnFamily, int start, int count) {
        consistencyManager_.submit(
                new ConsistencyManager(row.cloneMe(), endpoints, columnFamily, start, count));
    }

    /** Background replica-sync check for columns newer than the given timestamp. */
    public void doConsistencyCheck(Row row, List<EndPoint> endpoints, String columnFamily, long sinceTimestamp) {
        consistencyManager_.submit(
                new ConsistencyManager(row.cloneMe(), endpoints, columnFamily, sinceTimestamp));
    }

    /** Background replica-sync check restricted to the named columns. */
    public void doConsistencyCheck(Row row, List<EndPoint> endpoints, String columnFamily, List<String> columns) {
        consistencyManager_.submit(
                new ConsistencyManager(row.cloneMe(), endpoints, columnFamily, columns));
    }

    /*
     * This method displays all the ranges and the replicas that are responsible
     * for the individual ranges. The format of this string is the following:
     *
     * R1 : A B C R2 : D E F R3 : G H I
     */
    public String showTheRing() {
        StringBuilder sb = new StringBuilder();
        /* The tokens currently on the ring define the set of ranges. */
        Map<BigInteger, EndPoint> tokenToEndPointMap = tokenMetadata_.cloneTokenEndPointMap();
        Range[] ranges = getAllRanges(tokenToEndPointMap.keySet());
        Map<Range, List<EndPoint>> rangeToEndPointMap = constructRangeToEndPointMap(ranges);

        /*
         * Hoist the line-separator lookup out of the loop and iterate entries
         * instead of doing a map lookup per range.
         */
        String lineSeparator = System.getProperty("line.separator");
        for (Map.Entry<Range, List<EndPoint>> entry : rangeToEndPointMap.entrySet()) {
            sb.append(entry.getKey());
            sb.append(" : ");

            for (EndPoint replica : entry.getValue()) {
                sb.append(replica);
                sb.append(" ");
            }
            sb.append(lineSeparator);
        }
        return sb.toString();
    }

    /** Computes the current mapping from each ring range to its replica endpoints. */
    public Map<Range, List<EndPoint>> getRangeToEndPointMap() {
        /* The tokens currently on the ring define the set of ranges. */
        Set<BigInteger> ringTokens = tokenMetadata_.cloneTokenEndPointMap().keySet();
        return constructRangeToEndPointMap(getAllRanges(ringTokens));
    }

    /**
     * Construct the range to endpoint mapping based on the true view of the
     * world.
     *
     * @param ranges
     * @return mapping of ranges to the replicas responsible for them.
     */
    public Map<Range, List<EndPoint>> constructRangeToEndPointMap(Range[] ranges) {
        logger_.debug("Constructing range to endpoint map ...");
        Map<Range, List<EndPoint>> replicasByRange = new HashMap<Range, List<EndPoint>>();
        for (Range range : ranges) {
            List<EndPoint> replicas =
                    new ArrayList<EndPoint>(Arrays.asList(getNStorageEndPoint(range.right())));
            replicasByRange.put(range, replicas);
        }
        logger_.debug("Done constructing range to endpoint map ...");
        return replicasByRange;
    }

    /**
     * Construct the range to endpoint mapping based on the view as dictated by
     * the mapping of token to endpoints passed in.
     *
     * @param ranges
     * @param tokenToEndPointMap
     *          mapping of token to endpoints.
     * @return mapping of ranges to the replicas responsible for them.
     */
    public Map<Range, List<EndPoint>> constructRangeToEndPointMap(Range[] ranges,
            Map<BigInteger, EndPoint> tokenToEndPointMap) {
        logger_.debug("Constructing range to endpoint map ...");
        Map<Range, List<EndPoint>> replicasByRange = new HashMap<Range, List<EndPoint>>();
        for (Range range : ranges) {
            List<EndPoint> replicas = new ArrayList<EndPoint>(
                    Arrays.asList(getNStorageEndPoint(range.right(), tokenToEndPointMap)));
            replicasByRange.put(range, replicas);
        }
        logger_.debug("Done constructing range to endpoint map ...");
        return replicasByRange;
    }

    /**
     * Construct a mapping from endpoint to ranges that endpoint is responsible
     * for.
     *
     * @return the mapping from endpoint to the ranges it is responsible for.
     */
    public Map<EndPoint, List<Range>> constructEndPointToRangesMap() {
        Map<EndPoint, List<Range>> rangesByEndPoint = new HashMap<EndPoint, List<Range>>();
        /* Every endpoint on the ring contributes its own set of ranges. */
        for (EndPoint member : tokenMetadata_.cloneTokenEndPointMap().values()) {
            rangesByEndPoint.put(member, getRangesForEndPoint(member));
        }
        return rangesByEndPoint;
    }

    /**
     * Get the estimated disk space of the target endpoint in its primary range.
     *
     * Sets up a linear system: one equation per endpoint (unknowns are the
     * per-range disk usages; an endpoint's equation has a 1 for each range it
     * replicates), with the endpoint's total disk usage as the constant, then
     * solves for the per-range usages and returns the one for the target's
     * primary range.
     *
     * NOTE(review): constants is sized allRanges.length but filled once per
     * endpoint — this assumes exactly one token (hence one range) per
     * endpoint; confirm that invariant holds.
     *
     * @param target
     *          whose primary range we are interested in.
     * @return disk space of the target in the primary range.
     */
    private double getDiskSpaceForPrimaryRange(EndPoint target) {
        double primaryDiskSpace = 0d;
        Map<BigInteger, EndPoint> tokenToEndPointMap = tokenMetadata_.cloneTokenEndPointMap();
        Set<BigInteger> tokens = tokenToEndPointMap.keySet();
        Range[] allRanges = getAllRanges(tokens);
        Arrays.sort(allRanges);
        /* Mapping from Range to its ordered position on the ring */
        Map<Range, Integer> rangeIndex = new HashMap<Range, Integer>();
        for (int i = 0; i < allRanges.length; ++i) {
            rangeIndex.put(allRanges[i], i);
        }
        /* Get the coefficients for the equations */
        List<double[]> equations = new ArrayList<double[]>();
        /* Get the endpoint to range map */
        Map<EndPoint, List<Range>> endPointToRangesMap = constructEndPointToRangesMap();
        Set<EndPoint> eps = endPointToRangesMap.keySet();

        for (EndPoint ep : eps) {
            List<Range> ranges = endPointToRangesMap.get(ep);
            double[] equation = new double[allRanges.length];
            for (Range range : ranges) {
                int index = rangeIndex.get(range);
                equation[index] = 1;
            }
            equations.add(equation);
        }
        double[][] coefficients = equations.toArray(new double[0][0]);

        /* Get the constants which are the aggregate disk space for each endpoint */
        double[] constants = new double[allRanges.length];
        int index = 0;
        for (EndPoint ep : eps) {
            /* reset the port back to control port */
            ep.setPort(DatabaseDescriptor.getControlPort());
            String lInfo = null;
            /* Local node answers from its own sampler; peers from gossiped load info. */
            if (ep.equals(StorageService.udpAddr_))
                lInfo = getLoadInfo();
            else
                lInfo = getLoadInfo(ep);
            LoadInfo li = new LoadInfo(lInfo);
            constants[index++] = FileUtils.stringToFileSize(li.diskSpace());
        }

        RealMatrix matrix = new RealMatrixImpl(coefficients);
        double[] solutions = matrix.solve(constants);
        Range primaryRange = getPrimaryRangeForEndPoint(target);
        primaryDiskSpace = solutions[rangeIndex.get(primaryRange)];
        return primaryDiskSpace;
    }

    /**
     * This is very dangerous. This is used only on the client side to set up the
     * client library. This is then used to find the appropriate nodes to route
     * the key to.
     * Note: replaces the map wholesale (no clone), so the caller shares state.
     */
    public void setTokenMetadata(TokenMetadata tokenMetadata) {
        tokenMetadata_ = tokenMetadata;
    }

    /**
     * Called when there is a change in application state. In particular we are
     * interested in new tokens as a result of a new node or an existing node
     * moving to a new location on the ring.
     *
     * @param endpoint the endpoint whose gossip state changed (control port).
     * @param epState  its new endpoint state.
     */
    public void onChange(EndPoint endpoint, EndPointState epState) {
        /* Rebase the endpoint onto the storage port for token-map bookkeeping. */
        EndPoint ep = new EndPoint(endpoint.getHost(), DatabaseDescriptor.getStoragePort());
        /* node identifier for this endpoint on the identifier space */
        ApplicationState nodeIdState = epState.getApplicationState(StorageService.nodeId_);
        if (nodeIdState != null) {
            BigInteger newToken = new BigInteger(nodeIdState.getState());
            logger_.debug("CHANGE IN STATE FOR " + endpoint + " - has token " + nodeIdState.getState());
            BigInteger oldToken = tokenMetadata_.getToken(ep);

            if (oldToken != null) {
                /*
                 * If oldToken equals the newToken then the node had crashed and is
                 * coming back up again. If oldToken is not equal to the newToken this
                 * means that the node is being relocated to another position in the
                 * ring.
                 */
                if (!oldToken.equals(newToken)) {
                    logger_.debug("Relocation for endpoint " + ep);
                    tokenMetadata_.update(newToken, ep);
                } else {
                    /*
                     * This means the node crashed and is coming back up. Deliver the
                     * hints that we have for this endpoint.
                     */
                    logger_.debug("Sending hinted data to " + ep);
                    doBootstrap(endpoint, BootstrapMode.HINT);
                }
            } else {
                /*
                 * This is a new node and we just update the token map.
                 */
                tokenMetadata_.update(newToken, ep);
            }
        } else {
            /*
             * If we are here and if this node is UP and already has an entry in the
             * token map. It means that the node was behind a network partition.
             */
            /* NOTE(review): this checks 'endpoint' (control port) where other
             * token-map calls use 'ep' (storage port) — confirm intentional. */
            if (epState.isAlive() && tokenMetadata_.isKnownEndPoint(endpoint)) {
                logger_.debug("EndPoint " + ep + " just recovered from a partition. Sending hinted data.");
                doBootstrap(ep, BootstrapMode.HINT);
            }
        }

        /* Check if a bootstrap is in order */
        ApplicationState loadAllState = epState.getApplicationState(StorageService.loadAll_);
        if (loadAllState != null) {
            String nodes = loadAllState.getState();
            if (nodes != null) {
                doBootstrap(ep, BootstrapMode.FULL);
            }
        }
    }

    /**
     * Generate a random token: hash a string built from 24 pseudo-random bytes,
     * each drawn from the range [31, 255].
     */
    public static BigInteger generateRandomToken() {
        final int length = 24;
        Random rng = new Random();
        byte[] buffer = new byte[length];
        for (int idx = 0; idx < length; ++idx) {
            buffer[idx] = (byte) (31 + rng.nextInt(256 - 31));
        }
        return hash(new String(buffer));
    }

    /**
     * This method is called by the Load Balancing module and the Bootstrap
     * module. Merges the received Counting Bloom Filter into the local request
     * count sampler; a null cardinality is silently ignored.
     */
    public void sample(RequestCountSampler.Cardinality cardinality) {
        if (cardinality != null) {
            sampler_.add(cardinality);
        }
    }

    /**
     * Report this node's load: the sampler's primary-key count combined with
     * the disk space currently in use, rendered via LoadInfo.toString().
     */
    public String getLoadInfo() {
        return new LoadInfo(sampler_.count(), FileUtils.getUsedDiskSpace()).toString();
    }

    /**
     * Get the load info for the given endpoint, as gossiped around and cached
     * by the StorageLoadBalancer. Returns "N/A" when nothing has been cached.
     */
    public String getLoadInfo(EndPoint ep) {
        LoadInfo cached = storageLoadBalancer_.getLoad(ep);
        if (cached == null) {
            return "N/A";
        }
        return cached.toString();
    }

    /**
     * Get the endpoint that has the largest primary count.
     *
     * Builds a LoadInfo for every gossip member — the local node's is computed
     * directly, remote ones come from the StorageLoadBalancer cache — then
     * sorts by primary count and returns the endpoint with the highest value.
     *
     * @return the most loaded endpoint, or null when no load information is
     *         available for any member.
     */
    EndPoint getEndPointWithLargestPrimaryCount() {
        Set<EndPoint> allMbrs = Gossiper.instance().getAllMembers();
        Map<LoadInfo, EndPoint> loadInfoToEndPointMap = new HashMap<LoadInfo, EndPoint>();
        List<LoadInfo> lInfos = new ArrayList<LoadInfo>();

        for (EndPoint mbr : allMbrs) {
            mbr.setPort(DatabaseDescriptor.getStoragePort());
            LoadInfo li;
            if (mbr.equals(StorageService.tcpAddr_)) {
                li = new LoadInfo(getLoadInfo());
            } else {
                /* May be null if this member's load has not been gossiped yet. */
                li = storageLoadBalancer_.getLoad(mbr);
            }
            if (li == null) {
                /* Skip members with no load data rather than NPE in the sort
                   or poison the map with a null key. */
                continue;
            }
            lInfos.add(li);
            loadInfoToEndPointMap.put(li, mbr);
        }

        if (lInfos.isEmpty())
            return null;

        Collections.sort(lInfos, new LoadInfo.PrimaryCountComparator());
        return loadInfoToEndPointMap.get(lInfos.get(lInfos.size() - 1));
    }

    /**
     * Record the given key in the request count sampler, but only when this
     * node is the key's primary replica.
     */
    public void sample(String key) {
        if (!isPrimary(key))
            return;
        sampler_.sample(key);
    }

    /**
     * Remove the given key from the request count sampler, but only when this
     * node is the key's primary replica.
     */
    public void delete(String key) {
        if (!isPrimary(key))
            return;
        sampler_.delete(key);
    }

    /*
     * This method updates the token on disk and modifies the cached
     * StorageMetadata instance. This is only for the local endpoint.
     *
     * Sequence: persist the token in the system table, refresh the cached
     * storage metadata, update the in-memory token map, and finally gossip the
     * new token so the rest of the ring learns about it.
     */
    public void updateToken(BigInteger token) throws IOException {
        /* update the token on disk */
        SystemTable.openSystemTable(SystemTable.name_).updateToken(token);
        /* Update the storageMetadata cache */
        storageMetadata_.setStorageId(token);
        /* Update the token maps */
        /* Get the old token. This needs to be removed. */
        /* NOTE(review): the comment above suggests the old token mapping should
           be removed, yet only update() is called — confirm whether a stale
           token entry can linger in tokenMetadata_. */
        tokenMetadata_.update(token, StorageService.tcpAddr_);
        /* Gossip this new token for the local storage instance */
        Gossiper.instance().addApplicationState(StorageService.nodeId_, new ApplicationState(token.toString()));
    }

    /*
     * This method removes the state associated with this endpoint from the
     * TokenMetadata instance.
     *
     * @param endpoint remove the token state associated with this endpoint.
     */
    public void removeTokenState(EndPoint endpoint) {
        /* Drop the endpoint's token mapping from the local ring view. */
        tokenMetadata_.remove(endpoint);
        /* Remove the state from the Gossiper */
        Gossiper.instance().removeFromMembership(endpoint);
    }

    /*
     * This method is invoked by the Loader process to force the node to move from
     * its current position on the token ring, to a position to be determined
     * based on the keys. This will help all nodes to start off perfectly load
     * balanced. The array passed in is evaluated as follows by the loader
     * process: If there are 10 keys in the system and a totality of 5 nodes then
     * each node needs to have 2 keys i.e the array is made up of every 2nd key in
     * the total list of keys.
     */
    public void relocate(String[] keys) throws IOException {
        if (keys.length > 0) {
            /* Flag that we are in the middle of a bulk-load relocation;
               cleared later by resetLoadState(). */
            isLoadState_ = true;
            BigInteger token = tokenMetadata_.getToken(StorageService.tcpAddr_);
            Map<BigInteger, EndPoint> tokenToEndPointMap = tokenMetadata_.cloneTokenEndPointMap();
            BigInteger[] tokens = tokenToEndPointMap.keySet().toArray(new BigInteger[0]);
            Arrays.sort(tokens);
            /* Pick the key whose hash becomes our new token: our rank on the
               sorted ring times the (integer) number of keys per node.
               NOTE(review): integer division — if keys.length < tokens.length
               this index is 0; binarySearch also assumes our token is present
               in the map. Confirm both preconditions hold for loader input. */
            int index = Arrays.binarySearch(tokens, token) * (keys.length / tokens.length);
            BigInteger newToken = hash(keys[index]);
            /* update the token */
            updateToken(newToken);
        }
    }

    /*
     * This is used to indicate that this node is done with the loading of data.
     */
    public void resetLoadState() {
        /* Clears the flag set by relocate() during bulk load. */
        isLoadState_ = false;
    }

    /**
     * This method takes a colon separated string of nodes that need to be
     * bootstrapped, optionally followed by "-" and a colon separated list of
     * filter sources. E.g. for nodes A, B and C pass "A:B:C"; to additionally
     * restrict the data sources to D and E pass "A:B:C-D:E".
     */
    private void doBootstrap(String nodes) {
        String[] sections = nodes.split("-");
        String nodeSection = sections[0];
        String filterSection = (sections.length == 2) ? sections[1] : null;

        String[] nodeNames = nodeSection.split(":");
        int n = nodeNames.length;
        EndPoint[] endpoints = new EndPoint[n];
        BigInteger[] tokens = new BigInteger[n];
        for (int i = 0; i < n; ++i) {
            endpoints[i] = new EndPoint(nodeNames[i].trim(), DatabaseDescriptor.getStoragePort());
            tokens[i] = tokenMetadata_.getToken(endpoints[i]);
        }

        /* Start the bootstrap algorithm, with filter sources when supplied. */
        if (filterSection == null) {
            bootStrapper_.submit(new BootStrapper(endpoints, tokens));
        } else {
            String[] filterNames = filterSection.split(":");
            EndPoint[] filters = new EndPoint[filterNames.length];
            for (int i = 0; i < filterNames.length; ++i) {
                filters[i] = new EndPoint(filterNames[i].trim(), DatabaseDescriptor.getStoragePort());
            }
            bootStrapper_.submit(new BootStrapper(endpoints, tokens, filters));
        }
    }

    /**
     * Starts the bootstrap operations for the specified endpoint. The name of
     * this method is however a misnomer since it also hands data off to the
     * specified node when it has crashed and come back up, when it is marked
     * alive after a network partition, and when it joins the ring either as an
     * old node being relocated or as a brand new node.
     */
    public final void doBootstrap(EndPoint endpoint, BootstrapMode mode) {
        if (mode == BootstrapMode.FULL) {
            /* Stream the endpoint's full range over to it. */
            BigInteger token = tokenMetadata_.getToken(endpoint);
            bootStrapper_.submit(new BootStrapper(new EndPoint[] { endpoint }, new BigInteger[] { token }));
        } else if (mode == BootstrapMode.HINT) {
            /* Deliver the hinted data to this endpoint. */
            HintedHandOffManager.instance().deliverHints(endpoint);
        }
        /* Other modes are ignored. */
    }

    /* This methods belong to the MBean interface */

    /** MBean: total count reported by the request count sampler. */
    public long getRequestHandled() {
        return sampler_.count();
    }

    /**
     * MBean: the token owned by the given host, looked up on the storage port,
     * as a string; "0" when the host has no token in the ring.
     */
    public String getToken(EndPoint ep) {
        EndPoint storageEp = new EndPoint(ep.getHost(), DatabaseDescriptor.getStoragePort());
        BigInteger token = tokenMetadata_.getToken(storageEp);
        if (token == null) {
            return BigInteger.ZERO.toString();
        }
        return token.toString();
    }

    /**
     * MBean: this node's own token as a string.
     * NOTE(review): assumes the local endpoint always has an entry in
     * tokenMetadata_; this would NPE otherwise — confirm.
     */
    public String getToken() {
        return tokenMetadata_.getToken(StorageService.tcpAddr_).toString();
    }

    /**
     * MBean entry point: parse the token string and update the local token.
     * An IOException from the underlying update is logged rather than
     * propagated, since JMX callers cannot handle it.
     */
    public void updateToken(String token) {
        try {
            updateToken(new BigInteger(token));
        } catch (IOException ex) {
            /* Log at error level: a failed token update must not be hidden
               behind a debug-only message. */
            logger_.error(LogUtil.throwableToString(ex));
        }
    }

    /** MBean: space separated list of members the Gossiper reports as live. */
    public String getLiveNodes() {
        return stringify(Gossiper.instance().getLiveMembers());
    }

    /** MBean: space separated list of members the Gossiper reports as unreachable. */
    public String getUnreachableNodes() {
        return stringify(Gossiper.instance().getUnreachableMembers());
    }

    /* Helper for the MBean interface */

    /**
     * Render a set of endpoints as a single space separated string. Note the
     * result carries a trailing space when the set is non-empty.
     */
    private String stringify(Set<EndPoint> eps) {
        StringBuilder sb = new StringBuilder(); // no need to seed with an empty literal
        for (EndPoint ep : eps) {
            sb.append(ep).append(' ');
        }
        return sb.toString();
    }

    /**
     * MBean entry point: bootstrap the given colon separated list of nodes
     * (see doBootstrap(String) for the exact format).
     */
    public void loadAll(String nodes) {
        doBootstrap(nodes);
    }

    /**
     * MBean: compute a token appropriate for taking over the given primary
     * count, as determined by the bootstrap/load-balance helper.
     */
    public String getAppropriateToken(int count) {
        BigInteger token = BootstrapAndLbHelper.getTokenBasedOnPrimaryCount(count);
        return token.toString();
    }

    /**
     * MBean entry point: run a garbage collection pass on every configured
     * table.
     */
    public void doGC() {
        for (String tableName : DatabaseDescriptor.getTables()) {
            Table.open(tableName).doGC();
        }
    }

    /**
     * MBean entry point: stream every file found in the given colon separated
     * list of directories to the specified host, then block until the transfer
     * completes.
     *
     * @param directories colon separated list of directories whose files are streamed
     * @param host        target host, contacted on the configured storage port
     * @throws IOException if setting up the stream fails
     */
    public void forceHandoff(String directories, String host) throws IOException {
        List<File> filesList = new ArrayList<File>();
        for (String source : directories.split(":")) {
            File directory = new File(source);
            /* listFiles() returns null for a missing or non-directory path;
               skip such entries instead of hitting an NPE in addAll. */
            File[] children = directory.listFiles();
            if (children != null) {
                Collections.addAll(filesList, children);
            }
        }

        StreamContextManager.StreamContext[] streamContexts = new StreamContextManager.StreamContext[filesList.size()];
        int i = 0;
        for (File file : filesList) {
            streamContexts[i] = new StreamContextManager.StreamContext(file.getAbsolutePath(), file.length());
            logger_.debug("Stream context metadata " + streamContexts[i]);
            ++i;
        }

        if (streamContexts.length > 0) {
            EndPoint target = new EndPoint(host, DatabaseDescriptor.getStoragePort());
            /* Set up the stream manager with the files that need to streamed */
            StreamManager.instance(target).addFilesToStream(streamContexts);
            /* Send the bootstrap initiate message */
            BootstrapInitiateMessage biMessage = new BootstrapInitiateMessage(streamContexts);
            Message message = BootstrapInitiateMessage.makeBootstrapInitiateMessage(biMessage);
            logger_.debug("Sending a bootstrap initiate message to " + target + " ...");
            MessagingService.getMessagingInstance().sendOneWay(message, target);
            logger_.debug("Waiting for transfer to " + target + " to complete");
            StreamManager.instance(target).waitForStreamCompletion();
            logger_.debug("Done with transfer to " + target);
        }
    }

    /* End of MBean interface methods */

    /*
     * Returns the predecessor of endpoint ep on the identifier space: the
     * owner of the next token counter-clockwise, wrapping around to the
     * highest token when ep owns the lowest one.
     */
    EndPoint getPredecessor(EndPoint ep) {
        BigInteger token = tokenMetadata_.getToken(ep);
        Map<BigInteger, EndPoint> ring = tokenMetadata_.cloneTokenEndPointMap();
        List<BigInteger> sortedTokens = new ArrayList<BigInteger>(ring.keySet());
        Collections.sort(sortedTokens);
        int position = Collections.binarySearch(sortedTokens, token);
        if (position == 0) {
            /* Lowest token: wrap around to the highest one. */
            return ring.get(sortedTokens.get(sortedTokens.size() - 1));
        }
        return ring.get(sortedTokens.get(position - 1));
    }

    /*
     * Returns the successor of endpoint ep on the identifier space: the owner
     * of the next token clockwise, wrapping around to the lowest token when
     * ep owns the highest one.
     */
    public EndPoint getSuccessor(EndPoint ep) {
        BigInteger token = tokenMetadata_.getToken(ep);
        Map<BigInteger, EndPoint> ring = tokenMetadata_.cloneTokenEndPointMap();
        List<BigInteger> sortedTokens = new ArrayList<BigInteger>(ring.keySet());
        Collections.sort(sortedTokens);
        int position = Collections.binarySearch(sortedTokens, token);
        if (position == sortedTokens.size() - 1) {
            /* Highest token: wrap around to the lowest one. */
            return ring.get(sortedTokens.get(0));
        }
        return ring.get(sortedTokens.get(position + 1));
    }

    /**
     * This method returns the range handled by this node: from the
     * predecessor's token (left edge) to our own token.
     */
    public Range getMyRange() {
        BigInteger myToken = tokenMetadata_.getToken(StorageService.tcpAddr_);
        Map<BigInteger, EndPoint> ring = tokenMetadata_.cloneTokenEndPointMap();
        List<BigInteger> sortedTokens = new ArrayList<BigInteger>(ring.keySet());
        Collections.sort(sortedTokens);
        int myIndex = Collections.binarySearch(sortedTokens, myToken);
        /* The left edge is the previous token, wrapping around at index 0. */
        BigInteger lhs;
        if (myIndex == 0) {
            lhs = sortedTokens.get(sortedTokens.size() - 1);
        } else {
            lhs = sortedTokens.get(myIndex - 1);
        }
        return new Range(lhs, myToken);
    }

    /**
     * Get the primary range for the specified endpoint: from its predecessor's
     * token to its own token.
     *
     * @param ep endpoint we are interested in.
     * @return range for the specified endpoint.
     */
    public Range getPrimaryRangeForEndPoint(EndPoint ep) {
        BigInteger left = tokenMetadata_.getToken(getPredecessor(ep));
        BigInteger right = tokenMetadata_.getToken(ep);
        return new Range(left, right);
    }

    /**
     * Get all ranges an endpoint is responsible for: its own primary range
     * plus the primary ranges of its (replication factor - 1) predecessors.
     *
     * @param ep endpoint we are interested in.
     * @return ranges for the specified endpoint.
     */
    List<Range> getRangesForEndPoint(EndPoint ep) {
        List<Range> ranges = new ArrayList<Range>();
        EndPoint current = ep;
        ranges.add(getPrimaryRangeForEndPoint(current));
        for (int remaining = DatabaseDescriptor.getReplicationFactor() - 1; remaining > 0; --remaining) {
            current = getPredecessor(current);
            ranges.add(getPrimaryRangeForEndPoint(current));
        }
        return ranges;
    }

    /**
     * Get all ranges that span the ring given a set of tokens: one range per
     * adjacent token pair in sorted order, plus the wrap-around range from the
     * highest token back to the lowest.
     *
     * @param tokens the tokens on the ring
     * @return the ranges in sorted order; empty when no tokens are supplied
     */
    public Range[] getAllRanges(Set<BigInteger> tokens) {
        List<BigInteger> allTokens = new ArrayList<BigInteger>(tokens);
        if (allTokens.isEmpty()) {
            /* Guard: the wrap-around range below would otherwise index past
               the end of an empty list. */
            return new Range[0];
        }
        Collections.sort(allTokens);
        int size = allTokens.size();
        List<Range> ranges = new ArrayList<Range>(size);
        for (int i = 1; i < size; ++i) {
            ranges.add(new Range(allTokens.get(i - 1), allTokens.get(i)));
        }
        /* Wrap-around range closing the ring. */
        ranges.add(new Range(allTokens.get(size - 1), allTokens.get(0)));
        return ranges.toArray(new Range[0]);
    }

    /**
     * Get the primary range of every endpoint in the given set.
     */
    public Range[] getPrimaryRangesForEndPoints(Set<EndPoint> endpoints) {
        List<Range> allRanges = new ArrayList<Range>(endpoints.size());
        for (EndPoint endpoint : endpoints) {
            allRanges.add(getPrimaryRangeForEndPoint(endpoint));
        }
        return allRanges.toArray(new Range[0]);
    }

    /**
     * This method returns the endpoint that is responsible for storing the
     * specified key: the owner of the first token at or after hash(key),
     * wrapping around to the lowest token. Falls back to the local endpoint
     * when the ring is empty.
     *
     * @param key key for which we need to find the endpoint
     * @return the endpoint responsible for this key
     */
    public EndPoint getPrimary(String key) {
        BigInteger token = hash(key);
        Map<BigInteger, EndPoint> ring = tokenMetadata_.cloneTokenEndPointMap();
        List<BigInteger> sortedTokens = new ArrayList<BigInteger>(ring.keySet());
        if (sortedTokens.isEmpty()) {
            /* No ring information yet: default to ourselves. */
            return StorageService.tcpAddr_;
        }
        Collections.sort(sortedTokens);
        int index = Collections.binarySearch(sortedTokens, token);
        if (index < 0) {
            /* No exact hit: binarySearch returns -(insertionPoint) - 1. */
            index = -index - 1;
            if (index >= sortedTokens.size()) {
                /* Past the highest token: wrap around to the lowest. */
                index = 0;
            }
        }
        return ring.get(sortedTokens.get(index));
    }

    /**
     * This method determines whether the local endpoint is the primary for the
     * given key.
     *
     * @param key key to locate on the ring.
     * @return true if the local endpoint is the primary replica.
     */
    public boolean isPrimary(String key) {
        return StorageService.tcpAddr_.equals(getPrimary(key));
    }

    /**
     * This method determines whether the target endpoint is the primary for
     * the given key.
     *
     * @param key    key to locate on the ring.
     * @param target the target endpoint.
     * @return true if the target endpoint is the primary replica.
     */
    public boolean isPrimary(String key, EndPoint target) {
        return target.equals(getPrimary(key));
    }

    /**
     * This method determines whether the local endpoint is the secondary
     * replica for the given key.
     *
     * @param key
     * @return true if the local endpoint is the secondary replica.
     */
    public boolean isSecondary(String key) {
        EndPoint[] topN = getNStorageEndPoint(key);
        /* Need the full replica set AND at least two entries before consulting
           index 1; without the second check RF == 1 could throw AIOOBE. */
        if (topN.length < DatabaseDescriptor.getReplicationFactor() || topN.length < 2)
            return false;
        return topN[1].equals(StorageService.tcpAddr_);
    }

    /**
     * This method determines whether the local endpoint is the tertiary
     * replica for the given key.
     *
     * @param key
     * @return true if the local endpoint is the tertiary replica.
     */
    public boolean isTertiary(String key) {
        EndPoint[] topN = getNStorageEndPoint(key);
        /* Need the full replica set AND at least three entries before
           consulting index 2; without the second check RF <= 2 could throw
           AIOOBE. */
        if (topN.length < DatabaseDescriptor.getReplicationFactor() || topN.length < 3)
            return false;
        return topN[2].equals(StorageService.tcpAddr_);
    }

    /**
     * This method determines if the local endpoint is among the N replica
     * endpoints responsible for the given key.
     */
    public boolean isInTopN(String key) {
        for (EndPoint replica : getNStorageEndPoint(key)) {
            if (StorageService.tcpAddr_.equals(replica))
                return true;
        }
        return false;
    }

    /**
     * This method returns the N endpoints that are responsible for storing the
     * specified key i.e for replication.
     *
     * @param key key for which we need to find the endpoints
     * @return the replica endpoints responsible for this key
     */
    public EndPoint[] getNStorageEndPoint(String key) {
        BigInteger token = hash(key);
        return nodePicker_.getStorageEndPoints(token);
    }

    /**
     * This method attempts to return the N replica endpoints responsible for
     * storing the specified key, keeping only those the failure detector
     * currently reports as alive.
     *
     * @param key key for which we need to find the endpoints
     * @return the live replica endpoints responsible for this key
     */
    public List<EndPoint> getNLiveStorageEndPoint(String key) {
        List<EndPoint> liveEps = new ArrayList<EndPoint>();
        for (EndPoint candidate : getNStorageEndPoint(key)) {
            if (FailureDetector.instance().isAlive(candidate)) {
                liveEps.add(candidate);
            }
        }
        return liveEps;
    }

    /**
     * This method returns the N endpoints that are responsible for storing the
     * specified key i.e for replication.
     *
     * @param key key for which we need to find the endpoints
     * @return the endpoint map produced by the replica placement strategy
     *
     * NOTE(review): this delegates to getHintedStorageEndPoints(), making it
     * behaviorally identical to getNHintedStorageEndPoint(String) — confirm
     * whether the non-hinted lookup was intended here.
     */
    public Map<EndPoint, EndPoint> getNStorageEndPointMap(String key) {
        BigInteger token = hash(key);
        return nodePicker_.getHintedStorageEndPoints(token);
    }

    /**
     * This method returns the N endpoints that are responsible for storing the
     * specified key i.e for replication. But it makes sure that the N endpoints
     * that are returned are live as reported by the FD. It returns the hint
     * information if some nodes in the top N are not live.
     *
     * @param key key for which we need to find the endpoints
     * @return the hinted endpoint map produced by the replica placement strategy
     */
    public Map<EndPoint, EndPoint> getNHintedStorageEndPoint(String key) {
        BigInteger token = hash(key);
        return nodePicker_.getHintedStorageEndPoints(token);
    }

    /**
     * This method returns the N endpoints that are responsible for storing the
     * specified token i.e for replication.
     *
     * @param token position on the ring
     * @return the replica endpoints responsible for this token
     */
    public EndPoint[] getNStorageEndPoint(BigInteger token) {
        return nodePicker_.getStorageEndPoints(token);
    }

    /**
     * This method returns the N endpoints that are responsible for storing the
     * specified token i.e for replication, resolved against the token to
     * endpoint mapping that is passed in rather than the live ring state.
     *
     * @param token              position on the ring
     * @param tokenToEndPointMap the ring view to resolve replicas against
     * @return the replica endpoints responsible for this token
     */
    protected EndPoint[] getNStorageEndPoint(BigInteger token, Map<BigInteger, EndPoint> tokenToEndPointMap) {
        return nodePicker_.getStorageEndPoints(token, tokenToEndPointMap);
    }

    /**
     * This method returns the N endpoints that are responsible for storing the
     * specified token i.e for replication. But it makes sure that the N endpoints
     * that are returned are live as reported by the FD. It returns the hint
     * information if some nodes in the top N are not live.
     *
     * @param token position on the ring
     * @return the hinted endpoint map produced by the replica placement strategy
     */
    public Map<EndPoint, EndPoint> getNHintedStorageEndPoint(BigInteger token) {
        return nodePicker_.getHintedStorageEndPoints(token);
    }

    /**
     * This function finds the most suitable endpoint given a key. It prefers
     * the local node, then a live endpoint in the local data center, then any
     * live replica; returns null when no replica is alive.
     */
    protected EndPoint findSuitableEndPoint(String key) throws IOException {
        // If it's local, use ourself.
        if (isInTopN(key))
            return tcpAddr_;

        EndPoint fallback = null;
        for (EndPoint ep : getNStorageEndPoint(key)) {
            // Skip dead endpoints
            if (!FailureDetector.instance().isAlive(ep))
                continue;

            // If it's in the same datacenter, return immediately
            if (StorageService.instance().isInSameDataCenter(ep)) {
                logger_.debug("EndPoint " + ep + " is in the same data center as local storage endpoint.");
                return ep;
            }

            // Otherwise remember it as a last-resort option.
            fallback = ep;
        }

        // No live server in the local data center: fall back to the last live
        // replica we saw, if any.
        if (fallback != null) {
            logger_.debug("EndPoint " + fallback + " is alive so get data from it.");
            return fallback;
        }

        logger_.warn("Could not find any suitable endpoint for key '" + key + "'");
        return null;
    }
}