com.addthis.hydra.store.skiplist.SkipListCache.java Source code

Java tutorial

Introduction

Here is the source code for com.addthis.hydra.store.skiplist.SkipListCache.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.addthis.hydra.store.skiplist;

import com.addthis.basis.util.MemoryCounter;
import com.addthis.basis.util.Parameter;
import com.addthis.codec.Codec;
import com.addthis.hydra.store.db.CloseOperation;
import com.addthis.hydra.store.kv.ByteStore;
import com.addthis.hydra.store.kv.KeyCoder;
import com.addthis.hydra.store.kv.PagedKeyValueStore;
import com.addthis.hydra.store.util.MetricsUtil;
import com.addthis.hydra.store.util.NamedThreadFactory;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Gauge;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.ByteBufOutputStream;
import io.netty.buffer.PooledByteBufAllocator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

/**
 * ::TWO INVARIANTS TO AVOID DEADLOCK AND MAINTAIN CONSISTENCY::
 * <p/>
 * Invariant #1:
 * When locking two pages always lock the lower page before locking the higher page.
 * <p/>
 * Invariant #2:
 * To read a consistent snapshot of a page in the external storage you must
 * be holding a lock on the lower page in memory.
 * <p/>
 * The left sentinel page is the lowest page in storage. It is constructed with
 * a special first key with value negative infinity. No key may be smaller than
 * negative infinity. The left sentinel page may be neither <i>purged</i>
 * nor <i>deleted</i> (see below).
 * <p/>
 * A page is <i>evicted</i> when the contents of the page are transferred from
 * the JVM heap into the external storage. When a page is evicted a page stub
 * remains in memory that contains the minimal information needed to restore the
 * page into memory.
 * <p/>
 * A page is <i>purged</i> when a page stub is deleted from memory. The most
 * recent copy of this page still resides in the external storage. The left
 * sentinel page may not be purged.
 * <p/>
 * A page is <i>deleted</i> when it is removed from both memory and the external storage.
 * Only pages with 0 keys may be deleted. The left sentinel page may not be deleted.
 *
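 * @param <K> the type of keys stored in the cache
 * @param <V> the type of values stored in the cache; must implement {@link Codec.BytesCodable}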
 */
public class SkipListCache<K, V extends Codec.BytesCodable> implements PagedKeyValueStore<K, V> {

    private static final Logger log = LoggerFactory.getLogger(SkipListCache.class);

    // Tunables obtained through Parameter.intValue / Parameter.boolValue; the second
    // argument is the fallback passed to that helper.
    // NOTE(review): presumably Parameter reads JVM system properties -- confirm.

    // Default page limit used by the Builder and as the fallback in
    // shouldEvictPage() / mustEvictPage() when maxPages is not positive.
    static final int defaultMaxPages = Parameter.intValue("eps.cache.pages", 50);
    static final int defaultMaxPageEntries = Parameter.intValue("eps.cache.page.entries", 50);
    // Added to an eviction task's timeout after each successful optimistic eviction.
    static final int expirationDelta = Parameter.intValue("cache.expire.delta", 1000);
    // Default number of background eviction (and purge) threads.
    private static final int defaultEvictionThreads = Parameter.intValue("cache.threadcount.eviction", 1);
    private static final int fixedNumberEvictions = Parameter.intValue("cache.batch.evictions", 10);
    static final boolean trackEncodingByteUsage = Parameter.boolValue("eps.cache.track.encoding", false);

    /**
     * Used as an absolute delta from maxPages (or defaultMaxPages) when using
     * that upper bound: {@link #shouldEvictPage()} starts evicting once the
     * number of pages in memory exceeds the bound minus this delta.
     * Otherwise it's treated as a percentage of maxTotalMemory.
     * NOTE(review): the percentage-of-memory use is not visible in
     * shouldEvictPage(), which compares against softTotalMem -- confirm.
     */
    private static final int shouldEvictDelta = Parameter.intValue("eps.cache.evict.delta", 20);

    // In-memory index of pages (and page stubs), keyed by each page's first key.
    final ConcurrentSkipListMap<K, Page<K, V>> cache;

    // Backing store holding the encoded form of every page.
    final ByteStore externalStore;

    // shutdownEvictionThreads is polled by the background purge and eviction
    // loops and makes them stop when set.
    private final AtomicBoolean shutdownGuard, shutdownEvictionThreads;

    // Pages that are candidates for eviction; polled by BackgroundEvictionTask.
    final BlockingQueue<Page<K, V>> evictionQueue;

    // First keys of pages whose stubs may be purged from the cache.
    final ConcurrentSkipListSet<K> purgeSet;

    // Tracks the number of keys in purgeSet; updated alongside successful
    // add/remove calls on the set.
    final AtomicInteger purgeSetSize = new AtomicInteger(0);

    /**
     * Used to schedule synchronous page eviction in the
     * {@link #put(Object, com.addthis.codec.Codec.BytesCodable)} and {@link #remove(Object)}
     * methods when the background eviction threads are behind schedule.
     */
    private final LinkedBlockingQueue<BackgroundEvictionTask> evictionTaskQueue;

    // Counters. cacheSize is incremented when a split adds a page to the cache
    // and decremented when a stub is purged; numPagesInMemory is incremented on
    // split and decremented on delete.
    final AtomicInteger cacheSize = new AtomicInteger();
    final AtomicInteger numPagesInMemory = new AtomicInteger();
    final AtomicLong numPagesDeleted = new AtomicLong();
    final AtomicLong numPagesEncoded = new AtomicLong();
    final AtomicLong numPagesDecoded = new AtomicLong();
    final AtomicLong numPagesSplit = new AtomicLong();

    // Estimated size of an empty page, computed once in the constructor.
    final int mem_page;

    final AtomicLong memoryEstimate = new AtomicLong();

    final AtomicLong estimateCounter = new AtomicLong();

    // Source of unique ids for BackgroundEvictionTask instances (shared by all caches).
    private static final AtomicInteger evictionId = new AtomicInteger();

    // Source of the numeric suffix that makes each cache's scope name unique.
    private static final AtomicInteger scopeGenerator = new AtomicInteger();

    // Name used for thread-factory prefixes and metric scopes of this cache.
    final String scope = "SkipListCache" + Integer.toString(scopeGenerator.getAndIncrement());

    final SkipListCacheMetrics metrics = new SkipListCacheMetrics(this);

    private final ScheduledExecutorService evictionThreadPool, purgeThreadPool;

    // Always assigned null by the constructor, so keys are compared through
    // their natural ordering (see compareKeys).
    private final Comparator comparator;

    final KeyCoder<K, V> keyCoder;

    final PageFactory pageFactory;

    // Memory and page limits consulted by shouldEvictPage() / mustEvictPage().
    long softTotalMem;
    long maxTotalMem;
    long maxPageMem;
    boolean overrideDefaultMaxPages;
    int estimateInterval;
    int maxPageSize;
    int maxPages;

    private static long globalMaxTotalMem;
    private static long globalSoftTotalMem;

    // Period, in milliseconds, at which the background purge and eviction tasks are scheduled.
    private static final int evictionThreadSleepMillis = 10;
    private static final int threadPoolWaitShutdownSeconds = 10;

    /**
     * Fluent builder for {@link SkipListCache}. The key coder, external store
     * and maximum page size are mandatory and supplied to the constructor; the
     * number of eviction threads, the page limit and the page factory start at
     * their defaults and may be overridden through the chained setters before
     * calling {@link #build()}. See ''Effective Java, Second Edition.'' Item 2 -
     * "Consider a builder when faced with many constructor parameters."
     */
    public static class Builder<K, V extends Codec.BytesCodable> {

        // Mandatory settings, fixed at construction time
        protected final int maxPageSize;
        protected final ByteStore externalStore;
        protected final KeyCoder<K, V> keyCoder;

        // Overridable settings, pre-populated with their defaults
        protected int numEvictionThreads = defaultEvictionThreads;
        protected int maxPages = defaultMaxPages;
        protected PageFactory pageFactory = Page.DefaultPageFactory.singleton;

        /**
         * Creates a builder that uses the default page limit.
         */
        public Builder(KeyCoder<K, V> keyCoder, ByteStore store, int maxPageSize) {
            this(keyCoder, store, maxPageSize, defaultMaxPages);
        }

        /**
         * Creates a builder with an explicit page limit.
         */
        public Builder(KeyCoder<K, V> keyCoder, ByteStore store, int maxPageSize, int maxPages) {
            this.keyCoder = keyCoder;
            this.externalStore = store;
            this.maxPageSize = maxPageSize;
            this.maxPages = maxPages;
        }

        /** Overrides the number of background eviction threads. */
        @SuppressWarnings("unused")
        public Builder<K, V> numEvictionThreads(int val) {
            this.numEvictionThreads = val;
            return this;
        }

        /** Overrides the page limit. */
        @SuppressWarnings("unused")
        public Builder<K, V> maxPages(int val) {
            this.maxPages = val;
            return this;
        }

        /** Overrides the factory used to create pages. */
        @SuppressWarnings("unused")
        public Builder<K, V> pageFactory(PageFactory factory) {
            this.pageFactory = factory;
            return this;
        }

        /** Constructs the cache from the settings collected so far. */
        public SkipListCache<K, V> build() {
            SkipListCache<K, V> built = new SkipListCache<>(keyCoder, externalStore, maxPageSize, maxPages,
                    numEvictionThreads, pageFactory);
            return built;
        }

    }

    /**
     * Convenience constructor that uses the default number of eviction threads
     * and the default page factory.
     */
    public SkipListCache(KeyCoder<K, V> keyCoder, ByteStore externalStore, int maxPageSize, int maxPages) {
        this(keyCoder, externalStore, maxPageSize, maxPages, defaultEvictionThreads,
                Page.DefaultPageFactory.singleton);
    }

    /**
     * Creates the cache, loads (or creates) the left sentinel page from the
     * external store, and starts the background purge and eviction tasks.
     *
     * @param keyCoder           encodes/decodes keys and supplies the negative-infinity key
     * @param externalStore      backing store for encoded pages; must not be null
     * @param maxPageSize        maximum page size
     * @param maxPages           page limit consulted by the eviction policy
     * @param numEvictionThreads size of both the eviction and the purge thread pool; must be positive
     * @param pageFactory        factory used to create pages
     * @throws NullPointerException  if {@code externalStore} is null
     * @throws IllegalStateException if {@code numEvictionThreads} is zero or negative
     */
    public SkipListCache(KeyCoder<K, V> keyCoder, ByteStore externalStore, int maxPageSize, int maxPages,
            int numEvictionThreads, PageFactory pageFactory) {
        if (externalStore == null) {
            throw new NullPointerException("externalStore must be non-null");
        }

        // Zero is rejected as well, so the requirement is "positive", not "non-negative".
        if (numEvictionThreads <= 0) {
            throw new IllegalStateException("numEvictionThreads must be a positive integer");
        }

        this.pageFactory = pageFactory;
        this.keyCoder = keyCoder;
        this.negInf = keyCoder.negInfinity();
        this.cache = new ConcurrentSkipListMap<>();
        this.mem_page = (int) MemoryCounter
                .estimateSize(pageFactory.measureMemoryEmptyPage(KeyCoder.EncodeType.SPARSE));
        this.externalStore = externalStore;
        this.maxPageSize = maxPageSize;
        this.maxPages = maxPages;
        this.shutdownGuard = new AtomicBoolean(false);
        this.shutdownEvictionThreads = new AtomicBoolean(false);
        this.evictionTaskQueue = new LinkedBlockingQueue<>();
        this.purgeSet = new ConcurrentSkipListSet<>();
        this.evictionQueue = new LinkedBlockingQueue<>();

        loadFromExternalStore();

        // No custom comparator is supported: keys are compared by natural ordering.
        this.comparator = null;

        evictionThreadPool = Executors.newScheduledThreadPool(numEvictionThreads,
                new NamedThreadFactory(scope + "-eviction-", true));

        purgeThreadPool = Executors.newScheduledThreadPool(numEvictionThreads,
                new NamedThreadFactory(scope + "-purge-", true));

        // One purge task and one unbounded (maxEvictions == 0) eviction task per
        // thread; the initial delays are staggered by the loop index.
        for (int i = 0; i < numEvictionThreads; i++) {
            purgeThreadPool.scheduleAtFixedRate(new BackgroundPurgeTask(), i, evictionThreadSleepMillis,
                    TimeUnit.MILLISECONDS);

            evictionThreadPool.scheduleAtFixedRate(new BackgroundEvictionTask(0), i, evictionThreadSleepMillis,
                    TimeUnit.MILLISECONDS);
        }

        log.info("[init] ro=" + isReadOnly() + " maxPageSize=" + maxPageSize + " maxPages=" + maxPages + " gztype="
                + Page.gztype + " gzlevel=" + Page.gzlevel + " gzbuf=" + Page.gzbuf + " mem[page=" + mem_page
                + "]");

    }

    /**
     * Replaces the page limit consulted by {@link #shouldEvictPage()} and
     * {@link #mustEvictPage()}.
     */
    @SuppressWarnings("unused")
    public void setMaxPages(int maxPages) {
        this.maxPages = maxPages;
    }

    /**
     * Replaces the maximum page size.
     */
    @SuppressWarnings("unused")
    public void setMaxPageSize(int maxPageSize) {
        this.maxPageSize = maxPageSize;
    }

    // The negative-infinity key obtained from keyCoder.negInfinity(); it is the
    // first key of the left sentinel page.
    final K negInf;

    /**
     * Returns {@code true} when the supplied raw (encoded) value is {@code null}.
     */
    public final boolean nullRawValue(byte[] value) {
        return (value == null);
    }

    static enum EvictionStatus {
        /** No eviction attempt was made. */
        NO_STATUS,

        /** The call to page.writeTryLock() failed. */
        TRYLOCK_FAIL,

        /** The page is in a transient state. */
        TRANSIENT_PAGE,

        /** The eviction succeeded. */
        SUCCESS,

        /** The page had already been evicted. */
        EVICTED_PAGE,

        /** The page is scheduled for deletion. */
        DELETION_SCHEDULED;

        /**
         * Tells whether the page must be put back into the eviction queue.
         * NO_STATUS means eviction was never attempted, TRYLOCK_FAIL means the
         * optimistic writeTryLock() did not succeed, and DELETION_SCHEDULED
         * means the page holds 0 entries: it will either move into a transient
         * state or receive new keys.
         */
        boolean needsAdditionalProcessing() {
            switch (this) {
            case NO_STATUS:
            case TRYLOCK_FAIL:
            case DELETION_SCHEDULED:
                return true;
            default:
                return false;
            }
        }

        /** True only for {@link #SUCCESS}. */
        public boolean completeSuccess() {
            return SUCCESS == this;
        }

        /** Tells whether the page's key should be dropped from the purge set. */
        public boolean removePurgeSet() {
            switch (this) {
            case SUCCESS:
            case TRANSIENT_PAGE:
            case EVICTED_PAGE:
                return true;
            default:
                return false;
            }
        }
    }

    /**
     * Periodic task that walks the purge set and removes evicted page stubs
     * from the in-memory cache. It keeps purging while
     * {@link #shouldPurgePage()} holds, the eviction threads have not been
     * asked to shut down, and the last pass removed a key from the purge set.
     */
    class BackgroundPurgeTask implements Runnable {

        // Cursor over purgeSet. Set to null once exhausted so that the next
        // doPurgePage() call restarts with a fresh iterator.
        Iterator<K> targetKeys;

        BackgroundPurgeTask() {
            targetKeys = purgeSet.iterator();
        }

        @Override
        public void run() {
            try {
                backgroundPurge();
            } catch (Exception ex) {
                // The task is scheduled with scheduleAtFixedRate, which cancels
                // all further runs if one run throws; log and carry on instead.
                logException("Uncaught exception in skiplist concurrent cache purge thread", ex);
            }
        }

        // Busy loop: each doPurgePage() call handles at most one purge-set removal.
        private void backgroundPurge() {
            while (!shutdownEvictionThreads.get() && shouldPurgePage() && doPurgePage())
                ;
        }

        /**
         * Attempts to remove the stub of the page whose first key is
         * {@code targetKey} from the in-memory cache. Both the preceding page
         * and the target page are try-locked (never blocking).
         *
         * @return TRYLOCK_FAIL if either try-lock failed; TRANSIENT_PAGE if the
         *         preceding page is in a transient state; NO_STATUS if the page
         *         following the predecessor starts after {@code targetKey};
         *         SUCCESS if the stub was removed from the cache; EVICTED_PAGE
         *         in every other case (including when no page follows the
         *         predecessor)
         */
        private EvictionStatus removePageFromCache(K targetKey) {
            assert (!targetKey.equals(negInf));

            Page<K, V> prevPage = null, currentPage = null;

            try {

                // We must acquire the locks on the pages from lowest to highest.
                // This is inefficient but it avoids deadlock.
                Map.Entry<K, Page<K, V>> prevEntry, currentEntry;
                prevEntry = cache.lowerEntry(targetKey);
                prevPage = prevEntry.getValue();
                if (!prevPage.writeTryLock()) {
                    // Lock not held: null the reference so finally does not unlock it.
                    prevPage = null;
                    return EvictionStatus.TRYLOCK_FAIL;
                }
                if (prevPage.inTransientState()) {
                    return EvictionStatus.TRANSIENT_PAGE;
                }

                currentEntry = cache.higherEntry(prevEntry.getKey());
                if (currentEntry != null) {
                    currentPage = currentEntry.getValue();
                    if (!currentPage.writeTryLock()) {
                        // Lock not held: null the reference so finally does not unlock it.
                        currentPage = null;
                        return EvictionStatus.TRYLOCK_FAIL;
                    }
                    int compareKeys = compareKeys(targetKey, currentPage.firstKey);
                    if (compareKeys < 0) {
                        return EvictionStatus.NO_STATUS;
                    } else if (compareKeys == 0 && currentPage.keys == null
                            && currentPage.state == ExternalMode.DISK_MEMORY_IDENTICAL) {
                        // Only a stub (no keys loaded) that matches the copy in
                        // external storage is dropped from the cache.
                        currentPage.state = ExternalMode.MEMORY_EVICTED;
                        cache.remove(targetKey);
                        cacheSize.getAndDecrement();
                        return EvictionStatus.SUCCESS;
                    }
                }
                return EvictionStatus.EVICTED_PAGE;
            } finally {
                writeUnlockAndNull(currentPage);
                writeUnlockAndNull(prevPage);
            }
        }

        /**
         * Iterates over the purge set until one key has been removed from it.
         *
         * @return <code>true</code> if a key was removed from the purge set
         *         (its status allowed removal) and <code>false</code> if the
         *         iterator was exhausted without removing any key
         */
        private boolean doPurgePage() {
            if (targetKeys == null) {
                targetKeys = purgeSet.iterator();
            }

            while (targetKeys.hasNext()) {
                K minKey = targetKeys.next();
                EvictionStatus status = removePageFromCache(minKey);
                if (status.removePurgeSet()) {
                    // remove() can return false if another purge task already
                    // removed the key; only count removals made by this thread.
                    if (purgeSet.remove(minKey)) {
                        purgeSetSize.getAndDecrement();
                        return true;
                    }
                }
            }
            targetKeys = null;
            return false;
        }

    }

    /**
     * Task that evicts pages taken from the eviction queue. When constructed
     * with a non-positive eviction count it keeps evicting while
     * {@link #shouldEvictPage()} holds; otherwise each run makes exactly that
     * many eviction attempts.
     */
    class BackgroundEvictionTask implements Runnable {

        // Minimum age (reference time minus page timestamp) a page must have
        // before an optimistic eviction is attempted. Grows by expirationDelta
        // after a successful optimistic eviction and is halved whenever an
        // optimistic pass is exhausted.
        private volatile long timeout;

        private final long initialTimeout;

        private final int id;

        // Metrics scope for this task; shadows the enclosing cache's scope field.
        private final String scope;

        // Number of attempts per run; <= 0 selects the unbounded background mode.
        private final int maxEvictions;

        @SuppressWarnings("unused")
        private final Gauge<Long> timeoutGauge;

        BackgroundEvictionTask(int evictions) {
            id = evictionId.getAndIncrement();
            maxEvictions = evictions;
            scope = "EvictionTask-" + SkipListCache.this.scope + "-" + id;
            // Exposes the current timeout as a gauge under this task's scope.
            timeoutGauge = Metrics.newGauge(getClass(), "timeout", scope, new Gauge<Long>() {
                @Override
                public Long value() {
                    return timeout;
                }
            });
            initialTimeout = 10;
            timeout = initialTimeout;
        }

        @Override
        public void run() {
            try {
                if (maxEvictions <= 0) {
                    backgroundEviction();
                } else {
                    fixedNumberEviction();
                }
            } catch (Exception ex) {
                logException("Uncaught exception in skiplist concurrent cache eviction thread", ex);
            }
        }

        // Makes maxEvictions eviction attempts, sharing one buffer that is
        // released when the batch is done.
        private void fixedNumberEviction() {
            ByteBufOutputStream byteStream = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
            try {
                for (int i = 0; i < maxEvictions; i++) {
                    doEvictPage(byteStream);
                }
            } finally {
                byteStream.buffer().release();
            }
        }

        // Evicts pages until shutdown is requested, eviction is no longer
        // needed, or an attempt fails to evict a page.
        private void backgroundEviction() {
            ByteBufOutputStream byteStream = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
            try {
                while (!shutdownEvictionThreads.get() && shouldEvictPage() && doEvictPage(byteStream))
                    ;
            } finally {
                byteStream.buffer().release();
            }
        }

        /**
         * Tries to evict a single page. In OPTIMISTIC mode the page lock is
         * acquired with writeTryLock() and the attempt is abandoned when the
         * lock is unavailable; in any other mode the call blocks on writeLock().
         *
         * @return the outcome of the attempt
         */
        private EvictionStatus attemptPageEviction(Page<K, V> page, IterationMode iteration,
                ByteBufOutputStream byteStream) {
            if (iteration == IterationMode.OPTIMISTIC) {
                if (!page.writeTryLock()) {
                    return EvictionStatus.TRYLOCK_FAIL;
                }
            } else {
                page.writeLock();
            }

            try {
                if (page.inTransientState()) {
                    return EvictionStatus.TRANSIENT_PAGE;
                }

                assert (!page.splitCondition());

                // Empty pages (other than the left sentinel) are left for deletion.
                if (page.size == 0 && !page.firstKey.equals(negInf)) {
                    return EvictionStatus.DELETION_SCHEDULED;
                }

                // No keys in memory: the page was evicted earlier, only the stub remains.
                if (page.keys == null) {
                    addToPurgeSet(page);
                    return EvictionStatus.EVICTED_PAGE;
                }

                pushPageToDisk(page, byteStream);

                addToPurgeSet(page);

                if (iteration == IterationMode.OPTIMISTIC) {
                    timeout = timeout + expirationDelta;
                }

                return EvictionStatus.SUCCESS;
            } finally {
                writeUnlockAndNull(page);
            }
        }

        // Registers the page's first key for purging; the left sentinel is never added.
        private void addToPurgeSet(Page<K, V> page) {
            if (!page.firstKey.equals(negInf)) {
                if (purgeSet.add(page.firstKey)) {
                    purgeSetSize.getAndIncrement();
                }
            }
        }

        /**
         * Polls pages from the eviction queue until one is evicted. The first
         * pass is optimistic: a page is only attempted when its age has reached
         * the timeout, and only with a try-lock. Once as many pages have been
         * polled as were in memory at the start, the timeout is halved, the
         * oldest page seen so far is attempted with a blocking lock, and a
         * second, pessimistic pass attempts every polled page with a blocking
         * lock. Pages whose status requires further processing are put back
         * into the queue.
         *
         * @param byteStream buffer passed on to the page write
         * @return <code>true</code> if a page is evicted and <code>false</code>
         *         otherwise (the queue ran empty or both passes finished)
         */
        private boolean doEvictPage(ByteBufOutputStream byteStream) {
            long referenceTime = generateTimestamp();

            Page<K, V> current = evictionQueue.poll();

            Page<K, V> oldestPage = current;

            // keeps track of the timestamp with the smallest value
            long oldestTimeStamp = (current != null) ? current.timeStamp : 0;

            int counter = 0;

            int numPages = getNumPagesInMemory();

            IterationMode iteration = IterationMode.OPTIMISTIC;

            EvictionStatus status;

            while (iteration != IterationMode.TERMINATION) {
                counter++;

                // Queue is empty: nothing (more) to evict.
                if (current == null) {
                    return false;
                }

                long timestamp = current.timeStamp;

                status = EvictionStatus.NO_STATUS;

                if (((iteration == IterationMode.OPTIMISTIC) && ((referenceTime - timestamp) >= timeout))
                        || (iteration == IterationMode.PESSIMISTIC)) {
                    status = attemptPageEviction(current, iteration, byteStream);

                    if (status.completeSuccess()) {
                        return true;
                    }
                }

                if (timestamp < oldestTimeStamp) {
                    oldestTimeStamp = timestamp;
                    oldestPage = current;
                }

                // Not evicted but still a candidate: return it to the queue.
                if (status.needsAdditionalProcessing()) {
                    evictionQueue.offer(current);
                }

                if (counter >= numPages) {
                    switch (iteration) {
                    case OPTIMISTIC:
                        // Optimistic pass exhausted: relax the timeout and go
                        // straight for the oldest page with a blocking lock.
                        iteration = IterationMode.PESSIMISTIC;
                        timeout /= 2;
                        status = attemptPageEviction(oldestPage, iteration, byteStream);
                        if (status.completeSuccess()) {
                            return true;
                        }
                        referenceTime = generateTimestamp();
                        counter = 0;
                        break;
                    case PESSIMISTIC:
                        iteration = IterationMode.TERMINATION;
                        break;
                    }
                }

                if (iteration != IterationMode.TERMINATION) {
                    current = evictionQueue.poll();
                }
            }

            return false;
        }

    }

    /**
     * Tells whether the purge set has grown larger than the number of pages
     * currently held in memory.
     */
    public boolean shouldPurgePage() {
        final int pendingPurges = purgeSetSize.get();
        final int residentPages = getNumPagesInMemory();
        return pendingPurges > residentPages;
    }

    /**
     * Soft eviction check. With a memory limit configured, eviction is
     * advised once the memory estimate exceeds the soft limit and more than 5
     * pages are in memory. Otherwise the page count is compared against the
     * configured (or default) page limit reduced by the eviction delta, with
     * a floor of 5; when the default limit is overridden any page in memory
     * qualifies.
     */
    public boolean shouldEvictPage() {
        final int residentPages = getNumPagesInMemory();

        if (maxTotalMem > 0) {
            return getMemoryEstimate() > softTotalMem && residentPages > 5;
        }

        if (maxPages > 0) {
            final int threshold = Math.max(maxPages - shouldEvictDelta, 5);
            return residentPages > threshold;
        }

        if (overrideDefaultMaxPages) {
            return residentPages > 0;
        }

        final int defaultThreshold = Math.max(defaultMaxPages - shouldEvictDelta, 5);
        return residentPages > defaultThreshold;
    }

    /**
     * Hard eviction check. Never true when the default page limit is
     * overridden. With a memory limit configured, eviction is required once
     * the memory estimate exceeds that limit and more than 5 pages are in
     * memory; otherwise once the page count exceeds the configured (or
     * default) page limit.
     */
    public boolean mustEvictPage() {
        final int residentPages = getNumPagesInMemory();

        if (overrideDefaultMaxPages) {
            return false;
        }

        if (maxTotalMem > 0) {
            return getMemoryEstimate() > maxTotalMem && residentPages > 5;
        }

        final int pageLimit = (maxPages > 0) ? maxPages : defaultMaxPages;
        return residentPages > pageLimit;
    }

    /**
     * Phases of a single eviction attempt: OPTIMISTIC uses try-locks and the
     * age timeout, PESSIMISTIC uses blocking locks on every polled page, and
     * TERMINATION ends the attempt.
     */
    private static enum IterationMode {
        OPTIMISTIC, PESSIMISTIC, TERMINATION
    }

    /**
     * Repairs the {@link Page#nextFirstKey} field of the page that precedes
     * {@code targetKey} in the external storage so that it points at
     * {@code newNextFirstKey}. When that page is {@code prevPage} the fix is
     * applied in memory (loading the page contents first if only a stub is
     * present) and the page is marked dirty; otherwise the stored page is
     * decoded, corrected and written straight back to the external storage.
     * The caller must hold the write lock on {@code prevPage}.
     */
    private void updateNextFirstKey(Page<K, V> prevPage, K newNextFirstKey, K targetKey, byte[] encodedTargetKey) {
        assert (prevPage.isWriteLockedByCurrentThread());

        final Map.Entry<byte[], byte[]> floor = externalStore.floorEntry(encodedTargetKey);
        final K floorKey = keyCoder.keyDecode(floor.getKey());

        if (!floorKey.equals(prevPage.firstKey)) {
            // The stored predecessor is a different page than prevPage:
            // patch its encoded form directly in the external storage.
            final Page<K, V> stored = pageFactory.generateEmptyPage(SkipListCache.this, floorKey,
                    KeyCoder.EncodeType.SPARSE);
            stored.decode(floor.getValue());
            assert (stored.nextFirstKey.equals(targetKey));
            assert (compareKeys(prevPage.firstKey, stored.firstKey) <= 0);
            stored.nextFirstKey = newNextFirstKey;
            final ByteBufOutputStream out = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
            try {
                externalStore.put(floor.getKey(), stored.encode(out));
            } finally {
                out.buffer().release();
            }
            return;
        }

        // The stored predecessor is prevPage itself: fix the in-memory copy.
        if (prevPage.keys == null) {
            pullPageHelper(prevPage, floor.getValue());
        }
        assert (prevPage.nextFirstKey.equals(targetKey));
        prevPage.nextFirstKey = newNextFirstKey;
        if (prevPage.state == ExternalMode.DISK_MEMORY_IDENTICAL) {
            prevPage.state = ExternalMode.DISK_MEMORY_DIRTY;
        }
    }

    /**
     * Deletes the page whose first key is {@code targetKey} from both the
     * cache and the external storage, provided the page is still present and
     * holds 0 entries. The preceding page is locked first, then the target
     * page; whenever the observed neighbours changed while the locks were
     * being acquired, both locks are released and the attempt is retried.
     * The left sentinel page must not be passed in.
     */
    private void deletePage(final K targetKey) {
        assert (!targetKey.equals(negInf));

        final byte[] encodedTargetKey = keyCoder.keyEncode(targetKey);

        while (true) {

            Page<K, V> prevPage = null, currentPage = null;

            try {

                byte[] prevKeyEncoded = externalStore.lowerKey(encodedTargetKey);

                // No predecessor in the external storage: nothing to do.
                if (prevKeyEncoded == null) {
                    return;
                }

                K prevKey = keyCoder.keyDecode(prevKeyEncoded);
                prevPage = locatePage(prevKey, LockMode.WRITEMODE);

                // The located page is not the expected predecessor: retry.
                if (!prevPage.firstKey.equals(prevKey)) {
                    continue;
                }

                Map.Entry<K, Page<K, V>> currentEntry = cache.higherEntry(prevKey);

                if (currentEntry == null) {
                    return;
                }

                currentPage = currentEntry.getValue();
                currentPage.writeLock();
                if (currentPage.inTransientState()) {
                    continue;
                }
                int compareKeys = compareKeys(targetKey, currentPage.firstKey);
                if (compareKeys > 0) {
                    // A page sits between prevPage and the target: retry.
                    continue;
                } else if (compareKeys == 0 && currentPage.size == 0) {
                    byte[] verifyPrevKeyEncoded = externalStore.lowerKey(encodedTargetKey);
                    // Test whether the lower key moved while we
                    // were acquiring locks on prevPage and currentPage.
                    if (verifyPrevKeyEncoded == null || !prevKey.equals(keyCoder.keyDecode(verifyPrevKeyEncoded))) {
                        continue;
                    }
                    externalStore.delete(encodedTargetKey);
                    Page<K, V> prev = cache.remove(targetKey);
                    assert (prev != null);
                    currentPage.state = ExternalMode.DELETED;
                    // NOTE(review): cacheSize is not decremented here, although
                    // the purge path decrements it on cache.remove() and page
                    // splits increment it -- confirm this is intentional.
                    numPagesInMemory.getAndDecrement();
                    numPagesDeleted.getAndIncrement();
                    // Unlink the deleted page; prevPage now differs from its stored copy.
                    prevPage.nextFirstKey = currentPage.nextFirstKey;
                    prevPage.state = ExternalMode.DISK_MEMORY_DIRTY;
                }
                return;
            } finally {
                // Runs on return and on every retry (continue).
                writeUnlockAndNull(currentPage);
                writeUnlockAndNull(prevPage);
            }
        }
    }

    /**
     * Splits {@code target} repeatedly until neither it nor any sibling
     * produced along the way satisfies the split condition. Each sibling is
     * returned write-locked by {@link #splitOnePage(Page)} and is unlocked
     * here once its own subtree of splits has completed.
     */
    private void splitPage(Page<K, V> target) {
        Page<K, V> sibling = null;
        try {
            sibling = splitOnePage(target);
            // Either half may still be too large after one split.
            if (target.splitCondition()) {
                splitPage(target);
            }
            if (sibling.splitCondition()) {
                splitPage(sibling);
            }
        } finally {
            writeUnlockAndNull(sibling);
        }
    }

    /**
     * Splits a page in half. The input page must be write locked,
     * hold enough keys to satisfy the split condition, and cannot
     * be in a transient state. The skip-list cache uses the invariant
     * that each page in the cache must have some copy of the page
     * in external storage. We currently use Berkeley DB as the external
     * storage system, which is an append-only database. To conserve
     * disk space we do not store a full page to the database but instead
     * insert (new key, empty page) as a stub into the database.
     *
     * @param target page to split
     * @return the newly created sibling page holding the upper half of the
     *         entries; it is returned write-locked and the caller must unlock it
     * @throws IllegalStateException if the cache already contains a page
     *         under the sibling's first key
     */
    private Page<K, V> splitOnePage(Page<K, V> target) {
        assert (target.isWriteLockedByCurrentThread());
        assert (target.splitCondition());
        assert (!target.inTransientState());

        // Only a stub in memory: load the page contents before splitting.
        if (target.keys == null) {
            pullPageFromDisk(target, LockMode.WRITEMODE);
        }

        // target keeps the lower newSize entries, the sibling gets the rest.
        int newSize = target.size / 2;
        int sibSize = target.size - newSize;

        // Views over the upper half; cleared at the end to truncate target's lists.
        List<K> keyRange = target.keys.subList(newSize, target.size);
        List<V> valueRange = target.values.subList(newSize, target.size);
        List<byte[]> rawValueRange = target.rawValues.subList(newSize, target.size);

        // Independent copies that become the sibling's lists.
        ArrayList<K> sibKeys = new ArrayList<>(keyRange);
        ArrayList<V> sibValues = new ArrayList<>(valueRange);
        ArrayList<byte[]> sibRawValues = new ArrayList<>(rawValueRange);
        K sibMinKey = sibKeys.get(0);

        Page<K, V> sibling = pageFactory.generateSiblingPage(SkipListCache.this, sibMinKey, target.nextFirstKey,
                sibSize, sibKeys, sibValues, sibRawValues, target.getEncodeType());
        // Locked before it is published in the cache below.
        sibling.writeLock();

        byte[] encodeKey;
        byte[] placeHolder;

        sibling.state = ExternalMode.DISK_MEMORY_DIRTY;
        target.state = ExternalMode.DISK_MEMORY_DIRTY;

        Page<K, V> prev = cache.putIfAbsent(sibMinKey, sibling);
        if (prev != null) {
            throw new IllegalStateException("Page split " + target.firstKey.toString() + " resulted in a new page "
                    + sibMinKey.toString() + " that already exists in cache.");
        }

        cacheSize.getAndIncrement();
        numPagesInMemory.getAndIncrement();

        sibling.avgEntrySize = target.avgEntrySize;
        sibling.estimates = target.estimates;
        sibling.estimateTotal = target.estimateTotal;

        target.nextFirstKey = sibMinKey;
        target.size = newSize;

        // Estimate recorded for target before the split; used to compute the delta.
        int prevMem = target.getMemoryEstimate();

        target.updateMemoryEstimate();
        sibling.updateMemoryEstimate();

        int updatedMem = target.getMemoryEstimate() + sibling.getMemoryEstimate();
        updateMemoryEstimate(updatedMem - prevMem);

        encodeKey = keyCoder.keyEncode(sibMinKey);

        // NOTE(review): this uses ByteBufAllocator.DEFAULT while the other
        // buffer allocations visible in this file use
        // PooledByteBufAllocator.DEFAULT -- confirm whether that is intended.
        ByteBufOutputStream byteBufOutputStream = new ByteBufOutputStream(ByteBufAllocator.DEFAULT.buffer());
        try {
            // Encode an empty page carrying the sibling's keys as the stub
            // stored under the sibling's first key.
            placeHolder = pageFactory.generateEmptyPage(SkipListCache.this, sibling.firstKey, sibling.nextFirstKey,
                    sibling.getEncodeType()).encode(byteBufOutputStream, false);
        } finally {
            byteBufOutputStream.buffer().release();
        }
        externalStore.put(encodeKey, placeHolder);

        evictionQueue.offer(sibling);
        numPagesSplit.getAndIncrement();

        // Clearing the sublist views removes the upper half from target's lists.
        keyRange.clear();
        valueRange.clear();
        rawValueRange.clear();

        return sibling;
    }

    /**
     * Logs {@code ex} at WARN level together with {@code message}. The
     * exception is handed to the logger as a throwable so that the logging
     * framework renders the stack trace (and cause chain) itself instead of
     * receiving a pre-formatted string.
     *
     * @param message description of the context in which the exception occurred
     * @param ex      the exception to report
     */
    private void logException(String message, Exception ex) {
        log.warn(message, ex);
    }

    /* ---------------- Comparison utilities -------------- */

    /**
     * Represents a key with a comparator as a Comparable.
     * <p/>
     * Because most sorted collections seem to use natural ordering on
     * Comparables (Strings, Integers, etc), most internal methods are
     * geared to use them. This is generally faster than checking
     * per-comparison whether to use comparator or comparable because
     * it doesn't require a (Comparable) cast for each comparison.
     * (Optimizers can only sometimes remove such redundant checks
     * themselves.) When Comparators are used,
     * ComparableUsingComparators are created so that they act in the
     * same way as natural orderings. This penalizes use of
     * Comparators vs Comparables, which seems like the right
     * tradeoff.
     */
    static final class ComparableUsingComparator<K> implements Comparable<K> {

        /** The wrapped key; always the left-hand operand of the comparison. */
        final K actualKey;
        /** The comparator every comparison is delegated to. */
        final Comparator<? super K> cmp;

        ComparableUsingComparator(K key, Comparator<? super K> cmp) {
            this.actualKey = key;
            this.cmp = cmp;
        }

        /**
         * Compares the wrapped key with {@code k2} using the comparator.
         *
         * @return the result of {@code cmp.compare(actualKey, k2)}
         */
        @Override
        public int compareTo(K k2) {
            return cmp.compare(actualKey, k2);
        }
    }

    /**
     * Produces a {@link Comparable} view of the given key.
     * <p/>
     * Without a comparator the key itself is cast to Comparable; a key
     * that does not implement Comparable can therefore cause a
     * ClassCastException that is propagated to the caller. With a
     * comparator the key is wrapped in a {@link ComparableUsingComparator}.
     *
     * @param key the key to adapt; must not be null
     * @return a Comparable that orders {@code key} against other keys
     * @throws NullPointerException if {@code key} is null
     */
    @SuppressWarnings("unchecked")
    Comparable<? super K> comparable(K key) throws ClassCastException {
        if (key == null) {
            throw new NullPointerException();
        }
        if (comparator == null) {
            // Natural ordering: the key is its own Comparable.
            return (Comparable<? super K>) key;
        }
        return new ComparableUsingComparator<K>(key, comparator);
    }

    /**
     * Compares two keys with the configured comparator, or with the
     * natural ordering of {@code key1} when no comparator is configured.
     *
     * @return a negative, zero or positive value as {@code key1} orders
     *         before, equal to or after {@code key2}
     */
    @SuppressWarnings("unchecked")
    @Override
    public int compareKeys(K key1, K key2) {
        return (comparator != null)
                ? comparator.compare(key1, key2)
                : ((Comparable<? super K>) key1).compareTo(key2);
    }

    /**
     * Reports whether the underlying external store is read-only.
     *
     * @return the value returned by the external store's own isReadOnly()
     */
    public boolean isReadOnly() {
        final boolean storeIsReadOnly = externalStore.isReadOnly();
        return storeIsReadOnly;
    }

    /**
     * Sets the {@code overrideDefaultMaxPages} flag.
     * For testing purposes only.
     */
    protected void setOverrideDefaultMaxPages() {
        this.overrideDefaultMaxPages = true;
    }

    /**
     * Invoked by the constructor. Makes sure that a left sentinel page
     * (the page keyed by {@code negInf}) exists in the external store and
     * registers it in the in-memory cache.
     * <ul>
     * <li>Empty store: a freshly initialized sentinel is written out.</li>
     * <li>Store whose first key is {@code negInf}: the sentinel is decoded
     * from the stored bytes.</li>
     * <li>Store whose first key is some other key: a new sentinel pointing
     * at that key is written out, and the existing first page is decoded
     * and registered in the cache as well.</li>
     * </ul>
     */
    private void loadFromExternalStore() {
        final Page<K, V> sentinel = pageFactory.generateEmptyPage(this, negInf, KeyCoder.EncodeType.SPARSE);
        ByteBufOutputStream scratch = null;
        try {
            if (externalStore.count() == 0) {
                // Nothing on disk yet: persist an empty sentinel page.
                scratch = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
                sentinel.initialize();
                externalStore.put(keyCoder.keyEncode(negInf), sentinel.encode(scratch));
            } else {
                final byte[] lowestKeyBytes = externalStore.firstKey();
                final K lowestKey = keyCoder.keyDecode(lowestKeyBytes);
                final byte[] lowestPageBytes = externalStore.get(lowestKeyBytes);

                if (!lowestKey.equals(negInf)) {
                    // The store has pages but no sentinel: create one that
                    // links to the current lowest page and persist it.
                    scratch = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
                    sentinel.initialize();
                    sentinel.nextFirstKey = lowestKey;
                    externalStore.put(keyCoder.keyEncode(negInf), sentinel.encode(scratch));

                    // Bring the existing lowest page into the cache too.
                    final Page<K, V> lowestPage = pageFactory.generateEmptyPage(this, lowestKey,
                            sentinel.getEncodeType());
                    lowestPage.decode(lowestPageBytes);

                    cache.put(lowestKey, lowestPage);
                    updateMemoryEstimate(lowestPage.getMemoryEstimate());
                    cacheSize.getAndIncrement();
                    numPagesInMemory.getAndIncrement();
                    evictionQueue.offer(lowestPage);
                } else {
                    // The sentinel is already stored: just decode it.
                    sentinel.decode(lowestPageBytes);
                    updateMemoryEstimate(sentinel.getMemoryEstimate());
                }
            }
        } finally {
            // The buffer is only allocated on the branches that encode a page.
            if (scratch != null) {
                scratch.buffer().release();
            }
        }

        cache.put(negInf, sentinel);
        cacheSize.getAndIncrement();
        numPagesInMemory.getAndIncrement();
        evictionQueue.offer(sentinel);
    }

    /**
     * Binary search over a sorted list of keys.
     *
     * @param arrayList  keys sorted according to {@code comparator}, or
     *                   according to their natural ordering when the
     *                   comparator is null
     * @param key        the key to look for
     * @param comparator ordering of the list, or null for natural ordering
     * @return the index of the key if it is present; otherwise
     *         {@code -(insertion point) - 1}, as defined by
     *         {@link Collections#binarySearch}
     */
    @SuppressWarnings("unchecked")
    static <K> int binarySearch(ArrayList<K> arrayList, K key, Comparator comparator) {
        if (comparator == null) {
            // Natural ordering: view the elements as Comparables.
            ArrayList<Comparable<K>> naturallyOrdered = (ArrayList<Comparable<K>>) arrayList;
            return Collections.binarySearch(naturallyOrdered, key);
        }
        return Collections.binarySearch(arrayList, key, comparator);
    }

    /**
     * Looks up the value stored under the given key.
     * <p/>
     * If a key that compares equal to {@code key} under the map's ordering
     * is present, its value is returned; otherwise the result is
     * {@code null}. At most one such mapping can exist.
     *
     * @param key the key to look up
     * @return the mapped value, or {@code null} if there is no mapping
     * @throws ClassCastException   if the specified key cannot be compared
     *                              with the keys currently in the map
     * @throws NullPointerException if the specified key is null
     */
    public V get(K key) {
        final V found = doGet(key);
        return found;
    }

    /**
     * Finds the page responsible for the key, holding its read lock for
     * the duration of the lookup, and returns the value currently stored
     * for the key.
     *
     * @return the value, or {@code null} when the page is empty or does
     *         not contain the key
     */
    private V doGet(K key) {
        final Page<K, V> holder = locatePage(key, LockMode.READMODE);
        try {
            if (holder.size == 0) {
                return null;
            }
            final int index = binarySearch(holder.keys, key, comparator);
            if (index < 0) {
                return null;
            }
            // Make sure the value at this index is available before reading it.
            holder.fetchValue(index);
            return holder.values.get(index);
        } finally {
            holder.readUnlock();
        }
    }

    /**
     * Associates the specified value with the specified key in this map,
     * replacing any value previously stored under the key.
     * <p/>
     * A {@code null} value is treated as a removal request: the call is
     * routed to the same code path as {@link #remove(Object)}.
     *
     * @param key   key with which the specified value is to be associated
     * @param value value to be associated with the specified key, or
     *              {@code null} to remove the mapping
     * @return the previous value associated with the specified key, or
     *         <tt>null</tt> if there was no mapping for the key
     * @throws ClassCastException   if the specified key cannot be compared
     *                              with the keys currently in the map
     * @throws NullPointerException if the specified key is null
     */
    public V put(K key, V value) {
        return (value == null) ? doRemove(key) : doPut(key, value);
    }

    /**
     * Removes the mapping for the given key, if there is one.
     *
     * @param key the key whose mapping is to be removed
     * @return the value that was removed, or {@code null} if none was found
     */
    public V remove(K key) {
        final V removed = doRemove(key);
        return removed;
    }

    /**
     * Deletes a contiguous range of keys.
     *
     * @param start     lower bound of the range to delete
     * @param end       upper bound of the range to delete
     * @param inclusive when true the {@code end} key itself is deleted too
     */
    @Override
    public void removeValues(K start, K end, boolean inclusive) {
        this.doRemove(start, end, inclusive);
    }

    /**
     * Stores a (key, value) pair in the given page, either overwriting an
     * existing entry or inserting a new one at its sorted position.
     *
     * @return the value that was replaced, or {@code null} when the key
     *         was not present in the page
     */
    private V putIntoPage(Page<K, V> page, K key, V value) {
        final int searchResult = binarySearch(page.keys, key, comparator);

        if (searchResult < 0) {
            // Key is absent: a negative result encodes the insertion point.
            final int insertAt = -(searchResult + 1);

            page.keys.add(insertAt, key);
            page.values.add(insertAt, value);
            page.rawValues.add(insertAt, null);

            // updateMemoryCounters must be invoked before incrementing size.
            updateMemoryCounters(page, key, value, null);
            page.size++;
            return null;
        }

        // Key is present: overwrite the value and drop the stale raw form.
        page.fetchValue(searchResult);

        final V replaced = page.values.set(searchResult, value);
        page.rawValues.set(searchResult, null);

        updateMemoryCounters(page, key, value, replaced);
        return replaced;
    }

    /**
     * Inserts or overwrites a (key, value) pair while holding the write
     * lock of the page responsible for the key.
     *
     * @return the value previously stored under the key, or {@code null}
     */
    V doPut(K key, V value) {
        // When the background eviction threads have fallen behind, evict one
        // page synchronously. getEvictionTask() / putEvictionTask() exist so
        // that BackgroundEvictionTask objects can be re-used.
        if (mustEvictPage()) {
            final BackgroundEvictionTask evictor = getEvictionTask();
            evictor.run();
            putEvictionTask(evictor);
        }

        final Page<K, V> target = locatePage(key, LockMode.WRITEMODE);
        try {
            final V previous = putIntoPage(target, key, value);

            // Propagate the change in the page's footprint to the cache total.
            final int memBefore = target.getMemoryEstimate();
            target.updateMemoryEstimate();
            updateMemoryEstimate(target.getMemoryEstimate() - memBefore);

            if (target.splitCondition()) {
                splitPage(target);
            } else if (target.state == ExternalMode.DISK_MEMORY_IDENTICAL) {
                target.state = ExternalMode.DISK_MEMORY_DIRTY;
            }

            return previous;
        } finally {
            target.writeUnlock();
        }
    }

    /**
     * Deletes every key in the range that begins at {@code start} and ends
     * at {@code end}, walking from page to page as needed.
     * <p/>
     * Each iteration write-locks the page responsible for {@code start},
     * removes the part of the range that lives in that page, and then
     * either deletes the page (if it became empty and is not the left
     * sentinel), advances {@code start} to the next page key of the
     * external store, or stops.
     *
     * @param start     lower bound of range deletion
     * @param end       upper bound of range deletion
     * @param inclusive if true then delete the end key
     */
    void doRemove(K start, K end, boolean inclusive) {
        while (true) {
            // Evict synchronously if the background eviction is behind schedule.
            if (mustEvictPage()) {
                BackgroundEvictionTask task = getEvictionTask();
                task.run();
                putEvictionTask(task);
            }

            Page<K, V> page = locatePage(start, LockMode.WRITEMODE);
            try {
                int startOffset = binarySearch(page.keys, start, comparator);
                int endOffset = binarySearch(page.keys, end, comparator);
                // Size before any removal; used below to detect that the
                // range reaches the end of this page.
                int pageSize = page.size;

                // A negative search result encodes the insertion point.
                if (startOffset < 0) {
                    startOffset = ~startOffset;
                }

                if (endOffset < 0) {
                    endOffset = ~endOffset;
                } else if (inclusive) {
                    // The end key is present and must be deleted as well.
                    endOffset++;
                }

                if (startOffset < endOffset) {
                    int memEstimate = page.getMemoryEstimate();
                    int length = (endOffset - startOffset);

                    // Remove the whole range in one pass per list instead of
                    // shifting the tail once for every removed element.
                    page.keys.subList(startOffset, endOffset).clear();
                    page.values.subList(startOffset, endOffset).clear();
                    page.rawValues.subList(startOffset, endOffset).clear();
                    page.size -= length;

                    if (page.state == ExternalMode.DISK_MEMORY_IDENTICAL) {
                        page.state = ExternalMode.DISK_MEMORY_DIRTY;
                    }

                    page.updateMemoryEstimate();
                    updateMemoryEstimate(page.getMemoryEstimate() - memEstimate);
                }

                if (page.size == 0 && !page.firstKey.equals(negInf)) {
                    // Empty non-sentinel page: release the lock, delete the
                    // page, and retry with the same start key.
                    K targetKey = page.firstKey;
                    page = writeUnlockAndNull(page);
                    deletePage(targetKey);
                    continue;
                } else if (endOffset == pageSize) {
                    // The range extends to the end of this page, so it may
                    // continue in the following page.
                    byte[] higherKeyEncoded = externalStore.higherKey(keyCoder.keyEncode(page.firstKey));
                    if (higherKeyEncoded != null) {
                        start = keyCoder.keyDecode(higherKeyEncoded);
                        continue;
                    }
                }
            } finally {
                // No-op when the page was already unlocked and nulled above.
                writeUnlockAndNull(page);
            }

            break;
        }
    }

    /**
     * Removes a single key while holding the write lock of the page
     * responsible for it. A page other than the left sentinel that is
     * found empty, or that becomes empty, is deleted after its lock has
     * been released.
     *
     * @return the removed value, or {@code null} if the key was not found
     */
    V doRemove(K key) {
        // Evict synchronously if the background eviction is behind schedule.
        if (mustEvictPage()) {
            final BackgroundEvictionTask evictor = getEvictionTask();
            evictor.run();
            putEvictionTask(evictor);
        }

        Page<K, V> page = locatePage(key, LockMode.WRITEMODE);
        try {
            if (page.size == 0) {
                // Nothing to remove; clean up the empty page unless it is
                // the left sentinel.
                if (!page.firstKey.equals(negInf)) {
                    final K emptyPageKey = page.firstKey;
                    page = writeUnlockAndNull(page);
                    deletePage(emptyPageKey);
                }

                return null;
            }

            final int index = binarySearch(page.keys, key, comparator);
            if (index < 0) {
                // The key is not stored in this page.
                return null;
            }

            final int memBefore = page.getMemoryEstimate();

            page.fetchValue(index);

            page.keys.remove(index);
            page.rawValues.remove(index);
            final V removed = page.values.remove(index);

            page.size--;

            if (page.state == ExternalMode.DISK_MEMORY_IDENTICAL) {
                page.state = ExternalMode.DISK_MEMORY_DIRTY;
            }

            page.updateMemoryEstimate();
            updateMemoryEstimate(page.getMemoryEstimate() - memBefore);

            if (page.size == 0 && !page.firstKey.equals(negInf)) {
                final K emptyPageKey = page.firstKey;
                page = writeUnlockAndNull(page);
                deletePage(emptyPageKey);
            }

            return removed;
        } finally {
            // No-op when the page was already unlocked and nulled above.
            writeUnlockAndNull(page);
        }
    }

    /**
     * Internal helper method.
     * Releases the lock of the given mode on the page when the page is
     * non-null, and does nothing for a null page. The result is always
     * null so that callers can write {@code page = unlockAndNull(page, mode)}.
     */
    private static <K, V extends Codec.BytesCodable> Page<K, V> unlockAndNull(Page<K, V> input, LockMode mode) {
        if (input != null) {
            input.modeUnlock(mode);
        }
        return null;
    }

    /**
     * Write-lock flavour of {@link #unlockAndNull(Page, LockMode)}:
     * unlocks a non-null page in write mode and always returns null.
     */
    private static <K, V extends Codec.BytesCodable> Page<K, V> writeUnlockAndNull(Page<K, V> input) {
        final Page<K, V> alwaysNull = unlockAndNull(input, LockMode.WRITEMODE);
        return alwaysNull;
    }

    /**
     * Helper method for loadPage().
     * <p/>
     * Looks up the cached page with the greatest first key that is less
     * than or equal to {@code externalKey} and decides whether to return
     * that page or the caller-supplied hint.
     * <p/>
     * The cached page is chosen when it is the page being searched for,
     * when no hint was supplied, or when the hint is not the nearest page
     * in cache. In that case the hint's write lock is released, the cached
     * page is write-locked, and the lookup is retried for as long as the
     * cached page is in a transient state. Otherwise the hint is returned
     * with its lock untouched.
     *
     * @param current     optional hint page; the code releases it with a
     *                    write-unlock, so it is expected to be write-locked
     *                    by the caller when non-null
     * @param externalKey first key of the page that is being looked for
     * @return a write-locked page: either the nearest cached page or the hint
     */
    private Page<K, V> loadPageCacheFloorEntry(Page<K, V> current, K externalKey) {
        // Set only when the hint itself is returned, so that the finally
        // block leaves the hint's lock in place.
        boolean useHint = false;
        try {
            while (true) {
                Map.Entry<K, Page<K, V>> cacheEntry = cache.floorEntry(externalKey);
                K cacheKey = cacheEntry.getKey();
                Page<K, V> cachePage = cacheEntry.getValue();

                assert (cacheKey.equals(cachePage.firstKey));

                /** If the nearest page in cache equals the new page then return. */
                /** If we did not provide a hint then begin with the nearest page in cache. */
                /** If we provided a hint and it was incorrect then do not use the hint. */
                if (cacheKey.equals(externalKey) || current == null || !cacheKey.equals(current.firstKey)) {
                    // The hint is not used: give up its lock before locking the cached page.
                    current = writeUnlockAndNull(current);
                    cachePage.writeLock();
                    if (cachePage.inTransientState()) {
                        // Transient pages must not be returned; look the entry up again.
                        cachePage.writeUnlock();
                        continue;
                    } else {
                        return cachePage;
                    }
                }
                /** Else we are using the hint that was provided. */
                else {
                    useHint = true;
                    return current;
                }
            }
        } finally {
            // On every exit except "hint returned", release the hint if it is
            // still held (for example when an exception was thrown).
            if (!useHint) {
                writeUnlockAndNull(current);
            }
        }
    }

    /**
     * Helper method for loadPage().
     * <p/>
     * Builds a page for {@code externalKey} from its encoded form,
     * write-locks it, registers it in the cache and the eviction queue,
     * and updates the memory and page counters.
     *
     * @param current          page preceding the new page; supplies the
     *                         encode type for the new page
     * @param next             page following the new page, or null; only
     *                         consulted by an assertion
     * @param externalKey      first key of the page to construct
     * @param floorPageEncoded encoded contents of the page
     * @return the new page, write-locked
     */
    private Page<K, V> constructNewPage(Page<K, V> current, Page<K, V> next, K externalKey,
            byte[] floorPageEncoded) {
        final Page<K, V> loaded = pageFactory.generateEmptyPage(this, externalKey, current.getEncodeType());
        loaded.decode(floorPageEncoded);
        loaded.writeLock();

        // The new page must sit strictly between its two neighbours.
        assert (loaded.firstKey.equals(externalKey));
        assert (compareKeys(current.firstKey, loaded.firstKey) < 0);
        assert (next == null || compareKeys(next.firstKey, loaded.firstKey) > 0);

        final Page<K, V> displaced = cache.putIfAbsent(externalKey, loaded);
        assert (displaced == null);

        updateMemoryEstimate(loaded.getMemoryEstimate());
        cacheSize.getAndIncrement();
        numPagesInMemory.getAndIncrement();
        evictionQueue.offer(loaded);

        return loaded;
    }

    /**
     * This method loads a page from the external storage if that page is not
     * found in the memory cache. The page that is loaded has the floor key
     * (greatest key less than or equal to) of the input key. Current can
     * be used as a hint to locate the new page. If current is non-null then
     * it must be write-locked.
     * <p/>
     * The target page is returned write-locked. It is guaranteed that the
     * return page is not in a transient state. It is not guaranteed that the
     * return page has been loaded into memory, ie. (page.keys != null).
     * <p/>
     * This method takes over the write lock of {@code current}: the lock is
     * released here (or in a helper) on every path, including failures,
     * unless {@code current} itself is the page that is returned.
     *
     * @param key     key whose floor page is requested
     * @param current optional write-locked hint page, or null
     * @return the write-locked page whose first key is the floor key of
     *         {@code key} in the external store
     */
    private Page<K, V> loadPage(K key, Page<K, V> current) {
        assert (current == null || current.isWriteLockedByCurrentThread());

        Page<K, V> next = null, cachePage;

        try {
            byte[] encodedKey = keyCoder.keyEncode(key);

            while (true) {
                byte[] externalKeyEncoded = externalStore.floorKey(encodedKey);

                /** Key of the page that will be loaded from disk. */
                K externalKey = keyCoder.keyDecode(externalKeyEncoded);

                assert (current == null || compareKeys(current.firstKey, externalKey) < 0);
                assert (compareKeys(key, externalKey) >= 0);

                // Transfer ownership of the 'current' variable to the inner method
                // to handle failures.
                Page<K, V> currentCopy = current;
                current = null;

                /** Locate the nearest page in cache that less than or equal to the new page. */
                cachePage = loadPageCacheFloorEntry(currentCopy, externalKey);

                if (cachePage.firstKey.equals(externalKey)) {
                    // The requested page is already cached.
                    cachePage.timeStamp = generateTimestamp();
                    return cachePage;
                } else {
                    current = cachePage;
                }

                assert (!current.inTransientState());

                // Walk forward through the cache, holding the write lock of
                // 'current' and of its successor 'next', until 'next' is at or
                // beyond the requested key or the cache has no further page.
                findnext: while (true) {
                    do {
                        writeUnlockAndNull(next);
                        Map.Entry<K, Page<K, V>> higherEntry = cache.higherEntry(current.firstKey);
                        if (higherEntry == null) {
                            break findnext;
                        }
                        next = higherEntry.getValue();
                        next.writeLock();
                    } while (next.inTransientState());

                    if (compareKeys(next.firstKey, externalKey) >= 0) {
                        break;
                    }
                    // 'next' is still below the requested key: advance to it.
                    current.writeUnlock();
                    current = next;
                    next = null;
                }

                if (next != null && next.firstKey.equals(externalKey)) {
                    // The requested page appeared in the cache in the meantime.
                    current = writeUnlockAndNull(current);
                    cachePage = next;
                    next = null;
                    cachePage.timeStamp = generateTimestamp();
                    return cachePage;
                }

                byte[] floorPageEncoded = externalStore.get(externalKeyEncoded);

                if (floorPageEncoded == null) {
                    // The external store no longer has this page: release
                    // both locks and start over with a fresh floor-key lookup.
                    current = writeUnlockAndNull(current);
                    next = writeUnlockAndNull(next);
                    continue;
                }

                return constructNewPage(current, next, externalKey, floorPageEncoded);
            }
        } finally {
            // Release whatever is still held; both calls are no-ops for null.
            writeUnlockAndNull(current);
            writeUnlockAndNull(next);
        }
    }

    /**
     * Locates the page responsible for a key, either in the cache or in
     * the external storage. A page that is on disk is loaded into memory.
     * The page is returned read-locked or write-locked as requested by
     * {@code returnMode}; it is guaranteed not to be in a transient state
     * and to have been loaded into memory, ie. (page.keys != null).
     * <p/>
     * Only returns a page when {@link Page#interval(Comparable)} is
     * true for the <code>key</code> argument.
     * <p/>
     * When searching for a page in order to acquire the write-lock,
     * then it is preferable to call loadPage() if a page hint is available.
     * Otherwise this method should be used.
     *
     * @param key        key that the returned page must cover
     * @param returnMode lock mode in which the page is handed back
     * @return the locked page covering {@code key}
     */
    Page<K, V> locatePage(K key, LockMode returnMode) {
        final boolean exactMatchOnly = false;
        return locatePage(key, returnMode, exactMatchOnly);
    }

    /**
     * Implementation of {@link #locatePage(Object, LockMode)} with an
     * additional exact-match mode.
     * <p/>
     * The search starts at the nearest cached page at or below the key,
     * locked in {@code returnMode}. Whenever that page cannot be returned,
     * the floor page of the key is loaded through loadPage(), which hands
     * back a write-locked page; the lock is downgraded before returning if
     * a read lock was requested.
     *
     * @param key        key to locate
     * @param returnMode lock mode in which the page is handed back
     * @param exact      when false, a page is returned once
     *                   {@link Page#interval(Comparable)} accepts the key;
     *                   when true, a page is returned only if its first key
     *                   equals {@code key}
     * @return the locked page, or {@code null} in exact mode when the page
     *         reached after an unhinted page load does not start at {@code key}
     * @throws NullPointerException if {@code key} is null
     */
    private Page<K, V> locatePage(K key, LockMode returnMode, boolean exact) {
        LockMode currentMode = returnMode;

        Comparable<? super K> ckey = comparable(key);

        Page<K, V> current = null;

        // Lock the nearest cached page, retrying while it is in a transient state.
        do {
            unlockAndNull(current, currentMode);

            Map.Entry<K, Page<K, V>> cacheEntry = cache.floorEntry(key);

            current = cacheEntry.getValue();

            current.modeLock(currentMode);

            assert (current.firstKey.equals(cacheEntry.getKey()));

        } while (current.inTransientState());

        // Becomes true once a page has been loaded without a hint.
        boolean pageLoad = false;

        while (true) {
            assert (!current.inTransientState());

            K currentFirstKey = current.firstKey;

            assert (ckey.compareTo(currentFirstKey) >= 0);

            if (current.keys == null) {
                pullPageFromDisk(current, currentMode);
                // If currentMode is LockMode.READMODE then the lock was dropped and re-acquired.
                // We could be in a transient state.
            }

            if (!current.inTransientState()) {
                boolean returnPage = false;
                if (!exact && current.interval(ckey)) {
                    returnPage = true;
                }
                if (exact) {
                    // Compare against the key itself. 'ckey' may be a
                    // ComparableUsingComparator wrapper, which is never
                    // equal to a key, so testing it here would make exact
                    // lookups fail whenever a comparator is configured.
                    if (current.firstKey.equals(key)) {
                        returnPage = true;
                    } else if (pageLoad) {
                        current.modeUnlock(currentMode);
                        return null;
                    }
                }
                if (returnPage) {
                    current.timeStamp = generateTimestamp();

                    /**
                     *  Fancy way of asserting that we do not
                     *  hold the READLOCK when we want to return the WRITELOCK.
                     */
                    assert (currentMode != LockMode.READMODE || returnMode != LockMode.WRITEMODE);

                    if (currentMode == LockMode.WRITEMODE && returnMode == LockMode.READMODE) {
                        current.downgradeLock();
                    }
                    return current;
                }
            }

            /**
             * The key was not found in a page on memory.
             * We must load a page from external storage.
             */
            if (!current.inTransientState() && currentMode == LockMode.WRITEMODE) {
                // The write-locked current page can serve as a hint.
                current = loadPage(key, current);
            } else {
                current.modeUnlock(currentMode);
                current = loadPage(key, null);
                pageLoad = true;
            }

            // loadPage() always hands back a write-locked page.
            currentMode = LockMode.WRITEMODE;

        }
    }

    /**
     * Helper method for {@link #getFirstKey()}.
     * <p/>
     * Inspects only the first page of the cache, under its read lock.
     *
     * @return the smallest key of that page, or {@code null} when the
     *         page holds no keys
     */
    private K firstKeyFastPath() {
        final Page<K, V> sentinel = cache.firstEntry().getValue();
        sentinel.readLock();
        try {
            if (sentinel.keys == null) {
                pullPageFromDisk(sentinel, LockMode.READMODE);
            }

            assert (!sentinel.inTransientState());

            return (sentinel.size > 0) ? sentinel.keys.get(0) : null;
        } finally {
            sentinel.readUnlock();
        }
    }

    /**
     * Returns the smallest key in the store.
     * <p/>
     * The fast path looks only at the first page of the cache. If that
     * page holds no keys, the slow path walks the pages in ascending order
     * of the external store's keys, write-locking one page at a time, until
     * a non-empty page is found.
     *
     * @return the smallest key, or {@code null} if no page contains a key
     */
    @Override
    public K getFirstKey() {
        // Fast path: the first key is located in the left sentinel page
        K fastPath = firstKeyFastPath();
        if (fastPath != null) {
            return fastPath;
        }

        Page<K, V> currentPage = cache.firstEntry().getValue();
        K currentKey = currentPage.firstKey;
        byte[] currentKeyEncoded = keyCoder.keyEncode(currentKey);

        currentPage.writeLock();

        // Slow path: we load each page from disk searching for the first key.
        try {
            while (true) {
                assert (!currentPage.inTransientState());

                if (currentPage.keys == null) {
                    pullPageFromDisk(currentPage, LockMode.WRITEMODE);
                }

                if (currentPage.size > 0) {
                    return currentPage.keys.get(0);
                }

                byte[] nextKeyEncoded = externalStore.higherKey(currentKeyEncoded);

                if (nextKeyEncoded == null) {
                    return null;
                }

                K nextKey = keyCoder.keyDecode(nextKeyEncoded);

                // loadPage() takes over the write lock of the hint page and
                // releases it itself if it fails. Clear our reference first so
                // that the finally block below does not unlock it a second time.
                Page<K, V> hint = currentPage;
                currentPage = null;
                currentPage = loadPage(nextKey, hint);

                currentKey = currentPage.firstKey;
                currentKeyEncoded = keyCoder.keyEncode(currentKey);
            }
        } finally {
            // No-op when loadPage() failed and already released the lock.
            writeUnlockAndNull(currentPage);
        }
    }

    /**
     * Helper method for {@link #getLastKey()}.
     * <p/>
     * Inspects only the last page of the cache, under its read lock.
     *
     * @return the largest key of that page when the page is not in a
     *         transient state, has no successor (nextFirstKey is null)
     *         and holds at least one key; {@code null} otherwise
     */
    private K lastKeyFastPath() {
        final Page<K, V> tail = cache.lastEntry().getValue();
        tail.readLock();
        try {
            if (tail.keys == null) {
                pullPageFromDisk(tail, LockMode.READMODE);
            }

            final boolean usable = !tail.inTransientState() && tail.nextFirstKey == null && tail.size > 0;
            return usable ? tail.keys.get(tail.size - 1) : null;
        } finally {
            tail.readUnlock();
        }
    }

    /**
     * Returns the largest key in the store.
     * <p/>
     * The fast path looks only at the last page of the cache. Otherwise
     * the last page of the external store is loaded and, while the page at
     * hand is empty, the search steps backwards page by page. After each
     * backwards step the successor of the previous page is re-read from
     * the external store to detect page splits that happened in between.
     *
     * @return the largest key, or {@code null} if no page contains a key
     */
    @Override
    public K getLastKey() {
        // Fast path: the last page in cache happens to be the last page on disk.
        K fastPath = lastKeyFastPath();
        if (fastPath != null) {
            return fastPath;
        }

        K currentKey;
        byte[] currentKeyEncoded;
        Page<K, V> currentPage = null, prevPage = null;

        // Slow path: we load each page from disk searching for the last key.
        // This is slower than getFirstKey() due to our locking convention.
        try {
            // Load the high page into memory
            while (true) {
                currentKeyEncoded = externalStore.lastKey();
                currentKey = keyCoder.keyDecode(currentKeyEncoded);

                currentPage = loadPage(currentKey, null);
                if (!currentPage.inTransientState() && currentPage.nextFirstKey == null) {
                    break;
                }

                // The page is not (or no longer) the last one. loadPage()
                // returned it write-locked, so release it before retrying;
                // otherwise the lock would be leaked when the variable is
                // overwritten on the next iteration.
                currentPage = writeUnlockAndNull(currentPage);
            }

            // Find that last key!
            while (true) {
                K prevKey, verifyKey;
                byte[] prevKeyEncoded, verifyKeyEncoded;

                assert (!currentPage.inTransientState());

                if (currentPage.keys == null) {
                    pullPageFromDisk(currentPage, LockMode.WRITEMODE);
                }

                if (currentPage.size > 0) {
                    return currentPage.keys.get(currentPage.size - 1);
                }

                // This loop is needed to detect concurrent page split operations.
                do {
                    prevPage = writeUnlockAndNull(prevPage);

                    prevKeyEncoded = externalStore.lowerKey(currentKeyEncoded);

                    if (prevKeyEncoded == null) {
                        // There is no earlier page: the store holds no keys.
                        return null;
                    }

                    prevKey = keyCoder.keyDecode(prevKeyEncoded);

                    // Release the current page before locking its predecessor.
                    currentPage = writeUnlockAndNull(currentPage);

                    prevPage = loadPage(prevKey, null);

                    verifyKeyEncoded = externalStore.higherKey(prevKeyEncoded);

                    if (verifyKeyEncoded == null) {
                        assert (prevPage.nextFirstKey == null);
                        break;
                    }

                    verifyKey = keyCoder.keyDecode(verifyKeyEncoded);
                    // Retry if the successor of the previous page is no longer
                    // the page we came from.
                } while (!currentKey.equals(verifyKey));

                currentPage = prevPage;
                currentKey = prevKey;
                currentKeyEncoded = prevKeyEncoded;
                prevPage = null;
            }
        } finally {
            // Both calls are no-ops for pages that are already released.
            writeUnlockAndNull(prevPage);
            writeUnlockAndNull(currentPage);
        }
    }

    /**
     * A (key, value) pair handed out by the iterator. The pair is bound
     * to the enclosing cache: {@link #setValue(Object)} writes through to
     * the cache via its put method.
     */
    private class SkipListCacheKeyValue implements Map.Entry<K, V> {

        /** Key of the pair; never changes. */
        final K key;

        /** Value as of construction or of the latest setValue() call. */
        V value;

        public SkipListCacheKeyValue(K key, V value) {
            this.value = value;
            this.key = key;
        }

        @Override
        public K getKey() {
            return this.key;
        }

        @Override
        public V getValue() {
            return this.value;
        }

        /**
         * Stores the value in the enclosing cache under this entry's key
         * and remembers it locally.
         *
         * @return whatever the cache's put method returned
         */
        @Override
        public V setValue(V value) {
            final V displaced = SkipListCache.this.put(key, value);
            this.value = value;
            return displaced;
        }
    }

    /**
     * Iterator over the (key, value) pairs of the cache in ascending key
     * order, starting at a given key.
     * <p/>
     * The iterator does not hold a page lock between calls. Each call to
     * next() re-acquires the read lock of the current page and uses the
     * page's write stamp to decide whether the remembered position is
     * still valid or has to be found again with a binary search.
     */
    private class SkipListCacheIterator implements Iterator<Map.Entry<K, V>> {

        /** Page that the iterator is currently positioned in. */
        Page<K, V> page;
        /** Index inside {@link #page} of the pair that next() will return. */
        int position;
        /** Write stamp of {@link #page} when position was computed; -1 forces a new search. */
        long stamp;
        /** Key of the pair most recently returned by next(), or null. */
        K prevKey;
        /** Key of the pair that next() will return, or null when exhausted. */
        K nextKey;
        /** Value belonging to {@link #nextKey}. */
        V nextValue;

        /**
         * @param from      key at which the iteration begins
         * @param inclusive whether a pair stored under {@code from} itself
         *                  is part of the iteration
         */
        SkipListCacheIterator(K from, boolean inclusive) {
            this.page = locatePage(from, LockMode.READMODE);
            this.prevKey = null;
            this.stamp = -1;

            // locatePage() returned the page read-locked, so the helper
            // must not lock it again.
            nextHelper(from, inclusive, false);

        }

        @Override
        public boolean hasNext() {
            return nextKey != null;
        }

        /**
         * Returns the next pair and advances the iterator.
         *
         * @return the next pair, or {@code null} (rather than throwing)
         *         when the iteration is exhausted
         */
        @Override
        public Map.Entry<K, V> next() {
            if (nextKey == null) {
                return null;
            }

            prevKey = nextKey;

            Map.Entry<K, V> pair = new SkipListCacheKeyValue(nextKey, nextValue);

            nextHelper(prevKey, false, true);

            return pair;
        }

        /**
         * Removes the pair most recently returned by next() from the
         * store. Does nothing if next() has not returned a pair yet.
         */
        @Override
        public void remove() {
            if (prevKey != null) {
                // Remove the key/value entry through the enclosing cache.
                // Calling cache.remove() here would operate on the
                // in-memory map of pages instead of on the stored entries.
                SkipListCache.this.remove(prevKey);
            }
        }

        /**
         * Positions the iterator on the first pair after {@code target}
         * (or at {@code target} when {@code inclusive} is set) and stores
         * it in nextKey / nextValue. The read lock on the page is released
         * before returning.
         *
         * @param target      key to continue from
         * @param inclusive   whether a pair stored under {@code target} qualifies
         * @param acquireLock true if the read lock of the current page has
         *                    to be acquired here; false if the caller
         *                    already holds it
         */
        private void nextHelper(K target, boolean inclusive, boolean acquireLock) {
            if (acquireLock) {
                page.readLock();
            }

            try {
                if (page.keys == null) {
                    pullPageFromDisk(page, LockMode.READMODE);
                    // The readlock was dropped and re-acquired.
                    // We could be in a transient state.
                }

                if (page.inTransientState()) {
                    // The page can no longer be used: locate the page that
                    // now covers the target and switch over to it.
                    Page<K, V> newPage = locatePage(target, LockMode.READMODE);

                    assert (!newPage.inTransientState());
                    assert (newPage.keys != null);

                    page = unlockAndNull(page, LockMode.READMODE);
                    page = newPage;
                    stamp = -1;
                }

                if (stamp != page.writeStamp) {
                    // The remembered position cannot be trusted: search again.
                    position = binarySearch(page.keys, target, comparator);

                    if (position < 0) {
                        position = ~position;
                    } else if (!inclusive) {
                        position = position + 1;
                    }

                    stamp = page.writeStamp;
                } else {
                    position++;
                }

                // Skip slots that have neither a decoded nor a raw value.
                while (position < page.size && page.values.get(position) == null
                        && nullRawValue(page.rawValues.get(position))) {
                    position++;
                }

                // End of this page: continue in the following pages, if any.
                if (position == page.size && !moveForward(target, inclusive)) {
                    return;
                }

                page.fetchValue(position);

                nextKey = page.keys.get(position);
                nextValue = page.values.get(position);

            } finally {
                unlockAndNull(page, LockMode.READMODE);
            }
        }

        /**
         * Finds the next key greater than or equal to the targetKey.
         * If inclusive is false then find the next key greater than
         * the targetKey.
         * <p/>
         * Called with the read lock of the current page held. On success
         * the iterator's page and position refer to the key that was
         * found. On failure nextKey and nextValue are set to null.
         *
         * @param targetKey begin search with this key
         * @param inclusive search for values can terminate on finding the targetKey
         * @return true if-and-only-if a key is found
         */
        private boolean moveForward(K targetKey, boolean inclusive) {
            while (true) {
                byte[] higherKeyEncoded = externalStore.higherKey(keyCoder.keyEncode(page.firstKey));

                if (higherKeyEncoded == null) {
                    // There is no further page: the iteration is exhausted.
                    nextKey = null;
                    nextValue = null;
                    return false;
                }

                K higherKey = keyCoder.keyDecode(higherKeyEncoded);

                page.readUnlock();

                // Exact lookup: only a page that starts at higherKey is accepted.
                Page<K, V> higherPage = locatePage(higherKey, LockMode.READMODE, true);

                if (higherPage == null) {
                    // The page was not found under that key; re-lock the
                    // current page and ask the external store again.
                    page.readLock();
                    continue;
                }

                assert (!higherPage.inTransientState());
                assert (higherPage.keys != null);

                page = higherPage;

                assert (page.keys != null);

                position = binarySearch(page.keys, targetKey, comparator);

                if (position < 0) {
                    position = ~position;
                } else if (!inclusive) {
                    position = position + 1;
                }

                // Skip slots that have neither a decoded nor a raw value.
                while (position < page.size && page.values.get(position) == null
                        && nullRawValue(page.rawValues.get(position))) {
                    position++;
                }

                if (position < page.size) {
                    stamp = page.writeStamp;
                    return true;
                }
            }
        }

    }

    /**
     * Creates an iterator over the entries of this cache beginning at {@code start}.
     *
     * @param start     key handed to the {@code SkipListCacheIterator} constructor
     * @param inclusive flag handed to the {@code SkipListCacheIterator} constructor;
     *                  presumably controls whether {@code start} itself may be returned
     * @return a new {@code SkipListCacheIterator}
     */
    @Override
    public Iterator<Map.Entry<K, V>> range(K start, boolean inclusive) {
        final Iterator<Map.Entry<K, V>> cursor = new SkipListCacheIterator(start, inclusive);
        return cursor;
    }

    /**
     * Reports whether {@code get(key)} yields a non-null value.
     *
     * @param key key to look up
     * @return {@code true} when the lookup result is not {@code null}
     */
    @Override
    public boolean containsKey(K key) {
        final V existing = get(key);
        return existing != null;
    }

    /**
     * Looks up a value by delegating to {@code get(key)}.
     *
     * @param key key to look up
     * @return whatever {@code get(key)} returns
     */
    @Override
    public V getValue(K key) {
        final V found = get(key);
        return found;
    }

    /**
     * Stores a value by delegating to {@code put(key, val)} and hands back its result.
     *
     * @param key key to store under
     * @param val value to store
     * @return whatever {@code put(key, val)} returns (presumably the previous value)
     */
    @Override
    public V getPutValue(K key, V val) {
        final V previous = put(key, val);
        return previous;
    }

    /**
     * Removes a key by delegating to {@code remove(key)} and hands back its result.
     *
     * @param key key to remove
     * @return whatever {@code remove(key)} returns (presumably the removed value)
     */
    @Override
    public V getRemoveValue(K key) {
        final V removed = remove(key);
        return removed;
    }

    /**
     * Stores a value by delegating to {@code put(key, val)}; the result of
     * {@code put} is discarded.
     *
     * @param key key to store under
     * @param val value to store
     */
    @Override
    public void putValue(K key, V val) {
        put(key, val);
    }

    /**
     * Removes a key by delegating to {@code remove(key)}; the result of
     * {@code remove} is discarded.
     *
     * @param key key to remove
     */
    @Override
    public void removeValue(K key) {
        remove(key);
    }

    /**
     * Returns the current memory estimate for this cache: the running
     * {@code memoryEstimate} counter plus {@code mem_page} for every page
     * currently counted as in memory.
     * <p>
     * The page count is widened to {@code long} before it is multiplied.
     * {@link #getNumPagesInMemory()} returns an {@code int}; the declaration of
     * {@code mem_page} is outside this view, but if it is also an {@code int}
     * the product would otherwise be evaluated in 32-bit arithmetic and could
     * overflow before being added to the {@code long} counter.
     *
     * @return the estimated memory usage
     */
    public long getMemoryEstimate() {
        final long pagesInMemory = getNumPagesInMemory();
        return memoryEstimate.get() + pagesInMemory * mem_page;
    }

    /**
     * Adds {@code delta} to the running memory estimate.
     * With assertions enabled, verifies the updated estimate is not negative.
     *
     * @param delta amount to add; may be negative
     */
    void updateMemoryEstimate(int delta) {
        final long updated = memoryEstimate.addAndGet(delta);
        assert updated >= 0;
    }

    /**
     * Feeds a key/value pair into the page's running average via
     * {@code page.updateAverage}. For memory estimation, a replacement
     * (non-null {@code prev}) gets 2x weighting; a fresh insert gets weight 1.
     *
     * @param page  page whose average is updated
     * @param key   key passed to {@code updateAverage}
     * @param value value passed to {@code updateAverage}
     * @param prev  previous value, or {@code null} when there was none
     */
    private void updateMemoryCounters(Page<K, V> page, K key, V value, V prev) {
        final int weight = (prev == null) ? 1 : 2;
        page.updateAverage(key, value, weight);
    }

    /**
     * Writes a page to the external store if it is dirty and then drops its
     * in-memory contents.
     * <p>
     * Asserts that the calling thread holds the page's write lock, that the
     * page is not in a transient state and that its keys are loaded.
     *
     * @param current    page to push out
     * @param byteStream stream handed to {@code Page.encode} when the page must be written
     */
    private void pushPageToDisk(Page<K, V> current, ByteBufOutputStream byteStream) {
        assert current.isWriteLockedByCurrentThread();
        assert !current.inTransientState();
        assert current.keys != null;

        final boolean dirty = (current.state == ExternalMode.DISK_MEMORY_DIRTY);
        if (dirty) {
            // Persist the page under its encoded first key, then mark it clean.
            final byte[] storeKey = keyCoder.keyEncode(current.firstKey);
            final byte[] storeValue = current.encode(byteStream);
            externalStore.put(storeKey, storeValue);
            current.state = ExternalMode.DISK_MEMORY_IDENTICAL;
        }

        // Subtract the page's estimate while its contents are still present.
        final int released = current.getMemoryEstimate();
        updateMemoryEstimate(-released);

        // Empty the collections, then drop the references.
        current.keys.clear();
        current.values.clear();
        current.rawValues.clear();
        current.keys = null;
        current.values = null;
        current.rawValues = null;

        numPagesInMemory.getAndDecrement();
    }

    /**
     * Decodes an encoded page into {@code current} and registers it as being
     * in memory: the memory estimate is increased, the page is offered to the
     * eviction queue and the in-memory page counter is incremented.
     * Asserts that the calling thread holds the page's write lock.
     *
     * @param current page object to populate
     * @param page    encoded page bytes
     */
    private void pullPageHelper(Page<K, V> current, byte[] page) {
        assert current.isWriteLockedByCurrentThread();

        current.decode(page);

        final int loaded = current.getMemoryEstimate();
        updateMemoryEstimate(loaded);
        evictionQueue.offer(current);
        numPagesInMemory.getAndIncrement();
    }

    /**
     * Loads the contents of {@code current} from the external store when the
     * page is not in a transient state and its keys are not in memory.
     * <p>
     * When {@code mode} is {@code LockMode.READMODE} the read lock is released
     * and the write lock acquired first; on exit the lock is downgraded again.
     * Because the read lock is dropped in between, the page may have changed
     * by the time the write lock is held.
     *
     * @param current page to load
     * @param mode    lock mode held by the caller on entry
     */
    private void pullPageFromDisk(Page<K, V> current, LockMode mode) {
        final boolean upgradeFromRead = (mode == LockMode.READMODE);

        if (upgradeFromRead) {
            current.readUnlock();
            current.writeLock();
        }

        try {
            assert current.isWriteLockedByCurrentThread();

            if (!current.inTransientState() && current.keys == null) {
                final byte[] storeKey = keyCoder.keyEncode(current.firstKey);
                pullPageHelper(current, externalStore.get(storeKey));
            }
        } finally {
            if (upgradeFromRead) {
                current.downgradeLock();
            }
        }
    }

    /**
     * Closes the cache without waiting for outstanding background eviction
     * work and without running an integrity test.
     * The background eviction thread(s) are shut down regardless of
     * whether the skiplist exceeds its heap capacity.
     */
    @Override
    public void close() {
        final boolean cleanLog = false;
        final boolean waitForEviction = false;
        doClose(cleanLog, waitForEviction, CloseOperation.NONE);
    }

    /**
     * Closes the cache, optionally testing or repairing the underlying store.
     * Does not wait for page eviction to settle before shutting down.
     *
     * @param cleanLog if true then wait for the BerkeleyDB clean thread to finish.
     * @param operation optionally test or repair the berkeleyDB.
     * @return status code. A status code of 0 indicates success.
     */
    @Override
    public int close(boolean cleanLog, CloseOperation operation) {
        final boolean waitForEviction = false;
        final int status = doClose(cleanLog, waitForEviction, operation);
        return status;
    }

    /**
     * Closes the cache after first waiting for page eviction to settle.
     * Wait for the background eviction threads to complete
     * purging all necessary pages. This method is intended
     * for JUnit testing. If it is being used in other instances,
     * then perhaps a new method should be introduced instead.
     */
    void waitForShutdown() {
        final boolean cleanLog = false;
        final boolean waitForEviction = true;
        doClose(cleanLog, waitForEviction, CloseOperation.NONE);
    }

    /**
     * Shared shutdown routine behind the public close methods.
     * <p>
     * Only the first invocation does any work; it flips {@code shutdownGuard}
     * and every later invocation returns 0 immediately. The sequence is:
     * optionally wait for page eviction, stop the eviction threads, push all
     * pages to disk, optionally run the integrity test, close the external
     * store, then log page statistics.
     *
     * @param cleanLog  passed through to {@code closeExternalStore}
     * @param wait      if true, wait for page eviction before shutting down
     * @param operation integrity test/repair request; may be {@code null}
     * @return 1 when an integrity test reported failed pages, otherwise 0
     */
    private int doClose(boolean cleanLog, boolean wait, CloseOperation operation) {
        // A previous caller already performed (or is performing) the shutdown.
        if (shutdownGuard.getAndSet(true)) {
            return 0;
        }

        if (wait) {
            waitForPageEviction();
        }

        shutdownEvictionThreads.set(true);
        waitForEvictionThreads();
        pushAllPagesToDisk();

        int status = 0;
        final boolean integrityRequested = (operation != null) && operation.testIntegrity();
        if (integrityRequested) {
            final int failedPages = testIntegrity(operation.repairIntegrity());
            if (failedPages > 0) {
                status = 1;
            }
        }

        closeExternalStore(cleanLog);

        // NOTE(review): with assertions enabled a non-zero status raises an
        // AssertionError here instead of being returned - confirm this is intended.
        assert status == 0;

        log.info("pages: encoded=" + numPagesEncoded.get() + " decoded=" + numPagesDecoded.get() + " split="
                + numPagesSplit.get());

        if (trackEncodingByteUsage) {
            log.info(MetricsUtil.histogramToString("encodeFirstKeySize", metrics.encodeFirstKeySize));
            log.info(MetricsUtil.histogramToString("encodeNextFirstKeySize", metrics.encodeNextFirstKeySize));
            log.info(MetricsUtil.histogramToString("encodeKeySize", metrics.encodeKeySize));
            log.info(MetricsUtil.histogramToString("encodeValueSize", metrics.encodeValueSize));
            log.info(MetricsUtil.histogramToString("encodePageSize (final)", metrics.encodePageSize));
            log.info(MetricsUtil.histogramToString("numberKeysPerPage", metrics.numberKeysPerPage));
        }

        return status;
    }

    /**
     * Obtains a BackgroundEvictionTask, re-using one from
     * the {@link #evictionTaskQueue} when available and otherwise
     * creating a new instance configured with {@code fixedNumberEvictions}.
     */
    private BackgroundEvictionTask getEvictionTask() {
        final BackgroundEvictionTask recycled = evictionTaskQueue.poll();
        if (recycled != null) {
            return recycled;
        }
        return new BackgroundEvictionTask(fixedNumberEvictions);
    }

    /**
     * Place a BackgroundEvictionTask object onto
     * the shared queue so that other threads may
     * re-use this object.
     *
     * @param task task to add to {@code evictionTaskQueue}
     */
    private void putEvictionTask(BackgroundEvictionTask task) {
        evictionTaskQueue.add(task);
    }

    /**
     * Requests shutdown of the purge and eviction thread pools and then waits
     * up to {@code threadPoolWaitShutdownSeconds} seconds for each of them to
     * terminate.
     * <p>
     * An interrupt received while waiting is swallowed; in that case any
     * remaining wait is skipped and the method returns normally.
     */
    private void waitForEvictionThreads() {
        // Ask both pools to stop before waiting on either.
        purgeThreadPool.shutdown();
        evictionThreadPool.shutdown();

        final long timeoutSeconds = threadPoolWaitShutdownSeconds;
        try {
            purgeThreadPool.awaitTermination(timeoutSeconds, TimeUnit.SECONDS);
            evictionThreadPool.awaitTermination(timeoutSeconds, TimeUnit.SECONDS);
        } catch (InterruptedException ignored) {
            // Intentionally ignored; the caller proceeds regardless.
        }
    }

    /**
     * Sanity check used from an {@code assert}: decodes the first key of the
     * external store and compares it with {@code negInf}.
     *
     * @return {@code true} when the decoded first key equals {@code negInf}
     */
    private boolean pushAllPagesToDiskAssertion() {
        final K smallestStoredKey = keyCoder.keyDecode(externalStore.firstKey());
        return smallestStoredKey.equals(negInf);
    }

    /**
     * Pushes every page in the eviction queue that is loaded and not in a
     * transient state out to the external store.
     * <p>
     * Each page is processed under its write lock. The unlock is performed in
     * a {@code finally} block so that an exception thrown while encoding or
     * writing a page cannot leave that page's write lock held. The shared
     * encoding buffer is released when the pass is over, whether or not it
     * completed normally.
     * <p>
     * With assertions enabled, {@code pushAllPagesToDiskAssertion()} is
     * evaluated once the pass completes.
     */
    private void pushAllPagesToDisk() {
        final ByteBufOutputStream byteStream = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
        try {
            for (Page<K, V> page : evictionQueue) {
                page.writeLock();
                try {
                    // Pages that are transient or already unloaded have nothing to push.
                    if (!page.inTransientState() && page.keys != null) {
                        pushPageToDisk(page, byteStream);
                    }
                } finally {
                    page.writeUnlock();
                }
            }
        } finally {
            byteStream.buffer().release();
        }

        assert (pushAllPagesToDiskAssertion());
    }

    /**
     * Blocks until {@code shouldEvictPage()} reports {@code false}, polling
     * with a 10 millisecond sleep between checks. Interrupts received while
     * sleeping are ignored and polling continues.
     * <p>
     * This method is intended for internal use and unit testing purposes only.
     */
    protected void waitForPageEviction() {
        final long pollMillis = 10;

        boolean evictionPending = shouldEvictPage();
        while (evictionPending) {
            try {
                Thread.sleep(pollMillis);
            } catch (InterruptedException ignored) {
                // Intentionally ignored; keep polling.
            }
            evictionPending = shouldEvictPage();
        }
    }

    /**
     * Close the external store by delegating to {@code externalStore.close(cleanLog)}.
     *
     * @param cleanLog if true then wait for the BerkeleyDB clean thread to finish.
     **/
    private void closeExternalStore(boolean cleanLog) {
        externalStore.close(cleanLog);
    }

    /**
     * Returns the current value of the {@code cacheSize} counter.
     *
     * @return value read from {@code cacheSize}
     */
    @SuppressWarnings("unused")
    int getCacheSize() {
        final int size = cacheSize.get();
        return size;
    }

    /**
     * Returns the current value of the {@code numPagesInMemory} counter.
     *
     * @return number of pages currently counted as being in memory
     */
    public int getNumPagesInMemory() {
        final int inMemory = numPagesInMemory.get();
        return inMemory;
    }

    /**
     * Returns the number of entries reported by the external store's
     * {@code count()} operation.
     * <p>
     * Counts the key/data pairs in the database. This operation is faster than
     * a cursor based scan of the database and does not add to the cache, so
     * the current cache contents are not perturbed. The count is not
     * guaranteed to be accurate when there are concurrent updates. The
     * operation scans a significant portion of the database and should be
     * considered fairly expensive.
     *
     * @return the count reported by the external store
     */
    @SuppressWarnings("unused")
    public long getNumPagesOnDisk() {
        final long onDisk = externalStore.count();
        return onDisk;
    }

    /**
     * Returns the current value of the {@code numPagesDeleted} counter.
     *
     * @return value read from {@code numPagesDeleted}
     */
    @SuppressWarnings("unused")
    public long getNumPagesDeleted() {
        final long deleted = numPagesDeleted.get();
        return deleted;
    }

    /**
     * Produces the timestamp that is applied whenever a page is accessed.
     * <p/>
     * The value comes from {@link System#nanoTime()}. Nanosecond resolution
     * has been found to improve the performance of the WS-CLOCK eviction
     * algorithm. If the overhead of System.nanoTime() is unacceptable then
     * perhaps a microsecond precision version of JitterClock needs to be
     * implemented.
     *
     * @return the current {@code System.nanoTime()} reading
     */
    static long generateTimestamp() {
        final long now = System.nanoTime();
        return now;
    }

    /**
     * Stores the supplied value in the {@code maxPageMem} field.
     *
     * @param maxPageMem new value (presumably a per-page memory limit; units
     *                   are not visible here)
     */
    @Override
    public void setMaxPageMem(long maxPageMem) {
        this.maxPageMem = maxPageMem;
    }

    /**
     * Sets the total memory limit and derives the soft limit from it.
     * <p>
     * The soft limit is {@code maxTotalMem} minus the fraction
     * {@code 1 / shouldEvictDelta} of it. The {@code globalMaxTotalMem} and
     * {@code globalSoftTotalMem} values are raised to at least the new limits.
     * When {@code isReadOnly()} is true, this instance adopts the global
     * limits instead of its own.
     *
     * @param maxTotalMem new total memory limit
     */
    @Override
    public void setMaxTotalMem(long maxTotalMem) {
        final double evictFraction = 1.0d / shouldEvictDelta;
        final long headroom = (long) (evictFraction * maxTotalMem);

        this.maxTotalMem = maxTotalMem;
        this.softTotalMem = maxTotalMem - headroom;

        globalMaxTotalMem = Math.max(globalMaxTotalMem, maxTotalMem);
        globalSoftTotalMem = Math.max(globalSoftTotalMem, softTotalMem);

        if (!isReadOnly()) {
            return;
        }
        this.maxTotalMem = globalMaxTotalMem;
        this.softTotalMem = globalSoftTotalMem;
    }

    /**
     * Stores the supplied value in the {@code estimateInterval} field.
     *
     * @param interval new value for {@code estimateInterval}
     */
    @Override
    public void setMemEstimateInterval(int interval) {
        this.estimateInterval = interval;
    }

    /**
     * Reports a page whose nextFirstKey is null even though it is not the
     * largest page in the database and, when asked to, repairs it by setting
     * nextFirstKey to the key of the following page and rewriting the page in
     * the external store.
     *
     * @param repair   if true then repair the page
     * @param counter  page number.
     * @param page     contents of the page.
     * @param key      key associated with the page.
     * @param nextKey  key associated with the next page.
     */
    private void missingNextFirstKey(final boolean repair, final int counter, Page<K, V> page, final K key,
            final K nextKey) {
        log.warn(
                "On page {} the firstKey is {} " + " the length is {} "
                        + " the nextFirstKey is null and the next page" + " is associated with key {}.",
                counter, page.firstKey, page.size, nextKey);

        if (!repair) {
            return;
        }

        log.info("Repairing nextFirstKey on page {}.", counter);
        page.nextFirstKey = nextKey;

        final ByteBufOutputStream scratch = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
        try {
            final byte[] encodedPage = page.encode(scratch);
            final byte[] encodedKey = keyCoder.keyEncode(key);
            externalStore.put(encodedKey, encodedPage);
        } finally {
            // Always hand the pooled buffer back.
            scratch.buffer().release();
        }
    }

    /**
     * Removes from {@code page} every key that is greater than or equal to
     * {@code nextKey} and rewrites the page in the external store.
     * <p>
     * The next page is loaded from the external store. A removed key that is
     * not already present on the next page is inserted there together with its
     * value, and the next page is rewritten as well if at least one such
     * insertion happened. A removed key that already exists on the next page
     * is simply dropped from {@code page}.
     *
     * @param counter page number, used only in log messages
     * @param page    decoded contents of the page being repaired
     * @param key     key under which {@code page} is stored
     * @param nextKey key under which the next page is stored
     */
    private void repairInvalidKeys(final int counter, Page<K, V> page, final K key, final K nextKey) {
        // Becomes true once at least one entry has been moved onto the next page.
        boolean pageTransfer = false;
        Page<K, V> nextPage = pageFactory.generateEmptyPage(this, nextKey, page.getEncodeType());
        byte[] encodedNextPage = externalStore.get(keyCoder.keyEncode(nextKey));
        nextPage.decode(encodedNextPage);
        // i indexes the (shrinking) page lists; pos is never decremented, so it
        // tracks the entry's index in the page as it was before any removal.
        for (int i = 0, pos = 0; i < page.size; i++, pos++) {
            K testKey = page.keys.get(i);
            // if testKey >= nextKey then we need to move the testKey off the current page
            if (compareKeys(testKey, nextKey) >= 0) {
                // non-negative value from binary search indicates the key was found on the next page
                // NOTE(review): both messages below log pos (an index), not the key itself.
                if (binarySearch(nextPage.keys, testKey, comparator) >= 0) {
                    log.info("Key {} was detected on next page. Deleting from page {}.", pos, counter);
                } else {
                    log.info("Moving key {} on page {}.", pos, counter);
                    // Ensure the value at i is available before copying it across.
                    page.fetchValue(i);
                    V value = page.values.get(i);
                    putIntoPage(nextPage, testKey, value);
                    pageTransfer = true;
                }
                // Drop the entry from all three parallel lists and keep size in step.
                page.keys.remove(i);
                page.rawValues.remove(i);
                page.values.remove(i);
                page.size--;
                // Re-examine the same index, which now holds the following entry.
                i--;
            }
        }
        ByteBufOutputStream byteBufOutputStream = new ByteBufOutputStream(PooledByteBufAllocator.DEFAULT.buffer());
        try {
            byte[] pageEncoded = page.encode(byteBufOutputStream);
            externalStore.put(keyCoder.keyEncode(key), pageEncoded);
            if (pageTransfer) {
                // The same stream object is handed to encode a second time.
                encodedNextPage = nextPage.encode(byteBufOutputStream);
                externalStore.put(keyCoder.keyEncode(nextKey), encodedNextPage);
            }
        } finally {
            byteBufOutputStream.buffer().release();
        }

    }

    /**
     * Reports a page whose nextFirstKey differs from the key of the following
     * page and, when asked to, repairs it: nextFirstKey is set to the key of
     * the following page and {@code repairInvalidKeys} is invoked, which may
     * move entries to the next page.
     *
     * @param repair   if true then repair the page and possibly move entries to the next page
     * @param counter  page number.
     * @param page     contents of the page.
     * @param key      key associated with the page.
     * @param nextKey  key associated with the next page.
     */
    private void invalidNextFirstKey(final boolean repair, final int counter, Page<K, V> page, final K key,
            final K nextKey) {
        // Describe how the recorded nextFirstKey relates to the actual next key.
        final char direction;
        if (compareKeys(page.nextFirstKey, nextKey) > 0) {
            direction = '>';
        } else {
            direction = '<';
        }

        log.warn("On page " + counter + " the firstKey is " + page.firstKey + " the length is " + page.size
                + " the nextFirstKey is " + page.nextFirstKey + " which is " + direction
                + " the next page is associated with key " + nextKey);

        if (!repair) {
            return;
        }

        log.info("Repairing nextFirstKey on page {}.", counter);
        page.nextFirstKey = nextKey;
        repairInvalidKeys(counter, page, key, nextKey);
    }

    /**
     * Walks the pages of the external store in key order and checks each page
     * against the key of the page that follows it.
     * <p>
     * A page is counted as failed when its nextFirstKey is null, when its
     * nextFirstKey does not equal the key of the following page, or when its
     * largest key is greater than or equal to the key of the following page.
     * The last page (the one with no higher key in the store) is counted as
     * scanned but is not decoded or checked. Progress is logged every 10000
     * pages.
     *
     * @param repair if true then attempt to repair each failed page as it is found
     * @return 0 when {@code repair} is true; otherwise the number of failed pages
     */
    public int testIntegrity(boolean repair) {
        int counter = 0;
        int failedPages = 0;
        // Start from the smallest key in the external store.
        byte[] encodedKey = externalStore.firstKey();
        byte[] encodedPage = externalStore.get(encodedKey);
        K key = keyCoder.keyDecode(encodedKey);
        while (encodedKey != null) {
            counter++;
            Page<K, V> page = pageFactory.generateEmptyPage(this, key, KeyCoder.EncodeType.SPARSE);
            byte[] encodedNextKey = externalStore.higherKey(encodedKey);
            if (encodedNextKey != null) {
                page.decode(encodedPage);
                K nextKey = keyCoder.keyDecode(encodedNextKey);
                int numKeys = page.keys.size();
                if (page.nextFirstKey == null) {
                    missingNextFirstKey(repair, counter, page, key, nextKey);
                    failedPages++;
                } else if (!page.nextFirstKey.equals(nextKey)) {
                    invalidNextFirstKey(repair, counter, page, key, nextKey);
                    failedPages++;
                } else if (numKeys > 0 && compareKeys(page.keys.get(numKeys - 1), nextKey) >= 0) {
                    log.warn("On page " + counter + " the firstKey is " + page.firstKey + " the largest key is "
                            + page.keys.get(numKeys - 1) + " the next key is " + nextKey
                            + " which is less than or equal to the largest key.");
                    if (repair) {
                        repairInvalidKeys(counter, page, key, nextKey);
                    }
                    failedPages++;
                }
                key = nextKey;
                // Fetched after any repair above, so a rewritten next page is
                // read back in its updated form.
                encodedPage = externalStore.get(encodedNextKey);
            }
            // Becomes null after the last page, which ends the loop.
            encodedKey = encodedNextKey;
            if (counter % 10000 == 0) {
                log.info("Scanned " + counter + " pages. Detected " + failedPages + " failed pages.");
            }
        }
        log.info("Scan complete. Scanned " + counter + " pages. Detected " + failedPages + " failed pages.");
        return repair ? 0 : failedPages;
    }

}