org.apache.bookkeeper.bookie.storage.ldb.WriteCache.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.bookkeeper.bookie.storage.ldb.WriteCache.java

Source

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */
package org.apache.bookkeeper.bookie.storage.ldb;

import static com.google.common.base.Preconditions.checkArgument;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.Unpooled;

import java.io.Closeable;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.bookkeeper.common.util.MathUtils;
import org.apache.bookkeeper.util.collections.ConcurrentLongHashSet;
import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap;
import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap;
import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap.LongPair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Write cache implementation.
 *
 * <p>The write cache will allocate the requested size from direct memory and it
 * will break it down into multiple segments.
 *
 * <p>The entries are appended in a common buffer and indexed through a hash map,
 * until the cache is cleared.
 *
 * <p>There is the possibility to iterate through the stored entries in an ordered
 * way, by (ledgerId, entry).
 */
public class WriteCache implements Closeable {

    private static final Logger log = LoggerFactory.getLogger(WriteCache.class);

    /** Default maximum size of a single segment: 1 GiB. */
    private static final int DEFAULT_MAX_SEGMENT_SIZE = 1024 * 1024 * 1024;

    /** Number of longs stored per entry in the sort scratch array: ledgerId, entryId, offset, length. */
    private static final int SORT_GROUP_SIZE = 4;

    /** Conservative upper bound used when over-allocating the sort scratch array. */
    private static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;

    private static final int ALIGN_64_MASK = ~(64 - 1);

    // Sorts the scratch array by (ledgerId, entryId) while keeping each 4-long group together
    private static final ArrayGroupSort groupSorter = new ArrayGroupSort(2, 4);

    /**
     * Consumer that is used to scan the entire write cache.
     */
    @FunctionalInterface
    public interface EntryConsumer {
        void accept(long ledgerId, long entryId, ByteBuf entry);
    }

    // (ledgerId, entryId) -> (global offset in the cache, entry size)
    private final ConcurrentLongLongPairHashMap index = new ConcurrentLongLongPairHashMap(4096,
            2 * Runtime.getRuntime().availableProcessors());

    // ledgerId -> highest entryId written so far for that ledger
    private final ConcurrentLongLongHashMap lastEntryMap = new ConcurrentLongLongHashMap(4096,
            2 * Runtime.getRuntime().availableProcessors());

    private final ByteBuf[] cacheSegments;
    private final int segmentsCount;

    private final long maxCacheSize;
    private final int maxSegmentSize;
    // Low bits of a global offset select the position inside a segment ...
    private final long segmentOffsetMask;
    // ... and the remaining high bits select the segment itself
    private final long segmentOffsetBits;

    private final AtomicLong cacheSize = new AtomicLong(0);
    private final AtomicLong cacheOffset = new AtomicLong(0);
    private final LongAdder cacheCount = new LongAdder();

    private final ConcurrentLongHashSet deletedLedgers = new ConcurrentLongHashSet();

    private final ByteBufAllocator allocator;

    // Guards close() so that the segments are released exactly once
    private final AtomicBoolean closed = new AtomicBoolean(false);

    // Scratch state for forEach(). These are fields (not locals) because they are
    // mutated from inside a lambda; access is serialized by sortedEntriesLock.
    private final ReentrantLock sortedEntriesLock = new ReentrantLock();
    private long[] sortedEntries;
    private int sortedEntriesIdx;

    /**
     * Creates a write cache using the default maximum segment size (1 GiB).
     *
     * @param allocator    allocator used for the buffers returned by {@link #get(long, long)}
     * @param maxCacheSize total capacity of the cache, in bytes
     */
    public WriteCache(ByteBufAllocator allocator, long maxCacheSize) {
        this(allocator, maxCacheSize, DEFAULT_MAX_SEGMENT_SIZE);
    }

    /**
     * Creates a write cache, eagerly allocating all of its segments as direct buffers.
     *
     * @param allocator      allocator used for the buffers returned by {@link #get(long, long)}
     * @param maxCacheSize   total capacity of the cache, in bytes
     * @param maxSegmentSize size of each full segment, in bytes; must be positive and a power of two
     * @throws IllegalArgumentException if {@code maxSegmentSize} is not a positive power of two
     */
    public WriteCache(ByteBufAllocator allocator, long maxCacheSize, int maxSegmentSize) {
        checkArgument(maxSegmentSize > 0);

        long alignedMaxSegmentSize = alignToPowerOfTwo(maxSegmentSize);
        checkArgument(maxSegmentSize == alignedMaxSegmentSize, "Max segment size needs to be in form of 2^n");

        this.allocator = allocator;
        this.maxCacheSize = maxCacheSize;
        this.maxSegmentSize = maxSegmentSize;
        this.segmentOffsetMask = maxSegmentSize - 1;
        this.segmentOffsetBits = 63 - Long.numberOfLeadingZeros(maxSegmentSize);

        this.segmentsCount = 1 + (int) (maxCacheSize / maxSegmentSize);
        this.cacheSegments = new ByteBuf[segmentsCount];

        try {
            for (int i = 0; i < segmentsCount - 1; i++) {
                // All intermediate segments will be full-size
                cacheSegments[i] = Unpooled.directBuffer(maxSegmentSize, maxSegmentSize);
            }

            // The last segment only holds the remainder (possibly zero bytes)
            int lastSegmentSize = (int) (maxCacheSize % maxSegmentSize);
            cacheSegments[segmentsCount - 1] = Unpooled.directBuffer(lastSegmentSize, lastSegmentSize);
        } catch (RuntimeException | Error e) {
            // Do not leak the segments that were already allocated if a later allocation fails
            releaseSegments();
            throw e;
        }
    }

    /**
     * Resets the cache to the empty state: counters, write offset, indexes and the
     * set of deleted ledgers are cleared. The segment buffers are kept and reused.
     */
    public void clear() {
        cacheSize.set(0L);
        cacheOffset.set(0L);
        cacheCount.reset();
        index.clear();
        lastEntryMap.clear();
        deletedLedgers.clear();
    }

    /**
     * Releases all the cache segments. Calling this method more than once has no
     * further effect, as required by {@link Closeable#close()}.
     */
    @Override
    public void close() {
        if (closed.compareAndSet(false, true)) {
            releaseSegments();
        }
    }

    /** Releases every segment that has been allocated so far. */
    private void releaseSegments() {
        for (ByteBuf segment : cacheSegments) {
            if (segment != null) {
                segment.release();
            }
        }
    }

    /**
     * Copies the readable bytes of {@code entry} into the cache and indexes them
     * under {@code (ledgerId, entryId)}.
     *
     * @param ledgerId ledger the entry belongs to
     * @param entryId  id of the entry within the ledger
     * @param entry    buffer whose readable bytes are copied into the cache
     * @return {@code true} if the entry was stored, {@code false} if the cache is full
     */
    public boolean put(long ledgerId, long entryId, ByteBuf entry) {
        int size = entry.readableBytes();

        // Align to 64 bytes so that different threads will not contend the same L1
        // cache line
        int alignedSize = align64(size);

        long offset;
        int localOffset;
        int segmentIdx;

        while (true) {
            // Atomically reserve a region; a reservation that turns out to be unusable is simply skipped
            offset = cacheOffset.getAndAdd(alignedSize);
            localOffset = (int) (offset & segmentOffsetMask);
            segmentIdx = (int) (offset >>> segmentOffsetBits);

            if ((offset + size) > maxCacheSize) {
                // Cache is full
                return false;
            } else if (maxSegmentSize - localOffset < size) {
                // If an entry is at the end of a segment, we need to get a new offset and try
                // again in next segment
                continue;
            } else {
                // Found a good offset
                break;
            }
        }

        cacheSegments[segmentIdx].setBytes(localOffset, entry, entry.readerIndex(), size);

        // Index the entry before advertising it as the last entry of the ledger, so that a
        // concurrent getLastEntry() never observes an entry id that is not yet readable.
        index.put(ledgerId, entryId, offset, size);

        // Update last entryId for ledger. This logic is to handle writes for the same
        // ledger coming out of order and from different thread, though in practice it
        // should not happen and the compareAndSet should be always uncontended.
        while (true) {
            long currentLastEntryId = lastEntryMap.get(ledgerId);
            if (currentLastEntryId > entryId) {
                // A newer entry is already there
                break;
            }

            if (lastEntryMap.compareAndSet(ledgerId, currentLastEntryId, entryId)) {
                break;
            }
        }

        cacheCount.increment();
        cacheSize.addAndGet(size);
        return true;
    }

    /**
     * Looks up an entry and returns a copy of it.
     *
     * @param ledgerId ledger the entry belongs to
     * @param entryId  id of the entry within the ledger
     * @return a buffer newly obtained from the allocator holding a copy of the entry
     *         (this class keeps no reference to it), or {@code null} if the entry is
     *         not indexed in the cache
     */
    public ByteBuf get(long ledgerId, long entryId) {
        LongPair result = index.get(ledgerId, entryId);
        if (result == null) {
            return null;
        }

        long offset = result.first;
        int size = (int) result.second;
        int localOffset = (int) (offset & segmentOffsetMask);
        int segmentIdx = (int) (offset >>> segmentOffsetBits);

        ByteBuf entry = allocator.buffer(size, size);
        try {
            entry.writeBytes(cacheSegments[segmentIdx], localOffset, size);
        } catch (RuntimeException e) {
            // Do not leak the freshly allocated buffer if the copy fails
            entry.release();
            throw e;
        }
        return entry;
    }

    /**
     * Returns a copy of the entry with the highest entry id recorded for the ledger.
     *
     * @param ledgerId ledger to look up
     * @return the last entry as returned by {@link #get(long, long)}, or {@code null}
     *         if the ledger is not found in the write cache
     */
    public ByteBuf getLastEntry(long ledgerId) {
        long lastEntryId = lastEntryMap.get(ledgerId);
        if (lastEntryId == -1) {
            // Ledger not found in write cache
            return null;
        }
        return get(ledgerId, lastEntryId);
    }

    /**
     * Marks a ledger as deleted: its entries are skipped by {@link #forEach(EntryConsumer)}.
     * The entries themselves stay in the cache until {@link #clear()} is called.
     *
     * @param ledgerId ledger to mark as deleted
     */
    public void deleteLedger(long ledgerId) {
        deletedLedgers.add(ledgerId);
    }

    /**
     * Passes every cached entry that does not belong to a deleted ledger to
     * {@code consumer}, ordered by (ledgerId, entryId).
     *
     * <p>The buffer handed to the consumer is a slice over the cache segment whose
     * reader/writer indices are repositioned for each entry; the same slice object is
     * reused for the following entries of that segment.
     *
     * <p>NOTE(review): this sizes its scratch array from a snapshot of the index size;
     * it presumably expects no concurrent {@link #put(long, long, ByteBuf)} while the
     * scan is running. Confirm against callers.
     *
     * @param consumer callback invoked once per entry
     * @throws ArithmeticException if the number of indexed entries is too large to be
     *                             sorted in a single array
     */
    public void forEach(EntryConsumer consumer) {
        sortedEntriesLock.lock();

        try {
            // Exact arithmetic: fail loudly instead of silently truncating/overflowing an int
            int arrayLen = Math.toIntExact(Math.multiplyExact(index.size(), (long) SORT_GROUP_SIZE));
            if (sortedEntries == null || sortedEntries.length < arrayLen) {
                // Over-allocate (2x) to avoid re-allocating on every call; the doubling is
                // computed as a long and capped so that it cannot overflow
                int grownLen = (int) Math.min(2L * arrayLen, (long) MAX_ARRAY_LENGTH);
                sortedEntries = new long[Math.max(arrayLen, grownLen)];
            }

            long startTime = MathUtils.nowInNano();

            sortedEntriesIdx = 0;
            index.forEach((ledgerId, entryId, offset, length) -> {
                if (deletedLedgers.contains(ledgerId)) {
                    // Ignore deleted ledgers
                    return;
                }

                sortedEntries[sortedEntriesIdx] = ledgerId;
                sortedEntries[sortedEntriesIdx + 1] = entryId;
                sortedEntries[sortedEntriesIdx + 2] = offset;
                sortedEntries[sortedEntriesIdx + 3] = length;
                sortedEntriesIdx += SORT_GROUP_SIZE;
            });

            if (log.isDebugEnabled()) {
                log.debug("iteration took {} ms", MathUtils.elapsedNanos(startTime) / 1e6);
            }
            startTime = MathUtils.nowInNano();

            // Sort entries by (ledgerId, entryId) maintaining the 4 items groups
            groupSorter.sort(sortedEntries, 0, sortedEntriesIdx);
            if (log.isDebugEnabled()) {
                log.debug("sorting {} ms", (MathUtils.elapsedNanos(startTime) / 1e6));
            }
            startTime = MathUtils.nowInNano();

            // Work on slices so that the indices of the underlying segments are left untouched
            ByteBuf[] entrySegments = new ByteBuf[segmentsCount];
            for (int i = 0; i < segmentsCount; i++) {
                entrySegments[i] = cacheSegments[i].slice(0, cacheSegments[i].capacity());
            }

            for (int i = 0; i < sortedEntriesIdx; i += SORT_GROUP_SIZE) {
                long ledgerId = sortedEntries[i];
                long entryId = sortedEntries[i + 1];
                long offset = sortedEntries[i + 2];
                long length = sortedEntries[i + 3];

                int localOffset = (int) (offset & segmentOffsetMask);
                int segmentIdx = (int) (offset >>> segmentOffsetBits);
                ByteBuf entry = entrySegments[segmentIdx];
                entry.setIndex(localOffset, localOffset + (int) length);
                consumer.accept(ledgerId, entryId, entry);
            }

            if (log.isDebugEnabled()) {
                log.debug("entry log adding {} ms", MathUtils.elapsedNanos(startTime) / 1e6);
            }
        } finally {
            sortedEntriesLock.unlock();
        }
    }

    /**
     * @return the sum of the (unaligned) sizes, in bytes, of the entries stored since the last clear
     */
    public long size() {
        return cacheSize.get();
    }

    /**
     * @return the number of entries stored since the last clear
     */
    public long count() {
        return cacheCount.sum();
    }

    /**
     * @return {@code true} if the accumulated size of the stored entries is zero
     */
    public boolean isEmpty() {
        return cacheSize.get() == 0L;
    }

    /**
     * Rounds {@code size} up to the next multiple of 64.
     */
    static int align64(int size) {
        return (size + 64 - 1) & ALIGN_64_MASK;
    }

    /**
     * Returns the smallest power of two that is greater than or equal to {@code n}.
     * Only meaningful for {@code 1 <= n <= 2^62}; callers in this class pass a positive int.
     */
    private static long alignToPowerOfTwo(long n) {
        // Pure integer arithmetic: no round trip through floating point
        return 1L << (64 - Long.numberOfLeadingZeros(n - 1));
    }
}