Java tutorial
/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * */ package org.apache.bookkeeper.bookie.storage.ldb; import static com.google.common.base.Preconditions.checkArgument; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; import java.io.Closeable; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.locks.ReentrantLock; import org.apache.bookkeeper.common.util.MathUtils; import org.apache.bookkeeper.util.collections.ConcurrentLongHashSet; import org.apache.bookkeeper.util.collections.ConcurrentLongLongHashMap; import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap; import org.apache.bookkeeper.util.collections.ConcurrentLongLongPairHashMap.LongPair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Write cache implementation. * * <p>The write cache will allocate the requested size from direct memory and it * will break it down into multiple segments. * * <p>The entries are appended in a common buffer and indexed though a hashmap, * until the cache is cleared. * * <p>There is the possibility to iterate through the stored entries in an ordered * way, by (ledgerId, entry). */ public class WriteCache implements Closeable { /** * Consumer that is used to scan the entire write cache. */ public interface EntryConsumer { void accept(long ledgerId, long entryId, ByteBuf entry); } private final ConcurrentLongLongPairHashMap index = new ConcurrentLongLongPairHashMap(4096, 2 * Runtime.getRuntime().availableProcessors()); private final ConcurrentLongLongHashMap lastEntryMap = new ConcurrentLongLongHashMap(4096, 2 * Runtime.getRuntime().availableProcessors()); private final ByteBuf[] cacheSegments; private final int segmentsCount; private final long maxCacheSize; private final int maxSegmentSize; private final long segmentOffsetMask; private final long segmentOffsetBits; private final AtomicLong cacheSize = new AtomicLong(0); private final AtomicLong cacheOffset = new AtomicLong(0); private final LongAdder cacheCount = new LongAdder(); private final ConcurrentLongHashSet deletedLedgers = new ConcurrentLongHashSet(); private final ByteBufAllocator allocator; public WriteCache(ByteBufAllocator allocator, long maxCacheSize) { // Default maxSegmentSize set to 1Gb this(allocator, maxCacheSize, 1 * 1024 * 1024 * 1024); } public WriteCache(ByteBufAllocator allocator, long maxCacheSize, int maxSegmentSize) { checkArgument(maxSegmentSize > 0); long alignedMaxSegmentSize = alignToPowerOfTwo(maxSegmentSize); checkArgument(maxSegmentSize == alignedMaxSegmentSize, "Max segment size needs to be in form of 2^n"); this.allocator = allocator; this.maxCacheSize = maxCacheSize; this.maxSegmentSize = (int) maxSegmentSize; this.segmentOffsetMask = maxSegmentSize - 1; this.segmentOffsetBits = 63 - Long.numberOfLeadingZeros(maxSegmentSize); this.segmentsCount = 1 + (int) (maxCacheSize / maxSegmentSize); this.cacheSegments = new ByteBuf[segmentsCount]; for (int i = 0; i < segmentsCount - 1; i++) { // All intermediate segments will be full-size cacheSegments[i] = Unpooled.directBuffer(maxSegmentSize, maxSegmentSize); } int lastSegmentSize = (int) (maxCacheSize % maxSegmentSize); cacheSegments[segmentsCount - 1] = Unpooled.directBuffer(lastSegmentSize, lastSegmentSize); } public void clear() { cacheSize.set(0L); cacheOffset.set(0L); cacheCount.reset(); index.clear(); lastEntryMap.clear(); deletedLedgers.clear(); } @Override public void close() { for (ByteBuf buf : cacheSegments) { buf.release(); } } public boolean put(long ledgerId, long entryId, ByteBuf entry) { int size = entry.readableBytes(); // Align to 64 bytes so that different threads will not contend the same L1 // cache line int alignedSize = align64(size); long offset; int localOffset; int segmentIdx; while (true) { offset = cacheOffset.getAndAdd(alignedSize); localOffset = (int) (offset & segmentOffsetMask); segmentIdx = (int) (offset >>> segmentOffsetBits); if ((offset + size) > maxCacheSize) { // Cache is full return false; } else if (maxSegmentSize - localOffset < size) { // If an entry is at the end of a segment, we need to get a new offset and try // again in next segment continue; } else { // Found a good offset break; } } cacheSegments[segmentIdx].setBytes(localOffset, entry, entry.readerIndex(), entry.readableBytes()); // Update last entryId for ledger. This logic is to handle writes for the same // ledger coming out of order and from different thread, though in practice it // should not happen and the compareAndSet should be always uncontended. while (true) { long currentLastEntryId = lastEntryMap.get(ledgerId); if (currentLastEntryId > entryId) { // A newer entry is already there break; } if (lastEntryMap.compareAndSet(ledgerId, currentLastEntryId, entryId)) { break; } } index.put(ledgerId, entryId, offset, size); cacheCount.increment(); cacheSize.addAndGet(size); return true; } public ByteBuf get(long ledgerId, long entryId) { LongPair result = index.get(ledgerId, entryId); if (result == null) { return null; } long offset = result.first; int size = (int) result.second; ByteBuf entry = allocator.buffer(size, size); int localOffset = (int) (offset & segmentOffsetMask); int segmentIdx = (int) (offset >>> segmentOffsetBits); entry.writeBytes(cacheSegments[segmentIdx], localOffset, size); return entry; } public ByteBuf getLastEntry(long ledgerId) { long lastEntryId = lastEntryMap.get(ledgerId); if (lastEntryId == -1) { // Ledger not found in write cache return null; } else { return get(ledgerId, lastEntryId); } } public void deleteLedger(long ledgerId) { deletedLedgers.add(ledgerId); } private static final ArrayGroupSort groupSorter = new ArrayGroupSort(2, 4); public void forEach(EntryConsumer consumer) { sortedEntriesLock.lock(); try { int entriesToSort = (int) index.size(); int arrayLen = entriesToSort * 4; if (sortedEntries == null || sortedEntries.length < arrayLen) { sortedEntries = new long[(int) (arrayLen * 2)]; } long startTime = MathUtils.nowInNano(); sortedEntriesIdx = 0; index.forEach((ledgerId, entryId, offset, length) -> { if (deletedLedgers.contains(ledgerId)) { // Ignore deleted ledgers return; } sortedEntries[sortedEntriesIdx] = ledgerId; sortedEntries[sortedEntriesIdx + 1] = entryId; sortedEntries[sortedEntriesIdx + 2] = offset; sortedEntries[sortedEntriesIdx + 3] = length; sortedEntriesIdx += 4; }); if (log.isDebugEnabled()) { log.debug("iteration took {} ms", MathUtils.elapsedNanos(startTime) / 1e6); } startTime = MathUtils.nowInNano(); // Sort entries by (ledgerId, entryId) maintaining the 4 items groups groupSorter.sort(sortedEntries, 0, sortedEntriesIdx); if (log.isDebugEnabled()) { log.debug("sorting {} ms", (MathUtils.elapsedNanos(startTime) / 1e6)); } startTime = MathUtils.nowInNano(); ByteBuf[] entrySegments = new ByteBuf[segmentsCount]; for (int i = 0; i < segmentsCount; i++) { entrySegments[i] = cacheSegments[i].slice(0, cacheSegments[i].capacity()); } for (int i = 0; i < sortedEntriesIdx; i += 4) { long ledgerId = sortedEntries[i]; long entryId = sortedEntries[i + 1]; long offset = sortedEntries[i + 2]; long length = sortedEntries[i + 3]; int localOffset = (int) (offset & segmentOffsetMask); int segmentIdx = (int) (offset >>> segmentOffsetBits); ByteBuf entry = entrySegments[segmentIdx]; entry.setIndex(localOffset, localOffset + (int) length); consumer.accept(ledgerId, entryId, entry); } if (log.isDebugEnabled()) { log.debug("entry log adding {} ms", MathUtils.elapsedNanos(startTime) / 1e6); } } finally { sortedEntriesLock.unlock(); } } public long size() { return cacheSize.get(); } public long count() { return cacheCount.sum(); } public boolean isEmpty() { return cacheSize.get() == 0L; } private static final int ALIGN_64_MASK = ~(64 - 1); static int align64(int size) { return (size + 64 - 1) & ALIGN_64_MASK; } private static long alignToPowerOfTwo(long n) { return (long) Math.pow(2, 64 - Long.numberOfLeadingZeros(n - 1)); } private final ReentrantLock sortedEntriesLock = new ReentrantLock(); private long[] sortedEntries; private int sortedEntriesIdx; private static final Logger log = LoggerFactory.getLogger(WriteCache.class); }