org.apache.lucene.util.ByteBlockPool.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.util.ByteBlockPool.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import java.util.Arrays;
import java.util.List;

import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;

/** 
 * Class that Posting and PostingVector use to write byte
 * streams into shared fixed-size byte[] arrays.  The idea
 * is to allocate slices of increasing lengths.  For
 * example, the first slice is 5 bytes, the next slice is
 * 14, etc.  We start by writing our bytes into the first
 * 5 bytes.  When we hit the end of the slice, we allocate
 * the next slice and then write the address of the new
 * slice into the last 4 bytes of the previous slice (the
 * "forwarding address").
 *
 * <p>Each slice is filled with 0's initially, and we mark
 * the end with a non-zero byte.  This way the methods
 * that are writing into the slice don't need to record
 * its length and instead allocate a new slice once they
 * hit a non-zero byte.
 * 
 * @lucene.internal
 **/
public final class ByteBlockPool implements Accountable {
    private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(ByteBlockPool.class);

    /** Number of low-order bits of a pool-wide offset that address a byte inside one block. */
    public static final int BYTE_BLOCK_SHIFT = 15;
    /** Size in bytes of the blocks addressed by this pool ({@code 1 << BYTE_BLOCK_SHIFT}). */
    public static final int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
    /** Mask that extracts the in-block position from a pool-wide offset. */
    public static final int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;

    /** Abstract class for allocating and freeing byte
     *  blocks. */
    public abstract static class Allocator {
        /** Length of every block handed out by {@link #getByteBlock()}. */
        protected final int blockSize;

        /**
         * @param blockSize length in bytes of the blocks this allocator creates
         */
        public Allocator(int blockSize) {
            this.blockSize = blockSize;
        }

        /**
         * Gives back the blocks stored in {@code blocks[start]} (inclusive) to
         * {@code blocks[end]} (exclusive).
         */
        public abstract void recycleByteBlocks(byte[][] blocks, int start, int end);

        /**
         * Convenience overload: copies the list into an array and recycles
         * every element of it.
         */
        public void recycleByteBlocks(List<byte[]> blocks) {
            final byte[][] b = blocks.toArray(new byte[blocks.size()][]);
            recycleByteBlocks(b, 0, b.length);
        }

        /** Returns a newly allocated block of {@link #blockSize} bytes. */
        public byte[] getByteBlock() {
            return new byte[blockSize];
        }
    }

    /** A simple {@link Allocator} that never recycles. */
    public static final class DirectAllocator extends Allocator {

        /** Creates an allocator handing out blocks of {@link #BYTE_BLOCK_SIZE} bytes. */
        public DirectAllocator() {
            this(BYTE_BLOCK_SIZE);
        }

        /**
         * @param blockSize length in bytes of the blocks this allocator creates
         */
        public DirectAllocator(int blockSize) {
            super(blockSize);
        }

        /** Does nothing: the blocks are simply left to the garbage collector. */
        @Override
        public void recycleByteBlocks(byte[][] blocks, int start, int end) {
        }
    }

    /** A simple {@link Allocator} that never recycles, but
     *  tracks how much total RAM is in use. */
    public static class DirectTrackingAllocator extends Allocator {
        private final Counter bytesUsed;

        /**
         * Creates a tracking allocator handing out blocks of {@link #BYTE_BLOCK_SIZE} bytes.
         *
         * @param bytesUsed counter updated on every allocation and recycle
         */
        public DirectTrackingAllocator(Counter bytesUsed) {
            this(BYTE_BLOCK_SIZE, bytesUsed);
        }

        /**
         * @param blockSize length in bytes of the blocks this allocator creates
         * @param bytesUsed counter updated on every allocation and recycle
         */
        public DirectTrackingAllocator(int blockSize, Counter bytesUsed) {
            super(blockSize);
            this.bytesUsed = bytesUsed;
        }

        /** Adds {@code blockSize} to the counter and returns a newly allocated block. */
        @Override
        public byte[] getByteBlock() {
            bytesUsed.addAndGet(blockSize);
            return new byte[blockSize];
        }

        /**
         * Subtracts the size of the recycled range from the counter and clears
         * the corresponding entries of {@code blocks}.
         */
        @Override
        public void recycleByteBlocks(byte[][] blocks, int start, int end) {
            // Widen to long BEFORE multiplying: (end - start) * blockSize can exceed
            // Integer.MAX_VALUE and would otherwise wrap, corrupting the counter.
            bytesUsed.addAndGet(-((long) (end - start) * blockSize));
            for (int i = start; i < end; i++) {
                blocks[i] = null;
            }
        }
    }

    /**
     * Array of buffers currently used in the pool. Buffers are allocated if
     * needed; don't modify this outside of this class.
     */
    public byte[][] buffers = new byte[10][];

    /** Index into the buffers array pointing to the current buffer used as the head. */
    private int bufferUpto = -1; // Which buffer we are upto
    /** Where we are in the head buffer. */
    public int byteUpto = BYTE_BLOCK_SIZE;

    /** Current head buffer. */
    public byte[] buffer;
    /** Pool-wide offset of the first byte of the current head buffer. */
    public int byteOffset = -BYTE_BLOCK_SIZE;

    private final Allocator allocator;

    /**
     * Creates an empty pool. No buffer is allocated until {@link #nextBuffer()}
     * is called.
     *
     * @param allocator source of new blocks and sink for recycled ones
     */
    public ByteBlockPool(Allocator allocator) {
        this.allocator = allocator;
    }

    /**
     * Resets the pool to its initial state reusing the first buffer and fills all
     * buffers with {@code 0} bytes before they are reused or passed to
     * {@link Allocator#recycleByteBlocks(byte[][], int, int)}. Calling
     * {@link ByteBlockPool#nextBuffer()} is not needed after reset.
     */
    public void reset() {
        reset(true, true);
    }

    /**
     * Expert: Resets the pool to its initial state reusing the first buffer. Calling
     * {@link ByteBlockPool#nextBuffer()} is not needed after reset.
     * @param zeroFillBuffers if <code>true</code> the buffers are filled with {@code 0}.
     *        This should be set to <code>true</code> if this pool is used with slices.
     * @param reuseFirst if <code>true</code> the first buffer will be reused and calling
     *        {@link ByteBlockPool#nextBuffer()} is not needed after reset iff the
     *        block pool was used before ie. {@link ByteBlockPool#nextBuffer()} was called before.
     */
    public void reset(boolean zeroFillBuffers, boolean reuseFirst) {
        if (bufferUpto != -1) {
            // We allocated at least one buffer

            if (zeroFillBuffers) {
                for (int i = 0; i < bufferUpto; i++) {
                    // Fully zero fill buffers that we fully used
                    Arrays.fill(buffers[i], (byte) 0);
                }
                // Partial zero fill the final buffer
                Arrays.fill(buffers[bufferUpto], 0, byteUpto, (byte) 0);
            }

            if (bufferUpto > 0 || !reuseFirst) {
                final int offset = reuseFirst ? 1 : 0;
                // Recycle all but the first buffer (or all of them when not reusing the first)
                allocator.recycleByteBlocks(buffers, offset, 1 + bufferUpto);
                Arrays.fill(buffers, offset, 1 + bufferUpto, null);
            }
            if (reuseFirst) {
                // Re-use the first buffer
                bufferUpto = 0;
                byteUpto = 0;
                byteOffset = 0;
                buffer = buffers[0];
            } else {
                // Back to the freshly-constructed state: no head buffer at all
                bufferUpto = -1;
                byteUpto = BYTE_BLOCK_SIZE;
                byteOffset = -BYTE_BLOCK_SIZE;
                buffer = null;
            }
        }
    }

    /**
     * Advances the pool to its next buffer. This method should be called once
     * after the constructor to initialize the pool. In contrast to the
     * constructor a {@link ByteBlockPool#reset()} call will advance the pool to
     * its first buffer immediately.
     *
     * @throws ArithmeticException if the pool-wide offset of the new buffer no
     *         longer fits into an {@code int}; the pool is left unchanged in that case
     */
    public void nextBuffer() {
        // Compute the new base offset up front so that an overflow is reported
        // before any state is touched, instead of silently wrapping to a negative
        // offset that would corrupt the forwarding addresses written by allocSlice.
        final int nextByteOffset = Math.addExact(byteOffset, BYTE_BLOCK_SIZE);

        if (1 + bufferUpto == buffers.length) {
            // The table of buffers is full: grow it, keeping the existing entries
            byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length + 1, NUM_BYTES_OBJECT_REF)][];
            System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
            buffers = newBuffers;
        }
        buffer = buffers[1 + bufferUpto] = allocator.getByteBlock();
        bufferUpto++;

        byteUpto = 0;
        byteOffset = nextByteOffset;
    }

    /**
     * Allocates a new slice with the given size in the head buffer, moving to
     * the next buffer first if the slice does not fit, and marks the last byte
     * of the slice with the level-0 end marker.
     *
     * @param size number of bytes of the slice
     * @return position of the first byte of the slice inside the head {@link #buffer}
     * @see ByteBlockPool#FIRST_LEVEL_SIZE
     */
    public int newSlice(final int size) {
        if (byteUpto > BYTE_BLOCK_SIZE - size) {
            nextBuffer();
        }
        final int upto = byteUpto;
        byteUpto += size;
        // End marker: bit 4 set, level (low 4 bits) is 0
        buffer[byteUpto - 1] = 16;
        return upto;
    }

    // Size of each slice.  These arrays should be at most 16
    // elements (index is encoded with 4 bits).  First array
    // is just a compact way to encode X+1 with a max.  Second
    // array is the length of each slice, ie first slice is 5
    // bytes, next slice is 14 bytes, etc.

    /**
     * An array holding the offset into the {@link ByteBlockPool#LEVEL_SIZE_ARRAY}
     * to quickly navigate to the next slice level.
     */
    public static final int[] NEXT_LEVEL_ARRAY = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 9 };

    /**
     * An array holding the level sizes for byte slices.
     */
    public static final int[] LEVEL_SIZE_ARRAY = { 5, 14, 20, 30, 40, 40, 80, 80, 120, 200 };

    /**
     * The first level size for new slices
     * @see ByteBlockPool#newSlice(int)
     */
    public static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];

    /**
     * Allocates the follow-up slice for a slice whose end marker has been
     * reached. The level stored in the marker byte selects the size of the new
     * slice; the three bytes preceding the marker are moved to the start of the
     * new slice, and those three bytes plus the marker are overwritten with the
     * 4-byte pool-wide address of the new slice.
     *
     * @param slice the buffer containing the exhausted slice
     * @param upto position in {@code slice} of the end-marker byte
     * @return position inside the head {@link #buffer} at which writing continues
     *         (just after the three bytes that were copied forward)
     */
    public int allocSlice(final byte[] slice, final int upto) {

        // The low 4 bits of the end marker carry the level of the exhausted slice
        final int level = slice[upto] & 15;
        final int newLevel = NEXT_LEVEL_ARRAY[level];
        final int newSize = LEVEL_SIZE_ARRAY[newLevel];

        // Maybe allocate another block
        if (byteUpto > BYTE_BLOCK_SIZE - newSize) {
            nextBuffer();
        }

        final int newUpto = byteUpto;
        final int offset = newUpto + byteOffset;
        byteUpto += newSize;

        // Copy forward the past 3 bytes (which we are about
        // to overwrite with the forwarding address):
        buffer[newUpto] = slice[upto - 3];
        buffer[newUpto + 1] = slice[upto - 2];
        buffer[newUpto + 2] = slice[upto - 1];

        // Write forwarding address at end of last slice (most significant byte first):
        slice[upto - 3] = (byte) (offset >>> 24);
        slice[upto - 2] = (byte) (offset >>> 16);
        slice[upto - 1] = (byte) (offset >>> 8);
        slice[upto] = (byte) offset;

        // Write new level:
        buffer[byteUpto - 1] = (byte) (16 | newLevel);

        return newUpto + 3;
    }

    /** Fill the provided {@link BytesRef} with the bytes at the specified offset/length slice.
     *  This will avoid copying the bytes, if the slice fits into a single block; otherwise, it uses
     *  the provided {@link BytesRefBuilder} to copy bytes over. */
    void setBytesRef(BytesRefBuilder builder, BytesRef result, long offset, int length) {
        result.length = length;

        int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
        byte[] block = buffers[bufferIndex];
        int pos = (int) (offset & BYTE_BLOCK_MASK);
        if (pos + length <= BYTE_BLOCK_SIZE) {
            // common case where the slice lives in a single block: just reference the buffer directly without copying
            result.bytes = block;
            result.offset = pos;
        } else {
            // uncommon case: the slice spans at least 2 blocks, so we must copy the bytes:
            builder.grow(length);
            result.bytes = builder.get().bytes;
            result.offset = 0;
            readBytes(offset, result.bytes, 0, length);
        }
    }

    /**
     * Points {@code term} at a value stored at {@code textStart} whose length is
     * encoded in front of its bytes: one byte if the high bit of the first byte
     * is clear, otherwise two bytes (low 7 bits first). No bytes are copied;
     * {@code term.bytes} references the pool's block directly.
     */
    public void setBytesRef(BytesRef term, int textStart) {
        final byte[] bytes = term.bytes = buffers[textStart >> BYTE_BLOCK_SHIFT];
        int pos = textStart & BYTE_BLOCK_MASK;
        if ((bytes[pos] & 0x80) == 0) {
            // length is 1 byte
            term.length = bytes[pos];
            term.offset = pos + 1;
        } else {
            // length is 2 bytes
            term.length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
            term.offset = pos + 2;
        }
        assert term.length >= 0;
    }

    /**
     * Appends the bytes in the provided {@link BytesRef} at
     * the current position, moving on to new buffers as needed. When the
     * bytes end exactly at a buffer boundary the next buffer is allocated
     * right away.
     */
    public void append(final BytesRef bytes) {
        int bytesLeft = bytes.length;
        int offset = bytes.offset;
        while (bytesLeft > 0) {
            int bufferLeft = BYTE_BLOCK_SIZE - byteUpto;
            if (bytesLeft < bufferLeft) {
                // fits within current buffer
                System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bytesLeft);
                byteUpto += bytesLeft;
                break;
            } else {
                // fill up this buffer and move to next one
                if (bufferLeft > 0) {
                    System.arraycopy(bytes.bytes, offset, buffer, byteUpto, bufferLeft);
                }
                nextBuffer();
                bytesLeft -= bufferLeft;
                offset += bufferLeft;
            }
        }
    }

    /**
     * Reads bytes out of the pool starting at the given offset with the given
     * length into the given byte array at offset {@code bytesOffset}.
     * <p>Note: this method allows to copy across block boundaries.</p>
     *
     * @param offset pool-wide offset of the first byte to read
     * @param bytes destination array
     * @param bytesOffset position in {@code bytes} of the first byte written
     * @param bytesLength number of bytes to copy
     */
    public void readBytes(final long offset, final byte[] bytes, int bytesOffset, int bytesLength) {
        int bytesLeft = bytesLength;
        int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
        int pos = (int) (offset & BYTE_BLOCK_MASK);
        while (bytesLeft > 0) {
            byte[] block = buffers[bufferIndex++];
            int chunk = Math.min(bytesLeft, BYTE_BLOCK_SIZE - pos);
            System.arraycopy(block, pos, bytes, bytesOffset, chunk);
            bytesOffset += chunk;
            bytesLeft -= chunk;
            // Every block after the first one is read from its beginning
            pos = 0;
        }
    }

    /**
     * Set the given {@link BytesRef} so that its content is equal to the
     * {@code ref.length} bytes starting at {@code offset}. Most of the time this
     * method will set pointers to internal data-structures. However, in case a
     * value crosses a boundary, a fresh copy will be returned.
     * On the contrary to {@link #setBytesRef(BytesRef, int)}, this does not
     * expect the length to be encoded with the data.
     */
    public void setRawBytesRef(BytesRef ref, final long offset) {
        int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
        int pos = (int) (offset & BYTE_BLOCK_MASK);
        if (pos + ref.length <= BYTE_BLOCK_SIZE) {
            ref.bytes = buffers[bufferIndex];
            ref.offset = pos;
        } else {
            ref.bytes = new byte[ref.length];
            ref.offset = 0;
            readBytes(offset, ref.bytes, 0, ref.length);
        }
    }

    /** Read a single byte at the given {@code offset}. */
    public byte readByte(long offset) {
        int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
        int pos = (int) (offset & BYTE_BLOCK_MASK);
        byte[] block = buffers[bufferIndex];
        return block[pos];
    }

    /**
     * Sums the shallow size of this instance, the size of the buffer table and
     * the size of every buffer; the head buffer is counted once up front and
     * skipped while walking the table.
     */
    @Override
    public long ramBytesUsed() {
        long size = BASE_RAM_BYTES;
        size += RamUsageEstimator.sizeOfObject(buffer);
        size += RamUsageEstimator.shallowSizeOf(buffers);
        for (byte[] buf : buffers) {
            if (buf == buffer) {
                continue;
            }
            size += RamUsageEstimator.sizeOfObject(buf);
        }
        return size;
    }
}