org.apache.lucene.util.FixedBitSet.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.util.FixedBitSet.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

/**
 * BitSet of fixed length (numBits), backed by accessible ({@link #getBits})
 * long[], accessed with an int index, implementing {@link Bits} and
 * {@link DocIdSet}. If you need to manage more than 2.1B bits, use
 * {@link LongBitSet}.
 * 
 * @lucene.internal
 */
public final class FixedBitSet extends BitSet implements Bits, Accountable {

    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);

    private final long[] bits; // Array of longs holding the bits 
    private final int numBits; // The number of bits in use
    private final int numWords; // The exact number of longs needed to hold numBits (<= bits.length)

    /**
     * If the given {@link FixedBitSet} is large enough to hold {@code numBits+1},
     * returns the given bits, otherwise returns a new {@link FixedBitSet} which
     * can hold the requested number of bits.
     * <p>
     * <b>NOTE:</b> the returned bitset reuses the underlying {@code long[]} of
     * the given {@code bits} if possible. Also, calling {@link #length()} on the
     * returned bits may return a value greater than {@code numBits}.
     */
    public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) {
        if (numBits < bits.numBits) {
            return bits;
        } else {
            // Depends on the ghost bits being clear!
            // (Otherwise, they may become visible in the new instance)
            int numWords = bits2words(numBits);
            long[] arr = bits.getBits();
            if (numWords >= arr.length) {
                arr = ArrayUtil.grow(arr, numWords + 1);
            }
            return new FixedBitSet(arr, arr.length << 6);
        }
    }

    /** returns the number of 64 bit words it would take to hold numBits */
    public static int bits2words(int numBits) {
        return ((numBits - 1) >> 6) + 1; // I.e.: get the word-offset of the last bit and add one (make sure to use >> so 0 returns 0!)
    }

    /**
     * Returns the popcount or cardinality of the intersection of the two sets.
     * Neither set is modified.
     */
    public static long intersectionCount(FixedBitSet a, FixedBitSet b) {
        // Depends on the ghost bits being clear!
        return BitUtil.pop_intersect(a.bits, b.bits, 0, Math.min(a.numWords, b.numWords));
    }

    /**
     * Returns the popcount or cardinality of the union of the two sets. Neither
     * set is modified.
     */
    public static long unionCount(FixedBitSet a, FixedBitSet b) {
        // Depends on the ghost bits being clear!
        long tot = BitUtil.pop_union(a.bits, b.bits, 0, Math.min(a.numWords, b.numWords));
        if (a.numWords < b.numWords) {
            tot += BitUtil.pop_array(b.bits, a.numWords, b.numWords - a.numWords);
        } else if (a.numWords > b.numWords) {
            tot += BitUtil.pop_array(a.bits, b.numWords, a.numWords - b.numWords);
        }
        return tot;
    }

    /**
     * Returns the popcount or cardinality of "a and not b" or
     * "intersection(a, not(b))". Neither set is modified.
     */
    public static long andNotCount(FixedBitSet a, FixedBitSet b) {
        // Depends on the ghost bits being clear!
        long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.min(a.numWords, b.numWords));
        if (a.numWords > b.numWords) {
            tot += BitUtil.pop_array(a.bits, b.numWords, a.numWords - b.numWords);
        }
        return tot;
    }

    /**
     * Creates a new LongBitSet.
     * The internally allocated long array will be exactly the size needed to accommodate the numBits specified.
     * @param numBits the number of bits needed
     */
    public FixedBitSet(int numBits) {
        this.numBits = numBits;
        bits = new long[bits2words(numBits)];
        numWords = bits.length;
    }

    /**
     * Creates a new LongBitSet using the provided long[] array as backing store.
     * The storedBits array must be large enough to accommodate the numBits specified, but may be larger.
     * In that case the 'extra' or 'ghost' bits must be clear (or they may provoke spurious side-effects)
     * @param storedBits the array to use as backing store
     * @param numBits the number of bits actually needed
     */
    public FixedBitSet(long[] storedBits, int numBits) {
        this.numWords = bits2words(numBits);
        if (numWords > storedBits.length) {
            throw new IllegalArgumentException("The given long array is too small  to hold " + numBits + " bits");
        }
        this.numBits = numBits;
        this.bits = storedBits;

        assert verifyGhostBitsClear();
    }

    /**
     * Checks if the bits past numBits are clear.
     * Some methods rely on this implicit assumption: search for "Depends on the ghost bits being clear!" 
     * @return true if the bits past numBits are clear.
     */
    private boolean verifyGhostBitsClear() {
        for (int i = numWords; i < bits.length; i++) {
            if (bits[i] != 0)
                return false;
        }

        if ((numBits & 0x3f) == 0)
            return true;

        long mask = -1L << numBits;

        return (bits[numWords - 1] & mask) == 0;
    }

    @Override
    public int length() {
        return numBits;
    }

    @Override
    public long ramBytesUsed() {
        return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
    }

    /** Expert. */
    public long[] getBits() {
        return bits;
    }

    /** Returns number of set bits.  NOTE: this visits every
     *  long in the backing bits array, and the result is not
     *  internally cached!
     */
    @Override
    public int cardinality() {
        // Depends on the ghost bits being clear!
        return (int) BitUtil.pop_array(bits, 0, numWords);
    }

    @Override
    public boolean get(int index) {
        assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
        int i = index >> 6; // div 64
        // signed shift will keep a negative index and force an
        // array-index-out-of-bounds-exception, removing the need for an explicit check.
        long bitmask = 1L << index;
        return (bits[i] & bitmask) != 0;
    }

    public void set(int index) {
        assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
        int wordNum = index >> 6; // div 64
        long bitmask = 1L << index;
        bits[wordNum] |= bitmask;
    }

    public boolean getAndSet(int index) {
        assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
        int wordNum = index >> 6; // div 64
        long bitmask = 1L << index;
        boolean val = (bits[wordNum] & bitmask) != 0;
        bits[wordNum] |= bitmask;
        return val;
    }

    @Override
    public void clear(int index) {
        assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
        int wordNum = index >> 6;
        long bitmask = 1L << index;
        bits[wordNum] &= ~bitmask;
    }

    public boolean getAndClear(int index) {
        assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
        int wordNum = index >> 6; // div 64
        long bitmask = 1L << index;
        boolean val = (bits[wordNum] & bitmask) != 0;
        bits[wordNum] &= ~bitmask;
        return val;
    }

    @Override
    public int nextSetBit(int index) {
        // Depends on the ghost bits being clear!
        assert index >= 0 && index < numBits : "index=" + index + ", numBits=" + numBits;
        int i = index >> 6;
        long word = bits[i] >> index; // skip all the bits to the right of index

        if (word != 0) {
            return index + Long.numberOfTrailingZeros(word);
        }

        while (++i < numWords) {
            word = bits[i];
            if (word != 0) {
                return (i << 6) + Long.numberOfTrailingZeros(word);
            }
        }

        return DocIdSetIterator.NO_MORE_DOCS;
    }

    @Override
    public int prevSetBit(int index) {
        assert index >= 0 && index < numBits : "index=" + index + " numBits=" + numBits;
        int i = index >> 6;
        final int subIndex = index & 0x3f; // index within the word
        long word = (bits[i] << (63 - subIndex)); // skip all the bits to the left of index

        if (word != 0) {
            return (i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197
        }

        while (--i >= 0) {
            word = bits[i];
            if (word != 0) {
                return (i << 6) + 63 - Long.numberOfLeadingZeros(word);
            }
        }

        return -1;
    }

    @Override
    public void or(DocIdSetIterator iter) throws IOException {
        if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
            checkUnpositioned(iter);
            final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
            or(bits);
        } else {
            super.or(iter);
        }
    }

    /** this = this OR other */
    public void or(FixedBitSet other) {
        or(other.bits, other.numWords);
    }

    private void or(final long[] otherArr, final int otherNumWords) {
        assert otherNumWords <= numWords : "numWords=" + numWords + ", otherNumWords=" + otherNumWords;
        final long[] thisArr = this.bits;
        int pos = Math.min(numWords, otherNumWords);
        while (--pos >= 0) {
            thisArr[pos] |= otherArr[pos];
        }
    }

    /** this = this XOR other */
    public void xor(FixedBitSet other) {
        xor(other.bits, other.numWords);
    }

    /** Does in-place XOR of the bits provided by the iterator. */
    public void xor(DocIdSetIterator iter) throws IOException {
        checkUnpositioned(iter);
        if (BitSetIterator.getFixedBitSetOrNull(iter) != null) {
            final FixedBitSet bits = BitSetIterator.getFixedBitSetOrNull(iter);
            xor(bits);
        } else {
            int doc;
            while ((doc = iter.nextDoc()) < numBits) {
                flip(doc);
            }
        }
    }

    private void xor(long[] otherBits, int otherNumWords) {
        assert otherNumWords <= numWords : "numWords=" + numWords + ", other.numWords=" + otherNumWords;
        final long[] thisBits = this.bits;
        int pos = Math.min(numWords, otherNumWords);
        while (--pos >= 0) {
            thisBits[pos] ^= otherBits[pos];
        }
    }

    /** returns true if the sets have any elements in common */
    public boolean intersects(FixedBitSet other) {
        // Depends on the ghost bits being clear!
        int pos = Math.min(numWords, other.numWords);
        while (--pos >= 0) {
            if ((bits[pos] & other.bits[pos]) != 0)
                return true;
        }
        return false;
    }

    /** this = this AND other */
    public void and(FixedBitSet other) {
        and(other.bits, other.numWords);
    }

    private void and(final long[] otherArr, final int otherNumWords) {
        final long[] thisArr = this.bits;
        int pos = Math.min(this.numWords, otherNumWords);
        while (--pos >= 0) {
            thisArr[pos] &= otherArr[pos];
        }
        if (this.numWords > otherNumWords) {
            Arrays.fill(thisArr, otherNumWords, this.numWords, 0L);
        }
    }

    /** this = this AND NOT other */
    public void andNot(FixedBitSet other) {
        andNot(other.bits, other.numWords);
    }

    private void andNot(final long[] otherArr, final int otherNumWords) {
        final long[] thisArr = this.bits;
        int pos = Math.min(this.numWords, otherNumWords);
        while (--pos >= 0) {
            thisArr[pos] &= ~otherArr[pos];
        }
    }

    /**
     * Scans the backing store to check if all bits are clear.
     * The method is deliberately not called "isEmpty" to emphasize it is not low cost (as isEmpty usually is).
     * @return true if all bits are clear.
     */
    public boolean scanIsEmpty() {
        // This 'slow' implementation is still faster than any external one could be
        // (e.g.: (bitSet.length() == 0 || bitSet.nextSetBit(0) == -1))
        // especially for small BitSets
        // Depends on the ghost bits being clear!
        final int count = numWords;

        for (int i = 0; i < count; i++) {
            if (bits[i] != 0)
                return false;
        }

        return true;
    }

    /** Flips a range of bits
     *
     * @param startIndex lower index
     * @param endIndex one-past the last bit to flip
     */
    public void flip(int startIndex, int endIndex) {
        assert startIndex >= 0 && startIndex < numBits;
        assert endIndex >= 0 && endIndex <= numBits;
        if (endIndex <= startIndex) {
            return;
        }

        int startWord = startIndex >> 6;
        int endWord = (endIndex - 1) >> 6;

        /*** Grrr, java shifting uses only the lower 6 bits of the count so -1L>>>64 == -1
         * for that reason, make sure not to use endmask if the bits to flip will
         * be zero in the last word (redefine endWord to be the last changed...)
        long startmask = -1L << (startIndex & 0x3f);     // example: 11111...111000
        long endmask = -1L >>> (64-(endIndex & 0x3f));   // example: 00111...111111
        ***/

        long startmask = -1L << startIndex;
        long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex since only the lowest 6 bits are used

        if (startWord == endWord) {
            bits[startWord] ^= (startmask & endmask);
            return;
        }

        bits[startWord] ^= startmask;

        for (int i = startWord + 1; i < endWord; i++) {
            bits[i] = ~bits[i];
        }

        bits[endWord] ^= endmask;
    }

    /** Flip the bit at the provided index. */
    public void flip(int index) {
        assert index >= 0 && index < numBits : "index=" + index + " numBits=" + numBits;
        int wordNum = index >> 6; // div 64
        long bitmask = 1L << index; // mod 64 is implicit
        bits[wordNum] ^= bitmask;
    }

    /** Sets a range of bits
     *
     * @param startIndex lower index
     * @param endIndex one-past the last bit to set
     */
    public void set(int startIndex, int endIndex) {
        assert startIndex >= 0 && startIndex < numBits : "startIndex=" + startIndex + ", numBits=" + numBits;
        assert endIndex >= 0 && endIndex <= numBits : "endIndex=" + endIndex + ", numBits=" + numBits;
        if (endIndex <= startIndex) {
            return;
        }

        int startWord = startIndex >> 6;
        int endWord = (endIndex - 1) >> 6;

        long startmask = -1L << startIndex;
        long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex since only the lowest 6 bits are used

        if (startWord == endWord) {
            bits[startWord] |= (startmask & endmask);
            return;
        }

        bits[startWord] |= startmask;
        Arrays.fill(bits, startWord + 1, endWord, -1L);
        bits[endWord] |= endmask;
    }

    @Override
    public void clear(int startIndex, int endIndex) {
        assert startIndex >= 0 && startIndex < numBits : "startIndex=" + startIndex + ", numBits=" + numBits;
        assert endIndex >= 0 && endIndex <= numBits : "endIndex=" + endIndex + ", numBits=" + numBits;
        if (endIndex <= startIndex) {
            return;
        }

        int startWord = startIndex >> 6;
        int endWord = (endIndex - 1) >> 6;

        long startmask = -1L << startIndex;
        long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex since only the lowest 6 bits are used

        // invert masks since we are clearing
        startmask = ~startmask;
        endmask = ~endmask;

        if (startWord == endWord) {
            bits[startWord] &= (startmask | endmask);
            return;
        }

        bits[startWord] &= startmask;
        Arrays.fill(bits, startWord + 1, endWord, 0L);
        bits[endWord] &= endmask;
    }

    @Override
    public FixedBitSet clone() {
        long[] bits = new long[this.bits.length];
        System.arraycopy(this.bits, 0, bits, 0, numWords);
        return new FixedBitSet(bits, numBits);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof FixedBitSet)) {
            return false;
        }
        FixedBitSet other = (FixedBitSet) o;
        if (numBits != other.numBits) {
            return false;
        }
        // Depends on the ghost bits being clear!
        return Arrays.equals(bits, other.bits);
    }

    @Override
    public int hashCode() {
        // Depends on the ghost bits being clear!
        long h = 0;
        for (int i = numWords; --i >= 0;) {
            h ^= bits[i];
            h = (h << 1) | (h >>> 63); // rotate left
        }
        // fold leftmost bits into right and add a constant to prevent
        // empty sets from returning 0, which is too common.
        return (int) ((h >> 32) ^ h) + 0x98761234;
    }

    /**
     * Make a copy of the given bits.
     */
    public static FixedBitSet copyOf(Bits bits) {
        if (bits instanceof FixedBits) {
            // restore the original FixedBitSet
            FixedBits fixedBits = (FixedBits) bits;
            bits = new FixedBitSet(fixedBits.bits, fixedBits.length);
        }

        if (bits instanceof FixedBitSet) {
            return ((FixedBitSet) bits).clone();
        } else {
            int length = bits.length();
            FixedBitSet bitSet = new FixedBitSet(length);
            bitSet.set(0, length);
            for (int i = 0; i < length; ++i) {
                if (bits.get(i) == false) {
                    bitSet.clear(i);
                }
            }
            return bitSet;
        }
    }

    /**
     * Convert this instance to read-only {@link Bits}.
     * This is useful in the case that this {@link FixedBitSet} is returned as a
     * {@link Bits} instance, to make sure that consumers may not get write access
     * back by casting to a {@link FixedBitSet}.
     * NOTE: Changes to this {@link FixedBitSet} will be reflected on the returned
     * {@link Bits}.
     */
    public Bits asReadOnlyBits() {
        return new FixedBits(bits, numBits);
    }
}