org.apache.lucene.util.SentinelIntSet.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.util.SentinelIntSet.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import java.util.Arrays;

/**
 * A native int hash-based set where one value is reserved to mean "EMPTY" internally. The space overhead is fairly low
 * as there is only one power-of-two sized int[] to hold the values.  The set is re-hashed when adding a value that
 * would make it >= 75% full.  Consider extending and over-riding {@link #hash(int)} if the values might be poor
 * hash keys; Lucene docids should be fine.
 * The internal fields are exposed publicly to enable more efficient use at the expense of better O-O principles.
 * <p>
 * To iterate over the integers held in this set, simply use code like this:
 * <pre class="prettyprint">
 * SentinelIntSet set = ...
 * for (int v : set.keys) {
 *   if (v == set.emptyVal)
 *     continue;
 *   //use v...
 * }</pre>
 *
 * @lucene.internal
 */
public class SentinelIntSet {
    /** A power-of-2 over-sized array holding the integers in the set along with empty values. */
    public int[] keys;
    public int count;
    public final int emptyVal;
    /** the count at which a rehash should be done */
    public int rehashCount;

    /**
     *
     * @param size  The minimum number of elements this set should be able to hold without rehashing
     *              (i.e. the slots are guaranteed not to change)
     * @param emptyVal The integer value to use for EMPTY
     */
    public SentinelIntSet(int size, int emptyVal) {
        this.emptyVal = emptyVal;
        int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
        rehashCount = tsize - (tsize >> 2);
        if (size >= rehashCount) { // should be able to hold "size" w/o re-hashing
            tsize <<= 1;
            rehashCount = tsize - (tsize >> 2);
        }
        keys = new int[tsize];
        if (emptyVal != 0)
            clear();
    }

    public void clear() {
        Arrays.fill(keys, emptyVal);
        count = 0;
    }

    /** (internal) Return the hash for the key. The default implementation just returns the key,
     * which is not appropriate for general purpose use.
     */
    public int hash(int key) {
        return key;
    }

    /** The number of integers in this set. */
    public int size() {
        return count;
    }

    /** (internal) Returns the slot for this key */
    public int getSlot(int key) {
        assert key != emptyVal;
        int h = hash(key);
        int s = h & (keys.length - 1);
        if (keys[s] == key || keys[s] == emptyVal)
            return s;

        int increment = (h >> 7) | 1;
        do {
            s = (s + increment) & (keys.length - 1);
        } while (keys[s] != key && keys[s] != emptyVal);
        return s;
    }

    /** (internal) Returns the slot for this key, or -slot-1 if not found */
    public int find(int key) {
        assert key != emptyVal;
        int h = hash(key);
        int s = h & (keys.length - 1);
        if (keys[s] == key)
            return s;
        if (keys[s] == emptyVal)
            return -s - 1;

        int increment = (h >> 7) | 1;
        for (;;) {
            s = (s + increment) & (keys.length - 1);
            if (keys[s] == key)
                return s;
            if (keys[s] == emptyVal)
                return -s - 1;
        }
    }

    /** Does this set contain the specified integer? */
    public boolean exists(int key) {
        return find(key) >= 0;
    }

    /** Puts this integer (key) in the set, and returns the slot index it was added to.
     * It rehashes if adding it would make the set more than 75% full. */
    public int put(int key) {
        int s = find(key);
        if (s < 0) {
            count++;
            if (count >= rehashCount) {
                rehash();
                s = getSlot(key);
            } else {
                s = -s - 1;
            }
            keys[s] = key;
        }
        return s;
    }

    /** (internal) Rehashes by doubling {@code int[] key} and filling with the old values. */
    public void rehash() {
        int newSize = keys.length << 1;
        int[] oldKeys = keys;
        keys = new int[newSize];
        if (emptyVal != 0)
            Arrays.fill(keys, emptyVal);

        for (int key : oldKeys) {
            if (key == emptyVal)
                continue;
            int newSlot = getSlot(key);
            keys[newSlot] = key;
        }
        rehashCount = newSize - (newSize >> 2);
    }

    /** Return the memory footprint of this class in bytes. */
    public long ramBytesUsed() {
        return RamUsageEstimator.alignObjectSize(Integer.BYTES * 3 + RamUsageEstimator.NUM_BYTES_OBJECT_REF)
                + RamUsageEstimator.sizeOf(keys);
    }
}