CompactHashSet.java Source code

Java tutorial

Introduction

Here is the source code for CompactHashSet.java

Source

//package net.ontopia.utils;

// WARNING: If you make any changes to this class, make sure that you
// update CompactIdentityHashSet.java, UniqueSet.java and
// SoftHashMapIndex.java accordingly.

import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * INTERNAL: Implements the Set interface more compactly than java.util.HashSet
 * by using a closed hashtable.
 *
 * <p>
 * All elements live directly in a single array. {@code null} elements are
 * stored as the {@link #nullObject} sentinel and removed elements leave the
 * {@link #deletedObject} sentinel behind so that probe chains stay intact.
 * The class performs no synchronization of its own.
 *
 * @param <E>
 *            the type of elements maintained by this set.
 */
public class CompactHashSet<E> extends java.util.AbstractSet<E> {

    /** Table size used by the no-argument constructor. */
    protected final static int INITIAL_SIZE = 3;
    /**
     * Maximum fraction of non-null cells (live elements plus deleted markers)
     * tolerated after an insertion before the table is rehashed.
     */
    protected final static double LOAD_FACTOR = 0.75;

    /**
     * This object is used to represent null, should clients add that to the
     * set.
     */
    protected final static Object nullObject = new Object();
    /**
     * When an object is deleted this object is put into the hashtable in its
     * place, so that other objects with the same key (collisions) further down
     * the hashtable are not lost after we delete an object in the collision
     * chain.
     */
    protected final static Object deletedObject = new Object();
    /** Number of live elements currently stored in the set. */
    protected int elements;
    /**
     * This is the number of empty (null) cells. It's not necessarily the same
     * as objects.length - elements, because some cells may contain
     * deletedObject.
     */
    protected int freecells;
    /**
     * The hashtable itself. Cells hold either null (never used), a live
     * element, nullObject or deletedObject.
     */
    protected E[] objects;
    /** Incremented on every structural change; iterators use it to fail fast. */
    protected int modCount;

    /**
     * Constructs a new, empty set with the default initial table size.
     */
    public CompactHashSet() {
        this(INITIAL_SIZE);
    }

    /**
     * Constructs a new, empty set.
     *
     * @param size
     *            the initial table size; 0 is silently replaced by 1.
     */
    @SuppressWarnings("unchecked")
    public CompactHashSet(int size) {
        // NOTE: If array size is 0, we get a
        // "java.lang.ArithmeticException: / by zero" in add(Object).
        objects = (E[]) new Object[(size == 0 ? 1 : size)];
        elements = 0;
        freecells = objects.length;
        modCount = 0;
    }

    /**
     * Constructs a new set containing the elements in the specified collection.
     *
     * @param c
     *            the collection whose elements are to be placed into this set.
     */
    public CompactHashSet(Collection<? extends E> c) {
        this(c.size());
        addAll(c);
    }

    // SET IMPLEMENTATION

    /**
     * Returns an iterator over the elements in this set. The elements are
     * returned in no particular order.
     *
     * @return an Iterator over the elements in this set.
     * @see ConcurrentModificationException
     */
    @Override
    public Iterator<E> iterator() {
        return new CompactHashIterator();
    }

    /**
     * Returns the number of elements in this set (its cardinality).
     */
    @Override
    public int size() {
        return elements;
    }

    /**
     * Returns {@code true} if this set contains no elements.
     */
    @Override
    public boolean isEmpty() {
        return elements == 0;
    }

    /**
     * Returns {@code true} if this set contains the specified element.
     *
     * @param o
     *            element whose presence in this set is to be tested.
     * @return {@code true} if this set contains the specified element.
     */
    @Override
    public boolean contains(Object o) {
        return objects[findCell(o == null ? nullObject : o)] != null;
    }

    /**
     * Adds the specified element to this set if it is not already present.
     *
     * @param o
     *            element to be added to this set.
     * @return {@code true} if the set did not already contain the specified
     *         element.
     */
    @SuppressWarnings("unchecked")
    public boolean add(Object o) {
        if (o == null)
            o = nullObject;

        int hash = o.hashCode();
        int index = (hash & 0x7FFFFFFF) % objects.length;
        int offset = 1;
        int deletedix = -1;

        // search for the object (continue while !null and !this object)
        while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) {

            // if there's a deleted object here we can put this object here,
            // provided it's not in here somewhere else already
            if (objects[index] == deletedObject)
                deletedix = index;

            index = ((index + offset) & 0x7FFFFFFF) % objects.length;
            offset = nextOffset(offset);
        }

        if (objects[index] != null)
            // was there already
            return false;

        if (deletedix != -1) // reusing a deleted cell
            index = deletedix;
        else
            freecells--;

        modCount++;
        elements++;

        // The parameter is deliberately typed Object and the table is an
        // Object[] at runtime, so this unchecked cast never fails here; it
        // is what lets the nullObject sentinel be stored in the table.
        objects[index] = (E) o;

        // do we need to rehash?
        if (1 - (freecells / (double) objects.length) > LOAD_FACTOR)
            rehash();
        return true;
    }

    /**
     * Removes the specified element from the set.
     *
     * @param o
     *            element to be removed from this set, if present.
     * @return {@code true} if the set contained the specified element.
     */
    @Override
    @SuppressWarnings("unchecked")
    public boolean remove(Object o) {
        int index = findCell(o == null ? nullObject : o);
        if (objects[index] == null)
            // we did not find the object
            return false;

        // leave a marker behind so that later entries of the same probe
        // chain remain reachable; freecells is untouched because the cell
        // is still non-null
        objects[index] = (E) deletedObject;
        modCount++;
        elements--;
        return true;
    }

    /**
     * Removes all of the elements from this set.
     */
    @Override
    public void clear() {
        elements = 0;
        java.util.Arrays.fill(objects, null);
        freecells = objects.length;
        modCount++;
    }

    /**
     * Returns a newly allocated array containing all elements of this set, in
     * table order. A stored null element appears as {@code null}.
     */
    @Override
    public Object[] toArray() {
        Object[] result = new Object[elements];
        copyElementsInto(result);
        return result;
    }

    /**
     * Returns an array containing all elements of this set, using the given
     * array if it is large enough and otherwise a new array of the same
     * runtime component type. If the given array has room to spare, the slot
     * immediately following the last element is set to {@code null}.
     *
     * @param a
     *            the array to fill, or whose component type is to be used.
     * @return an array containing the elements of this set.
     */
    @Override
    @SuppressWarnings("unchecked")
    public <T> T[] toArray(T[] a) {
        int size = elements;
        if (a.length < size)
            a = (T[]) java.lang.reflect.Array.newInstance(a.getClass().getComponentType(), size);
        copyElementsInto(a);
        if (a.length > size)
            a[size] = null; // end marker required by Collection.toArray(T[])
        return a;
    }

    //  INTERNAL METHODS

    /**
     * INTERNAL: Used for debugging only.
     */
    public void dump() {
        System.out.println("Size: " + objects.length);
        System.out.println("Elements: " + elements);
        System.out.println("Free cells: " + freecells);
        System.out.println();
        for (int ix = 0; ix < objects.length; ix++)
            System.out.println("[" + ix + "]: " + objects[ix]);
    }

    /**
     * INTERNAL: Figures out correct size for rehashed set, then does the
     * rehash.
     */
    protected void rehash() {
        // do we need to increase capacity, or are there so many
        // deleted objects hanging around that rehashing to the same
        // size is sufficient? if 5% (arbitrarily chosen number) of
        // cells can be freed up by a rehash, we do it.

        int garbageCells = objects.length - (elements + freecells);
        if (garbageCells / (double) objects.length > 0.05)
            // rehash with same size
            rehash(objects.length);
        else
            // rehash with increased capacity
            rehash(objects.length * 2 + 1);
    }

    /**
     * INTERNAL: Rebuilds the table with the given capacity, dropping all
     * deleted markers.
     *
     * @param newCapacity
     *            size of the new table. It is not validated here; the
     *            placement loop only stops at an empty cell, so it needs to be
     *            larger than the number of live elements.
     */
    @SuppressWarnings("unchecked")
    protected void rehash(int newCapacity) {
        E[] newObjects = (E[]) new Object[newCapacity];

        for (Object o : objects) {
            if (o == null || o == deletedObject)
                continue;

            int index = (o.hashCode() & 0x7FFFFFFF) % newCapacity;
            int offset = 1;

            // find an empty cell; no need to test for duplicates
            while (newObjects[index] != null) {
                index = ((index + offset) & 0x7FFFFFFF) % newCapacity;
                offset = nextOffset(offset);
            }

            newObjects[index] = (E) o;
        }

        objects = newObjects;
        freecells = objects.length - elements;
    }

    /**
     * Probes the table for the given key.
     *
     * @param key
     *            the (non-null) object to look for; callers map null to
     *            nullObject first.
     * @return the index of the cell holding an equal object, or of the null
     *         cell that ended the probe chain if there is none. The loop
     *         only stops at one of those two, so it relies on the table
     *         containing null cells (add() rehashes once too few remain).
     */
    private int findCell(Object key) {
        int hash = key.hashCode();
        int index = (hash & 0x7FFFFFFF) % objects.length;
        int offset = 1;

        // continue while !null and !this object
        while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(key))) {
            index = ((index + offset) & 0x7FFFFFFF) % objects.length;
            offset = nextOffset(offset);
        }
        return index;
    }

    /**
     * Returns the probe step that follows the given one: 2 * offset + 1,
     * except that a result of -1 (reached through int overflow) is replaced
     * by 2.
     */
    private static int nextOffset(int offset) {
        int next = offset * 2 + 1;
        return next == -1 ? 2 : next;
    }

    /**
     * Copies every live element into the start of the target array in table
     * order, translating nullObject back to null. The target must have at
     * least {@code elements} slots.
     */
    private void copyElementsInto(Object[] target) {
        int pos = 0;
        for (Object cell : objects) {
            if (cell == null || cell == deletedObject)
                continue;
            target[pos++] = (cell == nullObject) ? null : cell;
        }
    }

    // ITERATOR IMPLEMENTATION

    /**
     * Fail-fast iterator that walks the table in index order and skips empty
     * and deleted cells.
     */
    private class CompactHashIterator implements Iterator<E> {
        /** Index of the cell the next call to next() will return. */
        private int index;
        /**
         * Index of the cell most recently returned by next(), or -1 when
         * there is nothing that remove() could delete.
         */
        private int lastReturned = -1;

        /**
         * The modCount value that the iterator believes that the backing
         * CompactHashSet should have. If this expectation is violated, the
         * iterator has detected concurrent modification.
         */
        private int expectedModCount;

        CompactHashIterator() {
            index = nextOccupied(0);
            expectedModCount = modCount;
        }

        /**
         * Returns the index of the first cell at or after {@code from} that
         * holds a live element, or the table length if there is none.
         */
        private int nextOccupied(int from) {
            int ix = from;
            while (ix < objects.length && (objects[ix] == null || objects[ix] == deletedObject))
                ix++;
            return ix;
        }

        @Override
        public boolean hasNext() {
            return index < objects.length;
        }

        @Override
        public E next() {
            if (modCount != expectedModCount)
                throw new ConcurrentModificationException();
            if (index >= objects.length) {
                lastReturned = -1;
                throw new NoSuchElementException();
            }

            lastReturned = index;
            index = nextOccupied(index + 1);

            E current = objects[lastReturned];
            return current == nullObject ? null : current;
        }

        @Override
        @SuppressWarnings("unchecked")
        public void remove() {
            if (modCount != expectedModCount)
                throw new ConcurrentModificationException();
            if (lastReturned < 0)
                throw new IllegalStateException();
            // delete object
            if (objects[lastReturned] != null && objects[lastReturned] != deletedObject) {
                objects[lastReturned] = (E) deletedObject;
                elements--;
                modCount++;
                expectedModCount = modCount; // this is expected; we made the
                // change
            }
            // a second remove() without an intervening next() must fail
            lastReturned = -1;
        }
    }

}