Java tutorial
//package net.ontopia.utils; // WARNING: If you do any changes to this class, make sure that you // update CompactIdentityHashSet.java, UniqueSet.java and // SoftHashMapIndex.java accordingly. import java.util.Collection; import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.NoSuchElementException; /** * INTERNAL: Implements the Set interface more compactly than java.util.HashSet * by using a closed hashtable. */ public class CompactHashSet<E> extends java.util.AbstractSet<E> { protected final static int INITIAL_SIZE = 3; protected final static double LOAD_FACTOR = 0.75; /** * This object is used to represent null, should clients add that to the * set. */ protected final static Object nullObject = new Object(); /** * When an object is deleted this object is put into the hashtable in its * place, so that other objects with the same key (collisions) further down * the hashtable are not lost after we delete an object in the collision * chain. */ protected final static Object deletedObject = new Object(); protected int elements; /** * This is the number of empty (null) cells. It's not necessarily the same * as objects.length - elements, because some cells may contain * deletedObject. */ protected int freecells; protected E[] objects; protected int modCount; /** * Constructs a new, empty set. */ public CompactHashSet() { this(INITIAL_SIZE); } /** * Constructs a new, empty set. */ @SuppressWarnings("unchecked") public CompactHashSet(int size) { // NOTE: If array size is 0, we get a // "java.lang.ArithmeticException: / by zero" in add(Object). objects = (E[]) new Object[(size == 0 ? 1 : size)]; elements = 0; freecells = objects.length; modCount = 0; } /** * Constructs a new set containing the elements in the specified collection. * * @param c * the collection whose elements are to be placed into this set. */ public CompactHashSet(Collection<E> c) { this(c.size()); addAll(c); } // SET IMPLEMENTATION /** * Returns an iterator over the elements in this set. The elements are * returned in no particular order. * * @return an Iterator over the elements in this set. * @see ConcurrentModificationException */ public Iterator<E> iterator() { return new CompactHashIterator<E>(); } /** * Returns the number of elements in this set (its cardinality). */ public int size() { return elements; } /** * Returns <tt>true</tt> if this set contains no elements. */ public boolean isEmpty() { return elements == 0; } /** * Returns <tt>true</tt> if this set contains the specified element. * * @param o * element whose presence in this set is to be tested. * @return <tt>true</tt> if this set contains the specified element. */ public boolean contains(Object o) { if (o == null) o = nullObject; int hash = o.hashCode(); int index = (hash & 0x7FFFFFFF) % objects.length; int offset = 1; // search for the object (continue while !null and !this object) while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) { index = ((index + offset) & 0x7FFFFFFF) % objects.length; offset = offset * 2 + 1; if (offset == -1) offset = 2; } return objects[index] != null; } /** * Adds the specified element to this set if it is not already present. * * @param o * element to be added to this set. * @return <tt>true</tt> if the set did not already contain the specified * element. */ @SuppressWarnings("unchecked") public boolean add(Object o) { if (o == null) o = nullObject; int hash = o.hashCode(); int index = (hash & 0x7FFFFFFF) % objects.length; int offset = 1; int deletedix = -1; // search for the object (continue while !null and !this object) while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) { // if there's a deleted object here we can put this object here, // provided it's not in here somewhere else already if (objects[index] == deletedObject) deletedix = index; index = ((index + offset) & 0x7FFFFFFF) % objects.length; offset = offset * 2 + 1; if (offset == -1) offset = 2; } if (objects[index] == null) { // wasn't present already if (deletedix != -1) // reusing a deleted cell index = deletedix; else freecells--; modCount++; elements++; // here we face a problem regarding generics: // add(E o) is not possible because of the null Object. We cant do // 'new E()' or '(E) new Object()' // so adding an empty object is a problem here // If (! o instanceof E) : This will cause a class cast exception // If (o instanceof E) : This will work fine objects[index] = (E) o; // do we need to rehash? if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) rehash(); return true; } else // was there already return false; } /** * Removes the specified element from the set. */ @SuppressWarnings("unchecked") public boolean remove(Object o) { if (o == null) o = nullObject; int hash = o.hashCode(); int index = (hash & 0x7FFFFFFF) % objects.length; int offset = 1; // search for the object (continue while !null and !this object) while (objects[index] != null && !(objects[index].hashCode() == hash && objects[index].equals(o))) { index = ((index + offset) & 0x7FFFFFFF) % objects.length; offset = offset * 2 + 1; if (offset == -1) offset = 2; } // we found the right position, now do the removal if (objects[index] != null) { // we found the object // same problem here as with add objects[index] = (E) deletedObject; modCount++; elements--; return true; } else // we did not find the object return false; } /** * Removes all of the elements from this set. */ public void clear() { elements = 0; for (int ix = 0; ix < objects.length; ix++) objects[ix] = null; freecells = objects.length; modCount++; } public Object[] toArray() { Object[] result = new Object[elements]; Object[] objects = this.objects; int pos = 0; for (int i = 0; i < objects.length; i++) if (objects[i] != null && objects[i] != deletedObject) { if (objects[i] == nullObject) result[pos++] = null; else result[pos++] = objects[i]; } // unchecked because it should only contain E return result; } // not sure if this needs to have generics @SuppressWarnings("unchecked") public <T> T[] toArray(T[] a) { int size = elements; if (a.length < size) a = (T[]) java.lang.reflect.Array.newInstance(a.getClass().getComponentType(), size); E[] objects = this.objects; int pos = 0; for (int i = 0; i < objects.length; i++) if (objects[i] != null && objects[i] != deletedObject) { if (objects[i] == nullObject) a[pos++] = null; else a[pos++] = (T) objects[i]; } return a; } // INTERNAL METHODS /** * INTERNAL: Used for debugging only. */ public void dump() { System.out.println("Size: " + objects.length); System.out.println("Elements: " + elements); System.out.println("Free cells: " + freecells); System.out.println(); for (int ix = 0; ix < objects.length; ix++) System.out.println("[" + ix + "]: " + objects[ix]); } /** * INTERNAL: Figures out correct size for rehashed set, then does the * rehash. */ protected void rehash() { // do we need to increase capacity, or are there so many // deleted objects hanging around that rehashing to the same // size is sufficient? if 5% (arbitrarily chosen number) of // cells can be freed up by a rehash, we do it. int gargagecells = objects.length - (elements + freecells); if (gargagecells / (double) objects.length > 0.05) // rehash with same size rehash(objects.length); else // rehash with increased capacity rehash(objects.length * 2 + 1); } /** * INTERNAL: Rehashes the hashset to a bigger size. */ @SuppressWarnings("unchecked") protected void rehash(int newCapacity) { int oldCapacity = objects.length; @SuppressWarnings("unchecked") E[] newObjects = (E[]) new Object[newCapacity]; for (int ix = 0; ix < oldCapacity; ix++) { Object o = objects[ix]; if (o == null || o == deletedObject) continue; int hash = o.hashCode(); int index = (hash & 0x7FFFFFFF) % newCapacity; int offset = 1; // search for the object while (newObjects[index] != null) { // no need to test for // duplicates index = ((index + offset) & 0x7FFFFFFF) % newCapacity; offset = offset * 2 + 1; if (offset == -1) offset = 2; } newObjects[index] = (E) o; } objects = newObjects; freecells = objects.length - elements; } // ITERATOR IMPLEMENTATON private class CompactHashIterator<T> implements Iterator<T> { private int index; private int lastReturned = -1; /** * The modCount value that the iterator believes that the backing * CompactHashSet should have. If this expectation is violated, the * iterator has detected concurrent modification. */ private int expectedModCount; @SuppressWarnings("empty-statement") public CompactHashIterator() { for (index = 0; index < objects.length && (objects[index] == null || objects[index] == deletedObject); index++) ; expectedModCount = modCount; } public boolean hasNext() { return index < objects.length; } @SuppressWarnings({ "empty-statement", "unchecked" }) public T next() { if (modCount != expectedModCount) throw new ConcurrentModificationException(); int length = objects.length; if (index >= length) { lastReturned = -2; throw new NoSuchElementException(); } lastReturned = index; for (index += 1; index < length && (objects[index] == null || objects[index] == deletedObject); index++) ; if (objects[lastReturned] == nullObject) return null; else return (T) objects[lastReturned]; } @SuppressWarnings("unchecked") public void remove() { if (modCount != expectedModCount) throw new ConcurrentModificationException(); if (lastReturned == -1 || lastReturned == -2) throw new IllegalStateException(); // delete object if (objects[lastReturned] != null && objects[lastReturned] != deletedObject) { objects[lastReturned] = (E) deletedObject; elements--; modCount++; expectedModCount = modCount; // this is expected; we made the // change } } } }