Java tutorial
/* * Created on 07-Mar-2005 by Ryan McNally */ //package com.ryanm.util.sets; import java.util.Random; /** * Implements a Bloom filter. Which, as you may not know, is a * space-efficient structure for storing a set. * * @author ryanm */ public class BloomFilter { /** * We discard this number of randoms from the RNG when we set the * seed, as the first few tend to be similar for similar seeds */ private final static int THROWAWAY_RANDOMS = 5; /** * The bitstring */ private boolean[] bitstring; /** * We use the output of the RNG as a hash function */ private final Random hash = new Random(6381273189l); /** * The number of hash functions to use */ private int hashCount; /** * A magic number used to compute what size a bloom filter should * be. Bloom filters should be ( members * hashes ) / bloomMagic * long */ public static final double bloomMagic = -Math.log(0.5) / Math.log(2); /** * Constructs a new Bloom filter * * @param filterSize * The size of the filter's bitstring * @param hashCount * The number of hash functions to use when inserting * into and querying the filter */ public BloomFilter(int filterSize, int hashCount) { bitstring = new boolean[filterSize]; this.hashCount = hashCount; } /** * Builds a Bloom filter with the optimum length * * @param members * The members to enter into the filter * @param hashCount * The number of hashes to use * @return An optimally-sized filter that contains the specified * elements */ public static BloomFilter buildFilter(int[] members, int hashCount) { int filterLength = (int) (members.length * hashCount / BloomFilter.bloomMagic); filterLength = Math.max(filterLength, 1); BloomFilter filter = new BloomFilter(filterLength, hashCount); for (int i : members) { filter.insert(i); } return filter; } /** * Inserts an element into this filter * * @param i * The element to insert */ public void insert(int i) { int[] indices = generateIndices(i); for (int j = 0; j < indices.length; j++) { bitstring[indices[j]] = true; } } /** * Tests if this filter contains an element * * @param i * The element to test for * @return true if the filter may contain the element, false if it * definitely does not */ public boolean contains(int i) { int[] indices = generateIndices(i); for (int j = 0; j < indices.length; j++) { if (!bitstring[indices[j]]) { return false; } } return true; } /** * Clears the filter of all elements */ public void clear() { for (int i = 0; i < bitstring.length; i++) { bitstring[i] = false; } } /** * Generates the indices for a given element * * @param i * The element * @return An array of the indices to set or check */ private int[] generateIndices(int i) { // prepare the rng hash.setSeed(i); for (int j = 0; j < THROWAWAY_RANDOMS; j++) { hash.nextInt(bitstring.length); } // generate the indices int[] indices = new int[hashCount]; for (int j = 0; j < indices.length; j++) { indices[j] = hash.nextInt(bitstring.length); } return indices; } /** * Sets the length of the bit string in this filter. Note that this * also has the effect of clearing all entries * * @param size * The new size of the bit string */ public void setSize(int size) { bitstring = new boolean[size]; } /** * Gets the size of this filter's bit string * * @return the size of this filter's bitstring */ public int getSize() { return bitstring.length; } /** * Sets the number of hashes that this filter will use. Note that * this also has the effect of clearing all entries * * @param hashes * The number of hashes to use. */ public void setHashes(int hashes) { hashes = Math.max(0, hashes); hashCount = hashes; clear(); } /** * Gets the number of hashes used in this filter * * @return the number of hashes */ public int getHashes() { return hashCount; } /** * Gets the number of bits that have been set in this filter * * @return The number of bits that are set to 1 in this filter */ public int bitsSet() { int count = 0; for (int i = 0; i < bitstring.length; i++) { if (bitstring[i]) { count++; } } return count; } /** * Returns the saturation level of this filter. When all bits are * set, saturation is 1.0, when no bits are set, saturation is 0.0. * You get the idea * * @return The saturation level */ public float saturation() { return (float) bitsSet() / (float) bitstring.length; } /** * Clones this filter's bitstring * * @return a new boolean array, with the same bits set as in this * filter */ public boolean[] cloneFilter() { boolean[] array = new boolean[bitstring.length]; System.arraycopy(bitstring, 0, array, 0, array.length); return array; } /** * Calculates the hamming distance between this filter and the * supplied array. ie: the number of bits that do not correspond. * The array must be the same size as this filter. * * @param b * @return the hamming distance, or -1 if the two arrays are not * the same size */ public int hammingDistance(boolean[] b) { if (b.length == bitstring.length) { int count = 0; for (int i = 0; i < b.length; i++) { if (b[i] != bitstring[i]) { count++; } } return count; } return -1; } }