net.tomp2p.rpc.SimpleBloomFilter.java Source code

Java tutorial

Introduction

Here is the source code for net.tomp2p.rpc.SimpleBloomFilter.java

Source

/*
 * Copyright 2012 Thomas Bocek
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package net.tomp2p.rpc;

import io.netty.buffer.ByteBuf;

import java.io.Serializable;
import java.util.BitSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A simple Bloom Filter (see http://en.wikipedia.org/wiki/Bloom_filter) that
 * uses java.util.Random as a primitive hash function, and which implements
 * Java's Set interface for convenience. Only the add(), addAll(), clear(),
 * contains(), containsAll(), isEmpty() and size() methods of the Set interface
 * are implemented. Calling any other Set method will yield an
 * UnsupportedOperationException. Note that size() returns the serialized size
 * in bytes, not the number of elements. This code may be used, modified, and
 * redistributed provided that the author tag below remains intact.
 * 
 * @author Ian Clarke <ian@uprizer.com>
 * @author Thomas Bocek <tom@tomp2p.net> Added methods to get and create a
 *         SimpleBloomFilter from existing data. The data can be either a BitSet
 *         or a byte[].
 * @param <E>
 *            The type of object the BloomFilter should contain
 */
public class SimpleBloomFilter<E> implements Set<E>, Serializable {

    private static final Logger LOG = LoggerFactory.getLogger(SimpleBloomFilter.class);

    private static final long serialVersionUID = 3527833617516722215L;

    /** Number of header bytes that hold the total serialized length (unsigned short). */
    private static final int SIZE_HEADER_LENGTH = 2;

    /** Number of header bytes that hold the expected number of elements (int). */
    private static final int SIZE_HEADER_ELEMENTS = 4;

    /** Total header size in bytes that precedes the bit data in the serialized form. */
    public static final int SIZE_HEADER = SIZE_HEADER_LENGTH + SIZE_HEADER_ELEMENTS;

    /** Number of bit positions that are set / probed per element. */
    private final int k;

    private final BitSet bitSet;

    /** Size of the bit data in bytes (without header). */
    private final int byteArraySize;

    /** Size of the bit data in bits, always byteArraySize * 8. */
    private final int bitArraySize;

    /** The expected number of elements as provided by the creator of the filter. */
    private final int expectedElements;

    /**
     * Construct an empty SimpleBloomFilter. You must specify the size of the
     * Bloom Filter in bytes, and also you should specify the number of items
     * you expect to add. The latter is used to choose some optimal internal
     * values to minimize the false-positive rate (which can be estimated with
     * expectedFalsePositiveProbability()).
     * 
     * @param byteArraySize
     *            The size of the bit array in bytes. The number of bits (often
     *            called 'm' in the context of bloom filters) is eight times
     *            this value.
     * @param expectedElements
     *            The typical number of items you expect to be added to the
     *            SimpleBloomFilter (often called 'n').
     */
    public SimpleBloomFilter(final int byteArraySize, final int expectedElements) {
        this(byteArraySize, expectedElements, new BitSet(byteArraySize * Byte.SIZE));
    }

    /**
     * Construct an empty SimpleBloomFilter whose size is derived from the
     * desired false positive probability and the expected number of elements.
     * Inspired by https://github.com/magnuss/java-bloomfilter
     * 
     * @param falsePositiveProbability
     *            The desired false positive probability
     * @param expectedElements
     *            The typical number of items you expect to be added to the
     *            SimpleBloomFilter (often called 'n').
     */
    public SimpleBloomFilter(final double falsePositiveProbability, final int expectedElements) {
        // bits per expected element: ceil(-log2(p)) / ln(2)
        final double bitsPerElement = Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2.0)))
                / Math.log(2.0);
        this.expectedElements = expectedElements;
        final int requestedBits = (int) Math.ceil(bitsPerElement * expectedElements);
        // round up to whole bytes
        this.byteArraySize = (requestedBits + 7) / 8;
        this.bitArraySize = byteArraySize * Byte.SIZE;
        final double hf = (bitArraySize / (double) expectedElements) * Math.log(2.0);
        // k may be larger as we may have increased the byte array size to match
        // a byte
        this.k = (int) Math.ceil(hf);
        this.bitSet = new BitSet(bitArraySize);
    }

    /**
     * Constructs a SimpleBloomFilter out of existing data. The buffer is
     * expected to contain the format written by {@link #toByteBuf(ByteBuf)}: an
     * unsigned short with the total length (header included), an int with the
     * expected number of elements, followed by the bit data.
     * 
     * @param channelBuffer
     *            The byte buffer with the data
     */
    public SimpleBloomFilter(final ByteBuf channelBuffer) {
        // the length field covers header and data, the data size is what remains
        this.byteArraySize = channelBuffer.readUnsignedShort() - SIZE_HEADER;
        this.bitArraySize = byteArraySize * Byte.SIZE;
        this.expectedElements = channelBuffer.readInt();
        final double hf = (bitArraySize / (double) expectedElements) * Math.log(2.0);
        this.k = (int) Math.ceil(hf);
        if (byteArraySize > 0) {
            final byte[] data = new byte[byteArraySize];
            channelBuffer.readBytes(data);
            this.bitSet = RPCUtils.fromByteArray(data);
        } else {
            // no data transmitted, use an empty bit set
            this.bitSet = new BitSet();
        }
    }

    /**
     * Constructs a SimpleBloomFilter out of existing data. You must specify the
     * size of the Bloom Filter in bytes, and also you should specify the
     * number of items you expect to add. The latter is used to choose some
     * optimal internal values to minimize the false-positive rate (which can be
     * estimated with expectedFalsePositiveProbability()).
     * 
     * @param byteArraySize
     *            The size of the bit array in bytes. The number of bits (often
     *            called 'm' in the context of bloom filters) is eight times
     *            this value.
     * @param expectedElements
     *            The typical number of items you expect to be added to the
     *            SimpleBloomFilter (often called 'n').
     * @param bitSet
     *            The data that will be used in the backing BitSet. The BitSet
     *            is not copied.
     */
    public SimpleBloomFilter(final int byteArraySize, final int expectedElements, final BitSet bitSet) {
        this.byteArraySize = byteArraySize;
        this.bitArraySize = byteArraySize * Byte.SIZE;
        this.expectedElements = expectedElements;
        final double hf = (bitArraySize / (double) expectedElements) * Math.log(2.0);
        this.k = (int) Math.ceil(hf);
        if (hf < 1.0) {
            LOG.warn(
                    "Bit size too small for storing all expected elements. For optimum result increase byteArraySize to {}",
                    expectedElements / Math.log(2.0));
        }
        this.bitSet = bitSet;
    }

    /**
     * Calculates the approximate probability of the contains() method returning
     * true for an object that had not previously been inserted into the bloom
     * filter. This is known as the "false positive probability".
     * 
     * @return The estimated false positive rate
     */
    public double expectedFalsePositiveProbability() {
        return Math.pow((1 - Math.exp(-k * (double) expectedElements / bitArraySize)), k);
    }

    /**
     * Returns the expected elements that was provided by the user.
     * 
     * @return The expected elements that was provided by the user
     */
    public int expectedElements() {
        return expectedElements;
    }

    /**
     * Adds an element by setting k bit positions. The positions are drawn from
     * a java.util.Random that is seeded with the hash code of the element.
     * 
     * @param o
     *            Add element
     * @return This method will always return false
     * 
     * @see java.util.Set#add(java.lang.Object)
     */
    @Override
    public boolean add(final E o) {
        final Random positions = new Random(o.hashCode());
        for (int i = 0; i < k; i++) {
            bitSet.set(positions.nextInt(bitArraySize));
        }
        return false;
    }

    /**
     * Adds all elements of the collection, see {@link #add(Object)}.
     * 
     * @param c
     *            The elements to add
     * @return This method will always return false
     */
    @Override
    public boolean addAll(final Collection<? extends E> c) {
        for (final E element : c) {
            add(element);
        }
        return false;
    }

    /**
     * Clear the Bloom Filter by setting all bits to false.
     */
    @Override
    public void clear() {
        bitSet.clear();
    }

    /**
     * Checks if an element may have been added. The same k bit positions as in
     * {@link #add(Object)} are probed.
     * 
     * @param o
     *            The object to compare
     * @return False indicates that o was definitely not added to this Bloom
     *         Filter, true indicates that it probably was. The probability can
     *         be estimated using the expectedFalsePositiveProbability() method.
     *         A void filter (see {@link #isVoid()}) always returns false.
     */
    @Override
    public boolean contains(final Object o) {
        if (isVoid()) {
            return false;
        }
        final Random positions = new Random(o.hashCode());
        for (int i = 0; i < k; i++) {
            if (!bitSet.get(positions.nextInt(bitArraySize))) {
                // one missing bit proves that o was never added
                return false;
            }
        }
        return true;
    }

    /**
     * @param c
     *            The collection to check
     * @return true if all elements of the collection are (probably) in this
     *         bloom filter
     */
    @Override
    public boolean containsAll(final Collection<?> c) {
        for (final Object element : c) {
            if (!contains(element)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns if no bit is set in the bloom filter.
     * 
     * @return true if the backing bit set contains no bit that is set to true
     */
    @Override
    public boolean isEmpty() {
        return bitSet.isEmpty();
    }

    /**
     * Returns if the bloom filter is empty and never can be filled. This is the
     * case if the filter was created with a byte array size of 0 and 0
     * expected elements.
     * 
     * @return true if both the byte array size and the expected elements are 0
     */
    public boolean isVoid() {
        return byteArraySize == 0 && expectedElements == 0;
    }

    /**
     * Returns if all bits of the backing bit set are set.
     * 
     * NOTE(review): the comparison uses BitSet.size(), which is the number of
     * bits of space the BitSet currently uses and may differ from the
     * configured bit array size - confirm that this is intended.
     * 
     * @return true if the number of set bits equals the size of the backing
     *         bit set
     */
    public boolean isFull() {
        return bitSet.cardinality() == bitSet.size();
    }

    /**
     * Not implemented.
     * 
     * @return nothing
     */
    @Override
    public Iterator<E> iterator() {
        throw new UnsupportedOperationException();
    }

    /**
     * Not implemented.
     * 
     * @param o
     *            nothing
     * @return nothing
     */
    @Override
    public boolean remove(final Object o) {
        throw new UnsupportedOperationException();
    }

    /**
     * Not implemented.
     * 
     * @param c
     *            nothing
     * @return nothing
     */
    @Override
    public boolean removeAll(final Collection<?> c) {
        throw new UnsupportedOperationException();
    }

    /**
     * Not implemented.
     * 
     * @param c
     *            nothing
     * @return nothing
     */
    @Override
    public boolean retainAll(final Collection<?> c) {
        throw new UnsupportedOperationException();
    }

    /**
     * @return the byte array size of the data (incl. header)
     */
    @Override
    public int size() {
        return byteArraySize + SIZE_HEADER;
    }

    /**
     * Not implemented.
     * 
     * @return nothing
     */
    @Override
    public Object[] toArray() {
        throw new UnsupportedOperationException();
    }

    /**
     * Not implemented.
     * 
     * @param a
     *            nothing
     * @param <T>
     *            nothing
     * @return nothing
     */
    @Override
    public <T> T[] toArray(final T[] a) {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns the bitset that backs the bloom filter. The returned object is
     * the internal instance, not a copy.
     * 
     * @return bloom filter as a bitset
     */
    public BitSet getBitSet() {
        return bitSet;
    }

    /**
     * Converts data to a byte buffer. The first two bytes contain the size of
     * this simple bloom filter (header included), followed by four bytes with
     * the expected number of elements and the bit data. As the size is read
     * back as an unsigned short, the total size has to fit into 16 bits.
     * 
     * @param buf
     *            The byte buffer where the bloom filter will be written.
     */
    public void toByteBuf(final ByteBuf buf) {
        buf.writeShort(byteArraySize + SIZE_HEADER);
        buf.writeInt(expectedElements);
        final byte[] data = RPCUtils.toByteArray(bitSet);
        buf.writeBytes(data);
        // pad with zeros up to the announced size
        // NOTE(review): assumes RPCUtils.toByteArray never returns more than
        // byteArraySize bytes - confirm
        buf.writeZero(byteArraySize - data.length);
    }

    /**
     * Merges two bloom filters using OR. Neither this filter nor the argument
     * is modified. The merged filter uses the byte array size and the expected
     * elements of this filter.
     * 
     * @param toMerge
     *            The bloom filter to merge with this one. It must have the same
     *            bit array size as this filter.
     * @return A new bloom filter that contains both sets.
     */
    public SimpleBloomFilter<E> merge(final SimpleBloomFilter<E> toMerge) {
        if (toMerge.bitArraySize != bitArraySize) {
            throw new RuntimeException("this is not supposed to happen");
        }
        final BitSet mergedBitSet = (BitSet) bitSet.clone();
        mergedBitSet.or(toMerge.bitSet);
        // the constructor expects the size in bytes; passing the size in bits
        // would change the bit range and k, and break lookups in the result
        return new SimpleBloomFilter<E>(byteArraySize, expectedElements, mergedBitSet);
    }

    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof SimpleBloomFilter)) {
            return false;
        }
        final SimpleBloomFilter<?> other = (SimpleBloomFilter<?>) obj;
        return other.k == k && other.bitArraySize == bitArraySize && expectedElements == other.expectedElements
                && bitSet.equals(other.bitSet);
    }

    @Override
    public int hashCode() {
        final int magic = 31;
        int hash = 7;
        hash = magic * hash + bitSet.hashCode();
        hash = magic * hash + k;
        hash = magic * hash + expectedElements;
        hash = magic * hash + bitArraySize;
        return hash;
    }

    /**
     * @return The bits as a string of '0' and '1', starting with bit 0 and
     *         ending with the highest bit that is set
     */
    @Override
    public String toString() {
        final int length = bitSet.length();
        final StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            sb.append(bitSet.get(i) ? '1' : '0');
        }
        return sb.toString();
    }

    /**
     * Sets all bits of the backing bit set to true.
     * 
     * NOTE(review): the range that is set is BitSet.size(), which is the number
     * of bits of space the BitSet currently uses and may differ from the
     * configured bit array size - confirm that this is intended.
     * 
     * @return This instance
     */
    public SimpleBloomFilter<E> setAll() {
        bitSet.set(0, bitSet.size(), true);
        return this;
    }
}