View Javadoc

1   package net.obsearch.index.bucket;
2   
3   import java.io.Serializable;
4   import java.util.BitSet;
5   import java.util.Collection;
6   import java.util.Iterator;
7   import java.util.Random;
8   import java.util.Set;
9   
10  /**
11   * A simple Bloom Filter (see http://en.wikipedia.org/wiki/Bloom_filter) that
12   * uses java.util.Random as a primitive hash function, and which implements
13   * Java's Set interface for convenience.
14   * 
15   * Only the add(), addAll(), contains(), and containsAll() methods are
16   * implemented. Calling any other method will yield an 
17   * UnsupportedOperationException.
18   * 
19   * This code may be used, modified, and redistributed provided that the 
20   * author tag below remains intact.
21   * 
22   * @author Ian Clarke <ian@uprizer.com>
23   * 
24   * @param <E>
25   *            The type of object the BloomFilter should contain
26   */
27  public class SimpleBloomFilter<E> implements Set<E>, Serializable {
28  	private static final long serialVersionUID = 3527833617516722215L;
29  	protected int k;
30  	BitSet bitSet;
31  	int bitArraySize, expectedElements;
32  
33  	/**
34  	 * Construct a SimpleBloomFilter. You must specify the number of bits in the
35  	 * Bloom Filter, and also you should specify the number of items you
36  	 * expect to add. The latter is used to choose some optimal internal values to
37  	 * minimize the false-positive rate (which can be estimated with
38  	 * expectedFalsePositiveRate()).
39  	 * 
40  	 * @param bitArraySize
41  	 *            The number of bits in the bit array (often called 'm' in the
42  	 *            context of bloom filters).
43  	 * @param expectedElements
44  	 *            The typical number of items you expect to be added to the
45  	 *            SimpleBloomFilter (often called 'n').
46  	 */
47  	public SimpleBloomFilter(int bitArraySize, int expectedElements) {
48  		this.bitArraySize = bitArraySize;
49  		this.expectedElements = expectedElements;
50  		this.k = (int) Math.ceil((bitArraySize / expectedElements)
51  				* Math.log(2.0));
52  		bitSet = new BitSet(bitArraySize);
53  	}
54  
55  	/**
56  	 * Calculates the approximate probability of the contains() method returning
57  	 * true for an object that had not previously been inserted into the bloom
58  	 * filter. This is known as the "false positive probability".
59  	 * 
60  	 * @return The estimated false positive rate
61  	 */
62  	public double expectedFalsePositiveProbability() {
63  		return Math.pow((1 - Math.exp(-k * (double) expectedElements
64  				/ (double) bitArraySize)), k);
65  	}
66  
67  	/*
68  	 * @return This method will always return false
69  	 * 
70  	 * @see java.util.Set#add(java.lang.Object)
71  	 */
72  	public boolean add(E o) {
73  		Random r = new Random(o.hashCode());
74  		for (int x = 0; x < k; x++) {
75  			bitSet.set(r.nextInt(bitArraySize), true);
76  		}
77  		return false;
78  	}
79  
80  	/**
81  	 * @return This method will always return false
82  	 */
83  	public boolean addAll(Collection<? extends E> c) {
84  		for (E o : c) {
85  			add(o);
86  		}
87  		return false;
88  	}
89  
90  	/**
91  	 * Clear the Bloom Filter
92  	 */
93  	public void clear() {
94  		for (int x = 0; x < bitSet.length(); x++) {
95  			bitSet.set(x, false);
96  		}
97  	}
98  
99  	/**
100 	 * @return False indicates that o was definitely not added to this Bloom Filter, 
101 	 *         true indicates that it probably was.  The probability can be estimated
102 	 *         using the expectedFalsePositiveProbability() method.
103 	 */
104 	public boolean contains(Object o) {
105 		Random r = new Random(o.hashCode());
106 		for (int x = 0; x < k; x++) {
107 			if (!bitSet.get(r.nextInt(bitArraySize)))
108 				return false;
109 		}
110 		return true;
111 	}
112 
113 	public boolean containsAll(Collection<?> c) {
114 		for (Object o : c) {
115 			if (!contains(o))
116 				return false;
117 		}
118 		return true;
119 	}
120 
121 	/**
122 	 * Not implemented
123 	 */
124 	public boolean isEmpty() {
125 		throw new UnsupportedOperationException();
126 	}
127 
128 	/**
129 	 * Not implemented
130 	 */
131 	public Iterator<E> iterator() {
132 		throw new UnsupportedOperationException();
133 	}
134 
135 	/**
136 	 * Not implemented
137 	 */
138 	public boolean remove(Object o) {
139 		throw new UnsupportedOperationException();
140 	}
141 
142 	/**
143 	 * Not implemented
144 	 */
145 	public boolean removeAll(Collection<?> c) {
146 		throw new UnsupportedOperationException();
147 	}
148 
149 	/**
150 	 * Not implemented
151 	 */
152 	public boolean retainAll(Collection<?> c) {
153 		throw new UnsupportedOperationException();
154 	}
155 
156 	/**
157 	 * Not implemented
158 	 */
159 	public int size() {
160 		throw new UnsupportedOperationException();
161 	}
162 
163 	/**
164 	 * Not implemented
165 	 */
166 	public Object[] toArray() {
167 		throw new UnsupportedOperationException();
168 	}
169 
170 	/**
171 	 * Not implemented
172 	 */
173 	public <T> T[] toArray(T[] a) {
174 		throw new UnsupportedOperationException();
175 	}
176 }