View Javadoc

1   package net.obsearch.index.ghs.impl;
2   
3   import java.io.IOException;
4   import java.nio.ByteBuffer;
5   import java.util.ArrayList;
6   import java.util.Arrays;
7   import java.util.Collections;
8   import java.util.Iterator;
9   import java.util.List;
10  import java.util.logging.Logger;
11  
12  import net.obsearch.index.ghs.CBitVector;
13  import net.obsearch.index.CommonsFloat;
14  import net.obsearch.AbstractOBResult;
15  import net.obsearch.asserts.OBAsserts;
16  import net.obsearch.constants.ByteConstants;
17  import net.obsearch.exception.IllegalIdException;
18  import net.obsearch.exception.NotFrozenException;
19  import net.obsearch.exception.OBException;
20  import net.obsearch.exception.OBStorageException;
21  import net.obsearch.exception.OutOfRangeException;
22  import net.obsearch.filter.Filter;
23  import net.obsearch.filter.FilterNonEquals;
24  import net.obsearch.index.IndexFloat;
25  import net.obsearch.index.bucket.impl.BucketObjectFloat;
26  import net.obsearch.index.bucket.sleek.SleekBucketFloat;
27  import net.obsearch.index.ghs.AbstractSketch64;
28  import net.obsearch.ob.OBFloat;
29  import net.obsearch.pivots.IncrementalPairPivotSelector;
30  import net.obsearch.pivots.IncrementalPivotSelector;
31  import net.obsearch.query.AbstractOBQuery;
32  import net.obsearch.query.OBQueryFloat;
33  import net.obsearch.result.OBPriorityQueueFloat;
34  import net.obsearch.result.OBResultInvertedByte;
35  import net.obsearch.result.OBResultInvertedFloat;
36  import net.obsearch.result.OBResultFloat;
37  import net.obsearch.index.ghs.SketchProjection;
38  
39  public final class Sketch64Float<O extends OBFloat> extends AbstractSketch64<O, BucketObjectFloat<O>, OBQueryFloat<O>, SleekBucketFloat<O>>
40  implements IndexFloat<O> {
41  	
42  	private boolean DEBUG = true;
43  	
44  	private static final transient Logger logger = Logger
45  	.getLogger(Sketch64Float.class.getName());
46  	
47  	/**
48  	 * Create a new Sketch64Float with m bytes. 
49  	 * @param type Type of object that will be stored
50  	 * @param pivotSelector Pivot selection strategy to be employed.
51  	 * @param m The number of bits
52  	 * @param bucketPivotCount Number of pivots per bucket
53  	 * @throws OBStorageException
54  	 * @throws OBException
55  	 * @throws IOException
56  	 */
57  	public Sketch64Float(Class<O> type,
58  			IncrementalPairPivotSelector<O> pivotSelector, int m
59  			)												
60  			throws OBStorageException, OBException, IOException {
61  		
62  		super(type, pivotSelector,  m, 0);
63  		
64  	}
65  
66  	public Sketch64Float(){
67  			super();
68  	}
69  	
70  	@Override
71  	public BucketObjectFloat<O> getBucket(O object) throws OBException,
72  			InstantiationException, IllegalAccessException {
73  		BucketObjectFloat<O> res = new BucketObjectFloat<O>(null, -1L, object);
74  		return res;
75  	}
76  
77  	
78  	/**
79  	 * Compute the sketch for the given object.
80  	 */
81  	@Override
82  	public SketchProjection getProjection(BucketObjectFloat<O> bucket) throws OBException {
83  			//OBAsserts.chkAssert(m <= Byte.MAX_VALUE, "Cannot address more than Byte.MAX_VALUE");
84  		int i = 0;
85  		CBitVector res = new CBitVector(m);
86  		double[] lowerBounds = new double[m];
87  		while(i < m){
88  				float distanceA = pivotGrid[i][0].distance(bucket.getObject());
89  				float distanceB = pivotGrid[i][1].distance(bucket.getObject());
90  			if(distanceA > distanceB ){
91  					res.set(i);
92  				distortionStats[i][1]++;
93  			}else{
94  				distortionStats[i][0]++;
95  			}
96  			// update the lower bounds:
97  			lowerBounds[i] = Math.max(((double)Math.abs(distanceA - distanceB)) / 2, 0);
98  			i++;
99  		}
100 
101 		// calculate distances that are farther to the hyperplane
102 		double[] lowerBoundsSorted = new double[m];
103 		System.arraycopy(lowerBounds,0,lowerBoundsSorted,0,m);
104 		Arrays.sort(lowerBoundsSorted);
105 		byte[] ordering = new byte[m];
106 		i = 0;
107 		while(i < m){
108 				
109 				ordering[i] = (byte)Arrays.binarySearch(lowerBoundsSorted, lowerBounds[i]);
110 				i++;
111 		}
112 		
113 		SketchProjection result = new SketchProjection(ordering, res, -1, lowerBounds);
114 		return result;
115 	}
116 
117 	
118 	@Override
119 	protected SleekBucketFloat<O> instantiateBucketContainer(
120 			byte[] data, byte[] address) throws InstantiationException, IllegalAccessException, OBException {
121 		if(data == null){
122 			return new SleekBucketFloat<O>( type, bucketPivotCount);
123 		}else{
124 			try{
125 				return new SleekBucketFloat<O>( type, bucketPivotCount, data);
126 			}catch(IOException e){
127 				throw new OBException(e);
128 			}
129 		
130 		}
131 	}
132 	
133 	@Override
134 	protected int primitiveDataTypeSize() {
135 		return ByteConstants.Float.getSize();
136 	}
137 	
138 	
139 	
140 	
141 	//TODO: raise this method one level up and change all the interfaces.
142 	@Override
143 	public void searchOB(O object, float r, OBPriorityQueueFloat<O> result)
144 			throws NotFrozenException, InstantiationException,
145 			IllegalIdException, IllegalAccessException, OutOfRangeException,
146 			OBException {
147 		searchOB(object, r,null, result);
148 		
149 				
150 	}
151 	
152 	@Override
153 	protected AbstractOBQuery<O> getKQuery(O object, int k) throws OBException, InstantiationException, IllegalAccessException {
154 		BucketObjectFloat<O> b = getBucket(object);
155 		OBPriorityQueueFloat<O> result = new OBPriorityQueueFloat<O>(k);
156 		OBQueryFloat<O> q = new OBQueryFloat<O>(object, Float.MAX_VALUE, result, b
157 				.getSmapVector());
158 		return q;
159 	}
160 
161 
162 	@Override
163 	public void searchOB(O object, float r, Filter<O> filter,
164 			OBPriorityQueueFloat<O> result) throws NotFrozenException,
165 			InstantiationException, IllegalIdException, IllegalAccessException,
166 			OutOfRangeException, OBException {
167 		BucketObjectFloat<O> b = getBucket(object);
168 
169 		OBQueryFloat<O> q = new OBQueryFloat<O>(object, r, result, b
170 				.getSmapVector());
171 		SketchProjection query = this.getProjection(b);
172 		
173 		// we only use the k estimation once, at the beginning of the search process.
174 		int kEstimation = estimateK(result.getK());
175 		stats.addExtraStats("K-estimation", kEstimation);
176 		long time = System.currentTimeMillis();
177 		List<SketchProjection> sortedBuckets = searchBuckets(query, kEstimation);
178 		getStats().addExtraStats("Buckets_search_time", System.currentTimeMillis() - time);
179 		for(SketchProjection bucket: sortedBuckets){
180 				SleekBucketFloat<O> container = this.bucketCache.get(bucket.getAddress());
181         //SleekBucketFloat<O> container = this.instantiateBucketContainer(this.Buckets.getValue(bucket.getAddress()), bucket.getAddress());
182 			stats.incBucketsRead();
183 			container.search(q, b, filter, getStats());															
184 		}
185 	}
186 
187 
188 	/**
189 	 * Performs a knn graph search
190 	 */ 
191 	public Iterator<List<OBQueryFloat<O>>> knnGraph(int k, float r){
192 			return new KnnIterator(k, r);
193 	}
194 
195 	/**
196 	 * Implements a knn graph iteration over all the dataset
197 	 */ 
198 	protected class KnnIterator implements Iterator<List<OBQueryFloat<O>>> {
199 
200 			private Iterator<SleekBucketFloat<O>> bucketsIt;
201 			private List<OBQueryFloat<O>> next;
202 			private int kEstimation;
203 			private int k;
204 			private float range;
205 			public KnnIterator(int k, float range) {
206 					try{
207 							this.k = k;
208 							bucketsIt = iterateBuckets();
209 							kEstimation = estimateK(k);
210 							this.range = range;
211 							getNext();
212 					}catch(Exception e){
213 							throw new IllegalArgumentException(e);
214 					}
215 
216 			}
217 
218 			private void getNext(){
219 					try{
220 					if(bucketsIt.hasNext()){
221 							// get the bucket id.
222 							SleekBucketFloat<O> sl = bucketsIt.next();
223 							BucketObjectFloat<O> b = getBucket(sl.getObjects().get(0).getObject());
224 							SketchProjection query = getProjection(b);
225 							List<SketchProjection> sortedBuckets = searchBuckets(query, kEstimation);
226 							List<SleekBucketFloat<O>> buckets = new ArrayList<SleekBucketFloat<O>>(sortedBuckets.size());
227 							buckets.add(sl); // add also our current bucket!
228 							for(SketchProjection sp : sortedBuckets){
229 									SleekBucketFloat<O> container = bucketCache.get(sp.getAddress());
230 									buckets.add(container);
231 							}							
232 							// process the result.
233 						  next = new ArrayList<OBQueryFloat<O>>(sl.getObjects().size());
234 							for (BucketObjectFloat<O> o : sl.getObjects()) {
235 									OBPriorityQueueFloat<O> result = new OBPriorityQueueFloat<O>(k);
236 									OBQueryFloat<O> q = new OBQueryFloat<O>(o.getObject(), range, result, null);
237 									// search all the buckets!
238 									for(SleekBucketFloat<O> cont : buckets){
239 											cont.search(q, b,  new FilterNonEquals(), getStats());		
240 									}
241 									next.add(q);
242 							}
243 					}else{
244 							next = null;
245 					}
246 					}catch(Exception e){
247 							throw new IllegalArgumentException(e);
248 					}
249 			}
250 
251 			public boolean hasNext(){
252 					return next != null;
253 			}
254 			
255 
256 			public List<OBQueryFloat<O>> next(){
257 					List<OBQueryFloat<O>> toReturn  = next;
258 					getNext();
259 					return toReturn;
260 			}
261 
262 			public void remove(){
263 					throw new UnsupportedOperationException();
264 			}
265 	}
266 	
267 
268 	
269 	/**
270 	 * This method returns a list of all the distances of the query against  the DB.
271 	 * This helps to calculate EP values in a cheaper way. results that are equal to the original object are added
272 	 * as Float.MAX_VALUE
273 	 * @param query
274 	 * @param filterSame if True we do not return objects o such that query.equals(o)
275 	 * @return
276 	 * @throws OBException 
277 	 * @throws InstantiationException 
278 	 * @throws IllegalAccessException 
279 	 */
280 	public float[] fullMatchLite(O query, boolean filterSame) throws OBException, IllegalAccessException, InstantiationException{
281 			return CommonsFloat.fullMatchLite((OBFloat)query, filterSame, this);
282 	}
283 	
284 	
285 	/**
286 	 * Get the kMax closest objects. Count how many different bucket ids are
287 	 * there for each k and fill in accordingly the tables.
288 	 * 
289 	 * @param object
290 	 * @throws OBException
291 	 * @throws InstantiationException
292 	 * @throws IllegalAccessException
293 	 */
294 	protected void maxKEstimationAux(O object)
295 			throws OBException, InstantiationException, IllegalAccessException {
296 		// we calculate first the list of elements against the DB.
297 		BucketObjectFloat<O> b = getBucket(object);
298 		SketchProjection longAddr = getProjection(b);
299 		byte[] addr = longAddr.getAddress();
300 		
301 		
302 
303 		float [] sortedList = fullMatchLite( object, true);
304 		//`calculate the AVG distance between the sample and the DB.
305 		for(float t : sortedList){
306 				getStats().addExtraStats("GENERAL_DISTANCE", t);
307 		}
308 		// we now calculate the buckets and we sort them
309 		// according to the distance of the query.
310 		
311 			loadMasks();
312 
313 		long time = System.currentTimeMillis();
314 		OBAsserts.chkAssert(Buckets.size() <= Integer.MAX_VALUE, "Capacity exceeded");
315 		List<SketchProjection> sortedBuckets = searchBuckets(longAddr, (int)Buckets.size());
316 		
317 		//List<OBResultFloat<O>> sortedBuckets2 = fullMatch(object);
318 		logger.info("Time searching sketches: " + (System.currentTimeMillis() - time));
319 
320 		// now we have to calculate the EP for each k up to maxK
321 		// and for each k we calculate how many buckets must be read in order
322 		// to obtain ep less than
323 		int i = 0;
324 		// container
325 		// used to
326 		// read
327 		// data.
328 		FilterNonEquals<O> fne = new FilterNonEquals<O>();
329 		while (i < getMaxK().length) {
330 			double ep = Double.MAX_VALUE;
331 			int goodK = 0;
332 			// get the query for the
333 			AbstractOBQuery<O> queryAbst = getKQuery(object, userK[i]);
334 			OBQueryFloat<O> query = (OBQueryFloat<O>) queryAbst;
335 			
336 			for (SketchProjection result : sortedBuckets) {
337 				
338 					SleekBucketFloat<O> container = this.bucketCache.get(result.getAddress());
339 					//SleekBucketFloat<O> container = this.instantiateBucketContainer(this.Buckets.getValue(result.getAddress()), result.getAddress());
340 				// search the objects
341 				assert container != null : "Problem while loading: " + result.getSketch();
342 				container.search(query, b, fne, getStats());
343 				// calculate the ep of the query and the DB.
344 				if (query.isFull() ||  (query.getResult().getSize() >= (databaseSize() -1)) ) { // only if the query is full of items.
345 					ep = query.approx(sortedList);
346 					//double epOld = query.ep((List)sortedBuckets2);
347 					//OBAsserts.chkAssert(Math.abs(ep - epOld)<= 1f / sortedList.length, "oops: new: " + ep + " old: " + epOld);					
348 				}
349 				goodK++;
350 				if (query.isFull() || (query.getResult().getSize() >= (databaseSize() -1))  && ep <= this.getExpectedEP() ) {
351 					// add the information to the stats:
352 					// goodK buckets required to retrieve with k==i.
353 						logger.info("Found result after reading: " + goodK + " buckets " + " current error: " + ep + " <= expected error: " + this.getExpectedEP());
354 						logger.info("CARD" + result.getCompactRepresentation().cardinality() + " CARD_Q: " + longAddr.getCompactRepresentation().cardinality());
355 					kEstimators[i].add(goodK);
356            logger.info("Distance best found: " + query.getResult().getSortedElements().get(0).getDistance());
357            logger.info("Distance real: " + sortedList[0]);
358 					 logger.info("Query: " + object);
359 					 logger.info("Best found: " + query.getResult().getSortedElements().get(0).getObject());
360 					// store the distance of the best object and the real-best object
361 					float difference = (float)Math.abs(sortedList[0] - query.getResult().getSortedElements().get(0).getDistance());
362 					break; // we are done.
363 				}
364 			}
365 			i++;
366 		}
367 
368 	}
369 	
370 	
371 	protected double distance(O a, O b) throws OBException{
372 		return a.distance(b);
373 	}
374 
375 
376 
377 
378 
379 	@Override
380 	protected int getCPSize() {
381 			return Math.max((int)Math.ceil(m  /  ByteConstants.Byte.getBits() ), ByteConstants.Long.getSize());
382 	}
383 
384 	@Override
385 	protected Class<CBitVector> getPInstance() {
386 			return CBitVector.class;
387 	}
388 
389 	
390 }
391