View Javadoc

1   		 package net.obsearch.index.bucket.impl;
2   /*
3   	OBSearch: a distributed similarity search engine This project is to
4   	similarity search what 'bit-torrent' is to downloads. 
5   	Copyright (C) 2008 Arnoldo Jose Muller Molina
6   
7   	This program is free software: you can redistribute it and/or modify
8   	it under the terms of the GNU General Public License as published by
9   	the Free Software Foundation, either version 3 of the License, or
10  	(at your option) any later version.
11  
12  	This program is distributed in the hope that it will be useful,
13  	but WITHOUT ANY WARRANTY; without even the implied warranty of
14  	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  	GNU General Public License for more details.
16  
17  	You should have received a copy of the GNU General Public License
18  	along with this program.  If not, see <http://www.gnu.org/licenses/>.
19  */
20  
21  import java.nio.ByteBuffer;
22  import java.util.ArrayList;
23  import java.util.Arrays;
24  import java.util.Collections;
25  import java.util.Iterator;
26  import java.util.LinkedList;
27  import java.util.List;
28  import java.util.ListIterator;
29  import net.obsearch.stats.Statistics;
30  import net.obsearch.Index;
31  import net.obsearch.OperationStatus;
32  import net.obsearch.Status;
33  import net.obsearch.exception.IllegalIdException;
34  import net.obsearch.exception.OBException;
35  import net.obsearch.index.utils.IntegerHolder;
36  import net.obsearch.index.bucket.BucketContainer;
37  import net.obsearch.query.AbstractOBQuery;
38  
39  import net.obsearch.ob.OBFloat;
40  import net.obsearch.query.OBQueryFloat;
41  import net.obsearch.utils.bytes.ByteConversion;
42  import net.obsearch.filter.Filter;
43  import net.obsearch.storage.OBStore;
44  import net.obsearch.storage.OBStoreFloat;
45  import net.obsearch.storage.CloseIterator;
46  import net.obsearch.storage.TupleBytes;
47  import net.obsearch.constants.ByteConstants;
48  /** 
49   *  AbstractBucketContainerFloat Holds the functionality of a
50   *  bucket that sorts its smap-vectors lexicographically. Binary
51   *  searches are employed inside the vector.
52   *  
53   *  @author      Arnoldo Jose Muller Molina    
54   */
55  //*************************************************************************
56  //****** Warning: this is a generated file ********************************
57  //****** The source file is: AbstractBucketContainer.java    
58  //*************************************************************************
59  		public abstract class AbstractBucketContainerFloat < O extends OBFloat, B extends BucketObjectFloat > implements
60          BucketContainer < O, B, OBQueryFloat < O >> {
61  
62  						/**
63  						 * Utility class
64  						 */
65  						private B current = instantiateBucketObject();
66  						/**
67  						 * Storage device used to iterate through the bucket.
68  						 */
69  						private OBStore<TupleBytes> storage;
70  
71  						/**
72  						 * # of pivots in this Bucket container.
73  						 */
74  						private int pivots;
75  
76  						/**
77  						 * We need the index to perform some extra operations.
78  						 */
79  						private Index < O > index;
80  
81  	
82  
83  						private int TUPLE_SIZE;
84  		
85  						/**
86  						 * Iterate only through this key.
87  						 */
88  						protected byte[] key;
89  
90  
91  
92  						/**
93  						 * Additional pivot used to sort objects within the bucket.
94  						 */
95  						private int secondaryIndexPivot;
96  
97  						/**
98  						 * Build a new container for floats over the given index, with the given pivot number
99  						 * and a certain storage device where all the smap vectors are stored.
100 						 * Key is used as the base key for this bucket.
101 						 * @param index Underlying index.
102 						 * @param pivots Number of pivots employed
103 						 * @param storage Storage device where the smap vectors are stored.
104 						 * @param key Base key of this container
105 						 */
106 						public AbstractBucketContainerFloat(Index < O > index, int pivots, OBStore<TupleBytes> storage, byte[] key) {
107 								this(index,pivots, storage,key,-1);
108 						}
109 
110 						/**
111 						 * Build a new container for floats over the given index, with the given pivot number
112 						 * and a certain storage device where all the smap vectors are stored.
113 						 * Key is used as the base key for this bucket. The parameter secondaryIndexPivot chooses
114 						 * a dimension of the s-map vector to sort the vectors by this value. This helps
115 						 * in prunning a bit the buckets.
116 						 * @param index Underlying index.
117 						 * @param pivots Number of pivots employed
118 						 * @param storage Storage device where the smap vectors are stored.
119 						 * @param key Base key of this container
120 						 * @param secondaryIndexPivot Sort records by this dimension.
121 						 */
122 						public AbstractBucketContainerFloat(Index < O > index, int pivots, OBStore<TupleBytes> storage, byte[] key, int secondaryIndexPivot) {
123 								assert index != null;
124 								updateTupleSize(pivots);
125 								this.index = index;
126 								this.pivots = pivots;
127 								this.storage = storage;
128 								this.key = key;
129 								this.secondaryIndexPivot = secondaryIndexPivot;
130 						}
131 
132 						/**
133 	 * Return the object list!
134 	 * @return
135 	 */
136 	public List<B> getObjects(){
137 		return null;
138 	}
139 
140 						/**
141 						 * Appends value to the end of the given key.
142 						 */
143 						private byte[] buildKey(byte[] key, B bucket){
144 									if(secondaryIndexPivot == -1){
145 										return key;
146 									}else{
147 								
148 								return buildKey(key, bucket.getSmapVector()[this.secondaryIndexPivot]);
149 									}
150 						}
151 
152 						private byte[] buildKey(byte[] key, float value){
153 								if(secondaryIndexPivot == -1){
154 										return key;
155 								}else{
156 										ByteBuffer temp = ByteConversion.createByteBuffer(key.length + ByteConstants.Float.getSize());
157 										temp.put(key);										
158 										temp.put(storage.getFactory().serializeFloat(value));
159 										return temp.array();
160 								}
161 						}
162 
163 
164 						
165 
166 
167 						/**
168 						 * Instantiate a new Bucket ready to process each record.
169 						 */
170 						protected abstract B instantiateBucketObject();
171 
172 						@Override
173 						public void setPivots(int pivots) {
174 								this.pivots = pivots;
175 
176 						}
177 
178 		
179 
180 						// need to update this thing.
181 						@Override
182 						public OperationStatus delete(B bucket, O object)
183 								throws OBException, IllegalIdException, IllegalAccessException,
184 								InstantiationException {
185 								byte[] key2 = buildKey(key, bucket);
186 								CloseIterator<TupleBytes> pr = storage.processRange(key2,key2);
187 								OperationStatus res = new OperationStatus();
188 								res.setStatus(Status.NOT_EXISTS);
189 								try{
190 										B cmp = instantiateBucketObject();
191 				
192 										while(pr.hasNext()){
193 												TupleBytes t = pr.next();
194 												cmp.read(ByteConversion.createByteBuffer(t.getValue()), getPivots());
195 												// bucket.compareTo(cmp) == 0 && 
196 												if(index.getObject(cmp.getId()).distance(object) == 0){
197 														res.setStatus(Status.OK);
198 														res.setId(cmp.getId());
199 														pr.remove();
200 														break;
201 												}
202 										}
203 								}finally{
204 										pr.closeCursor();
205 								}
206 			
207 
208 								return res;
209 						}
210 
211 						private void updateTupleSize(int pivots) {
212 								TUPLE_SIZE = (pivots * net.obsearch.constants.ByteConstants.Float
213 				.getSize())
214 				+ Index.ID_SIZE;
215 		}
216 
217 
218 
219     /**
220      * Insert the given bucket into the container.
221 		 * @param bucket Bucket to insert.
222 		 * @param object the bucket has an object.
223 		 * @return The result of the operation.
224      */
225     public OperationStatus insert(B bucket, O object) throws OBException,
226             IllegalIdException, IllegalAccessException, InstantiationException {
227                
228 				OperationStatus res = exists(bucket, object);
229 				
230 				if(res.getStatus() == Status.NOT_EXISTS){
231 						res = insertBulk(bucket, object);
232 				}
233 				return res;					
234 		
235 		
236 		
237     }
238 
239     /**
240      * Insert the given bucket into the container.
241 		 * @param bucket Bucket to insert.
242 		 * @param object the bucket has an object.
243 		 * @return The result of the operation.
244      */
245     public OperationStatus insertBulk(B bucket, O object) throws OBException,
246             IllegalIdException, IllegalAccessException, InstantiationException {
247        
248 				    ByteBuffer out = ByteConversion
249 								.createByteBuffer(calculateBufferSize(getPivots()));
250 						OperationStatus res = new OperationStatus();
251 						res.setStatus(Status.OK);
252 						assert bucket.getId() != -1;
253 						res.setId(bucket.getId());
254 						bucket.write(out);
255 						byte[] key2 = buildKey(key, bucket);
256 						storage.put(key2, out.array());
257 						return res;
258     }
259 
260 	 /**
261      * Calculate buffer size for n items.
262      * @param i
263      *                Number of items to add.
264      * @return the number of bytes required to store n smap vectors.
265      */
266 //*************************************************************************
267 //****** Warning: this is a generated file ********************************
268 //****** The source file is: AbstractBucketContainer.java    
269 //*************************************************************************
270     private int calculateBufferSize(int i) {
271         return (TUPLE_SIZE);
272     }
273 
274 
275 
276     @Override
277     public OperationStatus exists(B bucket, O object)
278 				throws OBException, IllegalIdException, IllegalAccessException,
279 				InstantiationException {
280 				OperationStatus res = new OperationStatus();
281         res.setStatus(Status.NOT_EXISTS);
282 				byte[] key2 = buildKey(key, bucket);
283 				CloseIterator<TupleBytes> pr = storage.processRange(key2,key2);
284 				try{
285 						B cmp = instantiateBucketObject();
286 						while(pr.hasNext()){
287 								TupleBytes t = pr.next();
288 								cmp.read(ByteConversion.createByteBuffer(t.getValue()), getPivots());
289 								//bucket.compareTo(cmp) == 0 && 
290 								if(index.getObject(cmp.getId()).distance(object) == 0){						
291 										res.setStatus(Status.EXISTS);							
292 										res.setId(cmp.getId());
293 										break;
294 								}
295 						}
296 				}finally{
297 						pr.closeCursor();
298 				}
299         return res;
300     }
301 
302     
303 		public void search(OBQueryFloat < O > query, B b,
304 											 ByteBuffer data, Filter<O> filter, Statistics stats) throws IllegalAccessException,
305 				OBException, InstantiationException, IllegalIdException {
306 				
307 			  
308 				current.read(data, getPivots());
309 				stats.incDataRead(TUPLE_SIZE);
310 				stats.incSmapCount();
311 				float max = current.lInf(b);
312 				long res = 0;
313 				if (max <= query.getDistance() && query.isCandidate(max)) {
314 										long id = current.getId();
315 										O toCompare = index.getObject(id);
316 										// Process query only if the filter is null.
317 										if(filter == null || filter.accept(toCompare, query.getObject())){
318 												float realDistance = query.getObject().distance(toCompare);
319 												stats.incDistanceCount();
320 												if (realDistance <= query.getDistance()) {
321 														query.add(id, toCompare, realDistance);
322 												}
323 										}
324 								}
325 		}
326 
327 		
328 
329 		public void search(AbstractOBQuery<O> q, B bucket,  Filter<O> filter, Statistics stats) throws IllegalAccessException,
330 	 OBException, InstantiationException,
331 				IllegalIdException{
332 				search((OBQueryFloat)q, bucket, filter, stats ) ;
333 		}
334 
335     
336 		/**
337      * Searches the data by using a binary search to reduce SMAP vector
338      * computations. Does not cache objects and creates less
339 		 * objects during the search than searchSorted(...).
340      * @param query
341      * @param b
342      * @return
343      * @throws IllegalAccessException
344      * @throws DatabaseException
345      * @throws OBException
346      * @throws InstantiationException
347      * @throws IllegalIdException
348      */
349     public void search(OBQueryFloat < O > query, B b,
350 											 Filter<O> filter, Statistics stats) throws IllegalAccessException,
351 				OBException, InstantiationException, IllegalIdException {
352 			 
353 				byte[] key1;
354 				byte[] key2;
355 				if(secondaryIndexPivot != -1){
356 						key1 = buildKey(key, query.getLow()[this.secondaryIndexPivot]);
357 						key2 = buildKey(key, query.getHigh()[this.secondaryIndexPivot]);
358 				}else{
359 						key1 = key;
360 						key2 = key;
361 				}
362 				search(query,b,filter,key1,key2, stats);
363     }
364 
365 
366 		/**
367      * Convenience method that forces the search to be performed on a certain key set.
368 		 */ 
369 		public void search(OBQueryFloat < O > query, B b,
370 											  Filter<O> filter, byte[] key1, byte[] key2, Statistics stats) throws IllegalAccessException,
371 				OBException, InstantiationException, IllegalIdException {
372 
373 
374 				CloseIterator<TupleBytes> pr = storage.processRange(key1,key2);
375 				long res = 0;
376 				try{
377 						B current = instantiateBucketObject();
378 
379 						while(pr.hasNext()){
380 								TupleBytes t = pr.next();
381 								
382 								current.read(ByteConversion.createByteBuffer(t.getValue()), getPivots());
383 								stats.incDataRead(t.getValue().length);
384 								float max = current.lInf(b);
385 								stats.incSmapCount();
386 								if (max <= query.getDistance() && query.isCandidate(max)) {
387 										long id = current.getId();
388 										O toCompare = index.getObject(id);
389 										// Process query only if the filter is null.
390 										if(filter == null || filter.accept(toCompare, query.getObject())){
391 												float realDistance = query.getObject().distance(toCompare);
392 												stats.incDistanceCount();
393 												if (realDistance <= query.getDistance()) {
394 														query.add(id, toCompare, realDistance);
395 												}
396 										}
397 								}
398 								
399 						}
400 				}finally{
401 						pr.closeCursor();
402 				}
403 		}
404 
405     /*
406      * (non-Javadoc)
407      * @see net.obsearch.index.bucket.BucketContainer#getPivots()
408      */
409 //*************************************************************************
410 //****** Warning: this is a generated file ********************************
411 //****** The source file is: AbstractBucketContainer.java    
412 //*************************************************************************
413     @Override
414     public int getPivots() {
415         return this.pivots;
416     }
417 
418 		/**
419 		 * Return the # of S-Map vectors in the bucket.
420 		 */
421 		public int size() throws OBException{
422 					CloseIterator<TupleBytes> pr = storage.processRange(key,key);
423 				int res = 0;
424 				try{
425 						while(pr.hasNext()){
426 								TupleBytes t = pr.next();
427 								res++;
428 						}
429 						}finally{
430 								pr.closeCursor();
431 						}
432 						return res;
433 		}
434 
435 		public void setKey(byte[] key){
436 				this.key = key;
437 		}
438 
439 
440 		public byte[] serialize(){
441 				return null;
442 		}
443 
444 		public boolean isModified(){
445 				return true;
446 		}
447 
448 
449 }
450