1 package net.obsearch.index.ghs.impl;
2
3 import java.io.IOException;
4 import java.nio.ByteBuffer;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.Iterator;
9 import java.util.List;
10 import java.util.logging.Logger;
11
12 import net.obsearch.index.ghs.CBitVector;
13 import net.obsearch.index.CommonsFloat;
14 import net.obsearch.AbstractOBResult;
15 import net.obsearch.asserts.OBAsserts;
16 import net.obsearch.constants.ByteConstants;
17 import net.obsearch.exception.IllegalIdException;
18 import net.obsearch.exception.NotFrozenException;
19 import net.obsearch.exception.OBException;
20 import net.obsearch.exception.OBStorageException;
21 import net.obsearch.exception.OutOfRangeException;
22 import net.obsearch.filter.Filter;
23 import net.obsearch.filter.FilterNonEquals;
24 import net.obsearch.index.IndexFloat;
25 import net.obsearch.index.bucket.impl.BucketObjectFloat;
26 import net.obsearch.index.bucket.sleek.SleekBucketFloat;
27 import net.obsearch.index.ghs.AbstractSketch64;
28 import net.obsearch.ob.OBFloat;
29 import net.obsearch.pivots.IncrementalPairPivotSelector;
30 import net.obsearch.pivots.IncrementalPivotSelector;
31 import net.obsearch.query.AbstractOBQuery;
32 import net.obsearch.query.OBQueryFloat;
33 import net.obsearch.result.OBPriorityQueueFloat;
34 import net.obsearch.result.OBResultInvertedByte;
35 import net.obsearch.result.OBResultInvertedFloat;
36 import net.obsearch.result.OBResultFloat;
37 import net.obsearch.index.ghs.SketchProjection;
38
39 public final class Sketch64Float<O extends OBFloat> extends AbstractSketch64<O, BucketObjectFloat<O>, OBQueryFloat<O>, SleekBucketFloat<O>>
40 implements IndexFloat<O> {
41
42 private boolean DEBUG = true;
43
44 private static final transient Logger logger = Logger
45 .getLogger(Sketch64Float.class.getName());
46
47
48
49
50
51
52
53
54
55
56
57 public Sketch64Float(Class<O> type,
58 IncrementalPairPivotSelector<O> pivotSelector, int m
59 )
60 throws OBStorageException, OBException, IOException {
61
62 super(type, pivotSelector, m, 0);
63
64 }
65
66 public Sketch64Float(){
67 super();
68 }
69
70 @Override
71 public BucketObjectFloat<O> getBucket(O object) throws OBException,
72 InstantiationException, IllegalAccessException {
73 BucketObjectFloat<O> res = new BucketObjectFloat<O>(null, -1L, object);
74 return res;
75 }
76
77
78
79
80
81 @Override
82 public SketchProjection getProjection(BucketObjectFloat<O> bucket) throws OBException {
83
84 int i = 0;
85 CBitVector res = new CBitVector(m);
86 double[] lowerBounds = new double[m];
87 while(i < m){
88 float distanceA = pivotGrid[i][0].distance(bucket.getObject());
89 float distanceB = pivotGrid[i][1].distance(bucket.getObject());
90 if(distanceA > distanceB ){
91 res.set(i);
92 distortionStats[i][1]++;
93 }else{
94 distortionStats[i][0]++;
95 }
96
97 lowerBounds[i] = Math.max(((double)Math.abs(distanceA - distanceB)) / 2, 0);
98 i++;
99 }
100
101
102 double[] lowerBoundsSorted = new double[m];
103 System.arraycopy(lowerBounds,0,lowerBoundsSorted,0,m);
104 Arrays.sort(lowerBoundsSorted);
105 byte[] ordering = new byte[m];
106 i = 0;
107 while(i < m){
108
109 ordering[i] = (byte)Arrays.binarySearch(lowerBoundsSorted, lowerBounds[i]);
110 i++;
111 }
112
113 SketchProjection result = new SketchProjection(ordering, res, -1, lowerBounds);
114 return result;
115 }
116
117
118 @Override
119 protected SleekBucketFloat<O> instantiateBucketContainer(
120 byte[] data, byte[] address) throws InstantiationException, IllegalAccessException, OBException {
121 if(data == null){
122 return new SleekBucketFloat<O>( type, bucketPivotCount);
123 }else{
124 try{
125 return new SleekBucketFloat<O>( type, bucketPivotCount, data);
126 }catch(IOException e){
127 throw new OBException(e);
128 }
129
130 }
131 }
132
133 @Override
134 protected int primitiveDataTypeSize() {
135 return ByteConstants.Float.getSize();
136 }
137
138
139
140
141
142 @Override
143 public void searchOB(O object, float r, OBPriorityQueueFloat<O> result)
144 throws NotFrozenException, InstantiationException,
145 IllegalIdException, IllegalAccessException, OutOfRangeException,
146 OBException {
147 searchOB(object, r,null, result);
148
149
150 }
151
152 @Override
153 protected AbstractOBQuery<O> getKQuery(O object, int k) throws OBException, InstantiationException, IllegalAccessException {
154 BucketObjectFloat<O> b = getBucket(object);
155 OBPriorityQueueFloat<O> result = new OBPriorityQueueFloat<O>(k);
156 OBQueryFloat<O> q = new OBQueryFloat<O>(object, Float.MAX_VALUE, result, b
157 .getSmapVector());
158 return q;
159 }
160
161
162 @Override
163 public void searchOB(O object, float r, Filter<O> filter,
164 OBPriorityQueueFloat<O> result) throws NotFrozenException,
165 InstantiationException, IllegalIdException, IllegalAccessException,
166 OutOfRangeException, OBException {
167 BucketObjectFloat<O> b = getBucket(object);
168
169 OBQueryFloat<O> q = new OBQueryFloat<O>(object, r, result, b
170 .getSmapVector());
171 SketchProjection query = this.getProjection(b);
172
173
174 int kEstimation = estimateK(result.getK());
175 stats.addExtraStats("K-estimation", kEstimation);
176 long time = System.currentTimeMillis();
177 List<SketchProjection> sortedBuckets = searchBuckets(query, kEstimation);
178 getStats().addExtraStats("Buckets_search_time", System.currentTimeMillis() - time);
179 for(SketchProjection bucket: sortedBuckets){
180 SleekBucketFloat<O> container = this.bucketCache.get(bucket.getAddress());
181
182 stats.incBucketsRead();
183 container.search(q, b, filter, getStats());
184 }
185 }
186
187
188
189
190
191 public Iterator<List<OBQueryFloat<O>>> knnGraph(int k, float r){
192 return new KnnIterator(k, r);
193 }
194
195
196
197
198 protected class KnnIterator implements Iterator<List<OBQueryFloat<O>>> {
199
200 private Iterator<SleekBucketFloat<O>> bucketsIt;
201 private List<OBQueryFloat<O>> next;
202 private int kEstimation;
203 private int k;
204 private float range;
205 public KnnIterator(int k, float range) {
206 try{
207 this.k = k;
208 bucketsIt = iterateBuckets();
209 kEstimation = estimateK(k);
210 this.range = range;
211 getNext();
212 }catch(Exception e){
213 throw new IllegalArgumentException(e);
214 }
215
216 }
217
218 private void getNext(){
219 try{
220 if(bucketsIt.hasNext()){
221
222 SleekBucketFloat<O> sl = bucketsIt.next();
223 BucketObjectFloat<O> b = getBucket(sl.getObjects().get(0).getObject());
224 SketchProjection query = getProjection(b);
225 List<SketchProjection> sortedBuckets = searchBuckets(query, kEstimation);
226 List<SleekBucketFloat<O>> buckets = new ArrayList<SleekBucketFloat<O>>(sortedBuckets.size());
227 buckets.add(sl);
228 for(SketchProjection sp : sortedBuckets){
229 SleekBucketFloat<O> container = bucketCache.get(sp.getAddress());
230 buckets.add(container);
231 }
232
233 next = new ArrayList<OBQueryFloat<O>>(sl.getObjects().size());
234 for (BucketObjectFloat<O> o : sl.getObjects()) {
235 OBPriorityQueueFloat<O> result = new OBPriorityQueueFloat<O>(k);
236 OBQueryFloat<O> q = new OBQueryFloat<O>(o.getObject(), range, result, null);
237
238 for(SleekBucketFloat<O> cont : buckets){
239 cont.search(q, b, new FilterNonEquals(), getStats());
240 }
241 next.add(q);
242 }
243 }else{
244 next = null;
245 }
246 }catch(Exception e){
247 throw new IllegalArgumentException(e);
248 }
249 }
250
251 public boolean hasNext(){
252 return next != null;
253 }
254
255
256 public List<OBQueryFloat<O>> next(){
257 List<OBQueryFloat<O>> toReturn = next;
258 getNext();
259 return toReturn;
260 }
261
262 public void remove(){
263 throw new UnsupportedOperationException();
264 }
265 }
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280 public float[] fullMatchLite(O query, boolean filterSame) throws OBException, IllegalAccessException, InstantiationException{
281 return CommonsFloat.fullMatchLite((OBFloat)query, filterSame, this);
282 }
283
284
285
286
287
288
289
290
291
292
293
294 protected void maxKEstimationAux(O object)
295 throws OBException, InstantiationException, IllegalAccessException {
296
297 BucketObjectFloat<O> b = getBucket(object);
298 SketchProjection longAddr = getProjection(b);
299 byte[] addr = longAddr.getAddress();
300
301
302
303 float [] sortedList = fullMatchLite( object, true);
304
305 for(float t : sortedList){
306 getStats().addExtraStats("GENERAL_DISTANCE", t);
307 }
308
309
310
311 loadMasks();
312
313 long time = System.currentTimeMillis();
314 OBAsserts.chkAssert(Buckets.size() <= Integer.MAX_VALUE, "Capacity exceeded");
315 List<SketchProjection> sortedBuckets = searchBuckets(longAddr, (int)Buckets.size());
316
317
318 logger.info("Time searching sketches: " + (System.currentTimeMillis() - time));
319
320
321
322
323 int i = 0;
324
325
326
327
328 FilterNonEquals<O> fne = new FilterNonEquals<O>();
329 while (i < getMaxK().length) {
330 double ep = Double.MAX_VALUE;
331 int goodK = 0;
332
333 AbstractOBQuery<O> queryAbst = getKQuery(object, userK[i]);
334 OBQueryFloat<O> query = (OBQueryFloat<O>) queryAbst;
335
336 for (SketchProjection result : sortedBuckets) {
337
338 SleekBucketFloat<O> container = this.bucketCache.get(result.getAddress());
339
340
341 assert container != null : "Problem while loading: " + result.getSketch();
342 container.search(query, b, fne, getStats());
343
344 if (query.isFull() || (query.getResult().getSize() >= (databaseSize() -1)) ) {
345 ep = query.approx(sortedList);
346
347
348 }
349 goodK++;
350 if (query.isFull() || (query.getResult().getSize() >= (databaseSize() -1)) && ep <= this.getExpectedEP() ) {
351
352
353 logger.info("Found result after reading: " + goodK + " buckets " + " current error: " + ep + " <= expected error: " + this.getExpectedEP());
354 logger.info("CARD" + result.getCompactRepresentation().cardinality() + " CARD_Q: " + longAddr.getCompactRepresentation().cardinality());
355 kEstimators[i].add(goodK);
356 logger.info("Distance best found: " + query.getResult().getSortedElements().get(0).getDistance());
357 logger.info("Distance real: " + sortedList[0]);
358 logger.info("Query: " + object);
359 logger.info("Best found: " + query.getResult().getSortedElements().get(0).getObject());
360
361 float difference = (float)Math.abs(sortedList[0] - query.getResult().getSortedElements().get(0).getDistance());
362 break;
363 }
364 }
365 i++;
366 }
367
368 }
369
370
371 protected double distance(O a, O b) throws OBException{
372 return a.distance(b);
373 }
374
375
376
377
378
379 @Override
380 protected int getCPSize() {
381 return Math.max((int)Math.ceil(m / ByteConstants.Byte.getBits() ), ByteConstants.Long.getSize());
382 }
383
384 @Override
385 protected Class<CBitVector> getPInstance() {
386 return CBitVector.class;
387 }
388
389
390 }
391