1 package net.obsearch.index.utils;
2
3 import hep.aida.bin.StaticBin1D;
4
5 import java.io.BufferedReader;
6 import java.io.File;
7 import java.io.FileReader;
8 import java.io.IOException;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.HashSet;
12 import java.util.Iterator;
13 import java.util.LinkedList;
14 import java.util.List;
15 import java.util.Set;
16
17 import net.obsearch.AbstractOBResult;
18 import net.obsearch.ApproxIndexShort;
19 import net.obsearch.asserts.OBAsserts;
20 import net.obsearch.exception.OBException;
21 import net.obsearch.exception.OBStorageException;
22 import net.obsearch.index.IndexShort;
23 import net.obsearch.ob.OBShort;
24 import net.obsearch.query.OBQueryShort;
25 import net.obsearch.result.OBPriorityQueueShort;
26 import net.obsearch.result.OBResultShort;
27 import net.obsearch.stats.Statistics;
28
29 import org.apache.log4j.Logger;
30 import static org.junit.Assert.assertEquals;
31 import static org.junit.Assert.assertFalse;
32 import static org.junit.Assert.assertTrue;
33
34 public class IndexSmokeTUtilApprox<O extends OBShort> extends IndexSmokeTUtil<O> {
35
36 public IndexSmokeTUtilApprox(OBFactory<O> factory) throws IOException {
37 super(factory);
38 }
39
40
41
42
43 private static transient final Logger logger = Logger
44 .getLogger(IndexSmokeTUtilApprox.class);
45
46 public void search(IndexShort<O> index, short range, short k)
47 throws Exception {
48
49
50
51 OBAsserts.chkAssert(index.databaseSize() <= Integer.MAX_VALUE, "Cannot test with more than 2^32 items");
52 index.resetStats();
53
54 int querySize = 1000;
55 String re = null;
56 logger.info("Matching begins...");
57 File query = new File(testProperties.getProperty("test.query.input"));
58 File dbFolder = new File(testProperties.getProperty("test.db.path"));
59 BufferedReader r = new BufferedReader(new FileReader(query));
60 List<OBPriorityQueueShort<O>> result = new LinkedList<OBPriorityQueueShort<O>>();
61 re = r.readLine();
62 int i = 0;
63 long realIndex = index.databaseSize();
64
65 while (re != null) {
66 String line = parseLine(re);
67 if (line != null) {
68 OBPriorityQueueShort<O> x = new OBPriorityQueueShort<O>(k);
69 if (i % 100 == 0) {
70 logger.info("Matching " + i);
71 }
72
73 O s = factory.create(line);
74 if (factory.shouldProcess(s)) {
75 index.searchOB(s, range, x);
76 result.add(x);
77 i++;
78 }
79 }
80 if (i == querySize) {
81 logger.warn("Finishing test at i : " + i);
82 break;
83 }
84 re = r.readLine();
85 }
86
87 logger.info(index.getStats().toString());
88
89 int maxQuery = i;
90
91
92
93
94 Iterator<OBPriorityQueueShort<O>> it = result.iterator();
95 r.close();
96 r = new BufferedReader(new FileReader(query));
97 re = r.readLine();
98 i = 0;
99 StaticBin1D stats = new StaticBin1D();
100 int emptyResults = 0;
101 int badResults = 0;
102 int zeroResults = 0;
103 IntegerHolder badCount = new IntegerHolder(0);
104 Statistics st = new Statistics();
105 StaticBin1D recall = new StaticBin1D();
106 while (re != null) {
107 String line = parseLine(re);
108 if (line != null) {
109 if (i % 300 == 0) {
110 logger.info("Matching " + i + " of " + maxQuery);
111 }
112 O s = factory.create(line);
113 if (factory.shouldProcess(s)) {
114
115 ArrayList<OBResultShort<O>> x2 = new ArrayList<OBResultShort<O>>((int)index.databaseSize());
116
117 searchSequential(realIndex, s, x2, index, range);
118 OBPriorityQueueShort<O> x1 = it.next();
119
120
121 st.addExtraStats("ReturnedSize", x2.size());
122 if(isApproxZero(x1,x2,range)){
123
124
125 emptyResults++;
126 }else{
127 double ep = ep(x1,x2,index);
128 if(ok(x1,x2,range)){
129 assertTrue(ep == 0);
130 }
131 stats.add(ep);
132 if(index instanceof ApproxIndexShort){
133 OBQueryShort<O> q = new OBQueryShort<O>(null, x1);
134 List<AbstractOBResult<O>> seq = new LinkedList<AbstractOBResult<O>>();
135 for(OBResultShort<O> o : x2){
136 seq.add(o);
137 }
138 double anotherEp = q.ep(seq);
139 assertTrue("index ep " + ep + " priority ep: " + anotherEp, ep == anotherEp);
140
141 }
142 }
143 if(!ok(x1,x2, range)){
144 badResults++;
145 }
146
147 if(zeroResults(x2,range)){
148 zeroResults++;
149 }
150 recall.add(recall(x1,x2,k,range));
151 i++;
152 }
153
154 }
155 if (i == querySize) {
156 logger.warn("Finishing test at i : " + i);
157 break;
158 }
159
160 re = r.readLine();
161 }
162
163 r.close();
164 logger.info("Finished CompoundError calculation: ");
165 logger.info("Returned size" + st);
166 logger.info(StatsUtil.prettyPrintStats("CompoundError", stats));
167 assertFalse(it.hasNext());
168 logger.info("Recall: " + recall.toString());
169 logger.info("Zero queries: " + emptyResults);
170 logger.info("Bad results: " + badResults);
171 logger.info("Zero (real) queries : " + zeroResults);
172 }
173
174
175
176
177
178
179
180 public boolean zeroResults( List<OBResultShort<O>> db, short range){
181 return db.get(0).getDistance() > range;
182 }
183
184
185
186
187
188
189
190
191
192 public boolean isApproxZero(OBPriorityQueueShort<O> x1, List<OBResultShort<O>> db, short range){
193 return x1.getSize() == 0 && (db.size() != 0 && db.get(0).getDistance() <= range);
194 }
195
196 public boolean ok(OBPriorityQueueShort<O> x1, List<OBResultShort<O>> db, short range){
197
198
199 if(zeroResults(db,range)){
200 assert x1.getSize() == 0;
201 return true;
202 }
203
204 List<OBResultShort<O>> query = x1.getSortedElements();
205
206 if(query.size() == 0 && db.size() != 0){
207 return false;
208 }
209 assert query.size() <= db.size() : "Query: " + query + " db: " + db;
210 Iterator<OBResultShort<O>> it = db.iterator();
211 for(OBResultShort<O> r : query){
212 assert it.hasNext();
213 if(it.hasNext() && r.compareTo(it.next()) != 0){
214 return false;
215 }
216 }
217 return true;
218 }
219
220 public double recall(OBPriorityQueueShort<O> x1, List<OBResultShort<O>> db, int k, short range){
221 if(zeroResults(db,range)){
222 assert x1.getSize() == 0;
223 return 1;
224 }
225 int hits = 0;
226 Set<OBResultShort<O>> s = new HashSet<OBResultShort<O>>();
227 for(OBResultShort<O> r : x1.getSortedElements()){
228 int i = 0;
229 for(OBResultShort<O> d : db){
230 if(i == k){
231 break;
232 }
233 if(! s.contains(d) &&d.compareTo(r) == 0){
234 s.add(d);
235 hits++;
236 break;
237 }
238 i++;
239 }
240 }
241 return (double)hits / (double)k;
242 }
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261 public void searchSequential(long max, O o,
262 ArrayList < OBResultShort<O> > result,
263 IndexShort < O > index, short range) throws OBException, IllegalAccessException, InstantiationException {
264 int i = 0;
265 while (i < max) {
266 O obj = index.getObject(i);
267 short res = o.distance(obj);
268
269 result.add( new OBResultShort<O>(obj, i, res));
270
271 i++;
272 }
273 Collections.sort(result);
274 Collections.reverse(result);
275 }
276
277 public double ep(OBPriorityQueueShort<O> x1, List<OBResultShort<O>> db, IndexShort < O > index) throws OBStorageException{
278 List<OBResultShort<O>> query = x1.getSortedElements();
279 int i = 0;
280 int result = 0;
281 Set<OBResultShort<O>> s = new HashSet<OBResultShort<O>>();
282 for(OBResultShort<O> r : query){
283
284 int cx = 0;
285 for(OBResultShort<O> c : db){
286 if(! s.contains(c) &&c.compareTo(r) == 0){
287 s.add(c);
288 result += cx - i;
289 break;
290 }
291 cx++;
292 }
293 i++;
294 }
295 if(query.size() == 0){
296 return 0;
297 }else{
298 double res = ((double)result)/ ((double)(query.size() * db.size()));
299 return res;
300 }
301 }
302
303 }