View Javadoc

1   package net.obsearch.example.vectors;
2   
3   import hep.aida.bin.StaticBin1D;
4   
5   import java.io.FileNotFoundException;
6   import java.io.IOException;
7   import java.util.ArrayList;
8   import java.util.Iterator;
9   import java.util.List;
10  
11  import net.obsearch.ambient.Ambient;
12  
13  import net.obsearch.ambient.bdb.AmbientBDBJe;
14  
15  import net.obsearch.ambient.tc.AmbientTC;
16  import net.obsearch.exception.NotFrozenException;
17  import net.obsearch.exception.OBException;
18  import net.obsearch.exception.OBStorageException;
19  import net.obsearch.exception.PivotsUnavailableException;
20  import net.obsearch.index.ghs.impl.Sketch64Float;
21  import net.obsearch.index.ghs.impl.Sketch64Long;
22  
23  import net.obsearch.index.utils.Directory;
24  import net.obsearch.pivots.AcceptAll;
25  import net.obsearch.pivots.bustos.impl.IncrementalBustosNavarroChavezShort;
26  import net.obsearch.pivots.rf02.RF02PivotSelectorShort;
27  import net.obsearch.pivots.rf03.RF03PivotSelectorLong;
28  import net.obsearch.pivots.rf03.RF03PivotSelectorShort;
29  import net.obsearch.pivots.rf04.RF04PivotSelectorFloat;
30  import net.obsearch.query.OBQueryFloat;
31  import net.obsearch.query.OBQueryLong;
32  
33  import net.obsearch.result.OBPriorityQueueFloat;
34  import net.obsearch.result.OBPriorityQueueLong;
35  import net.obsearch.result.OBPriorityQueueShort;
36  import net.obsearch.result.OBResultFloat;
37  import net.obsearch.result.OBResultShort;
38  
39  public class VectorsDemoGHS extends VectorsDemo {
40  	
41  	
42  	
43  	
44  	public static void main(String args[]) throws FileNotFoundException, OBStorageException, NotFrozenException, IllegalAccessException, InstantiationException, OBException, IOException, PivotsUnavailableException {
45  		
46  		init();
47  		
48  		// Delete the directory of the index just in case.
49  		Directory.deleteDirectory(INDEX_FOLDER);
50  		
51  		
52  		// Create the pivot selection strategy
53  		RF04PivotSelectorFloat<L1Float> sel = new RF04PivotSelectorFloat<L1Float>(new AcceptAll<L1Float>());
54  		sel.setDataSample(100);
55  						
56  		// make the bit set as short so that m objects can fit in the buckets.
57  		// create an index.
58  		// Choose pivot sizes that are multiples of 64 to optimize the space
59  	    Sketch64Float<L1Float> index = new Sketch64Float<L1Float>(L1Float.class, sel, 256);
60  	    // error expected 
61  	    index.setExpectedError(1);
62  	    // small if you are planning to insert a lot of objects!
63  	    index.setSampleSize(100); 
64  	    // Probability of returning an error within 1.40 times the real distance
65  	    // (measured in standard deviations) (3 means a prob. of 0.99)
66  	    index.setKAlpha(3);
67  	    
68  	    // select the ks that the user will call. 
69  	    // This example will only be called with k=1
70  	    index.setMaxK(new int[]{1});	  
71  	    // little optimization that can help if your objects are of the same size.
72  	    index.setFixedRecord(true);
73      	index.setFixedRecord(VEC_SIZE*4);
74  		// Create the ambient that will store the index's data. (NOTE: folder name is hardcoded)
75      	Ambient<L1Float, Sketch64Float<L1Float>> a =  new AmbientTC<L1Float, Sketch64Float<L1Float>>( index, INDEX_FOLDER );
76  		
77  		// Add some random objects to the index:	
78  		logger.info("Adding " + DB_SIZE + " objects...");
79  		int i = 0;		
80  		while(i < DB_SIZE){
81  			index.insert(generateFloatVector());
82  			if(i % 100000 == 0){
83  				logger.info("Loading: " + i);
84  			}
85  			i++;
86  		}
87  		
88  		// prepare the index
89  		logger.info("Preparing the index...");
90  		a.freeze();
91  		logger.info("Index stats: " + index.getStats());
92  		
93  		float range = 100f;
94  		// now we can match some objects!		
95  		logger.info("Querying the index...");
96  		i = 0;
97  		index.resetStats(); // reset the stats counter
98  		long start = System.currentTimeMillis();
99  		List<OBPriorityQueueFloat<L1Float>> queryResults = new ArrayList<OBPriorityQueueFloat<L1Float>>(QUERY_SIZE);
100 		List<L1Float> queries = new ArrayList<L1Float>(QUERY_SIZE);
101 		while(i < QUERY_SIZE){
102 			L1Float q = 	generateFloatVector();	
103 			// query the index with k=1			
104 			OBPriorityQueueFloat<L1Float> queue = new OBPriorityQueueFloat<L1Float>(1);			
105 			// perform a query with a large range and k = 1 
106 			index.searchOB(q, range , queue);
107 			queryResults.add(queue);
108 			for(OBResultFloat<L1Float> f : queue.getSortedElements()){
109 				// check that the id makes sense
110 				assert index.getObject(f.getId()).equals(f.getObject());
111 				logger.info("Distance: " + f.getId() + " " + f.getDistance());
112 				assert f.getDistance() <= range;
113 			}
114 			queries.add(q);
115 			
116 			i++;
117 		}
118 		// print the results of the set of queries. 
119 		long elapsed = System.currentTimeMillis() - start;
120 		logger.info("Time per query: " + elapsed / QUERY_SIZE + " millisec.");
121 		
122 		logger.info("Stats follow: (total distances / pivot vectors computed during the experiment)");
123 		logger.info(index.getStats().toString());
124 
125 		// now we validate the result of the search
126 		logger.info("Doing Error validation");
127 		StaticBin1D ep = new StaticBin1D();
128 		
129 
130 		Iterator<OBPriorityQueueFloat<L1Float>> it1 = queryResults.iterator();
131 		Iterator<L1Float> it2 = queries.iterator();
132 		StaticBin1D seqTime = new StaticBin1D();
133 		i = 0;
134 		while(it1.hasNext()){
135 			OBPriorityQueueFloat<L1Float> qu = it1.next();
136 			L1Float q = it2.next();
137 			long time = System.currentTimeMillis();
138 			float[] sortedList = index.fullMatchLite(q, false);
139 			long el = System.currentTimeMillis() - time;
140 			seqTime.add(el);
141 			logger.info("Elapsed: " + el + " "  + i);
142 			OBQueryFloat<L1Float> queryObj = new OBQueryFloat<L1Float	>(q, range, qu, null);
143 			ep.add(queryObj.approx(sortedList));
144 			i++;
145 		}
146 		
147 		logger.info(ep.toString());
148 		logger.info("Time per seq query: ");
149 		logger.info(seqTime.toString());
150 		
151 	}
152 
153 }