View Javadoc

1   package net.obsearch.index.utils;
2   
3   import java.io.File;
4   import java.io.FileNotFoundException;
5   import java.io.FileWriter;
6   import java.io.IOException;
7   import java.util.ArrayList;
8   import java.util.Collections;
9   import java.util.List;
10  
11  import org.apache.log4j.Logger;
12  import org.kohsuke.args4j.Option;
13  
14  import net.obsearch.AbstractOBResult;
15  import net.obsearch.ApproxIndexShort;
16  import net.obsearch.ambient.Ambient;
17  import net.obsearch.asserts.OBAsserts;
18  import net.obsearch.exception.IllegalIdException;
19  import net.obsearch.exception.NotFrozenException;
20  import net.obsearch.exception.OBException;
21  import net.obsearch.exception.OBStorageException;
22  import net.obsearch.exception.OutOfRangeException;
23  import net.obsearch.index.IndexShort;
24  import net.obsearch.ob.OBShort;
25  import net.obsearch.query.AbstractOBQuery;
26  import net.obsearch.query.OBQueryShort;
27  import net.obsearch.result.OBPriorityQueueShort;
28  import net.obsearch.result.OBResultInvertedShort;
29  import net.obsearch.result.OBResultShort;
30  import net.obsearch.stats.Statistics;
31  
32  /**
33   * In this command line helper, data is separated by newlines and
34   * Index and objects are of type short. 
35   * @author Arnoldo Jose Muller-Molina
36   *
37   * @param <O> Object that we are handling
38   * @param <I> The index that stores all data
39   * @param <A> The ambient that controls the index.
40   */
41  public abstract class AbstractNewLineCommandLineShort<O extends OBShort, I extends IndexShort<O>, A extends Ambient<O,I>> extends
42  		AbstractNewLineCommandLine<O, I, A> {
43  	
44  	private static Logger logger = Logger.getLogger(AbstractNewLineCommandLineShort.class);
45  	
46  	
47  	@Option(name = "-histogram", usage = "Generate histogram of distances")
48  	protected boolean histogram = false;
49  	
50  	@Option(name = "-histogramFile", usage = "Generate histogram of distances")
51  	protected File histogramFile = new File("histogram.csv");
52  	
53  	protected ArrayList<O> seq = null;
54  	
55  	protected void searchObjectApprox(I index, O object, Statistics other) throws NotFrozenException,
56  	IllegalIdException, OutOfRangeException, InstantiationException,
57  	IllegalAccessException, OBException, IOException {
58  		OBAsserts.chkAssert( index instanceof ApproxIndexShort, "Index must implement the interface " + ApproxIndexShort.class.getCanonicalName());
59  		OBAsserts.chkAssert(index.databaseSize() <= Integer.MAX_VALUE, "db is too large");		
60  		short perfectRange = (short)r;
61  		int perfectK;
62  		// k is different in ep mode (all the db) and in recall mode (k).
63  		if(mode == Mode.approxEvalEP){
64  			perfectK = (int)index.databaseSize();
65  		}else{
66  			perfectK = k;
67  		}
68  
69  		// perform sequential search
70  		OBPriorityQueueShort<O> result = new OBPriorityQueueShort<O>(perfectK);
71  		OBQueryShort<O> dbQueue = new OBQueryShort<O>(object,perfectRange, result );
72  		List<OBResultShort<O>> results = new ArrayList<OBResultShort<O>>((int)index.databaseSize());
73  		
74  		int i = 0;
75  		int max = (int)index.databaseSize();
76  		if(seq == null){
77  			seq = new ArrayList<O>(max);
78  			while (i < max) {
79  				seq.add(index.getObject(i));
80  				i++;
81  			}
82  		}
83  		
84  		i = 0;
85  		for(O o : seq){
86  			short res = object.distance(o);          
87  			results.add(new OBResultShort<O>(o, i, res));
88  			i++;
89  		}
90  		       
91          Collections.sort(results);
92          Collections.reverse(results);
93          // now we just have to ask the index to evaluate with the given ep or recall
94          ApproxIndexShort<O> ai = (ApproxIndexShort<O>)index;
95          OBPriorityQueueShort<O> res = new  OBPriorityQueueShort<O>(k);
96          List<?> l =  results; // trick
97          long distances = index.getStats().getDistanceCount();
98          if(mode == Mode.approxEvalEP){
99          	
100         	ai.searchOBAnalyzeEp(object, (short)r, res, super.approxEvalEp, (List<AbstractOBResult<O>>)l);
101         }else{
102         	ai.searchOBAnalyzeRecall(object, (short)r, res, super.approxEvalRecall, (List<AbstractOBResult<O>>) l);
103         }
104         
105         if(histogram){
106         	long computedDistances = index.getStats().getDistanceCount() - distances;
107         	short cost = res.getSortedElements().get(0).getDistance();
108         	FileWriter f = new FileWriter(new File(histogramFile.getAbsolutePath() + "-" + k), true);
109         	f.write(cost +  ", " + computedDistances +  "\n");
110         	f.close();
111         }
112 		
113 	}
114 	
115 	protected abstract Class<O> obtainClass();
116 	
117 	
118 	@Override
119 	protected void searchObject(I index, O object, Statistics other) throws NotFrozenException,
120 			IllegalIdException, OutOfRangeException, InstantiationException,
121 			IllegalAccessException, OBException, IOException {
122 		
123 		if(super.mode != Mode.x && mode != Mode.opt){
124 			index.resetStats();
125 		}
126 		OBPriorityQueueShort<O> result = new OBPriorityQueueShort<O>(k);
127 		short range = (short)r;
128 		long timeA = System.currentTimeMillis();		
129 		index.searchOB(object, range, result);	
130 		time += System.currentTimeMillis()- timeA;		
131 		//logger.info(result.toString() + " " + index.getStats().toStringSummary() + "time: " + time + " " + k + " " + r);
132 		other.incQueryCount();
133 		if(validate){
134 			IndexSmokeTUtilApprox<O> t = new IndexSmokeTUtilApprox<O>(null);
135 			ArrayList<OBResultShort<O>> x2 = new ArrayList<OBResultShort<O>>((int)index.databaseSize());
136 			t.searchSequential(index.databaseSize(), object, x2, index, range);
137 			
138 			double ep = t.ep(result, x2, index);			
139 			if(t.isApproxZero(result, x2, range)){
140 				other.incExtra("ZEROS");
141 			}else{
142 				other.addExtraStats("CompoundError", ep);			
143 			}
144 			if(! t.ok(result, x2, range)){
145 				other.incExtra("BAD");
146 			}
147 			other.addExtraStats("RECALL", t.recall(result, x2, k, range));
148 
149 		}				
150 	}
151 	
152 }