View Javadoc

1   /*
2   		OBSearch: a distributed similarity search engine This project is to
3    similarity search what 'bit-torrent' is to downloads. 
4       Copyright (C) 2008 Arnoldo Jose Muller Molina
5   
6     	This program is free software: you can redistribute it and/or modify
7       it under the terms of the GNU General Public License as published by
8       the Free Software Foundation, either version 3 of the License, or
9       (at your option) any later version.
10  
11      This program is distributed in the hope that it will be useful,
12      but WITHOUT ANY WARRANTY; without even the implied warranty of
13      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14      GNU General Public License for more details.
15  
16      You should have received a copy of the GNU General Public License
17      along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19  
20  /** 
21  	*  TestFrameworkApprox 
22  	*  
23    *  @author      Arnoldo Jose Muller Molina    
24    */
25  package net.obsearch.index;
26  
27  import hep.aida.bin.StaticBin1D;
28  import org.apache.log4j.Logger;
29  import net.obsearch.asserts.OBAsserts;
30  import net.obsearch.exception.OBStorageException;
31  import net.obsearch.index.IndexShort;
32  import net.obsearch.ob.OBShort;
33  import net.obsearch.result.OBPriorityQueueShort;
34  import net.obsearch.result.OBResultShort;
35  import net.obsearch.index.utils.StatsUtil;
36  import net.obsearch.exception.PivotsUnavailableException;
37  
38  import java.util.HashSet;
39  import java.util.Iterator;
40  import java.util.LinkedList;
41  import java.util.List;
42  import java.util.Set;
43  
44  import static org.junit.Assert.assertEquals;
45  import static org.junit.Assert.assertFalse;
46  import static org.junit.Assert.assertTrue;
47  /*
48      OBSearch: a distributed similarity search engine
49      This project is to similarity search what 'bit-torrent' is to downloads.
50      Copyright (C)  2008 Arnoldo Jose Muller Molina
51  
52      This program is free software; you can redistribute it and/or modify
53      it under the terms of the GNU General Public License as published by
54      the Free Software Foundation; either version 2 of the License, or
55      (at your option) any later version.
56  
57      This program is distributed in the hope that it will be useful,
58      but WITHOUT ANY WARRANTY; without even the implied warranty of
59      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
60      GNU General Public License for more details.
61  
62      You should have received a copy of the GNU General Public License along
63      with this program; if not, write to the Free Software Foundation, Inc.,
64      51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
65  */
66  /** 
67  		<class_description> Perform approximate validation of data.
68  	  
69      @author      Arnoldo Jose Muller Molina    
70      @version     %I%, %G%
71      @since       0.0
72  */
73  
74  
75  public abstract class TestFrameworkApproxShort<O extends OBShort>  extends TestFrameworkShort<O> {
76  	
77  		private static final Logger logger = Logger.getLogger(TestFrameworkApproxShort.class);
78  
79  		public TestFrameworkApproxShort(Class<O> type, int dbSize, int querySize,
80  																			IndexShort<O> index) {
81  				super(type, dbSize, querySize, index);
82  		}
83  	
84  		  	/**
85       * Perform all the searches with
86       * @param x
87       *                the index that will be used
88       * @param range
89       * @param k
90       */
91      public void search(IndexShort < O > index, short range, byte k)
92              throws Exception {
93          //range = (short)Math.min(Short.MAX_VALUE, range);
94          index.resetStats();
95          // it is time to Search
96          
97          String re = null;
98          logger.info("Matching begins...");
99          
100         int i = 0;
101         long realIndex = index.databaseSize();
102         List < OBPriorityQueueShort < O >> result = new LinkedList < OBPriorityQueueShort < O >>();        
103         while (i < this.queries.length) {
104                 OBPriorityQueueShort < O > x = new OBPriorityQueueShort < O >(
105                         k);
106                 if (i % 100 == 0) {
107                     logger.info("Matching " + i);
108                 }
109 
110                 O s = queries[i];                
111                     index.searchOB(s, range, x);
112                     result.add(x);
113                     i++;                                        
114         }
115         logger.info("Range: " + range + " k " + k + " " + index.getStats().toString());
116        
117         int maxQuery = i;
118 
119         Iterator < OBPriorityQueueShort < O >> it = result.iterator();
120 				StaticBin1D stats = new StaticBin1D();
121         i = 0;
122 				int emptyResults = 0;
123         while (i < queries.length) {
124         	if (i % 300 == 0) {
125                     logger.info("Validating " + i + " of " + maxQuery);
126         	}
127                 O s = queries[i];
128                     OBPriorityQueueShort < O > x2 = new OBPriorityQueueShort < O >(
129                             k);
130                     searchSequential( s, x2, index, range);
131                     OBPriorityQueueShort < O > x1 = it.next();
132                     
133                     
134 										stats.add(ep(x1,x2,index));
135 										if(x1.getSize() == 0 && x2.getSize() != 0){
136 						emptyResults++;
137 					}
138                     i++;
139                 
140                 
141             }
142 
143 				logger.info("Finished  EP calculation: ");
144 				logger.info(StatsUtil.prettyPrintStats("EP", stats));
145                    
146         logger.info("Finished  matching validation.");
147         assertFalse(it.hasNext());
148 
149 				logger.info("Zero queries: " + emptyResults);
150     }
151 
152 
153 		private double ep(OBPriorityQueueShort<O> x1, OBPriorityQueueShort<O> x2, IndexShort < O > index) throws OBStorageException{
154 		List<OBResultShort<O>> query = x1.getSortedElements();
155 		List<OBResultShort<O>> db = x2.getSortedElements();
156 		int i = 0;
157 		int result = 0;
158 		Set<OBResultShort<O>> s = new HashSet<OBResultShort<O>>();
159 		for(OBResultShort<O> r : query){
160 			// find the position in the db. 
161 			int cx = 0;
162 			for(OBResultShort<O> c : db){
163 				if(s.contains(c)){
164 					cx++;
165 					continue;
166 				}
167 				if(c.compareTo(r) == 0){
168 					s.add(c);
169 					result += cx - i;
170 					break;
171 				}
172 				cx++;
173 			}
174 			i++;
175 		}
176 		if(query.size() == 0){
177 			return 0;
178 		}else{
179 			double res = ((double)result)/ ((double)(query.size() * index.databaseSize()));
180 			return res;
181 		}
182 	}
183 
184 }
185