View Javadoc

1   package net.obsearch.index.utils;
2   
3   import java.io.File;
4   import java.io.FileNotFoundException;
5   import java.io.FileWriter;
6   import java.io.IOException;
7   import java.io.InputStream;
8   import java.text.DecimalFormat;
9   import java.util.ArrayList;
10  import java.util.Collection;
11  import java.util.LinkedList;
12  import java.util.List;
13  import java.util.Properties;
14  
15  import net.obsearch.Index;
16  import net.obsearch.OB;
17  import net.obsearch.ambient.Ambient;
18  import net.obsearch.asserts.OBAsserts;
19  import net.obsearch.exception.OBException;
20  import net.obsearch.exception.OBStorageException;
21  import net.obsearch.exception.PivotsUnavailableException;
22  import net.obsearch.stats.Statistics;
23  import net.obsearch.utils.Pair;
24  
25  import org.apache.log4j.Logger;
26  import org.apache.log4j.PropertyConfigurator;
27  import org.freehep.util.argv.BooleanOption;
28  import org.freehep.util.argv.DoubleOption;
29  import org.freehep.util.argv.IntOption;
30  import org.freehep.util.argv.StringOption;
31  import org.kohsuke.args4j.CmdLineException;
32  import org.kohsuke.args4j.CmdLineParser;
33  import org.kohsuke.args4j.Option;
34  import org.opt4j.benchmark.DoubleString;
35  import org.opt4j.core.Archive;
36  import org.opt4j.core.Individual;
37  import org.opt4j.core.Objective;
38  import org.opt4j.core.Objectives;
39  import org.opt4j.core.Objective.Sign;
40  import org.opt4j.core.problem.Creator;
41  import org.opt4j.core.problem.Evaluator;
42  import org.opt4j.optimizer.ea.EvolutionaryAlgorithmModule;
43  import org.opt4j.start.Opt4JTask;
44  
45  import com.google.inject.Module;
46  import com.sleepycat.je.DatabaseException;
47  
48  public abstract class AbstractCommandLine<O extends OB, I extends Index<O>, A extends Ambient<O, I>>
49  		implements Evaluator<DoubleString> {
50  
51  	private static Logger logger = Logger.getLogger(AbstractCommandLine.class);
52  
53  	/**
54  	 * Properties that modify the behavior of this application.
55  	 */
56  	protected Properties props;
57  
58  	protected enum Mode {
59  		search, // search data
60  		create, // create a database
61  		add, // add data to an existing db, only objects that are not in the db are added.
62  		x, // experiment set
63  		// optimize an experiment set.
64  		opt,
65  		approxEvalEP, // evaluate approximate indexes
66  		approxEvalRecall, // evaluate approximate indexes
67  		
68  	};
69  
70  	/**
71  	 * Output format
72  	 */
73  	private DecimalFormat f = new DecimalFormat("00000.0000");
74  
75  	@Option(name = "-h", usage = "Print help message", aliases = { "--help" })
76  	private boolean help = false;
77  
78  	@Option(name = "-v", usage = "Print version information", aliases = { "--version" })
79  	private boolean version = false;
80  
81  	@Option(name = "-db", usage = "Database Folder. Path to the folder where the DB is located", aliases = { "--database" })
82  	private File databaseFolder;
83  
84  	@Option(name = "-l", usage = "Load data into the DB. (only in create mode)", aliases = { "--load" })
85  	private File load;
86  
87  	@Option(name = "-p", usage = "# of pivots to be employed. Used in create mode only", aliases = { "--pivots" })
88  	protected int pivots = 7;
89  
90  	@Option(name = "-k", usage = "# of closest objects to be retrieved.")
91  	protected int k = 1;
92  
93  	@Option(name = "-m", usage = "Set the mode in search, create(start a new DB), add (add data to an existing database)", aliases = { "--mode" })
94  	protected Mode mode;
95  
96  	@Option(name = "-q", usage = "Query Filename. (Search mode only)", aliases = { "--query" })
97  	private File query;
98  
99  	@Option(name = "-mq", usage = "Maximum number of queries to be executed", aliases = { "--max-queries" })
100 	protected int maxQueries = 1000;
101 
102 	@Option(name = "-n", usage = "Name of the experiment", aliases = { "--name" })
103 	protected String experimentName = "default";
104 
105 	@Option(name = "-rf", usage = "Experiment result filename", aliases = { "--exp-result" })
106 	protected String experimentResultFileName = "result.txt";
107 
108 	@Option(name = "-b", usage = "Bulk mode is to be employed for create/add", aliases = { "--bulk" })
109 	protected boolean bulkMode = false;
110 
111 	@Option(name = "-es", usage = "Experiment set, a colon separated list of  ranges and ks. Just like: r_1,k_1:r_1,k_1:...:r_n,k_n ", aliases = { "--experiment-set" })
112 	protected String experimentSet;
113 
114 	@Option(name = "-r", usage = "Range used for retrieval")
115 	protected double r;
116 	
117 	@Option(name = "-evalEp", usage = "Expected ep in approxEvalEp mode" )
118 	protected double approxEvalEp;
119 	
120 	@Option(name = "-evalRecall", usage = "Expected ep in approxEvalRecall mode" )
121 	protected double approxEvalRecall;
122 
123 
124 	/**
125 	 * Options related to opt4j optimization
126 	 */
127 	@Option(name = "-it", usage = "Iterations for the optimization")
128 	protected int iterations = 100;
129 	
130 	@Option(name = "-optPopSize", usage = "Population size (optimization)")
131 	protected int optPopulationSize = 30;
132 	
133 	@Option(name = "-optNumParents", usage = "Num of parents (optimization)")
134 	protected int optNumParents = 8;
135 	
136 	@Option(name = "-optNumChildren", usage = "Num of children (optimization)")
137 	protected int optNumChildren= 8;
138 	
139 	
140 	/**
141 	 * Keep track of the optimization iterations.
142 	 */
143 	private int iterationTimes = 0;
144 
145 	@Option(name = "-validate", usage = "Validate results against sequential search")
146 	protected boolean validate = false;
147 
148 	private A ambiente;
149 
150 	private I index;
151 
152 	private Opt4JTask task;
153 
154 	/**
155 	 * Number of queries executed.
156 	 */
157 	protected int queries = 0;
158 	/**
159 	 * Total ellapsed time during each query.
160 	 */
161 	protected long time;
162 
163 	public void initProperties() throws IOException {
164 
165 		InputStream is = this.getClass().getResourceAsStream(
166 				File.separator + "obsearch.properties");
167 		props = new Properties();
168 		props.load(is);
169 		// configure log4j only once too
170 		String prop = props.getProperty("log4j.file");
171 		PropertyConfigurator.configure(prop);
172 	}
173 
174 	/**
175 	 * Return the "this" reference, used to access all the command line options.
176 	 * 
177 	 * @return The reference of the bottommost class that contains parameters.
178 	 */
179 	protected abstract AbstractCommandLine getReference();
180 
181 	protected I getIndex() {
182 		return index;
183 	}
184 
185 	/**
186 	 * This method must be called by the children of this class.
187 	 * 
188 	 * @param thisReference
189 	 *            this reference of the subclass.
190 	 * @param args
191 	 *            Arguments sent to the application.
192 	 */
193 	public void processUserCommands(String args[]) {
194 
195 		try {
196 			initProperties();
197 		} catch (final Exception e) {
198 			System.err.print("Make sure log4j is configured properly"
199 					+ e.getMessage());
200 			e.printStackTrace();
201 			System.exit(48);
202 		}
203 
204 		CmdLineParser parser = new CmdLineParser(getReference());
205 		try {
206 			parser.parseArgument(args);
207 			// arguments have been loaded.
208 			if (help) {
209 				parser.printUsage(System.err);
210 			}
211 			switch (mode) {
212 			case create:
213 				create();
214 				return;
215 			case search:
216 				search();
217 				return;
218 			case add:
219 				add();
220 				return;
221 			case x:
222 				experimentSet();
223 				return;
224 			case opt:
225 				optimize();
226 				return;
227 			case approxEvalEP:
228 			case approxEvalRecall:
229 				approxEval();
230 				return;
231 			}
232 
233 			throw new OBException("Incorrect operation mode");
234 
235 		} catch (CmdLineException e) {
236 			logger.fatal("Error in command line arguments", e);
237 			parser.printUsage(System.err);
238 			System.err.println();
239 			System.exit(32);
240 		} catch (Exception e) {
241 			logger.fatal(e);
242 			e.printStackTrace();
243 			System.exit(33);
244 		}
245 
246 	}
247 
248 	protected abstract A instantiateNewAmbient(File dbFolder)
249 			throws OBStorageException, OBException, FileNotFoundException,
250 			IllegalAccessException, InstantiationException, IOException;
251 
252 	protected abstract A instantiateAmbient(File dbFolder)
253 			throws OBStorageException, OBException, FileNotFoundException,
254 			IllegalAccessException, InstantiationException, IOException;
255 
256 	/**
257 	 * Adds objects to the index. Loads the objects from File.
258 	 * 
259 	 * @param index
260 	 *            Index to load the objects into.
261 	 * @param load
262 	 *            File to load.
263 	 * @throws FileNotFoundException
264 	 * @throws IOException
265 	 * @throws OBStorageException
266 	 * @throws OBException
267 	 * @throws IllegalAccessException
268 	 * @throws InstantiationException
269 	 */
270 	protected abstract void addObjects(I index, File load)
271 			throws FileNotFoundException, IOException, OBStorageException,
272 			OBException, IllegalAccessException, InstantiationException;
273 
274 	/**
275 	 * Opens a query file and queries the index storing all the results there.
276 	 * 
277 	 * @param index
278 	 *            The index to query.
279 	 * @param query
280 	 *            The query to load.
281 	 * @return ep Value if there was a validation. (0 if everything goes well)
282 	 *         or null otherwise
283 	 * @throws IOException
284 	 * @throws OBException
285 	 * @throws InstantiationException
286 	 * @throws IllegalAccessException
287 	 */
288 	protected abstract void searchObjects(I index, File query, Statistics other)
289 			throws IOException, OBException, InstantiationException,
290 			IllegalAccessException;
291 	
292 	
293 	
294 
295 	protected void create() throws IOException, OBStorageException,
296 			OBException, DatabaseException, InstantiationException,
297 			IllegalAccessException, PivotsUnavailableException {
298 		// OBAsserts.chkFileNotExists(databaseFolder);
299 		OBAsserts.chkFileExists(load);
300 
301 		A ambiente = instantiateNewAmbient(databaseFolder);
302 		I index = ambiente.getIndex();
303 
304 		logger.info("Loading Data...");
305 		logger.info(expName() + " pivots: " + pivots);
306 		addObjects(index, load);
307 		/*
308 		 * logger.info("Closing..."); ambiente.close();
309 		 * logger.info("Re-opening..."); ambiente =
310 		 * instantiateNewAmbient(databaseFolder);
311 		 */
312 		logger.info("Freezing...");
313 		ambiente.freeze();
314 
315 		logger.info(ambiente.getIndex().getStats());
316 		ambiente.close();
317 	}
318 
319 	protected void add() throws IOException, OBStorageException, OBException,
320 			DatabaseException, InstantiationException, IllegalAccessException {
321 		OBAsserts.chkFileExists(databaseFolder);
322 		OBAsserts.chkFileExists(load);
323 
324 		A ambiente = instantiateAmbient(databaseFolder);
325 		I index = ambiente.getIndex();
326 
327 		logger.info("Loading Data... current size: " + index.databaseSize());
328 		addObjects(index, load);
329 		logger.info("Size after load: " + index.databaseSize());
330 		logger.info(index.getStats());
331 		ambiente.close();
332 	}
333 
334 	private void writeLine(FileWriter w, String[] data) throws IOException {
335 
336 		String tab = "";
337 		for (String s : data) {
338 			w.write(tab);
339 			w.write(s);
340 			tab = "\t";
341 		}
342 		w.write("\n");
343 	}
344 
345 	/**
346 	 * Process a list of experiments.
347 	 * 
348 	 * @return Returns the statistics for each set of experiments.
349 	 * @throws OBException
350 	 * @throws IOException
351 	 * @throws DatabaseException
352 	 * @throws InstantiationException
353 	 * @throws IllegalAccessException
354 	 */
355 	private List<Pair<Statistics, Statistics>> processExperimentSet()
356 			throws OBException, IOException, DatabaseException,
357 			InstantiationException, IllegalAccessException {
358 		String[] sets = this.experimentSet.split(":");
359 		List<Pair<Statistics, Statistics>> result = new LinkedList<Pair<Statistics, Statistics>>();
360 		for (String set : sets) {			
361 			String[] rk = set.split(",");
362 			OBAsserts.chkAssert(rk.length == 2, "Wrong experiment set format");
363 			r = Double.parseDouble(rk[0]);
364 			k = Short.parseShort(rk[1]);
365 			result.add(searchAux());
366 
367 		}
368 		return result;
369 
370 	}
371 
372 	/**
373 	 * Experiment set executes a number of experiments and then exists.
374 	 * 
375 	 * @throws OBException
376 	 * @throws IOException
377 	 * @throws DatabaseException
378 	 * @throws InstantiationException
379 	 * @throws IllegalAccessException
380 	 */
381 	private void experimentSet() throws OBException, IOException,
382 			DatabaseException, InstantiationException, IllegalAccessException {
383 		openIndex();
384 		processExperimentSet();
385 		closeIndex();
386 	}
387 	
388 	
389 	/**
390 	 * Evaluation of approximate algorithms
391 	 * 
392 	 * @throws OBException
393 	 * @throws IOException
394 	 * @throws DatabaseException
395 	 * @throws InstantiationException
396 	 * @throws IllegalAccessException
397 	 */
398 	private void approxEval() throws OBException, IOException,
399 			DatabaseException, InstantiationException, IllegalAccessException {
400 		openIndex();
401 		processExperimentSet();
402 		closeIndex();
403 	}
404 
405 	/**
406 	 * Execute processExperimentSet() several times and obtain a set of
407 	 * parameters that gives good results.
408 	 * 
409 	 * @throws OBException
410 	 * @throws IOException
411 	 * @throws DatabaseException
412 	 * @throws InstantiationException
413 	 * @throws IllegalAccessException
414 	 */
415 	private void optimize() throws OBException, IOException, DatabaseException,
416 			InstantiationException, IllegalAccessException {
417 		openIndex();
418 		this.validate = true; // we always validate in optimize mode.
419 		EvolutionaryAlgorithmModule ea = new EvolutionaryAlgorithmModule();
420 		ea.setGenerations(iterations);
421 		ea.setAlpha(optPopulationSize); //population size
422 		ea.setMu(this.optNumParents); // numer of parents per gen
423 		ea.setLambda(this.optNumChildren); // number of children per gen
424 
425 		OBOptimizerModule op = new OBOptimizerModule(getCreator(), this);
426 
427 		Collection<Module> modules = new ArrayList<Module>();
428 		modules.add(ea);
429 		modules.add(op);
430 		// setup opt4j
431 		task = new Opt4JTask(false);
432 		task.init(modules);
433 
434 		try {
435 			task.execute();
436 			logger.info("Final optimization result:");
437 			this.printOptStatus();
438 		} catch (Exception e) {
439 			throw new OBException(e);
440 		} finally {
441 			task.close();
442 		}
443 
444 		closeIndex();
445 	}
446 
447 	/**
448 	 * distance computations
449 	 */
450 	Objective distance = new Objective("distance", Sign.MIN);
451 	/**
452 	 * smap access count
453 	 */
454 	Objective smap = new Objective("smap", Sign.MIN);
455 
456 	/**
457 	 * bucket access count
458 	 */
459 	Objective buckets = new Objective("buckets", Sign.MIN);
460 
461 	/**
462 	 * Recall
463 	 */
464 	Objective recall = new Objective("recall", Sign.MAX);
465 
466 	/**
467 	 * Recall
468 	 */
469 	Objective zeros = new Objective("zeros", Sign.MIN);
470 
471 	/**
472 	 * CompoundError result
473 	 */
474 	Objective ep = new Objective("ep", Sign.MIN);
475 
476 	@Override
477 	public Collection<Objective> getObjectives() {
478 		List<Objective> objs = new LinkedList();
479 		//objs.add(distance);
480 		objs.add(smap);
481 		//objs.add(buckets);
482 		objs.add(recall);
483 		objs.add(ep);
484 		//objs.add(zeros);
485 		return objs;
486 	}
487 
488 	private void printOptStatus() {
489 		Archive archive = task.getInstance(Archive.class);
490 
491 		for (Individual individual : archive) {
492 			logger.info("Param: " + individual.getGenotype());
493 			logger.info("Results: " + individual.getObjectives());
494 		}
495 	}
496 
497 	/**
498 	 * Returns the creator used to generate new configurations for this index.
499 	 * 
500 	 * @return a new creator
501 	 * @throws OBException
502 	 */
503 	protected abstract Creator<DoubleString> getCreator() throws OBException;
504 
505 	public Objectives evaluate(DoubleString config) {
506 		try {
507 			logger.info("Opt status: ");
508 			this.printOptStatus();
509 			logger.info("Evaluating: " + config + " times: " + iterationTimes);
510 			iterationTimes++;
511 			updateIndexConfig(config);
512 			List<Pair<Statistics, Statistics>> stats = processExperimentSet();
513 			int totalQueries = 0;
514 			int failed = 0;
515 			int distances = 0;
516 			int zeros = 0;
517 			int smap = 0;
518 			int buckets = 0;
519 			double ep = 0;
520 			double recall = 0;
521 			for (Pair<Statistics, Statistics> s : stats) {
522 				totalQueries += s.getB().getQueryCount();
523 				failed += s.getB().getExtra("BAD");
524 				ep += s.getB().getStats("CompoundError").mean();
525 				distances += s.getA().getDistanceCount();
526 				recall += s.getB().getStats("RECALL").mean();
527 				smap += s.getA().getSmapCount();
528 				buckets += s.getA().getBucketsRead();
529 				zeros += s.getB().getExtra("ZEROS");
530 			}
531 			// ep = ep / ((double)stats.size()); // normalize ep.
532 			
533 			Objectives objectives = new Objectives();
534 			//objectives.add(this.distance, (double) distances
535 			//		/ (double) totalQueries);
536 			objectives.add(this.smap, (double) smap /   (double) totalQueries);
537 			//objectives.add(this.buckets, (double) buckets
538 			//		 / (double) totalQueries);
539 			objectives.add(this.recall, (double) recall / (double) stats.size());;
540 			objectives.add(this.ep, (double) ep / (double) stats.size());
541 			//objectives.add(this.zeros, (double) zeros / (double) stats.size());
542 			logger.info("Objectives: " + objectives);
543 			// logger.info("BAD: " + failed + " Z: " + zeros + " rec: " + recall
544 			// + " ep: " + ep );
545 			return objectives;
546 
547 		} catch (Exception e) {
548 			// the interface of the library
549 			// does not include exceptions, Hack!
550 			logger.fatal("Fatal error", e);
551 			System.exit(-1);
552 			return null;
553 		}
554 	}
555 
556 	/**
557 	 * Updates the configuration of the index with the given phenotype.
558 	 * 
559 	 * @param phenotype
560 	 */
561 	protected abstract void updateIndexConfig(DoubleString phenotype);
562 
563 	protected String expName() {
564 		String base = this.experimentName + ":r" + this.r + ":k" + this.k ;
565 			if(isApproxMode()){
566 				if(mode == Mode.approxEvalEP){
567 					return base + ":ep" + this.approxEvalEp;
568 				}else{
569 					return base + ":recall" + this.approxEvalRecall;
570 				}
571 			}else{
572 				return base;
573 			}
574 			
575 	}
576 
577 	private String p(double value) {
578 		return ((double) value / (double) queries) + "";
579 	}
580 
581 	protected void writeAll(FileWriter[] files, String str) throws IOException {
582 		for (FileWriter f : files) {
583 			f.write(str);
584 			f.flush();
585 		}
586 	}
587 
588 	protected void closeAll(FileWriter[] files) throws IOException {
589 		for (FileWriter f : files) {
590 			f.close();
591 		}
592 	}
593 
594 	private void openIndex() throws IOException, OBStorageException,
595 			OBException, IllegalAccessException, InstantiationException {
596 		OBAsserts.chkFileExists(databaseFolder);
597 		OBAsserts.chkFileExists(query);
598 		ambiente = instantiateAmbient(databaseFolder);
599 		index = ambiente.getIndex();
600 	}
601 
602 	private void closeIndex() throws OBException {
603 		ambiente.close();
604 	}
605 	
606 	/**
607 	 * Updates the underlying index based on different parameters
608 	 */
609 	protected abstract void updateParams();
610 
611 	protected Pair<Statistics, Statistics> searchAux() throws IOException,
612 			OBStorageException, OBException, DatabaseException,
613 			InstantiationException, IllegalAccessException {
614 
615 		File d = new File(experimentResultFileName);
616 
617 		FileWriter w = new FileWriter(d, true);
618 		if (d.length() == 0) {
619 			// write header if the file is empty
620 			writeLine(w, new String[] { "exp_name", "details", "dist", "smap",
621 					"ep", "recall", "zeros", "buckets" });
622 		}
623 
624 		index.resetStats();
625 		Statistics otherStats = new Statistics();
626 		
627 		updateParams();
628 		logger.info("Executing experiment " + expName() + " : "
629 				 + " : " + expDetails());
630 		
631 		searchObjects(index, query, otherStats);
632 		
633 		logger.info(index.getStats().toString());
634 		logger.info(otherStats.toString());
635 
636 		Statistics stats = index.getStats();
637 		writeLine(w, new String[] { expName(), expDetails(),
638 				String.valueOf(stats.getDistanceCount()),
639 				String.valueOf(stats.getSmapCount()),
640 				String.valueOf(otherStats.getStats("CompoundError").mean()),
641 				String.valueOf(otherStats.getStats("RECALL").mean()),
642 				String.valueOf(otherStats.getExtra("ZEROS")),
643 				String.valueOf(stats.getBucketsRead()), });
644 
645 		w.close();
646 		return new Pair<Statistics, Statistics>(stats, otherStats);
647 	}
648 	
649 	protected boolean isApproxMode(){
650 		return this.mode == Mode.approxEvalEP || this.mode == Mode.approxEvalRecall;
651 	}
652 
653 	protected abstract String expDetails();
654 
655 	/**
656 	 * Perform one search for a given k and r.
657 	 * 
658 	 * @throws IOException
659 	 * @throws OBStorageException
660 	 * @throws OBException
661 	 * @throws DatabaseException
662 	 * @throws InstantiationException
663 	 * @throws IllegalAccessException
664 	 */
665 	protected void search() throws IOException, OBStorageException,
666 			OBException, DatabaseException, InstantiationException,
667 			IllegalAccessException {
668 		openIndex();
669 		searchAux();
670 		closeIndex();
671 	}
672 
673 }