1 package net.obsearch.index.utils;
2
3 import java.io.File;
4 import java.io.FileNotFoundException;
5 import java.io.FileWriter;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.text.DecimalFormat;
9 import java.util.ArrayList;
10 import java.util.Collection;
11 import java.util.LinkedList;
12 import java.util.List;
13 import java.util.Properties;
14
15 import net.obsearch.Index;
16 import net.obsearch.OB;
17 import net.obsearch.ambient.Ambient;
18 import net.obsearch.asserts.OBAsserts;
19 import net.obsearch.exception.OBException;
20 import net.obsearch.exception.OBStorageException;
21 import net.obsearch.exception.PivotsUnavailableException;
22 import net.obsearch.stats.Statistics;
23 import net.obsearch.utils.Pair;
24
25 import org.apache.log4j.Logger;
26 import org.apache.log4j.PropertyConfigurator;
27 import org.freehep.util.argv.BooleanOption;
28 import org.freehep.util.argv.DoubleOption;
29 import org.freehep.util.argv.IntOption;
30 import org.freehep.util.argv.StringOption;
31 import org.kohsuke.args4j.CmdLineException;
32 import org.kohsuke.args4j.CmdLineParser;
33 import org.kohsuke.args4j.Option;
34 import org.opt4j.benchmark.DoubleString;
35 import org.opt4j.core.Archive;
36 import org.opt4j.core.Individual;
37 import org.opt4j.core.Objective;
38 import org.opt4j.core.Objectives;
39 import org.opt4j.core.Objective.Sign;
40 import org.opt4j.core.problem.Creator;
41 import org.opt4j.core.problem.Evaluator;
42 import org.opt4j.optimizer.ea.EvolutionaryAlgorithmModule;
43 import org.opt4j.start.Opt4JTask;
44
45 import com.google.inject.Module;
46 import com.sleepycat.je.DatabaseException;
47
48 public abstract class AbstractCommandLine<O extends OB, I extends Index<O>, A extends Ambient<O, I>>
49 implements Evaluator<DoubleString> {
50
51 private static Logger logger = Logger.getLogger(AbstractCommandLine.class);
52
53
54
55
56 protected Properties props;
57
58 protected enum Mode {
59 search,
60 create,
61 add,
62 x,
63
64 opt,
65 approxEvalEP,
66 approxEvalRecall,
67
68 };
69
70
71
72
73 private DecimalFormat f = new DecimalFormat("00000.0000");
74
75 @Option(name = "-h", usage = "Print help message", aliases = { "--help" })
76 private boolean help = false;
77
78 @Option(name = "-v", usage = "Print version information", aliases = { "--version" })
79 private boolean version = false;
80
81 @Option(name = "-db", usage = "Database Folder. Path to the folder where the DB is located", aliases = { "--database" })
82 private File databaseFolder;
83
84 @Option(name = "-l", usage = "Load data into the DB. (only in create mode)", aliases = { "--load" })
85 private File load;
86
87 @Option(name = "-p", usage = "# of pivots to be employed. Used in create mode only", aliases = { "--pivots" })
88 protected int pivots = 7;
89
90 @Option(name = "-k", usage = "# of closest objects to be retrieved.")
91 protected int k = 1;
92
93 @Option(name = "-m", usage = "Set the mode in search, create(start a new DB), add (add data to an existing database)", aliases = { "--mode" })
94 protected Mode mode;
95
96 @Option(name = "-q", usage = "Query Filename. (Search mode only)", aliases = { "--query" })
97 private File query;
98
99 @Option(name = "-mq", usage = "Maximum number of queries to be executed", aliases = { "--max-queries" })
100 protected int maxQueries = 1000;
101
102 @Option(name = "-n", usage = "Name of the experiment", aliases = { "--name" })
103 protected String experimentName = "default";
104
105 @Option(name = "-rf", usage = "Experiment result filename", aliases = { "--exp-result" })
106 protected String experimentResultFileName = "result.txt";
107
108 @Option(name = "-b", usage = "Bulk mode is to be employed for create/add", aliases = { "--bulk" })
109 protected boolean bulkMode = false;
110
111 @Option(name = "-es", usage = "Experiment set, a colon separated list of ranges and ks. Just like: r_1,k_1:r_1,k_1:...:r_n,k_n ", aliases = { "--experiment-set" })
112 protected String experimentSet;
113
114 @Option(name = "-r", usage = "Range used for retrieval")
115 protected double r;
116
117 @Option(name = "-evalEp", usage = "Expected ep in approxEvalEp mode" )
118 protected double approxEvalEp;
119
120 @Option(name = "-evalRecall", usage = "Expected ep in approxEvalRecall mode" )
121 protected double approxEvalRecall;
122
123
124
125
126
127 @Option(name = "-it", usage = "Iterations for the optimization")
128 protected int iterations = 100;
129
130 @Option(name = "-optPopSize", usage = "Population size (optimization)")
131 protected int optPopulationSize = 30;
132
133 @Option(name = "-optNumParents", usage = "Num of parents (optimization)")
134 protected int optNumParents = 8;
135
136 @Option(name = "-optNumChildren", usage = "Num of children (optimization)")
137 protected int optNumChildren= 8;
138
139
140
141
142
143 private int iterationTimes = 0;
144
145 @Option(name = "-validate", usage = "Validate results against sequential search")
146 protected boolean validate = false;
147
148 private A ambiente;
149
150 private I index;
151
152 private Opt4JTask task;
153
154
155
156
157 protected int queries = 0;
158
159
160
161 protected long time;
162
163 public void initProperties() throws IOException {
164
165 InputStream is = this.getClass().getResourceAsStream(
166 File.separator + "obsearch.properties");
167 props = new Properties();
168 props.load(is);
169
170 String prop = props.getProperty("log4j.file");
171 PropertyConfigurator.configure(prop);
172 }
173
174
175
176
177
178
179 protected abstract AbstractCommandLine getReference();
180
181 protected I getIndex() {
182 return index;
183 }
184
185
186
187
188
189
190
191
192
193 public void processUserCommands(String args[]) {
194
195 try {
196 initProperties();
197 } catch (final Exception e) {
198 System.err.print("Make sure log4j is configured properly"
199 + e.getMessage());
200 e.printStackTrace();
201 System.exit(48);
202 }
203
204 CmdLineParser parser = new CmdLineParser(getReference());
205 try {
206 parser.parseArgument(args);
207
208 if (help) {
209 parser.printUsage(System.err);
210 }
211 switch (mode) {
212 case create:
213 create();
214 return;
215 case search:
216 search();
217 return;
218 case add:
219 add();
220 return;
221 case x:
222 experimentSet();
223 return;
224 case opt:
225 optimize();
226 return;
227 case approxEvalEP:
228 case approxEvalRecall:
229 approxEval();
230 return;
231 }
232
233 throw new OBException("Incorrect operation mode");
234
235 } catch (CmdLineException e) {
236 logger.fatal("Error in command line arguments", e);
237 parser.printUsage(System.err);
238 System.err.println();
239 System.exit(32);
240 } catch (Exception e) {
241 logger.fatal(e);
242 e.printStackTrace();
243 System.exit(33);
244 }
245
246 }
247
248 protected abstract A instantiateNewAmbient(File dbFolder)
249 throws OBStorageException, OBException, FileNotFoundException,
250 IllegalAccessException, InstantiationException, IOException;
251
252 protected abstract A instantiateAmbient(File dbFolder)
253 throws OBStorageException, OBException, FileNotFoundException,
254 IllegalAccessException, InstantiationException, IOException;
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270 protected abstract void addObjects(I index, File load)
271 throws FileNotFoundException, IOException, OBStorageException,
272 OBException, IllegalAccessException, InstantiationException;
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288 protected abstract void searchObjects(I index, File query, Statistics other)
289 throws IOException, OBException, InstantiationException,
290 IllegalAccessException;
291
292
293
294
295 protected void create() throws IOException, OBStorageException,
296 OBException, DatabaseException, InstantiationException,
297 IllegalAccessException, PivotsUnavailableException {
298
299 OBAsserts.chkFileExists(load);
300
301 A ambiente = instantiateNewAmbient(databaseFolder);
302 I index = ambiente.getIndex();
303
304 logger.info("Loading Data...");
305 logger.info(expName() + " pivots: " + pivots);
306 addObjects(index, load);
307
308
309
310
311
312 logger.info("Freezing...");
313 ambiente.freeze();
314
315 logger.info(ambiente.getIndex().getStats());
316 ambiente.close();
317 }
318
319 protected void add() throws IOException, OBStorageException, OBException,
320 DatabaseException, InstantiationException, IllegalAccessException {
321 OBAsserts.chkFileExists(databaseFolder);
322 OBAsserts.chkFileExists(load);
323
324 A ambiente = instantiateAmbient(databaseFolder);
325 I index = ambiente.getIndex();
326
327 logger.info("Loading Data... current size: " + index.databaseSize());
328 addObjects(index, load);
329 logger.info("Size after load: " + index.databaseSize());
330 logger.info(index.getStats());
331 ambiente.close();
332 }
333
334 private void writeLine(FileWriter w, String[] data) throws IOException {
335
336 String tab = "";
337 for (String s : data) {
338 w.write(tab);
339 w.write(s);
340 tab = "\t";
341 }
342 w.write("\n");
343 }
344
345
346
347
348
349
350
351
352
353
354
355 private List<Pair<Statistics, Statistics>> processExperimentSet()
356 throws OBException, IOException, DatabaseException,
357 InstantiationException, IllegalAccessException {
358 String[] sets = this.experimentSet.split(":");
359 List<Pair<Statistics, Statistics>> result = new LinkedList<Pair<Statistics, Statistics>>();
360 for (String set : sets) {
361 String[] rk = set.split(",");
362 OBAsserts.chkAssert(rk.length == 2, "Wrong experiment set format");
363 r = Double.parseDouble(rk[0]);
364 k = Short.parseShort(rk[1]);
365 result.add(searchAux());
366
367 }
368 return result;
369
370 }
371
372
373
374
375
376
377
378
379
380
381 private void experimentSet() throws OBException, IOException,
382 DatabaseException, InstantiationException, IllegalAccessException {
383 openIndex();
384 processExperimentSet();
385 closeIndex();
386 }
387
388
389
390
391
392
393
394
395
396
397
398 private void approxEval() throws OBException, IOException,
399 DatabaseException, InstantiationException, IllegalAccessException {
400 openIndex();
401 processExperimentSet();
402 closeIndex();
403 }
404
405
406
407
408
409
410
411
412
413
414
415 private void optimize() throws OBException, IOException, DatabaseException,
416 InstantiationException, IllegalAccessException {
417 openIndex();
418 this.validate = true;
419 EvolutionaryAlgorithmModule ea = new EvolutionaryAlgorithmModule();
420 ea.setGenerations(iterations);
421 ea.setAlpha(optPopulationSize);
422 ea.setMu(this.optNumParents);
423 ea.setLambda(this.optNumChildren);
424
425 OBOptimizerModule op = new OBOptimizerModule(getCreator(), this);
426
427 Collection<Module> modules = new ArrayList<Module>();
428 modules.add(ea);
429 modules.add(op);
430
431 task = new Opt4JTask(false);
432 task.init(modules);
433
434 try {
435 task.execute();
436 logger.info("Final optimization result:");
437 this.printOptStatus();
438 } catch (Exception e) {
439 throw new OBException(e);
440 } finally {
441 task.close();
442 }
443
444 closeIndex();
445 }
446
447
448
449
450 Objective distance = new Objective("distance", Sign.MIN);
451
452
453
454 Objective smap = new Objective("smap", Sign.MIN);
455
456
457
458
459 Objective buckets = new Objective("buckets", Sign.MIN);
460
461
462
463
464 Objective recall = new Objective("recall", Sign.MAX);
465
466
467
468
469 Objective zeros = new Objective("zeros", Sign.MIN);
470
471
472
473
474 Objective ep = new Objective("ep", Sign.MIN);
475
476 @Override
477 public Collection<Objective> getObjectives() {
478 List<Objective> objs = new LinkedList();
479
480 objs.add(smap);
481
482 objs.add(recall);
483 objs.add(ep);
484
485 return objs;
486 }
487
488 private void printOptStatus() {
489 Archive archive = task.getInstance(Archive.class);
490
491 for (Individual individual : archive) {
492 logger.info("Param: " + individual.getGenotype());
493 logger.info("Results: " + individual.getObjectives());
494 }
495 }
496
497
498
499
500
501
502
503 protected abstract Creator<DoubleString> getCreator() throws OBException;
504
505 public Objectives evaluate(DoubleString config) {
506 try {
507 logger.info("Opt status: ");
508 this.printOptStatus();
509 logger.info("Evaluating: " + config + " times: " + iterationTimes);
510 iterationTimes++;
511 updateIndexConfig(config);
512 List<Pair<Statistics, Statistics>> stats = processExperimentSet();
513 int totalQueries = 0;
514 int failed = 0;
515 int distances = 0;
516 int zeros = 0;
517 int smap = 0;
518 int buckets = 0;
519 double ep = 0;
520 double recall = 0;
521 for (Pair<Statistics, Statistics> s : stats) {
522 totalQueries += s.getB().getQueryCount();
523 failed += s.getB().getExtra("BAD");
524 ep += s.getB().getStats("CompoundError").mean();
525 distances += s.getA().getDistanceCount();
526 recall += s.getB().getStats("RECALL").mean();
527 smap += s.getA().getSmapCount();
528 buckets += s.getA().getBucketsRead();
529 zeros += s.getB().getExtra("ZEROS");
530 }
531
532
533 Objectives objectives = new Objectives();
534
535
536 objectives.add(this.smap, (double) smap / (double) totalQueries);
537
538
539 objectives.add(this.recall, (double) recall / (double) stats.size());;
540 objectives.add(this.ep, (double) ep / (double) stats.size());
541
542 logger.info("Objectives: " + objectives);
543
544
545 return objectives;
546
547 } catch (Exception e) {
548
549
550 logger.fatal("Fatal error", e);
551 System.exit(-1);
552 return null;
553 }
554 }
555
556
557
558
559
560
561 protected abstract void updateIndexConfig(DoubleString phenotype);
562
563 protected String expName() {
564 String base = this.experimentName + ":r" + this.r + ":k" + this.k ;
565 if(isApproxMode()){
566 if(mode == Mode.approxEvalEP){
567 return base + ":ep" + this.approxEvalEp;
568 }else{
569 return base + ":recall" + this.approxEvalRecall;
570 }
571 }else{
572 return base;
573 }
574
575 }
576
577 private String p(double value) {
578 return ((double) value / (double) queries) + "";
579 }
580
581 protected void writeAll(FileWriter[] files, String str) throws IOException {
582 for (FileWriter f : files) {
583 f.write(str);
584 f.flush();
585 }
586 }
587
588 protected void closeAll(FileWriter[] files) throws IOException {
589 for (FileWriter f : files) {
590 f.close();
591 }
592 }
593
594 private void openIndex() throws IOException, OBStorageException,
595 OBException, IllegalAccessException, InstantiationException {
596 OBAsserts.chkFileExists(databaseFolder);
597 OBAsserts.chkFileExists(query);
598 ambiente = instantiateAmbient(databaseFolder);
599 index = ambiente.getIndex();
600 }
601
602 private void closeIndex() throws OBException {
603 ambiente.close();
604 }
605
606
607
608
609 protected abstract void updateParams();
610
611 protected Pair<Statistics, Statistics> searchAux() throws IOException,
612 OBStorageException, OBException, DatabaseException,
613 InstantiationException, IllegalAccessException {
614
615 File d = new File(experimentResultFileName);
616
617 FileWriter w = new FileWriter(d, true);
618 if (d.length() == 0) {
619
620 writeLine(w, new String[] { "exp_name", "details", "dist", "smap",
621 "ep", "recall", "zeros", "buckets" });
622 }
623
624 index.resetStats();
625 Statistics otherStats = new Statistics();
626
627 updateParams();
628 logger.info("Executing experiment " + expName() + " : "
629 + " : " + expDetails());
630
631 searchObjects(index, query, otherStats);
632
633 logger.info(index.getStats().toString());
634 logger.info(otherStats.toString());
635
636 Statistics stats = index.getStats();
637 writeLine(w, new String[] { expName(), expDetails(),
638 String.valueOf(stats.getDistanceCount()),
639 String.valueOf(stats.getSmapCount()),
640 String.valueOf(otherStats.getStats("CompoundError").mean()),
641 String.valueOf(otherStats.getStats("RECALL").mean()),
642 String.valueOf(otherStats.getExtra("ZEROS")),
643 String.valueOf(stats.getBucketsRead()), });
644
645 w.close();
646 return new Pair<Statistics, Statistics>(stats, otherStats);
647 }
648
649 protected boolean isApproxMode(){
650 return this.mode == Mode.approxEvalEP || this.mode == Mode.approxEvalRecall;
651 }
652
653 protected abstract String expDetails();
654
655
656
657
658
659
660
661
662
663
664
665 protected void search() throws IOException, OBStorageException,
666 OBException, DatabaseException, InstantiationException,
667 IllegalAccessException {
668 openIndex();
669 searchAux();
670 closeIndex();
671 }
672
673 }