1 package net.obsearch.index.utils;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileInputStream;
6 import java.io.FileNotFoundException;
7 import java.io.FileReader;
8 import java.io.IOException;
9 import java.io.InputStreamReader;
10 import java.nio.charset.Charset;
11 import org.apache.log4j.Logger;
12 import org.kohsuke.args4j.Option;
13
14 import net.obsearch.Index;
15 import net.obsearch.OB;
16 import net.obsearch.ambient.Ambient;
17 import net.obsearch.exception.IllegalIdException;
18 import net.obsearch.exception.NotFrozenException;
19 import net.obsearch.exception.OBException;
20 import net.obsearch.exception.OBStorageException;
21 import net.obsearch.exception.OutOfRangeException;
22 import net.obsearch.stats.Statistics;
23
24 public abstract class AbstractNewLineBytesCommandLine<O extends OB, I extends Index<O>, A extends Ambient<O, I>>
25 extends AbstractNewLineCommandLine<O, I, A> {
26
27 private static Logger logger = Logger
28 .getLogger(AbstractNewLineBytesCommandLine.class);
29
30 private InputStreamReader createReader(File toOpen)
31 throws FileNotFoundException {
32
33 return new InputStreamReader(new FileInputStream(toOpen), Charset
34 .forName("US-ASCII"));
35 }
36
37 protected void addObjects(I index, File load) throws IOException,
38 OBStorageException, OBException, IllegalAccessException,
39 InstantiationException {
40 if (bulkMode) {
41 logger.info("Using bulk mode");
42 }
43 InputStreamReader r = createReader(load);
44 byte[] line = new byte[arraySize()];
45 int i = 0;
46 int res = read(line, r);
47 while (res != -1) {
48 O o = instantiate(line);
49 if (bulkMode) {
50 index.insertBulk(o);
51 } else {
52 index.insert(o);
53 }
54 res = read(line, r);
55 if (i % 100000 == 0) {
56 logger.info("Loading: " + i);
57
58 }
59 i++;
60 }
61 }
62
63 protected void searchObjects(I index, File load, Statistics other)
64 throws IOException, OBException, InstantiationException,
65 IllegalAccessException {
66 InputStreamReader r = createReader(load);
67 byte[] line = new byte[arraySize()];
68 int i = 0;
69 int res = read(line, r);
70 while (res != -1 && i < super.maxQueries) {
71 O o = instantiate(line);
72 queries++;
73 if (i % 100 == 0) {
74 logger.info("Searching: " + i);
75 }
76 searchObject(index, o, other);
77 res = read(line, r);
78 i++;
79 }
80 }
81
82 private int read(byte[] buffer, InputStreamReader r) throws IOException {
83 int i = 0;
84 while (i < buffer.length) {
85 int b = r.read();
86 if (b == -1 && i != 0) {
87 throw new IOException(
88 "Reached end of file before we could complete one read");
89 } else if (b == -1) {
90 return b;
91 }
92 assert b >= 0;
93 assert b <= Byte.MAX_VALUE;
94 buffer[i] = (byte) b;
95 i++;
96 }
97
98 int b = r.read();
99 if (b == -1) {
100 return b;
101 } else if (b != '\n') {
102 throw new IOException("Format is incorrect");
103 }
104 return b;
105 }
106
107
108
109
110
111
112 protected abstract int arraySize();
113
114
115
116
117
118
119
120 protected abstract O instantiate(byte[] line) throws OBException;
121
122
123
124
125
126
127
128 protected O instantiate(String line) throws OBException {
129 throw new OBException("This is not used here");
130 }
131
132 }