1 package net.obsearch.index.ghs;
2
3 import it.unimi.dsi.io.InputBitStream;
4 import it.unimi.dsi.io.OutputBitStream;
5
6 import java.io.File;
7 import java.io.FileInputStream;
8 import java.io.FileOutputStream;
9 import java.io.IOException;
10 import java.util.ArrayList;
11 import java.util.Arrays;
12 import java.util.Collections;
13 import java.util.List;
14
15 import net.obsearch.asserts.OBAsserts;
16 import net.obsearch.exception.OBException;
17 import net.obsearch.result.OBResultInvertedByte;
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 public class CompressedBitSet64 {
50
51 protected byte[] data;
52 private File f;
53 private OutputBitStream out;
54 protected int count = 0;
55 protected long first;
56 private long previous;
57 private boolean commit;
58
59
60
61
62
63
64 public CompressedBitSet64() throws OBException {
65 try {
66 f = File.createTempFile("OBSearch", "bitset");
67 FileOutputStream o = new FileOutputStream(f);
68 out = new OutputBitStream(o);
69 } catch (IOException e) {
70 throw new OBException(e);
71 }
72 commit = false;
73 }
74
75
76
77
78
79
80
81 public void add(long bit) throws OBException {
82 OBAsserts.chkAssert(!commit, "Cannot add when commited");
83 if (count == 0) {
84 first = bit;
85 } else {
86 OBAsserts.chkAssert(previous < bit,
87 "Elements must be ordered from lower to higher values");
88 long d = bit - previous;
89 try {
90 write(d, out);
91 } catch (IOException e) {
92 throw new OBException(e);
93 }
94 }
95 previous = bit;
96 count++;
97 }
98
99
100
101
102
103
104 public void commit() throws OBException {
105 OBAsserts.chkAssert(!commit, "Cannot commit when commited");
106 try {
107 out.close();
108 long byteSize = f.length();
109 OBAsserts.chkAssert(byteSize <= Integer.MAX_VALUE,
110 "Exceeded allowed index size");
111 data = new byte[(int) byteSize];
112 FileInputStream fi = new FileInputStream(f);
113 fi.read(data);
114 fi.close();
115 f.delete();
116 } catch (IOException e) {
117 throw new OBException(e);
118 }
119 }
120
121 public long getBytesSize() {
122 return data.length;
123 }
124
125
126
127
128
129
130
131
132 public final int bucketDistance(long a, long b) {
133 return Long.bitCount(a ^ b);
134 }
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150 public long[] searchBuckets(long query, int maxF, int m)
151 throws OBException {
152
153 InputBitStream delta = new InputBitStream(data);
154 long[] result = new long[maxF];
155 FastPriorityQueueLong l = new FastPriorityQueueLong(m, maxF);
156
157
158 int distance = bucketDistance(query, first);
159
160 long prev = first;
161 l.add(first, distance);
162
163 try {
164 int i = 1;
165 while (i < count) {
166 long object = read(delta) + prev;
167 distance = bucketDistance(query, object);
168 l.add(object, distance);
169 prev = object;
170 i++;
171 }
172 } catch (IOException e) {
173 throw new OBException(e);
174 }
175 return l.get();
176 }
177
178
179
180
181
182
183
184
185 public List<OBResultInvertedByte<Long>> searchFull(long query) throws OBException {
186 List<OBResultInvertedByte<Long>> result = new ArrayList<OBResultInvertedByte<Long>>(size());
187 InputBitStream delta = new InputBitStream(data);
188
189 int distance = bucketDistance(query, first);
190 result.add(new OBResultInvertedByte<Long>(first, first, (byte)distance));
191 long prev = first;
192 int i = 1;
193 try {
194 while (i < count) {
195 long object;
196
197 object = read(delta) + prev;
198
199 distance = bucketDistance(query, object);
200 assert distance <= Byte.MAX_VALUE;
201 result.add(new OBResultInvertedByte<Long>(object, object, (byte)distance));
202 prev = object;
203 i++;
204 }
205 Collections.sort(result);
206 return result;
207 } catch (IOException e) {
208 throw new OBException(e);
209 }
210 }
211
212 public int size(){
213 return count;
214 }
215
216
217
218
219
220
221 protected long[] getAll() throws IOException{
222 long[] result = new long[count];
223 long prev = first;
224 int i = 0;
225 result[i] = prev;
226 i++;
227 InputBitStream delta = new InputBitStream(data);
228 while(i < result.length){
229 result[i] = read(delta) + prev;
230 prev = result[i];
231 i++;
232 }
233 return result;
234 }
235
236 private void write(long object, OutputBitStream out) throws IOException{
237 out.writeLongDelta(object);
238 }
239
240 protected long read(InputBitStream in) throws IOException{
241 return in.readLongDelta();
242 }
243
244 }