View Javadoc

1   package net.obsearch.index.utils;
2   
3   import java.io.BufferedReader;
4   import java.io.File;
5   import java.io.FileInputStream;
6   import java.io.FileNotFoundException;
7   import java.io.FileReader;
8   import java.io.IOException;
9   import java.io.InputStreamReader;
10  import java.nio.charset.Charset;
11  import org.apache.log4j.Logger;
12  import org.kohsuke.args4j.Option;
13  
14  import net.obsearch.Index;
15  import net.obsearch.OB;
16  import net.obsearch.ambient.Ambient;
17  import net.obsearch.exception.IllegalIdException;
18  import net.obsearch.exception.NotFrozenException;
19  import net.obsearch.exception.OBException;
20  import net.obsearch.exception.OBStorageException;
21  import net.obsearch.exception.OutOfRangeException;
22  import net.obsearch.stats.Statistics;
23  
24  public abstract class AbstractNewLineBytesCommandLine<O extends OB, I extends Index<O>, A extends Ambient<O, I>>
25  		extends AbstractNewLineCommandLine<O, I, A> {
26  
27  	private static Logger logger = Logger
28  			.getLogger(AbstractNewLineBytesCommandLine.class);
29  
30  	private InputStreamReader createReader(File toOpen)
31  			throws FileNotFoundException {
32  
33  		return new InputStreamReader(new FileInputStream(toOpen), Charset
34  				.forName("US-ASCII"));
35  	}
36  
37  	protected void addObjects(I index, File load) throws IOException,
38  			OBStorageException, OBException, IllegalAccessException,
39  			InstantiationException {
40  		if (bulkMode) {
41  			logger.info("Using  bulk mode");
42  		}
43  		InputStreamReader r = createReader(load);
44  		byte[] line = new byte[arraySize()];
45  		int i = 0;
46  		int res = read(line, r);
47  		while (res != -1) {
48  			O o = instantiate(line);
49  			if (bulkMode) {
50  				index.insertBulk(o);
51  			} else {
52  				index.insert(o);
53  			}
54  			res = read(line, r);
55  			if (i % 100000 == 0) {
56  				logger.info("Loading: " + i);
57  				// logger.info(index.getStats().toString());
58  			}
59  			i++;
60  		}
61  	}
62  
63  	protected void searchObjects(I index, File load, Statistics other)
64  			throws IOException, OBException, InstantiationException,
65  			IllegalAccessException {
66  		InputStreamReader r = createReader(load);
67  		byte[] line = new byte[arraySize()];
68  		int i = 0;
69  		int res = read(line, r);
70  		while (res != -1 && i < super.maxQueries) {
71  			O o = instantiate(line);
72  			queries++;
73  			if (i % 100 == 0) {
74  				logger.info("Searching: " + i);
75  			}
76  			searchObject(index, o, other);
77  			res = read(line, r);
78  			i++;
79  		}
80  	}
81  
82  	private int read(byte[] buffer, InputStreamReader r) throws IOException {
83  		int i = 0;
84  		while (i < buffer.length) {
85  			int b = r.read();
86  			if (b == -1 && i != 0) {
87  				throw new IOException(
88  						"Reached end of file before we could complete one read");
89  			} else if (b == -1) {
90  				return b; // we are done.
91  			}
92  			assert b >= 0;
93  			assert b <= Byte.MAX_VALUE;
94  			buffer[i] = (byte) b;
95  			i++;
96  		}
97  		// we should have a newline here
98  		int b = r.read();
99  		if (b == -1) {
100 			return b;
101 		} else if (b != '\n') {
102 			throw new IOException("Format is incorrect");
103 		}
104 		return b;
105 	}
106 
107 	/**
108 	 * Read a byte array with the given size;
109 	 * 
110 	 * @return
111 	 */
112 	protected abstract int arraySize();
113 
114 	/**
115 	 * Instantiate an object from a fixed byte array;
116 	 * 
117 	 * @return The object
118 	 * @throws OBException
119 	 */
120 	protected abstract O instantiate(byte[] line) throws OBException;
121 
122 	/**
123 	 * Instantiate an object from a string.
124 	 * 
125 	 * @return The object
126 	 * @throws OBException
127 	 */
128 	protected O instantiate(String line) throws OBException {
129 		throw new OBException("This is not used here");
130 	}
131 
132 }