1 package net.obsearch.example.vectors; 2 3 import java.io.File; 4 import java.io.FileNotFoundException; 5 import java.io.IOException; 6 import java.io.InputStream; 7 import java.util.Iterator; 8 import java.util.Properties; 9 import java.util.Random; 10 import java.util.logging.Logger; 11 12 import org.apache.log4j.PropertyConfigurator; 13 14 import net.obsearch.ambient.Ambient; 15 import net.obsearch.ambient.bdb.AmbientBDBJe; 16 17 import net.obsearch.exception.NotFrozenException; 18 import net.obsearch.exception.OBException; 19 import net.obsearch.exception.OBStorageException; 20 import net.obsearch.exception.PivotsUnavailableException; 21 22 import net.obsearch.index.utils.Directory; 23 import net.obsearch.index.utils.TUtils; 24 import net.obsearch.pivots.AcceptAll; 25 import net.obsearch.pivots.bustos.impl.IncrementalBustosNavarroChavezInt; 26 import net.obsearch.pivots.bustos.impl.IncrementalBustosNavarroChavezShort; 27 import net.obsearch.result.OBPriorityQueueInt; 28 import net.obsearch.result.OBPriorityQueueShort; 29 import net.obsearch.result.OBResultInt; 30 import net.obsearch.storage.bdb.Utils; 31 32 /* 33 OBSearch: a distributed similarity search engine This project is to 34 similarity search what 'bit-torrent' is to downloads. 35 Copyright (C) 2009 Arnoldo Jose Muller Molina 36 37 This program is free software: you can redistribute it and/or modify 38 it under the terms of the GNU General Public License as published by 39 the Free Software Foundation, either version 3 of the License, or 40 (at your option) any later version. 41 42 This program is distributed in the hope that it will be useful, 43 but WITHOUT ANY WARRANTY; without even the implied warranty of 44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 45 GNU General Public License for more details. 46 47 You should have received a copy of the GNU General Public License 48 along with this program. If not, see <http://www.gnu.org/licenses/>. 49 */ 50 51 /** 52 * VectorsDemo shows how to use OBSearch in vector spaces (L1 distance). 53 * 54 * @author Arnoldo Jose Muller Molina 55 */ 56 57 public class VectorsDemo { 58 59 /** 60 * Dimension of the vectors. 61 */ 62 final static int VEC_SIZE = 20; 63 64 /** 65 * Database size. 66 */ 67 final static int DB_SIZE = 100000; 68 69 /** 70 * FREEZE size 71 */ 72 final static int FREEZE_SIZE = 100000; 73 74 /** 75 * Query count. 76 */ 77 final static int QUERY_SIZE = 100; 78 79 final static float EP = 0.0001f; 80 81 final static float ALPHA = 3f; 82 83 /** 84 * Index folder 85 */ 86 87 final static File INDEX_FOLDER = new File("." + File.separator + 88 "index"); 89 90 91 final static Random r = new Random(); 92 93 /** 94 * Logging provided by Java 95 */ 96 static Logger logger = Logger.getLogger(VectorsDemo.class.getName()); 97 98 /** 99 * Randomly generate a vector. 100 * 101 * @return a randomly generated vector. 102 */ 103 public static L1 generateVector() { 104 105 short[] data = new short[VEC_SIZE]; 106 int i = 0; 107 108 while (i < data.length) { 109 data[i] = (short) r.nextInt(500); 110 i++; 111 } 112 113 return new L1(data); 114 } 115 116 public static L1Long generateLongVector() { 117 118 int[] data = new int[VEC_SIZE]; 119 int i = 0; 120 121 while (i < data.length) { 122 data[i] = r.nextInt(10000); 123 i++; 124 } 125 126 return new L1Long(data); 127 } 128 129 public static L1Float generateFloatVector() { 130 131 float[] data = new float[VEC_SIZE]; 132 int i = 0; 133 134 while (i < data.length) { 135 data[i] = r.nextFloat(); 136 i++; 137 } 138 139 return new L1Float(data); 140 } 141 142 public static void init() throws IOException { 143 144 InputStream is = VectorsDemo.class.getResourceAsStream(File.separator 145 + "obsearch.properties"); 146 Properties props = new Properties(); 147 props.load(is); 148 149 } 150 151 public static void main(String args[]) throws FileNotFoundException, 152 OBStorageException, NotFrozenException, IllegalAccessException, 153 InstantiationException, OBException, IOException, 154 PivotsUnavailableException { 155 156 // init(); 157 // 158 // // Create a pivot selection strategy for L1 distance 159 // IncrementalBustosNavarroChavezShort<L1> sel = new 160 // IncrementalBustosNavarroChavezShort<L1>( 161 // new AcceptAll<L1>(), 4000, 1000); 162 // 163 // // Create the iDistance method with 126 pivots 164 // IDistanceIndexShort<L1> index = new IDistanceIndexShort<L1>(L1.class, 165 // sel, 64); 166 // 167 // // Delete the directory of the index just in case. 168 // Directory.deleteDirectory(INDEX_FOLDER); 169 // 170 // // Create the ambient that will store the index's data. (NOTE: folder 171 // name is hardcoded) 172 // Ambient<L1, IDistanceIndexShort<L1>> a = new AmbientBDBJe<L1, 173 // IDistanceIndexShort<L1>>( index, INDEX_FOLDER ); 174 // 175 // 176 // // Add some random objects to the index: 177 // logger.info("Adding " + DB_SIZE + " objects..."); 178 // int i = 0; 179 // while(i < DB_SIZE){ 180 // index.insert(generateVector()); 181 // i++; 182 // } 183 // 184 // // prepare the index 185 // logger.info("Preparing the index..."); 186 // a.freeze(); 187 // 188 // // now we can match some objects! 189 // logger.info("Querying the index..."); 190 // i = 0; 191 // index.resetStats(); // reset the stats counter 192 // long start = System.currentTimeMillis(); 193 // while(i < QUERY_SIZE){ 194 // L1 q = generateVector(); 195 // // query the index with k=1 196 // OBPriorityQueueShort<L1> queue = new OBPriorityQueueShort<L1>(1); 197 // // perform a query with r=3000000 and k = 1 198 // index.searchOB(q, Short.MAX_VALUE, queue); 199 // // you can see the results with this loop: 200 // /*Iterator<OBResultInt<L1>> it = queue.iterator(); 201 // while(it.hasNext()){ 202 // OBResultInt<L1> res = it.next(); 203 // L1 answerObject = res.getObject(); // get the answer object 204 // long id = res.getId(); // the id of the answer object 205 // int distance = res.getDistance(); // the distance of the object to 206 // the query 207 // }*/ 208 // i++; 209 // } 210 // // print the results of the set of queries. 211 // long elapsed = System.currentTimeMillis() - start; 212 // logger.info("Time per query: " + elapsed / QUERY_SIZE + 213 // " millisec."); 214 // 215 // logger.info("Stats follow: (total distances / pivot vectors computed during the experiment)"); 216 // logger.info(index.getStats().toString()); 217 // // 218 219 } 220 221 }