A 20-minute tutorial

This quick tutorial will show how to use OBSearch.

First, decide what kind of object you want to store. You also need a distance function d that compares two objects and tells you how "far" or "close" they are to each other. Depending on the index, d may have to satisfy the triangle inequality; this is not a requirement for the GHS index.

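In this tutorial we will store vectors of 100 dimensions and compare them with the 1-norm (L1) distance, i.e. the sum of the absolute differences of their components: d(x, y) = |x_1 - y_1| + ... + |x_n - y_n|.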

The following code shows how to define such an object (an "OB object") by implementing the OBFloat interface.

package net.obsearch.example.vectors;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.nio.ShortBuffer;
import java.util.Arrays;

import net.obsearch.asserts.OBAsserts;
import net.obsearch.constants.ByteConstants;
import net.obsearch.exception.OBException;
import net.obsearch.ob.OBFloat;
import net.obsearch.ob.OBInt;
import net.obsearch.ob.OBLong;
import net.obsearch.ob.OBShort;
import net.obsearch.utils.bytes.ByteConversion;

public class L1Float implements OBFloat {
        private float[] vector;
        public L1Float(){
                // required by OBSearch
         * Construct an object from an array.
         * @param vector
        public L1Float(float[] vector){
                this.vector = vector;
         * Parses a string with numbers separated by spaces
         * @param data
        public L1Float(String data)throws OBException{
                String[] split = data.split("[ |,]");
                vector = new float[split.length];
                //OBAsserts.chkAssert(vector.length == 64, "Size wrong for vector: " + vector.length);
                int i = 0;
                for(String s : split){
                        vector[i] = Float.parseFloat(s);

        public float distance(OBFloat object) throws OBException {
                L1Float other = (L1Float)object;
                int i = 0;
                float res = 0;
                OBAsserts.chkAssert(vector.length == other.vector.length, "Vector size mismatch");
                while(i < vector.length){
                        res += Math.abs(vector[i] - other.vector[i]);
                OBAsserts.chkAssert(res <= Long.MAX_VALUE, "max value exceeded");
                return res; 

        public void load(byte[] input) throws OBException, IOException {
                FloatBuffer s = ByteConversion.createByteBuffer(input).asFloatBuffer();
                vector = new float[input.length / ByteConstants.Float.getSize()];
     * 6) Equals method. Implementation of the equals method is required. A
     * casting error can happen here, but we don't check it for efficiency
     * reasons.
     * @param object
     *            The object to compare.
     * @return true if this and object are equal.
    public final boolean equals(final Object object) {
        L1Float o = (L1Float) object;
        return Arrays.equals(vector, o.vector);

        public byte[] store() throws OBException, IOException {
                ByteBuffer b = ByteConversion.createByteBuffer(ByteConstants.Float.getSize() * vector.length);
                FloatBuffer s = b.asFloatBuffer();
                return b.array();               


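Now you can insert objects into an index and search for them. The following demo creates a GHS index (Sketch64Float) over L1Float objects and queries it for the nearest neighbor (k = 1) of each query vector.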

package net.obsearch.example.vectors;

import hep.aida.bin.StaticBin1D;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import net.obsearch.ambient.Ambient;

import net.obsearch.ambient.bdb.AmbientBDBJe;

import net.obsearch.ambient.tc.AmbientTC;
import net.obsearch.exception.NotFrozenException;
import net.obsearch.exception.OBException;
import net.obsearch.exception.OBStorageException;
import net.obsearch.exception.PivotsUnavailableException;
import net.obsearch.index.ghs.impl.Sketch64Float;
import net.obsearch.index.ghs.impl.Sketch64Long;

import net.obsearch.index.utils.Directory;
import net.obsearch.pivots.AcceptAll;
import net.obsearch.pivots.bustos.impl.IncrementalBustosNavarroChavezShort;
import net.obsearch.pivots.rf02.RF02PivotSelectorShort;
import net.obsearch.pivots.rf03.RF03PivotSelectorLong;
import net.obsearch.pivots.rf03.RF03PivotSelectorShort;
import net.obsearch.pivots.rf04.RF04PivotSelectorFloat;
import net.obsearch.query.OBQueryFloat;
import net.obsearch.query.OBQueryLong;

import net.obsearch.result.OBPriorityQueueFloat;
import net.obsearch.result.OBPriorityQueueLong;
import net.obsearch.result.OBPriorityQueueShort;
import net.obsearch.result.OBResultShort;

/**
 * Demo that creates a Sketch64Float index over L1Float objects, stores it
 * through an AmbientTC ambient in INDEX_FOLDER, runs k = 1 queries with
 * searchOB and then calls fullMatchLite for each query to validate the
 * results.
 *
 * NOTE(review): this listing is incomplete. Closing braces and several
 * statements announced by the comments below are not present, so it does
 * not compile as shown. logger, DB_SIZE, QUERY_SIZE, INDEX_FOLDER and
 * generateFloatVector() are not declared here; presumably they are
 * inherited from VectorsDemo -- confirm.
 */
public class VectorsDemoGHS extends VectorsDemo {
        /**
         * Runs the demo: configures the index, loads objects, queries the
         * index and validates the answers.
         *
         * @param args command line arguments; not read in the visible code.
         */
        public static void main(String args[]) throws FileNotFoundException, OBStorageException, NotFrozenException, IllegalAccessException, InstantiationException, OBException, IOException, PivotsUnavailableException {
                // Delete the directory of the index just in case.
                // NOTE(review): no statement that deletes the directory is visible here.
                // Create the pivot selection strategy
                RF04PivotSelectorFloat<L1Float> sel = new RF04PivotSelectorFloat<L1Float>(new AcceptAll<L1Float>());
                // make the bit set as short so that m objects can fit in the buckets.
                // create an index.
                // Choose pivot sizes that are multiples of 64 to optimize the space
            Sketch64Float<L1Float> index = new Sketch64Float<L1Float>(L1Float.class, sel, 256);
            // NOTE(review): the next four comment lines describe index settings
            // (expected error, probability) whose statements are not visible.
            // error expected 
            // small if you are planning to insert a lot of objects!
            // Probability of returning an error within 1.40 times the real distance
            // (measured in standard deviations) (3 means a prob. of 0.99)
            // select the ks that the user will call. 
            // This example will only be called with k=1
            index.setMaxK(new int[]{1});          
            // little optimization that can help if your objects are of the same size.
            // NOTE(review): the statement for this optimization is not visible.
                // Create the ambient that will store the index's data. (NOTE: folder name is hardcoded)
        Ambient<L1Float, Sketch64Float<L1Float>> a =  new AmbientTC<L1Float, Sketch64Float<L1Float>>( index, INDEX_FOLDER );
                // Add some random objects to the index:        
                logger.info("Adding " + DB_SIZE + " objects...");
                int i = 0;              
                // NOTE(review): the loop below is truncated -- no insertion call,
                // no increment of i and no closing braces are visible.
                while(i < DB_SIZE){
                        // log progress every 100000 objects
                        if(i % 100000 == 0){
                                logger.info("Loading: " + i);
                // prepare the index
                // NOTE(review): no call that prepares the index is visible here.
                logger.info("Preparing the index...");
                logger.info("YAY! stats: " + index.getStats());
                // now we can match some objects!               
                logger.info("Querying the index...");
                i = 0;
                index.resetStats(); // reset the stats counter
                long start = System.currentTimeMillis();
                List<OBPriorityQueueFloat<L1Float>> queryResults = new ArrayList<OBPriorityQueueFloat<L1Float>>(QUERY_SIZE);
                List<L1Float> queries = new ArrayList<L1Float>(QUERY_SIZE);
                // NOTE(review): this loop is truncated too -- nothing is added to
                // queryResults or queries, i is not incremented and the closing
                // brace is missing in the visible lines.
                while(i < QUERY_SIZE){
                        L1Float q =     generateFloatVector();  
                        // query the index with k=1                     
                        OBPriorityQueueFloat<L1Float> queue = new OBPriorityQueueFloat<L1Float>(1);                     
                        // perform a query with a large range and k = 1 
                        index.searchOB(q, Float.MAX_VALUE, queue);
                // print the results of the set of queries. 
                long elapsed = System.currentTimeMillis() - start;
                // integer division: the average is truncated to whole milliseconds
                logger.info("Time per query: " + elapsed / QUERY_SIZE + " millisec.");
                logger.info("Stats follow: (total distances / pivot vectors computed during the experiment)");

                // now we validate the result of the search
                logger.info("Doing Error validation");
                StaticBin1D ep = new StaticBin1D();

                Iterator<OBPriorityQueueFloat<L1Float>> it1 = queryResults.iterator();
                Iterator<L1Float> it2 = queries.iterator();
                StaticBin1D seqTime = new StaticBin1D();
                i = 0;
                        // NOTE(review): the following lines look like the body of a loop
                        // over it1/it2 whose header and closing brace are not visible.
                        OBPriorityQueueFloat<L1Float> qu = it1.next();
                        L1Float q = it2.next();
                        // time the fullMatchLite call for this query
                        long time = System.currentTimeMillis();
                        float[] sortedList = index.fullMatchLite(q, false);
                        long el = System.currentTimeMillis() - time;
                        logger.info("Elapsed: " + el + " "  + i);
                        // NOTE(review): sortedList, queryObj, ep and seqTime are not used
                        // in the visible lines; the comparison code appears to be missing.
                        OBQueryFloat<L1Float> queryObj = new OBQueryFloat<L1Float       >(q, Float.MAX_VALUE, qu, null);
                // NOTE(review): the message below has no value appended, and the
                // closing braces of main and of the class are missing.
                logger.info("Time per seq query: ");


To run the demo above, execute:

java -classpath obsearch-with-dependencies.jar net.obsearch.example.vectors.VectorsDemoGHS