View Javadoc

1   package net.obsearch.example.doc;
2   
3   import java.util.Arrays;
4   
5   import net.obsearch.exception.OBException;
6   import net.obsearch.ob.OBFloat;
7   /**
8    * Implementation of the Tanimoto distance.
9    * as described by Zezula et al. in the book:
10   * Similarity Search, the metric space approach (Section 3.6, page 14)
11   * @author amuller
12   *
13   */
14  public class OBTanimoto extends AbstractDocument {
15  
16  	public OBTanimoto(){
17  		
18  	}
19  	public OBTanimoto(String data) throws OBException {
20  		super(data);		
21  	}
22  	
23  public float distance(OBFloat object) throws OBException {
24  		
25  		OBTanimoto other = (OBTanimoto)object;
26  		
27  		// calculate the dot product
28  		// ids must be sorted prior doing this.
29  		int dot = 0;
30  		int i1 = 0;
31  		int i2 = 0;
32  		final int max1 = super.ids.length;
33  		final int max2 = other.ids.length;
34  		while(i1 < max1 && i2 < max2){
35  			if(ids[i1] < other.ids[i2]){
36  				//dot = Math.max(counts[i1], dot);
37  				dot += counts[i1];
38  				i1++;
39  			}else if(ids[i1] > other.ids[i2]){
40  				//dot = Math.max(other.counts[i2], dot);
41  				dot += other.counts[i2];
42  				i2++;
43  			}else{ // they are equal				
44  				//dot = Math.max(Math.abs(counts[i1] - other.counts[i2]), dot);
45  				dot += Math.abs(counts[i1] - other.counts[i2]);
46  				i1++;
47  				i2++;
48  			}			
49  		}
50  		while(i1 < max1){
51  			dot += counts[i1];
52  			//dot = Math.max(counts[i1], dot);
53  			i1++;
54  		}
55  		while(i2 < max2){
56  			dot += other.counts[i2];
57  			//dot = Math.max(other.counts[i2], dot);
58  			i2++;
59  		}
60  		assert ((float)dot) == dot;
61  		
62  		return (float)dot;
63  	}
64  
65  	@Override
66  	/*
67  	public float distance(OBFloat object) throws OBException {
68  		
69  		OBTanimoto other = (OBTanimoto)object;
70  		
71  		// calculate the dot product
72  		// ids must be sorted prior doing this.
73  		long dot = 0;
74  		int i1 = 0;
75  		int i2 = 0;
76  		final int max1 = super.ids.length;
77  		final int max2 = other.ids.length;
78  		while(i1 < max1 && i2 < max2){
79  			if(ids[i1] < other.ids[i2]){
80  				i1++;
81  			}else if(ids[i1] > other.ids[i2]){
82  				i2++;
83  			}else{ // they are equal				
84  				dot += counts[i1] * other.counts[i2];
85  				i1++;
86  				i2++;
87  			}			
88  		}
89  		
90  		long normA = euclideanNormSquared();
91  		long normB = other.euclideanNormSquared();	
92  		
93  		double res = 1d - ((double) (dot) / (double)((normA + normB) - dot));
94  		assert res >= 0 : " result: " + res + " A: " + super.getName() + " B: " + other.getName();
95  		assert res <= 1 : " result: " + res;
96  		
97  		return (float)res;
98  	}*/
99  	
100 	
101 	public boolean equals(Object o){
102 		OBTanimoto other = (OBTanimoto)o;
103 		return Arrays.equals(ids, other.ids) && Arrays.equals(counts, other.counts);
104 	}
105 	
106 	private long euclideanNormSquared(){
107 		long res = 0;
108 		for(long d : counts){
109 			res += d * d;
110 		}
111 		return res;
112 	}
113 
114 }