View Javadoc

1   package net.obsearch.index.pyramid.imp;
2   
3   import java.io.File;
4   import java.io.IOException;
5   import java.nio.ByteBuffer;
6   import java.util.BitSet;
7   import java.util.Iterator;
8   
9   import net.obsearch.Index;
10  import net.obsearch.OperationStatus;
11  import net.obsearch.Status;
12  import net.obsearch.exception.IllegalIdException;
13  import net.obsearch.exception.NotFrozenException;
14  import net.obsearch.exception.OBException;
15  import net.obsearch.exception.OBStorageException;
16  import net.obsearch.exception.OutOfRangeException;
17  import net.obsearch.filter.Filter;
18  import net.obsearch.index.pyramid.AbstractExtendedPyramidIndex;
19  import net.obsearch.pivots.IncrementalPivotSelector;
20  import net.obsearch.storage.CloseIterator;
21  import net.obsearch.storage.TupleBytes;
22  
23  import net.obsearch.utils.bytes.ByteBufferFactoryConversion;
24  
25  import net.obsearch.index.IndexShort;
26  import net.obsearch.ob.OBShort;
27  import net.obsearch.result.OBPriorityQueueShort;
28  import net.obsearch.result.OBResultShort;
29  import net.obsearch.storage.OBStoreDouble;
30  import net.obsearch.storage.TupleDouble;
31  import net.obsearch.storage.TupleLong;
32  
33  
34  /*
35   OBSearch: a distributed similarity search engine
36   This project is to similarity search what 'bit-torrent' is to downloads.
37   Copyright (C)  2007 Arnoldo Jose Muller Molina
38  
39   This program is free software: you can redistribute it and/or modify
40   it under the terms of the GNU General Public License as published by
41   the Free Software Foundation, either version 3 of the License, or
42   (at your option) any later version.
43  
44   This program is distributed in the hope that it will be useful,
45   but WITHOUT ANY WARRANTY; without even the implied warranty of
46   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
47   GNU General Public License for more details.
48  
49   You should have received a copy of the GNU General Public License
50   along with this program.  If not, see <http://www.gnu.org/licenses/>.
51   */
52  /**
53   * ExtendedPyramidIndexShort is an index that uses the pyramid technique. The
54   * distance function used must return short values. 
55   * This index will be deprecated soon because often floating point errors affect the results.
56   * Please use the idistance instead.
57   * 
58   * @param <O>
59   *                The type of object to be stored in the Index.
60   * @author Arnoldo Jose Muller Molina
61   * @since 0.7
62   */
63  public class ExtendedPyramidIndexShort < O extends OBShort >
64          extends AbstractExtendedPyramidIndex < O > implements IndexShort < O > {
65  
66      /**
67       * Minimum value to be returned by the distance function.
68       */
69      private short minInput;
70  
71      /**
72       * Maximum value to be returned by the distance function.
73       */
74      private short maxInput;
75  
76      /**
77       * Creates a new ExtendedPyramidIndexShort. Ranges accepted by this pyramid
78       * will be between 0 and Short.MAX_VALUE
79       * @param databaseDirectory
80       *                Directory were the index will be stored
81       * @param pivots
82       *                Number of pivots to be used.
83       * @param pivotSelector
84       *                The pivot selector that will be used by this index.
85       * @param type
86       *                The class of the object O that will be used.
87       * @throws DatabaseException
88       *                 If something goes wrong with the DB
89       * @throws IOException
90       *                 If the databaseDirectory directory does not exist.
91       */
92      public ExtendedPyramidIndexShort(Class < O > type,
93              IncrementalPivotSelector < O > pivotSelector, int pivotCount,
94              short minValue, short maxValue) throws OBStorageException,
95              OBException, IOException {
96  
97          super(type, pivotSelector, pivotCount);
98          this.minInput = minValue;
99          this.maxInput = maxValue;
100         assert minInput < maxInput;
101     }
102 
103     @Override
104     protected double[] extractTuple(ByteBuffer in) throws OutOfRangeException {
105         int i = 0;
106         double[] res = new double[getPivotCount()];
107         while (i < getPivotCount()) {
108             res[i] = normalize(in.getShort());
109             i++;
110         }
111         return res;
112     }
113 
114     public boolean intersects(final O object, final short r, final int box)
115             throws NotFrozenException, InstantiationException,
116             IllegalIdException, IllegalAccessException, OutOfRangeException,
117             OBException {
118         short[] t = new short[getPivotCount()];
119         calculatePivotTuple(object, t); // calculate the pivot for the given
120         double[][] q = new double[getPivotCount()][2]; // rectangular query
121         generateRectangle(t, r, q);
122         double[] lowHighResult = new double[2];
123         double[] minArray = generateMinArray(q);
124         return intersect(q, box, minArray, lowHighResult);
125     }
126 
127     public Iterator < Long > intersectingBoxes(final O object, final short r)
128             throws NotFrozenException, InstantiationException,
129             IllegalIdException, IllegalAccessException, OutOfRangeException,
130             OBException {
131         throw new UnsupportedOperationException();
132     }
133 
134     public void searchOB(final O object, final short r,
135             final OBPriorityQueueShort < O > result) throws NotFrozenException,
136             InstantiationException, IllegalIdException, IllegalAccessException,
137             OutOfRangeException, OBException {
138         // check if we are frozen
139         assertFrozen();
140 
141         short[] t = new short[getPivotCount()];
142         calculatePivotTuple(object, t); // calculate the pivot for the given
143         // object
144 
145         int pyramidCount = 2 * getPivotCount();
146         double[][] qorig = new double[getPivotCount()][2]; // rectangular query
147         double[][] q = new double[getPivotCount()][2]; // rectangular query
148         short myr = r;
149         generateRectangle(t, myr, qorig);
150         int i = 0;
151         double[] lowHighResult = new double[2];
152         double[] minArray = generateMinArray(qorig);
153         while (i < pyramidCount) {
154             copyQuery(qorig, q);
155 
156             if (intersect(q, i, minArray, lowHighResult)) {
157                 searchBTreeAndUpdate(object, t, myr, i + lowHighResult[HLOW], i
158                         + lowHighResult[HHIGH], result);
159                 short nr = result.updateRange(myr);
160                 if (nr < myr) {
161                     myr = nr;
162                     // regenerate the query with a smaller range
163                     generateRectangle(t, myr, qorig);
164                 }
165             }
166             i++;
167         }
168     }
169 
170     public void searchOB(final O object, final short r,
171             final OBPriorityQueueShort < O > result, final long[] boxes)
172             throws NotFrozenException, InstantiationException,
173             IllegalIdException, IllegalAccessException, OutOfRangeException,
174             OBException {
175         // check if we are frozen
176         assertFrozen();
177         BitSet boxesBit = new BitSet();
178         int i = 0;
179         while (i < boxes.length) {
180             boxesBit.set((int) boxes[i]);
181             i++;
182         }
183         short[] t = new short[getPivotCount()];
184         calculatePivotTuple(object, t); // calculate the pivot for the given
185         // object
186 
187         int pyramidCount = 2 * getPivotCount();
188         double[][] qorig = new double[getPivotCount()][2]; // rectangular query
189         double[][] q = new double[getPivotCount()][2]; // rectangular query
190         short myr = r;
191         generateRectangle(t, myr, qorig);
192         i = 0;
193         double[] lowHighResult = new double[2];
194         // TODO: select the pyramids randomly just like quicksort
195         double[] minArray = generateMinArray(qorig);
196         while (i < pyramidCount) {
197             copyQuery(qorig, q);
198             if (intersect(q, i, minArray, lowHighResult) && boxesBit.get(i)) {
199                 searchBTreeAndUpdate(object, t, myr, i + lowHighResult[HLOW], i
200                         + lowHighResult[HHIGH], result);
201                 short nr = result.updateRange(myr);
202                 if (nr < myr) {
203                     myr = nr;
204                     // regenerate the query with a smaller range
205                     generateRectangle(t, myr, qorig);
206                 }
207             }
208             i++;
209         }
210     }
211 
212     /**
213      * This method reads from the B-tree appies l-infinite to discard false
214      * positives. This technique is called SMAP. Calculates the real distance
215      * and updates the result priority queue It is left public so that junit can
216      * perform validations on it Performance-wise this is one of the most
217      * important methods
218      * @param object
219      *                object to search
220      * @param tuple
221      *                tuple of the object
222      * @param r
223      *                range
224      * @param hlow
225      *                lowest pyramid value
226      * @param hhigh
227      *                highest pyramid value
228      * @param result
229      *                result of the search operation
230      * @throws DatabaseException
231      *                 If somehing goes wrong with the DB
232      * @throws OBException
233      *                 User generated exception
234      * @throws IllegalAccessException
235      *                 If there is a problem when instantiating objects O
236      * @throws InstantiationException
237      *                 If there is a problem when instantiating objects O
238      * @throws IllegalIdException
239      *                 This exception is left as a Debug flag. If you receive
240      *                 this exception please report the problem to:
241      *                 http://code.google.com/p/obsearch/issues/list
242      */
243     private void searchBTreeAndUpdate(final O object, final short[] tuple,
244             final short r, final double hlow, final double hhigh,
245             final OBPriorityQueueShort < O > result)
246             throws IllegalAccessException, InstantiationException,
247             IllegalIdException, OBException {
248 
249         CloseIterator < TupleDouble > it = C.processRange(hlow, hhigh);
250         try{
251         short max = Short.MIN_VALUE;
252         short realDistance = Short.MIN_VALUE;
253         while (it.hasNext()) {
254         	stats.incDiskAccessCount();
255         	stats.incSmapCount();
256         	stats.incBucketsRead();
257         	
258             TupleDouble tup = it.next();
259             ByteBuffer in = tup.getValue();
260             stats.incDataRead(in.array().length);
261             int i = 0;
262             short t;
263             max = Short.MIN_VALUE;
264             while (i < tuple.length) {
265                 t = (short) Math.abs(tuple[i] - in.getShort());
266                 if (t > max) {
267                     max = t;
268                     if (t > r) {
269                         break; // finish this loop this slice won't be
270                         // matched
271                         // after all!
272                     }
273                 }
274                 i++;
275             }
276             if (max <= r && result.isCandidate(max)) {
277                 // there is a chance it is a possible match
278                 long id = in.getLong();
279                 O toCompare = getObject(id);
280                 realDistance = object.distance(toCompare);
281                 stats.incDistanceCount();
282                 if (realDistance <= r) {
283                     result.add(id, toCompare, realDistance);
284                 }
285             }
286         }
287         }finally{
288             it.closeCursor();
289         }
290         
291     }
292 
293     /**
294      * Generates min and max for the given tuple It is actually the query. If
295      * you want non-rectangular queries you have to override this method and
296      * make sure your modification works well with searchOB
297      * @param t
298      *                the tuple to be processed
299      * @param r
300      *                the range
301      * @param q
302      *                resulting rectangle query
303      * @throws OutOfRangeException
304      *                 If the distance of any object to any other object exceeds
305      *                 the range defined by the user.
306      */
307 
308     protected final void generateRectangle(final short[] t, final short r,
309             final double[][] q) throws OutOfRangeException {
310         // range
311         int i = 0;
312         while (i < q.length) { //
313             q[i][MIN] = extendedPyramidNormalization(normalize((short) Math
314                     .max(t[i] - r, minInput)), i);
315             q[i][MAX] = extendedPyramidNormalization(normalize((short) Math
316                     .min(t[i] + r, maxInput)), i);
317             i++;
318         }
319         centerQuery(q);
320     }
321 
322     /**
323      * Transforms the given tuple into an extended pyramid technique normalized
324      * value that considers the "center" of the dimension.
325      * @param tuple
326      *                The original tuple in the default dimension
327      * @throws OutOfRangeException
328      *                 If the distance of any object to any other object exceeds
329      *                 the range defined by the user.
330      * @return resulting normalized tuple
331      */
332     protected final double[] extendedPyramidTransform(final short[] tuple)
333             throws OutOfRangeException {
334         int i = 0;
335         double[] result = new double[tuple.length];
336         assert tuple.length == result.length;
337         while (i < tuple.length) {
338             double norm = normalize(tuple[i]);
339             double norm2 = extendedPyramidNormalization(norm, i);
340             result[i] = norm2;
341             i++;
342         }
343         return result;
344     }
345 
346     /**
347      * Normalize the given value.
348      * @param x
349      *                value to normalize
350      * @return the normalized value
351      * @throws OutOfRangeException
352      *                 If the distance of any object to any other object exceeds
353      *                 the range defined by the user.
354      */
355     protected double normalize(final short x) throws OutOfRangeException {
356         if (x < minInput || x > maxInput) {
357             throw new OutOfRangeException(minInput + "", maxInput + "", "" + x);
358         }
359         return ((double) (x - minInput)) / ((double) (maxInput - minInput));
360     }
361 
362     
363 
364     /*
365      * (non-Javadoc)
366      * @see net.obsearch.index.AbstractOBIndex#insertAux(long,
367      *      net.obsearch.result.OB)
368      */
369     @Override
370     protected net.obsearch.OperationStatus insertAux(long id, O object)
371             throws OBStorageException, OBException, IllegalAccessException,
372             InstantiationException {
373         short[] t = new short[getPivotCount()];
374         calculatePivotTuple(object, t); // calculate the tuple for the new //
375 
376         double[] et = extendedPyramidTransform(t);
377         double pyramidValue = pyramidValue(et);
378         ByteBuffer out = ByteBufferFactoryConversion.createByteBuffer(0, super
379                 .getPivotCount(), 0, 1, 0, 0);
380         // write the tuple
381         for (short d : t) {
382             out.putShort(d);
383         }
384         // write the object's id
385         out.putLong(id);
386         C.put(pyramidValue, out);
387         OperationStatus result = new OperationStatus();
388         result.setStatus(Status.OK);
389         result.setId(id);
390         return result;
391     }
392     
393     protected net.obsearch.OperationStatus insertAuxBulk(long id, O object)
394     throws OBStorageException, OBException, IllegalAccessException,
395     InstantiationException {
396     	return insertAux(id, object);
397     }
398 
399     public long getBox(final O object) throws OBException {
400         short[] t = new short[getPivotCount()];
401         calculatePivotTuple(object, t); // calculate the tuple for the new //
402         double[] et = extendedPyramidTransform(t);
403         return super.pyramidOfPoint(et);
404     }
405 
406     public OperationStatus exists(O object) throws OBException,
407             IllegalAccessException, InstantiationException {
408         OperationStatus res = new OperationStatus(Status.NOT_EXISTS);
409         OBPriorityQueueShort < O > result = new OBPriorityQueueShort < O >(
410                 (byte) 1);
411         searchOB(object, (short) 1, result);
412         
413         if (result.getSize() == 1 ){
414             OBResultShort < O > r = result.iterator().next();
415                         if( r.getObject().equals(object)) {
416                             res.setStatus(Status.EXISTS);
417                             res.setId(r.getId());
418                         }
419         }
420         return res;
421     }
422 
423     /**
424      * Calculates the tuple vector for the given object.
425      * @param obj
426      *                object to be processed
427      * @param tuple
428      *                The resulting tuple will be stored here
429      * @throws OBException
430      *                 User generated exception
431      */
432     protected final void calculatePivotTuple(final O obj, final short[] tuple)
433             throws OBException {
434         assert tuple.length == getPivotCount();
435         int i = 0;
436         while (i < tuple.length) {
437             tuple[i] = obj.distance(pivots[i]);
438             i++;
439         }
440     }
441 
442     @Override
443     protected OperationStatus deleteAux(final O object) throws OBException,
444             IllegalAccessException, InstantiationException {
445         long resId = -1;
446         OperationStatus res = new OperationStatus(Status.NOT_EXISTS);
447         short[] tuple = new short[getPivotCount()];
448         // calculate the pivot for the given object
449         calculatePivotTuple(object, tuple);
450         double[] et = extendedPyramidTransform(tuple);
451         double pyramidValue = pyramidValue(et);
452 
453         CloseIterator < TupleDouble > it = C
454                 .processRange(pyramidValue, pyramidValue);
455         try{
456         short max = Short.MIN_VALUE;
457         while (it.hasNext()) {
458             TupleDouble tup = it.next();
459             ByteBuffer in = tup.getValue();
460 
461             int i = 0;
462             short t;
463             max = Short.MIN_VALUE;
464             // STATS
465             while (i < tuple.length) {
466                 t = (short) Math.abs(tuple[i] - in.getShort());
467                 if (t > max) {
468                     max = t;
469                     if (t != 0) {
470                         break; // finish this loop this slice won't be
471                         // matched
472                         // after all!
473                     }
474                 }
475                 i++;
476             }
477 
478             if (max == 0) {
479                 // there is a chance it is a possible match
480                 long id = in.getLong();
481                 O toCompare = super.getObject(id);
482                 if (object.equals(toCompare)) {
483                     resId = id;
484                     res = new OperationStatus(Status.OK);
485                     res.setId(resId);
486                     it.remove();
487                     break;
488                 }
489 
490             }
491 
492         }
493         }catch(Exception e){
494             throw new OBException(e);
495         }
496         finally{
497             it.closeCursor();
498         }
499         return res;
500     }
501 
502     /*
503      * (non-Javadoc)
504      * @see net.obsearch.index.pivot.AbstractPivotOBIndex#objectToProjectionBytes(net.obsearch.result.OB)
505      */
506     @Override
507     protected ByteBuffer objectToProjectionBytes(O object) throws OBException {
508         short[] t = new short[getPivotCount()];
509         calculatePivotTuple(object, t); // calculate the pivot for the given
510         ByteBuffer out = ByteBufferFactoryConversion.createByteBuffer(0, super
511                 .getPivotCount(), 0, 0, 0, 0);
512         for (short d : t) {
513             out.putShort(d);
514         }
515         return out;
516     }
517 
518     /*
519      * (non-Javadoc)
520      * @see net.obsearch.result.index.IndexShort#searchOB(net.obsearch.result.ob.OBShort,
521      *      short, net.obsearch.result.result.OBPriorityQueueShort, int[])
522      */
523     @Override
524     public void searchOB(O object, short r, OBPriorityQueueShort < O > result,
525             int[] boxes) throws NotFrozenException, InstantiationException,
526             IllegalIdException, IllegalAccessException, OutOfRangeException,
527             OBException {
528         // TODO Auto-generated method stub
529 
530     }
531     
532     
533 	public void searchOB(O object, short r, Filter<O> filter,
534 			OBPriorityQueueShort<O> result) throws NotFrozenException,
535 			InstantiationException, IllegalIdException, IllegalAccessException,
536 			OutOfRangeException, OBException {
537 		
538     	throw new UnsupportedOperationException();
539 	}
540 
541     public double distance(O a, O b) throws OBException {
542         short result = ((OBShort) a).distance((OBShort) b);
543         return normalize(result);
544     }
545 
546 }