View Javadoc

1   package net.obsearch.index.pivot;
2   
3   /*
4    OBSearch: a distributed similarity search engine This project is to
5    similarity search what 'bit-torrent' is to downloads. 
6    Copyright (C) 2008 Arnoldo Jose Muller Molina
7   
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10   the Free Software Foundation, either version 3 of the License, or
11   (at your option) any later version.
12  
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17  
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20   */
21  
22  
23  
24  import hep.aida.bin.StaticBin1D;
25  
26  import java.io.IOException;
27  import java.nio.ByteBuffer;
28  import java.util.Arrays;
29  import java.util.Random;
30  
31  import net.obsearch.storage.CloseIterator;
32  import net.obsearch.storage.OBStoreFactory;
33  import net.obsearch.storage.TupleLong;
34  
35  import org.apache.log4j.Logger;
36  
37  import com.sleepycat.je.DatabaseException;
38  
39  import cern.colt.list.IntArrayList;
40  import cern.colt.list.LongArrayList;
41  
42  import net.obsearch.OB;
43  import net.obsearch.OperationStatus;
44  import net.obsearch.asserts.OBAsserts;
45  import net.obsearch.exception.AlreadyFrozenException;
46  import net.obsearch.exception.IllegalIdException;
47  import net.obsearch.exception.NotFrozenException;
48  import net.obsearch.exception.OBException;
49  import net.obsearch.exception.OBStorageException;
50  import net.obsearch.exception.OutOfRangeException;
51  import net.obsearch.exception.PivotsUnavailableException;
52  import net.obsearch.index.AbstractOBIndex;
53  import net.obsearch.pivots.IncrementalPivotSelector;
54  import net.obsearch.pivots.PivotResult;
55  
56  /** 
57   *  AbstractPivotOBIndex defines abstract functionality for an index that
58   *  uses n pivots to partition the data into more tractable subsets.
59   *  To create a new index, do the following:
60   *  1) Implement objectToProjectionBytes(O object) (smap an object and codify it)
61   *  2) Implement deleteAux
62   *  3) Implement insertAux
63   *  4) Implement searchOB. That's all!
64   *  @author  Arnoldo Jose Muller Molina    
65   */
66  public abstract class AbstractPivotOBIndex < O extends OB >
67          extends AbstractOBIndex < O > {
68  	
69  	private static int MAX_PIVOT_SAMPLE = 500000;
70      
71      /**
72       * Logger.
73       */
74      private static final transient Logger logger = Logger
75              .getLogger(AbstractPivotOBIndex.class);
76      
77      /**
78       * The pivot selector used by the index.
79       */
80      protected IncrementalPivotSelector < O > pivotSelector;
81      
82      private int pivotCount;
83      
84      // TODO: in the future, make an array of arrays of pivots for indexes like D.
85      /**
86       * The pivots for this Tree. When we instantiate or de-serialize this object
87       * we load them from {@link #pivotsBytes}.
88       */
89      protected  O[] pivots;
90     
91      /**
92       * Use the following amount of pairs when the intrinsic dimensionality
93       * is calculated.
94       */
95      protected int intrinsicDimensionalityPairs = 1000000;
96  
97      /**
98       * Creates an index that uses pivots as its major data partitioning strategy.
99       * @param type The type of object that will be stored.
100      * @param pivotSelector The pivot selection strategy to be employed.
101      * @param pivotCount The number of pivots that will be selected.
102      * @throws OBStorageException
103      * @throws OBException
104      */
105     protected AbstractPivotOBIndex(Class < O > type, IncrementalPivotSelector < O > pivotSelector, int pivotCount) throws OBStorageException,
106             OBException {
107         super(type);
108         OBAsserts.chkAssert(pivotCount >= 0, "Pivot count must be >= 0");
109         this.pivotCount = pivotCount;
110         this.pivotSelector = pivotSelector;       
111     }
112     
113     protected AbstractPivotOBIndex(){
114     	super();
115     }
116 
117     @Override
118     public void freeze() throws  AlreadyFrozenException,
119     IllegalIdException, IllegalAccessException, InstantiationException,
120     OBStorageException, OutOfRangeException, OBException, PivotsUnavailableException, IOException {
121         super.freeze();
122         if(pivotCount > 0){
123         	pivots = getObjects(selectPivots(pivotCount, pivotSelector).getPivotIds());
124         }
125     }
126     
127     /**
128      * Calculates the intrinsic dimensionality of the inserted dataset.
129      * @return
130      * @throws IllegalIdException
131      * @throws IllegalAccessException
132      * @throws InstantiationException
133      * @throws OBException
134      */
135     protected double calculateIntrinsicDimensionality() throws IllegalIdException, IllegalAccessException, InstantiationException, OBException{
136     	StaticBin1D stats = new StaticBin1D();
137     	Random r = new Random();
138     	long max = databaseSize();
139     	for(int i = 0 ; i < this.intrinsicDimensionalityPairs ; i++){
140     		O a = getObject(r.nextInt((int)max));
141     		O b = getObject(r.nextInt((int)max));
142     		stats.add(distance(a,b));
143     	}
144     	logger.info(stats.toString());
145     	return Math.pow(stats.mean(),2) / (2 * stats.variance());
146     }
147     
148     
149     
150     /**
151      * Override this method if selection must be changed
152      * @throws IOException 
153      */
154     protected PivotResult  selectPivots(int pivotCount, IncrementalPivotSelector < O > pivotSelector) throws  AlreadyFrozenException,
155     IllegalIdException, IllegalAccessException, InstantiationException,
156     OBStorageException, OutOfRangeException, OBException, IOException{
157         // select pivots.
158     	OBAsserts.chkAssert(A.size() <= Integer.MAX_VALUE, "Cannot accept more than " + Integer.MAX_VALUE + " on freeze");
159     	/*int max = Math.min( (int)A.size(), MAX_PIVOT_SAMPLE);
160         LongArrayList elementsSource = new LongArrayList(max);
161 
162         int i = 0;
163         while( i < max){
164         	elementsSource.add(i);
165         	i++;
166         }*/
167         try{
168         PivotResult pivots = pivotSelector.generatePivots(pivotCount,
169                 null, this);       
170         // store the pivots selected for serialization.
171         //this.storePivots(pivots.getPivotIds());
172         return pivots;
173         }catch(PivotsUnavailableException e){
174             throw new OBException(e);
175         }
176         
177     }
178     
179     
180     
181     /**
182      * Stores the given pivots in a local array. Takes the pivots from the
183      * database using the given ids.
184      * @param ids
185      *                Ids of the pivots that will be stored.
186      * @throws IllegalIdException
187      *                 If the pivot selector generates invalid ids
188      * @throws DatabaseException
189      *                 If something goes wrong with the DB
190      * @throws OBException
191      *                 User generated exception
192      * @throws IllegalAccessException
193      *                 If there is a problem when instantiating objects O
194      * @throws InstantiationException
195      *                 If there is a problem when instantiating objects O
196      */
197     protected O[] getObjects(final long[] ids) throws IllegalIdException,
198             IllegalAccessException, InstantiationException,
199             OBException {
200         if (logger.isDebugEnabled()) {
201            // logger.debug("Pivots selected " + Arrays.toString(ids));
202 
203         }
204         O[] p  = emptyPivotsArray(ids.length);
205         int i = 0;
206         for(long id : ids){
207             O obj = getObject(id);
208             p[i] = obj;
209             i++;
210         }
211         if (logger.isDebugEnabled()) {
212            // logger.debug("Detail: " + Arrays.toString(p));
213         }
214         return p;
215         
216     }
217     
218     
219     
220     /**
221      * Creates an array with the pivots. It has to be created like this because
222      * we are using generics. Subclasses must override this if different
223      * sets of pivots are to be used. We can override this later to have
224      * a multiple set of pivots for indexes like D... 
225      */
226     protected void createPivotsArray(int size) {
227         this.pivots = emptyPivotsArray(size);
228     }
229 
230     /**
231      * @return The number of pivots used in this index.
232      */
233     public int getPivotCount(){
234         return pivotCount;
235     }
236     
237     
238     
239 
240     public void init(OBStoreFactory fact) throws OBStorageException,
241     OBException, NotFrozenException, 
242     IllegalAccessException, InstantiationException, OBException{
243         super.init(fact);
244 //        if(isFrozen()){
245 //            this.loadPivots();
246 //       }
247     }
248 }