1 package net.obsearch.index.pivot;
2
3 /*
4 OBSearch: a distributed similarity search engine This project is to
5 similarity search what 'bit-torrent' is to downloads.
6 Copyright (C) 2008 Arnoldo Jose Muller Molina
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
22
23
24 import hep.aida.bin.StaticBin1D;
25
26 import java.io.IOException;
27 import java.nio.ByteBuffer;
28 import java.util.Arrays;
29 import java.util.Random;
30
31 import net.obsearch.storage.CloseIterator;
32 import net.obsearch.storage.OBStoreFactory;
33 import net.obsearch.storage.TupleLong;
34
35 import org.apache.log4j.Logger;
36
37 import com.sleepycat.je.DatabaseException;
38
39 import cern.colt.list.IntArrayList;
40 import cern.colt.list.LongArrayList;
41
42 import net.obsearch.OB;
43 import net.obsearch.OperationStatus;
44 import net.obsearch.asserts.OBAsserts;
45 import net.obsearch.exception.AlreadyFrozenException;
46 import net.obsearch.exception.IllegalIdException;
47 import net.obsearch.exception.NotFrozenException;
48 import net.obsearch.exception.OBException;
49 import net.obsearch.exception.OBStorageException;
50 import net.obsearch.exception.OutOfRangeException;
51 import net.obsearch.exception.PivotsUnavailableException;
52 import net.obsearch.index.AbstractOBIndex;
53 import net.obsearch.pivots.IncrementalPivotSelector;
54 import net.obsearch.pivots.PivotResult;
55
56 /**
57 * AbstractPivotOBIndex defines abstract functionality for an index that
58 * uses n pivots to partition the data into more tractable subsets.
59 * To create a new index, do the following:
60 * 1) Implement objectToProjectionBytes(O object) (smap an object and codify it)
61 * 2) Implement deleteAux
62 * 3) Implement insertAux
63 * 4) Implement searchOB. That's all!
64 * @author Arnoldo Jose Muller Molina
65 */
66 public abstract class AbstractPivotOBIndex < O extends OB >
67 extends AbstractOBIndex < O > {
68
69 private static int MAX_PIVOT_SAMPLE = 500000;
70
71 /**
72 * Logger.
73 */
74 private static final transient Logger logger = Logger
75 .getLogger(AbstractPivotOBIndex.class);
76
77 /**
78 * The pivot selector used by the index.
79 */
80 protected IncrementalPivotSelector < O > pivotSelector;
81
82 private int pivotCount;
83
84 // TODO: in the future, make an array of arrays of pivots for indexes like D.
85 /**
86 * The pivots for this Tree. When we instantiate or de-serialize this object
87 * we load them from {@link #pivotsBytes}.
88 */
89 protected O[] pivots;
90
91 /**
92 * Use the following amount of pairs when the intrinsic dimensionality
93 * is calculated.
94 */
95 protected int intrinsicDimensionalityPairs = 1000000;
96
97 /**
98 * Creates an index that uses pivots as its major data partitioning strategy.
99 * @param type The type of object that will be stored.
100 * @param pivotSelector The pivot selection strategy to be employed.
101 * @param pivotCount The number of pivots that will be selected.
102 * @throws OBStorageException
103 * @throws OBException
104 */
105 protected AbstractPivotOBIndex(Class < O > type, IncrementalPivotSelector < O > pivotSelector, int pivotCount) throws OBStorageException,
106 OBException {
107 super(type);
108 OBAsserts.chkAssert(pivotCount >= 0, "Pivot count must be >= 0");
109 this.pivotCount = pivotCount;
110 this.pivotSelector = pivotSelector;
111 }
112
113 protected AbstractPivotOBIndex(){
114 super();
115 }
116
117 @Override
118 public void freeze() throws AlreadyFrozenException,
119 IllegalIdException, IllegalAccessException, InstantiationException,
120 OBStorageException, OutOfRangeException, OBException, PivotsUnavailableException, IOException {
121 super.freeze();
122 if(pivotCount > 0){
123 pivots = getObjects(selectPivots(pivotCount, pivotSelector).getPivotIds());
124 }
125 }
126
127 /**
128 * Calculates the intrinsic dimensionality of the inserted dataset.
129 * @return
130 * @throws IllegalIdException
131 * @throws IllegalAccessException
132 * @throws InstantiationException
133 * @throws OBException
134 */
135 protected double calculateIntrinsicDimensionality() throws IllegalIdException, IllegalAccessException, InstantiationException, OBException{
136 StaticBin1D stats = new StaticBin1D();
137 Random r = new Random();
138 long max = databaseSize();
139 for(int i = 0 ; i < this.intrinsicDimensionalityPairs ; i++){
140 O a = getObject(r.nextInt((int)max));
141 O b = getObject(r.nextInt((int)max));
142 stats.add(distance(a,b));
143 }
144 logger.info(stats.toString());
145 return Math.pow(stats.mean(),2) / (2 * stats.variance());
146 }
147
148
149
150 /**
151 * Override this method if selection must be changed
152 * @throws IOException
153 */
154 protected PivotResult selectPivots(int pivotCount, IncrementalPivotSelector < O > pivotSelector) throws AlreadyFrozenException,
155 IllegalIdException, IllegalAccessException, InstantiationException,
156 OBStorageException, OutOfRangeException, OBException, IOException{
157 // select pivots.
158 OBAsserts.chkAssert(A.size() <= Integer.MAX_VALUE, "Cannot accept more than " + Integer.MAX_VALUE + " on freeze");
159 /*int max = Math.min( (int)A.size(), MAX_PIVOT_SAMPLE);
160 LongArrayList elementsSource = new LongArrayList(max);
161
162 int i = 0;
163 while( i < max){
164 elementsSource.add(i);
165 i++;
166 }*/
167 try{
168 PivotResult pivots = pivotSelector.generatePivots(pivotCount,
169 null, this);
170 // store the pivots selected for serialization.
171 //this.storePivots(pivots.getPivotIds());
172 return pivots;
173 }catch(PivotsUnavailableException e){
174 throw new OBException(e);
175 }
176
177 }
178
179
180
181 /**
182 * Stores the given pivots in a local array. Takes the pivots from the
183 * database using the given ids.
184 * @param ids
185 * Ids of the pivots that will be stored.
186 * @throws IllegalIdException
187 * If the pivot selector generates invalid ids
188 * @throws DatabaseException
189 * If something goes wrong with the DB
190 * @throws OBException
191 * User generated exception
192 * @throws IllegalAccessException
193 * If there is a problem when instantiating objects O
194 * @throws InstantiationException
195 * If there is a problem when instantiating objects O
196 */
197 protected O[] getObjects(final long[] ids) throws IllegalIdException,
198 IllegalAccessException, InstantiationException,
199 OBException {
200 if (logger.isDebugEnabled()) {
201 // logger.debug("Pivots selected " + Arrays.toString(ids));
202
203 }
204 O[] p = emptyPivotsArray(ids.length);
205 int i = 0;
206 for(long id : ids){
207 O obj = getObject(id);
208 p[i] = obj;
209 i++;
210 }
211 if (logger.isDebugEnabled()) {
212 // logger.debug("Detail: " + Arrays.toString(p));
213 }
214 return p;
215
216 }
217
218
219
220 /**
221 * Creates an array with the pivots. It has to be created like this because
222 * we are using generics. Subclasses must override this if different
223 * sets of pivots are to be used. We can override this later to have
224 * a multiple set of pivots for indexes like D...
225 */
226 protected void createPivotsArray(int size) {
227 this.pivots = emptyPivotsArray(size);
228 }
229
230 /**
231 * @return The number of pivots used in this index.
232 */
233 public int getPivotCount(){
234 return pivotCount;
235 }
236
237
238
239
240 public void init(OBStoreFactory fact) throws OBStorageException,
241 OBException, NotFrozenException,
242 IllegalAccessException, InstantiationException, OBException{
243 super.init(fact);
244 // if(isFrozen()){
245 // this.loadPivots();
246 // }
247 }
248 }