1 package net.obsearch.index.pyramid;
2
3 import hep.aida.bin.QuantileBin1D;
4 import hep.aida.bin.StaticBin1D;
5
6 import java.io.File;
7 import java.io.IOException;
8 import java.nio.ByteBuffer;
9 import java.util.Arrays;
10 import java.util.Iterator;
11
12 import net.obsearch.OB;
13 import net.obsearch.exception.AlreadyFrozenException;
14 import net.obsearch.exception.IllegalIdException;
15 import net.obsearch.exception.NotFrozenException;
16 import net.obsearch.exception.OBException;
17 import net.obsearch.exception.OBStorageException;
18 import net.obsearch.exception.OutOfRangeException;
19 import net.obsearch.index.pivot.AbstractPivotOBIndex;
20 import net.obsearch.pivots.IncrementalPivotSelector;
21 import net.obsearch.storage.CloseIterator;
22 import net.obsearch.storage.OBStore;
23 import net.obsearch.storage.TupleBytes;
24
25
26 import net.obsearch.storage.OBStorageConfig;
27 import net.obsearch.storage.OBStoreDouble;
28 import net.obsearch.storage.OBStoreFactory;
29 import net.obsearch.storage.OBStoreLong;
30 import net.obsearch.storage.TupleDouble;
31 import net.obsearch.storage.TupleLong;
32 import org.apache.log4j.Logger;
33
34 import cern.colt.list.DoubleArrayList;
35 import cern.colt.list.DoubleArrayList;
36
37 import com.sleepycat.bind.tuple.IntegerBinding;
38 import com.sleepycat.bind.tuple.TupleInput;
39 import com.sleepycat.je.Cursor;
40 import com.sleepycat.je.Database;
41 import com.sleepycat.je.DatabaseConfig;
42 import com.sleepycat.je.DatabaseEntry;
43 import com.sleepycat.je.DatabaseException;
44 import com.sleepycat.je.LockMode;
45 import com.sleepycat.je.OperationStatus;
46 import com.sleepycat.je.PreloadConfig;
47 import com.thoughtworks.xstream.annotations.XStreamAlias;
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 @XStreamAlias("ExtendedPyramidIndex")
77 public abstract class AbstractExtendedPyramidIndex < O extends OB >
78 extends AbstractPivotOBIndex < O > {
79
80
81
82
83 protected static final int MIN = 0;
84
85
86
87
88 protected static final int MAX = 1;
89
90
91
92
93 protected static final int HLOW = 0;
94
95
96
97
98 protected static final int HHIGH = 1;
99
100
101
102
103
104
105 private static Logger logger = Logger
106 .getLogger(AbstractExtendedPyramidIndex.class.getSimpleName());
107
108
109
110
111 private double[] mp;
112
113
114
115
116 protected OBStoreDouble C;
117
118
119
120
121 protected OBStoreLong B;
122
123
124
125
126 public AbstractExtendedPyramidIndex(Class < O > type,
127 IncrementalPivotSelector < O > pivotSelector, int pivotCount)
128 throws OBStorageException, OBException {
129 super(type, pivotSelector, pivotCount);
130 mp = new double[super.getPivotCount()];
131 }
132
133 public void init(OBStoreFactory fact) throws OBStorageException,
134 OBException, NotFrozenException, IllegalAccessException,
135 InstantiationException, OBException {
136 super.init(fact);
137 OBStorageConfig conf = new OBStorageConfig();
138 conf.setTemp(false);
139 conf.setDuplicates(true);
140 conf.setBulkMode(! isFrozen());
141 this.C = fact.createOBStoreDouble("C", conf);
142 if (!this.isFrozen()) {
143 conf = new OBStorageConfig();
144 conf.setTemp(true);
145 conf.setDuplicates(false);
146 conf.setBulkMode(false);
147 this.B = fact.createOBStoreLong("B", conf);
148 }
149
150 }
151
152 @Override
153 public void close() throws OBException {
154 C.close();
155 B.close();
156 super.close();
157
158 }
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177 protected void calculateIndexParameters() throws IllegalAccessException,
178 InstantiationException, OutOfRangeException, OBException {
179
180
181 QuantileBin1D[] medianHolder = createMedianHolders(B.size());
182
183 StaticBin1D qt = new StaticBin1D();
184
185
186 CloseIterator < TupleLong > it = this.B.processAll();
187 try {
188 while (it.hasNext()) {
189 TupleLong t = it.next();
190 double [] vect = extractTuple(t.getValue());
191 updateMedianHolder(vect, medianHolder);
192 calculateDistances(vect, qt);
193 }
194 } finally {
195 it.closeCursor();
196 }
197 logger.info("Separation: " + qt.mean() + "std:" + qt.standardDeviation() + " min " + qt.min());
198 int i = 0;
199 while (i < mp.length) {
200 double median = medianHolder[i].median();
201 assert median >= 0 && median <= 1;
202 mp[i] = median;
203 i++;
204 }
205 logger.debug("Median calculation finished");
206 }
207
208 private void calculateDistances(double[] vector, StaticBin1D medianHolder){
209
210
211 int i = 0;
212 while(i < vector.length-1){
213 medianHolder.add(Math.abs(vector[i] - vector[i+1]));
214 i++;
215 }
216 }
217
218
219
220
221
222
223
224
225
226
227
228 protected abstract double[] extractTuple(ByteBuffer in)
229 throws OutOfRangeException;
230
231
232
233
234
235
236 protected abstract ByteBuffer objectToProjectionBytes(O object) throws OBException;
237
238 @Override
239 public void freeze() throws IOException, AlreadyFrozenException,
240 IllegalIdException, IllegalAccessException, InstantiationException,
241 OBStorageException, OutOfRangeException, OBException {
242 super.freeze();
243
244 CloseIterator < TupleLong > it = A.processAll();
245 try {
246 while (it.hasNext()) {
247 TupleLong tup = it.next();
248 O obj = bytesToObject(tup.getValue());
249 ByteBuffer pivots = objectToProjectionBytes(obj);
250 assert pivots != null : "Implement projection serialization";
251 B.put(tup.getKey(), pivots);
252 }
253 } finally {
254 it.closeCursor();
255 }
256
257 calculateIndexParameters();
258
259
260
261 it = A.processAll();
262
263
264 try {
265 while (it.hasNext()) {
266 TupleLong tup = it.next();
267 O obj = bytesToObject(tup.getValue());
268 insertAux(tup.getKey(), obj);
269 }
270 } finally {
271 it.closeCursor();
272 }
273
274 }
275
276
277
278
279
280
281
282
283
284 protected final void updateMedianHolder(final double[] tuple,
285 QuantileBin1D[] medianHolder) {
286 int i = 0;
287 assert tuple.length == medianHolder.length;
288 assert medianHolder.length == super.getPivotCount();
289 while (i < medianHolder.length) {
290 medianHolder[i].add(tuple[i]);
291 i++;
292 }
293 }
294
295 protected final void updateDoubleHolder(final double[] tuple,
296 DoubleArrayList[] medianHolder) {
297 int i = 0;
298 assert tuple.length == medianHolder.length;
299 assert medianHolder.length == getPivotCount();
300 while (i < medianHolder.length) {
301 medianHolder[i].add(tuple[i]);
302 i++;
303 }
304 }
305
306
307
308
309
310
311
312
313 protected final QuantileBin1D[] createMedianHolders(final long size) {
314 QuantileBin1D[] res = new QuantileBin1D[getPivotCount()];
315 int i = 0;
316 while (i < res.length) {
317
318
319 res[i] = new QuantileBin1D(true, size, 0.00001, 0.00001, 10000,
320 new cern.jet.random.engine.DRand(new java.util.Date()),
321 true, true, 2);
322 i++;
323 }
324 return res;
325 }
326
327 protected final DoubleArrayList[] createDoubleHolders(int size) {
328 DoubleArrayList[] res = new DoubleArrayList[getPivotCount()];
329 int i = 0;
330 while (i < res.length) {
331
332
333 res[i] = new DoubleArrayList(size);
334 i++;
335 }
336 return res;
337 }
338
339
340
341
342
343
344
345
346
347
348 protected final double extendedPyramidNormalization(final double norm,
349 final int i) {
350 return (double) Math.pow(norm, -1.d / (Math.log(mp[i]) / Math.log(2)));
351 }
352
353
354
355
356
357
358
359
360
361
362 protected final double heightOfPoint(final double[] tuple,
363 final int pyramidNumber) {
364 double res = Math.abs(0.5 - tuple[pyramidNumber % getPivotCount()]);
365 assert res >= 0 && res <= 0.5;
366 return res;
367 }
368
369
370
371
372
373
374
375 public final double pyramidValue(final double[] tuple) {
376 int pyramid = pyramidOfPoint(tuple);
377 assert pyramid >= 0 && pyramid < getPivotCount() * 2 : " Pyramid value:"
378 + pyramid;
379 return pyramid + heightOfPoint(tuple, pyramid);
380 }
381
382
383
384
385
386
387
388 public final int pyramidOfPoint(final double[] tuple) {
389 int jmax = pyramidOfPointAux(tuple);
390 if (tuple[jmax] < 0.5) {
391 return jmax;
392 } else {
393 return jmax + getPivotCount();
394 }
395 }
396
397
398
399
400
401
402
403 protected final int pyramidOfPointAux(final double[] tuple) {
404 int j = 0;
405 assert getPivotCount() == tuple.length;
406 while (j < getPivotCount()) {
407 int k = 0;
408 boolean failed = false;
409 while (k < getPivotCount() && !failed) {
410 if (k == j) {
411 k++;
412 continue;
413 }
414 if (!(Math.abs(0.5 - tuple[j]) >= Math.abs(0.5 - tuple[k]))) {
415
416
417
418 failed = true;
419 }
420 k++;
421 }
422 if (!failed) {
423
424
425 return j;
426 }
427 j++;
428 }
429 assert false : " Catastrophic Failure ";
430 return Integer.MIN_VALUE;
431 }
432
433
434
435
436
437
438
439
440
441
442
443
444
445 protected final boolean intersect(final double[][] q, final int p,
446 double[] minArray, double[] lowHighResult) {
447
448
449 int i = p;
450 assert i < this.getPivotCount() * 2;
451 int j = 0;
452 if (i < this.getPivotCount()) {
453 double minimum = q[i][MIN];
454 while (j < q.length) {
455 if (j != i) {
456 if (!(minimum <= -minArray[j])) {
457 return false;
458 }
459 }
460 j++;
461 }
462 assert j == getPivotCount();
463 if (q[i][MAX] > 0) {
464 q[i][MAX] = 0;
465 }
466 } else {
467 i = i - this.getPivotCount();
468 double maximum = q[i][MAX];
469 while (j < q.length) {
470 if (j != i) {
471 if (!(maximum >= minArray[j])) {
472 return false;
473 }
474 }
475 j++;
476 }
477 assert j == getPivotCount();
478 if (q[i][MIN] < 0) {
479 q[i][MIN] = 0;
480 }
481 }
482
483
484 determineRanges(i, q, lowHighResult);
485 return true;
486 }
487
488
489
490
491
492
493
494 protected final double[] generateMinArray(double[][] query) {
495 int i = 0;
496 double[] res = new double[query.length];
497 while (i < query.length) {
498 res[i] = min(query[i]);
499 i++;
500 }
501 return res;
502 }
503
504
505
506
507 public long totalBoxes() {
508 return getPivotCount() * 2;
509 }
510
511
512
513
514
515
516
517
518
519
520 private final void determineRanges(int p, double[][] q,
521 double[] lowHighResult) {
522
523 int i = p;
524 assert i < getPivotCount();
525 lowHighResult[HHIGH] = max(q[i]);
526 if (isEasyCase(q)) {
527 lowHighResult[HLOW] = 0;
528 } else {
529 int j = 0;
530 double max = 0;
531 while (j < getPivotCount()) {
532 if (i != j) {
533 double t = qjmin(q, j, i);
534 if (t > max) {
535 max = t;
536 }
537 }
538 j++;
539 }
540 lowHighResult[HLOW] = max;
541 }
542 }
543
544
545
546
547
548
549
550
551
552
553
554
555 private final double qjmin(final double[][] q, final int j, final int i) {
556 if (min(q[j]) > min(q[i])) {
557 return min(q[j]);
558 } else {
559 return min(q[i]);
560 }
561 }
562
563
564
565
566
567
568
569
570 private final boolean isEasyCase(final double[][] q) {
571 int i = 0;
572 while (i < q.length) {
573 if (!((q[i][MIN] <= 0) && (0 <= q[i][MAX]))) {
574 return false;
575 }
576 i++;
577 }
578 return true;
579 }
580
581
582
583
584
585
586
587 private final double min(final double[] minMax) {
588 if (minMax[MIN] <= 0 && 0 <= minMax[MAX]) {
589 return 0;
590 } else {
591 return Math.min(Math.abs(minMax[MAX]), Math.abs(minMax[MIN]));
592 }
593 }
594
595
596
597
598
599
600
601 private final double max(final double[] minMax) {
602 return Math.max(Math.abs(minMax[MAX]), Math.abs(minMax[MIN]));
603 }
604
605
606
607
608
609
610
611 protected final void centerQuery(final double[][] q) {
612 int i = 0;
613 while (i < q.length) {
614 q[i][MIN] = q[i][MIN] - 0.5f;
615 q[i][MAX] = q[i][MAX] - 0.5f;
616 i++;
617 }
618 }
619
620
621
622
623
624
625
626
627 protected final void copyQuery(final double[][] src, double[][] dest) {
628 int i = 0;
629 while (i < src.length) {
630 dest[i][MIN] = src[i][MIN];
631 dest[i][MAX] = src[i][MAX];
632 i++;
633 }
634 }
635
636 }