View Javadoc

1   package net.obsearch.pivots;
2   
3   import java.lang.reflect.Array;
4   import java.util.HashSet;
5   import java.util.Random;
6   import java.util.concurrent.atomic.AtomicInteger;
7   import java.util.logging.Logger;
8   
9   import net.obsearch.Index;
10  import net.obsearch.OB;
11  import net.obsearch.asserts.OBAsserts;
12  import net.obsearch.exception.IllegalIdException;
13  import net.obsearch.exception.OBException;
14  import net.obsearch.exception.OBStorageException;
15  import net.obsearch.exception.PivotsUnavailableException;
16  
17  import cern.colt.list.IntArrayList;
18  import cern.colt.list.LongArrayList;
19  
20  import com.sleepycat.je.DatabaseException;
21  
22  /*
23   OBSearch: a distributed similarity search engine This project is to
24   similarity search what 'bit-torrent' is to downloads. 
25   Copyright (C) 2008 Arnoldo Jose Muller Molina
26  
27   This program is free software: you can redistribute it and/or modify
28   it under the terms of the GNU General Public License as published by
29   the Free Software Foundation, either version 3 of the License, or
30   (at your option) any later version.
31  
32   This program is distributed in the hope that it will be useful,
33   but WITHOUT ANY WARRANTY; without even the implied warranty of
34   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
35   GNU General Public License for more details.
36  
37   You should have received a copy of the GNU General Public License
38   along with this program.  If not, see <http://www.gnu.org/licenses/>.
39   */
40  
41  /**
42   * AbstractIncrementalPivotSelector holds common functionality to all the
43   * incremental pivot selectors.
44   * 
45   * @author Arnoldo Jose Muller Molina
46   */
47  
48  public abstract class AbstractIncrementalPivotSelector<O extends OB>  {
49  	
50  	protected Random r = new Random();
51  	private boolean acceptRepeated = false;
52  	private static Logger logger = Logger.getLogger(AbstractIncrementalPivotSelector.class.toString());
53  
54  	protected AbstractIncrementalPivotSelector(Pivotable<O> pivotable) {
55  		this.pivotable = pivotable;
56  	}
57  	
58  	
59  	protected AbstractIncrementalPivotSelector(){
60  		
61  	}
62  
63  	/**
64  	 * Pivotable objects determine if a given object is suitable. For example,
65  	 * in the case of trees, very big trees will become a burden and we should
66  	 * avoid using them as pivots.
67  	 */
68  	protected Pivotable<O> pivotable;
69  
70  	// TODO: The id auto increment must be initialized properly. We should leave
71  	// this
72  	// auto-increment to the underlying storage system.
73  
74  	/**
75  	 * Returns the given object. If elements != null, then the returned item id
76  	 * is elements[i].
77  	 * 
78  	 * @param i
79  	 *            The id in the database or in elements of the object that will
80  	 *            be accessed.
81  	 * @param elements
82  	 *            Elements that will be searched.
83  	 * @return O object of the corresponding id.
84  	 * @throws IllegalAccessException
85  	 * @throws InstantiationException
86  	 * @throws DatabaseException
87  	 * @throws IllegalIdException
88  	 * @throws OBException
89  	 */
90  	protected final O getObject(long i, LongArrayList elements, Index<O> index)
91  			throws IllegalAccessException, InstantiationException,
92  			 IllegalIdException, OBException {
93  
94  		return index.getObject(mapId(i, elements));
95  	}
96  
97  	protected long mapId(long i, LongArrayList elements) {
98  		if (elements != null) {
99  			return elements.get((int) i);
100 		} else {
101 			return i;
102 		}
103 	}
104 	
105 	public void enableAcceptRepeated(){
106 		acceptRepeated = true;
107 	}
108 
109 	/*
110 	 * (non-Javadoc)
111 	 * 
112 	 * @see net.obsearch.pivots.IncrementalPivotSelector#generatePivots(int,
113 	 * net.obsearch.result.Index)
114 	 */
115 	public PivotResult generatePivots(int pivotsCount, Index<O> index)
116 			throws OBException, IllegalAccessException, InstantiationException,
117 			OBStorageException, PivotsUnavailableException {
118 
119 		return generatePivots(pivotsCount, null, index);
120 	}
121 	
122 	public abstract PivotResult generatePivots(int pivotCount, LongArrayList elements,  Index<O> index) throws OBException,
123     IllegalAccessException, InstantiationException, OBStorageException,
124     PivotsUnavailableException; 
125 
126 	/**
127 	 * Returns the max # of elements. if source != null then source.size()
128 	 * otherwise index.databaseSize();
129 	 * 
130 	 * @param source
131 	 *            The source of data (can be null)
132 	 * @param index
133 	 *            The underlying index.
134 	 * @return The max # of elements of source if source != null or of index if
135 	 *         source == null.
136 	 */
137 	protected int max(LongArrayList source, Index<O> index)
138 			throws OBStorageException {
139 		int max;
140 		if (source == null) {	
141 			max = (int) Math.min(index.databaseSize(), Integer.MAX_VALUE);
142 		} else {
143 			max = source.size();
144 		}
145 		assert max >= 0 : "src: " + source + " index: " + index.databaseSize();
146 		return max;
147 	}
148 
149 	/**
150 	 * Selects k random elements from the given source.
151 	 * 
152 	 * @param k
153 	 *            number of elements to select
154 	 * @param r
155 	 *            Random object used to randomly select objects.
156 	 * @param source
157 	 *            The source of item ids.
158 	 * @param index
159 	 *            underlying index.
160 	 * @param will
161 	 *            not add pivots included in excludes.
162 	 * @return The ids of selected objects.
163 	 * @throws InstantiationException 
164 	 * @throws IllegalAccessException 
165 	 * @throws OBException 
166 	 * @throws IllegalIdException 
167 	 */
168 	protected long[] select(int k, Random r, LongArrayList source,
169 			Index<O> index, LongArrayList excludes) throws OBStorageException, IllegalIdException, OBException, IllegalAccessException, InstantiationException {
170 		return selectUnique(k, r, source,index,excludes);
171 	}
172 	
173 	
174 	protected long[] selectUnique(int k, Random r, LongArrayList source,
175 			Index<O> index, LongArrayList excludes) throws OBStorageException, IllegalIdException, OBException, IllegalAccessException, InstantiationException {
176 		int max = max(source, index);
177 		long[] res = new long[k];
178 		int i = 0;
179 		int excludesSize = 0;
180 		if(excludes != null){
181 			excludesSize = excludes.size();
182 		}
183 		HashSet<Long> excludesSet = new HashSet<Long>(excludesSize);
184 		HashSet<Long> generatedSet = new HashSet<Long>(k);
185 		int cx = 0;
186 		while(cx < excludesSize){
187 			excludesSet.add(excludes.get(cx));
188 			cx++;
189 		}
190 		LongArrayList generated = new LongArrayList(k);
191 		int repeats = 0;
192 		while (i < res.length) {
193 			if(repeats == 10000){
194 				throw new OBException("Too few elements in the db: " + max + " consider calling: enableAcceptRepeated()");
195 			}
196 			long id = mapId(r.nextInt(max), source);
197 			if (acceptRepeated || excludes == null || !excludesSet.contains(id) && ! generatedSet.contains(id)) {
198 				res[i] = id;
199 				generated.add(id);
200 				generatedSet.add(id);
201 			} else {
202 				repeats++;
203 				continue; // repeat step.
204 			}
205 			i++;
206 		}
207 		return res;
208 	}
209 
210 	protected O[] selectO(int k, Random r, LongArrayList source,
211 			Index<O> index, LongArrayList excludes) throws IllegalIdException,
212 			IllegalAccessException, InstantiationException,
213 			OBException {
214 		long[] sample = select(k, r, source, index, null);
215 		O[] objs = (O[]) Array.newInstance(index.getType(), k);
216 		int i = 0;
217 		for (long l : sample) {
218 			objs[i] = this.getObject(l, source, index);
219 			i++;
220 		}
221 		return objs;
222 	}
223 
224 }