Line data Source code
1 : /*
2 : * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved.
3 : *
4 : * Redistribution and use in source and binary forms, with or without
5 : * modification, are permitted provided that the following conditions
6 : * are met:
7 : *
8 : * 1. Redistributions of source code must retain the above copyright
9 : * notice, this list of conditions and the following disclaimer.
10 : *
11 : * 2. Redistributions in binary form must reproduce the above copyright
12 : * notice, this list of conditions and the following disclaimer in the
13 : * documentation and/or other materials provided with the distribution.
14 : *
15 : * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 : * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 : * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 : * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 : * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 : * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 : * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 : * POSSIBILITY OF SUCH DAMAGE.
26 : */
27 :
28 : /** \file
29 : * Linear chained hashtable.
30 : *
31 : * This hashtable resizes dynamically and progressively using a variation of the
32 : * linear hashing algorithm described in http://en.wikipedia.org/wiki/Linear_hashing
33 : *
34 : * It starts with the minimal size of 64 buckets (2^TOMMY_HASHLIN_BIT), it doubles the size when it
35 : * reaches a load factor greater than 0.5 and it halves the size with a load
36 : * factor lower than 0.125.
37 : *
38 : * The progressive resize is good for real-time and interactive applications
39 : * as it makes insert and delete operations taking always the same time.
40 : *
41 : * The resize uses a dynamic array that supports access to non-contiguous
42 : * segments.
43 : * In this way we only allocate additional table segments on the heap, without
44 : * freeing the previous table, and then not increasing the heap fragmentation.
45 : *
46 : * The resize takes place inside tommy_hashlin_insert() and tommy_hashlin_remove().
47 : * No resize is done in the tommy_hashlin_search() operation.
48 : *
49 : * To initialize the hashtable you have to call tommy_hashlin_init().
50 : *
51 : * \code
52 : * tommy_hashlin hashlin;
53 : *
54 : * tommy_hashlin_init(&hashlin);
55 : * \endcode
56 : *
57 : * To insert elements in the hashtable you have to call tommy_hashlin_insert() for
58 : * each element.
59 : * In the insertion call you have to specify the address of the node, the
60 : * address of the object, and the hash value of the key to use.
61 : * The address of the object is used to initialize the tommy_node::data field
62 : * of the node, and the hash to initialize the tommy_node::index field.
63 : *
64 : * \code
65 : * struct object {
66 : * int value;
67 : * // other fields
68 : * tommy_node node;
69 : * };
70 : *
71 : * struct object* obj = malloc(sizeof(struct object)); // creates the object
72 : *
73 : * obj->value = ...; // initializes the object
74 : *
75 : * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object
76 : * \endcode
77 : *
78 : * To find an element in the hashtable you have to call tommy_hashlin_search()
79 : * providing a comparison function, its argument, and the hash of the key to search.
80 : *
81 : * \code
82 : * int compare(const void* arg, const void* obj)
83 : * {
84 : * return *(const int*)arg != ((const struct object*)obj)->value;
85 : * }
86 : *
87 : * int value_to_find = 1;
88 : * struct object* obj = tommy_hashlin_search(&hashlin, compare, &value_to_find, tommy_inthash_u32(value_to_find));
89 : * if (!obj) {
90 : * // not found
91 : * } else {
92 : * // found
93 : * }
94 : * \endcode
95 : *
96 : * To iterate over all the elements in the hashtable with the same key, you have to
97 : * use tommy_hashlin_bucket() and follow the tommy_node::next pointer until NULL.
98 : * You also have to check explicitly for the key, as the bucket may contain
99 : * different keys.
100 : *
101 : * \code
102 : * int value_to_find = 1;
103 : * tommy_node* i = tommy_hashlin_bucket(&hashlin, tommy_inthash_u32(value_to_find));
104 : * while (i) {
105 : * struct object* obj = i->data; // gets the object pointer
106 : *
107 : * if (obj->value == value_to_find) {
108 : * printf("%d\n", obj->value); // process the object
109 : * }
110 : *
111 : * i = i->next; // goes to the next element
112 : * }
113 : * \endcode
114 : *
115 : * To remove an element from the hashtable you have to call tommy_hashlin_remove()
116 : * providing a comparison function, its argument, and the hash of the key to search
117 : * and remove.
118 : *
119 : * \code
120 : * struct object* obj = tommy_hashlin_remove(&hashlin, compare, &value_to_remove, tommy_inthash_u32(value_to_remove));
121 : * if (obj) {
122 : * free(obj); // frees the object allocated memory
123 : * }
124 : * \endcode
125 : *
126 : * To destroy the hashtable you have to remove all the elements, and deinitialize
127 : * the hashtable calling tommy_hashlin_done().
128 : *
129 : * \code
130 : * tommy_hashlin_done(&hashlin);
131 : * \endcode
132 : *
133 : * If you need to iterate over all the elements in the hashtable, you can use
134 : * tommy_hashlin_foreach() or tommy_hashlin_foreach_arg().
135 : * If you need a more precise control with a real iteration, you have to insert
136 : * all the elements also in a ::tommy_list, and use the list to iterate.
137 : * See the \ref multiindex example for more detail.
138 : */
139 :
140 : #ifndef __TOMMYHASHLIN_H
141 : #define __TOMMYHASHLIN_H
142 :
143 : #include "tommyhash.h"
144 :
145 : /******************************************************************************/
146 : /* hashlin */
147 :
148 : /** \internal
149 : * Initial and minimal size of the hashtable expressed as a power of 2.
150 : * The initial size is 2^TOMMY_HASHLIN_BIT, i.e. 64 buckets with the value 6 defined here.
151 : */
152 : #define TOMMY_HASHLIN_BIT 6
153 :
154 : /**
155 : * Hashtable node (an alias of ::tommy_node).
156 : * This is the node that you have to include inside your objects.
157 : */
158 : typedef tommy_node tommy_hashlin_node;
159 :
160 : /**
161 : * Hashtable container type.
162 : * \note Don't use internal fields directly, but access the container only using functions.
163 : */
164 : typedef struct tommy_hashlin_struct {
165 : tommy_hashlin_node** bucket[TOMMY_SIZE_BIT]; /**< Dynamic array of hash buckets, split in segments. One list for each hash modulus. The segment is selected by the highest bit set in the bucket position (see tommy_hashlin_pos()). */
166 : tommy_size_t bucket_max; /**< Number of buckets. */
167 : tommy_size_t bucket_mask; /**< Bit mask to access the buckets. Applied to the hash for positions lower than the split (see tommy_hashlin_bucket_ref()). */
168 : tommy_size_t low_max; /**< Low order max value. */
169 : tommy_size_t low_mask; /**< Low order mask value. Applied to the hash to get the position that is compared against the split. */
170 : tommy_size_t split; /**< Split position. Low order positions lower than this are addressed with bucket_mask instead of low_mask. */
171 : tommy_size_t count; /**< Number of elements, as returned by tommy_hashlin_count(). */
172 : tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */
173 : tommy_uint_t state; /**< Reallocation state. */
174 : } tommy_hashlin;
175 :
176 : /**
177 : * Initializes the hashtable pointed to by \p hashlin.
178 : */
179 : void tommy_hashlin_init(tommy_hashlin* hashlin);
180 :
181 : /**
182 : * Deinitializes the hashtable pointed to by \p hashlin.
183 : *
184 : * You can call this function with elements still contained,
185 : * but such elements are not going to be freed by this call.
186 : */
187 : void tommy_hashlin_done(tommy_hashlin* hashlin);
188 :
189 : /**
190 : * Inserts an element in the hashtable: \p node is the node embedded in the object, \p data is the object address stored in tommy_node::data, and \p hash is the hash of the key (compared against tommy_node::index by tommy_hashlin_search()).
191 : */
192 : void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash);
193 :
194 : /**
195 : * Searches and removes an element from the hashtable.
196 : * You have to provide a compare function and the hash of the element you want to remove.
197 : * If the element is not found, 0 is returned.
198 : * If more than one equal element is present, the first one is removed.
199 : * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
200 : * The function should return 0 for equal elements, anything else for different elements.
201 : * \param cmp_arg Compare argument passed as first argument of the compare function.
202 : * \param hash Hash of the element to find and remove.
203 : * \return The removed element, or 0 if not found.
204 : */
205 : void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash);
206 :
207 : /** \internal
208 : * Returns a pointer to the bucket slot at the specified position \p pos, picking the segment of the bucket array from the highest bit set in \p pos.
209 : */
210 308448199 : tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos)
211 : {
212 : tommy_uint_t bsr;
213 :
214 : /* get the highest bit set; OR-ing with 1 avoids passing 0, so that position 0 maps to the same segment as position 1 (presumably segment 0, assuming tommy_ilog2(1) == 0) */
215 308448199 : bsr = tommy_ilog2(pos | 1);
216 :
217 308448199 : return &hashlin->bucket[bsr][pos]; /* the segment is indexed with the absolute position, not with an offset relative to the segment */
218 : }
219 :
220 : /** \internal
221 : * Returns a pointer to the bucket of the specified hash, using the low order mask or, for positions lower than the split, the full bucket mask.
222 : */
223 160740001 : tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ref(tommy_hashlin* hashlin, tommy_hash_t hash)
224 : {
225 : tommy_size_t pos;
226 : tommy_size_t high_pos;
227 :
228 160740001 : pos = hash & hashlin->low_mask;
229 160740001 : high_pos = hash & hashlin->bucket_mask;
230 :
231 : /* if this position is lower than the split, it is already allocated in the high half */
232 160740001 : if (pos < hashlin->split) {
233 : /* The following assignment is expected to be implemented */
234 : /* with a conditional move instruction */
235 : /* that results in a little better and constant performance */
236 : /* regardless of the split position. */
237 : /* This affects mostly the worst case, when the split value */
238 : /* is near its half, resulting in a totally unpredictable */
239 : /* condition by the CPU. */
240 : /* In such case the use of the conditional move is generally faster. */
241 :
242 : /* use also the high bit */
243 41603307 : pos = high_pos;
244 : }
245 :
246 160740001 : return tommy_hashlin_pos(hashlin, pos);
247 : }
248 :
249 : /**
250 : * Gets the bucket of the specified hash.
251 : * The bucket is guaranteed to contain ALL the elements with the specified hash,
252 : * but it can also contain others.
253 : * You can access elements in the bucket following the tommy_node::next pointer until 0.
254 : * \param hash Hash of the element to find.
255 : * \return The head of the bucket, or 0 if empty.
256 : */
257 1 : tommy_inline tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash)
258 : {
259 1 : return *tommy_hashlin_bucket_ref(hashlin, hash); /* reads the head stored in the bucket slot */
260 : }
261 :
262 : /**
263 : * Searches an element in the hashtable.
264 : * You have to provide a compare function and the hash of the element you want to find.
265 : * If more than one equal element is present, the first one is returned.
266 : * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
267 : * The function should return 0 for equal elements, anything else for different elements.
268 : * \param cmp_arg Compare argument passed as first argument of the compare function.
269 : * \param hash Hash of the element to find.
270 : * \return The first element found, or 0 if none.
271 : */
272 : tommy_inline void* tommy_hashlin_search(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
273 : {
274 : tommy_hashlin_node* i = tommy_hashlin_bucket(hashlin, hash);
275 :
276 : while (i) {
277 : /* we first check if the hash matches, as the same bucket may hold multiple hash values; cmp is called only for nodes with a matching hash */
278 : if (i->index == hash && cmp(cmp_arg, i->data) == 0)
279 : return i->data;
280 : i = i->next;
281 : }
282 : return 0;
283 : }
284 :
285 : /**
286 : * Removes an element from the hashtable.
287 : * You must already have the address of the node of the element to remove.
288 : * \return The tommy_node::data field of the node removed.
289 : */
290 : void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node);
291 :
292 : /**
293 : * Calls the specified function \p func for each element in the hashtable.
294 : *
295 : * You cannot add or remove elements from inside the callback,
296 : * but you can use it to deallocate them.
297 : *
298 : * \code
299 : * tommy_hashlin hashlin;
300 : *
301 : * // initializes the hashtable
302 : * tommy_hashlin_init(&hashlin);
303 : *
304 : * ...
305 : *
306 : * // creates an object
307 : * struct object* obj = malloc(sizeof(struct object));
308 : *
309 : * ...
310 : *
311 : * // inserts it in the hashtable
312 : * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value));
313 : *
314 : * ...
315 : *
316 : * // deallocates all the objects iterating the hashtable (free() presumably receives the object address given at insertion; confirm in the implementation)
317 : * tommy_hashlin_foreach(&hashlin, free);
318 : *
319 : * // deinitializes the hashtable
320 : * tommy_hashlin_done(&hashlin);
321 : * \endcode
322 : */
323 : void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func);
324 :
325 : /**
326 : * Calls the specified function \p func with the additional argument \p arg for each element in the hashtable.
327 : */
328 : void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg);
329 :
330 : /**
331 : * Gets the number of elements, as tracked in the count field of \p hashlin.
332 : */
333 5001 : tommy_inline tommy_size_t tommy_hashlin_count(tommy_hashlin* hashlin)
334 : {
335 5001 : return hashlin->count;
336 : }
337 :
338 : /**
339 : * Gets the size of the memory allocated for the hashtable \p hashlin.
340 : * It includes the size of the ::tommy_hashlin_node of the stored elements.
341 : */
342 : tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin);
343 :
344 : #endif
345 :
|