Line data Source code
1 : /*
2 : * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved.
3 : *
4 : * Redistribution and use in source and binary forms, with or without
5 : * modification, are permitted provided that the following conditions
6 : * are met:
7 : *
8 : * 1. Redistributions of source code must retain the above copyright
9 : * notice, this list of conditions and the following disclaimer.
10 : *
11 : * 2. Redistributions in binary form must reproduce the above copyright
12 : * notice, this list of conditions and the following disclaimer in the
13 : * documentation and/or other materials provided with the distribution.
14 : *
15 : * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 : * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 : * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 : * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 : * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 : * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 : * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 : * POSSIBILITY OF SUCH DAMAGE.
26 : */
27 :
28 : #include "tommyhashlin.h"
29 : #include "tommylist.h"
30 :
31 : #include <assert.h> /* for assert */
32 :
33 : /******************************************************************************/
34 : /* hashlin */
35 :
36 : /**
37 : * Reallocation states.
38 : */
39 : #define TOMMY_HASHLIN_STATE_STABLE 0
40 : #define TOMMY_HASHLIN_STATE_GROW 1
41 : #define TOMMY_HASHLIN_STATE_SHRINK 2
42 :
43 : /**
44 : * Set the hashtable in stable state.
45 : */
46 63871 : tommy_inline void tommy_hashlin_stable(tommy_hashlin* hashlin)
47 : {
48 63871 : hashlin->state = TOMMY_HASHLIN_STATE_STABLE;
49 :
50 : /* setup low_mask/max/split to allow tommy_hashlin_bucket_ref() */
51 : /* and tommy_hashlin_foreach() to work regardless we are in stable state */
52 63871 : hashlin->low_max = hashlin->bucket_max;
53 63871 : hashlin->low_mask = hashlin->bucket_mask;
54 63871 : hashlin->split = 0;
55 63871 : }
56 :
57 5065 : void tommy_hashlin_init(tommy_hashlin* hashlin)
58 : {
59 : tommy_uint_t i;
60 :
61 : /* fixed initial size */
62 5065 : hashlin->bucket_bit = TOMMY_HASHLIN_BIT;
63 5065 : hashlin->bucket_max = 1 << hashlin->bucket_bit;
64 5065 : hashlin->bucket_mask = hashlin->bucket_max - 1;
65 5065 : hashlin->bucket[0] = tommy_cast(tommy_hashlin_node**, tommy_calloc(hashlin->bucket_max, sizeof(tommy_hashlin_node*)));
66 30390 : for (i = 1; i < TOMMY_HASHLIN_BIT; ++i)
67 25325 : hashlin->bucket[i] = hashlin->bucket[0];
68 :
69 : /* stable state */
70 5065 : tommy_hashlin_stable(hashlin);
71 :
72 5065 : hashlin->count = 0;
73 5065 : }
74 :
75 5065 : void tommy_hashlin_done(tommy_hashlin* hashlin)
76 : {
77 : tommy_uint_t i;
78 :
79 5065 : tommy_free(hashlin->bucket[0]);
80 5080 : for (i = TOMMY_HASHLIN_BIT; i < hashlin->bucket_bit; ++i) {
81 15 : tommy_hashlin_node** segment = hashlin->bucket[i];
82 15 : tommy_free(&segment[((tommy_ptrdiff_t)1) << i]);
83 : }
84 5065 : }
85 :
86 : /**
87 : * Grow one step.
88 : */
89 77497500 : tommy_inline void hashlin_grow_step(tommy_hashlin* hashlin)
90 : {
91 : /* grow if more than 50% full */
92 77497500 : if (hashlin->state != TOMMY_HASHLIN_STATE_GROW
93 32192435 : && hashlin->count > hashlin->bucket_max / 2
94 : ) {
95 : /* if we are stable, setup a new grow state */
96 : /* otherwise continue with the already setup shrink one */
97 : /* but in backward direction */
98 31907 : if (hashlin->state == TOMMY_HASHLIN_STATE_STABLE) {
99 : tommy_hashlin_node** segment;
100 :
101 : /* set the lower size */
102 31907 : hashlin->low_max = hashlin->bucket_max;
103 31907 : hashlin->low_mask = hashlin->bucket_mask;
104 :
105 : /* allocate the new vector using malloc() and not calloc() */
106 : /* because data is fully initialized in the split process */
107 31907 : segment = tommy_cast(tommy_hashlin_node**, tommy_malloc(hashlin->low_max * sizeof(tommy_hashlin_node*)));
108 :
109 : /* store it adjusting the offset */
110 : /* cast to ptrdiff_t to ensure to get a negative value */
111 31907 : hashlin->bucket[hashlin->bucket_bit] = &segment[-(tommy_ptrdiff_t)hashlin->low_max];
112 :
113 : /* grow the hash size */
114 31907 : ++hashlin->bucket_bit;
115 31907 : hashlin->bucket_max = 1 << hashlin->bucket_bit;
116 31907 : hashlin->bucket_mask = hashlin->bucket_max - 1;
117 :
118 : /* start from the beginning going forward */
119 31907 : hashlin->split = 0;
120 : }
121 :
122 : /* grow state */
123 31907 : hashlin->state = TOMMY_HASHLIN_STATE_GROW;
124 : }
125 :
126 : /* if we are growing */
127 77497500 : if (hashlin->state == TOMMY_HASHLIN_STATE_GROW) {
128 : /* compute the split target required to finish the reallocation before the next resize */
129 45336972 : tommy_size_t split_target = 2 * hashlin->count;
130 :
131 : /* reallocate buckets until the split target */
132 121323824 : while (hashlin->split + hashlin->low_max < split_target) {
133 : tommy_hashlin_node** split[2];
134 : tommy_hashlin_node* j;
135 : tommy_size_t mask;
136 :
137 : /* get the low bucket */
138 30676794 : split[0] = tommy_hashlin_pos(hashlin, hashlin->split);
139 :
140 : /* get the high bucket */
141 30676794 : split[1] = tommy_hashlin_pos(hashlin, hashlin->split + hashlin->low_max);
142 :
143 : /* save the low bucket */
144 30676794 : j = *split[0];
145 :
146 : /* reinitialize the buckets */
147 30676794 : *split[0] = 0;
148 30676794 : *split[1] = 0;
149 :
150 : /* the bit used to identify the bucket */
151 30676794 : mask = hashlin->low_max;
152 :
153 : /* flush the bucket */
154 90035412 : while (j) {
155 28681824 : tommy_hashlin_node* j_next = j->next;
156 28681824 : tommy_size_t pos = (j->index & mask) != 0;
157 28681824 : if (*split[pos])
158 968598 : tommy_list_insert_tail_not_empty(*split[pos], j);
159 : else
160 27713226 : tommy_list_insert_first(split[pos], j);
161 28681824 : j = j_next;
162 : }
163 :
164 : /* go forward */
165 30676794 : ++hashlin->split;
166 :
167 : /* if we have finished, change the state */
168 30676794 : if (hashlin->split == hashlin->low_max) {
169 : /* go in stable mode */
170 26914 : tommy_hashlin_stable(hashlin);
171 26914 : break;
172 : }
173 : }
174 : }
175 77497500 : }
176 :
177 : /**
178 : * Shrink one step.
179 : */
180 76495000 : tommy_inline void hashlin_shrink_step(tommy_hashlin* hashlin)
181 : {
182 : /* shrink if less than 12.5% full */
183 76495000 : if (hashlin->state != TOMMY_HASHLIN_STATE_SHRINK
184 71396740 : && hashlin->count < hashlin->bucket_max / 8
185 : ) {
186 : /* avoid to shrink the first bucket */
187 8069804 : if (hashlin->bucket_bit > TOMMY_HASHLIN_BIT) {
188 : /* if we are stable, setup a new shrink state */
189 : /* otherwise continue with the already setup grow one */
190 : /* but in backward direction */
191 31892 : if (hashlin->state == TOMMY_HASHLIN_STATE_STABLE) {
192 : /* set the lower size */
193 26900 : hashlin->low_max = hashlin->bucket_max / 2;
194 26900 : hashlin->low_mask = hashlin->bucket_mask / 2;
195 :
196 : /* start from the half going backward */
197 26900 : hashlin->split = hashlin->low_max;
198 : }
199 :
200 : /* start reallocation */
201 31892 : hashlin->state = TOMMY_HASHLIN_STATE_SHRINK;
202 : }
203 : }
204 :
205 : /* if we are shrinking */
206 76495000 : if (hashlin->state == TOMMY_HASHLIN_STATE_SHRINK) {
207 : /* compute the split target required to finish the reallocation before the next resize */
208 5130152 : tommy_size_t split_target = 8 * hashlin->count;
209 :
210 : /* reallocate buckets until the split target */
211 38905270 : while (hashlin->split + hashlin->low_max > split_target) {
212 : tommy_hashlin_node** split[2];
213 :
214 : /* go backward position */
215 28676858 : --hashlin->split;
216 :
217 : /* get the low bucket */
218 28676858 : split[0] = tommy_hashlin_pos(hashlin, hashlin->split);
219 :
220 : /* get the high bucket */
221 28676858 : split[1] = tommy_hashlin_pos(hashlin, hashlin->split + hashlin->low_max);
222 :
223 : /* concat the high bucket into the low one */
224 28676858 : tommy_list_concat(split[0], split[1]);
225 :
226 : /* if we have finished, clean up and change the state */
227 28676858 : if (hashlin->split == 0) {
228 : tommy_hashlin_node** segment;
229 :
230 : /* shrink the hash size */
231 31892 : --hashlin->bucket_bit;
232 31892 : hashlin->bucket_max = 1 << hashlin->bucket_bit;
233 31892 : hashlin->bucket_mask = hashlin->bucket_max - 1;
234 :
235 : /* free the last segment */
236 31892 : segment = hashlin->bucket[hashlin->bucket_bit];
237 31892 : tommy_free(&segment[((tommy_ptrdiff_t)1) << hashlin->bucket_bit]);
238 :
239 : /* go in stable mode */
240 31892 : tommy_hashlin_stable(hashlin);
241 31892 : break;
242 : }
243 : }
244 : }
245 76495000 : }
246 :
247 77497500 : void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash)
248 : {
249 77497500 : tommy_list_insert_tail(tommy_hashlin_bucket_ref(hashlin, hash), node, data);
250 :
251 77497500 : node->index = hash;
252 :
253 77497500 : ++hashlin->count;
254 :
255 77497500 : hashlin_grow_step(hashlin);
256 77497500 : }
257 :
258 68745609 : void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node)
259 : {
260 68745609 : tommy_list_remove_existing(tommy_hashlin_bucket_ref(hashlin, node->index), node);
261 :
262 68745609 : --hashlin->count;
263 :
264 68745609 : hashlin_shrink_step(hashlin);
265 :
266 68745609 : return node->data;
267 : }
268 :
269 14496891 : void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
270 : {
271 14496891 : tommy_hashlin_node** let_ptr = tommy_hashlin_bucket_ref(hashlin, hash);
272 14496891 : tommy_hashlin_node* node = *let_ptr;
273 :
274 30243782 : while (node) {
275 : /* we first check if the hash matches, as in the same bucket we may have multiples hash values */
276 8999391 : if (node->index == hash && cmp(cmp_arg, node->data) == 0) {
277 7749391 : tommy_list_remove_existing(let_ptr, node);
278 :
279 7749391 : --hashlin->count;
280 :
281 7749391 : hashlin_shrink_step(hashlin);
282 :
283 7749391 : return node->data;
284 : }
285 1250000 : node = node->next;
286 : }
287 :
288 6747500 : return 0;
289 : }
290 :
291 5001 : void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func)
292 : {
293 : tommy_size_t bucket_max;
294 : tommy_size_t pos;
295 :
296 : /* number of valid buckets */
297 5001 : bucket_max = hashlin->low_max + hashlin->split;
298 :
299 27001057 : for (pos = 0; pos < bucket_max; ++pos) {
300 26996056 : tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos);
301 :
302 67489612 : while (node) {
303 13497500 : void* data = node->data;
304 13497500 : node = node->next;
305 13497500 : func(data);
306 : }
307 : }
308 5001 : }
309 :
310 63 : void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg)
311 : {
312 : tommy_size_t bucket_max;
313 : tommy_size_t pos;
314 :
315 : /* number of valid buckets */
316 63 : bucket_max = hashlin->low_max + hashlin->split;
317 :
318 2004901 : for (pos = 0; pos < bucket_max; ++pos) {
319 2004838 : tommy_hashlin_node* node = *tommy_hashlin_pos(hashlin, pos);
320 :
321 5011567 : while (node) {
322 1001891 : void* data = node->data;
323 1001891 : node = node->next;
324 1001891 : func(arg, data);
325 : }
326 : }
327 63 : }
328 :
329 5001 : tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin)
330 : {
331 10002 : return hashlin->bucket_max * (tommy_size_t)sizeof(hashlin->bucket[0][0])
332 5001 : + hashlin->count * (tommy_size_t)sizeof(tommy_hashlin_node);
333 : }
334 :
|