Line data Source code
1 : /*
2 : * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved.
3 : *
4 : * Redistribution and use in source and binary forms, with or without
5 : * modification, are permitted provided that the following conditions
6 : * are met:
7 : *
8 : * 1. Redistributions of source code must retain the above copyright
9 : * notice, this list of conditions and the following disclaimer.
10 : *
11 : * 2. Redistributions in binary form must reproduce the above copyright
12 : * notice, this list of conditions and the following disclaimer in the
13 : * documentation and/or other materials provided with the distribution.
14 : *
15 : * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 : * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 : * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 : * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 : * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 : * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 : * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 : * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 : * POSSIBILITY OF SUCH DAMAGE.
26 : */
27 :
28 : /** \file
29 : * Trie optimized for cache utilization.
30 : *
31 : * This trie is a standard implementation that stores elements in the order defined
32 : * by the key.
33 : *
34 : * It needs an external allocator for the inner nodes in the trie.
35 : *
36 : * You can control the number of branches of each node using the ::TOMMY_TRIE_TREE_MAX
37 : * define. More branches imply more speed, but a bigger memory occupation.
38 : *
39 : * Compared to ::tommy_trie_inplace you have to provide a ::tommy_allocator allocator.
40 : * Note that the C malloc() is too slow to futfill this role.
41 : *
42 : * To initialize the trie you have to call tommy_allocator_init() to initialize
43 : * the allocator, and tommy_trie_init() for the trie.
44 : *
45 : * \code
46 : * tommy_allocator alloc;
47 : * tommy_trie trie;
48 : *
49 : * tommy_allocator_init(&alloc, TOMMY_TRIE_BLOCK_SIZE, TOMMY_TRIE_BLOCK_SIZE);
50 : *
51 : * tommy_trie_init(&trie, &alloc);
52 : * \endcode
53 : *
54 : * To insert elements in the trie you have to call tommy_trie_insert() for
55 : * each element.
56 : * In the insertion call you have to specify the address of the node, the
57 : * address of the object, and the key value to use.
58 : * The address of the object is used to initialize the tommy_node::data field
59 : * of the node, and the key to initialize the tommy_node::key field.
60 : *
61 : * \code
62 : * struct object {
63 : * int value;
64 : * // other fields
65 : * tommy_node node;
66 : * };
67 : *
68 : * struct object* obj = malloc(sizeof(struct object)); // creates the object
69 : *
70 : * obj->value = ...; // initializes the object
71 : *
72 : * tommy_trie_insert(&trie, &obj->node, obj, obj->value); // inserts the object
73 : * \endcode
74 : *
75 : * To find and element in the trie you have to call tommy_trie_search() providing
76 : * the key to search.
77 : *
78 : * \code
79 : * int value_to_find = 1;
80 : * struct object* obj = tommy_trie_search(&trie, value_to_find);
81 : * if (!obj) {
82 : * // not found
83 : * } else {
84 : * // found
85 : * }
86 : * \endcode
87 : *
88 : * To iterate over all the elements in the trie with the same key, you have to
89 : * use tommy_trie_bucket() and follow the tommy_node::next pointer until NULL.
90 : *
91 : * \code
92 : * int value_to_find = 1;
93 : * tommy_node* i = tommy_trie_bucket(&trie, value_to_find);
94 : * while (i) {
95 : * struct object* obj = i->data; // gets the object pointer
96 : *
97 : * printf("%d\n", obj->value); // process the object
98 : *
99 : * i = i->next; // goes to the next element
100 : * }
101 : * \endcode
102 : *
103 : * To remove an element from the trie you have to call tommy_trie_remove()
104 : * providing the key to search and remove.
105 : *
106 : * \code
107 : * struct object* obj = tommy_trie_remove(&trie, value_to_remove);
108 : * if (obj) {
109 : * free(obj); // frees the object allocated memory
110 : * }
111 : * \endcode
112 : *
113 : * To destroy the trie you have to remove all the elements, and deinitialize
114 : * the allocator using tommy_allocator_done().
115 : *
116 : * \code
117 : * tommy_allocator_done(&alloc);
118 : * \endcode
119 : *
120 : * Note that you cannot iterate over all the elements in the trie using the
121 : * trie itself. You have to insert all the elements also in a ::tommy_list,
122 : * and use the list to iterate. See the \ref multiindex example for more detail.
123 : */
124 :
125 : #ifndef __TOMMYTRIE_H
126 : #define __TOMMYTRIE_H
127 :
128 : #include "tommytypes.h"
129 : #include "tommyalloc.h"
130 :
131 : /******************************************************************************/
132 : /* trie */
133 :
134 : /**
135 : * Number of bits of the elements to store in the trie.
136 : *
137 : * If you need to store integers bigger than 32 bits you can
138 : * increse this value.
139 : *
140 : * Keeping this value small improves the performance of the trie.
141 : */
142 : #define TOMMY_TRIE_BIT 32
143 :
144 : /**
145 : * Number of branches on each inner node. It must be a power of 2.
146 : * Suggested values are 8, 16 and 32.
147 : * Any inner node, excluding leafs, contains a pointer to each branch.
148 : *
149 : * The default size is choosen to exactly fit a typical cache line of 64 bytes.
150 : */
151 : #define TOMMY_TRIE_TREE_MAX (64 / sizeof(void*))
152 :
153 : /**
154 : * Trie block size.
155 : * You must use this value to initialize the allocator.
156 : */
157 : #define TOMMY_TRIE_BLOCK_SIZE (TOMMY_TRIE_TREE_MAX * sizeof(void*))
158 :
159 : /** \internal
160 : * Number of bits for each branch.
161 : */
162 : #define TOMMY_TRIE_TREE_BIT TOMMY_ILOG2(TOMMY_TRIE_TREE_MAX)
163 :
164 : /** \internal
165 : * Number of bits of the first level.
166 : */
167 : #define TOMMY_TRIE_BUCKET_BIT ((TOMMY_TRIE_BIT % TOMMY_TRIE_TREE_BIT) + TOMMY_TRIE_TREE_BIT)
168 :
169 : /** \internal
170 : * Number of branches of the first level.
171 : * It's like a inner branch, but bigger to get any remainder bits.
172 : */
173 : #define TOMMY_TRIE_BUCKET_MAX (1 << TOMMY_TRIE_BUCKET_BIT)
174 :
175 : /**
176 : * Trie node.
177 : * This is the node that you have to include inside your objects.
178 : */
179 : typedef tommy_node tommy_trie_node;
180 :
181 : /**
182 : * Trie container type.
183 : * \note Don't use internal fields directly, but access the container only using functions.
184 : */
185 : typedef struct tommy_trie_struct {
186 : tommy_trie_node* bucket[TOMMY_TRIE_BUCKET_MAX]; /**< First tree level. */
187 : tommy_size_t count; /**< Number of elements. */
188 : tommy_size_t node_count; /**< Number of nodes. */
189 : tommy_allocator* alloc; /**< Allocator for internal nodes. */
190 : } tommy_trie;
191 :
192 : /**
193 : * Initializes the trie.
194 : * You have to provide an allocator initialized with *both* the size and align with TOMMY_TRIE_BLOCK_SIZE.
195 : * You can share this allocator with other tries.
196 : *
197 : * The tries is completely allocated through the allocator, and it doesn't need to be deinitialized.
198 : * \param alloc Allocator initialized with *both* the size and align with TOMMY_TRIE_BLOCK_SIZE.
199 : */
200 : void tommy_trie_init(tommy_trie* trie, tommy_allocator* alloc);
201 :
202 : /**
203 : * Inserts an element in the trie.
204 : * You have to provide the pointer of the node embedded into the object,
205 : * the pointer to the object and the key to use.
206 : * \param node Pointer to the node embedded into the object to insert.
207 : * \param data Pointer to the object to insert.
208 : * \param key Key to use to insert the object.
209 : */
210 : void tommy_trie_insert(tommy_trie* trie, tommy_trie_node* node, void* data, tommy_key_t key);
211 :
212 : /**
213 : * Searches and removes the first element with the specified key.
214 : * If the element is not found, 0 is returned.
215 : * If more equal elements are present, the first one is removed.
216 : * This operation is faster than calling tommy_trie_bucket() and tommy_trie_remove_existing() separately.
217 : * \param key Key of the element to find and remove.
218 : * \return The removed element, or 0 if not found.
219 : */
220 : void* tommy_trie_remove(tommy_trie* trie, tommy_key_t key);
221 :
222 : /**
223 : * Gets the bucket of the specified key.
224 : * The bucket is guaranteed to contain ALL and ONLY the elements with the specified key.
225 : * You can access elements in the bucket following the ::next pointer until 0.
226 : * \param key Key of the element to find.
227 : * \return The head of the bucket, or 0 if empty.
228 : */
229 : tommy_trie_node* tommy_trie_bucket(tommy_trie* trie, tommy_key_t key);
230 :
231 : /**
232 : * Searches an element in the trie.
233 : * You have to provide the key of the element you want to find.
234 : * If more elements with the same key are present, the first one is returned.
235 : * \param key Key of the element to find.
236 : * \return The first element found, or 0 if none.
237 : */
238 4000001 : tommy_inline void* tommy_trie_search(tommy_trie* trie, tommy_key_t key)
239 : {
240 4000001 : tommy_trie_node* i = tommy_trie_bucket(trie, key);
241 :
242 4000001 : if (!i)
243 2000001 : return 0;
244 :
245 2000000 : return i->data;
246 : }
247 :
248 : /**
249 : * Removes an element from the trie.
250 : * You must already have the address of the element to remove.
251 : * \return The tommy_node::data field of the node removed.
252 : */
253 : void* tommy_trie_remove_existing(tommy_trie* trie, tommy_trie_node* node);
254 :
255 : /**
256 : * Gets the number of elements.
257 : */
258 2 : tommy_inline tommy_size_t tommy_trie_count(tommy_trie* trie)
259 : {
260 2 : return trie->count;
261 : }
262 :
263 : /**
264 : * Gets the size of allocated memory.
265 : * It includes the size of the ::tommy_trie_node of the stored elements.
266 : */
267 : tommy_size_t tommy_trie_memory_usage(tommy_trie* trie);
268 :
269 : #endif
270 :
|