LCOV - code coverage report
Current view: top level - tommyds/tommyds - tommyhashlin.h (source / functions) Hit Total Coverage
Test: lcov.info Lines: 13 13 100.0 %
Date: 2018-04-02 17:50:51 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2010, Andrea Mazzoleni. All rights reserved.
       3             :  *
       4             :  * Redistribution and use in source and binary forms, with or without
       5             :  * modification, are permitted provided that the following conditions
       6             :  * are met:
       7             :  *
       8             :  * 1. Redistributions of source code must retain the above copyright
       9             :  *    notice, this list of conditions and the following disclaimer.
      10             :  *
      11             :  * 2. Redistributions in binary form must reproduce the above copyright
      12             :  *    notice, this list of conditions and the following disclaimer in the
      13             :  *    documentation and/or other materials provided with the distribution.
      14             :  *
      15             :  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
      16             :  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      17             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      18             :  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
      19             :  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      20             :  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
      21             :  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
      22             :  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
      23             :  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
      24             :  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
      25             :  * POSSIBILITY OF SUCH DAMAGE.
      26             :  */
      27             : 
      28             : /** \file
      29             :  * Linear chained hashtable.
      30             :  *
      31             :  * This hashtable resizes dynamically and progressively using a variation of the
      32             :  * linear hashing algorithm described in http://en.wikipedia.org/wiki/Linear_hashing
      33             :  *
      34             :  * It starts with the minimal size of 64 buckets, it doubles the size when it
      35             :  * reaches a load factor greater than 0.5 and it halves the size with a load
      36             :  * factor lower than 0.125.
      37             :  *
      38             :  * The progressive resize is good for real-time and interactive applications
      39             :  * as it makes insert and delete operations always take the same time.
      40             :  *
      41             :  * Resizing relies on a dynamic array that supports access to non-contiguous
      42             :  * segments.
      43             :  * In this way we only allocate additional table segments on the heap, without
      44             :  * freeing the previous table, and then not increasing the heap fragmentation.
      45             :  *
      46             :  * The resize takes place inside tommy_hashlin_insert() and tommy_hashlin_remove().
      47             :  * No resize is done in the tommy_hashlin_search() operation.
      48             :  *
      49             :  * To initialize the hashtable you have to call tommy_hashlin_init().
      50             :  *
      51             :  * \code
      52             :  * tommy_hashlin hashlin;
      53             :  *
      54             :  * tommy_hashlin_init(&hashlin);
      55             :  * \endcode
      56             :  *
      57             :  * To insert elements in the hashtable you have to call tommy_hashlin_insert() for
      58             :  * each element.
      59             :  * In the insertion call you have to specify the address of the node, the
      60             :  * address of the object, and the hash value of the key to use.
      61             :  * The address of the object is used to initialize the tommy_node::data field
      62             :  * of the node, and the hash to initialize the tommy_node::index field.
      63             :  *
      64             :  * \code
      65             :  * struct object {
      66             :  *     int value;
      67             :  *     // other fields
      68             :  *     tommy_node node;
      69             :  * };
      70             :  *
      71             :  * struct object* obj = malloc(sizeof(struct object)); // creates the object
      72             :  *
      73             :  * obj->value = ...; // initializes the object
      74             :  *
      75             :  * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value)); // inserts the object
      76             :  * \endcode
      77             :  *
      78             :  * To find an element in the hashtable you have to call tommy_hashlin_search()
      79             :  * providing a comparison function, its argument, and the hash of the key to search.
      80             :  *
      81             :  * \code
      82             :  * int compare(const void* arg, const void* obj)
      83             :  * {
      84             :  *     return *(const int*)arg != ((const struct object*)obj)->value;
      85             :  * }
      86             :  *
      87             :  * int value_to_find = 1;
      88             :  * struct object* obj = tommy_hashlin_search(&hashlin, compare, &value_to_find, tommy_inthash_u32(value_to_find));
      89             :  * if (!obj) {
      90             :  *     // not found
      91             :  * } else {
      92             :  *     // found
      93             :  * }
      94             :  * \endcode
      95             :  *
      96             :  * To iterate over all the elements in the hashtable with the same key, you have to
      97             :  * use tommy_hashlin_bucket() and follow the tommy_node::next pointer until NULL.
      98             :  * You also have to check explicitly for the key, as the bucket may contain
      99             :  * different keys.
     100             :  *
     101             :  * \code
     102             :  * int value_to_find = 1;
     103             :  * tommy_node* i = tommy_hashlin_bucket(&hashlin, tommy_inthash_u32(value_to_find));
     104             :  * while (i) {
     105             :  *     struct object* obj = i->data; // gets the object pointer
     106             :  *
     107             :  *     if (obj->value == value_to_find) {
     108             :  *         printf("%d\n", obj->value); // process the object
     109             :  *     }
     110             :  *
     111             :  *     i = i->next; // goes to the next element
     112             :  * }
     113             :  * \endcode
     114             :  *
     115             :  * To remove an element from the hashtable you have to call tommy_hashlin_remove()
     116             :  * providing a comparison function, its argument, and the hash of the key to search
     117             :  * and remove.
     118             :  *
     119             :  * \code
     120             :  * struct object* obj = tommy_hashlin_remove(&hashlin, compare, &value_to_remove, tommy_inthash_u32(value_to_remove));
     121             :  * if (obj) {
     122             :  *     free(obj); // frees the object allocated memory
     123             :  * }
     124             :  * \endcode
     125             :  *
     126             :  * To destroy the hashtable you have to remove all the elements, and deinitialize
     127             :  * the hashtable calling tommy_hashlin_done().
     128             :  *
     129             :  * \code
     130             :  * tommy_hashlin_done(&hashlin);
     131             :  * \endcode
     132             :  *
     133             :  * If you need to iterate over all the elements in the hashtable, you can use
     134             :  * tommy_hashlin_foreach() or tommy_hashlin_foreach_arg().
     135             :  * If you need a more precise control with a real iteration, you have to insert
     136             :  * all the elements also in a ::tommy_list, and use the list to iterate.
     137             :  * See the \ref multiindex example for more detail.
     138             :  */
     139             : 
     140             : #ifndef __TOMMYHASHLIN_H
     141             : #define __TOMMYHASHLIN_H
     142             : 
     143             : #include "tommyhash.h"
     144             : 
     145             : /******************************************************************************/
     146             : /* hashlin */
     147             : 
     148             : /** \internal
     149             :  * Initial and minimal size of the hashtable expressed as a power of 2.
     150             :  * The initial size is 2^TOMMY_HASHLIN_BIT.
     151             :  */
     152             : #define TOMMY_HASHLIN_BIT 6
     153             : 
     154             : /**
     155             :  * Hashtable node.
     156             :  * This is the node that you have to include inside your objects.
     157             :  */
     158             : typedef tommy_node tommy_hashlin_node;
     159             : 
     160             : /**
     161             :  * Hashtable container type.
     162             :  * \note Don't use internal fields directly, but access the container only using functions.
     163             :  */
     164             : typedef struct tommy_hashlin_struct {
     165             :         tommy_hashlin_node** bucket[TOMMY_SIZE_BIT]; /**< Dynamic array of hash buckets. One list for each hash modulus. */
     166             :         tommy_size_t bucket_max; /**< Number of buckets. */
     167             :         tommy_size_t bucket_mask; /**< Bit mask to access the buckets. */
     168             :         tommy_size_t low_max; /**< Low order max value. */
     169             :         tommy_size_t low_mask; /**< Low order mask value. */
     170             :         tommy_size_t split; /**< Split position. */
     171             :         tommy_size_t count; /**< Number of elements. */
     172             :         tommy_uint_t bucket_bit; /**< Bits used in the bit mask. */
     173             :         tommy_uint_t state; /**< Reallocation state. */
     174             : } tommy_hashlin;
     175             : 
     176             : /**
     177             :  * Initializes the hashtable.
     178             :  */
     179             : void tommy_hashlin_init(tommy_hashlin* hashlin);
     180             : 
     181             : /**
     182             :  * Deinitializes the hashtable.
     183             :  *
     184             :  * You can call this function with elements still contained,
     185             :  * but such elements are not going to be freed by this call.
     186             :  */
     187             : void tommy_hashlin_done(tommy_hashlin* hashlin);
     188             : 
     189             : /**
     190             :  * Inserts an element in the hashtable.
     191             :  */
     192             : void tommy_hashlin_insert(tommy_hashlin* hashlin, tommy_hashlin_node* node, void* data, tommy_hash_t hash);
     193             : 
     194             : /**
     195             :  * Searches and removes an element from the hashtable.
     196             :  * You have to provide a compare function and the hash of the element you want to remove.
     197             :  * If the element is not found, 0 is returned.
     198             :  * If more equal elements are present, the first one is removed.
     199             :  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
     200             :  * The function should return 0 for equal elements, any other value for different elements.
     201             :  * \param cmp_arg Compare argument passed as first argument of the compare function.
     202             :  * \param hash Hash of the element to find and remove.
     203             :  * \return The removed element, or 0 if not found.
     204             :  */
     205             : void* tommy_hashlin_remove(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash);
     206             : 
     207             : /** \internal
     208             :  * Returns the bucket at the specified position.
     209             :  */
     210   308448199 : tommy_inline tommy_hashlin_node** tommy_hashlin_pos(tommy_hashlin* hashlin, tommy_hash_t pos)
     211             : {
     212             :         tommy_uint_t bsr;
     213             : 
     214             :         /* get the index of the highest bit set; (pos | 1) makes a pos of 0 behave like 1, giving 0 */
     215   308448199 :         bsr = tommy_ilog2(pos | 1);
     216             : 
     217   308448199 :         return &hashlin->bucket[bsr][pos];
     218             : }
     219             : 
     220             : /** \internal
     221             :  * Returns a pointer to the bucket of the specified hash.
     222             :  */
     223   160740001 : tommy_inline tommy_hashlin_node** tommy_hashlin_bucket_ref(tommy_hashlin* hashlin, tommy_hash_t hash)
     224             : {
     225             :         tommy_size_t pos;
     226             :         tommy_size_t high_pos;
     227             : 
     228   160740001 :         pos = hash & hashlin->low_mask;
     229   160740001 :         high_pos = hash & hashlin->bucket_mask;
     230             : 
     231             :         /* if this position is already allocated in the high half */
     232   160740001 :         if (pos < hashlin->split) {
     233             :                 /* The following assignment is expected to be implemented */
     234             :                 /* with a conditional move instruction */
     235             :                 /* that results in a little better and constant performance */
     236             :                 /* regardless of the split position. */
     237             :                 /* This affects mostly the worst case, when the split value */
     238             :                 /* is near at its half, resulting in a totally unpredictable */
     239             :                 /* condition by the CPU. */
     240             :                 /* In such case the use of the conditional move is generally faster. */
     241             : 
     242             :                 /* use also the high bit */
     243    41603307 :                 pos = high_pos;
     244             :         }
     245             : 
     246   160740001 :         return tommy_hashlin_pos(hashlin, pos);
     247             : }
     248             : 
     249             : /**
     250             :  * Gets the bucket of the specified hash.
     251             :  * The bucket is guaranteed to contain ALL the elements with the specified hash,
     252             :  * but it can contain also others.
     253             :  * You can access elements in the bucket following the ::next pointer until 0.
     254             :  * \param hash Hash of the element to find.
     255             :  * \return The head of the bucket, or 0 if empty.
     256             :  */
     257           1 : tommy_inline tommy_hashlin_node* tommy_hashlin_bucket(tommy_hashlin* hashlin, tommy_hash_t hash)
     258             : {
     259           1 :         return *tommy_hashlin_bucket_ref(hashlin, hash);
     260             : }
     261             : 
     262             : /**
     263             :  * Searches an element in the hashtable.
     264             :  * You have to provide a compare function and the hash of the element you want to find.
     265             :  * If more equal elements are present, the first one is returned.
     266             :  * \param cmp Compare function called with cmp_arg as first argument and with the element to compare as a second one.
     267             :  * The function should return 0 for equal elements, any other value for different elements.
     268             :  * \param cmp_arg Compare argument passed as first argument of the compare function.
     269             :  * \param hash Hash of the element to find.
     270             :  * \return The first element found, or 0 if none.
     271             :  */
     272             : tommy_inline void* tommy_hashlin_search(tommy_hashlin* hashlin, tommy_search_func* cmp, const void* cmp_arg, tommy_hash_t hash)
     273             : {
     274             :         tommy_hashlin_node* i = tommy_hashlin_bucket(hashlin, hash);
     275             : 
     276             :         while (i) {
     277             :                 /* we first check if the hash matches, as in the same bucket we may have multiple hash values */
     278             :                 if (i->index == hash && cmp(cmp_arg, i->data) == 0)
     279             :                         return i->data;
     280             :                 i = i->next;
     281             :         }
     282             :         return 0;
     283             : }
     284             : 
     285             : /**
     286             :  * Removes an element from the hashtable.
     287             :  * You must already have the address of the element to remove.
     288             :  * \return The tommy_node::data field of the node removed.
     289             :  */
     290             : void* tommy_hashlin_remove_existing(tommy_hashlin* hashlin, tommy_hashlin_node* node);
     291             : 
     292             : /**
     293             :  * Calls the specified function for each element in the hashtable.
     294             :  *
     295             :  * You cannot add or remove elements from the inside of the callback,
     296             :  * but can use it to deallocate them.
     297             :  *
     298             :  * \code
     299             :  * tommy_hashlin hashlin;
     300             :  *
     301             :  * // initializes the hashtable
     302             :  * tommy_hashlin_init(&hashlin);
     303             :  *
     304             :  * ...
     305             :  *
     306             :  * // creates an object
     307             :  * struct object* obj = malloc(sizeof(struct object));
     308             :  *
     309             :  * ...
     310             :  *
     311             :  * // insert it in the hashtable
     312             :  * tommy_hashlin_insert(&hashlin, &obj->node, obj, tommy_inthash_u32(obj->value));
     313             :  *
     314             :  * ...
     315             :  *
     316             :  * // deallocates all the objects iterating the hashtable
     317             :  * tommy_hashlin_foreach(&hashlin, free);
     318             :  *
     319             :  * // deallocates the hashtable
     320             :  * tommy_hashlin_done(&hashlin);
     321             :  * \endcode
     322             :  */
     323             : void tommy_hashlin_foreach(tommy_hashlin* hashlin, tommy_foreach_func* func);
     324             : 
     325             : /**
     326             :  * Calls the specified function with an argument for each element in the hashtable.
     327             :  */
     328             : void tommy_hashlin_foreach_arg(tommy_hashlin* hashlin, tommy_foreach_arg_func* func, void* arg);
     329             : 
     330             : /**
     331             :  * Gets the number of elements.
     332             :  */
     333        5001 : tommy_inline tommy_size_t tommy_hashlin_count(tommy_hashlin* hashlin)
     334             : {
     335        5001 :         return hashlin->count;
     336             : }
     337             : 
     338             : /**
     339             :  * Gets the size of allocated memory.
     340             :  * It includes the size of the ::tommy_hashlin_node of the stored elements.
     341             :  */
     342             : tommy_size_t tommy_hashlin_memory_usage(tommy_hashlin* hashlin);
     343             : 
     344             : #endif
     345             : 

Generated by: LCOV version 1.13