khash.h

00001 /* The MIT License
00002 
00003    Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
00004 
00005    Permission is hereby granted, free of charge, to any person obtaining
00006    a copy of this software and associated documentation files (the
00007    "Software"), to deal in the Software without restriction, including
00008    without limitation the rights to use, copy, modify, merge, publish,
00009    distribute, sublicense, and/or sell copies of the Software, and to
00010    permit persons to whom the Software is furnished to do so, subject to
00011    the following conditions:
00012 
00013    The above copyright notice and this permission notice shall be
00014    included in all copies or substantial portions of the Software.
00015 
00016    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00017    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00018    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00019    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
00020    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
00021    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
00022    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
00023    SOFTWARE.
00024 */
00025 
00026 /*
00027   An example:
00028 
00029 #include "khash.h"
00030 KHASH_MAP_INIT_INT(32, char)
00031 int main() {
00032     int ret, is_missing;
00033     khiter_t k;
00034     khash_t(32) *h = kh_init(32);
00035     k = kh_put(32, h, 5, &ret);
00036     if (!ret) kh_del(32, h, k);
00037     kh_value(h, k) = 10;
00038     k = kh_get(32, h, 10);
00039     is_missing = (k == kh_end(h));
00040     k = kh_get(32, h, 5);
00041     kh_del(32, h, k);
00042     for (k = kh_begin(h); k != kh_end(h); ++k)
00043         if (kh_exist(h, k)) kh_value(h, k) = 1;
00044     kh_destroy(32, h);
00045     return 0;
00046 }
00047 */
00048 
00049 /*
00050   2011-02-14 (0.2.5):
00051 
00052     * Allow to declare global functions.
00053 
00054   2009-09-26 (0.2.4):
00055 
00056     * Improve portability
00057 
00058   2008-09-19 (0.2.3):
00059 
00060     * Corrected the example
00061     * Improved interfaces
00062 
00063   2008-09-11 (0.2.2):
00064 
00065     * Improved speed a little in kh_put()
00066 
00067   2008-09-10 (0.2.1):
00068 
00069     * Added kh_clear()
00070     * Fixed a compiling error
00071 
00072   2008-09-02 (0.2.0):
00073 
00074     * Changed to token concatenation which increases flexibility.
00075 
00076   2008-08-31 (0.1.2):
00077 
00078     * Fixed a bug in kh_get(), which has not been tested previously.
00079 
00080   2008-08-31 (0.1.1):
00081 
00082     * Added destructor
00083 */
00084 
00085 
00086 #ifndef __AC_KHASH_H
00087 #define __AC_KHASH_H
00088 
00089 /*!
00090   @header
00091 
00092   Generic hash table library.
00093 
00094   @copyright Heng Li
00095  */
00096 
00097 #define AC_VERSION_KHASH_H "0.2.5"
00098 
00099 #include <stdlib.h>
00100 #include <string.h>
00101 #include <limits.h>
00102 
00103 /* compipler specific configuration */
00104 
00105 #if UINT_MAX == 0xffffffffu
00106 typedef unsigned int khint32_t;
00107 #elif ULONG_MAX == 0xffffffffu
00108 typedef unsigned long khint32_t;
00109 #endif
00110 
00111 #if ULONG_MAX == ULLONG_MAX
00112 typedef unsigned long khint64_t;
00113 #else
00114 typedef unsigned long long khint64_t;
00115 #endif
00116 
00117 #ifdef _MSC_VER
00118 #define inline __inline
00119 #endif
00120 
00121 typedef khint32_t khint_t;
00122 typedef khint_t khiter_t;
00123 
00124 #define __ac_HASH_PRIME_SIZE 32
00125 static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
00126 {
00127   0ul,          3ul,          11ul,         23ul,         53ul,
00128   97ul,         193ul,        389ul,        769ul,        1543ul,
00129   3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
00130   98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
00131   3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
00132   100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
00133   3221225473ul, 4294967291ul
00134 };
00135 
00136 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
00137 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
00138 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
00139 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
00140 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
00141 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
00142 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
00143 
00144 static const double __ac_HASH_UPPER = 0.77;
00145 
00146 #define KHASH_DECLARE(name, khkey_t, khval_t)                           \
00147     typedef struct {                                                    \
00148         khint_t n_buckets, size, n_occupied, upper_bound;               \
00149         khint32_t *flags;                                               \
00150         khkey_t *keys;                                                  \
00151         khval_t *vals;                                                  \
00152     } kh_##name##_t;                                                    \
00153     extern kh_##name##_t *kh_init_##name();                             \
00154     extern void kh_destroy_##name(kh_##name##_t *h);                    \
00155     extern void kh_clear_##name(kh_##name##_t *h);                      \
00156     extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key);  \
00157     extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
00158     extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
00159     extern void kh_del_##name(kh_##name##_t *h, khint_t x);
00160 
00161 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
00162     typedef struct {                                                    \
00163         khint_t n_buckets, size, n_occupied, upper_bound;               \
00164         khint32_t *flags;                                               \
00165         khkey_t *keys;                                                  \
00166         khval_t *vals;                                                  \
00167     } kh_##name##_t;                                                    \
00168     SCOPE kh_##name##_t *kh_init_##name() {                             \
00169         return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));        \
00170     }                                                                   \
00171     SCOPE void kh_destroy_##name(kh_##name##_t *h)                      \
00172     {                                                                   \
00173         if (h) {                                                        \
00174             free(h->keys); free(h->flags);                              \
00175             free(h->vals);                                              \
00176             free(h);                                                    \
00177         }                                                               \
00178     }                                                                   \
00179     SCOPE void kh_clear_##name(kh_##name##_t *h)                        \
00180     {                                                                   \
00181         if (h && h->flags) {                                            \
00182             memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \
00183             h->size = h->n_occupied = 0;                                \
00184         }                                                               \
00185     }                                                                   \
00186     SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key)    \
00187     {                                                                   \
00188         if (h->n_buckets) {                                             \
00189             khint_t inc, k, i, last;                                    \
00190             k = __hash_func(key); i = k % h->n_buckets;                 \
00191             inc = 1 + k % (h->n_buckets - 1); last = i;                 \
00192             while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
00193                 if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
00194                 else i += inc;                                          \
00195                 if (i == last) return h->n_buckets;                     \
00196             }                                                           \
00197             return __ac_iseither(h->flags, i)? h->n_buckets : i;        \
00198         } else return 0;                                                \
00199     }                                                                   \
00200     SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
00201     {                                                                   \
00202         khint32_t *new_flags = 0;                                       \
00203         khint_t j = 1;                                                  \
00204         {                                                               \
00205             khint_t t = __ac_HASH_PRIME_SIZE - 1;                       \
00206             while (__ac_prime_list[t] > new_n_buckets) --t;             \
00207             new_n_buckets = __ac_prime_list[t+1];                       \
00208             if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
00209             else {                                                      \
00210                 new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t));   \
00211                 memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
00212                 if (h->n_buckets < new_n_buckets) {                     \
00213                     h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
00214                     if (kh_is_map)                                      \
00215                         h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
00216                 }                                                       \
00217             }                                                           \
00218         }                                                               \
00219         if (j) {                                                        \
00220             for (j = 0; j != h->n_buckets; ++j) {                       \
00221                 if (__ac_iseither(h->flags, j) == 0) {                  \
00222                     khkey_t key = h->keys[j];                           \
00223                     khval_t val;                                        \
00224                     if (kh_is_map) val = h->vals[j];                    \
00225                     __ac_set_isdel_true(h->flags, j);                   \
00226                     while (1) {                                         \
00227                         khint_t inc, k, i;                              \
00228                         k = __hash_func(key);                           \
00229                         i = k % new_n_buckets;                          \
00230                         inc = 1 + k % (new_n_buckets - 1);              \
00231                         while (!__ac_isempty(new_flags, i)) {           \
00232                             if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
00233                             else i += inc;                              \
00234                         }                                               \
00235                         __ac_set_isempty_false(new_flags, i);           \
00236                         if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
00237                             { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
00238                             if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
00239                             __ac_set_isdel_true(h->flags, i);           \
00240                         } else {                                        \
00241                             h->keys[i] = key;                           \
00242                             if (kh_is_map) h->vals[i] = val;            \
00243                             break;                                      \
00244                         }                                               \
00245                     }                                                   \
00246                 }                                                       \
00247             }                                                           \
00248             if (h->n_buckets > new_n_buckets) {                         \
00249                 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
00250                 if (kh_is_map)                                          \
00251                     h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
00252             }                                                           \
00253             free(h->flags);                                             \
00254             h->flags = new_flags;                                       \
00255             h->n_buckets = new_n_buckets;                               \
00256             h->n_occupied = h->size;                                    \
00257             h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
00258         }                                                               \
00259     }                                                                   \
00260     SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
00261     {                                                                   \
00262         khint_t x;                                                      \
00263         if (h->n_occupied >= h->upper_bound) {                          \
00264             if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
00265             else kh_resize_##name(h, h->n_buckets + 1);                 \
00266         }                                                               \
00267         {                                                               \
00268             khint_t inc, k, i, site, last;                              \
00269             x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
00270             if (__ac_isempty(h->flags, i)) x = i;                       \
00271             else {                                                      \
00272                 inc = 1 + k % (h->n_buckets - 1); last = i;             \
00273                 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
00274                     if (__ac_isdel(h->flags, i)) site = i;              \
00275                     if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
00276                     else i += inc;                                      \
00277                     if (i == last) { x = site; break; }                 \
00278                 }                                                       \
00279                 if (x == h->n_buckets) {                                \
00280                     if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
00281                     else x = i;                                         \
00282                 }                                                       \
00283             }                                                           \
00284         }                                                               \
00285         if (__ac_isempty(h->flags, x)) {                                \
00286             h->keys[x] = key;                                           \
00287             __ac_set_isboth_false(h->flags, x);                         \
00288             ++h->size; ++h->n_occupied;                                 \
00289             *ret = 1;                                                   \
00290         } else if (__ac_isdel(h->flags, x)) {                           \
00291             h->keys[x] = key;                                           \
00292             __ac_set_isboth_false(h->flags, x);                         \
00293             ++h->size;                                                  \
00294             *ret = 2;                                                   \
00295         } else *ret = 0;                                                \
00296         return x;                                                       \
00297     }                                                                   \
00298     SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x)               \
00299     {                                                                   \
00300         if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {         \
00301             __ac_set_isdel_true(h->flags, x);                           \
00302             --h->size;                                                  \
00303         }                                                               \
00304     }
00305 
00306 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
00307     KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
00308 
00309 /* --- BEGIN OF HASH FUNCTIONS --- */
00310 
00311 /*! @function
00312   @abstract     Integer hash function
00313   @param  key   The integer [khint32_t]
00314   @return       The hash value [khint_t]
00315  */
00316 #define kh_int_hash_func(key) (khint32_t)(key)
00317 /*! @function
00318   @abstract     Integer comparison function
00319  */
00320 #define kh_int_hash_equal(a, b) ((a) == (b))
00321 /*! @function
00322   @abstract     64-bit integer hash function
00323   @param  key   The integer [khint64_t]
00324   @return       The hash value [khint_t]
00325  */
00326 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
00327 /*! @function
00328   @abstract     64-bit integer comparison function
00329  */
00330 #define kh_int64_hash_equal(a, b) ((a) == (b))
00331 /*! @function
00332   @abstract     const char* hash function
00333   @param  s     Pointer to a null terminated string
00334   @return       The hash value
00335  */
00336 static inline khint_t __ac_X31_hash_string(const char *s)
00337 {
00338     khint_t h = *s;
00339     if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
00340     return h;
00341 }
00342 /*! @function
00343   @abstract     Another interface to const char* hash function
00344   @param  key   Pointer to a null terminated string [const char*]
00345   @return       The hash value [khint_t]
00346  */
00347 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
00348 /*! @function
00349   @abstract     Const char* comparison function
00350  */
00351 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
00352 
00353 /* --- END OF HASH FUNCTIONS --- */
00354 
00355 /* Other necessary macros... */
00356 
00357 /*!
00358   @abstract Type of the hash table.
00359   @param  name  Name of the hash table [symbol]
00360  */
00361 #define khash_t(name) kh_##name##_t
00362 
00363 /*! @function
00364   @abstract     Initiate a hash table.
00365   @param  name  Name of the hash table [symbol]
00366   @return       Pointer to the hash table [khash_t(name)*]
00367  */
00368 #define kh_init(name) kh_init_##name()
00369 
00370 /*! @function
00371   @abstract     Destroy a hash table.
00372   @param  name  Name of the hash table [symbol]
00373   @param  h     Pointer to the hash table [khash_t(name)*]
00374  */
00375 #define kh_destroy(name, h) kh_destroy_##name(h)
00376 
00377 /*! @function
00378   @abstract     Reset a hash table without deallocating memory.
00379   @param  name  Name of the hash table [symbol]
00380   @param  h     Pointer to the hash table [khash_t(name)*]
00381  */
00382 #define kh_clear(name, h) kh_clear_##name(h)
00383 
00384 /*! @function
00385   @abstract     Resize a hash table.
00386   @param  name  Name of the hash table [symbol]
00387   @param  h     Pointer to the hash table [khash_t(name)*]
00388   @param  s     New size [khint_t]
00389  */
00390 #define kh_resize(name, h, s) kh_resize_##name(h, s)
00391 
00392 /*! @function
00393   @abstract     Insert a key to the hash table.
00394   @param  name  Name of the hash table [symbol]
00395   @param  h     Pointer to the hash table [khash_t(name)*]
00396   @param  k     Key [type of keys]
00397   @param  r     Extra return code: 0 if the key is present in the hash table;
00398                 1 if the bucket is empty (never used); 2 if the element in
00399                 the bucket has been deleted [int*]
00400   @return       Iterator to the inserted element [khint_t]
00401  */
00402 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
00403 
00404 /*! @function
00405   @abstract     Retrieve a key from the hash table.
00406   @param  name  Name of the hash table [symbol]
00407   @param  h     Pointer to the hash table [khash_t(name)*]
00408   @param  k     Key [type of keys]
00409   @return       Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
00410  */
00411 #define kh_get(name, h, k) kh_get_##name(h, k)
00412 
00413 /*! @function
00414   @abstract     Remove a key from the hash table.
00415   @param  name  Name of the hash table [symbol]
00416   @param  h     Pointer to the hash table [khash_t(name)*]
00417   @param  k     Iterator to the element to be deleted [khint_t]
00418  */
00419 #define kh_del(name, h, k) kh_del_##name(h, k)
00420 
00421 
00422 /*! @function
00423   @abstract     Test whether a bucket contains data.
00424   @param  h     Pointer to the hash table [khash_t(name)*]
00425   @param  x     Iterator to the bucket [khint_t]
00426   @return       1 if containing data; 0 otherwise [int]
00427  */
00428 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
00429 
00430 /*! @function
00431   @abstract     Get key given an iterator
00432   @param  h     Pointer to the hash table [khash_t(name)*]
00433   @param  x     Iterator to the bucket [khint_t]
00434   @return       Key [type of keys]
00435  */
00436 #define kh_key(h, x) ((h)->keys[x])
00437 
00438 /*! @function
00439   @abstract     Get value given an iterator
00440   @param  h     Pointer to the hash table [khash_t(name)*]
00441   @param  x     Iterator to the bucket [khint_t]
00442   @return       Value [type of values]
00443   @discussion   For hash sets, calling this results in segfault.
00444  */
00445 #define kh_val(h, x) ((h)->vals[x])
00446 
00447 /*! @function
00448   @abstract     Alias of kh_val()
00449  */
00450 #define kh_value(h, x) ((h)->vals[x])
00451 
00452 /*! @function
00453   @abstract     Get the start iterator
00454   @param  h     Pointer to the hash table [khash_t(name)*]
00455   @return       The start iterator [khint_t]
00456  */
00457 #define kh_begin(h) (khint_t)(0)
00458 
00459 /*! @function
00460   @abstract     Get the end iterator
00461   @param  h     Pointer to the hash table [khash_t(name)*]
00462   @return       The end iterator [khint_t]
00463  */
00464 #define kh_end(h) ((h)->n_buckets)
00465 
00466 /*! @function
00467   @abstract     Get the number of elements in the hash table
00468   @param  h     Pointer to the hash table [khash_t(name)*]
00469   @return       Number of elements in the hash table [khint_t]
00470  */
00471 #define kh_size(h) ((h)->size)
00472 
00473 /*! @function
00474   @abstract     Get the number of buckets in the hash table
00475   @param  h     Pointer to the hash table [khash_t(name)*]
00476   @return       Number of buckets in the hash table [khint_t]
00477  */
00478 #define kh_n_buckets(h) ((h)->n_buckets)
00479 
00480 /* More conenient interfaces */
00481 
00482 /*! @function
00483   @abstract     Instantiate a hash set containing integer keys
00484   @param  name  Name of the hash table [symbol]
00485  */
00486 #define KHASH_SET_INIT_INT(name)                                        \
00487     KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
00488 
00489 /*! @function
00490   @abstract     Instantiate a hash map containing integer keys
00491   @param  name  Name of the hash table [symbol]
00492   @param  khval_t  Type of values [type]
00493  */
00494 #define KHASH_MAP_INIT_INT(name, khval_t)                               \
00495     KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
00496 
00497 /*! @function
00498   @abstract     Instantiate a hash map containing 64-bit integer keys
00499   @param  name  Name of the hash table [symbol]
00500  */
00501 #define KHASH_SET_INIT_INT64(name)                                      \
00502     KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
00503 
00504 /*! @function
00505   @abstract     Instantiate a hash map containing 64-bit integer keys
00506   @param  name  Name of the hash table [symbol]
00507   @param  khval_t  Type of values [type]
00508  */
00509 #define KHASH_MAP_INIT_INT64(name, khval_t)                             \
00510     KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
00511 
00512 typedef const char *kh_cstr_t;
00513 /*! @function
00514   @abstract     Instantiate a hash map containing const char* keys
00515   @param  name  Name of the hash table [symbol]
00516  */
00517 #define KHASH_SET_INIT_STR(name)                                        \
00518     KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
00519 
00520 /*! @function
00521   @abstract     Instantiate a hash map containing const char* keys
00522   @param  name  Name of the hash table [symbol]
00523   @param  khval_t  Type of values [type]
00524  */
00525 #define KHASH_MAP_INIT_STR(name, khval_t)                               \
00526     KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
00527 
00528 #endif /* __AC_KHASH_H */
Generated on Tue Sep 6 17:52:01 2011 for libStatGen Software by  doxygen 1.6.3