libStatGen Software  1
khash.h
00001 /* The MIT License
00002 
00003    Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
00004 
00005    Permission is hereby granted, free of charge, to any person obtaining
00006    a copy of this software and associated documentation files (the
00007    "Software"), to deal in the Software without restriction, including
00008    without limitation the rights to use, copy, modify, merge, publish,
00009    distribute, sublicense, and/or sell copies of the Software, and to
00010    permit persons to whom the Software is furnished to do so, subject to
00011    the following conditions:
00012 
00013    The above copyright notice and this permission notice shall be
00014    included in all copies or substantial portions of the Software.
00015 
00016    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00017    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00018    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00019    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
00020    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
00021    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
00022    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
00023    SOFTWARE.
00024 */
00025 
00026 /*
00027   An example:
00028 
00029 #include "khash.h"
00030 KHASH_MAP_INIT_INT(32, char)
00031 int main() {
00032     int ret, is_missing;
00033     khiter_t k;
00034     khash_t(32) *h = kh_init(32);
00035     k = kh_put(32, h, 5, &ret);
00036     if (!ret) kh_del(32, h, k);
00037     kh_value(h, k) = 10;
00038     k = kh_get(32, h, 10);
00039     is_missing = (k == kh_end(h));
00040     k = kh_get(32, h, 5);
00041     kh_del(32, h, k);
00042     for (k = kh_begin(h); k != kh_end(h); ++k)
00043         if (kh_exist(h, k)) kh_value(h, k) = 1;
00044     kh_destroy(32, h);
00045     return 0;
00046 }
00047 */
00048 
00049 /*
00050   2011-02-14 (0.2.5):
00051 
00052     * Allow to declare global functions.
00053 
00054   2009-09-26 (0.2.4):
00055 
00056     * Improve portability
00057 
00058   2008-09-19 (0.2.3):
00059 
00060     * Corrected the example
00061     * Improved interfaces
00062 
00063   2008-09-11 (0.2.2):
00064 
00065     * Improved speed a little in kh_put()
00066 
00067   2008-09-10 (0.2.1):
00068 
00069     * Added kh_clear()
00070     * Fixed a compiling error
00071 
00072   2008-09-02 (0.2.0):
00073 
00074     * Changed to token concatenation which increases flexibility.
00075 
00076   2008-08-31 (0.1.2):
00077 
00078     * Fixed a bug in kh_get(), which has not been tested previously.
00079 
00080   2008-08-31 (0.1.1):
00081 
00082     * Added destructor
00083 */
00084 
00085 
00086 #ifndef __AC_KHASH_H
00087 #define __AC_KHASH_H
00088 
00089 /*!
00090   @header
00091 
00092   Generic hash table library.
00093 
00094   @copyright Heng Li
00095  */
00096 
00097 #define AC_VERSION_KHASH_H "0.2.5"
00098 
00099 #include <stdlib.h>
00100 #include <string.h>
00101 #include <limits.h>
00102 
00103 /* compiler specific configuration */
00104 
00105 #if UINT_MAX == 0xffffffffu
00106 typedef unsigned int khint32_t;
00107 #elif ULONG_MAX == 0xffffffffu
00108 typedef unsigned long khint32_t;
00109 #endif
00110 
00111 #if ULONG_MAX == ULLONG_MAX
00112 typedef unsigned long khint64_t;
00113 #else
00114 typedef unsigned long long khint64_t;
00115 #endif
00116 
00117 #ifdef _MSC_VER
00118 #define inline __inline
00119 #endif
00120 
00121 #ifdef _WIN32
00122 #define inline __inline
00123 #endif
00124 
00125 typedef khint32_t khint_t;
00126 typedef khint_t khiter_t;
00127 
00128 #define __ac_HASH_PRIME_SIZE 32
00129 static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
00130 {
00131   0ul,          3ul,          11ul,         23ul,         53ul,
00132   97ul,         193ul,        389ul,        769ul,        1543ul,
00133   3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
00134   98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
00135   3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
00136   100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
00137   3221225473ul, 4294967291ul
00138 };
00139 
00140 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
00141 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
00142 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
00143 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
00144 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
00145 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
00146 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
00147 
00148 static const double __ac_HASH_UPPER = 0.77;
00149 
00150 #define KHASH_DECLARE(name, khkey_t, khval_t)                           \
00151     typedef struct {                                                    \
00152         khint_t n_buckets, size, n_occupied, upper_bound;               \
00153         khint32_t *flags;                                               \
00154         khkey_t *keys;                                                  \
00155         khval_t *vals;                                                  \
00156     } kh_##name##_t;                                                    \
00157     extern kh_##name##_t *kh_init_##name();                             \
00158     extern void kh_destroy_##name(kh_##name##_t *h);                    \
00159     extern void kh_clear_##name(kh_##name##_t *h);                      \
00160     extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key);  \
00161     extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
00162     extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
00163     extern void kh_del_##name(kh_##name##_t *h, khint_t x);
00164 
00165 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
00166     typedef struct {                                                    \
00167         khint_t n_buckets, size, n_occupied, upper_bound;               \
00168         khint32_t *flags;                                               \
00169         khkey_t *keys;                                                  \
00170         khval_t *vals;                                                  \
00171     } kh_##name##_t;                                                    \
00172     SCOPE kh_##name##_t *kh_init_##name() {                             \
00173         return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));        \
00174     }                                                                   \
00175     SCOPE void kh_destroy_##name(kh_##name##_t *h)                      \
00176     {                                                                   \
00177         if (h) {                                                        \
00178             free(h->keys); free(h->flags);                              \
00179             free(h->vals);                                              \
00180             free(h);                                                    \
00181         }                                                               \
00182     }                                                                   \
00183     SCOPE void kh_clear_##name(kh_##name##_t *h)                        \
00184     {                                                                   \
00185         if (h && h->flags) {                                            \
00186             memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \
00187             h->size = h->n_occupied = 0;                                \
00188         }                                                               \
00189     }                                                                   \
00190     SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key)    \
00191     {                                                                   \
00192         if (h->n_buckets) {                                             \
00193             khint_t inc, k, i, last;                                    \
00194             k = __hash_func(key); i = k % h->n_buckets;                 \
00195             inc = 1 + k % (h->n_buckets - 1); last = i;                 \
00196             while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
00197                 if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
00198                 else i += inc;                                          \
00199                 if (i == last) return h->n_buckets;                     \
00200             }                                                           \
00201             return __ac_iseither(h->flags, i)? h->n_buckets : i;        \
00202         } else return 0;                                                \
00203     }                                                                   \
00204     SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
00205     {                                                                   \
00206         khint32_t *new_flags = 0;                                       \
00207         khint_t j = 1;                                                  \
00208         {                                                               \
00209             khint_t t = __ac_HASH_PRIME_SIZE - 1;                       \
00210             while (__ac_prime_list[t] > new_n_buckets) --t;             \
00211             new_n_buckets = __ac_prime_list[t+1];                       \
00212             if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
00213             else {                                                      \
00214                 new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t));   \
00215                 memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
00216                 if (h->n_buckets < new_n_buckets) {                     \
00217                     h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
00218                     if (kh_is_map)                                      \
00219                         h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
00220                 }                                                       \
00221             }                                                           \
00222         }                                                               \
00223         if (j) {                                                        \
00224             for (j = 0; j != h->n_buckets; ++j) {                       \
00225                 if (__ac_iseither(h->flags, j) == 0) {                  \
00226                     khkey_t key = h->keys[j];                           \
00227                     khval_t val;                                        \
00228                     if (kh_is_map) val = h->vals[j];                    \
00229                     __ac_set_isdel_true(h->flags, j);                   \
00230                     while (1) {                                         \
00231                         khint_t inc, k, i;                              \
00232                         k = __hash_func(key);                           \
00233                         i = k % new_n_buckets;                          \
00234                         inc = 1 + k % (new_n_buckets - 1);              \
00235                         while (!__ac_isempty(new_flags, i)) {           \
00236                             if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
00237                             else i += inc;                              \
00238                         }                                               \
00239                         __ac_set_isempty_false(new_flags, i);           \
00240                         if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
00241                             { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
00242                             if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
00243                             __ac_set_isdel_true(h->flags, i);           \
00244                         } else {                                        \
00245                             h->keys[i] = key;                           \
00246                             if (kh_is_map) h->vals[i] = val;            \
00247                             break;                                      \
00248                         }                                               \
00249                     }                                                   \
00250                 }                                                       \
00251             }                                                           \
00252             if (h->n_buckets > new_n_buckets) {                         \
00253                 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
00254                 if (kh_is_map)                                          \
00255                     h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
00256             }                                                           \
00257             free(h->flags);                                             \
00258             h->flags = new_flags;                                       \
00259             h->n_buckets = new_n_buckets;                               \
00260             h->n_occupied = h->size;                                    \
00261             h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
00262         }                                                               \
00263     }                                                                   \
00264     SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
00265     {                                                                   \
00266         khint_t x;                                                      \
00267         if (h->n_occupied >= h->upper_bound) {                          \
00268             if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
00269             else kh_resize_##name(h, h->n_buckets + 1);                 \
00270         }                                                               \
00271         {                                                               \
00272             khint_t inc, k, i, site, last;                              \
00273             x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
00274             if (__ac_isempty(h->flags, i)) x = i;                       \
00275             else {                                                      \
00276                 inc = 1 + k % (h->n_buckets - 1); last = i;             \
00277                 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
00278                     if (__ac_isdel(h->flags, i)) site = i;              \
00279                     if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
00280                     else i += inc;                                      \
00281                     if (i == last) { x = site; break; }                 \
00282                 }                                                       \
00283                 if (x == h->n_buckets) {                                \
00284                     if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
00285                     else x = i;                                         \
00286                 }                                                       \
00287             }                                                           \
00288         }                                                               \
00289         if (__ac_isempty(h->flags, x)) {                                \
00290             h->keys[x] = key;                                           \
00291             __ac_set_isboth_false(h->flags, x);                         \
00292             ++h->size; ++h->n_occupied;                                 \
00293             *ret = 1;                                                   \
00294         } else if (__ac_isdel(h->flags, x)) {                           \
00295             h->keys[x] = key;                                           \
00296             __ac_set_isboth_false(h->flags, x);                         \
00297             ++h->size;                                                  \
00298             *ret = 2;                                                   \
00299         } else *ret = 0;                                                \
00300         return x;                                                       \
00301     }                                                                   \
00302     SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x)               \
00303     {                                                                   \
00304         if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {         \
00305             __ac_set_isdel_true(h->flags, x);                           \
00306             --h->size;                                                  \
00307         }                                                               \
00308     }
00309 
00310 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
00311     KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
00312 
00313 /* --- BEGIN OF HASH FUNCTIONS --- */
00314 
00315 /*! @function
00316   @abstract     Integer hash function
00317   @param  key   The integer [khint32_t]
00318   @return       The hash value [khint_t]
00319  */
00320 #define kh_int_hash_func(key) (khint32_t)(key)
00321 /*! @function
00322   @abstract     Integer comparison function
00323  */
00324 #define kh_int_hash_equal(a, b) ((a) == (b))
00325 /*! @function
00326   @abstract     64-bit integer hash function
00327   @param  key   The integer [khint64_t]
00328   @return       The hash value [khint_t]
00329  */
00330 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
00331 /*! @function
00332   @abstract     64-bit integer comparison function
00333  */
00334 #define kh_int64_hash_equal(a, b) ((a) == (b))
00335 /*! @function
00336   @abstract     const char* hash function
00337   @param  s     Pointer to a null terminated string
00338   @return       The hash value
00339  */
00340 static inline khint_t __ac_X31_hash_string(const char *s)
00341 {
00342     khint_t h = *s;
00343     if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
00344     return h;
00345 }
00346 /*! @function
00347   @abstract     Another interface to const char* hash function
00348   @param  key   Pointer to a null terminated string [const char*]
00349   @return       The hash value [khint_t]
00350  */
00351 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
00352 /*! @function
00353   @abstract     Const char* comparison function
00354  */
00355 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
00356 
00357 /* --- END OF HASH FUNCTIONS --- */
00358 
00359 /* Other necessary macros... */
00360 
00361 /*!
00362   @abstract Type of the hash table.
00363   @param  name  Name of the hash table [symbol]
00364  */
00365 #define khash_t(name) kh_##name##_t
00366 
00367 /*! @function
00368   @abstract     Initiate a hash table.
00369   @param  name  Name of the hash table [symbol]
00370   @return       Pointer to the hash table [khash_t(name)*]
00371  */
00372 #define kh_init(name) kh_init_##name()
00373 
00374 /*! @function
00375   @abstract     Destroy a hash table.
00376   @param  name  Name of the hash table [symbol]
00377   @param  h     Pointer to the hash table [khash_t(name)*]
00378  */
00379 #define kh_destroy(name, h) kh_destroy_##name(h)
00380 
00381 /*! @function
00382   @abstract     Reset a hash table without deallocating memory.
00383   @param  name  Name of the hash table [symbol]
00384   @param  h     Pointer to the hash table [khash_t(name)*]
00385  */
00386 #define kh_clear(name, h) kh_clear_##name(h)
00387 
00388 /*! @function
00389   @abstract     Resize a hash table.
00390   @param  name  Name of the hash table [symbol]
00391   @param  h     Pointer to the hash table [khash_t(name)*]
00392   @param  s     New size [khint_t]
00393  */
00394 #define kh_resize(name, h, s) kh_resize_##name(h, s)
00395 
00396 /*! @function
00397   @abstract     Insert a key to the hash table.
00398   @param  name  Name of the hash table [symbol]
00399   @param  h     Pointer to the hash table [khash_t(name)*]
00400   @param  k     Key [type of keys]
00401   @param  r     Extra return code: 0 if the key is present in the hash table;
00402                 1 if the bucket is empty (never used); 2 if the element in
00403                 the bucket has been deleted [int*]
00404   @return       Iterator to the inserted element [khint_t]
00405  */
00406 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
00407 
00408 /*! @function
00409   @abstract     Retrieve a key from the hash table.
00410   @param  name  Name of the hash table [symbol]
00411   @param  h     Pointer to the hash table [khash_t(name)*]
00412   @param  k     Key [type of keys]
00413   @return       Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
00414  */
00415 #define kh_get(name, h, k) kh_get_##name(h, k)
00416 
00417 /*! @function
00418   @abstract     Remove a key from the hash table.
00419   @param  name  Name of the hash table [symbol]
00420   @param  h     Pointer to the hash table [khash_t(name)*]
00421   @param  k     Iterator to the element to be deleted [khint_t]
00422  */
00423 #define kh_del(name, h, k) kh_del_##name(h, k)
00424 
00425 
00426 /*! @function
00427   @abstract     Test whether a bucket contains data.
00428   @param  h     Pointer to the hash table [khash_t(name)*]
00429   @param  x     Iterator to the bucket [khint_t]
00430   @return       1 if containing data; 0 otherwise [int]
00431  */
00432 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
00433 
00434 /*! @function
00435   @abstract     Get key given an iterator
00436   @param  h     Pointer to the hash table [khash_t(name)*]
00437   @param  x     Iterator to the bucket [khint_t]
00438   @return       Key [type of keys]
00439  */
00440 #define kh_key(h, x) ((h)->keys[x])
00441 
00442 /*! @function
00443   @abstract     Get value given an iterator
00444   @param  h     Pointer to the hash table [khash_t(name)*]
00445   @param  x     Iterator to the bucket [khint_t]
00446   @return       Value [type of values]
00447   @discussion   For hash sets, calling this results in segfault.
00448  */
00449 #define kh_val(h, x) ((h)->vals[x])
00450 
00451 /*! @function
00452   @abstract     Alias of kh_val()
00453  */
00454 #define kh_value(h, x) ((h)->vals[x])
00455 
00456 /*! @function
00457   @abstract     Get the start iterator
00458   @param  h     Pointer to the hash table [khash_t(name)*]
00459   @return       The start iterator [khint_t]
00460  */
00461 #define kh_begin(h) (khint_t)(0)
00462 
00463 /*! @function
00464   @abstract     Get the end iterator
00465   @param  h     Pointer to the hash table [khash_t(name)*]
00466   @return       The end iterator [khint_t]
00467  */
00468 #define kh_end(h) ((h)->n_buckets)
00469 
00470 /*! @function
00471   @abstract     Get the number of elements in the hash table
00472   @param  h     Pointer to the hash table [khash_t(name)*]
00473   @return       Number of elements in the hash table [khint_t]
00474  */
00475 #define kh_size(h) ((h)->size)
00476 
00477 /*! @function
00478   @abstract     Get the number of buckets in the hash table
00479   @param  h     Pointer to the hash table [khash_t(name)*]
00480   @return       Number of buckets in the hash table [khint_t]
00481  */
00482 #define kh_n_buckets(h) ((h)->n_buckets)
00483 
00484 /* More conenient interfaces */
00485 
00486 /*! @function
00487   @abstract     Instantiate a hash set containing integer keys
00488   @param  name  Name of the hash table [symbol]
00489  */
00490 #define KHASH_SET_INIT_INT(name)                                        \
00491     KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
00492 
00493 /*! @function
00494   @abstract     Instantiate a hash map containing integer keys
00495   @param  name  Name of the hash table [symbol]
00496   @param  khval_t  Type of values [type]
00497  */
00498 #define KHASH_MAP_INIT_INT(name, khval_t)                               \
00499     KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
00500 
00501 /*! @function
00502   @abstract     Instantiate a hash map containing 64-bit integer keys
00503   @param  name  Name of the hash table [symbol]
00504  */
00505 #define KHASH_SET_INIT_INT64(name)                                      \
00506     KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
00507 
00508 /*! @function
00509   @abstract     Instantiate a hash map containing 64-bit integer keys
00510   @param  name  Name of the hash table [symbol]
00511   @param  khval_t  Type of values [type]
00512  */
00513 #define KHASH_MAP_INIT_INT64(name, khval_t)                             \
00514     KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
00515 
00516 typedef const char *kh_cstr_t;
00517 /*! @function
00518   @abstract     Instantiate a hash map containing const char* keys
00519   @param  name  Name of the hash table [symbol]
00520  */
00521 #define KHASH_SET_INIT_STR(name)                                        \
00522     KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
00523 
00524 /*! @function
00525   @abstract     Instantiate a hash map containing const char* keys
00526   @param  name  Name of the hash table [symbol]
00527   @param  khval_t  Type of values [type]
00528  */
00529 #define KHASH_MAP_INIT_STR(name, khval_t)                               \
00530     KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
00531 
00532 #endif /* __AC_KHASH_H */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends