libStatGen Software
1
|
00001 /* The MIT License 00002 00003 Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk> 00004 00005 Permission is hereby granted, free of charge, to any person obtaining 00006 a copy of this software and associated documentation files (the 00007 "Software"), to deal in the Software without restriction, including 00008 without limitation the rights to use, copy, modify, merge, publish, 00009 distribute, sublicense, and/or sell copies of the Software, and to 00010 permit persons to whom the Software is furnished to do so, subject to 00011 the following conditions: 00012 00013 The above copyright notice and this permission notice shall be 00014 included in all copies or substantial portions of the Software. 00015 00016 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00017 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00018 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 00019 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 00020 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 00021 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 00022 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 00023 SOFTWARE. 00024 */ 00025 00026 /* 00027 An example: 00028 00029 #include "khash.h" 00030 KHASH_MAP_INIT_INT(32, char) 00031 int main() { 00032 int ret, is_missing; 00033 khiter_t k; 00034 khash_t(32) *h = kh_init(32); 00035 k = kh_put(32, h, 5, &ret); 00036 if (!ret) kh_del(32, h, k); 00037 kh_value(h, k) = 10; 00038 k = kh_get(32, h, 10); 00039 is_missing = (k == kh_end(h)); 00040 k = kh_get(32, h, 5); 00041 kh_del(32, h, k); 00042 for (k = kh_begin(h); k != kh_end(h); ++k) 00043 if (kh_exist(h, k)) kh_value(h, k) = 1; 00044 kh_destroy(32, h); 00045 return 0; 00046 } 00047 */ 00048 00049 /* 00050 2011-02-14 (0.2.5): 00051 00052 * Allow to declare global functions. 00053 00054 2009-09-26 (0.2.4): 00055 00056 * Improve portability 00057 00058 2008-09-19 (0.2.3): 00059 00060 * Corrected the example 00061 * Improved interfaces 00062 00063 2008-09-11 (0.2.2): 00064 00065 * Improved speed a little in kh_put() 00066 00067 2008-09-10 (0.2.1): 00068 00069 * Added kh_clear() 00070 * Fixed a compiling error 00071 00072 2008-09-02 (0.2.0): 00073 00074 * Changed to token concatenation which increases flexibility. 00075 00076 2008-08-31 (0.1.2): 00077 00078 * Fixed a bug in kh_get(), which has not been tested previously. 00079 00080 2008-08-31 (0.1.1): 00081 00082 * Added destructor 00083 */ 00084 00085 00086 #ifndef __AC_KHASH_H 00087 #define __AC_KHASH_H 00088 00089 /*! 00090 @header 00091 00092 Generic hash table library. 00093 00094 @copyright Heng Li 00095 */ 00096 00097 #define AC_VERSION_KHASH_H "0.2.5" 00098 00099 #include <stdlib.h> 00100 #include <string.h> 00101 #include <limits.h> 00102 00103 /* compiler specific configuration */ 00104 00105 #if UINT_MAX == 0xffffffffu 00106 typedef unsigned int khint32_t; 00107 #elif ULONG_MAX == 0xffffffffu 00108 typedef unsigned long khint32_t; 00109 #endif 00110 00111 #if ULONG_MAX == ULLONG_MAX 00112 typedef unsigned long khint64_t; 00113 #else 00114 typedef unsigned long long khint64_t; 00115 #endif 00116 00117 #ifdef _MSC_VER 00118 #define inline __inline 00119 #endif 00120 00121 #ifdef _WIN32 00122 #define inline __inline 00123 #endif 00124 00125 typedef khint32_t khint_t; 00126 typedef khint_t khiter_t; 00127 00128 #define __ac_HASH_PRIME_SIZE 32 00129 static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = 00130 { 00131 0ul, 3ul, 11ul, 23ul, 53ul, 00132 97ul, 193ul, 389ul, 769ul, 1543ul, 00133 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 00134 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 00135 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 00136 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 00137 3221225473ul, 4294967291ul 00138 }; 00139 00140 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) 00141 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) 00142 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) 00143 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) 00144 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) 00145 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) 00146 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) 00147 00148 static const double __ac_HASH_UPPER = 0.77; 00149 00150 #define KHASH_DECLARE(name, khkey_t, khval_t) \ 00151 typedef struct { \ 00152 khint_t n_buckets, size, n_occupied, upper_bound; \ 00153 khint32_t *flags; \ 00154 khkey_t *keys; \ 00155 khval_t *vals; \ 00156 } kh_##name##_t; \ 00157 extern kh_##name##_t *kh_init_##name(); \ 00158 extern void kh_destroy_##name(kh_##name##_t *h); \ 00159 extern void kh_clear_##name(kh_##name##_t *h); \ 00160 extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ 00161 extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ 00162 extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ 00163 extern void kh_del_##name(kh_##name##_t *h, khint_t x); 00164 00165 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ 00166 typedef struct { \ 00167 khint_t n_buckets, size, n_occupied, upper_bound; \ 00168 khint32_t *flags; \ 00169 khkey_t *keys; \ 00170 khval_t *vals; \ 00171 } kh_##name##_t; \ 00172 SCOPE kh_##name##_t *kh_init_##name() { \ 00173 return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ 00174 } \ 00175 SCOPE void kh_destroy_##name(kh_##name##_t *h) \ 00176 { \ 00177 if (h) { \ 00178 free(h->keys); free(h->flags); \ 00179 free(h->vals); \ 00180 free(h); \ 00181 } \ 00182 } \ 00183 SCOPE void kh_clear_##name(kh_##name##_t *h) \ 00184 { \ 00185 if (h && h->flags) { \ 00186 memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \ 00187 h->size = h->n_occupied = 0; \ 00188 } \ 00189 } \ 00190 SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ 00191 { \ 00192 if (h->n_buckets) { \ 00193 khint_t inc, k, i, last; \ 00194 k = __hash_func(key); i = k % h->n_buckets; \ 00195 inc = 1 + k % (h->n_buckets - 1); last = i; \ 00196 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ 00197 if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ 00198 else i += inc; \ 00199 if (i == last) return h->n_buckets; \ 00200 } \ 00201 return __ac_iseither(h->flags, i)? h->n_buckets : i; \ 00202 } else return 0; \ 00203 } \ 00204 SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ 00205 { \ 00206 khint32_t *new_flags = 0; \ 00207 khint_t j = 1; \ 00208 { \ 00209 khint_t t = __ac_HASH_PRIME_SIZE - 1; \ 00210 while (__ac_prime_list[t] > new_n_buckets) --t; \ 00211 new_n_buckets = __ac_prime_list[t+1]; \ 00212 if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \ 00213 else { \ 00214 new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ 00215 memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \ 00216 if (h->n_buckets < new_n_buckets) { \ 00217 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ 00218 if (kh_is_map) \ 00219 h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ 00220 } \ 00221 } \ 00222 } \ 00223 if (j) { \ 00224 for (j = 0; j != h->n_buckets; ++j) { \ 00225 if (__ac_iseither(h->flags, j) == 0) { \ 00226 khkey_t key = h->keys[j]; \ 00227 khval_t val; \ 00228 if (kh_is_map) val = h->vals[j]; \ 00229 __ac_set_isdel_true(h->flags, j); \ 00230 while (1) { \ 00231 khint_t inc, k, i; \ 00232 k = __hash_func(key); \ 00233 i = k % new_n_buckets; \ 00234 inc = 1 + k % (new_n_buckets - 1); \ 00235 while (!__ac_isempty(new_flags, i)) { \ 00236 if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \ 00237 else i += inc; \ 00238 } \ 00239 __ac_set_isempty_false(new_flags, i); \ 00240 if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \ 00241 { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ 00242 if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ 00243 __ac_set_isdel_true(h->flags, i); \ 00244 } else { \ 00245 h->keys[i] = key; \ 00246 if (kh_is_map) h->vals[i] = val; \ 00247 break; \ 00248 } \ 00249 } \ 00250 } \ 00251 } \ 00252 if (h->n_buckets > new_n_buckets) { \ 00253 h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ 00254 if (kh_is_map) \ 00255 h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ 00256 } \ 00257 free(h->flags); \ 00258 h->flags = new_flags; \ 00259 h->n_buckets = new_n_buckets; \ 00260 h->n_occupied = h->size; \ 00261 h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ 00262 } \ 00263 } \ 00264 SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ 00265 { \ 00266 khint_t x; \ 00267 if (h->n_occupied >= h->upper_bound) { \ 00268 if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \ 00269 else kh_resize_##name(h, h->n_buckets + 1); \ 00270 } \ 00271 { \ 00272 khint_t inc, k, i, site, last; \ 00273 x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \ 00274 if (__ac_isempty(h->flags, i)) x = i; \ 00275 else { \ 00276 inc = 1 + k % (h->n_buckets - 1); last = i; \ 00277 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ 00278 if (__ac_isdel(h->flags, i)) site = i; \ 00279 if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ 00280 else i += inc; \ 00281 if (i == last) { x = site; break; } \ 00282 } \ 00283 if (x == h->n_buckets) { \ 00284 if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ 00285 else x = i; \ 00286 } \ 00287 } \ 00288 } \ 00289 if (__ac_isempty(h->flags, x)) { \ 00290 h->keys[x] = key; \ 00291 __ac_set_isboth_false(h->flags, x); \ 00292 ++h->size; ++h->n_occupied; \ 00293 *ret = 1; \ 00294 } else if (__ac_isdel(h->flags, x)) { \ 00295 h->keys[x] = key; \ 00296 __ac_set_isboth_false(h->flags, x); \ 00297 ++h->size; \ 00298 *ret = 2; \ 00299 } else *ret = 0; \ 00300 return x; \ 00301 } \ 00302 SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ 00303 { \ 00304 if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ 00305 __ac_set_isdel_true(h->flags, x); \ 00306 --h->size; \ 00307 } \ 00308 } 00309 00310 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ 00311 KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) 00312 00313 /* --- BEGIN OF HASH FUNCTIONS --- */ 00314 00315 /*! @function 00316 @abstract Integer hash function 00317 @param key The integer [khint32_t] 00318 @return The hash value [khint_t] 00319 */ 00320 #define kh_int_hash_func(key) (khint32_t)(key) 00321 /*! @function 00322 @abstract Integer comparison function 00323 */ 00324 #define kh_int_hash_equal(a, b) ((a) == (b)) 00325 /*! @function 00326 @abstract 64-bit integer hash function 00327 @param key The integer [khint64_t] 00328 @return The hash value [khint_t] 00329 */ 00330 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) 00331 /*! @function 00332 @abstract 64-bit integer comparison function 00333 */ 00334 #define kh_int64_hash_equal(a, b) ((a) == (b)) 00335 /*! @function 00336 @abstract const char* hash function 00337 @param s Pointer to a null terminated string 00338 @return The hash value 00339 */ 00340 static inline khint_t __ac_X31_hash_string(const char *s) 00341 { 00342 khint_t h = *s; 00343 if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; 00344 return h; 00345 } 00346 /*! @function 00347 @abstract Another interface to const char* hash function 00348 @param key Pointer to a null terminated string [const char*] 00349 @return The hash value [khint_t] 00350 */ 00351 #define kh_str_hash_func(key) __ac_X31_hash_string(key) 00352 /*! @function 00353 @abstract Const char* comparison function 00354 */ 00355 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) 00356 00357 /* --- END OF HASH FUNCTIONS --- */ 00358 00359 /* Other necessary macros... */ 00360 00361 /*! 00362 @abstract Type of the hash table. 00363 @param name Name of the hash table [symbol] 00364 */ 00365 #define khash_t(name) kh_##name##_t 00366 00367 /*! @function 00368 @abstract Initiate a hash table. 00369 @param name Name of the hash table [symbol] 00370 @return Pointer to the hash table [khash_t(name)*] 00371 */ 00372 #define kh_init(name) kh_init_##name() 00373 00374 /*! @function 00375 @abstract Destroy a hash table. 00376 @param name Name of the hash table [symbol] 00377 @param h Pointer to the hash table [khash_t(name)*] 00378 */ 00379 #define kh_destroy(name, h) kh_destroy_##name(h) 00380 00381 /*! @function 00382 @abstract Reset a hash table without deallocating memory. 00383 @param name Name of the hash table [symbol] 00384 @param h Pointer to the hash table [khash_t(name)*] 00385 */ 00386 #define kh_clear(name, h) kh_clear_##name(h) 00387 00388 /*! @function 00389 @abstract Resize a hash table. 00390 @param name Name of the hash table [symbol] 00391 @param h Pointer to the hash table [khash_t(name)*] 00392 @param s New size [khint_t] 00393 */ 00394 #define kh_resize(name, h, s) kh_resize_##name(h, s) 00395 00396 /*! @function 00397 @abstract Insert a key to the hash table. 00398 @param name Name of the hash table [symbol] 00399 @param h Pointer to the hash table [khash_t(name)*] 00400 @param k Key [type of keys] 00401 @param r Extra return code: 0 if the key is present in the hash table; 00402 1 if the bucket is empty (never used); 2 if the element in 00403 the bucket has been deleted [int*] 00404 @return Iterator to the inserted element [khint_t] 00405 */ 00406 #define kh_put(name, h, k, r) kh_put_##name(h, k, r) 00407 00408 /*! @function 00409 @abstract Retrieve a key from the hash table. 00410 @param name Name of the hash table [symbol] 00411 @param h Pointer to the hash table [khash_t(name)*] 00412 @param k Key [type of keys] 00413 @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t] 00414 */ 00415 #define kh_get(name, h, k) kh_get_##name(h, k) 00416 00417 /*! @function 00418 @abstract Remove a key from the hash table. 00419 @param name Name of the hash table [symbol] 00420 @param h Pointer to the hash table [khash_t(name)*] 00421 @param k Iterator to the element to be deleted [khint_t] 00422 */ 00423 #define kh_del(name, h, k) kh_del_##name(h, k) 00424 00425 00426 /*! @function 00427 @abstract Test whether a bucket contains data. 00428 @param h Pointer to the hash table [khash_t(name)*] 00429 @param x Iterator to the bucket [khint_t] 00430 @return 1 if containing data; 0 otherwise [int] 00431 */ 00432 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) 00433 00434 /*! @function 00435 @abstract Get key given an iterator 00436 @param h Pointer to the hash table [khash_t(name)*] 00437 @param x Iterator to the bucket [khint_t] 00438 @return Key [type of keys] 00439 */ 00440 #define kh_key(h, x) ((h)->keys[x]) 00441 00442 /*! @function 00443 @abstract Get value given an iterator 00444 @param h Pointer to the hash table [khash_t(name)*] 00445 @param x Iterator to the bucket [khint_t] 00446 @return Value [type of values] 00447 @discussion For hash sets, calling this results in segfault. 00448 */ 00449 #define kh_val(h, x) ((h)->vals[x]) 00450 00451 /*! @function 00452 @abstract Alias of kh_val() 00453 */ 00454 #define kh_value(h, x) ((h)->vals[x]) 00455 00456 /*! @function 00457 @abstract Get the start iterator 00458 @param h Pointer to the hash table [khash_t(name)*] 00459 @return The start iterator [khint_t] 00460 */ 00461 #define kh_begin(h) (khint_t)(0) 00462 00463 /*! @function 00464 @abstract Get the end iterator 00465 @param h Pointer to the hash table [khash_t(name)*] 00466 @return The end iterator [khint_t] 00467 */ 00468 #define kh_end(h) ((h)->n_buckets) 00469 00470 /*! @function 00471 @abstract Get the number of elements in the hash table 00472 @param h Pointer to the hash table [khash_t(name)*] 00473 @return Number of elements in the hash table [khint_t] 00474 */ 00475 #define kh_size(h) ((h)->size) 00476 00477 /*! @function 00478 @abstract Get the number of buckets in the hash table 00479 @param h Pointer to the hash table [khash_t(name)*] 00480 @return Number of buckets in the hash table [khint_t] 00481 */ 00482 #define kh_n_buckets(h) ((h)->n_buckets) 00483 00484 /* More conenient interfaces */ 00485 00486 /*! @function 00487 @abstract Instantiate a hash set containing integer keys 00488 @param name Name of the hash table [symbol] 00489 */ 00490 #define KHASH_SET_INIT_INT(name) \ 00491 KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) 00492 00493 /*! @function 00494 @abstract Instantiate a hash map containing integer keys 00495 @param name Name of the hash table [symbol] 00496 @param khval_t Type of values [type] 00497 */ 00498 #define KHASH_MAP_INIT_INT(name, khval_t) \ 00499 KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) 00500 00501 /*! @function 00502 @abstract Instantiate a hash map containing 64-bit integer keys 00503 @param name Name of the hash table [symbol] 00504 */ 00505 #define KHASH_SET_INIT_INT64(name) \ 00506 KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) 00507 00508 /*! @function 00509 @abstract Instantiate a hash map containing 64-bit integer keys 00510 @param name Name of the hash table [symbol] 00511 @param khval_t Type of values [type] 00512 */ 00513 #define KHASH_MAP_INIT_INT64(name, khval_t) \ 00514 KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) 00515 00516 typedef const char *kh_cstr_t; 00517 /*! @function 00518 @abstract Instantiate a hash map containing const char* keys 00519 @param name Name of the hash table [symbol] 00520 */ 00521 #define KHASH_SET_INIT_STR(name) \ 00522 KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) 00523 00524 /*! @function 00525 @abstract Instantiate a hash map containing const char* keys 00526 @param name Name of the hash table [symbol] 00527 @param khval_t Type of values [type] 00528 */ 00529 #define KHASH_MAP_INIT_STR(name, khval_t) \ 00530 KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) 00531 00532 #endif /* __AC_KHASH_H */