libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010-2012 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "StringHash.h" 00019 #include "InputFile.h" 00020 #include "Error.h" 00021 00022 StringHash::StringHash(int startsize) 00023 : StringHashBase() 00024 { 00025 count = 0; 00026 size = startsize; 00027 mask = startsize - 1; 00028 00029 // In this implementation, the size of hash tables must be a power of two 00030 if (startsize & mask) 00031 error("StringHash: Hash table size must be a power of two.\n"); 00032 00033 strings = new String * [size]; 00034 objects = new void * [size]; 00035 keys = new unsigned int [size]; 00036 00037 for (unsigned int i = 0; i < size; i++) 00038 { 00039 strings[i] = NULL; 00040 objects[i] = NULL; 00041 } 00042 }; 00043 00044 StringHash::~StringHash() 00045 { 00046 for (unsigned int i = 0; i < size; i++) 00047 if (strings[i] != NULL) 00048 delete strings[i]; 00049 00050 if(strings) delete [] strings; 00051 if(objects) delete [] objects; 00052 if(keys) delete [] keys; 00053 } 00054 00055 void StringHash::Clear() 00056 { 00057 for (unsigned int i = 0; i < size; i++) 00058 if (strings[i] != NULL) 00059 { 00060 delete strings[i]; 00061 strings[i] = NULL; 00062 } 00063 00064 count = 0; 00065 00066 if (size > 256) 00067 SetSize(256); 00068 } 00069 00070 void StringHash::SetSize(int newsize) 00071 { 00072 int newmask = newsize - 1; 00073 00074 String ** newstrings = new String * [newsize]; 00075 void ** newobjects = new void * [newsize]; 00076 unsigned int * newkeys = new unsigned int [newsize]; 00077 00078 for (int i = 0; i < newsize; i++) 00079 { 00080 newstrings[i] = NULL; 00081 newobjects[i] = NULL; 00082 } 00083 00084 if (count) 00085 for (unsigned int i = 0; i < size; i++) 00086 if (strings[i] != NULL) 00087 { 00088 unsigned int key = keys[i]; 00089 unsigned int h = key & newmask; 00090 00091 while (newstrings[h] != NULL && 00092 (newkeys[h] != key || 00093 (!stringsEqual(*(newstrings[h]), *(strings[i]))))) 00094 h = (h + 1) & newmask; 00095 00096 newkeys[h] = key; 00097 newstrings[h] = strings[i]; 00098 newobjects[h] = objects[i]; 00099 } 00100 00101 if(strings) delete [] strings; 00102 if(objects) delete [] objects; 00103 if(keys) delete [] keys; 00104 00105 strings = newstrings; 00106 objects = newobjects; 00107 keys = newkeys; 00108 size = newsize; 00109 mask = newmask; 00110 } 00111 00112 int StringHash::Add(const String & string, void * object) 00113 { 00114 unsigned int key = getKey(string); 00115 unsigned int h = Iterate(key, string); 00116 00117 if (strings[h] == NULL) 00118 Insert(h, key, string); 00119 00120 objects[h] = object; 00121 00122 if (count * 2 > size) 00123 { 00124 Grow(); 00125 return Iterate(key, string); 00126 } 00127 00128 return h; 00129 } 00130 00131 int StringHash::Find(const String & string, void *(*create_object)()) 00132 { 00133 unsigned int key = getKey(string); 00134 unsigned int h = Iterate(key, string); 00135 00136 if (strings[h] == NULL && create_object == NULL) 00137 return -1; 00138 00139 if (strings[h] == NULL && create_object != NULL) 00140 { 00141 Insert(h, key, string); 00142 objects[h] = create_object(); 00143 00144 if (count * 2 > size) 00145 { 00146 Grow(); 00147 return Iterate(key, string); 00148 } 00149 } 00150 00151 return h; 00152 } 00153 00154 int StringHash::Find(const String & string) const 00155 { 00156 unsigned int key = getKey(string); 00157 unsigned int h = Iterate(key, string); 00158 00159 if (strings[h] == NULL) 00160 return -1; 00161 00162 return h; 00163 } 00164 void * StringHash::CreateHash() 00165 { 00166 return (void *) new StringHash(); 00167 } 00168 00169 void StringHash::Delete(unsigned int index) 00170 { 00171 if (index >= size || strings[index] == NULL) 00172 return; 00173 00174 delete strings[index]; 00175 strings[index] = NULL; 00176 count--; 00177 00178 if (count * 8 < size && size > 32) 00179 Shrink(); 00180 else 00181 { 00182 // rehash the next strings until we find empty slot 00183 index = (index + 1) & mask; 00184 00185 while (strings[index] != NULL) 00186 { 00187 if ((keys[index] & mask) != index) 00188 { 00189 unsigned int h = Iterate(keys[index], *strings[index]); 00190 00191 if (h != (unsigned int) index) 00192 { 00193 keys[h] = keys[index]; 00194 strings[h] = strings[index]; 00195 objects[h] = objects[index]; 00196 00197 strings[index] = NULL; 00198 objects[index] = NULL; 00199 } 00200 } 00201 00202 index = (index + 1) & mask; 00203 } 00204 } 00205 } 00206 00207 void StringHash::ReadLinesFromFile(const char * filename) 00208 { 00209 IFILE f = ifopen(filename, "rb"); 00210 if (f == NULL) return; 00211 ReadLinesFromFile(f); 00212 ifclose(f); 00213 } 00214 00215 void StringHash::ReadLinesFromFile(FILE * f) 00216 { 00217 String buffer; 00218 00219 while (!feof(f)) 00220 { 00221 buffer.ReadLine(f); 00222 Add(buffer.Trim()); 00223 } 00224 } 00225 00226 void StringHash::ReadLinesFromFile(IFILE & f) 00227 { 00228 String buffer; 00229 00230 while (!ifeof(f)) 00231 { 00232 buffer.ReadLine(f); 00233 Add(buffer.Trim()); 00234 } 00235 } 00236 00237 // StringIntHash implementation 00238 00239 StringIntHash::StringIntHash(int startsize) 00240 : StringHashBase() 00241 { 00242 count = 0; 00243 size = startsize; 00244 mask = startsize - 1; 00245 00246 // In this implementation, the size of hash tables must be a power of two 00247 if (startsize & mask) 00248 error("StringIntHash: Hash table size must be a power of two.\n"); 00249 00250 strings = new String * [size]; 00251 integers = new int [size]; 00252 keys = new unsigned int [size]; 00253 00254 for (unsigned int i = 0; i < size; i++) 00255 strings[i] = NULL; 00256 }; 00257 00258 StringIntHash::~StringIntHash() 00259 { 00260 for (unsigned int i = 0; i < size; i++) 00261 if (strings[i] != NULL) 00262 delete strings[i]; 00263 00264 if(strings) delete [] strings; 00265 if(integers) delete [] integers; 00266 if(keys) delete [] keys; 00267 } 00268 00269 void StringIntHash::SetSize(int newsize) 00270 { 00271 int newmask = newsize - 1; 00272 00273 String ** newstrings = new String * [newsize]; 00274 int * newintegers = new int [newsize]; 00275 unsigned int * newkeys = new unsigned int [newsize]; 00276 00277 for (int i = 0; i < newsize; i++) 00278 newstrings[i] = NULL; 00279 00280 for (unsigned int i = 0; i < size; i++) 00281 if (strings[i] != NULL) 00282 { 00283 unsigned int key = keys[i]; 00284 unsigned int h = key & newmask; 00285 00286 while (newstrings[h] != NULL && 00287 (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i]))))) 00288 h = (h + 1) & newmask; 00289 00290 newkeys[h] = key; 00291 newstrings[h] = strings[i]; 00292 newintegers[h] = integers[i]; 00293 } 00294 00295 if(strings) delete [] strings; 00296 if(integers) delete [] integers; 00297 if(keys) delete [] keys; 00298 00299 strings = newstrings; 00300 integers = newintegers; 00301 keys = newkeys; 00302 size = newsize; 00303 mask = newmask; 00304 } 00305 00306 void StringIntHash::Clear() 00307 { 00308 for (unsigned int i = 0; i < size; i++) 00309 if (strings[i] != NULL) 00310 { 00311 delete strings[i]; 00312 strings[i] = NULL; 00313 } 00314 00315 count = 0; 00316 00317 if (size > 256) 00318 SetSize(256); 00319 } 00320 00321 int StringIntHash::Add(const String & string, int value) 00322 { 00323 unsigned int key = getKey(string); 00324 unsigned int h = Iterate(key, string); 00325 00326 if (strings[h] == NULL) 00327 Insert(h, key, string); 00328 00329 integers[h] = value; 00330 00331 if (count * 2 > size) 00332 { 00333 Grow(); 00334 return Iterate(key, string); 00335 } 00336 00337 return h; 00338 } 00339 00340 int StringIntHash::Find(const String & string, int defaultValue) 00341 { 00342 unsigned int key = getKey(string); 00343 unsigned int h = Iterate(key, string); 00344 00345 if (strings[h] == NULL) 00346 { 00347 Insert(h, key, string); 00348 integers[h] = defaultValue; 00349 00350 if (count * 2 > size) 00351 { 00352 Grow(); 00353 return Iterate(key, string); 00354 } 00355 } 00356 00357 return h; 00358 } 00359 00360 int StringIntHash::Find(const String & string) const 00361 { 00362 unsigned int key = getKey(string); 00363 unsigned int h = Iterate(key, string); 00364 00365 if (strings[h] == NULL) 00366 return -1; 00367 00368 return h; 00369 } 00370 00371 void StringIntHash::Delete(unsigned int index) 00372 { 00373 if (index >= size || strings[index] == NULL) 00374 return; 00375 00376 delete strings[index]; 00377 strings[index] = NULL; 00378 count--; 00379 00380 if (count * 8 < size && size > 32) 00381 Shrink(); 00382 else 00383 { 00384 // rehash the next strings until we find empty slot 00385 index = (index + 1) & mask; 00386 00387 while (strings[index] != NULL) 00388 { 00389 if ((keys[index] & mask) != index) 00390 { 00391 unsigned int h = Iterate(keys[index], *strings[index]); 00392 00393 if (h != (unsigned int) index) 00394 { 00395 keys[h] = keys[index]; 00396 strings[h] = strings[index]; 00397 integers[h] = integers[index]; 00398 00399 strings[index] = NULL; 00400 } 00401 } 00402 00403 index = (index + 1) & mask; 00404 } 00405 } 00406 } 00407 00408 // StringDoubleHash implementation 00409 00410 StringDoubleHash::StringDoubleHash(int startsize) 00411 : StringHashBase() 00412 { 00413 count = 0; 00414 size = startsize; 00415 mask = startsize - 1; 00416 00417 // In this implementation, the size of hash tables must be a power of two 00418 if (startsize & mask) 00419 error("StringDoubleHash: Hash table size must be a power of two.\n"); 00420 00421 strings = new String * [size]; 00422 doubles = new double [size]; 00423 keys = new unsigned int [size]; 00424 00425 for (unsigned int i = 0; i < size; i++) 00426 strings[i] = NULL; 00427 }; 00428 00429 StringDoubleHash::~StringDoubleHash() 00430 { 00431 for (unsigned int i = 0; i < size; i++) 00432 if (strings[i] != NULL) 00433 delete strings[i]; 00434 00435 if(strings) delete [] strings; 00436 if(doubles) delete [] doubles; 00437 if(keys) delete [] keys; 00438 } 00439 00440 void StringDoubleHash::SetSize(int newsize) 00441 { 00442 int newmask = newsize - 1; 00443 00444 String ** newstrings = new String * [newsize]; 00445 double * newdoubles = new double [newsize]; 00446 unsigned int * newkeys = new unsigned int [newsize]; 00447 00448 for (int i = 0; i < newsize; i++) 00449 newstrings[i] = NULL; 00450 00451 for (unsigned int i = 0; i < size; i++) 00452 if (strings[i] != NULL) 00453 { 00454 unsigned int key = keys[i]; 00455 unsigned int h = key & newmask; 00456 00457 while (newstrings[h] != NULL && 00458 (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i]))))) 00459 h = (h + 1) & newmask; 00460 00461 newkeys[h] = key; 00462 newstrings[h] = strings[i]; 00463 newdoubles[h] = doubles[i]; 00464 } 00465 00466 if(strings) delete [] strings; 00467 if(doubles) delete [] doubles; 00468 if(keys) delete [] keys; 00469 00470 strings = newstrings; 00471 doubles = newdoubles; 00472 keys = newkeys; 00473 size = newsize; 00474 mask = newmask; 00475 } 00476 00477 int StringDoubleHash::Add(const String & string, double value) 00478 { 00479 unsigned int key = getKey(string); 00480 unsigned int h = Iterate(key, string); 00481 00482 if (strings[h] == NULL) 00483 Insert(h, key, string); 00484 00485 doubles[h] = value; 00486 00487 if (count * 2 > size) 00488 { 00489 Grow(); 00490 return Iterate(key, string); 00491 } 00492 00493 return h; 00494 } 00495 00496 int StringDoubleHash::Find(const String & string, double defaultValue) 00497 { 00498 unsigned int key = getKey(string); 00499 unsigned int h = Iterate(key, string); 00500 00501 if (strings[h] == NULL) 00502 { 00503 Insert(h, key, string); 00504 doubles[h] = defaultValue; 00505 00506 if (count * 2 > size) 00507 { 00508 Grow(); 00509 return Iterate(key, string); 00510 } 00511 } 00512 00513 return h; 00514 } 00515 00516 int StringDoubleHash::Find(const String & string) const 00517 { 00518 unsigned int key = getKey(string); 00519 unsigned int h = Iterate(key, string); 00520 00521 if (strings[h] == NULL) 00522 return -1; 00523 00524 return h; 00525 } 00526 00527 void StringDoubleHash::Delete(unsigned int index) 00528 { 00529 if (index >= size || strings[index] == NULL) 00530 return; 00531 00532 delete strings[index]; 00533 strings[index] = NULL; 00534 count--; 00535 00536 if (count * 8 < size && size > 32) 00537 Shrink(); 00538 else 00539 { 00540 // rehash the next strings until we find empty slot 00541 index = (index + 1) & mask; 00542 00543 while (strings[index] != NULL) 00544 { 00545 if ((keys[index] & mask) != index) 00546 { 00547 unsigned int h = Iterate(keys[index], *strings[index]); 00548 00549 if (h != (unsigned int) index) 00550 { 00551 keys[h] = keys[index]; 00552 strings[h] = strings[index]; 00553 doubles[h] = doubles[index]; 00554 00555 strings[index] = NULL; 00556 } 00557 } 00558 00559 index = (index + 1) & mask; 00560 } 00561 } 00562 } 00563 00564 void StringHash::Print() 00565 { 00566 Print(stdout); 00567 } 00568 00569 void StringHash::Print(const char * filename) 00570 { 00571 FILE * output = fopen(filename, "wt"); 00572 if (output == NULL) 00573 return; 00574 Print(output); 00575 fclose(output); 00576 } 00577 00578 void StringHash::Print(FILE * output) 00579 { 00580 for (unsigned int i = 0; i < size; i++) 00581 if (SlotInUse(i)) 00582 strings[i]->WriteLine(output); 00583 } 00584 00585 String StringHash::StringList(char separator) 00586 { 00587 String list; 00588 00589 for (unsigned int i = 0; i < size; i++) 00590 if (SlotInUse(i)) 00591 list += *strings[i] + separator; 00592 00593 list.SetLength(list.Length() - 1); 00594 00595 return list; 00596 } 00597 00598 int StringIntHash::GetCount(const String & key) const 00599 { 00600 int index = Find(key); 00601 return index == -1 ? 0 : integers[index]; 00602 } 00603 00604 int StringIntHash::IncrementCount(const String & key) 00605 { 00606 int index = Find(key); 00607 00608 if (index != -1) 00609 return ++(integers[index]); 00610 00611 SetInteger(key, 1); 00612 return 1; 00613 } 00614 00615 int StringIntHash::IncrementCount(const String & key, int amount) 00616 { 00617 int index = Find(key); 00618 00619 if (index != -1) 00620 return (integers[index] += amount); 00621 00622 SetInteger(key, amount); 00623 return amount; 00624 } 00625 00626 int StringIntHash::DecrementCount(const String & key) 00627 { 00628 int index = Find(key); 00629 00630 if (index != -1) 00631 return --(integers[index]); 00632 00633 SetInteger(key, -1); 00634 return -1; 00635 } 00636 00637 void StringDoubleHash::Clear() 00638 { 00639 for (unsigned int i = 0; i < size; i++) 00640 if (strings[i] != NULL) 00641 { 00642 delete strings[i]; 00643 strings[i] = NULL; 00644 } 00645 00646 count = 0; 00647 00648 if (size > 256) 00649 SetSize(256); 00650 } 00651 00652 StringHash & StringHash::operator = (const StringHash & rhs) 00653 { 00654 Clear(); 00655 00656 for (int i = 0; i < rhs.Capacity(); i++) 00657 if (rhs.SlotInUse(i)) 00658 Add(*(rhs.strings[i]), rhs.objects[i]); 00659 00660 return *this; 00661 } 00662 00663 StringIntHash & StringIntHash::operator = (const StringIntHash & rhs) 00664 { 00665 Clear(); 00666 00667 for (int i = 0; i < rhs.Capacity(); i++) 00668 if (rhs.SlotInUse(i)) 00669 Add(*(rhs.strings[i]), rhs.integers[i]); 00670 00671 return *this; 00672 } 00673 00674 bool StringIntHash::operator == (const StringIntHash & rhs) const 00675 { 00676 if (Capacity() != rhs.Capacity()) return false; 00677 if (Entries() != rhs.Entries()) return false; 00678 for (int i = 0; i < rhs.Capacity(); i++) 00679 { 00680 if(rhs.SlotInUse(i) != SlotInUse(i)) 00681 { 00682 return(false); 00683 } 00684 if (rhs.SlotInUse(i)) 00685 { 00686 if(*(strings[i]) != *(rhs.strings[i])) 00687 { 00688 return(false); 00689 } 00690 if(rhs.integers[i] != integers[i]) 00691 { 00692 return(false); 00693 } 00694 } 00695 } 00696 return(true); 00697 } 00698 00699 StringDoubleHash & StringDoubleHash::operator = (const StringDoubleHash & rhs) 00700 { 00701 Clear(); 00702 00703 for (int i = 0; i < rhs.Capacity(); i++) 00704 if (rhs.SlotInUse(i)) 00705 Add(*(rhs.strings[i]), rhs.doubles[i]); 00706 00707 return *this; 00708 } 00709 00710 void StringHash::Swap(StringHash & s) 00711 { 00712 String ** tstrings = s.strings; 00713 s.strings = strings; 00714 strings = tstrings; 00715 00716 void ** tobjects = s.objects; 00717 s.objects = objects; 00718 objects = tobjects; 00719 00720 unsigned int * tkeys = s.keys; 00721 s.keys = keys; 00722 keys = tkeys; 00723 00724 unsigned int temp = s.count; 00725 s.count = count; 00726 count = temp; 00727 00728 temp = s.size; 00729 s.size = size; 00730 size = temp; 00731 00732 temp = s.mask; 00733 s.mask = mask; 00734 mask = temp; 00735 } 00736