StringHash.cpp

00001 /*
00002  *  Copyright (C) 2010-2012  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "StringHash.h"
00019 #include "InputFile.h"
00020 #include "Error.h"
00021 
00022 StringHash::StringHash(int startsize)
00023     : StringHashBase()
00024 {
00025     count = 0;
00026     size  = startsize;
00027     mask  = startsize - 1;
00028 
00029     // In this implementation, the size of hash tables must be a power of two
00030     if (startsize & mask)
00031         error("StringHash: Hash table size must be a power of two.\n");
00032 
00033     strings = new String * [size];
00034     objects = new void * [size];
00035     keys    = new unsigned int [size];
00036 
00037     for (unsigned int i = 0; i < size; i++)
00038     {
00039         strings[i] = NULL;
00040         objects[i] = NULL;
00041     }
00042 };
00043 
00044 StringHash::~StringHash()
00045 {
00046     for (unsigned int i = 0; i < size; i++)
00047         if (strings[i] != NULL)
00048             delete strings[i];
00049 
00050     delete [] strings;
00051     delete [] objects;
00052     delete [] keys;
00053 }
00054 
00055 void StringHash::Clear()
00056 {
00057     for (unsigned int i = 0; i < size; i++)
00058         if (strings[i] != NULL)
00059         {
00060             delete strings[i];
00061             strings[i] = NULL;
00062         }
00063 
00064     count = 0;
00065 
00066     if (size > 256)
00067         SetSize(256);
00068 }
00069 
00070 void StringHash::SetSize(int newsize)
00071 {
00072     int newmask = newsize - 1;
00073 
00074     String   ** newstrings = new String * [newsize];
00075     void     ** newobjects = new void * [newsize];
00076     unsigned int * newkeys = new unsigned int [newsize];
00077 
00078     for (int i = 0; i < newsize; i++)
00079     {
00080         newstrings[i] = NULL;
00081         newobjects[i] = NULL;
00082     }
00083 
00084     if (count)
00085         for (unsigned int i = 0; i < size; i++)
00086             if (strings[i] != NULL)
00087             {
00088                 unsigned int key = keys[i];
00089                 unsigned int h   = key & newmask;
00090 
00091                 while (newstrings[h] != NULL &&
00092                         (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
00093                     h = (h + 1) & newmask;
00094 
00095                 newkeys[h] = key;
00096                 newstrings[h] = strings[i];
00097                 newobjects[h] = objects[i];
00098             }
00099 
00100     delete [] strings;
00101     delete [] objects;
00102     delete [] keys;
00103 
00104     strings = newstrings;
00105     objects = newobjects;
00106     keys = newkeys;
00107     size = newsize;
00108     mask = newmask;
00109 }
00110 
00111 int StringHash::Add(const String & string, void * object)
00112 {
00113     unsigned int key = getKey(string);
00114     unsigned int h   = Iterate(key, string);
00115 
00116     if (strings[h] == NULL)
00117         Insert(h, key, string);
00118 
00119     objects[h] = object;
00120 
00121     if (count * 2 > size)
00122     {
00123         Grow();
00124         return Iterate(key, string);
00125     }
00126 
00127     return h;
00128 }
00129 
00130 int StringHash::Find(const String & string,  void *(*create_object)())
00131 {
00132     unsigned int key = getKey(string);
00133     unsigned int h   = Iterate(key, string);
00134 
00135     if (strings[h] == NULL && create_object == NULL)
00136         return -1;
00137 
00138     if (strings[h] == NULL && create_object != NULL)
00139     {
00140         Insert(h, key, string);
00141         objects[h] = create_object();
00142 
00143         if (count * 2 > size)
00144         {
00145             Grow();
00146             return Iterate(key, string);
00147         }
00148     }
00149 
00150     return h;
00151 }
00152 
00153 int StringHash::Find(const String & string) const
00154 {
00155     unsigned int key = getKey(string);
00156     unsigned int h   = Iterate(key, string);
00157 
00158     if (strings[h] == NULL)
00159         return -1;
00160 
00161     return h;
00162 }
00163 void * StringHash::CreateHash()
00164 {
00165     return (void *) new StringHash();
00166 }
00167 
00168 void StringHash::Delete(unsigned int index)
00169 {
00170     if (index >= size || strings[index] == NULL)
00171         return;
00172 
00173     delete strings[index];
00174     strings[index] = NULL;
00175     count--;
00176 
00177     if (count * 8 < size && size > 32)
00178         Shrink();
00179     else
00180     {
00181         // rehash the next strings until we find empty slot
00182         index = (index + 1) & mask;
00183 
00184         while (strings[index] != NULL)
00185         {
00186             if ((keys[index] & mask) != index)
00187             {
00188                 unsigned int h = Iterate(keys[index], *strings[index]);
00189 
00190                 if (h != (unsigned int) index)
00191                 {
00192                     keys[h] = keys[index];
00193                     strings[h] = strings[index];
00194                     objects[h] = objects[index];
00195 
00196                     strings[index] = NULL;
00197                     objects[index] = NULL;
00198                 }
00199             }
00200 
00201             index = (index + 1) & mask;
00202         }
00203     }
00204 }
00205 
00206 void StringHash::ReadLinesFromFile(const char * filename)
00207 {
00208     IFILE f = ifopen(filename, "rb");
00209     if (f == NULL) return;
00210     ReadLinesFromFile(f);
00211     ifclose(f);
00212 }
00213 
00214 void StringHash::ReadLinesFromFile(FILE * f)
00215 {
00216     String buffer;
00217     
00218     while (!feof(f))
00219     {
00220         buffer.ReadLine(f);
00221         Add(buffer.Trim());
00222     }
00223 }
00224 
00225 void StringHash::ReadLinesFromFile(IFILE & f)
00226 {
00227     String buffer;
00228 
00229     while (!ifeof(f))
00230     {
00231         buffer.ReadLine(f);
00232         Add(buffer.Trim());
00233     }
00234 }
00235 
00236 // StringIntHash implementation
00237 
00238 StringIntHash::StringIntHash(int startsize)
00239     : StringHashBase()
00240 {
00241     count = 0;
00242     size  = startsize;
00243     mask  = startsize - 1;
00244 
00245     // In this implementation, the size of hash tables must be a power of two
00246     if (startsize & mask)
00247         error("StringIntHash: Hash table size must be a power of two.\n");
00248 
00249     strings  = new String * [size];
00250     integers = new int [size];
00251     keys     = new unsigned int [size];
00252 
00253     for (unsigned int i = 0; i < size; i++)
00254         strings[i] = NULL;
00255 };
00256 
00257 StringIntHash::~StringIntHash()
00258 {
00259     for (unsigned int i = 0; i < size; i++)
00260         if (strings[i] != NULL)
00261             delete strings[i];
00262 
00263     delete [] strings;
00264     delete [] integers;
00265     delete [] keys;
00266 }
00267 
00268 void StringIntHash::SetSize(int newsize)
00269 {
00270     int newmask = newsize - 1;
00271 
00272     String   ** newstrings = new String * [newsize];
00273     int      * newintegers = new int [newsize];
00274     unsigned int * newkeys = new unsigned int [newsize];
00275 
00276     for (int i = 0; i < newsize; i++)
00277         newstrings[i] = NULL;
00278 
00279     for (unsigned int i = 0; i < size; i++)
00280         if (strings[i] != NULL)
00281         {
00282             unsigned int key = keys[i];
00283             unsigned int h   = key & newmask;
00284 
00285             while (newstrings[h] != NULL &&
00286                    (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
00287                 h = (h + 1) & newmask;
00288 
00289             newkeys[h] = key;
00290             newstrings[h] = strings[i];
00291             newintegers[h] = integers[i];
00292         }
00293 
00294     delete [] strings;
00295     delete [] integers;
00296     delete [] keys;
00297 
00298     strings = newstrings;
00299     integers = newintegers;
00300     keys = newkeys;
00301     size = newsize;
00302     mask = newmask;
00303 }
00304 
00305 void StringIntHash::Clear()
00306 {
00307     for (unsigned int i = 0; i < size; i++)
00308         if (strings[i] != NULL)
00309         {
00310             delete strings[i];
00311             strings[i] = NULL;
00312         }
00313 
00314     count = 0;
00315 
00316     if (size > 256)
00317         SetSize(256);
00318 }
00319 
00320 int StringIntHash::Add(const String & string, int value)
00321 {
00322     unsigned int key = getKey(string);
00323     unsigned int h   = Iterate(key, string);
00324 
00325     if (strings[h] == NULL)
00326         Insert(h, key, string);
00327 
00328     integers[h] = value;
00329 
00330     if (count * 2 > size)
00331     {
00332         Grow();
00333         return Iterate(key, string);
00334     }
00335 
00336     return h;
00337 }
00338 
00339 int StringIntHash::Find(const String & string,  int defaultValue)
00340 {
00341     unsigned int key = getKey(string);
00342     unsigned int h   = Iterate(key, string);
00343 
00344     if (strings[h] == NULL)
00345     {
00346         Insert(h, key, string);
00347         integers[h] = defaultValue;
00348 
00349         if (count * 2 > size)
00350         {
00351             Grow();
00352             return Iterate(key, string);
00353         }
00354     }
00355 
00356     return h;
00357 }
00358 
00359 int StringIntHash::Find(const String & string) const
00360 {
00361     unsigned int key = getKey(string);
00362     unsigned int h   = Iterate(key, string);
00363 
00364     if (strings[h] == NULL)
00365         return -1;
00366 
00367     return h;
00368 }
00369 
00370 void StringIntHash::Delete(unsigned int index)
00371 {
00372     if (index >= size || strings[index] == NULL)
00373         return;
00374 
00375     delete strings[index];
00376     strings[index] = NULL;
00377     count--;
00378 
00379     if (count * 8 < size && size > 32)
00380         Shrink();
00381     else
00382     {
00383         // rehash the next strings until we find empty slot
00384         index = (index + 1) & mask;
00385 
00386         while (strings[index] != NULL)
00387         {
00388             if ((keys[index] & mask) != index)
00389             {
00390                 unsigned int h = Iterate(keys[index], *strings[index]);
00391 
00392                 if (h != (unsigned int) index)
00393                 {
00394                     keys[h] = keys[index];
00395                     strings[h] = strings[index];
00396                     integers[h] = integers[index];
00397 
00398                     strings[index] = NULL;
00399                 }
00400             }
00401 
00402             index = (index + 1) & mask;
00403         }
00404     }
00405 }
00406 
00407 // StringDoubleHash implementation
00408 
00409 StringDoubleHash::StringDoubleHash(int startsize)
00410     : StringHashBase()
00411 {
00412     count = 0;
00413     size  = startsize;
00414     mask  = startsize - 1;
00415 
00416     // In this implementation, the size of hash tables must be a power of two
00417     if (startsize & mask)
00418         error("StringDoubleHash: Hash table size must be a power of two.\n");
00419 
00420     strings  = new String * [size];
00421     doubles  = new double [size];
00422     keys     = new unsigned int [size];
00423 
00424     for (unsigned int i = 0; i < size; i++)
00425         strings[i] = NULL;
00426 };
00427 
00428 StringDoubleHash::~StringDoubleHash()
00429 {
00430     for (unsigned int i = 0; i < size; i++)
00431         if (strings[i] != NULL)
00432             delete strings[i];
00433 
00434     delete [] strings;
00435     delete [] doubles;
00436     delete [] keys;
00437 }
00438 
00439 void StringDoubleHash::SetSize(int newsize)
00440 {
00441     int newmask = newsize - 1;
00442 
00443     String   ** newstrings = new String * [newsize];
00444     double    * newdoubles = new double [newsize];
00445     unsigned int * newkeys = new unsigned int [newsize];
00446 
00447     for (int i = 0; i < newsize; i++)
00448         newstrings[i] = NULL;
00449 
00450     for (unsigned int i = 0; i < size; i++)
00451         if (strings[i] != NULL)
00452         {
00453             unsigned int key = keys[i];
00454             unsigned int h   = key & newmask;
00455 
00456             while (newstrings[h] != NULL &&
00457                     (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
00458                 h = (h + 1) & newmask;
00459 
00460             newkeys[h] = key;
00461             newstrings[h] = strings[i];
00462             newdoubles[h] = doubles[i];
00463         }
00464 
00465     delete [] strings;
00466     delete [] doubles;
00467     delete [] keys;
00468 
00469     strings = newstrings;
00470     doubles = newdoubles;
00471     keys = newkeys;
00472     size = newsize;
00473     mask = newmask;
00474 }
00475 
00476 int StringDoubleHash::Add(const String & string, double value)
00477 {
00478     unsigned int key = getKey(string);
00479     unsigned int h   = Iterate(key, string);
00480 
00481     if (strings[h] == NULL)
00482         Insert(h, key, string);
00483 
00484     doubles[h] = value;
00485 
00486     if (count * 2 > size)
00487     {
00488         Grow();
00489         return Iterate(key, string);
00490     }
00491 
00492     return h;
00493 }
00494 
00495 int StringDoubleHash::Find(const String & string, double defaultValue)
00496 {
00497     unsigned int key = getKey(string);
00498     unsigned int h   = Iterate(key, string);
00499 
00500     if (strings[h] == NULL)
00501     {
00502         Insert(h, key, string);
00503         doubles[h] = defaultValue;
00504 
00505         if (count * 2 > size)
00506         {
00507             Grow();
00508             return Iterate(key, string);
00509         }
00510     }
00511 
00512     return h;
00513 }
00514 
00515 int StringDoubleHash::Find(const String & string) const
00516 {
00517     unsigned int key = getKey(string);
00518     unsigned int h   = Iterate(key, string);
00519 
00520     if (strings[h] == NULL)
00521         return -1;
00522 
00523     return h;
00524 }
00525 
00526 void StringDoubleHash::Delete(unsigned int index)
00527 {
00528     if (index >= size || strings[index] == NULL)
00529         return;
00530 
00531     delete strings[index];
00532     strings[index] = NULL;
00533     count--;
00534 
00535     if (count * 8 < size && size > 32)
00536         Shrink();
00537     else
00538     {
00539         // rehash the next strings until we find empty slot
00540         index = (index + 1) & mask;
00541 
00542         while (strings[index] != NULL)
00543         {
00544             if ((keys[index] & mask) != index)
00545             {
00546                 unsigned int h = Iterate(keys[index], *strings[index]);
00547 
00548                 if (h != (unsigned int) index)
00549                 {
00550                     keys[h] = keys[index];
00551                     strings[h] = strings[index];
00552                     doubles[h] = doubles[index];
00553 
00554                     strings[index] = NULL;
00555                 }
00556             }
00557 
00558             index = (index + 1) & mask;
00559         }
00560     }
00561 }
00562 
00563 void StringHash::Print()
00564 {
00565     Print(stdout);
00566 }
00567 
00568 void StringHash::Print(const char * filename)
00569 {
00570     FILE * output = fopen(filename, "wt");
00571     if (output == NULL)
00572         return;
00573     Print(output);
00574     fclose(output);
00575 }
00576 
00577 void StringHash::Print(FILE * output)
00578 {
00579     for (unsigned int i = 0; i < size; i++)
00580         if (SlotInUse(i))
00581             strings[i]->WriteLine(output);
00582 }
00583 
00584 String StringHash::StringList(char separator)
00585 {
00586     String list;
00587 
00588     for (unsigned int i = 0; i < size; i++)
00589         if (SlotInUse(i))
00590             list += *strings[i] + separator;
00591 
00592     list.SetLength(list.Length() - 1);
00593 
00594     return list;
00595 }
00596 
00597 int StringIntHash::GetCount(const String & key) const
00598 {
00599     int index = Find(key);
00600     return index == -1 ?  0 : integers[index];
00601 }
00602 
00603 int StringIntHash::IncrementCount(const String & key)
00604 {
00605     int index = Find(key);
00606 
00607     if (index != -1)
00608         return ++(integers[index]);
00609 
00610     SetInteger(key, 1);
00611     return 1;
00612 }
00613 
00614 int StringIntHash::IncrementCount(const String & key, int amount)
00615 {
00616     int index = Find(key);
00617 
00618     if (index != -1)
00619         return (integers[index] += amount);
00620 
00621     SetInteger(key, amount);
00622     return amount;
00623 }
00624 
00625 int StringIntHash::DecrementCount(const String & key)
00626 {
00627     int index = Find(key);
00628 
00629     if (index != -1)
00630         return --(integers[index]);
00631 
00632     SetInteger(key, -1);
00633     return -1;
00634 }
00635 
00636 void StringDoubleHash::Clear()
00637 {
00638     for (unsigned int i = 0; i < size; i++)
00639         if (strings[i] != NULL)
00640         {
00641             delete strings[i];
00642             strings[i] = NULL;
00643         }
00644 
00645     count = 0;
00646 
00647     if (size > 256)
00648         SetSize(256);
00649 }
00650 
00651 StringHash & StringHash::operator = (const StringHash & rhs)
00652 {
00653     Clear();
00654 
00655     for (int i = 0; i < rhs.Capacity(); i++)
00656         if (rhs.SlotInUse(i))
00657             Add(*(rhs.strings[i]), rhs.objects[i]);
00658 
00659     return *this;
00660 }
00661 
00662 StringIntHash & StringIntHash::operator = (const StringIntHash & rhs)
00663 {
00664     Clear();
00665 
00666     for (int i = 0; i < rhs.Capacity(); i++)
00667         if (rhs.SlotInUse(i))
00668             Add(*(rhs.strings[i]), rhs.integers[i]);
00669 
00670     return *this;
00671 }
00672 
00673 bool StringIntHash::operator == (const StringIntHash & rhs) const
00674 {
00675     if (Capacity() != rhs.Capacity()) return false;
00676     if (Entries() != rhs.Entries()) return false;
00677     for (int i = 0; i < rhs.Capacity(); i++)
00678     {
00679         if(rhs.SlotInUse(i) != SlotInUse(i))
00680         {
00681             return(false);
00682         }
00683         if (rhs.SlotInUse(i))
00684         {
00685             if(*(strings[i]) != *(rhs.strings[i]))
00686             {
00687                 return(false);
00688             }
00689             if(rhs.integers[i] != integers[i])
00690             {
00691                 return(false);
00692             }
00693         }
00694     }
00695     return(true);
00696 }
00697 
00698 StringDoubleHash & StringDoubleHash::operator = (const StringDoubleHash & rhs)
00699 {
00700     Clear();
00701 
00702     for (int i = 0; i < rhs.Capacity(); i++)
00703         if (rhs.SlotInUse(i))
00704             Add(*(rhs.strings[i]), rhs.doubles[i]);
00705 
00706     return *this;
00707 }
00708 
00709 void StringHash::Swap(StringHash & s)
00710 {
00711     String ** tstrings = s.strings;
00712     s.strings = strings;
00713     strings = tstrings;
00714 
00715     void ** tobjects = s.objects;
00716     s.objects = objects;
00717     objects = tobjects;
00718 
00719     unsigned int * tkeys = s.keys;
00720     s.keys = keys;
00721     keys = tkeys;
00722 
00723     unsigned int temp = s.count;
00724     s.count = count;
00725     count = temp;
00726 
00727     temp = s.size;
00728     s.size = size;
00729     size = temp;
00730 
00731     temp = s.mask;
00732     s.mask = mask;
00733     mask = temp;
00734 }
00735 
Generated on Mon Feb 11 13:45:19 2013 for libStatGen Software by  doxygen 1.6.3