libStatGen Software  1
StringHash.cpp
00001 /*
00002  *  Copyright (C) 2010-2012  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "StringHash.h"
00019 #include "InputFile.h"
00020 #include "Error.h"
00021 
00022 StringHash::StringHash(int startsize)
00023     : StringHashBase()
00024 {
00025     count = 0;
00026     size  = startsize;
00027     mask  = startsize - 1;
00028 
00029     // In this implementation, the size of hash tables must be a power of two
00030     if (startsize & mask)
00031         error("StringHash: Hash table size must be a power of two.\n");
00032 
00033     strings = new String * [size];
00034     objects = new void * [size];
00035     keys    = new unsigned int [size];
00036 
00037     for (unsigned int i = 0; i < size; i++)
00038     {
00039         strings[i] = NULL;
00040         objects[i] = NULL;
00041     }
00042 };
00043 
00044 StringHash::~StringHash()
00045 {
00046     for (unsigned int i = 0; i < size; i++)
00047         if (strings[i] != NULL)
00048             delete strings[i];
00049 
00050     if(strings) delete [] strings;
00051     if(objects) delete [] objects;
00052     if(keys) delete [] keys;
00053 }
00054 
00055 void StringHash::Clear()
00056 {
00057     for (unsigned int i = 0; i < size; i++)
00058         if (strings[i] != NULL)
00059         {
00060             delete strings[i];
00061             strings[i] = NULL;
00062         }
00063 
00064     count = 0;
00065 
00066     if (size > 256)
00067         SetSize(256);
00068 }
00069 
00070 void StringHash::SetSize(int newsize)
00071 {
00072     int newmask = newsize - 1;
00073 
00074     String   ** newstrings = new String * [newsize];
00075     void     ** newobjects = new void * [newsize];
00076     unsigned int * newkeys = new unsigned int [newsize];
00077 
00078     for (int i = 0; i < newsize; i++)
00079     {
00080         newstrings[i] = NULL;
00081         newobjects[i] = NULL;
00082     }
00083 
00084     if (count)
00085         for (unsigned int i = 0; i < size; i++)
00086             if (strings[i] != NULL)
00087             {
00088                 unsigned int key = keys[i];
00089                 unsigned int h   = key & newmask;
00090 
00091                 while (newstrings[h] != NULL &&
00092                         (newkeys[h] != key || 
00093                          (!stringsEqual(*(newstrings[h]), *(strings[i])))))
00094                     h = (h + 1) & newmask;
00095 
00096                 newkeys[h] = key;
00097                 newstrings[h] = strings[i];
00098                 newobjects[h] = objects[i];
00099             }
00100 
00101     if(strings) delete [] strings;
00102     if(objects) delete [] objects;
00103     if(keys) delete [] keys;
00104 
00105     strings = newstrings;
00106     objects = newobjects;
00107     keys = newkeys;
00108     size = newsize;
00109     mask = newmask;
00110 }
00111 
00112 int StringHash::Add(const String & string, void * object)
00113 {
00114     unsigned int key = getKey(string);
00115     unsigned int h   = Iterate(key, string);
00116 
00117     if (strings[h] == NULL)
00118         Insert(h, key, string);
00119 
00120     objects[h] = object;
00121 
00122     if (count * 2 > size)
00123     {
00124         Grow();
00125         return Iterate(key, string);
00126     }
00127 
00128     return h;
00129 }
00130 
00131 int StringHash::Find(const String & string,  void *(*create_object)())
00132 {
00133     unsigned int key = getKey(string);
00134     unsigned int h   = Iterate(key, string);
00135 
00136     if (strings[h] == NULL && create_object == NULL)
00137         return -1;
00138 
00139     if (strings[h] == NULL && create_object != NULL)
00140     {
00141         Insert(h, key, string);
00142         objects[h] = create_object();
00143 
00144         if (count * 2 > size)
00145         {
00146             Grow();
00147             return Iterate(key, string);
00148         }
00149     }
00150 
00151     return h;
00152 }
00153 
00154 int StringHash::Find(const String & string) const
00155 {
00156     unsigned int key = getKey(string);
00157     unsigned int h   = Iterate(key, string);
00158 
00159     if (strings[h] == NULL)
00160         return -1;
00161 
00162     return h;
00163 }
00164 void * StringHash::CreateHash()
00165 {
00166     return (void *) new StringHash();
00167 }
00168 
00169 void StringHash::Delete(unsigned int index)
00170 {
00171     if (index >= size || strings[index] == NULL)
00172         return;
00173 
00174     delete strings[index];
00175     strings[index] = NULL;
00176     count--;
00177 
00178     if (count * 8 < size && size > 32)
00179         Shrink();
00180     else
00181     {
00182         // rehash the next strings until we find empty slot
00183         index = (index + 1) & mask;
00184 
00185         while (strings[index] != NULL)
00186         {
00187             if ((keys[index] & mask) != index)
00188             {
00189                 unsigned int h = Iterate(keys[index], *strings[index]);
00190 
00191                 if (h != (unsigned int) index)
00192                 {
00193                     keys[h] = keys[index];
00194                     strings[h] = strings[index];
00195                     objects[h] = objects[index];
00196 
00197                     strings[index] = NULL;
00198                     objects[index] = NULL;
00199                 }
00200             }
00201 
00202             index = (index + 1) & mask;
00203         }
00204     }
00205 }
00206 
00207 void StringHash::ReadLinesFromFile(const char * filename)
00208 {
00209     IFILE f = ifopen(filename, "rb");
00210     if (f == NULL) return;
00211     ReadLinesFromFile(f);
00212     ifclose(f);
00213 }
00214 
00215 void StringHash::ReadLinesFromFile(FILE * f)
00216 {
00217     String buffer;
00218     
00219     while (!feof(f))
00220     {
00221         buffer.ReadLine(f);
00222         Add(buffer.Trim());
00223     }
00224 }
00225 
00226 void StringHash::ReadLinesFromFile(IFILE & f)
00227 {
00228     String buffer;
00229 
00230     while (!ifeof(f))
00231     {
00232         buffer.ReadLine(f);
00233         Add(buffer.Trim());
00234     }
00235 }
00236 
00237 // StringIntHash implementation
00238 
00239 StringIntHash::StringIntHash(int startsize)
00240     : StringHashBase()
00241 {
00242     count = 0;
00243     size  = startsize;
00244     mask  = startsize - 1;
00245 
00246     // In this implementation, the size of hash tables must be a power of two
00247     if (startsize & mask)
00248         error("StringIntHash: Hash table size must be a power of two.\n");
00249 
00250     strings  = new String * [size];
00251     integers = new int [size];
00252     keys     = new unsigned int [size];
00253 
00254     for (unsigned int i = 0; i < size; i++)
00255         strings[i] = NULL;
00256 };
00257 
00258 StringIntHash::~StringIntHash()
00259 {
00260     for (unsigned int i = 0; i < size; i++)
00261         if (strings[i] != NULL)
00262             delete strings[i];
00263 
00264     if(strings) delete [] strings;
00265     if(integers) delete [] integers;
00266     if(keys) delete [] keys;
00267 }
00268 
00269 void StringIntHash::SetSize(int newsize)
00270 {
00271     int newmask = newsize - 1;
00272 
00273     String   ** newstrings = new String * [newsize];
00274     int      * newintegers = new int [newsize];
00275     unsigned int * newkeys = new unsigned int [newsize];
00276 
00277     for (int i = 0; i < newsize; i++)
00278         newstrings[i] = NULL;
00279 
00280     for (unsigned int i = 0; i < size; i++)
00281         if (strings[i] != NULL)
00282         {
00283             unsigned int key = keys[i];
00284             unsigned int h   = key & newmask;
00285 
00286             while (newstrings[h] != NULL &&
00287                    (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
00288                 h = (h + 1) & newmask;
00289 
00290             newkeys[h] = key;
00291             newstrings[h] = strings[i];
00292             newintegers[h] = integers[i];
00293         }
00294 
00295     if(strings) delete [] strings;
00296     if(integers) delete [] integers;
00297     if(keys) delete [] keys;
00298 
00299     strings = newstrings;
00300     integers = newintegers;
00301     keys = newkeys;
00302     size = newsize;
00303     mask = newmask;
00304 }
00305 
00306 void StringIntHash::Clear()
00307 {
00308     for (unsigned int i = 0; i < size; i++)
00309         if (strings[i] != NULL)
00310         {
00311             delete strings[i];
00312             strings[i] = NULL;
00313         }
00314 
00315     count = 0;
00316 
00317     if (size > 256)
00318         SetSize(256);
00319 }
00320 
00321 int StringIntHash::Add(const String & string, int value)
00322 {
00323     unsigned int key = getKey(string);
00324     unsigned int h   = Iterate(key, string);
00325 
00326     if (strings[h] == NULL)
00327         Insert(h, key, string);
00328 
00329     integers[h] = value;
00330 
00331     if (count * 2 > size)
00332     {
00333         Grow();
00334         return Iterate(key, string);
00335     }
00336 
00337     return h;
00338 }
00339 
00340 int StringIntHash::Find(const String & string,  int defaultValue)
00341 {
00342     unsigned int key = getKey(string);
00343     unsigned int h   = Iterate(key, string);
00344 
00345     if (strings[h] == NULL)
00346     {
00347         Insert(h, key, string);
00348         integers[h] = defaultValue;
00349 
00350         if (count * 2 > size)
00351         {
00352             Grow();
00353             return Iterate(key, string);
00354         }
00355     }
00356 
00357     return h;
00358 }
00359 
00360 int StringIntHash::Find(const String & string) const
00361 {
00362     unsigned int key = getKey(string);
00363     unsigned int h   = Iterate(key, string);
00364 
00365     if (strings[h] == NULL)
00366         return -1;
00367 
00368     return h;
00369 }
00370 
00371 void StringIntHash::Delete(unsigned int index)
00372 {
00373     if (index >= size || strings[index] == NULL)
00374         return;
00375 
00376     delete strings[index];
00377     strings[index] = NULL;
00378     count--;
00379 
00380     if (count * 8 < size && size > 32)
00381         Shrink();
00382     else
00383     {
00384         // rehash the next strings until we find empty slot
00385         index = (index + 1) & mask;
00386 
00387         while (strings[index] != NULL)
00388         {
00389             if ((keys[index] & mask) != index)
00390             {
00391                 unsigned int h = Iterate(keys[index], *strings[index]);
00392 
00393                 if (h != (unsigned int) index)
00394                 {
00395                     keys[h] = keys[index];
00396                     strings[h] = strings[index];
00397                     integers[h] = integers[index];
00398 
00399                     strings[index] = NULL;
00400                 }
00401             }
00402 
00403             index = (index + 1) & mask;
00404         }
00405     }
00406 }
00407 
00408 // StringDoubleHash implementation
00409 
00410 StringDoubleHash::StringDoubleHash(int startsize)
00411     : StringHashBase()
00412 {
00413     count = 0;
00414     size  = startsize;
00415     mask  = startsize - 1;
00416 
00417     // In this implementation, the size of hash tables must be a power of two
00418     if (startsize & mask)
00419         error("StringDoubleHash: Hash table size must be a power of two.\n");
00420 
00421     strings  = new String * [size];
00422     doubles  = new double [size];
00423     keys     = new unsigned int [size];
00424 
00425     for (unsigned int i = 0; i < size; i++)
00426         strings[i] = NULL;
00427 };
00428 
00429 StringDoubleHash::~StringDoubleHash()
00430 {
00431     for (unsigned int i = 0; i < size; i++)
00432         if (strings[i] != NULL)
00433             delete strings[i];
00434 
00435     if(strings) delete [] strings;
00436     if(doubles) delete [] doubles;
00437     if(keys) delete [] keys;
00438 }
00439 
00440 void StringDoubleHash::SetSize(int newsize)
00441 {
00442     int newmask = newsize - 1;
00443 
00444     String   ** newstrings = new String * [newsize];
00445     double    * newdoubles = new double [newsize];
00446     unsigned int * newkeys = new unsigned int [newsize];
00447 
00448     for (int i = 0; i < newsize; i++)
00449         newstrings[i] = NULL;
00450 
00451     for (unsigned int i = 0; i < size; i++)
00452         if (strings[i] != NULL)
00453         {
00454             unsigned int key = keys[i];
00455             unsigned int h   = key & newmask;
00456 
00457             while (newstrings[h] != NULL &&
00458                     (newkeys[h] != key || (!stringsEqual(*(newstrings[h]), *(strings[i])))))
00459                 h = (h + 1) & newmask;
00460 
00461             newkeys[h] = key;
00462             newstrings[h] = strings[i];
00463             newdoubles[h] = doubles[i];
00464         }
00465 
00466     if(strings) delete [] strings;
00467     if(doubles) delete [] doubles;
00468     if(keys) delete [] keys;
00469 
00470     strings = newstrings;
00471     doubles = newdoubles;
00472     keys = newkeys;
00473     size = newsize;
00474     mask = newmask;
00475 }
00476 
00477 int StringDoubleHash::Add(const String & string, double value)
00478 {
00479     unsigned int key = getKey(string);
00480     unsigned int h   = Iterate(key, string);
00481 
00482     if (strings[h] == NULL)
00483         Insert(h, key, string);
00484 
00485     doubles[h] = value;
00486 
00487     if (count * 2 > size)
00488     {
00489         Grow();
00490         return Iterate(key, string);
00491     }
00492 
00493     return h;
00494 }
00495 
00496 int StringDoubleHash::Find(const String & string, double defaultValue)
00497 {
00498     unsigned int key = getKey(string);
00499     unsigned int h   = Iterate(key, string);
00500 
00501     if (strings[h] == NULL)
00502     {
00503         Insert(h, key, string);
00504         doubles[h] = defaultValue;
00505 
00506         if (count * 2 > size)
00507         {
00508             Grow();
00509             return Iterate(key, string);
00510         }
00511     }
00512 
00513     return h;
00514 }
00515 
00516 int StringDoubleHash::Find(const String & string) const
00517 {
00518     unsigned int key = getKey(string);
00519     unsigned int h   = Iterate(key, string);
00520 
00521     if (strings[h] == NULL)
00522         return -1;
00523 
00524     return h;
00525 }
00526 
00527 void StringDoubleHash::Delete(unsigned int index)
00528 {
00529     if (index >= size || strings[index] == NULL)
00530         return;
00531 
00532     delete strings[index];
00533     strings[index] = NULL;
00534     count--;
00535 
00536     if (count * 8 < size && size > 32)
00537         Shrink();
00538     else
00539     {
00540         // rehash the next strings until we find empty slot
00541         index = (index + 1) & mask;
00542 
00543         while (strings[index] != NULL)
00544         {
00545             if ((keys[index] & mask) != index)
00546             {
00547                 unsigned int h = Iterate(keys[index], *strings[index]);
00548 
00549                 if (h != (unsigned int) index)
00550                 {
00551                     keys[h] = keys[index];
00552                     strings[h] = strings[index];
00553                     doubles[h] = doubles[index];
00554 
00555                     strings[index] = NULL;
00556                 }
00557             }
00558 
00559             index = (index + 1) & mask;
00560         }
00561     }
00562 }
00563 
00564 void StringHash::Print()
00565 {
00566     Print(stdout);
00567 }
00568 
00569 void StringHash::Print(const char * filename)
00570 {
00571     FILE * output = fopen(filename, "wt");
00572     if (output == NULL)
00573         return;
00574     Print(output);
00575     fclose(output);
00576 }
00577 
00578 void StringHash::Print(FILE * output)
00579 {
00580     for (unsigned int i = 0; i < size; i++)
00581         if (SlotInUse(i))
00582             strings[i]->WriteLine(output);
00583 }
00584 
00585 String StringHash::StringList(char separator)
00586 {
00587     String list;
00588 
00589     for (unsigned int i = 0; i < size; i++)
00590         if (SlotInUse(i))
00591             list += *strings[i] + separator;
00592 
00593     list.SetLength(list.Length() - 1);
00594 
00595     return list;
00596 }
00597 
00598 int StringIntHash::GetCount(const String & key) const
00599 {
00600     int index = Find(key);
00601     return index == -1 ?  0 : integers[index];
00602 }
00603 
00604 int StringIntHash::IncrementCount(const String & key)
00605 {
00606     int index = Find(key);
00607 
00608     if (index != -1)
00609         return ++(integers[index]);
00610 
00611     SetInteger(key, 1);
00612     return 1;
00613 }
00614 
00615 int StringIntHash::IncrementCount(const String & key, int amount)
00616 {
00617     int index = Find(key);
00618 
00619     if (index != -1)
00620         return (integers[index] += amount);
00621 
00622     SetInteger(key, amount);
00623     return amount;
00624 }
00625 
00626 int StringIntHash::DecrementCount(const String & key)
00627 {
00628     int index = Find(key);
00629 
00630     if (index != -1)
00631         return --(integers[index]);
00632 
00633     SetInteger(key, -1);
00634     return -1;
00635 }
00636 
00637 void StringDoubleHash::Clear()
00638 {
00639     for (unsigned int i = 0; i < size; i++)
00640         if (strings[i] != NULL)
00641         {
00642             delete strings[i];
00643             strings[i] = NULL;
00644         }
00645 
00646     count = 0;
00647 
00648     if (size > 256)
00649         SetSize(256);
00650 }
00651 
00652 StringHash & StringHash::operator = (const StringHash & rhs)
00653 {
00654     Clear();
00655 
00656     for (int i = 0; i < rhs.Capacity(); i++)
00657         if (rhs.SlotInUse(i))
00658             Add(*(rhs.strings[i]), rhs.objects[i]);
00659 
00660     return *this;
00661 }
00662 
00663 StringIntHash & StringIntHash::operator = (const StringIntHash & rhs)
00664 {
00665     Clear();
00666 
00667     for (int i = 0; i < rhs.Capacity(); i++)
00668         if (rhs.SlotInUse(i))
00669             Add(*(rhs.strings[i]), rhs.integers[i]);
00670 
00671     return *this;
00672 }
00673 
00674 bool StringIntHash::operator == (const StringIntHash & rhs) const
00675 {
00676     if (Capacity() != rhs.Capacity()) return false;
00677     if (Entries() != rhs.Entries()) return false;
00678     for (int i = 0; i < rhs.Capacity(); i++)
00679     {
00680         if(rhs.SlotInUse(i) != SlotInUse(i))
00681         {
00682             return(false);
00683         }
00684         if (rhs.SlotInUse(i))
00685         {
00686             if(*(strings[i]) != *(rhs.strings[i]))
00687             {
00688                 return(false);
00689             }
00690             if(rhs.integers[i] != integers[i])
00691             {
00692                 return(false);
00693             }
00694         }
00695     }
00696     return(true);
00697 }
00698 
00699 StringDoubleHash & StringDoubleHash::operator = (const StringDoubleHash & rhs)
00700 {
00701     Clear();
00702 
00703     for (int i = 0; i < rhs.Capacity(); i++)
00704         if (rhs.SlotInUse(i))
00705             Add(*(rhs.strings[i]), rhs.doubles[i]);
00706 
00707     return *this;
00708 }
00709 
00710 void StringHash::Swap(StringHash & s)
00711 {
00712     String ** tstrings = s.strings;
00713     s.strings = strings;
00714     strings = tstrings;
00715 
00716     void ** tobjects = s.objects;
00717     s.objects = objects;
00718     objects = tobjects;
00719 
00720     unsigned int * tkeys = s.keys;
00721     s.keys = keys;
00722     keys = tkeys;
00723 
00724     unsigned int temp = s.count;
00725     s.count = count;
00726     count = temp;
00727 
00728     temp = s.size;
00729     s.size = size;
00730     size = temp;
00731 
00732     temp = s.mask;
00733     s.mask = mask;
00734     mask = temp;
00735 }
00736 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends