StringHash.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "StringHash.h"
00019 #include "InputFile.h"
00020 #include "Error.h"
00021 
00022 StringHash::StringHash(int startsize)
00023 {
00024     count = 0;
00025     size  = startsize;
00026     mask  = startsize - 1;
00027 
00028     // In this implementation, the size of hash tables must be a power of two
00029     if (startsize & mask)
00030         error("StringHash: Hash table size must be a power of two.\n");
00031 
00032     strings = new String * [size];
00033     objects = new void * [size];
00034     keys    = new unsigned int [size];
00035 
00036     for (unsigned int i = 0; i < size; i++)
00037     {
00038         strings[i] = NULL;
00039         objects[i] = NULL;
00040     }
00041 };
00042 
00043 StringHash::~StringHash()
00044 {
00045     for (unsigned int i = 0; i < size; i++)
00046         if (strings[i] != NULL)
00047             delete strings[i];
00048 
00049     delete [] strings;
00050     delete [] objects;
00051     delete [] keys;
00052 }
00053 
00054 void StringHash::Clear()
00055 {
00056     for (unsigned int i = 0; i < size; i++)
00057         if (strings[i] != NULL)
00058         {
00059             delete strings[i];
00060             strings[i] = NULL;
00061         }
00062 
00063     count = 0;
00064 
00065     if (size > 256)
00066         SetSize(256);
00067 }
00068 
00069 void StringHash::SetSize(int newsize)
00070 {
00071     int newmask = newsize - 1;
00072 
00073     String   ** newstrings = new String * [newsize];
00074     void     ** newobjects = new void * [newsize];
00075     unsigned int * newkeys = new unsigned int [newsize];
00076 
00077     for (int i = 0; i < newsize; i++)
00078     {
00079         newstrings[i] = NULL;
00080         newobjects[i] = NULL;
00081     }
00082 
00083     if (count)
00084         for (unsigned int i = 0; i < size; i++)
00085             if (strings[i] != NULL)
00086             {
00087                 unsigned int key = keys[i];
00088                 unsigned int h   = key & newmask;
00089 
00090                 while (newstrings[h] != NULL &&
00091                         (newkeys[h] != key || newstrings[h]->SlowCompare(*strings[i]) != 0))
00092                     h = (h + 1) & newmask;
00093 
00094                 newkeys[h] = key;
00095                 newstrings[h] = strings[i];
00096                 newobjects[h] = objects[i];
00097             }
00098 
00099     delete [] strings;
00100     delete [] objects;
00101     delete [] keys;
00102 
00103     strings = newstrings;
00104     objects = newobjects;
00105     keys = newkeys;
00106     size = newsize;
00107     mask = newmask;
00108 }
00109 
00110 int StringHash::Add(const String & string, void * object)
00111 {
00112     unsigned int key = hash_no_case((unsigned char *)(const char *) string, string.Length(), 0);
00113     unsigned int h   = Iterate(key, string);
00114 
00115     if (strings[h] == NULL)
00116         Insert(h, key, string);
00117 
00118     objects[h] = object;
00119 
00120     if (count * 2 > size)
00121     {
00122         Grow();
00123         return Iterate(key, string);
00124     }
00125 
00126     return h;
00127 }
00128 
00129 int StringHash::Find(const String & string,  void *(*create_object)())
00130 {
00131     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00132     unsigned int h   = Iterate(key, string);
00133 
00134     if (strings[h] == NULL && create_object == NULL)
00135         return -1;
00136 
00137     if (strings[h] == NULL && create_object != NULL)
00138     {
00139         Insert(h, key, string);
00140         objects[h] = create_object();
00141 
00142         if (count * 2 > size)
00143         {
00144             Grow();
00145             return Iterate(key, string);
00146         }
00147     }
00148 
00149     return h;
00150 }
00151 
00152 int StringHash::Find(const String & string) const
00153 {
00154     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00155     unsigned int h   = Iterate(key, string);
00156 
00157     if (strings[h] == NULL)
00158         return -1;
00159 
00160     return h;
00161 }
00162 void * StringHash::CreateHash()
00163 {
00164     return (void *) new StringHash();
00165 }
00166 
00167 void StringHash::Delete(unsigned int index)
00168 {
00169     if (index >= size || strings[index] == NULL)
00170         return;
00171 
00172     delete strings[index];
00173     strings[index] = NULL;
00174     count--;
00175 
00176     if (count * 8 < size && size > 32)
00177         Shrink();
00178     else
00179     {
00180         // rehash the next strings until we find empty slot
00181         index = (index + 1) & mask;
00182 
00183         while (strings[index] != NULL)
00184         {
00185             if ((keys[index] & mask) != index)
00186             {
00187                 unsigned int h = Iterate(keys[index], *strings[index]);
00188 
00189                 if (h != (unsigned int) index)
00190                 {
00191                     keys[h] = keys[index];
00192                     strings[h] = strings[index];
00193                     objects[h] = objects[index];
00194 
00195                     strings[index] = NULL;
00196                     objects[index] = NULL;
00197                 }
00198             }
00199 
00200             index = (index + 1) & mask;
00201         }
00202     }
00203 }
00204 
00205 void StringHash::ReadLinesFromFile(const char * filename)
00206 {
00207     IFILE f = ifopen(filename, "rb");
00208     if (f == NULL) return;
00209     ReadLinesFromFile(f);
00210     ifclose(f);
00211 }
00212 
00213 void StringHash::ReadLinesFromFile(FILE * f)
00214 {
00215     String buffer;
00216     
00217     while (!feof(f))
00218     {
00219         buffer.ReadLine(f);
00220         Add(buffer.Trim());
00221     }
00222 }
00223 
00224 #ifdef __ZLIB_AVAILABLE__
00225 void StringHash::ReadLinesFromFile(IFILE & f)
00226 {
00227     String buffer;
00228 
00229     while (!ifeof(f))
00230     {
00231         buffer.ReadLine(f);
00232         Add(buffer.Trim());
00233     }
00234 }
00235 #endif
00236 
00237 // StringIntHash implementation
00238 
00239 StringIntHash::StringIntHash(int startsize)
00240 {
00241     count = 0;
00242     size  = startsize;
00243     mask  = startsize - 1;
00244 
00245     // In this implementation, the size of hash tables must be a power of two
00246     if (startsize & mask)
00247         error("StringIntHash: Hash table size must be a power of two.\n");
00248 
00249     strings  = new String * [size];
00250     integers = new int [size];
00251     keys     = new unsigned int [size];
00252 
00253     for (unsigned int i = 0; i < size; i++)
00254         strings[i] = NULL;
00255 };
00256 
00257 StringIntHash::~StringIntHash()
00258 {
00259     for (unsigned int i = 0; i < size; i++)
00260         if (strings[i] != NULL)
00261             delete strings[i];
00262 
00263     delete [] strings;
00264     delete [] integers;
00265     delete [] keys;
00266 }
00267 
00268 void StringIntHash::SetSize(int newsize)
00269 {
00270     int newmask = newsize - 1;
00271 
00272     String   ** newstrings = new String * [newsize];
00273     int      * newintegers = new int [newsize];
00274     unsigned int * newkeys = new unsigned int [newsize];
00275 
00276     for (int i = 0; i < newsize; i++)
00277         newstrings[i] = NULL;
00278 
00279     for (unsigned int i = 0; i < size; i++)
00280         if (strings[i] != NULL)
00281         {
00282             unsigned int key = keys[i];
00283             unsigned int h   = key & newmask;
00284 
00285             while (newstrings[h] != NULL &&
00286                     (newkeys[h] != key || newstrings[h]->SlowCompare(*strings[i]) != 0))
00287                 h = (h + 1) & newmask;
00288 
00289             newkeys[h] = key;
00290             newstrings[h] = strings[i];
00291             newintegers[h] = integers[i];
00292         }
00293 
00294     delete [] strings;
00295     delete [] integers;
00296     delete [] keys;
00297 
00298     strings = newstrings;
00299     integers = newintegers;
00300     keys = newkeys;
00301     size = newsize;
00302     mask = newmask;
00303 }
00304 
00305 void StringIntHash::Clear()
00306 {
00307     for (unsigned int i = 0; i < size; i++)
00308         if (strings[i] != NULL)
00309         {
00310             delete strings[i];
00311             strings[i] = NULL;
00312         }
00313 
00314     count = 0;
00315 
00316     if (size > 256)
00317         SetSize(256);
00318 }
00319 
00320 int StringIntHash::Add(const String & string, int value)
00321 {
00322     unsigned int key = hash_no_case((unsigned char *)(const char *) string, string.Length(), 0);
00323     unsigned int h   = Iterate(key, string);
00324 
00325     if (strings[h] == NULL)
00326         Insert(h, key, string);
00327 
00328     integers[h] = value;
00329 
00330     if (count * 2 > size)
00331     {
00332         Grow();
00333         return Iterate(key, string);
00334     }
00335 
00336     return h;
00337 }
00338 
00339 int StringIntHash::Find(const String & string,  int defaultValue)
00340 {
00341     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00342     unsigned int h   = Iterate(key, string);
00343 
00344     if (strings[h] == NULL)
00345     {
00346         Insert(h, key, string);
00347         integers[h] = defaultValue;
00348 
00349         if (count * 2 > size)
00350         {
00351             Grow();
00352             return Iterate(key, string);
00353         }
00354     }
00355 
00356     return h;
00357 }
00358 
00359 int StringIntHash::Find(const String & string) const
00360 {
00361     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00362     unsigned int h   = Iterate(key, string);
00363 
00364     if (strings[h] == NULL)
00365         return -1;
00366 
00367     return h;
00368 }
00369 
00370 void StringIntHash::Delete(unsigned int index)
00371 {
00372     if (index >= size || strings[index] == NULL)
00373         return;
00374 
00375     delete strings[index];
00376     strings[index] = NULL;
00377     count--;
00378 
00379     if (count * 8 < size && size > 32)
00380         Shrink();
00381     else
00382     {
00383         // rehash the next strings until we find empty slot
00384         index = (index + 1) & mask;
00385 
00386         while (strings[index] != NULL)
00387         {
00388             if ((keys[index] & mask) != index)
00389             {
00390                 unsigned int h = Iterate(keys[index], *strings[index]);
00391 
00392                 if (h != (unsigned int) index)
00393                 {
00394                     keys[h] = keys[index];
00395                     strings[h] = strings[index];
00396                     integers[h] = integers[index];
00397 
00398                     strings[index] = NULL;
00399                 }
00400             }
00401 
00402             index = (index + 1) & mask;
00403         }
00404     }
00405 }
00406 
00407 // StringDoubleHash implementation
00408 
00409 StringDoubleHash::StringDoubleHash(int startsize)
00410 {
00411     count = 0;
00412     size  = startsize;
00413     mask  = startsize - 1;
00414 
00415     // In this implementation, the size of hash tables must be a power of two
00416     if (startsize & mask)
00417         error("StringDoubleHash: Hash table size must be a power of two.\n");
00418 
00419     strings  = new String * [size];
00420     doubles  = new double [size];
00421     keys     = new unsigned int [size];
00422 
00423     for (unsigned int i = 0; i < size; i++)
00424         strings[i] = NULL;
00425 };
00426 
00427 StringDoubleHash::~StringDoubleHash()
00428 {
00429     for (unsigned int i = 0; i < size; i++)
00430         if (strings[i] != NULL)
00431             delete strings[i];
00432 
00433     delete [] strings;
00434     delete [] doubles;
00435     delete [] keys;
00436 }
00437 
00438 void StringDoubleHash::SetSize(int newsize)
00439 {
00440     int newmask = newsize - 1;
00441 
00442     String   ** newstrings = new String * [newsize];
00443     double    * newdoubles = new double [newsize];
00444     unsigned int * newkeys = new unsigned int [newsize];
00445 
00446     for (int i = 0; i < newsize; i++)
00447         newstrings[i] = NULL;
00448 
00449     for (unsigned int i = 0; i < size; i++)
00450         if (strings[i] != NULL)
00451         {
00452             unsigned int key = keys[i];
00453             unsigned int h   = key & newmask;
00454 
00455             while (newstrings[h] != NULL &&
00456                     (newkeys[h] != key || newstrings[h]->SlowCompare(*strings[i]) != 0))
00457                 h = (h + 1) & newmask;
00458 
00459             newkeys[h] = key;
00460             newstrings[h] = strings[i];
00461             newdoubles[h] = doubles[i];
00462         }
00463 
00464     delete [] strings;
00465     delete [] doubles;
00466     delete [] keys;
00467 
00468     strings = newstrings;
00469     doubles = newdoubles;
00470     keys = newkeys;
00471     size = newsize;
00472     mask = newmask;
00473 }
00474 
00475 int StringDoubleHash::Add(const String & string, double value)
00476 {
00477     unsigned int key = hash_no_case((unsigned char *)(const char *) string, string.Length(), 0);
00478     unsigned int h   = Iterate(key, string);
00479 
00480     if (strings[h] == NULL)
00481         Insert(h, key, string);
00482 
00483     doubles[h] = value;
00484 
00485     if (count * 2 > size)
00486     {
00487         Grow();
00488         return Iterate(key, string);
00489     }
00490 
00491     return h;
00492 }
00493 
00494 int StringDoubleHash::Find(const String & string, double defaultValue)
00495 {
00496     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00497     unsigned int h   = Iterate(key, string);
00498 
00499     if (strings[h] == NULL)
00500     {
00501         Insert(h, key, string);
00502         doubles[h] = defaultValue;
00503 
00504         if (count * 2 > size)
00505         {
00506             Grow();
00507             return Iterate(key, string);
00508         }
00509     }
00510 
00511     return h;
00512 }
00513 
00514 int StringDoubleHash::Find(const String & string) const
00515 {
00516     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00517     unsigned int h   = Iterate(key, string);
00518 
00519     if (strings[h] == NULL)
00520         return -1;
00521 
00522     return h;
00523 }
00524 
00525 void StringDoubleHash::Delete(unsigned int index)
00526 {
00527     if (index >= size || strings[index] == NULL)
00528         return;
00529 
00530     delete strings[index];
00531     strings[index] = NULL;
00532     count--;
00533 
00534     if (count * 8 < size && size > 32)
00535         Shrink();
00536     else
00537     {
00538         // rehash the next strings until we find empty slot
00539         index = (index + 1) & mask;
00540 
00541         while (strings[index] != NULL)
00542         {
00543             if ((keys[index] & mask) != index)
00544             {
00545                 unsigned int h = Iterate(keys[index], *strings[index]);
00546 
00547                 if (h != (unsigned int) index)
00548                 {
00549                     keys[h] = keys[index];
00550                     strings[h] = strings[index];
00551                     doubles[h] = doubles[index];
00552 
00553                     strings[index] = NULL;
00554                 }
00555             }
00556 
00557             index = (index + 1) & mask;
00558         }
00559     }
00560 }
00561 
00562 void StringHash::Print()
00563 {
00564     Print(stdout);
00565 }
00566 
00567 void StringHash::Print(const char * filename)
00568 {
00569     FILE * output = fopen(filename, "wt");
00570     if (output == NULL)
00571         return;
00572     Print(output);
00573     fclose(output);
00574 }
00575 
00576 void StringHash::Print(FILE * output)
00577 {
00578     for (unsigned int i = 0; i < size; i++)
00579         if (SlotInUse(i))
00580             strings[i]->WriteLine(output);
00581 }
00582 
00583 String StringHash::StringList(char separator)
00584 {
00585     String list;
00586 
00587     for (unsigned int i = 0; i < size; i++)
00588         if (SlotInUse(i))
00589             list += *strings[i] + separator;
00590 
00591     list.SetLength(list.Length() - 1);
00592 
00593     return list;
00594 }
00595 
00596 int StringIntHash::GetCount(const String & key) const
00597 {
00598     int index = Find(key);
00599     return index == -1 ?  0 : integers[index];
00600 }
00601 
00602 int StringIntHash::IncrementCount(const String & key)
00603 {
00604     int index = Find(key);
00605 
00606     if (index != -1)
00607         return ++(integers[index]);
00608 
00609     SetInteger(key, 1);
00610     return 1;
00611 }
00612 
00613 int StringIntHash::IncrementCount(const String & key, int amount)
00614 {
00615     int index = Find(key);
00616 
00617     if (index != -1)
00618         return (integers[index] += amount);
00619 
00620     SetInteger(key, amount);
00621     return amount;
00622 }
00623 
00624 int StringIntHash::DecrementCount(const String & key)
00625 {
00626     int index = Find(key);
00627 
00628     if (index != -1)
00629         return --(integers[index]);
00630 
00631     SetInteger(key, -1);
00632     return -1;
00633 }
00634 
00635 void StringDoubleHash::Clear()
00636 {
00637     for (unsigned int i = 0; i < size; i++)
00638         if (strings[i] != NULL)
00639         {
00640             delete strings[i];
00641             strings[i] = NULL;
00642         }
00643 
00644     count = 0;
00645 
00646     if (size > 256)
00647         SetSize(256);
00648 }
00649 
00650 StringHash & StringHash::operator = (const StringHash & rhs)
00651 {
00652     Clear();
00653 
00654     for (int i = 0; i < rhs.Capacity(); i++)
00655         if (rhs.SlotInUse(i))
00656             Add(*(rhs.strings[i]), rhs.objects[i]);
00657 
00658     return *this;
00659 }
00660 
00661 StringIntHash & StringIntHash::operator = (const StringIntHash & rhs)
00662 {
00663     Clear();
00664 
00665     for (int i = 0; i < rhs.Capacity(); i++)
00666         if (rhs.SlotInUse(i))
00667             Add(*(rhs.strings[i]), rhs.integers[i]);
00668 
00669     return *this;
00670 }
00671 
00672 StringDoubleHash & StringDoubleHash::operator = (const StringDoubleHash & rhs)
00673 {
00674     Clear();
00675 
00676     for (int i = 0; i < rhs.Capacity(); i++)
00677         if (rhs.SlotInUse(i))
00678             Add(*(rhs.strings[i]), rhs.doubles[i]);
00679 
00680     return *this;
00681 }
00682 
00683 void StringHash::Swap(StringHash & s)
00684 {
00685     String ** tstrings = s.strings;
00686     s.strings = strings;
00687     strings = tstrings;
00688 
00689     void ** tobjects = s.objects;
00690     s.objects = objects;
00691     objects = tobjects;
00692 
00693     unsigned int * tkeys = s.keys;
00694     s.keys = keys;
00695     keys = tkeys;
00696 
00697     unsigned int temp = s.count;
00698     s.count = count;
00699     count = temp;
00700 
00701     temp = s.size;
00702     s.size = size;
00703     size = temp;
00704 
00705     temp = s.mask;
00706     s.mask = mask;
00707     mask = temp;
00708 }
00709 
Generated on Tue Sep 6 17:52:01 2011 for libStatGen Software by  doxygen 1.6.3