StringHash.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "StringHash.h"
00019 #include "Error.h"
00020 
00021 StringHash::StringHash(int startsize)
00022 {
00023     count = 0;
00024     size  = startsize;
00025     mask  = startsize - 1;
00026 
00027     // In this implementation, the size of hash tables must be a power of two
00028     if (startsize & mask)
00029         error("StringHash: Hash table size must be a power of two.\n");
00030 
00031     strings = new String * [size];
00032     objects = new void * [size];
00033     keys    = new unsigned int [size];
00034 
00035     for (unsigned int i = 0; i < size; i++)
00036     {
00037         strings[i] = NULL;
00038         objects[i] = NULL;
00039     }
00040 };
00041 
00042 StringHash::~StringHash()
00043 {
00044     for (unsigned int i = 0; i < size; i++)
00045         if (strings[i] != NULL)
00046             delete strings[i];
00047 
00048     delete [] strings;
00049     delete [] objects;
00050     delete [] keys;
00051 }
00052 
00053 void StringHash::Clear()
00054 {
00055     for (unsigned int i = 0; i < size; i++)
00056         if (strings[i] != NULL)
00057         {
00058             delete strings[i];
00059             strings[i] = NULL;
00060         }
00061 
00062     count = 0;
00063 
00064     if (size > 256)
00065         SetSize(256);
00066 }
00067 
00068 void StringHash::SetSize(int newsize)
00069 {
00070     int newmask = newsize - 1;
00071 
00072     String   ** newstrings = new String * [newsize];
00073     void     ** newobjects = new void * [newsize];
00074     unsigned int * newkeys = new unsigned int [newsize];
00075 
00076     for (int i = 0; i < newsize; i++)
00077     {
00078         newstrings[i] = NULL;
00079         newobjects[i] = NULL;
00080     }
00081 
00082     if (count)
00083         for (unsigned int i = 0; i < size; i++)
00084             if (strings[i] != NULL)
00085             {
00086                 unsigned int key = keys[i];
00087                 unsigned int h   = key & newmask;
00088 
00089                 while (newstrings[h] != NULL &&
00090                         (newkeys[h] != key || newstrings[h]->SlowCompare(*strings[i]) != 0))
00091                     h = (h + 1) & newmask;
00092 
00093                 newkeys[h] = key;
00094                 newstrings[h] = strings[i];
00095                 newobjects[h] = objects[i];
00096             }
00097 
00098     delete [] strings;
00099     delete [] objects;
00100     delete [] keys;
00101 
00102     strings = newstrings;
00103     objects = newobjects;
00104     keys = newkeys;
00105     size = newsize;
00106     mask = newmask;
00107 }
00108 
00109 int StringHash::Add(const String & string, void * object)
00110 {
00111     unsigned int key = hash_no_case((unsigned char *)(const char *) string, string.Length(), 0);
00112     unsigned int h   = Iterate(key, string);
00113 
00114     if (strings[h] == NULL)
00115         Insert(h, key, string);
00116 
00117     objects[h] = object;
00118 
00119     if (count * 2 > size)
00120     {
00121         Grow();
00122         return Iterate(key, string);
00123     }
00124 
00125     return h;
00126 }
00127 
00128 int StringHash::Find(const String & string,  void *(*create_object)())
00129 {
00130     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00131     unsigned int h   = Iterate(key, string);
00132 
00133     if (strings[h] == NULL && create_object == NULL)
00134         return -1;
00135 
00136     if (strings[h] == NULL && create_object != NULL)
00137     {
00138         Insert(h, key, string);
00139         objects[h] = create_object();
00140 
00141         if (count * 2 > size)
00142         {
00143             Grow();
00144             return Iterate(key, string);
00145         }
00146     }
00147 
00148     return h;
00149 }
00150 
00151 int StringHash::Find(const String & string) const
00152 {
00153     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00154     unsigned int h   = Iterate(key, string);
00155 
00156     if (strings[h] == NULL)
00157         return -1;
00158 
00159     return h;
00160 }
00161 void * StringHash::CreateHash()
00162 {
00163     return (void *) new StringHash();
00164 }
00165 
00166 void StringHash::Delete(unsigned int index)
00167 {
00168     if (index >= size || strings[index] == NULL)
00169         return;
00170 
00171     delete strings[index];
00172     strings[index] = NULL;
00173     count--;
00174 
00175     if (count * 8 < size && size > 32)
00176         Shrink();
00177     else
00178     {
00179         // rehash the next strings until we find empty slot
00180         index = (index + 1) & mask;
00181 
00182         while (strings[index] != NULL)
00183         {
00184             if ((keys[index] & mask) != index)
00185             {
00186                 unsigned int h = Iterate(keys[index], *strings[index]);
00187 
00188                 if (h != (unsigned int) index)
00189                 {
00190                     keys[h] = keys[index];
00191                     strings[h] = strings[index];
00192                     objects[h] = objects[index];
00193 
00194                     strings[index] = NULL;
00195                     objects[index] = NULL;
00196                 }
00197             }
00198 
00199             index = (index + 1) & mask;
00200         }
00201     }
00202 }
00203 
00204 void StringHash::ReadLinesFromFile(const char * filename)
00205 {
00206     IFILE f = ifopen(filename, "rb");
00207     if (f == NULL) return;
00208     ReadLinesFromFile(f);
00209     ifclose(f);
00210 }
00211 
00212 void StringHash::ReadLinesFromFile(FILE * f)
00213 {
00214     String buffer;
00215 
00216     while (!feof(f))
00217     {
00218         buffer.ReadLine(f);
00219         Add(buffer.Trim());
00220     }
00221 }
00222 
00223 #ifdef __ZLIB_AVAILABLE__
00224 void StringHash::ReadLinesFromFile(IFILE & f)
00225 {
00226     String buffer;
00227 
00228     while (!ifeof(f))
00229     {
00230         buffer.ReadLine(f);
00231         Add(buffer.Trim());
00232     }
00233 }
00234 #endif
00235 
00236 // StringIntHash implementation
00237 
00238 StringIntHash::StringIntHash(int startsize)
00239 {
00240     count = 0;
00241     size  = startsize;
00242     mask  = startsize - 1;
00243 
00244     // In this implementation, the size of hash tables must be a power of two
00245     if (startsize & mask)
00246         error("StringIntHash: Hash table size must be a power of two.\n");
00247 
00248     strings  = new String * [size];
00249     integers = new int [size];
00250     keys     = new unsigned int [size];
00251 
00252     for (unsigned int i = 0; i < size; i++)
00253         strings[i] = NULL;
00254 };
00255 
00256 StringIntHash::~StringIntHash()
00257 {
00258     for (unsigned int i = 0; i < size; i++)
00259         if (strings[i] != NULL)
00260             delete strings[i];
00261 
00262     delete [] strings;
00263     delete [] integers;
00264     delete [] keys;
00265 }
00266 
00267 void StringIntHash::SetSize(int newsize)
00268 {
00269     int newmask = newsize - 1;
00270 
00271     String   ** newstrings = new String * [newsize];
00272     int      * newintegers = new int [newsize];
00273     unsigned int * newkeys = new unsigned int [newsize];
00274 
00275     for (int i = 0; i < newsize; i++)
00276         newstrings[i] = NULL;
00277 
00278     for (unsigned int i = 0; i < size; i++)
00279         if (strings[i] != NULL)
00280         {
00281             unsigned int key = keys[i];
00282             unsigned int h   = key & newmask;
00283 
00284             while (newstrings[h] != NULL &&
00285                     (newkeys[h] != key || newstrings[h]->SlowCompare(*strings[i]) != 0))
00286                 h = (h + 1) & newmask;
00287 
00288             newkeys[h] = key;
00289             newstrings[h] = strings[i];
00290             newintegers[h] = integers[i];
00291         }
00292 
00293     delete [] strings;
00294     delete [] integers;
00295     delete [] keys;
00296 
00297     strings = newstrings;
00298     integers = newintegers;
00299     keys = newkeys;
00300     size = newsize;
00301     mask = newmask;
00302 }
00303 
00304 void StringIntHash::Clear()
00305 {
00306     for (unsigned int i = 0; i < size; i++)
00307         if (strings[i] != NULL)
00308         {
00309             delete strings[i];
00310             strings[i] = NULL;
00311         }
00312 
00313     count = 0;
00314 
00315     if (size > 256)
00316         SetSize(256);
00317 }
00318 
00319 int StringIntHash::Add(const String & string, int value)
00320 {
00321     unsigned int key = hash_no_case((unsigned char *)(const char *) string, string.Length(), 0);
00322     unsigned int h   = Iterate(key, string);
00323 
00324     if (strings[h] == NULL)
00325         Insert(h, key, string);
00326 
00327     integers[h] = value;
00328 
00329     if (count * 2 > size)
00330     {
00331         Grow();
00332         return Iterate(key, string);
00333     }
00334 
00335     return h;
00336 }
00337 
00338 int StringIntHash::Find(const String & string,  int defaultValue)
00339 {
00340     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00341     unsigned int h   = Iterate(key, string);
00342 
00343     if (strings[h] == NULL)
00344     {
00345         Insert(h, key, string);
00346         integers[h] = defaultValue;
00347 
00348         if (count * 2 > size)
00349         {
00350             Grow();
00351             return Iterate(key, string);
00352         }
00353     }
00354 
00355     return h;
00356 }
00357 
00358 int StringIntHash::Find(const String & string) const
00359 {
00360     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00361     unsigned int h   = Iterate(key, string);
00362 
00363     if (strings[h] == NULL)
00364         return -1;
00365 
00366     return h;
00367 }
00368 
00369 void StringIntHash::Delete(unsigned int index)
00370 {
00371     if (index >= size || strings[index] == NULL)
00372         return;
00373 
00374     delete strings[index];
00375     strings[index] = NULL;
00376     count--;
00377 
00378     if (count * 8 < size && size > 32)
00379         Shrink();
00380     else
00381     {
00382         // rehash the next strings until we find empty slot
00383         index = (index + 1) & mask;
00384 
00385         while (strings[index] != NULL)
00386         {
00387             if ((keys[index] & mask) != index)
00388             {
00389                 unsigned int h = Iterate(keys[index], *strings[index]);
00390 
00391                 if (h != (unsigned int) index)
00392                 {
00393                     keys[h] = keys[index];
00394                     strings[h] = strings[index];
00395                     integers[h] = integers[index];
00396 
00397                     strings[index] = NULL;
00398                 }
00399             }
00400 
00401             index = (index + 1) & mask;
00402         }
00403     }
00404 }
00405 
00406 // StringDoubleHash implementation
00407 
00408 StringDoubleHash::StringDoubleHash(int startsize)
00409 {
00410     count = 0;
00411     size  = startsize;
00412     mask  = startsize - 1;
00413 
00414     // In this implementation, the size of hash tables must be a power of two
00415     if (startsize & mask)
00416         error("StringDoubleHash: Hash table size must be a power of two.\n");
00417 
00418     strings  = new String * [size];
00419     doubles  = new double [size];
00420     keys     = new unsigned int [size];
00421 
00422     for (unsigned int i = 0; i < size; i++)
00423         strings[i] = NULL;
00424 };
00425 
00426 StringDoubleHash::~StringDoubleHash()
00427 {
00428     for (unsigned int i = 0; i < size; i++)
00429         if (strings[i] != NULL)
00430             delete strings[i];
00431 
00432     delete [] strings;
00433     delete [] doubles;
00434     delete [] keys;
00435 }
00436 
00437 void StringDoubleHash::SetSize(int newsize)
00438 {
00439     int newmask = newsize - 1;
00440 
00441     String   ** newstrings = new String * [newsize];
00442     double    * newdoubles = new double [newsize];
00443     unsigned int * newkeys = new unsigned int [newsize];
00444 
00445     for (int i = 0; i < newsize; i++)
00446         newstrings[i] = NULL;
00447 
00448     for (unsigned int i = 0; i < size; i++)
00449         if (strings[i] != NULL)
00450         {
00451             unsigned int key = keys[i];
00452             unsigned int h   = key & newmask;
00453 
00454             while (newstrings[h] != NULL &&
00455                     (newkeys[h] != key || newstrings[h]->SlowCompare(*strings[i]) != 0))
00456                 h = (h + 1) & newmask;
00457 
00458             newkeys[h] = key;
00459             newstrings[h] = strings[i];
00460             newdoubles[h] = doubles[i];
00461         }
00462 
00463     delete [] strings;
00464     delete [] doubles;
00465     delete [] keys;
00466 
00467     strings = newstrings;
00468     doubles = newdoubles;
00469     keys = newkeys;
00470     size = newsize;
00471     mask = newmask;
00472 }
00473 
00474 int StringDoubleHash::Add(const String & string, double value)
00475 {
00476     unsigned int key = hash_no_case((unsigned char *)(const char *) string, string.Length(), 0);
00477     unsigned int h   = Iterate(key, string);
00478 
00479     if (strings[h] == NULL)
00480         Insert(h, key, string);
00481 
00482     doubles[h] = value;
00483 
00484     if (count * 2 > size)
00485     {
00486         Grow();
00487         return Iterate(key, string);
00488     }
00489 
00490     return h;
00491 }
00492 
00493 int StringDoubleHash::Find(const String & string, double defaultValue)
00494 {
00495     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00496     unsigned int h   = Iterate(key, string);
00497 
00498     if (strings[h] == NULL)
00499     {
00500         Insert(h, key, string);
00501         doubles[h] = defaultValue;
00502 
00503         if (count * 2 > size)
00504         {
00505             Grow();
00506             return Iterate(key, string);
00507         }
00508     }
00509 
00510     return h;
00511 }
00512 
00513 int StringDoubleHash::Find(const String & string) const
00514 {
00515     unsigned int key = hash_no_case(string.uchar(), string.Length(), 0);
00516     unsigned int h   = Iterate(key, string);
00517 
00518     if (strings[h] == NULL)
00519         return -1;
00520 
00521     return h;
00522 }
00523 
00524 void StringDoubleHash::Delete(unsigned int index)
00525 {
00526     if (index >= size || strings[index] == NULL)
00527         return;
00528 
00529     delete strings[index];
00530     strings[index] = NULL;
00531     count--;
00532 
00533     if (count * 8 < size && size > 32)
00534         Shrink();
00535     else
00536     {
00537         // rehash the next strings until we find empty slot
00538         index = (index + 1) & mask;
00539 
00540         while (strings[index] != NULL)
00541         {
00542             if ((keys[index] & mask) != index)
00543             {
00544                 unsigned int h = Iterate(keys[index], *strings[index]);
00545 
00546                 if (h != (unsigned int) index)
00547                 {
00548                     keys[h] = keys[index];
00549                     strings[h] = strings[index];
00550                     doubles[h] = doubles[index];
00551 
00552                     strings[index] = NULL;
00553                 }
00554             }
00555 
00556             index = (index + 1) & mask;
00557         }
00558     }
00559 }
00560 
00561 void StringHash::Print()
00562 {
00563     Print(stdout);
00564 }
00565 
00566 void StringHash::Print(const char * filename)
00567 {
00568     FILE * output = fopen(filename, "wt");
00569     if (output == NULL)
00570         return;
00571     Print(output);
00572     fclose(output);
00573 }
00574 
00575 void StringHash::Print(FILE * output)
00576 {
00577     for (unsigned int i = 0; i < size; i++)
00578         if (SlotInUse(i))
00579             strings[i]->WriteLine(output);
00580 }
00581 
00582 int StringIntHash::GetCount(const String & key) const
00583 {
00584     int index = Find(key);
00585     return index == -1 ?  0 : integers[index];
00586 }
00587 
00588 int StringIntHash::IncrementCount(const String & key)
00589 {
00590     int index = Find(key);
00591 
00592     if (index != -1)
00593         return ++(integers[index]);
00594 
00595     SetInteger(key, 1);
00596     return 1;
00597 }
00598 
00599 int StringIntHash::DecrementCount(const String & key)
00600 {
00601     int index = Find(key);
00602 
00603     if (index != -1)
00604         return --(integers[index]);
00605 
00606     SetInteger(key, -1);
00607     return -1;
00608 }
00609 
00610 void StringDoubleHash::Clear()
00611 {
00612     for (unsigned int i = 0; i < size; i++)
00613         if (strings[i] != NULL)
00614         {
00615             delete strings[i];
00616             strings[i] = NULL;
00617         }
00618 
00619     count = 0;
00620 
00621     if (size > 256)
00622         SetSize(256);
00623 }
00624 
00625 StringHash & StringHash::operator = (const StringHash & rhs)
00626 {
00627     Clear();
00628 
00629     for (int i = 0; i < rhs.Capacity(); i++)
00630         if (rhs.SlotInUse(i))
00631             Add(*(rhs.strings[i]), rhs.objects[i]);
00632 
00633     return *this;
00634 }
00635 
00636 StringIntHash & StringIntHash::operator = (const StringIntHash & rhs)
00637 {
00638     Clear();
00639 
00640     for (int i = 0; i < rhs.Capacity(); i++)
00641         if (rhs.SlotInUse(i))
00642             Add(*(rhs.strings[i]), rhs.integers[i]);
00643 
00644     return *this;
00645 }
00646 
00647 StringDoubleHash & StringDoubleHash::operator = (const StringDoubleHash & rhs)
00648 {
00649     Clear();
00650 
00651     for (int i = 0; i < rhs.Capacity(); i++)
00652         if (rhs.SlotInUse(i))
00653             Add(*(rhs.strings[i]), rhs.doubles[i]);
00654 
00655     return *this;
00656 }
00657 
00658 void StringHash::Swap(StringHash & s)
00659 {
00660     String ** tstrings = s.strings;
00661     s.strings = strings;
00662     strings = tstrings;
00663 
00664     void ** tobjects = s.objects;
00665     s.objects = objects;
00666     objects = tobjects;
00667 
00668     unsigned int * tkeys = s.keys;
00669     s.keys = keys;
00670     keys = tkeys;
00671 
00672     unsigned int temp = s.count;
00673     s.count = count;
00674     count = temp;
00675 
00676     temp = s.size;
00677     s.size = size;
00678     size = temp;
00679 
00680     temp = s.mask;
00681     s.mask = mask;
00682     mask = temp;
00683 }
00684 
Generated on Wed Nov 17 15:38:29 2010 for StatGen Software by  doxygen 1.6.3