MemoryMapArray.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __MEMORYMAPARRAY_H
00019 #define __MEMORYMAPARRAY_H
00020 
00021 #ifndef __STDC_LIMIT_MACROS
00022 #define __STDC_LIMIT_MACROS
00023 #endif
00024 #include <errno.h>
00025 #include <stdint.h>
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 
00030 #include <string>
00031 #include <sys/types.h>
00032 #include <time.h>
00033 
00034 // STL:
00035 #include <ostream>
00036 #include <sstream>
00037 
00038 #include "MemoryMap.h"
00039 
00040 
00041 //
00042 // This file defines a template for generating memory map backed arrays
00043 // of different types of values.
00044 //
00045 // The template assumes that the mapped files are broken into two parts,
00046 // first, a header (MemoryMapArrayHeader), then followed by the data
00047 // in the array.
00048 //
00049 // typedefs are used to declare various types of arrays beforehand,
00050 // since there will be only a few.
00051 //
// They are:
//      mmapArrayUint32_t;
//      mmapArrayBool_t;
//      mmapArray2Bit_t;
//      mmapArray4Bit_t;
00056 //
// XXX consider recording argv[0] (the application name) automatically;
// the creating user, host and creation date are already filled in by
// MemoryMapArray::create().
00058 //
/// Fixed-layout header found at the very start of a memory-mapped array.
///
/// MemoryMapArray obtains this object by casting the start of the mapped
/// region, so it is used as a plain data record: constructorClear() stands
/// in for a constructor, and all data members stay public and
/// layout-compatible with existing files.
class MemoryMapArrayHeader
{
public:
    /// Zero every byte of the header, text fields included.
    void constructorClear()
    {
        memset(this, 0, sizeof(*this));
    }
    uint32_t     typeCookie;        // identifies the array type; checked on open
    uint32_t     typeVersion;       // version of the array type; checked on open
    uint32_t     contentCookie;     // application-defined content identifier
    uint32_t     contentVersion;    // application-defined content version
    size_t       headerSize;        // byte offset from the mapping start to the array data

    // file generation info
    time_t      creationDate;
    char        creationUser[32];
    char        creationHost[32];
    char        application[32];
    // now describe the data:
    size_t      elementCount;
    void debugPrint(FILE *);

    /// Size in bytes of this header.  The argument (the caller's optional
    /// header count) is accepted for interface compatibility and ignored.
    size_t getHeaderSize(int /* optionalHeaderCount */)
    {
        return sizeof(*this);
    }

    /// Store the application name, truncated to fit and always terminated.
    void setApplication(const char *s)
    {
        copyText(application, sizeof(application), s);
    }
    /// Store the creating user's name, truncated to fit and always terminated.
    void setCreationUser(const char *s)
    {
        copyText(creationUser, sizeof(creationUser), s);
    }
    /// Store the creating host's name, truncated to fit and always terminated.
    void setCreationHost(const char *s)
    {
        copyText(creationHost, sizeof(creationHost), s);
    }

private:
    /// Copy src into the fixed-size field dst (dstSize bytes in total).
    /// A NULL src is treated as the empty string.  strncpy() does not write
    /// a terminator when src fills the field, so the last byte is set
    /// explicitly instead of relying on the field having been zeroed.
    static void copyText(char *dst, size_t dstSize, const char *src)
    {
        if (src == NULL) src = "";
        strncpy(dst, src, dstSize - 1);
        dst[dstSize - 1] = '\0';
    }
};
00098 
00099 //
00100 // stream output for header information
00101 //
00102 std::ostream &operator << (std::ostream &stream, MemoryMapArrayHeader &h);
00103 
00104 //
// This class object represents the application specific information that doesn't
// fit in the general header above.  Since it is only allocated via an mmap operation,
// as part of the mapped file, the destructor must never be called.  (No destructor,
// virtual or otherwise, is declared for it.)
00109 //
00110 // For many arrays, this will be empty.
00111 //
/// Base for application-specific headers; it only carries the size of the
/// header.
struct MemoryMapGenericHeader
{
protected:
    size_t  headerSize;     // set in ::create and ::open only
public:
    /// Stored header size in bytes.  Const-qualified so it can also be
    /// queried through a const header.
    size_t getHeaderSize() const
    {
        return headerSize;
    }
    // other stuff follows...
};
00123 
00124 template <
00125 class elementT,
00126 typename indexT,
00127 unsigned int cookieVal,
00128 unsigned int versionVal,
00129 elementT accessorFunc(void *base, indexT),
00130 void setterFunc(void *base, indexT, elementT),
00131 size_t elementCount2BytesFunc(indexT),
00132 class arrayHeaderClass>
00133 class MemoryMapArray : public MemoryMap
00134 {
00135 protected:
00136     arrayHeaderClass    *header;
00137     void                *data;
00138     std::string         errorStr;
00139 public:
00140     void constructorClear()
00141     {
00142         header = NULL;
00143         data = NULL;
00144 //      errorStr = "";
00145     }
00146     MemoryMapArray()
00147     {
00148         constructorClear();
00149     }
00150     ~MemoryMapArray()
00151     {
00152         if (data) close();
00153     }
00154 
00155     const std::string &getErrorString()
00156     {
00157         return errorStr;
00158     }
00159 
00160     arrayHeaderClass &getHeader()
00161     {
00162         return *header;
00163     }
00164 
00165     void setContentCookie(uint32_t c)
00166     {
00167         header->contentCookie = c;
00168     }
00169     void setContentVersion(uint32_t v)
00170     {
00171         header->contentVersion = v;
00172     }
00173 
00174     // accessing
00175     inline elementT operator[](indexT i)
00176     {
00177         return accessorFunc(data, i);
00178     }
00179     inline void set(indexT i, elementT v)
00180     {
00181         setterFunc(data, i, v);
00182     }
00183 
00184 
00185 
00186     /// Create a vector with elementCount memebers.
00187     //
00188     /// Does administrative setup of the header and populating this
00189     /// class members.  User will need to finish populating the
00190     /// contents of the metaData and data sections.
00191     ///
00192     /// If file==NULL, the underlying allocation is done via malloc(),
00193     /// so that the results of write access to this vecor are not
00194     /// saved in a file.
00195     ///
00196     /// If file!=NULL, a file will be created on disk, and all
00197     /// write accesses done via the method ::set will be persistent
00198     /// in that file.
00199     ///
00200     int create(const char *file, indexT elementCount, int optionalHeaderCount = 0)
00201     {
00202         size_t len = elementCount2BytesFunc(elementCount) +
00203                      header->getHeaderSize(optionalHeaderCount);
00204         int rc;
00205         rc = MemoryMap::create(file, len);
00206         if (rc)
00207         {
00208             std::ostringstream buf;
00209             buf << file << ": failed to create file";
00210             errorStr = buf.str();
00211             close();
00212             return rc;
00213         }
00214         header = (arrayHeaderClass *) MemoryMap::data;
00215         header->constructorClear();
00216         header->typeCookie = cookieVal;
00217         header->typeVersion = versionVal;
00218         header->headerSize = header->getHeaderSize(optionalHeaderCount);
00219         header->elementCount = elementCount;
00220         data = (elementT *)((char *) MemoryMap::data + header->headerSize);
00221 
00222         const char *env;
00223         char hostname[256];
00224         env = getenv("USER");
00225         if (env) header->setCreationUser(env);
00226         header->creationDate = time(NULL);
00227 #if defined(WIN32)
00228         hostname[0] = '\0';
00229 #else
00230         gethostname(hostname, sizeof(hostname));
00231 #endif
00232         header->setCreationHost(hostname);
00233         return 0;
00234     }
00235 
00236     /// allow anonymous (malloc) create.
00237     ///
00238     /// we do this when we don't expect to save the results.
00239     ///
00240     /// The single use case so far is in GenomeSequence::populateDBSNP.
00241     ///
00242     int create(indexT elementCount, int optionalHeaderCount = 0)
00243     {
00244         return create(NULL, elementCount, optionalHeaderCount);
00245     }
00246 
00247     //
00248     // Open the given filename.  flags may be set to
00249     // O_RDONLY or O_RDWR, and allows the file to be
00250     // condtionally written to.
00251     //
00252     // Several sanity checks are done:
00253     //   compare the expected cookie value to the actual one
00254     //   compare the expected version value to the actual one
00255     //
00256     // if either condition is not met, the member errorStr is
00257     // set to explain why, and true is returned.
00258     //
00259     // If there were no errors, false is returned.
00260     //
00261     bool open(const char *file, int flags = O_RDONLY)
00262     {
00263         int rc = MemoryMap::open(file, flags);
00264         if (rc)
00265         {
00266             std::ostringstream buf;
00267             buf << file << ": open() failed (error=" << strerror(errno) << ").";
00268             errorStr = buf.str();
00269             return true;
00270         }
00271         header = (arrayHeaderClass *) MemoryMap::data;
00272         data = (elementT *)((char *) MemoryMap::data + header->headerSize);
00273         if (header->typeCookie!=cookieVal)
00274         {
00275             std::ostringstream buf;
00276             buf << file << ": wrong type of file (expected type "
00277             << cookieVal << " but got " << header->typeCookie << ")";
00278             errorStr = buf.str();
00279             // XXX insert better error handling
00280             close();
00281             return true;
00282         }
00283         if (header->typeVersion!=versionVal)
00284         {
00285             std::ostringstream buf;
00286             buf << file << ": wrong version of file (expected version "
00287             << versionVal << " but got " << header->typeVersion << ")";
00288             errorStr = buf.str();
00289             // XXX insert better error handling
00290             close();
00291             return true;
00292         }
00293         return false;
00294     }
00295 
00296     void close()
00297     {
00298         constructorClear();
00299         MemoryMap::close();
00300     }
00301     void debugPrint(FILE *f)
00302     {
00303         if (header) header->debugPrint(f);
00304     }
00305 
00306     size_t getElementCount()
00307     {
00308         return header->elementCount;
00309     }
00310 
00311 };
00312 
00313 struct emptyGenericHeader : public MemoryMapGenericHeader
00314 {
00315 public:
00316     size_t getHeaderSize()
00317     {
00318         return sizeof(*this);
00319     }
00320 };
00321 
00322 //
00323 // define the uint32 array type:
00324 //
/// Read the uint32_t at position index of the buffer starting at base.
inline uint32_t mmapUint32Access(void *base, uint32_t index)
{
    const uint32_t *words = static_cast<const uint32_t *>(base);
    return *(words + index);
}
/// Store v at position index of the uint32_t buffer starting at base.
inline void mmapUint32Set(void *base, uint32_t index, uint32_t v)
{
    uint32_t *slot = static_cast<uint32_t *>(base) + index;
    *slot = v;
}
/// Number of bytes occupied by i uint32_t elements.
inline size_t mmapUint32elementCount2Bytes(uint32_t i)
{
    size_t bytes = sizeof(uint32_t);
    bytes *= i;
    return bytes;
}
00337 
00338 typedef MemoryMapArray<
00339 uint32_t,
00340 uint32_t,
00341 0x16b3816c,
00342 20090109,
00343 mmapUint32Access,
00344 mmapUint32Set,
00345 mmapUint32elementCount2Bytes,
00346 MemoryMapArrayHeader
00347 > mmapArrayUint32_t;
00348 
00349 //
00350 // define the boolean memory mapped array type.
00351 // NB: it is limited to 2**32 elements
00352 //
/// Read bit i of the packed bit array at base (bit 0 is the least
/// significant bit of byte 0).
///
/// The byte is read as unsigned char, like the other packed accessors in
/// this file, so a byte with its top bit set is never right-shifted as a
/// negative value.
inline bool mmapBoolAccess(void *base, uint32_t i)
{
    const unsigned char packed = static_cast<const unsigned char *>(base)[i >> 3];
    return ((packed >> (i & 0x7)) & 0x1) != 0;
}
/// Set bit i of the packed bit array at base to v, leaving the other
/// seven bits of that byte unchanged.
inline void mmapBoolSet(void *base, uint32_t i, bool v)
{
    unsigned char *cell = static_cast<unsigned char *>(base) + (i >> 3);
    const unsigned int shift = i & 0x7;         // bit position inside the byte
    const int cleared = *cell & ~(1 << shift);  // old byte with the target bit removed
    *cell = cleared | ((v & 1) << shift);
}
/// Number of bytes needed to hold i one-bit elements (rounded up).
///
/// Written as quotient plus remainder test: the earlier "(i+7)/8" form
/// added in 32-bit arithmetic and wrapped around for counts within 7 of
/// 2**32, producing a far too small size.
inline size_t mmapBoolelementCount2Bytes(uint32_t i)
{
    return (size_t)(i / 8) + ((i % 8) != 0 ? 1 : 0);
}
00367 
00368 typedef MemoryMapArray<
00369 bool,
00370 uint32_t,
00371 0xac6c1dc7,
00372 20090109,
00373 mmapBoolAccess,
00374 mmapBoolSet,
00375 mmapBoolelementCount2Bytes,
00376 MemoryMapArrayHeader
00377 > mmapArrayBool_t;
00378 
00379 //
00380 // define the two bit memory mapped array type:
00381 //
/// Read the i-th 2-bit element of the packed array at base; four elements
/// share a byte, with element 0 in the two least significant bits.
inline uint32_t mmap2BitAccess(void *base, uint32_t i)
{
    const unsigned char packed = static_cast<unsigned char *>(base)[i >> 2];
    const unsigned int shift = (i & 0x3) << 1;  // 0, 2, 4 or 6
    return (packed >> shift) & 0x3;
}
/// Store the low two bits of v as the i-th 2-bit element of the packed
/// array at base, leaving the other three elements of that byte unchanged.
inline void mmap2BitSet(void *base, uint32_t i, uint32_t v)
{
    unsigned char *cell = static_cast<unsigned char *>(base) + (i >> 2);
    const unsigned int shift = (i & 0x03) << 1;         // 0, 2, 4 or 6
    const int kept = *cell & ~(3 << shift);             // old byte minus the target element
    const uint32_t placed = (v & 0x03) << shift;        // new element moved into position
    *cell = kept | placed;
}
/// Number of bytes needed to hold i two-bit elements (rounded up).
///
/// Written as quotient plus remainder test: the earlier "(i+3)/4" form
/// added in 32-bit arithmetic and wrapped around for counts within 3 of
/// 2**32, producing a far too small size.
inline size_t mmap2BitElementCount2Bytes(uint32_t i)
{
    return (size_t)(i / 4) + ((i % 4) != 0 ? 1 : 0);
}
00397 
00398 typedef MemoryMapArray<
00399 uint32_t,
00400 uint32_t,
00401 0x25b3ea5f,
00402 20090109,
00403 mmap2BitAccess,
00404 mmap2BitSet,
00405 mmap2BitElementCount2Bytes,
00406 MemoryMapArrayHeader
00407 > mmapArray2Bit_t;
00408 
00409 //
00410 // define the four bit memory mapped array type:
00411 //
/// Read the i-th 4-bit element of the packed array at base; even indexes
/// live in the low nibble of a byte, odd indexes in the high nibble.
inline uint32_t mmap4BitAccess(void *base, uint32_t i)
{
    const unsigned char packed = static_cast<unsigned char *>(base)[i >> 1];
    const unsigned int shift = (i & 0x1) << 2;  // 0 or 4
    return (packed >> shift) & 0xf;
}
/// Store the low four bits of v as the i-th 4-bit element of the packed
/// array at base, leaving the other nibble of that byte unchanged.
///
/// The whole target nibble is cleared with a 0xf mask before the new value
/// is OR-ed in.  The earlier mask of 7 left bit 3 of the old value behind,
/// so overwriting e.g. 0xF with 0x1 stored 0x9.
inline void mmap4BitSet(void *base, uint32_t i, uint32_t v)
{
    unsigned char *cell = static_cast<unsigned char *>(base) + (i >> 1);
    const unsigned int shift = (i & 0x1) << 2;  // 0 for even i, 4 for odd i
    *cell = (unsigned char)((*cell & ~(0xfu << shift))      // keep the other nibble
                            | ((v & 0x0f) << shift));       // insert the new value
}
/// Number of bytes needed to hold i four-bit elements (rounded up).
///
/// Written as quotient plus remainder test: the earlier "(i+1)/2" form
/// added in 32-bit arithmetic and wrapped to 0 for i == 2**32 - 1.
inline size_t mmap4BitelementCount2Bytes(uint32_t i)
{
    return (size_t)(i / 2) + (i % 2);
}
00427 
00428 typedef MemoryMapArray<
00429 uint32_t,
00430 uint32_t,
00431 0x418e1874,
00432 20090109,
00433 mmap4BitAccess,
00434 mmap4BitSet,
00435 mmap4BitelementCount2Bytes,
00436 MemoryMapArrayHeader
00437 > mmapArray4Bit_t;
00438 
00439 #if 0
00440 // XXX this is example code I want to use to define arrays of genome wide match values
00441 class   baseRecord
00442 {
00443     unsigned int base:4;
00444     unsigned int qScore:7;
00445     unsigned int conflicts:5;   // how many cases of poorer matches that disagree
00446 };
00447 
00448 //
00449 // define the baseRecord array type:
00450 //
00451 inline baseRecord &mmapBaseRecordAccess(void *base, uint32_t index)
00452 {
00453     return *((baseRecord *)((char *)base + index*sizeof(baseRecord)));
00454 }
00455 inline void mmapBaseRecordSet(void *base, uint32_t index, baseRecord &v)
00456 {
00457     mmapBaseRecordAccess(base, index) = v;
00458 }
00459 inline size_t mmapBaseRecordElementCount2Bytes(uint32_t i)
00460 {
00461     return sizeof(baseRecord) * i;
00462 }
00463 
00464 typedef MemoryMapArray<
00465 baseRecord &,
00466 uint32_t,
00467 0x12341234,
00468 0xdeadbeef,
00469 &mmapBaseRecordAccess,
00470 mmapBaseRecordSet,
00471 mmapBaseRecordElementCount2Bytes,
00472 MemoryMapArrayHeader
00473 > mmapArrayBaseRecord_t;
00474 #endif
00475 
00476 #endif
Generated on Wed Nov 17 15:38:29 2010 for StatGen Software by  doxygen 1.6.3