libStatGen Software  1
MemoryMapArray.h
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __MEMORYMAPARRAY_H
00019 #define __MEMORYMAPARRAY_H
00020 
00021 #ifndef __STDC_LIMIT_MACROS
00022 #define __STDC_LIMIT_MACROS
00023 #endif
00024 #include <errno.h>
00025 #include <stdint.h>
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 
00030 #ifndef _WIN32
00031 #include <unistd.h> // for gethostname()
00032 #endif
00033 
00034 #include <string>
00035 #include <sys/types.h>
00036 #include <time.h>
00037 
00038 // STL:
00039 #include <ostream>
00040 #include <sstream>
00041 
00042 #include "Generic.h"
00043 #include "MemoryMap.h"
00044 
00045 
00046 //
00047 // This file defines a template for generating memory map backed arrays
00048 // of different types of values.
00049 //
00050 // The template assumes that the mapped files are broken into two parts,
00051 // first, a header (MemoryMapArrayHeader), then followed by the data
00052 // in the array.
00053 //
00054 // typedefs are used to declare various types of arrays beforehand,
00055 // since there will be only a few.
00056 //
00057 // They are:
00058 //      mmapArrayUint32_t;
00059 //      mmapArrayBool_t;
00060 //      mmapArray2Bit_t;  mmapArray4Bit_t;
00061 //
00062 // XXX consider adding env("USER"), argv[0], date/time creation, etc.
00063 //
//
// Header placed at the very beginning of a memory mapped array file.
//
// The object is never constructed normally: MemoryMapArray casts the start
// of the mapped region to this type and then calls constructorClear().  It
// therefore must stay a plain data layout (no virtual functions).
//
class MemoryMapArrayHeader
{
public:
    /// Zero every byte of the header, including all string fields.
    void constructorClear()
    {
        memset(this, 0, sizeof(*this));
    }
    uint32_t     typeCookie;      // array type id; MemoryMapArray::open compares it to cookieVal
    uint32_t     typeVersion;     // array type version; MemoryMapArray::open compares it to versionVal
    uint32_t     contentCookie;   // written by MemoryMapArray::setContentCookie
    uint32_t     contentVersion;  // written by MemoryMapArray::setContentVersion
    size_t       headerSize;      // byte offset from the start of the map to the first element

    // file generation info
    time_t      creationDate;     // set to time(NULL) by MemoryMapArray::create
    char        creationUser[32]; // NUL terminated, truncated to 31 characters
    char        creationHost[32]; // NUL terminated, truncated to 31 characters
    char        application[32];  // NUL terminated, truncated to 31 characters
    // now describe the data:
    size_t      elementCount;     // number of elements stored after the header

    /// Print the header to the given stdio stream (defined out of line).
    void debugPrint(FILE *);

    /// Size in bytes of this header.
    ///
    /// \param i optional header count; unused here because this header has a
    ///          fixed size.  The parameter exists so that MemoryMapArray can
    ///          call getHeaderSize(optionalHeaderCount) on any header class.
    /// \return sizeof(MemoryMapArrayHeader)
    size_t getHeaderSize(int i)
    {
        (void) i;   // intentionally unused
        return sizeof(*this);
    }

    /// Record the creating application name (NULL is stored as "").
    void setApplication(const char *s)
    {
        copyField(application, sizeof(application), s);
    }
    /// Record the creating user name (NULL is stored as "").
    void setCreationUser(const char *s)
    {
        copyField(creationUser, sizeof(creationUser), s);
    }
    /// Record the creating host name (NULL is stored as "").
    void setCreationHost(const char *s)
    {
        copyField(creationHost, sizeof(creationHost), s);
    }

private:
    /// Copy src into the fixed size field dst, truncating to dstSize-1
    /// characters and always NUL terminating.  strncpy also zero-pads the
    /// remainder of the field, so no stale bytes are left behind in it.
    static void copyField(char *dst, size_t dstSize, const char *src)
    {
        strncpy(dst, src ? src : "", dstSize - 1);
        dst[dstSize - 1] = '\0';
    }
};
00106 
00107 //
00108 // stream output for header information
00109 //
00110 std::ostream &operator << (std::ostream &stream, MemoryMapArrayHeader &h);
00111 
00112 //
00113 // This class object represents the application specific information that doesn't
00114 // fit in the general header above.  Since it is only allocated via an mmap operation,
00115 // as part of the mapped file, the destructor must never be called.  Note that
00116 // no virtual destructor is actually declared in the struct below.
00117 //
00118 // For many arrays, this will be empty.
00119 //
struct MemoryMapGenericHeader
{
public:
    /// Report the size, in bytes, that was recorded for this header.
    size_t getHeaderSize()
    {
        return this->headerSize;
    }

protected:
    // Recorded header size; set in ::create and ::open only.
    size_t  headerSize;

    // other stuff follows...
};
00131 
00132 template <
00133 class elementT,
00134 typename indexT,
00135 unsigned int cookieVal,
00136 unsigned int versionVal,
00137 elementT accessorFunc(char *base, indexT),
00138 void setterFunc(char *base, indexT, elementT),
00139 size_t elementCount2BytesFunc(indexT),
00140 class arrayHeaderClass>
00141 class MemoryMapArray : public MemoryMap
00142 {
00143 protected:
00144     arrayHeaderClass    *header;
00145     char                *data;
00146     std::string         errorStr;
00147 public:
00148     void constructorClear()
00149     {
00150         header = NULL;
00151         data = NULL;
00152 //      errorStr = "";
00153     }
00154     MemoryMapArray()
00155     {
00156         constructorClear();
00157     }
00158     ~MemoryMapArray()
00159     {
00160         if (data) close();
00161     }
00162 
00163     const std::string &getErrorString()
00164     {
00165         return errorStr;
00166     }
00167 
00168     arrayHeaderClass &getHeader()
00169     {
00170         return *header;
00171     }
00172 
00173     void setContentCookie(uint32_t c)
00174     {
00175         header->contentCookie = c;
00176     }
00177     void setContentVersion(uint32_t v)
00178     {
00179         header->contentVersion = v;
00180     }
00181 
00182     // accessing
00183     inline elementT operator[](indexT i)
00184     {
00185         return accessorFunc(data, i);
00186     }
00187     inline void set(indexT i, elementT v)
00188     {
00189         setterFunc(data, i, v);
00190     }
00191 
00192 
00193 
00194     /// Create a vector with elementCount memebers.
00195     //
00196     /// Does administrative setup of the header and populating this
00197     /// class members.  User will need to finish populating the
00198     /// contents of the metaData and data sections.
00199     ///
00200     /// If file==NULL, the underlying allocation is done via malloc(),
00201     /// so that the results of write access to this vecor are not
00202     /// saved in a file.
00203     ///
00204     /// If file!=NULL, a file will be created on disk, and all
00205     /// write accesses done via the method ::set will be persistent
00206     /// in that file.
00207     ///
00208     int create(const char *file, indexT elementCount, int optionalHeaderCount = 0)
00209     {
00210         size_t len = elementCount2BytesFunc(elementCount) +
00211                      header->getHeaderSize(optionalHeaderCount);
00212         int rc;
00213         rc = MemoryMap::create(file, len);
00214         if (rc)
00215         {
00216             std::ostringstream buf;
00217             buf << file << ": failed to create file";
00218             errorStr = buf.str();
00219             close();
00220             return rc;
00221         }
00222         header = (arrayHeaderClass *) MemoryMap::data;
00223         header->constructorClear();
00224         header->typeCookie = cookieVal;
00225         header->typeVersion = versionVal;
00226         header->headerSize = header->getHeaderSize(optionalHeaderCount);
00227         header->elementCount = elementCount;
00228         data = (char *)((char *) MemoryMap::data + header->headerSize);
00229 
00230         const char *env;
00231         char hostname[256];
00232         env = getenv("USER");
00233         if (env) header->setCreationUser(env);
00234         header->creationDate = time(NULL);
00235 #if defined(_WIN32)
00236         hostname[0] = '\0';
00237 #else
00238         gethostname(hostname, sizeof(hostname));
00239 #endif
00240         header->setCreationHost(hostname);
00241         return 0;
00242     }
00243 
00244     /// allow anonymous (malloc) create.
00245     ///
00246     /// we do this when we don't expect to save the results.
00247     ///
00248     /// The single use case so far is in GenomeSequence::populateDBSNP.
00249     ///
00250     int create(indexT elementCount, int optionalHeaderCount = 0)
00251     {
00252         return create(NULL, elementCount, optionalHeaderCount);
00253     }
00254 
00255     //
00256     // Open the given filename.  flags may be set to
00257     // O_RDONLY or O_RDWR, and allows the file to be
00258     // condtionally written to.
00259     //
00260     // Several sanity checks are done:
00261     //   compare the expected cookie value to the actual one
00262     //   compare the expected version value to the actual one
00263     //
00264     // if either condition is not met, the member errorStr is
00265     // set to explain why, and true is returned.
00266     //
00267     // If there were no errors, false is returned.
00268     //
00269     bool open(const char *file, int flags = O_RDONLY)
00270     {
00271         int rc = MemoryMap::open(file, flags);
00272         if (rc)
00273         {
00274             std::ostringstream buf;
00275             buf << file << ": open() failed (error=" << strerror(errno) << ").";
00276             errorStr = buf.str();
00277             return true;
00278         }
00279         header = (arrayHeaderClass *) MemoryMap::data;
00280         data = (char *)((char *) MemoryMap::data + header->headerSize);
00281         if (header->typeCookie!=cookieVal)
00282         {
00283             std::ostringstream buf;
00284             buf << file << ": wrong type of file (expected type "
00285             << cookieVal << " but got " << header->typeCookie << ")";
00286             errorStr = buf.str();
00287             // XXX insert better error handling
00288             close();
00289             return true;
00290         }
00291         if (header->typeVersion!=versionVal)
00292         {
00293             std::ostringstream buf;
00294             buf << file << ": wrong version of file (expected version "
00295             << versionVal << " but got " << header->typeVersion << ")";
00296             errorStr = buf.str();
00297             // XXX insert better error handling
00298             close();
00299             return true;
00300         }
00301         return false;
00302     }
00303 
00304     bool close()
00305     {
00306         constructorClear();
00307         return MemoryMap::close();
00308     }
00309     void debugPrint(FILE *f)
00310     {
00311         if (header) header->debugPrint(f);
00312     }
00313 
00314     size_t getElementCount() const
00315     {
00316         return header->elementCount;
00317     }
00318 
00319 };
00320 
00321 struct emptyGenericHeader : public MemoryMapGenericHeader
00322 {
00323 public:
00324     size_t getHeaderSize()
00325     {
00326         return sizeof(*this);
00327     }
00328 };
00329 
00330 //
00331 // define the uint32 array type:
00332 //
/// Read the 32-bit value at position index, treating base as the start of
/// an array of uint32_t.
inline uint32_t mmapUint32Access(char *base, uint32_t index)
{
    const uint32_t *words = reinterpret_cast<uint32_t *>(base);
    return words[index];
}
/// Store v at position index, treating base as the start of an array of
/// uint32_t.
inline void mmapUint32Set(char *base, uint32_t index, uint32_t v)
{
    uint32_t *slot = reinterpret_cast<uint32_t *>(base) + index;
    *slot = v;
}
/// Number of bytes needed to store i elements of type uint32_t.
inline size_t mmapUint32elementCount2Bytes(uint32_t i)
{
    const size_t count = i;
    return count * sizeof(uint32_t);
}
00345 
00346 typedef MemoryMapArray<
00347 uint32_t,
00348 uint32_t,
00349 0x16b3816c,
00350 20090109,
00351 mmapUint32Access,
00352 mmapUint32Set,
00353 mmapUint32elementCount2Bytes,
00354 MemoryMapArrayHeader
00355 > mmapArrayUint32_t;
00356 
00357 //
00358 // define the boolean memory mapped array type.
00359 // NB: it is limited to 2**32 elements
00360 //
00361 
00362 typedef MemoryMapArray<
00363 uint32_t,
00364 uint32_t,
00365 0xac6c1dc7,
00366 20090109,
00367 PackedAccess_1Bit,
00368 PackedAssign_1Bit,
00369 Packed1BitElementCount2Bytes,
00370 MemoryMapArrayHeader
00371 > mmapArrayBool_t;
00372 
00373 //
00374 // define the two bit memory mapped array type:
00375 //
00376 
00377 typedef MemoryMapArray<
00378 uint32_t,
00379 uint32_t,
00380 0x25b3ea5f,
00381 20090109,
00382 PackedAccess_2Bit,
00383 PackedAssign_2Bit,
00384 Packed2BitElementCount2Bytes,
00385 MemoryMapArrayHeader
00386 > mmapArray2Bit_t;
00387 
00388 typedef MemoryMapArray<
00389 uint32_t,
00390 uint32_t,
00391 0x418e1874,
00392 20090109,
00393 PackedAccess_4Bit,
00394 PackedAssign_4Bit,
00395 Packed4BitElementCount2Bytes,
00396 MemoryMapArrayHeader
00397 > mmapArray4Bit_t;
00398 
#if 0
// XXX this is example code I want to use to define arrays of genome wide match values
//
// It is compiled out.  The accessor and setter take 'char *base' so that they
// match the function signatures MemoryMapArray requires for its accessorFunc
// and setterFunc template parameters.
class   baseRecord
{
public:                         // fields must be reachable by users of the array
    unsigned int base:4;
    unsigned int qScore:7;
    unsigned int conflicts:5;   // how many cases of poorer matches that disagree
};

//
// define the baseRecord array type:
//
// Returns a reference to record number index in the array starting at base.
inline baseRecord &mmapBaseRecordAccess(char *base, uint32_t index)
{
    return *((baseRecord *)(base + index*sizeof(baseRecord)));
}
// Overwrites record number index with a copy of v.
inline void mmapBaseRecordSet(char *base, uint32_t index, baseRecord &v)
{
    mmapBaseRecordAccess(base, index) = v;
}
// Number of bytes needed to store i records.
inline size_t mmapBaseRecordElementCount2Bytes(uint32_t i)
{
    return sizeof(baseRecord) * i;
}

typedef MemoryMapArray<
baseRecord &,
uint32_t,
0x12341234,
0xdeadbeef,
mmapBaseRecordAccess,
mmapBaseRecordSet,
mmapBaseRecordElementCount2Bytes,
MemoryMapArrayHeader
> mmapArrayBaseRecord_t;
#endif
00435 
00436 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends