MemoryMapArray.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __MEMORYMAPARRAY_H
00019 #define __MEMORYMAPARRAY_H
00020 
00021 #ifndef __STDC_LIMIT_MACROS
00022 #define __STDC_LIMIT_MACROS
00023 #endif
00024 #include <errno.h>
00025 #include <stdint.h>
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <unistd.h> // for gethostname()
00030 
00031 #include <string>
00032 #include <sys/types.h>
00033 #include <time.h>
00034 
00035 // STL:
00036 #include <ostream>
00037 #include <sstream>
00038 
00039 #include "Generic.h"
00040 #include "MemoryMap.h"
00041 
00042 
00043 //
00044 // This file defines a template for generating memory map backed arrays
00045 // of different types of values.
00046 //
00047 // The template assumes that the mapped files are broken into two parts,
00048 // first, a header (MemoryMapArrayHeader), then followed by the data
00049 // in the array.
00050 //
00051 // typedefs are used to declare various types of arrays beforehand,
00052 // since there will be only a few.
00053 //
00054 // They are:
00055 //      mmapArrayUint32_t;
00056 //      mmapArrayBool_t;
00057 //      mmapArray2Bit_t; mmapArray4Bit_t;
00058 //
00059 // XXX consider adding env("USER"), argv[0], date/time creation, etc.
00060 //
///
/// Fixed-layout header found at the start of every memory mapped array.
///
/// MemoryMapArray does not construct this object; it casts the start of
/// the mapping to the header type.  The order, types and sizes of the data
/// members below therefore describe the mapped bytes directly and must not
/// be changed.
///
class MemoryMapArrayHeader
{
public:
    /// Set every byte of the header to zero.
    void constructorClear()
    {
        memset(this, 0, sizeof(*this));
    }
    uint32_t     typeCookie;      // array type id (MemoryMapArray stores its cookieVal here)
    uint32_t     typeVersion;     // array type version (MemoryMapArray stores its versionVal here)
    uint32_t     contentCookie;   // caller supplied, see MemoryMapArray::setContentCookie
    uint32_t     contentVersion;  // caller supplied, see MemoryMapArray::setContentVersion
    size_t       headerSize;      // offset in bytes from the start of the mapping to the data

    // file generation info
    time_t      creationDate;
    char        creationUser[32];
    char        creationHost[32];
    char        application[32];
    // now describe the data:
    size_t      elementCount;

    /// Print the header contents to the given stream (defined elsewhere).
    void debugPrint(FILE *);

    /// Size in bytes of this header.
    ///
    /// The argument is ignored by this class; MemoryMapArray::create passes
    /// its optionalHeaderCount through this parameter.
    size_t getHeaderSize(int i)
    {
        (void) i;
        return sizeof(*this);
    }

    /// Store the application name, truncated to fit and always terminated.
    void setApplication(const char *s)
    {
        copyText(application, sizeof(application), s);
    }
    /// Store the creating user's name, truncated to fit and always terminated.
    void setCreationUser(const char *s)
    {
        copyText(creationUser, sizeof(creationUser), s);
    }
    /// Store the creating host's name, truncated to fit and always terminated.
    void setCreationHost(const char *s)
    {
        copyText(creationHost, sizeof(creationHost), s);
    }

private:
    /// Copy src into the fixed size field dst (dstSize bytes).
    ///
    /// strncpy() leaves the last byte untouched when limited to dstSize-1,
    /// so the terminator is written explicitly instead of relying on the
    /// field having been zeroed beforehand.  A NULL src stores an empty
    /// string.  This is a static function, so it adds nothing to the
    /// header's data layout.
    static void copyText(char *dst, size_t dstSize, const char *src)
    {
        if (src == NULL) src = "";
        strncpy(dst, src, dstSize - 1);
        dst[dstSize - 1] = '\0';
    }
};
00100 
00101 //
00102 // stream output for header information
00103 //
00104 std::ostream &operator << (std::ostream &stream, MemoryMapArrayHeader &h);
00105 
//
// This class object represents the application specific information that doesn't
// fit in the general header above.  Since it is only allocated via an mmap operation,
// as part of the mapped file, the destructor must never be called.  No destructor
// (virtual or otherwise) and no other virtual function is declared here, so the
// struct holds nothing but the data members listed.
//
// For many arrays, this will be empty.
//
struct MemoryMapGenericHeader
{
protected:
    size_t  headerSize;     // set in ::create and ::open only
public:
    // Return the stored header size.  Declared const because it only reads
    // the field, which lets it be called through const references/pointers.
    size_t getHeaderSize() const
    {
        return headerSize;
    }
    // other stuff follows...
};
00125 
00126 template <
00127 class elementT,
00128 typename indexT,
00129 unsigned int cookieVal,
00130 unsigned int versionVal,
00131 elementT accessorFunc(char *base, indexT),
00132 void setterFunc(char *base, indexT, elementT),
00133 size_t elementCount2BytesFunc(indexT),
00134 class arrayHeaderClass>
00135 class MemoryMapArray : public MemoryMap
00136 {
00137 protected:
00138     arrayHeaderClass    *header;
00139     char                *data;
00140     std::string         errorStr;
00141 public:
00142     void constructorClear()
00143     {
00144         header = NULL;
00145         data = NULL;
00146 //      errorStr = "";
00147     }
00148     MemoryMapArray()
00149     {
00150         constructorClear();
00151     }
00152     ~MemoryMapArray()
00153     {
00154         if (data) close();
00155     }
00156 
00157     const std::string &getErrorString()
00158     {
00159         return errorStr;
00160     }
00161 
00162     arrayHeaderClass &getHeader()
00163     {
00164         return *header;
00165     }
00166 
00167     void setContentCookie(uint32_t c)
00168     {
00169         header->contentCookie = c;
00170     }
00171     void setContentVersion(uint32_t v)
00172     {
00173         header->contentVersion = v;
00174     }
00175 
00176     // accessing
00177     inline elementT operator[](indexT i)
00178     {
00179         return accessorFunc(data, i);
00180     }
00181     inline void set(indexT i, elementT v)
00182     {
00183         setterFunc(data, i, v);
00184     }
00185 
00186 
00187 
00188     /// Create a vector with elementCount memebers.
00189     //
00190     /// Does administrative setup of the header and populating this
00191     /// class members.  User will need to finish populating the
00192     /// contents of the metaData and data sections.
00193     ///
00194     /// If file==NULL, the underlying allocation is done via malloc(),
00195     /// so that the results of write access to this vecor are not
00196     /// saved in a file.
00197     ///
00198     /// If file!=NULL, a file will be created on disk, and all
00199     /// write accesses done via the method ::set will be persistent
00200     /// in that file.
00201     ///
00202     int create(const char *file, indexT elementCount, int optionalHeaderCount = 0)
00203     {
00204         size_t len = elementCount2BytesFunc(elementCount) +
00205                      header->getHeaderSize(optionalHeaderCount);
00206         int rc;
00207         rc = MemoryMap::create(file, len);
00208         if (rc)
00209         {
00210             std::ostringstream buf;
00211             buf << file << ": failed to create file";
00212             errorStr = buf.str();
00213             close();
00214             return rc;
00215         }
00216         header = (arrayHeaderClass *) MemoryMap::data;
00217         header->constructorClear();
00218         header->typeCookie = cookieVal;
00219         header->typeVersion = versionVal;
00220         header->headerSize = header->getHeaderSize(optionalHeaderCount);
00221         header->elementCount = elementCount;
00222         data = (char *)((char *) MemoryMap::data + header->headerSize);
00223 
00224         const char *env;
00225         char hostname[256];
00226         env = getenv("USER");
00227         if (env) header->setCreationUser(env);
00228         header->creationDate = time(NULL);
00229 #if defined(WIN32)
00230         hostname[0] = '\0';
00231 #else
00232         gethostname(hostname, sizeof(hostname));
00233 #endif
00234         header->setCreationHost(hostname);
00235         return 0;
00236     }
00237 
00238     /// allow anonymous (malloc) create.
00239     ///
00240     /// we do this when we don't expect to save the results.
00241     ///
00242     /// The single use case so far is in GenomeSequence::populateDBSNP.
00243     ///
00244     int create(indexT elementCount, int optionalHeaderCount = 0)
00245     {
00246         return create(NULL, elementCount, optionalHeaderCount);
00247     }
00248 
00249     //
00250     // Open the given filename.  flags may be set to
00251     // O_RDONLY or O_RDWR, and allows the file to be
00252     // condtionally written to.
00253     //
00254     // Several sanity checks are done:
00255     //   compare the expected cookie value to the actual one
00256     //   compare the expected version value to the actual one
00257     //
00258     // if either condition is not met, the member errorStr is
00259     // set to explain why, and true is returned.
00260     //
00261     // If there were no errors, false is returned.
00262     //
00263     bool open(const char *file, int flags = O_RDONLY)
00264     {
00265         int rc = MemoryMap::open(file, flags);
00266         if (rc)
00267         {
00268             std::ostringstream buf;
00269             buf << file << ": open() failed (error=" << strerror(errno) << ").";
00270             errorStr = buf.str();
00271             return true;
00272         }
00273         header = (arrayHeaderClass *) MemoryMap::data;
00274         data = (char *)((char *) MemoryMap::data + header->headerSize);
00275         if (header->typeCookie!=cookieVal)
00276         {
00277             std::ostringstream buf;
00278             buf << file << ": wrong type of file (expected type "
00279             << cookieVal << " but got " << header->typeCookie << ")";
00280             errorStr = buf.str();
00281             // XXX insert better error handling
00282             close();
00283             return true;
00284         }
00285         if (header->typeVersion!=versionVal)
00286         {
00287             std::ostringstream buf;
00288             buf << file << ": wrong version of file (expected version "
00289             << versionVal << " but got " << header->typeVersion << ")";
00290             errorStr = buf.str();
00291             // XXX insert better error handling
00292             close();
00293             return true;
00294         }
00295         return false;
00296     }
00297 
00298     bool close()
00299     {
00300         constructorClear();
00301         return MemoryMap::close();
00302     }
00303     void debugPrint(FILE *f)
00304     {
00305         if (header) header->debugPrint(f);
00306     }
00307 
00308     size_t getElementCount() const
00309     {
00310         return header->elementCount;
00311     }
00312 
00313 };
00314 
00315 struct emptyGenericHeader : public MemoryMapGenericHeader
00316 {
00317 public:
00318     size_t getHeaderSize()
00319     {
00320         return sizeof(*this);
00321     }
00322 };
00323 
00324 //
00325 // define the uint32 array type:
00326 //
// Read the uint32_t stored at position index, treating base as the start
// of a contiguous run of uint32_t values.
inline uint32_t mmapUint32Access(char *base, uint32_t index)
{
    const uint32_t *words = reinterpret_cast<const uint32_t *>(base);
    return words[index];
}
// Store v at position index, treating base as the start of a contiguous
// run of uint32_t values.
inline void mmapUint32Set(char *base, uint32_t index, uint32_t v)
{
    uint32_t *slot = reinterpret_cast<uint32_t *>(base) + index;
    *slot = v;
}
// Number of bytes occupied by i uint32_t elements.
inline size_t mmapUint32elementCount2Bytes(uint32_t i)
{
    const size_t count = i;
    return count * sizeof(uint32_t);
}
00339 
00340 typedef MemoryMapArray<
00341 uint32_t,
00342 uint32_t,
00343 0x16b3816c,
00344 20090109,
00345 mmapUint32Access,
00346 mmapUint32Set,
00347 mmapUint32elementCount2Bytes,
00348 MemoryMapArrayHeader
00349 > mmapArrayUint32_t;
00350 
00351 //
00352 // define the boolean memory mapped array type.
00353 // NB: it is limited to 2**32 elements
00354 //
00355 
00356 typedef MemoryMapArray<
00357 uint32_t,
00358 uint32_t,
00359 0xac6c1dc7,
00360 20090109,
00361 PackedAccess_1Bit,
00362 PackedAssign_1Bit,
00363 Packed1BitElementCount2Bytes,
00364 MemoryMapArrayHeader
00365 > mmapArrayBool_t;
00366 
00367 //
00368 // define the two bit memory mapped array type:
00369 //
00370 
00371 typedef MemoryMapArray<
00372 uint32_t,
00373 uint32_t,
00374 0x25b3ea5f,
00375 20090109,
00376 PackedAccess_2Bit,
00377 PackedAssign_2Bit,
00378 Packed2BitElementCount2Bytes,
00379 MemoryMapArrayHeader
00380 > mmapArray2Bit_t;
00381 
00382 typedef MemoryMapArray<
00383 uint32_t,
00384 uint32_t,
00385 0x418e1874,
00386 20090109,
00387 PackedAccess_4Bit,
00388 PackedAssign_4Bit,
00389 Packed4BitElementCount2Bytes,
00390 MemoryMapArrayHeader
00391 > mmapArray4Bit_t;
00392 
#if 0
// XXX this is example code I want to use to define arrays of genome wide match values
//
// NOTE(review): this sketch is disabled by the surrounding #if 0.  The
// accessor and setter take "char *base" so that their signatures match the
// accessorFunc/setterFunc template parameters of MemoryMapArray; with
// "void *base" the typedef at the end could not be instantiated.
class   baseRecord
{
    unsigned int base:4;
    unsigned int qScore:7;
    unsigned int conflicts:5;   // how many cases of poorer matches that disagree
};

//
// define the baseRecord array type:
//
// Return a reference to record number index, counting from base.
inline baseRecord &mmapBaseRecordAccess(char *base, uint32_t index)
{
    return *((baseRecord *)(base + index*sizeof(baseRecord)));
}
// Overwrite record number index with a copy of v.
inline void mmapBaseRecordSet(char *base, uint32_t index, baseRecord &v)
{
    mmapBaseRecordAccess(base, index) = v;
}
// Number of bytes occupied by i records.
inline size_t mmapBaseRecordElementCount2Bytes(uint32_t i)
{
    return sizeof(baseRecord) * i;
}

typedef MemoryMapArray<
baseRecord &,
uint32_t,
0x12341234,
0xdeadbeef,
&mmapBaseRecordAccess,
mmapBaseRecordSet,
mmapBaseRecordElementCount2Bytes,
MemoryMapArrayHeader
> mmapArrayBaseRecord_t;
#endif
00429 
00430 #endif
Generated on Mon Feb 11 13:45:18 2013 for libStatGen Software by  doxygen 1.6.3