libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __MEMORYMAPARRAY_H 00019 #define __MEMORYMAPARRAY_H 00020 00021 #ifndef __STDC_LIMIT_MACROS 00022 #define __STDC_LIMIT_MACROS 00023 #endif 00024 #include <errno.h> 00025 #include <stdint.h> 00026 #include <stdio.h> 00027 #include <stdlib.h> 00028 #include <string.h> 00029 00030 #ifndef _WIN32 00031 #include <unistd.h> // for gethostname() 00032 #endif 00033 00034 #include <string> 00035 #include <sys/types.h> 00036 #include <time.h> 00037 00038 // STL: 00039 #include <ostream> 00040 #include <sstream> 00041 00042 #include "Generic.h" 00043 #include "MemoryMap.h" 00044 00045 00046 // 00047 // This file defines a template for generating memory map backed arrays 00048 // of different types of values. 00049 // 00050 // The template assumes that the mapped files are broken into two parts, 00051 // first, a header (MemoryMapArrayHeader), then followed by the data 00052 // in the array. 00053 // 00054 // typedefs are used to declare various types of arrays beforehand, 00055 // since there will be only a few. 
//
// They are:
//      mmapArrayUint32_t;
//      mmapArrayBool_t;
//      mmapArray2Bit_t;
//      mmapArray4Bit_t;
//
// XXX consider adding env("USER"), argv[0], date/time creation, etc.
//

/// General header placed in front of the array data.
///
/// Holds a type cookie/version, a content cookie/version, the header
/// size, information about how the file was generated, and the number
/// of elements in the array.
///
/// constructorClear() zeroes the object with memset(), so this class
/// must stay free of virtual functions and non-trivial members.  The
/// order of the data members is left exactly as it was.
class MemoryMapArrayHeader
{
public:
    /// Set every byte of this header to zero.
    void constructorClear()
    {
        memset(this, 0, sizeof(*this));
    }
    uint32_t     typeCookie;
    uint32_t     typeVersion;
    uint32_t     contentCookie;
    uint32_t     contentVersion;
    size_t       headerSize;

    // file generation info
    time_t      creationDate;
    char        creationUser[32];
    char        creationHost[32];
    char        application[32];
    // now describe the data:
    size_t      elementCount;

    /// Print the header to the given stdio stream (defined elsewhere).
    void debugPrint(FILE *);

    /// @return the size in bytes of this header class.  The integer
    ///         argument is accepted but not used.
    size_t getHeaderSize(int /* unused */)
    {
        return sizeof(*this);
    }

    /// Record the application name; truncated to fit, always terminated.
    /// A NULL pointer is treated as the empty string.
    void setApplication(const char *s)
    {
        copyField(application, sizeof(application), s);
    }
    /// Record the creating user; truncated to fit, always terminated.
    /// A NULL pointer is treated as the empty string.
    void setCreationUser(const char *s)
    {
        copyField(creationUser, sizeof(creationUser), s);
    }
    /// Record the creating host; truncated to fit, always terminated.
    /// A NULL pointer is treated as the empty string.
    void setCreationHost(const char *s)
    {
        copyField(creationHost, sizeof(creationHost), s);
    }

private:
    /// Bounded copy of s into a fixed size character field.
    ///
    /// Copies at most destSize-1 characters and always writes a
    /// terminating '\0' in the last position.  strncpy() zero fills the
    /// remainder of the field when s is shorter than the field.
    static void copyField(char *dest, size_t destSize, const char *s)
    {
        if (s == NULL) s = "";      // strncpy() must not be handed NULL
        strncpy(dest, s, destSize - 1);
        dest[destSize - 1] = '\0';
    }
};

//
// stream output for header information
//
std::ostream &operator << (std::ostream &stream, MemoryMapArrayHeader &h);

//
// This class object represents the application specific information that doesn't
// fit in the general header above.  Since it is only allocated via an mmap operation,
// as part of the mapped file, the destructor must never be called (the struct
// below does not declare one).
//
// For many arrays, this will be empty.
00119 // 00120 struct MemoryMapGenericHeader 00121 { 00122 protected: 00123 size_t headerSize; // set in ::create and ::open only 00124 public: 00125 size_t getHeaderSize() 00126 { 00127 return headerSize; 00128 } 00129 // other stuff follows... 00130 }; 00131 00132 template < 00133 class elementT, 00134 typename indexT, 00135 unsigned int cookieVal, 00136 unsigned int versionVal, 00137 elementT accessorFunc(char *base, indexT), 00138 void setterFunc(char *base, indexT, elementT), 00139 size_t elementCount2BytesFunc(indexT), 00140 class arrayHeaderClass> 00141 class MemoryMapArray : public MemoryMap 00142 { 00143 protected: 00144 arrayHeaderClass *header; 00145 char *data; 00146 std::string errorStr; 00147 public: 00148 void constructorClear() 00149 { 00150 header = NULL; 00151 data = NULL; 00152 // errorStr = ""; 00153 } 00154 MemoryMapArray() 00155 { 00156 constructorClear(); 00157 } 00158 ~MemoryMapArray() 00159 { 00160 if (data) close(); 00161 } 00162 00163 const std::string &getErrorString() 00164 { 00165 return errorStr; 00166 } 00167 00168 arrayHeaderClass &getHeader() 00169 { 00170 return *header; 00171 } 00172 00173 void setContentCookie(uint32_t c) 00174 { 00175 header->contentCookie = c; 00176 } 00177 void setContentVersion(uint32_t v) 00178 { 00179 header->contentVersion = v; 00180 } 00181 00182 // accessing 00183 inline elementT operator[](indexT i) 00184 { 00185 return accessorFunc(data, i); 00186 } 00187 inline void set(indexT i, elementT v) 00188 { 00189 setterFunc(data, i, v); 00190 } 00191 00192 00193 00194 /// Create a vector with elementCount memebers. 00195 // 00196 /// Does administrative setup of the header and populating this 00197 /// class members. User will need to finish populating the 00198 /// contents of the metaData and data sections. 00199 /// 00200 /// If file==NULL, the underlying allocation is done via malloc(), 00201 /// so that the results of write access to this vecor are not 00202 /// saved in a file. 
00203 /// 00204 /// If file!=NULL, a file will be created on disk, and all 00205 /// write accesses done via the method ::set will be persistent 00206 /// in that file. 00207 /// 00208 int create(const char *file, indexT elementCount, int optionalHeaderCount = 0) 00209 { 00210 size_t len = elementCount2BytesFunc(elementCount) + 00211 header->getHeaderSize(optionalHeaderCount); 00212 int rc; 00213 rc = MemoryMap::create(file, len); 00214 if (rc) 00215 { 00216 std::ostringstream buf; 00217 buf << file << ": failed to create file"; 00218 errorStr = buf.str(); 00219 close(); 00220 return rc; 00221 } 00222 header = (arrayHeaderClass *) MemoryMap::data; 00223 header->constructorClear(); 00224 header->typeCookie = cookieVal; 00225 header->typeVersion = versionVal; 00226 header->headerSize = header->getHeaderSize(optionalHeaderCount); 00227 header->elementCount = elementCount; 00228 data = (char *)((char *) MemoryMap::data + header->headerSize); 00229 00230 const char *env; 00231 char hostname[256]; 00232 env = getenv("USER"); 00233 if (env) header->setCreationUser(env); 00234 header->creationDate = time(NULL); 00235 #if defined(_WIN32) 00236 hostname[0] = '\0'; 00237 #else 00238 gethostname(hostname, sizeof(hostname)); 00239 #endif 00240 header->setCreationHost(hostname); 00241 return 0; 00242 } 00243 00244 /// allow anonymous (malloc) create. 00245 /// 00246 /// we do this when we don't expect to save the results. 00247 /// 00248 /// The single use case so far is in GenomeSequence::populateDBSNP. 00249 /// 00250 int create(indexT elementCount, int optionalHeaderCount = 0) 00251 { 00252 return create(NULL, elementCount, optionalHeaderCount); 00253 } 00254 00255 // 00256 // Open the given filename. flags may be set to 00257 // O_RDONLY or O_RDWR, and allows the file to be 00258 // condtionally written to. 
00259 // 00260 // Several sanity checks are done: 00261 // compare the expected cookie value to the actual one 00262 // compare the expected version value to the actual one 00263 // 00264 // if either condition is not met, the member errorStr is 00265 // set to explain why, and true is returned. 00266 // 00267 // If there were no errors, false is returned. 00268 // 00269 bool open(const char *file, int flags = O_RDONLY) 00270 { 00271 int rc = MemoryMap::open(file, flags); 00272 if (rc) 00273 { 00274 std::ostringstream buf; 00275 buf << file << ": open() failed (error=" << strerror(errno) << ")."; 00276 errorStr = buf.str(); 00277 return true; 00278 } 00279 header = (arrayHeaderClass *) MemoryMap::data; 00280 data = (char *)((char *) MemoryMap::data + header->headerSize); 00281 if (header->typeCookie!=cookieVal) 00282 { 00283 std::ostringstream buf; 00284 buf << file << ": wrong type of file (expected type " 00285 << cookieVal << " but got " << header->typeCookie << ")"; 00286 errorStr = buf.str(); 00287 // XXX insert better error handling 00288 close(); 00289 return true; 00290 } 00291 if (header->typeVersion!=versionVal) 00292 { 00293 std::ostringstream buf; 00294 buf << file << ": wrong version of file (expected version " 00295 << versionVal << " but got " << header->typeVersion << ")"; 00296 errorStr = buf.str(); 00297 // XXX insert better error handling 00298 close(); 00299 return true; 00300 } 00301 return false; 00302 } 00303 00304 bool close() 00305 { 00306 constructorClear(); 00307 return MemoryMap::close(); 00308 } 00309 void debugPrint(FILE *f) 00310 { 00311 if (header) header->debugPrint(f); 00312 } 00313 00314 size_t getElementCount() const 00315 { 00316 return header->elementCount; 00317 } 00318 00319 }; 00320 00321 struct emptyGenericHeader : public MemoryMapGenericHeader 00322 { 00323 public: 00324 size_t getHeaderSize() 00325 { 00326 return sizeof(*this); 00327 } 00328 }; 00329 00330 // 00331 // define the uint32 array type: 00332 // 00333 inline 
uint32_t mmapUint32Access(char *base, uint32_t index) 00334 { 00335 return ((uint32_t *)base)[index]; 00336 } 00337 inline void mmapUint32Set(char *base, uint32_t index, uint32_t v) 00338 { 00339 ((uint32_t *)base)[index] = v; 00340 } 00341 inline size_t mmapUint32elementCount2Bytes(uint32_t i) 00342 { 00343 return sizeof(uint32_t) * i; 00344 } 00345 00346 typedef MemoryMapArray< 00347 uint32_t, 00348 uint32_t, 00349 0x16b3816c, 00350 20090109, 00351 mmapUint32Access, 00352 mmapUint32Set, 00353 mmapUint32elementCount2Bytes, 00354 MemoryMapArrayHeader 00355 > mmapArrayUint32_t; 00356 00357 // 00358 // define the boolean memory mapped array type. 00359 // NB: it is limited to 2**32 elements 00360 // 00361 00362 typedef MemoryMapArray< 00363 uint32_t, 00364 uint32_t, 00365 0xac6c1dc7, 00366 20090109, 00367 PackedAccess_1Bit, 00368 PackedAssign_1Bit, 00369 Packed1BitElementCount2Bytes, 00370 MemoryMapArrayHeader 00371 > mmapArrayBool_t; 00372 00373 // 00374 // define the two bit memory mapped array type: 00375 // 00376 00377 typedef MemoryMapArray< 00378 uint32_t, 00379 uint32_t, 00380 0x25b3ea5f, 00381 20090109, 00382 PackedAccess_2Bit, 00383 PackedAssign_2Bit, 00384 Packed2BitElementCount2Bytes, 00385 MemoryMapArrayHeader 00386 > mmapArray2Bit_t; 00387 00388 typedef MemoryMapArray< 00389 uint32_t, 00390 uint32_t, 00391 0x418e1874, 00392 20090109, 00393 PackedAccess_4Bit, 00394 PackedAssign_4Bit, 00395 Packed4BitElementCount2Bytes, 00396 MemoryMapArrayHeader 00397 > mmapArray4Bit_t; 00398 00399 #if 0 00400 // XXX this is example code I want to use to define arrays of genome wide match values 00401 class baseRecord 00402 { 00403 unsigned int base:4; 00404 unsigned int qScore:7; 00405 unsigned int conflicts:5; // how many cases of poorer matches that disagree 00406 }; 00407 00408 // 00409 // define the baseRecord array type: 00410 // 00411 inline baseRecord &mmapBaseRecordAccess(void *base, uint32_t index) 00412 { 00413 return *((baseRecord *)((char *)base + 
index*sizeof(baseRecord))); 00414 } 00415 inline void mmapBaseRecordSet(void *base, uint32_t index, baseRecord &v) 00416 { 00417 mmapBaseRecordAccess(base, index) = v; 00418 } 00419 inline size_t mmapBaseRecordElementCount2Bytes(uint32_t i) 00420 { 00421 return sizeof(baseRecord) * i; 00422 } 00423 00424 typedef MemoryMapArray< 00425 baseRecord &, 00426 uint32_t, 00427 0x12341234, 00428 0xdeadbeef, 00429 &mmapBaseRecordAccess, 00430 mmapBaseRecordSet, 00431 mmapBaseRecordElementCount2Bytes, 00432 MemoryMapArrayHeader 00433 > mmapArrayBaseRecord_t; 00434 #endif 00435 00436 #endif