libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include <stdlib.h> 00019 #include "GlfRecord.h" 00020 #include "GlfException.h" 00021 #include "StringBasics.h" 00022 00023 std::string GlfRecord::REF_BASE_CHAR = "XACMGRSVTQYHKSVN"; 00024 00025 GlfRecord::GlfRecord() 00026 { 00027 reset(); 00028 } 00029 00030 00031 GlfRecord::~GlfRecord() 00032 { 00033 reset(); 00034 } 00035 00036 00037 // Reset the record for a new entry, clearing out previous values. 00038 void GlfRecord::reset() 00039 { 00040 myRecTypeRefBase = 0; 00041 myRec1Base.offset = 0; 00042 myRec1Base.min_depth = 0; 00043 myRec1Base.rmsMapQ = 0; 00044 for(int i = 0; i < 10; i++) 00045 { 00046 myRec1Base.lk[i] = 0; 00047 } 00048 00049 myRec2Base.offset = 0; 00050 myRec2Base.min_depth = 0; 00051 myRec2Base.rmsMapQ = 0; 00052 myRec2Base.lkHom1 = 0; 00053 myRec2Base.lkHom2 = 0; 00054 myRec2Base.lkHet = 0; 00055 myRec2Base.indelLen1 = 0; 00056 myRec2Base.indelLen2 = 0; 00057 00058 myIndelSeq1.reset(); 00059 myIndelSeq2.reset(); 00060 } 00061 00062 00063 // Read the record from the specified file. Assumes the file is in 00064 // the correct position for reading the record. 00065 bool GlfRecord::read(IFILE filePtr) 00066 { 00067 // Read the record type and reference base. 00068 int numRead = 0; 00069 int byteLen = sizeof(uint8_t); 00070 numRead = ifread(filePtr, &myRecTypeRefBase, byteLen); 00071 if(numRead != byteLen) 00072 { 00073 String errorMsg = "Failed to read the record type & reference base ("; 00074 errorMsg += byteLen; 00075 errorMsg += " bytes). Only read "; 00076 errorMsg += numRead; 00077 errorMsg += " bytes."; 00078 std::string errorString = errorMsg.c_str(); 00079 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00080 return(false); 00081 } 00082 00083 // TODO, split up by types of records... 00084 switch(getRecordType()) 00085 { 00086 case 0: 00087 // Last record. 00088 // Nothing more to read. 00089 break; 00090 case 1: 00091 // Read type 1. 00092 readType1(filePtr); 00093 break; 00094 case 2: 00095 // Read type 2. 00096 readType2(filePtr); 00097 break; 00098 default: 00099 String errorMsg = "Failed to read the record: unknown type: "; 00100 errorMsg += getRecordType(); 00101 std::string errorString = errorMsg.c_str(); 00102 throw(GlfException(GlfStatus::INVALID, errorString)); 00103 return(false); 00104 break; 00105 }; 00106 00107 // Successfully read, return success. 00108 return(true); 00109 } 00110 00111 00112 // Write the record to the specified file. 00113 bool GlfRecord::write(IFILE filePtr) const 00114 { 00115 // TODO, split up by types of records... 00116 switch(getRecordType()) 00117 { 00118 case 0: 00119 writeRtypeRef(filePtr); 00120 break; 00121 case 1: 00122 // write type 1. 00123 writeType1(filePtr); 00124 break; 00125 case 2: 00126 // write type 2. 00127 writeType2(filePtr); 00128 break; 00129 default: 00130 // unknown type, return error. 00131 String errorMsg = "Failed to write the record: unknown type: "; 00132 errorMsg += getRecordType(); 00133 std::string errorString = errorMsg.c_str(); 00134 throw(GlfException(GlfStatus::INVALID, errorString)); 00135 return(false); 00136 break; 00137 }; 00138 00139 return(true); 00140 } 00141 00142 00143 void GlfRecord::print() const 00144 { 00145 std::cout << "record_type: " << getRecordType() 00146 << "; ref_base: " << getRefBase() 00147 << "; ref_base_char: " << getRefBaseChar() 00148 << "\n"; 00149 00150 // TODO, split up by types of records... 00151 switch(getRecordType()) 00152 { 00153 case 0: 00154 break; 00155 case 1: 00156 // print type 1. 00157 std::cout << "\toffset: " << myRec1Base.offset 00158 << "; min_lk: " << (myRec1Base.min_depth >> 24) 00159 << "; read_depth: " << (myRec1Base.min_depth & 0xFFFFFF) 00160 << "; rmsMapQ: " << (int)myRec1Base.rmsMapQ; 00161 for(int i = 0; i < 10; ++i) 00162 { 00163 std::cout << "; lk[" << i << "]: " << (int)myRec1Base.lk[i]; 00164 } 00165 00166 std::cout << "\n"; 00167 break; 00168 case 2: 00169 // print type 2. 00170 std::cout << "\toffset: " << myRec2Base.offset 00171 << "; min_lk: " << (myRec2Base.min_depth >> 24) 00172 << "; read_depth: " << (myRec2Base.min_depth & 0xFFFFF) 00173 << "; rmsMapQ: " << (int)myRec2Base.rmsMapQ 00174 << "; lkHom1: " << (int)myRec2Base.lkHom1 00175 << "; lkHom2: " << (int)myRec2Base.lkHom2 00176 << "; lkHet: " << (int)myRec2Base.lkHet 00177 << "; indelLen1: " << myRec2Base.indelLen1 00178 << "; indelLen2: " << myRec2Base.indelLen2 00179 << "; myIndelSeq1: " << myIndelSeq1.c_str() 00180 << "; myIndelSeq2: " << myIndelSeq2.c_str() 00181 << "\n"; 00182 break; 00183 default: 00184 break; 00185 }; 00186 } 00187 00188 bool GlfRecord::setRtypeRef(uint8_t rtypeRef) 00189 { 00190 myRecTypeRefBase = rtypeRef; 00191 return(true); 00192 } 00193 00194 bool GlfRecord::setRecordType(uint8_t recType) 00195 { 00196 myRecTypeRefBase = 00197 (myRecTypeRefBase & REF_BASE_MASK) | (recType << REC_TYPE_SHIFT); 00198 return(true); 00199 } 00200 00201 bool GlfRecord::setRefBaseInt(uint8_t refBase) 00202 { 00203 myRecTypeRefBase = 00204 (myRecTypeRefBase & REC_TYPE_MASK) | (refBase & REF_BASE_MASK); 00205 return(true); 00206 } 00207 00208 // bool GlfRecord::setRefBaseChar(char refBase) 00209 // { 00210 00211 // uint8_t refBaseInt = REF_BASE_CHAR_TO_INT[refBase]; 00212 // return(setRefBaseInt(refBaseInt)); 00213 // } 00214 00215 bool GlfRecord::setOffset(uint32_t offset) 00216 { 00217 myRec1Base.offset = offset; 00218 myRec2Base.offset = offset; 00219 return(true); 00220 } 00221 00222 bool GlfRecord::setMinDepth(uint32_t minDepth) 00223 { 00224 myRec1Base.min_depth = minDepth; 00225 myRec2Base.min_depth = minDepth; 00226 return(true); 00227 } 00228 00229 bool GlfRecord::setMinLk(uint8_t minLk) 00230 { 00231 setMinDepth((myRec1Base.min_depth & READ_DEPTH_MASK) | 00232 (minLk << MIN_LK_SHIFT)); 00233 return(true); 00234 } 00235 00236 bool GlfRecord::setReadDepth(uint32_t readDepth) 00237 { 00238 setMinDepth((myRec1Base.min_depth & MIN_LK_MASK) | 00239 (readDepth & READ_DEPTH_MASK)); 00240 return(true); 00241 } 00242 00243 bool GlfRecord::setRmsMapQ(uint8_t rmsMapQ) 00244 { 00245 myRec1Base.rmsMapQ = rmsMapQ; 00246 myRec2Base.rmsMapQ = rmsMapQ; 00247 return(true); 00248 } 00249 00250 // Accessors to get the gneric values. 00251 char GlfRecord::getRefBaseChar() const 00252 { 00253 int index = myRecTypeRefBase & REF_BASE_MASK; 00254 if((index > REF_BASE_MAX) || (index < 0)) 00255 { 00256 // TODO throw exception. 00257 return('N'); 00258 } 00259 return(REF_BASE_CHAR[index]); 00260 } 00261 00262 00263 uint32_t GlfRecord::getOffset() const 00264 { 00265 if(getRecordType() == 1) 00266 { 00267 return(myRec1Base.offset); 00268 } 00269 else if(getRecordType() == 2) 00270 { 00271 return(myRec2Base.offset); 00272 } 00273 throw(GlfException(GlfStatus::UNKNOWN, 00274 "Tried to call getOffset for Record not of type 1 or 2.")); 00275 return(0); 00276 } 00277 00278 uint32_t GlfRecord::getMinDepth() const 00279 { 00280 if(getRecordType() == 1) 00281 { 00282 return(myRec1Base.min_depth); 00283 } 00284 else if(getRecordType() == 2) 00285 { 00286 return(myRec2Base.min_depth); 00287 } 00288 throw(GlfException(GlfStatus::UNKNOWN, 00289 "Tried to call getMinDepth for Record not of type 1 or 2.")); 00290 return(0); 00291 } 00292 00293 uint8_t GlfRecord::getMinLk() const 00294 { 00295 if(getRecordType() == 1) 00296 { 00297 return(myRec1Base.min_depth >> MIN_LK_SHIFT); 00298 } 00299 else if(getRecordType() == 2) 00300 { 00301 return(myRec2Base.min_depth >> MIN_LK_SHIFT); 00302 } 00303 throw(GlfException(GlfStatus::UNKNOWN, 00304 "Tried to call getMinLk for Record not of type 1 or 2.")); 00305 return(0); 00306 } 00307 00308 uint32_t GlfRecord::getReadDepth() const 00309 { 00310 if(getRecordType() == 1) 00311 { 00312 return(myRec1Base.min_depth & READ_DEPTH_MASK); 00313 } 00314 else if(getRecordType() == 2) 00315 { 00316 return(myRec2Base.min_depth & READ_DEPTH_MASK); 00317 } 00318 throw(GlfException(GlfStatus::UNKNOWN, 00319 "Tried to call getReadDepth for Record not of type 1 or 2.")); 00320 return(0); 00321 } 00322 00323 uint8_t GlfRecord::getRmsMapQ() const 00324 { 00325 if(getRecordType() == 1) 00326 { 00327 return(myRec1Base.rmsMapQ); 00328 } 00329 else if(getRecordType() == 2) 00330 { 00331 return(myRec2Base.rmsMapQ); 00332 } 00333 throw(GlfException(GlfStatus::UNKNOWN, 00334 "Tried to call getRmsMapQ for Record not of type 1 or 2.")); 00335 return(0); 00336 } 00337 00338 00339 // Accessors for getting record type 1 00340 bool GlfRecord::setLk(int index, uint8_t value) 00341 { 00342 if((index < 0) || (index >= NUM_REC1_LIKELIHOOD)) 00343 { 00344 // Out of range. 00345 throw(GlfException(GlfStatus::UNKNOWN, 00346 "Trying to set Record Type 1 likelihood position< 0 or >= 10.")); 00347 return(false); 00348 } 00349 00350 // In range. 00351 myRec1Base.lk[index] = value; 00352 return(true); 00353 } 00354 00355 uint8_t GlfRecord::getLk(int index) 00356 { 00357 if(getRecordType() != 1) 00358 { 00359 throw(GlfException(GlfStatus::UNKNOWN, 00360 "Tried to call getLk for Record not of type 1.")); 00361 return(0); 00362 } 00363 if((index < 0) || (index >= NUM_REC1_LIKELIHOOD)) 00364 { 00365 throw(GlfException(GlfStatus::UNKNOWN, 00366 "Tried to call getLk for index < 0 or >= 10.")); 00367 return(0); 00368 } 00369 return(myRec1Base.lk[index]); 00370 } 00371 00372 00373 // Accessors for getting record type 2 00374 bool GlfRecord::setLkHom1(uint8_t lk) 00375 { 00376 myRec2Base.lkHom1 = lk; 00377 return(true); 00378 } 00379 00380 bool GlfRecord::setLkHom2(uint8_t lk) 00381 { 00382 myRec2Base.lkHom2 = lk; 00383 return(true); 00384 } 00385 00386 bool GlfRecord::setLkHet(uint8_t lk) 00387 { 00388 myRec2Base.lkHet = lk; 00389 return(true); 00390 } 00391 00392 bool GlfRecord::setInsertionIndel1(const std::string& indelSeq) 00393 { 00394 myRec2Base.indelLen1 = indelSeq.length(); 00395 myIndelSeq1 = indelSeq; 00396 return(true); 00397 } 00398 00399 bool GlfRecord::setDeletionIndel1(const std::string& indelSeq) 00400 { 00401 myRec2Base.indelLen1 = 0 - (indelSeq.length()); 00402 myIndelSeq1 = indelSeq; 00403 return(true); 00404 } 00405 00406 bool GlfRecord::setInsertionIndel2(const std::string& indelSeq) 00407 { 00408 myRec2Base.indelLen2 = indelSeq.length(); 00409 myIndelSeq2 = indelSeq; 00410 return(true); 00411 } 00412 00413 bool GlfRecord::setDeletionIndel2(const std::string& indelSeq) 00414 { 00415 myRec2Base.indelLen2 = 0 - (indelSeq.length()); 00416 myIndelSeq2 = indelSeq; 00417 return(true); 00418 } 00419 00420 uint8_t GlfRecord::getLkHom1() 00421 { 00422 if(getRecordType() != 2) 00423 { 00424 throw(GlfException(GlfStatus::UNKNOWN, 00425 "Tried to call getLkHom1 for Record not of type 2.")); 00426 return(0); 00427 } 00428 return(myRec2Base.lkHom1); 00429 } 00430 00431 uint8_t GlfRecord::getLkHom2() 00432 { 00433 if(getRecordType() != 2) 00434 { 00435 throw(GlfException(GlfStatus::UNKNOWN, 00436 "Tried to call getLkHom2 for Record not of type 2.")); 00437 return(0); 00438 } 00439 return(myRec2Base.lkHom2); 00440 } 00441 00442 uint8_t GlfRecord::getLkHet() 00443 { 00444 if(getRecordType() != 2) 00445 { 00446 throw(GlfException(GlfStatus::UNKNOWN, 00447 "Tried to call getLkHet for Record not of type 2.")); 00448 return(0); 00449 } 00450 return(myRec2Base.lkHet); 00451 } 00452 00453 int16_t GlfRecord::getIndel1(std::string& indelSeq) 00454 { 00455 if(getRecordType() != 2) 00456 { 00457 throw(GlfException(GlfStatus::UNKNOWN, 00458 "Tried to call getIndel1 for Record not of type 2.")); 00459 return(0); 00460 } 00461 indelSeq = myIndelSeq1.c_str(); 00462 return(myRec2Base.indelLen1); 00463 } 00464 00465 int16_t GlfRecord::getIndel2(std::string& indelSeq) 00466 { 00467 if(getRecordType() != 2) 00468 { 00469 throw(GlfException(GlfStatus::UNKNOWN, 00470 "Tried to call getIndel2 for Record not of type 2.")); 00471 return(0); 00472 } 00473 indelSeq = myIndelSeq2.c_str(); 00474 return(myRec2Base.indelLen2); 00475 } 00476 00477 00478 void GlfRecord::readType1(IFILE filePtr) 00479 { 00480 // Read record type 1 information. 00481 int numRead = 0; 00482 numRead = ifread(filePtr, &myRec1Base, REC1_BASE_SIZE); 00483 if(numRead != REC1_BASE_SIZE) 00484 { 00485 String errorMsg = "Failed to read record of type 1 ("; 00486 errorMsg += REC1_BASE_SIZE; 00487 errorMsg += " bytes). Only read "; 00488 errorMsg += numRead; 00489 errorMsg += " bytes."; 00490 std::string errorString = errorMsg.c_str(); 00491 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00492 } 00493 00494 // Record type 1 is fixed size and has no additional variable length 00495 // fields, so done reading. 00496 } 00497 00498 00499 void GlfRecord::readType2(IFILE filePtr) 00500 { 00501 // Read record type 2 information. 00502 int numRead = 0; 00503 numRead = ifread(filePtr, &myRec2Base, REC2_BASE_SIZE); 00504 if(numRead != REC2_BASE_SIZE) 00505 { 00506 String errorMsg = "Failed to read record of type 2 base info ("; 00507 errorMsg += REC2_BASE_SIZE; 00508 errorMsg += " bytes). Only read "; 00509 errorMsg += numRead; 00510 errorMsg += " bytes."; 00511 std::string errorString = errorMsg.c_str(); 00512 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00513 } 00514 00515 // Record type 2 has 2 additional variable length fields. Read those 00516 // fields. 00517 int16_t len = abs(myRec2Base.indelLen1); 00518 numRead = myIndelSeq1.readFromFile(filePtr, len); 00519 if(numRead != len) 00520 { 00521 String errorMsg = "Failed to read record of type 2, 1st indel sequence ("; 00522 errorMsg += len; 00523 errorMsg += " bytes). Only read "; 00524 errorMsg += numRead; 00525 errorMsg += " bytes."; 00526 std::string errorString = errorMsg.c_str(); 00527 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00528 } 00529 len = abs(myRec2Base.indelLen2); 00530 numRead = myIndelSeq2.readFromFile(filePtr, len); 00531 if(numRead != len) 00532 { 00533 String errorMsg = "Failed to read record of type 2, 2nd indel sequence ("; 00534 errorMsg += len; 00535 errorMsg += " bytes). Only read "; 00536 errorMsg += numRead; 00537 errorMsg += " bytes."; 00538 std::string errorString = errorMsg.c_str(); 00539 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00540 } 00541 } 00542 00543 00544 void GlfRecord::writeRtypeRef(IFILE filePtr) const 00545 { 00546 int byteLen = sizeof(myRecTypeRefBase); 00547 int numWrite = 00548 ifwrite(filePtr, &myRecTypeRefBase, byteLen); 00549 if(numWrite != byteLen) 00550 { 00551 String errorMsg = 00552 "Failed to write the length of the record type and reference base ("; 00553 errorMsg += byteLen; 00554 errorMsg += " bytes). Only wrote "; 00555 errorMsg += numWrite; 00556 errorMsg += " bytes."; 00557 std::string errorString = errorMsg.c_str(); 00558 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00559 } 00560 } 00561 00562 00563 void GlfRecord::writeType1(IFILE filePtr) const 00564 { 00565 // Write the generic record field that all records have. 00566 writeRtypeRef(filePtr); 00567 00568 // Record type 1 is fixed size and has no additional variable length 00569 // fields, so just write the base info. 00570 int numWrite = ifwrite(filePtr, &myRec1Base, REC1_BASE_SIZE); 00571 if(numWrite != REC1_BASE_SIZE) 00572 { 00573 // failed to write. 00574 String errorMsg = "Failed to write record of type 1 ("; 00575 errorMsg += REC1_BASE_SIZE; 00576 errorMsg += " bytes). Only wrote "; 00577 errorMsg += numWrite; 00578 errorMsg += " bytes."; 00579 std::string errorString = errorMsg.c_str(); 00580 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00581 } 00582 00583 00584 // Done writing the record. 00585 } 00586 00587 00588 void GlfRecord::writeType2(IFILE filePtr) const 00589 { 00590 // Write the generic record field that all records have. 00591 writeRtypeRef(filePtr); 00592 00593 // Write the record type 2 base info. 00594 int numWrite = ifwrite(filePtr, &myRec2Base, REC2_BASE_SIZE); 00595 if(numWrite != REC2_BASE_SIZE) 00596 { 00597 // failed to write. 00598 String errorMsg = "Failed to write record of type 2 base info ("; 00599 errorMsg += REC2_BASE_SIZE; 00600 errorMsg += " bytes). Only wrote "; 00601 errorMsg += numWrite; 00602 errorMsg += " bytes."; 00603 std::string errorString = errorMsg.c_str(); 00604 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00605 } 00606 00607 // Record type 2 has 2 additional variable length fields. Write those 00608 // fields. 00609 int len = myIndelSeq1.length(); 00610 numWrite = ifwrite(filePtr, myIndelSeq1.c_str(), len); 00611 if(numWrite != len) 00612 { 00613 // failed to write. 00614 String errorMsg = "Failed to write record of type 2, 1st indel sequence ("; 00615 errorMsg += len; 00616 errorMsg += " bytes). Only wrote "; 00617 errorMsg += numWrite; 00618 errorMsg += " bytes."; 00619 std::string errorString = errorMsg.c_str(); 00620 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00621 } 00622 len = myIndelSeq2.length(); 00623 numWrite = ifwrite(filePtr, myIndelSeq2.c_str(), len); 00624 if(numWrite != len) 00625 { 00626 // failed to write. 00627 String errorMsg = "Failed to write record of type 2, 2nd indel sequence ("; 00628 errorMsg += len; 00629 errorMsg += " bytes). Only wrote "; 00630 errorMsg += numWrite; 00631 errorMsg += " bytes."; 00632 std::string errorString = errorMsg.c_str(); 00633 throw(GlfException(GlfStatus::FAIL_IO, errorString)); 00634 } 00635 00636 // Done writing the record. 00637 }