GlfRecord.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include <stdlib.h>
00019 #include "GlfRecord.h"
00020 #include "GlfException.h"
00021 #include "StringBasics.h"
00022 
00023 std::string GlfRecord::REF_BASE_CHAR = "XACMGRSVTQYHKSVN";
00024 
00025 GlfRecord::GlfRecord()
00026 {
00027     reset();
00028 }
00029 
00030 
00031 GlfRecord::~GlfRecord()
00032 {
00033     reset();
00034 }
00035 
00036 
00037 // Reset the record for a new entry, clearing out previous values.
00038 void GlfRecord::reset()
00039 {
00040     myRecTypeRefBase = 0;
00041     myRec1Base.offset = 0;
00042     myRec1Base.min_depth = 0;
00043     myRec1Base.rmsMapQ = 0;
00044     for(int i = 0; i < 10; i++)
00045     {
00046         myRec1Base.lk[i] = 0;
00047     }
00048 
00049     myRec2Base.offset = 0;
00050     myRec2Base.min_depth = 0;
00051     myRec2Base.rmsMapQ = 0;
00052     myRec2Base.lkHom1 = 0;
00053     myRec2Base.lkHom2 = 0;
00054     myRec2Base.lkHet = 0;
00055     myRec2Base.indelLen1 = 0;
00056     myRec2Base.indelLen2 = 0;
00057 
00058     myIndelSeq1.reset();
00059     myIndelSeq2.reset();
00060 }
00061 
00062 
00063 // Read the record from the specified file.  Assumes the file is in
00064 // the correct position for reading the record.
00065 bool GlfRecord::read(IFILE filePtr)
00066 {
00067     // Read the record type and reference base.
00068     int numRead = 0;
00069     int byteLen = sizeof(uint8_t);
00070     numRead = ifread(filePtr, &myRecTypeRefBase, byteLen);
00071     if(numRead != byteLen)
00072     {
00073         String errorMsg = "Failed to read the record type & reference base (";
00074         errorMsg += byteLen;
00075         errorMsg += " bytes).  Only read ";
00076         errorMsg += numRead;
00077         errorMsg += " bytes.";
00078         std::string errorString = errorMsg.c_str();
00079         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00080         return(false);
00081     }
00082 
00083     // TODO, split up by types of records...
00084     switch(getRecordType())
00085     {
00086         case 0:
00087             //  Last record.
00088             // Nothing more to read.
00089             break;
00090         case 1:
00091             // Read type 1.
00092             readType1(filePtr);
00093             break;
00094         case 2:
00095             // Read type 2.
00096             readType2(filePtr);
00097             break;
00098         default:
00099             String errorMsg = "Failed to read the record: unknown type: ";
00100             errorMsg += getRecordType();
00101             std::string errorString = errorMsg.c_str();
00102             throw(GlfException(GlfStatus::INVALID, errorString));
00103             return(false);
00104             break;
00105     };
00106 
00107     // Successfully read, return success.
00108     return(true);
00109 }
00110 
00111 
00112 // Write the record to the specified file.
00113 bool GlfRecord::write(IFILE filePtr) const
00114 {
00115     // TODO, split up by types of records...
00116     switch(getRecordType())
00117     {
00118         case 0:
00119             writeRtypeRef(filePtr);
00120             break;
00121         case 1:
00122             // write type 1.
00123             writeType1(filePtr);
00124             break;
00125         case 2:
00126             // write type 2.
00127             writeType2(filePtr);
00128             break;
00129         default:
00130             // unknown type, return error.
00131             String errorMsg = "Failed to write the record: unknown type: ";
00132             errorMsg += getRecordType();
00133             std::string errorString = errorMsg.c_str();
00134             throw(GlfException(GlfStatus::INVALID, errorString));
00135             return(false);
00136             break;
00137     };
00138     
00139     return(true);
00140 }
00141 
00142 
00143 void GlfRecord::print() const
00144 {
00145     std::cout << "record_type: " << getRecordType()
00146               << "; ref_base: " << getRefBase()
00147               << "; ref_base_char: " << getRefBaseChar()
00148               << "\n";
00149 
00150     // TODO, split up by types of records...
00151     switch(getRecordType())
00152     {
00153         case 0:
00154             break;
00155         case 1:
00156             // print type 1.
00157             std::cout << "\toffset: " << myRec1Base.offset
00158                       << "; min_lk: " << (myRec1Base.min_depth >> 24)
00159                       << "; read_depth: " << (myRec1Base.min_depth & 0xFFFFFF)
00160                       << "; rmsMapQ: " << (int)myRec1Base.rmsMapQ;
00161             for(int i = 0; i < 10; ++i)
00162             {
00163                 std::cout << "; lk[" << i << "]: " << (int)myRec1Base.lk[i];
00164             }
00165 
00166             std::cout << "\n";
00167             break;
00168         case 2:
00169             // print type 2.
00170             std::cout << "\toffset: " << myRec2Base.offset
00171                       << "; min_lk: " << (myRec2Base.min_depth >> 24)
00172                       << "; read_depth: " << (myRec2Base.min_depth & 0xFFFFF)
00173                       << "; rmsMapQ: " << (int)myRec2Base.rmsMapQ
00174                       << "; lkHom1: " << (int)myRec2Base.lkHom1
00175                       << "; lkHom2: " << (int)myRec2Base.lkHom2
00176                       << "; lkHet: " << (int)myRec2Base.lkHet
00177                       << "; indelLen1: " << myRec2Base.indelLen1
00178                       << "; indelLen2: " << myRec2Base.indelLen2
00179                       << "; myIndelSeq1: " << myIndelSeq1.c_str()
00180                       << "; myIndelSeq2: " << myIndelSeq2.c_str()
00181                       << "\n";
00182             break;
00183         default:
00184             break;
00185     };
00186 }
00187 
00188 bool GlfRecord::setRtypeRef(uint8_t rtypeRef)
00189 {
00190     myRecTypeRefBase = rtypeRef;
00191     return(true);
00192 }
00193 
00194 bool GlfRecord::setRecordType(uint8_t recType)
00195 {
00196     myRecTypeRefBase = 
00197         (myRecTypeRefBase & REF_BASE_MASK) | (recType << REC_TYPE_SHIFT);
00198     return(true);
00199 }
00200 
00201 bool GlfRecord::setRefBaseInt(uint8_t refBase)
00202 {
00203     myRecTypeRefBase = 
00204         (myRecTypeRefBase & REC_TYPE_MASK) | (refBase & REF_BASE_MASK);
00205     return(true);
00206 }
00207 
00208 // bool GlfRecord::setRefBaseChar(char refBase)
00209 // {
00210 
00211 //     uint8_t refBaseInt = REF_BASE_CHAR_TO_INT[refBase];
00212 //     return(setRefBaseInt(refBaseInt));
00213 // }
00214 
00215 bool GlfRecord::setOffset(uint32_t offset)
00216 {
00217     myRec1Base.offset = offset;
00218     myRec2Base.offset = offset;
00219     return(true);
00220 }
00221 
00222 bool GlfRecord::setMinDepth(uint32_t minDepth)
00223 {
00224     myRec1Base.min_depth = minDepth;
00225     myRec2Base.min_depth = minDepth;
00226     return(true);
00227 }
00228 
00229 bool GlfRecord::setMinLk(uint8_t minLk)
00230 {
00231     setMinDepth((myRec1Base.min_depth & READ_DEPTH_MASK) |
00232                 (minLk << MIN_LK_SHIFT));
00233     return(true);
00234 }
00235 
00236 bool GlfRecord::setReadDepth(uint32_t readDepth)
00237 {
00238     setMinDepth((myRec1Base.min_depth & MIN_LK_MASK) |
00239                 (readDepth & READ_DEPTH_MASK));
00240     return(true);
00241 }
00242 
00243 bool GlfRecord::setRmsMapQ(uint8_t rmsMapQ)
00244 {
00245     myRec1Base.rmsMapQ = rmsMapQ;
00246     myRec2Base.rmsMapQ = rmsMapQ;
00247     return(true);
00248 }
00249 
00250 // Accessors to get the gneric values.
00251 char GlfRecord::getRefBaseChar() const
00252 {
00253     int index = myRecTypeRefBase & REF_BASE_MASK;
00254     if((index > REF_BASE_MAX) || (index < 0))
00255     {
00256         // TODO throw exception.
00257         return('N');
00258     }
00259     return(REF_BASE_CHAR[index]);
00260 }
00261 
00262 
00263 uint32_t GlfRecord::getOffset()
00264 {
00265     if(getRecordType() == 1)
00266     {
00267         return(myRec1Base.offset);
00268     }
00269     else if(getRecordType() == 2)
00270     {
00271         return(myRec2Base.offset);
00272     }
00273     throw(GlfException(GlfStatus::UNKNOWN, 
00274                        "Tried to call getOffset for Record not of type 1 or 2."));
00275     return(0);
00276 }
00277 
00278 uint32_t GlfRecord::getMinDepth()
00279 {
00280     if(getRecordType() == 1)
00281     {
00282         return(myRec1Base.min_depth);
00283     }
00284     else if(getRecordType() == 2)
00285     {
00286         return(myRec2Base.min_depth);
00287     }
00288     throw(GlfException(GlfStatus::UNKNOWN, 
00289                        "Tried to call getMinDepth for Record not of type 1 or 2."));
00290     return(0);
00291 }
00292 
00293 uint8_t GlfRecord::getMinLk()
00294 {
00295     if(getRecordType() == 1)
00296     {
00297         return(myRec1Base.min_depth >> MIN_LK_SHIFT);
00298     }
00299     else if(getRecordType() == 2)
00300     {
00301         return(myRec2Base.min_depth >> MIN_LK_SHIFT);
00302     }
00303     throw(GlfException(GlfStatus::UNKNOWN, 
00304                        "Tried to call getMinLk for Record not of type 1 or 2."));
00305     return(0);
00306 }
00307 
00308 uint32_t GlfRecord::getReadDepth()
00309 {
00310     if(getRecordType() == 1)
00311     {
00312         return(myRec1Base.min_depth & READ_DEPTH_MASK);
00313     }
00314     else if(getRecordType() == 2)
00315     {
00316         return(myRec2Base.min_depth & READ_DEPTH_MASK);
00317     }
00318     throw(GlfException(GlfStatus::UNKNOWN, 
00319                        "Tried to call getReadDepth for Record not of type 1 or 2."));
00320     return(0);
00321 }
00322 
00323 uint8_t GlfRecord::getRmsMapQ()
00324 {
00325     if(getRecordType() == 1)
00326     {
00327         return(myRec1Base.rmsMapQ);
00328     }
00329     else if(getRecordType() == 2)
00330     {
00331         return(myRec2Base.rmsMapQ);
00332     }
00333     throw(GlfException(GlfStatus::UNKNOWN, 
00334                        "Tried to call getRmsMapQ for Record not of type 1 or 2."));
00335     return(0);
00336 }
00337 
00338     
00339 // Accessors for getting record type 1
00340 bool GlfRecord::setLk(int index, uint8_t value)
00341 {
00342     if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
00343     {
00344         //  Out of range.
00345         throw(GlfException(GlfStatus::UNKNOWN, 
00346                            "Trying to set Record Type 1 likelihood position< 0 or >= 10."));
00347         return(false);
00348     }
00349 
00350     // In range.
00351     myRec1Base.lk[index] = value;
00352     return(true);
00353 }
00354 
00355 uint8_t GlfRecord::getLk(int index)
00356 {
00357     if(getRecordType() != 1)
00358     {
00359         throw(GlfException(GlfStatus::UNKNOWN, 
00360                            "Tried to call getLk for Record not of type 1."));
00361         return(0);
00362     }
00363     if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
00364     {
00365         throw(GlfException(GlfStatus::UNKNOWN, 
00366                            "Tried to call getLk for index < 0 or >= 10."));
00367         return(0);
00368     }
00369     return(myRec1Base.lk[index]);
00370 }
00371 
00372     
00373 // Accessors for getting record type 2
00374 bool GlfRecord::setLkHom1(uint8_t lk)
00375 {
00376     myRec2Base.lkHom1 = lk;
00377     return(true);
00378 }
00379 
00380 bool GlfRecord::setLkHom2(uint8_t lk)
00381 {
00382     myRec2Base.lkHom2 = lk;
00383     return(true);
00384 }
00385 
00386 bool GlfRecord::setLkHet(uint8_t lk)
00387 {
00388     myRec2Base.lkHet = lk;
00389     return(true);
00390 }
00391 
00392 bool GlfRecord::setInsertionIndel1(const std::string& indelSeq)
00393 {
00394     myRec2Base.indelLen1 = indelSeq.length();
00395     myIndelSeq1 = indelSeq;
00396     return(true);
00397 }
00398 
00399 bool GlfRecord::setDeletionIndel1(const std::string& indelSeq)
00400 {
00401     myRec2Base.indelLen1 = -(indelSeq.length());
00402     myIndelSeq1 = indelSeq;
00403     return(true);
00404 }
00405 
00406 bool GlfRecord::setInsertionIndel2(const std::string& indelSeq)
00407 {
00408     myRec2Base.indelLen2 = indelSeq.length();
00409     myIndelSeq2 = indelSeq;
00410     return(true);
00411 }
00412 
00413 bool GlfRecord::setDeletionIndel2(const std::string& indelSeq)
00414 {
00415     myRec2Base.indelLen2 = -(indelSeq.length());
00416     myIndelSeq2 = indelSeq;
00417     return(true);
00418 }
00419 
00420 uint8_t GlfRecord::getLkHom1()
00421 {
00422     if(getRecordType() != 2)
00423     {
00424         throw(GlfException(GlfStatus::UNKNOWN, 
00425                            "Tried to call getLkHom1 for Record not of type 2."));
00426         return(0);
00427     }
00428     return(myRec2Base.lkHom1);
00429 }
00430 
00431 uint8_t GlfRecord::getLkHom2()
00432 {
00433     if(getRecordType() != 2)
00434     {
00435         throw(GlfException(GlfStatus::UNKNOWN, 
00436                            "Tried to call getLkHom2 for Record not of type 2."));
00437         return(0);
00438     }
00439     return(myRec2Base.lkHom2);
00440 }
00441 
00442 uint8_t GlfRecord::getLkHet()
00443 {
00444     if(getRecordType() != 2)
00445     {
00446         throw(GlfException(GlfStatus::UNKNOWN, 
00447                            "Tried to call getLkHet for Record not of type 2."));
00448         return(0);
00449     }
00450     return(myRec2Base.lkHet);
00451 }
00452 
00453 int16_t GlfRecord::getIndel1(std::string& indelSeq)
00454 {
00455     if(getRecordType() != 2)
00456     {
00457         throw(GlfException(GlfStatus::UNKNOWN, 
00458                            "Tried to call getIndel1 for Record not of type 2."));
00459         return(0);
00460     }
00461     indelSeq = myIndelSeq1.c_str();
00462     return(myRec2Base.indelLen1);
00463 }
00464 
00465 int16_t GlfRecord::getIndel2(std::string& indelSeq)
00466 {
00467     if(getRecordType() != 2)
00468     {
00469         throw(GlfException(GlfStatus::UNKNOWN, 
00470                            "Tried to call getIndel2 for Record not of type 2."));
00471         return(0);
00472     }
00473     indelSeq = myIndelSeq2.c_str();
00474     return(myRec2Base.indelLen2);
00475 }
00476 
00477 
00478 void GlfRecord::readType1(IFILE filePtr)
00479 {
00480     // Read record type 1 information.
00481     int numRead = 0;
00482     numRead = ifread(filePtr, &myRec1Base, REC1_BASE_SIZE);
00483     if(numRead != REC1_BASE_SIZE)
00484     {
00485         String errorMsg = "Failed to read record of type 1 (";
00486         errorMsg += REC1_BASE_SIZE;
00487         errorMsg += " bytes).  Only read ";
00488         errorMsg += numRead;
00489         errorMsg += " bytes.";
00490         std::string errorString = errorMsg.c_str();
00491         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00492     }
00493 
00494     // Record type 1 is fixed size and has no additional variable length
00495     // fields, so done reading.
00496 }
00497 
00498 
00499 void GlfRecord::readType2(IFILE filePtr)
00500 {
00501     // Read record type 2 information.
00502     int numRead = 0;
00503     numRead = ifread(filePtr, &myRec2Base, REC2_BASE_SIZE);
00504     if(numRead != REC2_BASE_SIZE)
00505     {
00506         String errorMsg = "Failed to read record of type 2 base info (";
00507         errorMsg += REC2_BASE_SIZE;
00508         errorMsg += " bytes).  Only read ";
00509         errorMsg += numRead;
00510         errorMsg += " bytes.";
00511         std::string errorString = errorMsg.c_str();
00512         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00513     }
00514 
00515     // Record type 2 has 2 additional variable length fields.  Read those
00516     // fields.
00517     int16_t len = abs(myRec2Base.indelLen1);
00518     numRead = myIndelSeq1.readFromFile(filePtr, len);
00519     if(numRead != len)
00520     {
00521         String errorMsg = "Failed to read record of type 2, 1st indel sequence (";
00522         errorMsg += len;
00523         errorMsg += " bytes).  Only read ";
00524         errorMsg += numRead;
00525         errorMsg += " bytes.";
00526         std::string errorString = errorMsg.c_str();
00527         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00528     }
00529     len = abs(myRec2Base.indelLen2);
00530     numRead = myIndelSeq2.readFromFile(filePtr, len);
00531     if(numRead != len)
00532     {
00533         String errorMsg = "Failed to read record of type 2, 2nd indel sequence (";
00534         errorMsg += len;
00535         errorMsg += " bytes).  Only read ";
00536         errorMsg += numRead;
00537         errorMsg += " bytes.";
00538         std::string errorString = errorMsg.c_str();
00539         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00540     }
00541 }
00542 
00543 
00544 void GlfRecord::writeRtypeRef(IFILE filePtr) const
00545 {
00546     int byteLen = sizeof(myRecTypeRefBase);
00547     int numWrite = 
00548         ifwrite(filePtr, &myRecTypeRefBase, byteLen);
00549     if(numWrite != byteLen)
00550     {
00551         String errorMsg = 
00552             "Failed to write the length of the record type and reference base (";
00553         errorMsg += byteLen;
00554         errorMsg += " bytes).  Only wrote ";
00555         errorMsg += numWrite;
00556         errorMsg += " bytes.";
00557         std::string errorString = errorMsg.c_str();
00558         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00559     }
00560 }
00561 
00562 
00563 void GlfRecord::writeType1(IFILE filePtr) const
00564 {
00565     // Write the generic record field that all records have.
00566     writeRtypeRef(filePtr);
00567     
00568     // Record type 1 is fixed size and has no additional variable length
00569     // fields, so just write the base info.
00570     int numWrite = ifwrite(filePtr, &myRec1Base, REC1_BASE_SIZE);
00571     if(numWrite != REC1_BASE_SIZE)
00572     {
00573         // failed to write.
00574         String errorMsg = "Failed to write record of type 1 (";
00575         errorMsg += REC1_BASE_SIZE;
00576         errorMsg += " bytes).  Only wrote ";
00577         errorMsg += numWrite;
00578         errorMsg += " bytes.";
00579         std::string errorString = errorMsg.c_str();
00580         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00581     }
00582     
00583 
00584     // Done writing the record.
00585 }
00586 
00587 
00588 void GlfRecord::writeType2(IFILE filePtr) const
00589 {
00590     // Write the generic record field that all records have.
00591     writeRtypeRef(filePtr);
00592 
00593     // Write the record type 2 base info.
00594     int numWrite = ifwrite(filePtr, &myRec2Base, REC2_BASE_SIZE);
00595     if(numWrite != REC2_BASE_SIZE)
00596     {
00597         // failed to write.
00598         String errorMsg = "Failed to write record of type 2 base info (";
00599         errorMsg += REC2_BASE_SIZE;
00600         errorMsg += " bytes).  Only wrote ";
00601         errorMsg += numWrite;
00602         errorMsg += " bytes.";
00603         std::string errorString = errorMsg.c_str();
00604         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00605     }
00606     
00607     // Record type 2 has 2 additional variable length fields.  Write those
00608     // fields.
00609     int len = myIndelSeq1.length();
00610     numWrite = ifwrite(filePtr, myIndelSeq1.c_str(), len);
00611     if(numWrite != len)
00612     {
00613         // failed to write.
00614         String errorMsg = "Failed to write record of type 2, 1st indel sequence (";
00615         errorMsg += len;
00616         errorMsg += " bytes).  Only wrote ";
00617         errorMsg += numWrite;
00618         errorMsg += " bytes.";
00619         std::string errorString = errorMsg.c_str();
00620         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00621     }
00622     len = myIndelSeq2.length();
00623     numWrite = ifwrite(filePtr, myIndelSeq2.c_str(), len);
00624     if(numWrite != len)
00625     {
00626         // failed to write.
00627         String errorMsg = "Failed to write record of type 2, 2nd indel sequence (";
00628         errorMsg += len;
00629         errorMsg += " bytes).  Only wrote ";
00630         errorMsg += numWrite;
00631         errorMsg += " bytes.";
00632         std::string errorString = errorMsg.c_str();
00633         throw(GlfException(GlfStatus::FAIL_IO, errorString));
00634     }
00635 
00636     // Done writing the record.
00637 }
Generated on Tue Sep 6 17:52:01 2011 for libStatGen Software by  doxygen 1.6.3