00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021
00022 #include "bam.h"
00023
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026
00027 #include "BaseUtilities.h"
00028
00029
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032
00033
00034 SamRecord::SamRecord()
00035 : myStatus()
00036 {
00037 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00038
00039 myRecordPtr =
00040 (bamRecordStruct *) malloc(defaultAllocSize);
00041
00042 myCigarTempBuffer = NULL;
00043 myCigarTempBufferAllocatedSize = 0;
00044
00045 allocatedSize = defaultAllocSize;
00046
00047 resetRecord();
00048 }
00049
00050
00051 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00052 : myStatus(errorHandlingType)
00053 {
00054 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00055
00056 myRecordPtr =
00057 (bamRecordStruct *) malloc(defaultAllocSize);
00058
00059 myCigarTempBuffer = NULL;
00060 myCigarTempBufferAllocatedSize = 0;
00061
00062 allocatedSize = defaultAllocSize;
00063
00064 resetRecord();
00065 }
00066
00067
00068 SamRecord::~SamRecord()
00069 {
00070 resetRecord();
00071
00072 if(myRecordPtr != NULL)
00073 {
00074 free(myRecordPtr);
00075 myRecordPtr = NULL;
00076 }
00077 if(myCigarTempBuffer != NULL)
00078 {
00079 free(myCigarTempBuffer);
00080 myCigarTempBuffer = NULL;
00081 myCigarTempBufferAllocatedSize = 0;
00082 }
00083 }
00084
00085
00086
00087 void SamRecord::resetRecord()
00088 {
00089 myIsBufferSynced = true;
00090
00091 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00092 myRecordPtr->myReferenceID = -1;
00093 myRecordPtr->myPosition = -1;
00094 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00095 myRecordPtr->myMapQuality = 0;
00096 myRecordPtr->myBin = DEFAULT_BIN;
00097 myRecordPtr->myCigarLength = 0;
00098 myRecordPtr->myFlag = 0;
00099 myRecordPtr->myReadLength = 0;
00100 myRecordPtr->myMateReferenceID = -1;
00101 myRecordPtr->myMatePosition = -1;
00102 myRecordPtr->myInsertSize = 0;
00103
00104
00105 myReadName = DEFAULT_READ_NAME;
00106 myReferenceName = "*";
00107 myMateReferenceName = "*";
00108 myCigar = "*";
00109 mySequence = "*";
00110 myQuality = "*";
00111 myNeedToSetTagsFromBuffer = false;
00112 myNeedToSetTagsInBuffer = false;
00113
00114
00115 myAlignmentLength = -1;
00116 myUnclippedStartOffset = -1;
00117 myUnclippedEndOffset = -1;
00118
00119 clearTags();
00120
00121
00122
00123
00124
00125 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
00126 myRecordPtr->myReadNameLength);
00127
00128
00129 myIsReadNameBufferValid = true;
00130 myIsCigarBufferValid = true;
00131 myIsSequenceBufferValid = true;
00132 myIsQualityBufferValid = true;
00133 myIsTagsBufferValid = true;
00134 myIsBinValid = true;
00135
00136 myCigarTempBufferLength = -1;
00137
00138 myStatus = SamStatus::SUCCESS;
00139
00140 NOT_FOUND_TAG_STRING = "";
00141 NOT_FOUND_TAG_INT = -1;
00142 NOT_FOUND_TAG_DOUBLE = -1;
00143 }
00144
00145
00146
00147 void SamRecord::resetTagIter()
00148 {
00149 myLastTagIndex = -1;
00150 }
00151
00152
00153
00154
00155 bool SamRecord::isValid(SamFileHeader& header)
00156 {
00157 myStatus = SamStatus::SUCCESS;
00158 SamValidationErrors invalidSamErrors;
00159 if(!SamValidator::isValid(header, *this, invalidSamErrors))
00160 {
00161
00162 std::string errorMessage = "";
00163 invalidSamErrors.getErrorString(errorMessage);
00164 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00165 return(false);
00166 }
00167
00168 return(true);
00169 }
00170
00171
00172 bool SamRecord::setReadName(const char* readName)
00173 {
00174 myReadName = readName;
00175 myIsBufferSynced = false;
00176 myIsReadNameBufferValid = false;
00177 myStatus = SamStatus::SUCCESS;
00178
00179
00180
00181 if(myReadName.Length() == 0)
00182 {
00183
00184 myReadName = DEFAULT_READ_NAME;
00185 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00186 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00187 return(false);
00188 }
00189
00190 return true;
00191 }
00192
00193
00194 bool SamRecord::setFlag(uint16_t flag)
00195 {
00196 myStatus = SamStatus::SUCCESS;
00197 myRecordPtr->myFlag = flag;
00198 return true;
00199 }
00200
00201
00202 bool SamRecord::setReferenceName(SamFileHeader& header,
00203 const char* referenceName)
00204 {
00205 myStatus = SamStatus::SUCCESS;
00206
00207 myReferenceName = referenceName;
00208 myRecordPtr->myReferenceID = header.getReferenceID(referenceName);
00209
00210 return true;
00211 }
00212
00213
00214 bool SamRecord::set1BasedPosition(int32_t position)
00215 {
00216 return(set0BasedPosition(position - 1));
00217 }
00218
00219
00220 bool SamRecord::set0BasedPosition(int32_t position)
00221 {
00222 myStatus = SamStatus::SUCCESS;
00223 myRecordPtr->myPosition = position;
00224 myIsBinValid = false;
00225 return true;
00226 }
00227
00228
00229 bool SamRecord::setMapQuality(uint8_t mapQuality)
00230 {
00231 myStatus = SamStatus::SUCCESS;
00232 myRecordPtr->myMapQuality = mapQuality;
00233 return true;
00234 }
00235
00236
00237 bool SamRecord::setCigar(const char* cigar)
00238 {
00239 myStatus = SamStatus::SUCCESS;
00240 myCigar = cigar;
00241
00242 myIsBufferSynced = false;
00243 myIsCigarBufferValid = false;
00244 myCigarTempBufferLength = -1;
00245 myIsBinValid = false;
00246
00247
00248 myAlignmentLength = -1;
00249 myUnclippedStartOffset = -1;
00250 myUnclippedEndOffset = -1;
00251
00252 return true;
00253 }
00254
00255
00256 bool SamRecord::setCigar(const Cigar& cigar)
00257 {
00258 myStatus = SamStatus::SUCCESS;
00259 cigar.getCigarString(myCigar);
00260
00261 myIsBufferSynced = false;
00262 myIsCigarBufferValid = false;
00263 myCigarTempBufferLength = -1;
00264 myIsBinValid = false;
00265
00266
00267 myAlignmentLength = -1;
00268 myUnclippedStartOffset = -1;
00269 myUnclippedEndOffset = -1;
00270
00271 return true;
00272 }
00273
00274
00275 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00276 const char* mateReferenceName)
00277 {
00278 myStatus = SamStatus::SUCCESS;
00279
00280
00281
00282 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00283 {
00284 myMateReferenceName = myReferenceName;
00285 }
00286 else
00287 {
00288 myMateReferenceName = mateReferenceName;
00289 }
00290
00291
00292 myRecordPtr->myMateReferenceID =
00293 header.getReferenceID(myMateReferenceName);
00294
00295 return true;
00296 }
00297
00298
00299 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00300 {
00301 return(set0BasedMatePosition(matePosition - 1));
00302 }
00303
00304
00305 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00306 {
00307 myStatus = SamStatus::SUCCESS;
00308 myRecordPtr->myMatePosition = matePosition;
00309 return true;
00310 }
00311
00312
00313 bool SamRecord::setInsertSize(int32_t insertSize)
00314 {
00315 myStatus = SamStatus::SUCCESS;
00316 myRecordPtr->myInsertSize = insertSize;
00317 return true;
00318 }
00319
00320
00321 bool SamRecord::setSequence(const char* seq)
00322 {
00323 myStatus = SamStatus::SUCCESS;
00324 mySequence = seq;
00325
00326 myIsBufferSynced = false;
00327 myIsSequenceBufferValid = false;
00328 return true;
00329 }
00330
00331
00332 bool SamRecord::setQuality(const char* quality)
00333 {
00334 myStatus = SamStatus::SUCCESS;
00335 myQuality = quality;
00336 myIsBufferSynced = false;
00337 myIsQualityBufferValid = false;
00338 return true;
00339 }
00340
00341
00342
00343 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr,
00344 SamFileHeader& header)
00345 {
00346 myStatus = SamStatus::SUCCESS;
00347 if((filePtr == NULL) || (filePtr->isOpen() == false))
00348 {
00349
00350 myStatus.setStatus(SamStatus::FAIL_ORDER,
00351 "Can't read from an unopened file.");
00352 return(SamStatus::FAIL_ORDER);
00353 }
00354
00355
00356 resetRecord();
00357
00358
00359 int numBytes =
00360 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00361
00362 if(ifeof(filePtr))
00363 {
00364 if(numBytes == 0)
00365 {
00366
00367 myStatus.setStatus(SamStatus::NO_MORE_RECS,
00368 "No more records left to read.");
00369 return(SamStatus::NO_MORE_RECS);
00370 }
00371 else
00372 {
00373
00374
00375 myStatus.setStatus(SamStatus::FAIL_PARSE,
00376 "EOF reached in the middle of a record.");
00377 return(SamStatus::FAIL_PARSE);
00378 }
00379 }
00380
00381
00382 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00383 {
00384
00385
00386 return(SamStatus::FAIL_MEM);
00387 }
00388
00389
00390 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00391 != (unsigned int)myRecordPtr->myBlockSize)
00392 {
00393
00394 resetRecord();
00395 myStatus.setStatus(SamStatus::FAIL_IO,
00396 "Failed to read the record");
00397 return(SamStatus::FAIL_IO);
00398 }
00399
00400 setVariablesForNewBuffer(header);
00401
00402
00403 return(SamStatus::SUCCESS);
00404 }
00405
00406
00407
00408 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00409 uint32_t fromBufferSize,
00410 SamFileHeader& header)
00411 {
00412 myStatus = SamStatus::SUCCESS;
00413 if((fromBuffer == NULL) || (fromBufferSize == 0))
00414 {
00415
00416 myStatus.setStatus(SamStatus::FAIL_PARSE,
00417 "Cannot parse an empty file.");
00418 return(SamStatus::FAIL_PARSE);
00419 }
00420
00421
00422 resetRecord();
00423
00424
00425 if(!allocateRecordStructure(fromBufferSize))
00426 {
00427
00428 return(SamStatus::FAIL_MEM);
00429 }
00430
00431 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00432
00433 setVariablesForNewBuffer(header);
00434
00435
00436 return(SamStatus::SUCCESS);
00437 }
00438
00439
00440
00441
00442 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00443 {
00444 myStatus = SamStatus::SUCCESS;
00445 bool status = true;
00446 int key = 0;
00447 int intVal = 0;
00448 int index = 0;
00449 char bamvtype;
00450
00451 int tagBufferSize = 0;
00452
00453
00454 if(myNeedToSetTagsFromBuffer)
00455 {
00456 if(!setTagsFromBuffer())
00457 {
00458
00459 return(false);
00460 }
00461 }
00462
00463 switch (vtype)
00464 {
00465 case 'A' :
00466 index = integers.Length();
00467 bamvtype = vtype;
00468 integers.Push((const int)*(valuePtr));
00469 tagBufferSize += 4;
00470 break;
00471 case 'i' :
00472 index = integers.Length();
00473 intVal = atoi((const char *)valuePtr);
00474
00475
00476
00477
00478 if(intVal < 0)
00479 {
00480
00481
00482 if(intVal > std::numeric_limits<char>::min())
00483 {
00484
00485 bamvtype = 'c';
00486 tagBufferSize += 4;
00487 }
00488 else if(intVal > std::numeric_limits<short>::min())
00489 {
00490
00491 bamvtype = 's';
00492 tagBufferSize += 5;
00493 }
00494 else
00495 {
00496
00497 bamvtype = 'i';
00498 tagBufferSize += 7;
00499 }
00500 }
00501 else
00502 {
00503
00504 if(intVal < std::numeric_limits<unsigned char>::max())
00505 {
00506
00507 bamvtype = 'C';
00508 tagBufferSize += 4;
00509 }
00510 else if(intVal < std::numeric_limits<unsigned short>::max())
00511 {
00512
00513 bamvtype = 'S';
00514 tagBufferSize += 5;
00515 }
00516 else
00517 {
00518
00519 bamvtype = 'I';
00520 tagBufferSize += 7;
00521 }
00522 }
00523 integers.Push(intVal);
00524 break;
00525 case 'Z' :
00526 index = strings.Length();
00527 bamvtype = vtype;
00528 strings.Push((const char *)valuePtr);
00529 tagBufferSize += 4 + strings.Last().Length();
00530 break;
00531 case 'f' :
00532 index = doubles.Length();
00533 bamvtype = vtype;
00534 doubles.Push(atof((const char *)valuePtr));
00535 tagBufferSize += 7;
00536 break;
00537 default :
00538 fprintf(stderr,
00539 "samFile::ReadSAM() - Unknown custom field of type %c\n",
00540 vtype);
00541 myStatus.setStatus(SamStatus::FAIL_PARSE,
00542 "Unknown custom field in a tag");
00543 status = false;
00544 }
00545
00546
00547 if(status)
00548 {
00549
00550 myNeedToSetTagsInBuffer = true;
00551 myIsTagsBufferValid = false;
00552 myIsBufferSynced = false;
00553
00554 key = MAKEKEY(tag[0], tag[1], bamvtype);
00555 extras.Add(key, index);
00556 myTagBufferSize += tagBufferSize;
00557 }
00558 return(status);
00559 }
00560
00561
00562
00563 const void* SamRecord::getRecordBuffer()
00564 {
00565 myStatus = SamStatus::SUCCESS;
00566 bool status = true;
00567 if(myIsBufferSynced == false)
00568 {
00569 status &= fixBuffer();
00570 }
00571
00572 if(myNeedToSetTagsInBuffer)
00573 {
00574 status &= setTagsInBuffer();
00575 }
00576 if(!status)
00577 {
00578 return(NULL);
00579 }
00580 return (const void *)myRecordPtr;
00581 }
00582
00583
00584
00585 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
00586 {
00587 myStatus = SamStatus::SUCCESS;
00588 if((filePtr == NULL) || (filePtr->isOpen() == false))
00589 {
00590
00591 myStatus.setStatus(SamStatus::FAIL_ORDER,
00592 "Can't write to an unopened file.");
00593 return(SamStatus::FAIL_ORDER);
00594 }
00595
00596 if(myIsBufferSynced == false)
00597 {
00598 if(!fixBuffer())
00599 {
00600 return(myStatus.getStatus());
00601 }
00602 }
00603
00604
00605 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
00606 unsigned int numBytesWritten =
00607 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
00608
00609
00610 if(numBytesToWrite == numBytesWritten)
00611 {
00612 return(SamStatus::SUCCESS);
00613 }
00614
00615 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
00616 return(SamStatus::FAIL_IO);
00617 }
00618
00619
00620 int32_t SamRecord::getBlockSize()
00621 {
00622 myStatus = SamStatus::SUCCESS;
00623
00624
00625 if(myIsBufferSynced == false)
00626 {
00627 fixBuffer();
00628 }
00629 return myRecordPtr->myBlockSize;
00630 }
00631
00632
00633
00634 const char* SamRecord::getReferenceName()
00635 {
00636 myStatus = SamStatus::SUCCESS;
00637 return myReferenceName.c_str();
00638 }
00639
00640
00641 int32_t SamRecord::getReferenceID()
00642 {
00643 myStatus = SamStatus::SUCCESS;
00644 return myRecordPtr->myReferenceID;
00645 }
00646
00647
00648 int32_t SamRecord::get1BasedPosition()
00649 {
00650 myStatus = SamStatus::SUCCESS;
00651 return (myRecordPtr->myPosition + 1);
00652 }
00653
00654
00655 int32_t SamRecord::get0BasedPosition()
00656 {
00657 myStatus = SamStatus::SUCCESS;
00658 return myRecordPtr->myPosition;
00659 }
00660
00661
00662 uint8_t SamRecord::getReadNameLength()
00663 {
00664 myStatus = SamStatus::SUCCESS;
00665
00666
00667 if(myIsReadNameBufferValid)
00668 {
00669 return(myRecordPtr->myReadNameLength);
00670 }
00671
00672 return(myReadName.Length() + 1);
00673 }
00674
00675
00676 uint8_t SamRecord::getMapQuality()
00677 {
00678 myStatus = SamStatus::SUCCESS;
00679 return myRecordPtr->myMapQuality;
00680 }
00681
00682
00683 uint16_t SamRecord::getBin()
00684 {
00685 myStatus = SamStatus::SUCCESS;
00686 if(!myIsBinValid)
00687 {
00688
00689
00690 myRecordPtr->myBin =
00691 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
00692 myIsBinValid = true;
00693 }
00694 return(myRecordPtr->myBin);
00695 }
00696
00697
00698 uint16_t SamRecord::getCigarLength()
00699 {
00700 myStatus = SamStatus::SUCCESS;
00701
00702
00703 if(myIsCigarBufferValid)
00704 {
00705 return myRecordPtr->myCigarLength;
00706 }
00707
00708 if(myCigarTempBufferLength == -1)
00709 {
00710
00711
00712 parseCigarString();
00713 }
00714
00715
00716 return(myCigarTempBufferLength);
00717 }
00718
00719
00720 uint16_t SamRecord::getFlag()
00721 {
00722 myStatus = SamStatus::SUCCESS;
00723 return myRecordPtr->myFlag;
00724 }
00725
00726
00727 int32_t SamRecord::getReadLength()
00728 {
00729 myStatus = SamStatus::SUCCESS;
00730 if(myIsSequenceBufferValid == false)
00731 {
00732
00733 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
00734 {
00735 return(0);
00736 }
00737
00738 return(mySequence.Length());
00739 }
00740 return(myRecordPtr->myReadLength);
00741 }
00742
00743
00744
00745
00746 const char* SamRecord::getMateReferenceName()
00747 {
00748 myStatus = SamStatus::SUCCESS;
00749 return myMateReferenceName.c_str();
00750 }
00751
00752
00753
00754
00755
00756 const char* SamRecord::getMateReferenceNameOrEqual()
00757 {
00758 myStatus = SamStatus::SUCCESS;
00759 if(myMateReferenceName == "*")
00760 {
00761 return(myMateReferenceName);
00762 }
00763 if(myMateReferenceName == getReferenceName())
00764 {
00765 return(FIELD_ABSENT_STRING);
00766 }
00767 else
00768 {
00769 return(myMateReferenceName);
00770 }
00771 }
00772
00773
00774 int32_t SamRecord::getMateReferenceID()
00775 {
00776 myStatus = SamStatus::SUCCESS;
00777 return myRecordPtr->myMateReferenceID;
00778 }
00779
00780
00781 int32_t SamRecord::get1BasedMatePosition()
00782 {
00783 myStatus = SamStatus::SUCCESS;
00784 return (myRecordPtr->myMatePosition + 1);
00785 }
00786
00787
00788 int32_t SamRecord::get0BasedMatePosition()
00789 {
00790 myStatus = SamStatus::SUCCESS;
00791 return myRecordPtr->myMatePosition;
00792 }
00793
00794
00795 int32_t SamRecord::getInsertSize()
00796 {
00797 myStatus = SamStatus::SUCCESS;
00798 return myRecordPtr->myInsertSize;
00799 }
00800
00801
00802
00803 int32_t SamRecord::get0BasedAlignmentEnd()
00804 {
00805 myStatus = SamStatus::SUCCESS;
00806 if(myAlignmentLength == -1)
00807 {
00808
00809 parseCigar();
00810 }
00811
00812 if(myAlignmentLength == 0)
00813 {
00814
00815 return(myRecordPtr->myPosition);
00816 }
00817 return(myRecordPtr->myPosition + myAlignmentLength - 1);
00818 }
00819
00820
00821
00822 int32_t SamRecord::get1BasedAlignmentEnd()
00823 {
00824 return(get0BasedAlignmentEnd() + 1);
00825 }
00826
00827
00828
00829 int32_t SamRecord::getAlignmentLength()
00830 {
00831 myStatus = SamStatus::SUCCESS;
00832 if(myAlignmentLength == -1)
00833 {
00834
00835 parseCigar();
00836 }
00837
00838 return(myAlignmentLength);
00839 }
00840
00841
00842 int32_t SamRecord::get0BasedUnclippedStart()
00843 {
00844 myStatus = SamStatus::SUCCESS;
00845 if(myUnclippedStartOffset == -1)
00846 {
00847
00848 parseCigar();
00849 }
00850 return(myRecordPtr->myPosition - myUnclippedStartOffset);
00851 }
00852
00853
00854
00855 int32_t SamRecord::get1BasedUnclippedStart()
00856 {
00857 return(get0BasedUnclippedStart() + 1);
00858 }
00859
00860
00861
00862 int32_t SamRecord::get0BasedUnclippedEnd()
00863 {
00864
00865
00866 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
00867 }
00868
00869
00870
00871 int32_t SamRecord::get1BasedUnclippedEnd()
00872 {
00873 return(get0BasedUnclippedEnd() + 1);
00874 }
00875
00876
00877
00878 const char* SamRecord::getReadName()
00879 {
00880 myStatus = SamStatus::SUCCESS;
00881 if(myReadName.Length() == 0)
00882 {
00883
00884
00885 myReadName = (char*)&(myRecordPtr->myData);
00886 }
00887 return myReadName.c_str();
00888 }
00889
00890
00891 const char* SamRecord::getCigar()
00892 {
00893 myStatus = SamStatus::SUCCESS;
00894 if(myCigar.Length() == 0)
00895 {
00896
00897
00898 parseCigarBinary();
00899 }
00900 return myCigar.c_str();
00901 }
00902
00903
00904 const char* SamRecord::getSequence()
00905 {
00906 myStatus = SamStatus::SUCCESS;
00907 if(mySequence.Length() == 0)
00908 {
00909
00910
00911 setSequenceAndQualityFromBuffer();
00912 }
00913 return mySequence.c_str();
00914 }
00915
00916
00917 const char* SamRecord::getQuality()
00918 {
00919 myStatus = SamStatus::SUCCESS;
00920 if(myQuality.Length() == 0)
00921 {
00922
00923
00924 setSequenceAndQualityFromBuffer();
00925 }
00926 return myQuality.c_str();
00927 }
00928
00929
00930 char SamRecord::getSequence(int index)
00931 {
00932 static const char * asciiBases = "0AC.G...T......N";
00933
00934
00935 int32_t readLen = getReadLength();
00936
00937
00938 if(readLen == 0)
00939 {
00940 String exceptionString = "SamRecord::getSequence(";
00941 exceptionString += index;
00942 exceptionString += ") is not allowed since sequence = '*'";
00943 throw std::runtime_error(exceptionString.c_str());
00944 }
00945 else if((index < 0) || (index >= readLen))
00946 {
00947
00948 String exceptionString = "SamRecord::getSequence(";
00949 exceptionString += index;
00950 exceptionString += ") is out of range. Index must be between 0 and ";
00951 exceptionString += (readLen - 1);
00952 throw std::runtime_error(exceptionString.c_str());
00953 }
00954
00955 if(mySequence.Length() == 0)
00956 {
00957
00958
00959 unsigned char * packedSequence =
00960 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
00961 myRecordPtr->myCigarLength * sizeof(int);
00962
00963 return(index & 1 ?
00964 asciiBases[packedSequence[index / 2] & 0xF] :
00965 asciiBases[packedSequence[index / 2] >> 4]);
00966 }
00967 else
00968 {
00969
00970 return(mySequence[index]);
00971 }
00972 }
00973
00974 char SamRecord::getQuality(int index)
00975 {
00976
00977 int32_t readLen = getReadLength();
00978
00979
00980
00981 if(readLen == 0)
00982 {
00983 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
00984 }
00985 else if((index < 0) || (index >= readLen))
00986 {
00987
00988 String exceptionString = "SamRecord::getQuality(";
00989 exceptionString += index;
00990 exceptionString += ") is out of range. Index must be between 0 and ";
00991 exceptionString += (readLen - 1);
00992 throw std::runtime_error(exceptionString.c_str());
00993 }
00994
00995 if(myQuality.Length() == 0)
00996 {
00997
00998 unsigned char * packedQuality =
00999 (unsigned char *)myRecordPtr->myData +
01000 myRecordPtr->myReadNameLength +
01001 myRecordPtr->myCigarLength * sizeof(int) +
01002 (myRecordPtr->myReadLength + 1) / 2;
01003 return(packedQuality[index] + 33);
01004 }
01005 else
01006 {
01007
01008 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01009 {
01010
01011 return(0xFF);
01012 }
01013 else
01014 {
01015 return(myQuality[index]);
01016 }
01017 }
01018 }
01019
01020
01021 Cigar* SamRecord::getCigarInfo()
01022 {
01023
01024
01025
01026
01027 if(myAlignmentLength == -1)
01028 {
01029
01030 parseCigar();
01031 }
01032 return(&myCigarRoller);
01033 }
01034
01035
01036 uint32_t SamRecord::getTagLength()
01037 {
01038 myStatus = SamStatus::SUCCESS;
01039 if(myNeedToSetTagsFromBuffer)
01040 {
01041
01042
01043 unsigned char * tagStart =
01044 (unsigned char *)myRecordPtr->myData
01045 + myRecordPtr->myReadNameLength
01046 + myRecordPtr->myCigarLength * sizeof(int)
01047 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01048
01049
01050
01051
01052 uint32_t nonTagSize =
01053 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01054
01055 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01056 return(tagSize);
01057 }
01058
01059
01060 return(myTagBufferSize);
01061 }
01062
01063
01064
01065
01066
01067
01068
01069 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01070 {
01071 myStatus = SamStatus::SUCCESS;
01072 if(myNeedToSetTagsFromBuffer)
01073 {
01074 if(!setTagsFromBuffer())
01075 {
01076
01077
01078 return(false);
01079 }
01080 }
01081
01082
01083
01084 myLastTagIndex++;
01085 int maxTagIndex = extras.Capacity();
01086 if(myLastTagIndex >= maxTagIndex)
01087 {
01088
01089
01090
01091 return(false);
01092 }
01093
01094 bool tagFound = false;
01095
01096 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01097 {
01098 if(extras.SlotInUse(myLastTagIndex))
01099 {
01100
01101 int key = extras.GetKey(myLastTagIndex);
01102 getTag(key, tag);
01103 getVtype(key, vtype);
01104 tagFound = true;
01105
01106 switch (vtype)
01107 {
01108 case 'A' :
01109 *value = getIntegerPtr(myLastTagIndex);
01110 break;
01111 case 'f' :
01112 *value = getDoublePtr(myLastTagIndex);
01113 break;
01114 case 'c' :
01115 case 'C' :
01116 case 's' :
01117 case 'S' :
01118 case 'i' :
01119 case 'I' :
01120 vtype = 'i';
01121 *value = getIntegerPtr(myLastTagIndex);
01122 break;
01123 case 'Z' :
01124 *value = getStringPtr(myLastTagIndex);
01125 break;
01126 default:
01127 myStatus.setStatus(SamStatus::FAIL_PARSE,
01128 "Unknown tag type");
01129 tagFound = false;
01130 break;
01131 }
01132 }
01133 if(!tagFound)
01134 {
01135
01136 myLastTagIndex++;
01137 }
01138 }
01139 return(tagFound);
01140 }
01141
01142
01143
01144 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01145 String& cigar, String& sequence, String& quality)
01146 {
01147 myStatus = SamStatus::SUCCESS;
01148 if(myIsBufferSynced == false)
01149 {
01150 if(!fixBuffer())
01151 {
01152
01153 return(false);
01154 }
01155 }
01156 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01157
01158 readName = getReadName();
01159
01160 if(myStatus != SamStatus::SUCCESS)
01161 {
01162
01163 return(false);
01164 }
01165 cigar = getCigar();
01166
01167 if(myStatus != SamStatus::SUCCESS)
01168 {
01169
01170 return(false);
01171 }
01172 sequence = getSequence();
01173
01174 if(myStatus != SamStatus::SUCCESS)
01175 {
01176
01177 return(false);
01178 }
01179 quality = getQuality();
01180
01181 if(myStatus != SamStatus::SUCCESS)
01182 {
01183
01184 return(false);
01185 }
01186 return(true);
01187 }
01188
01189
01190 bool SamRecord::isIntegerType(char vtype) const
01191 {
01192 if((vtype == 'c') || (vtype == 'C') ||
01193 (vtype == 's') || (vtype == 'S') ||
01194 (vtype == 'i') || (vtype == 'I'))
01195 {
01196 return(true);
01197 }
01198 return(false);
01199 }
01200
01201
01202 bool SamRecord::isDoubleType(char vtype) const
01203 {
01204 if(vtype == 'f')
01205 {
01206 return(true);
01207 }
01208 return(false);
01209 }
01210
01211
01212 bool SamRecord::isCharType(char vtype) const
01213 {
01214 if(vtype == 'A')
01215 {
01216 return(true);
01217 }
01218 return(false);
01219 }
01220
01221
01222 bool SamRecord::isStringType(char vtype) const
01223 {
01224 if(vtype == 'Z')
01225 {
01226 return(true);
01227 }
01228 return(false);
01229 }
01230
01231
01232 void SamRecord::clearTags()
01233 {
01234 if(extras.Entries() != 0)
01235 {
01236 extras.Clear();
01237 }
01238 strings.Clear();
01239 integers.Clear();
01240 doubles.Clear();
01241 myTagBufferSize = 0;
01242 resetTagIter();
01243 }
01244
01245
01246
01247 const SamStatus& SamRecord::getStatus()
01248 {
01249 return(myStatus);
01250 }
01251
01252
01253 String & SamRecord::getString(const char * tag)
01254 {
01255
01256 myStatus = SamStatus::SUCCESS;
01257
01258 if(myNeedToSetTagsFromBuffer)
01259 {
01260 if(!setTagsFromBuffer())
01261 {
01262
01263
01264
01265 }
01266 }
01267
01268 int key = MAKEKEY(tag[0], tag[1], 'Z');
01269 int offset = extras.Find(key);
01270
01271 int value;
01272 if (offset < 0)
01273 {
01274
01275 return(NOT_FOUND_TAG_STRING);
01276 }
01277 else
01278 value = extras[offset];
01279
01280 return strings[value];
01281 }
01282
01283 int & SamRecord::getInteger(const char * tag)
01284 {
01285
01286 myStatus = SamStatus::SUCCESS;
01287
01288 if(myNeedToSetTagsFromBuffer)
01289 {
01290 if(!setTagsFromBuffer())
01291 {
01292
01293
01294
01295 }
01296 }
01297
01298 int key = MAKEKEY(tag[0], tag[1], 'i');
01299 int offset = extras.Find(key);
01300
01301 int value;
01302 if (offset < 0)
01303 {
01304
01305 return NOT_FOUND_TAG_INT;
01306 }
01307 else
01308 value = extras[offset];
01309
01310 return integers[value];
01311 }
01312
01313 double & SamRecord::getDouble(const char * tag)
01314 {
01315
01316 myStatus = SamStatus::SUCCESS;
01317
01318 if(myNeedToSetTagsFromBuffer)
01319 {
01320 if(!setTagsFromBuffer())
01321 {
01322
01323
01324
01325 }
01326 }
01327
01328 int key = MAKEKEY(tag[0], tag[1], 'f');
01329 int offset = extras.Find(key);
01330
01331 int value;
01332 if (offset < 0)
01333 {
01334
01335 return NOT_FOUND_TAG_DOUBLE;
01336 }
01337 else
01338 value = extras[offset];
01339
01340 return doubles[value];
01341 }
01342
01343
01344 bool SamRecord::checkTag(const char * tag, char type)
01345 {
01346
01347 myStatus = SamStatus::SUCCESS;
01348
01349 if(myNeedToSetTagsFromBuffer)
01350 {
01351 if(!setTagsFromBuffer())
01352 {
01353
01354
01355 return("");
01356 }
01357 }
01358
01359 int key = MAKEKEY(tag[0], tag[1], type);
01360
01361 return (extras.Find(key) != LH_NOTFOUND);
01362 }
01363
01364
01365
01366
01367 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01368 {
01369
01370
01371 if(myAlignmentLength == -1)
01372 {
01373 parseCigar();
01374 }
01375 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01376 }
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397
01398
01399
01400
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412
01413
01414
01415
01416
01417
01418
01419
01420
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430
01431
01432
01433 bool SamRecord::allocateRecordStructure(int size)
01434 {
01435 if (allocatedSize < size)
01436 {
01437 bamRecordStruct* tmpRecordPtr =
01438 (bamRecordStruct *)realloc(myRecordPtr, size);
01439 if(tmpRecordPtr == NULL)
01440 {
01441
01442 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01443 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
01444 return(false);
01445 }
01446
01447 myRecordPtr = tmpRecordPtr;
01448 allocatedSize = size;
01449 }
01450 return(true);
01451 }
01452
01453
01454
01455 void* SamRecord::getStringPtr(int index)
01456 {
01457 int value = extras[index];
01458
01459 return &(strings[value]);
01460 }
01461
01462 void* SamRecord::getIntegerPtr(int offset)
01463 {
01464 int value = extras[offset];
01465
01466 return &(integers[value]);
01467 }
01468
01469 void* SamRecord::getDoublePtr(int offset)
01470 {
01471 int value = extras[offset];
01472
01473 return &(doubles[value]);
01474 }
01475
01476
01477
01478 bool SamRecord::fixBuffer()
01479 {
01480
01481 if(myIsBufferSynced)
01482 {
01483
01484 return(true);
01485 }
01486
01487
01488 if(!myIsBinValid)
01489 {
01490
01491
01492 myRecordPtr->myBin =
01493 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
01494 myIsBinValid = true;
01495 }
01496
01497
01498 bool status = true;
01499
01500
01501 uint8_t newReadNameLen = getReadNameLength();
01502 uint16_t newCigarLen = getCigarLength();
01503 int32_t newReadLen = getReadLength();
01504 uint32_t newTagLen = getTagLength();
01505 uint32_t bamSequenceLen = (newReadLen+1)/2;
01506
01507
01508
01509
01510 int newBufferSize =
01511 ((unsigned char*)(&(myRecordPtr->myData)) -
01512 (unsigned char*)myRecordPtr) +
01513 newReadNameLen + ((newCigarLen)*4) +
01514 newReadLen + bamSequenceLen + newTagLen;
01515
01516 if(!allocateRecordStructure(newBufferSize))
01517 {
01518
01519 return(false);
01520 }
01521
01522
01523
01524
01525
01526
01527 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
01528 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
01529 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
01530
01531
01532
01533 if(myIsTagsBufferValid &&
01534 (readNameLenChange | cigarLenChange | readLenChange))
01535 {
01536 status &= setTagsFromBuffer();
01537
01538
01539 myIsTagsBufferValid = false;
01540 }
01541
01542
01543
01544
01545 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
01546 (readNameLenChange | cigarLenChange | readLenChange))
01547 {
01548 setSequenceAndQualityFromBuffer();
01549
01550
01551 myIsQualityBufferValid = false;
01552 myIsSequenceBufferValid = false;
01553 }
01554
01555
01556
01557 if((myIsCigarBufferValid) &&
01558 (readNameLenChange))
01559 {
01560 status &= parseCigarBinary();
01561 myIsCigarBufferValid = false;
01562 }
01563
01564
01565 if(!myIsReadNameBufferValid)
01566 {
01567 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
01568 newReadNameLen);
01569
01570
01571 myRecordPtr->myReadNameLength = newReadNameLen;
01572 myIsReadNameBufferValid = true;
01573 }
01574
01575 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
01576 myRecordPtr->myReadNameLength;
01577
01578
01579 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
01580
01581 if(!myIsCigarBufferValid)
01582 {
01583
01584
01585 myRecordPtr->myCigarLength = newCigarLen;
01586 memcpy(packedCigar, myCigarTempBuffer,
01587 myRecordPtr->myCigarLength * sizeof(uint32_t));
01588
01589 myIsCigarBufferValid = true;
01590 }
01591
01592 unsigned char * packedSequence = readNameEnds +
01593 myRecordPtr->myCigarLength * sizeof(int);
01594 unsigned char * packedQuality = packedSequence + bamSequenceLen;
01595
01596 if(!myIsSequenceBufferValid || !myIsQualityBufferValid)
01597 {
01598 myRecordPtr->myReadLength = newReadLen;
01599
01600
01601 bool noQuality = false;
01602 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01603 {
01604 noQuality = true;
01605 }
01606
01607 for (int i = 0; i < myRecordPtr->myReadLength; i++)
01608 {
01609 if(!myIsSequenceBufferValid)
01610 {
01611
01612 int seqVal = 0;
01613 switch(mySequence[i])
01614 {
01615 case '=':
01616 seqVal = 0;
01617 break;
01618 case 'A':
01619 case 'a':
01620 seqVal = 1;
01621 break;
01622 case 'C':
01623 case 'c':
01624 seqVal = 2;
01625 break;
01626 case 'G':
01627 case 'g':
01628 seqVal = 4;
01629 break;
01630 case 'T':
01631 case 't':
01632 seqVal = 8;
01633 break;
01634 case 'N':
01635 case 'n':
01636 case '.':
01637 seqVal = 15;
01638 break;
01639 default:
01640 myStatus.addError(SamStatus::FAIL_PARSE,
01641 "Unknown Sequence character found.");
01642 status = false;
01643 break;
01644 };
01645
01646 if(i & 1)
01647 {
01648
01649
01650 packedSequence[i/2] |= seqVal;
01651 }
01652 else
01653 {
01654
01655 packedSequence[i/2] = seqVal << 4;
01656 }
01657 }
01658
01659 if(!myIsQualityBufferValid)
01660 {
01661
01662 if((noQuality) || (myQuality.Length() <= i))
01663 {
01664
01665
01666 packedQuality[i] = 0xFF;
01667 }
01668 else
01669 {
01670
01671 packedQuality[i] = myQuality[i] - 33;
01672 }
01673 }
01674 }
01675 myIsQualityBufferValid = true;
01676 myIsSequenceBufferValid = true;
01677 }
01678
01679 if(!myIsTagsBufferValid)
01680 {
01681 status &= setTagsInBuffer();
01682 }
01683
01684
01685 myRecordPtr->myReadNameLength = newReadNameLen;
01686 myRecordPtr->myCigarLength = newCigarLen;
01687 myRecordPtr->myReadLength = newReadLen;
01688
01689
01690
01691 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
01692
01693 if(status)
01694 {
01695 myIsBufferSynced = true;
01696 }
01697
01698 return(status);
01699 }
01700
01701
01702
01703
01704
01705 void SamRecord::setSequenceAndQualityFromBuffer()
01706 {
01707
01708
01709
01710
01711
01712
01713 bool extractSequence = false;
01714 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
01715 {
01716 extractSequence = true;
01717 }
01718
01719
01720 bool extractQuality = false;
01721 if(myIsQualityBufferValid && (myQuality.Length() == 0))
01722 {
01723 extractQuality = true;
01724 }
01725
01726
01727
01728 if(!extractSequence && !extractQuality)
01729 {
01730 return;
01731 }
01732
01733
01734 if(extractSequence)
01735 {
01736 mySequence.SetLength(myRecordPtr->myReadLength);
01737 }
01738 if(extractQuality)
01739 {
01740 myQuality.SetLength(myRecordPtr->myReadLength);
01741 }
01742
01743 unsigned char * readNameEnds =
01744 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
01745 unsigned char * packedSequence =
01746 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
01747 unsigned char * packedQuality =
01748 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
01749
01750 const char * asciiBases = "0AC.G...T......N";
01751
01752
01753
01754 bool qualitySpecified = false;
01755
01756 for (int i = 0; i < myRecordPtr->myReadLength; i++)
01757 {
01758 if(extractSequence)
01759 {
01760 mySequence[i] = i & 1 ?
01761 asciiBases[packedSequence[i / 2] & 0xF] :
01762 asciiBases[packedSequence[i / 2] >> 4];
01763 }
01764
01765 if(extractQuality)
01766 {
01767 if(packedQuality[i] != 0xFF)
01768 {
01769
01770 qualitySpecified = true;
01771 }
01772
01773 myQuality[i] = packedQuality[i] + 33;
01774 }
01775 }
01776
01777
01778 if(myRecordPtr->myReadLength == 0)
01779 {
01780 if(extractSequence)
01781 {
01782 mySequence = "*";
01783 }
01784 if(extractQuality)
01785 {
01786 myQuality = "*";
01787 }
01788 }
01789 else if(extractQuality && !qualitySpecified)
01790 {
01791
01792 myQuality = "*";
01793 }
01794 }
01795
01796
01797
01798 bool SamRecord::parseCigar()
01799 {
01800
01801 if(myCigar.Length() == 0)
01802 {
01803
01804 return(parseCigarBinary());
01805 }
01806 return(parseCigarString());
01807 }
01808
01809
01810 bool SamRecord::parseCigarBinary()
01811 {
01812
01813
01814
01815 if(myCigar.Length() != 0)
01816 {
01817
01818 return(true);
01819 }
01820
01821 unsigned char * readNameEnds =
01822 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
01823
01824 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
01825
01826 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
01827
01828 myCigarRoller.getCigarString(myCigar);
01829
01830 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
01831
01832 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
01833 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
01834
01835
01836 if(myRecordPtr->myCigarLength == 0)
01837 {
01838 myCigar = "*";
01839 return(true);
01840 }
01841
01842
01843 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
01844 if(newBufferSize > myCigarTempBufferAllocatedSize)
01845 {
01846 uint32_t* tempBufferPtr =
01847 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
01848 if(tempBufferPtr == NULL)
01849 {
01850
01851
01852 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01853 myStatus.addError(SamStatus::FAIL_MEM,
01854 "Failed to Allocate Memory.");
01855 return(false);
01856 }
01857 myCigarTempBuffer = tempBufferPtr;
01858 myCigarTempBufferAllocatedSize = newBufferSize;
01859 }
01860
01861 memcpy(myCigarTempBuffer, packedCigar,
01862 myRecordPtr->myCigarLength * sizeof(uint32_t));
01863
01864
01865 myCigarTempBufferLength = myRecordPtr->myCigarLength;
01866
01867 return(true);
01868 }
01869
01870
01871 bool SamRecord::parseCigarString()
01872 {
01873 myCigarTempBufferLength = 0;
01874 if(myCigar == "*")
01875 {
01876
01877 myAlignmentLength = 0;
01878 myUnclippedStartOffset = 0;
01879 myUnclippedEndOffset = 0;
01880 myCigarRoller.clear();
01881 return(true);
01882 }
01883
01884 myCigarRoller.Set(myCigar);
01885
01886 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
01887
01888 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
01889 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
01890
01891
01892
01893
01894 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
01895 if(newBufferSize > myCigarTempBufferAllocatedSize)
01896 {
01897 uint32_t* tempBufferPtr =
01898 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
01899 if(tempBufferPtr == NULL)
01900 {
01901
01902
01903 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01904 myStatus.addError(SamStatus::FAIL_MEM,
01905 "Failed to Allocate Memory.");
01906 return(false);
01907 }
01908 myCigarTempBuffer = tempBufferPtr;
01909 myCigarTempBufferAllocatedSize = newBufferSize;
01910 }
01911
01912
01913 bool status = true;
01914
01915
01916 char *cigarOp;
01917 const char* cigarEntryStart = myCigar.c_str();
01918 int opLen = 0;
01919 int op = 0;
01920
01921 unsigned int * packedCigar = myCigarTempBuffer;
01922
01923
01924 const char* endCigarString = cigarEntryStart + myCigar.Length();
01925 while(cigarEntryStart < endCigarString)
01926 {
01927 bool validCigarEntry = true;
01928
01929
01930 opLen = strtol(cigarEntryStart, &cigarOp, 10);
01931
01932 switch(*cigarOp)
01933 {
01934 case('M'):
01935 op = 0;
01936 break;
01937 case('I'):
01938
01939
01940 op = 1;
01941 break;
01942 case('D'):
01943 op = 2;
01944 break;
01945 case('N'):
01946 op = 3;
01947 break;
01948 case('S'):
01949 op = 4;
01950 break;
01951 case('H'):
01952 op = 5;
01953 break;
01954 case('P'):
01955 op = 6;
01956 break;
01957 default:
01958 fprintf(stderr, "ERROR parsing cigar\n");
01959 validCigarEntry = false;
01960 status = false;
01961 myStatus.addError(SamStatus::FAIL_PARSE,
01962 "Unknown operation found when parsing the Cigar.");
01963 break;
01964 };
01965 if(validCigarEntry)
01966 {
01967
01968 ++myCigarTempBufferLength;
01969 *packedCigar = (opLen << 4) | op;
01970 packedCigar++;
01971 }
01972
01973 cigarEntryStart = ++cigarOp;
01974 }
01975
01976
01977 return(status);
01978 }
01979
01980
01981 bool SamRecord::setTagsFromBuffer()
01982 {
01983
01984 if(myNeedToSetTagsFromBuffer == false)
01985 {
01986
01987 return(true);
01988 }
01989
01990
01991 myNeedToSetTagsFromBuffer = false;
01992
01993 unsigned char * readNameEnds =
01994 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
01995 unsigned char * packedSequence =
01996 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
01997 unsigned char * packedQuality =
01998 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
01999
02000 unsigned char * extraPtr = packedQuality + myRecordPtr->myReadLength;
02001
02002
02003 bool status = true;
02004
02005
02006 clearTags();
02007 while (myRecordPtr->myBlockSize + 4 -
02008 (extraPtr - (unsigned char *)myRecordPtr) > 0)
02009 {
02010 int key;
02011 int value;
02012 void * content = extraPtr + 3;
02013 int tagBufferSize = 0;
02014
02015 switch (extraPtr[2])
02016 {
02017 case 'A' :
02018 key = MAKEKEY(extraPtr[0], extraPtr[1], 'A');
02019 value = integers.Length();
02020 integers.Push(* (char *) content);
02021 extraPtr += 4;
02022 tagBufferSize += 4;
02023 break;
02024 case 'c' :
02025 key = MAKEKEY(extraPtr[0], extraPtr[1], 'c');
02026 value = integers.Length();
02027 integers.Push(* (char *) content);
02028 extraPtr += 4;
02029 tagBufferSize += 4;
02030 break;
02031 case 'C' :
02032 key = MAKEKEY(extraPtr[0], extraPtr[1], 'C');
02033 value = integers.Length();
02034 integers.Push(* (unsigned char *) content);
02035 extraPtr += 4;
02036 tagBufferSize += 4;
02037 break;
02038 case 's' :
02039 key = MAKEKEY(extraPtr[0], extraPtr[1], 's');
02040 value = integers.Length();
02041 integers.Push(* (short *) content);
02042 extraPtr += 5;
02043 tagBufferSize += 5;
02044 break;
02045 case 'S' :
02046 key = MAKEKEY(extraPtr[0], extraPtr[1], 'S');
02047 value = integers.Length();
02048 integers.Push(* (unsigned short *) content);
02049 extraPtr += 5;
02050 tagBufferSize += 5;
02051 break;
02052 case 'i' :
02053 key = MAKEKEY(extraPtr[0], extraPtr[1], 'i');
02054 value = integers.Length();
02055 integers.Push(* (int *) content);
02056 extraPtr += 7;
02057 tagBufferSize += 7;
02058 break;
02059 case 'I' :
02060 key = MAKEKEY(extraPtr[0], extraPtr[1], 'I');
02061 value = integers.Length();
02062 integers.Push((int) * (unsigned int *) content);
02063 extraPtr += 7;
02064 tagBufferSize += 7;
02065 break;
02066 case 'Z' :
02067 key = MAKEKEY(extraPtr[0], extraPtr[1], 'Z');
02068 value = strings.Length();
02069 strings.Push((const char *) content);
02070 extraPtr += 4 + strings.Last().Length();
02071 tagBufferSize += 4 + strings.Last().Length();
02072 break;
02073 case 'f' :
02074 key = MAKEKEY(extraPtr[0], extraPtr[1], 'f');
02075 value = doubles.Length();
02076 doubles.Push(* (float *) content);
02077 fprintf(stderr, "\n\nFLOAT!!!\n\n");
02078 extraPtr += 7;
02079 tagBufferSize += 7;
02080 break;
02081 default :
02082 fprintf(stderr,
02083 "parsing BAM - Unknown custom field of type %c%c:%c\n",
02084 extraPtr[0], extraPtr[1], extraPtr[2]);
02085
02086
02087 extraPtr += 3;
02088 myStatus.addError(SamStatus::FAIL_PARSE,
02089 "Unknown tag type.");
02090 status = false;
02091 }
02092
02093
02094 if(status)
02095 {
02096 extras.Add(key, value);
02097 myTagBufferSize += tagBufferSize;
02098 }
02099 }
02100 return(status);
02101 }
02102
02103
02104 bool SamRecord::setTagsInBuffer()
02105 {
02106
02107
02108
02109 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
02110 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
02111 (unsigned char*)myRecordPtr) +
02112 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
02113 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
02114
02115
02116 if(!allocateRecordStructure(newBufferSize))
02117 {
02118
02119 return(false);
02120 }
02121
02122 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02123 myRecordPtr->myReadNameLength;
02124 unsigned char * packedSequence = readNameEnds +
02125 myRecordPtr->myCigarLength * sizeof(int);
02126 unsigned char * packedQuality =
02127 packedSequence + bamSequenceLength;
02128
02129 char * extraPtr = (char*)packedQuality + myRecordPtr->myReadLength;
02130
02131 bool status = true;
02132
02133
02134 if (extras.Entries())
02135 {
02136 for (int i = 0; i < extras.Capacity(); i++)
02137 {
02138 if (extras.SlotInUse(i))
02139 {
02140 int key = extras.GetKey(i);
02141 getTag(key, extraPtr);
02142 extraPtr += 2;
02143 getVtype(key, extraPtr[0]);
02144 char vtype = extraPtr[0];
02145
02146
02147 extraPtr += 1;
02148
02149 switch (vtype)
02150 {
02151 case 'A' :
02152 *(char*)extraPtr = (char)getInteger(i);
02153
02154 extraPtr += 1;
02155 break;
02156 case 'c' :
02157 *(int8_t*)extraPtr = (int8_t)getInteger(i);
02158
02159 extraPtr += 1;
02160 break;
02161 case 'C' :
02162 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
02163
02164 extraPtr += 1;
02165 break;
02166 case 's' :
02167 *(int16_t*)extraPtr = (int16_t)getInteger(i);
02168
02169 extraPtr += 2;
02170 break;
02171 case 'S' :
02172 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
02173
02174 extraPtr += 2;
02175 break;
02176 case 'i' :
02177 *(int32_t*)extraPtr = (int32_t)getInteger(i);
02178
02179 extraPtr += 4;
02180 break;
02181 case 'I' :
02182 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
02183
02184 extraPtr += 4;
02185 break;
02186 case 'Z' :
02187 sprintf(extraPtr, "%s", getString(i).c_str());
02188 extraPtr += getString(i).Length() + 1;
02189 break;
02190 case 'f' :
02191
02192 sprintf(extraPtr, "%f", getDouble(i));
02193 extraPtr += 4;
02194 break;
02195 default :
02196 myStatus.addError(SamStatus::FAIL_PARSE,
02197 "Unknown tag type.");
02198 status = false;
02199 break;
02200 }
02201 }
02202 }
02203 }
02204
02205
02206
02207 if(extraPtr != (char*)myRecordPtr + newBufferSize)
02208 {
02209 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
02210 myStatus.addError(SamStatus::FAIL_PARSE,
02211 "ERROR updating the buffer. Incorrect size.");
02212 status = false;
02213 }
02214
02215
02216
02217 myNeedToSetTagsInBuffer = false;
02218 myIsTagsBufferValid = true;
02219 return(status);
02220 }
02221
02222
02223
02224
02225
02226 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
02227 {
02228
02229
02230 myReferenceName =
02231 header.getReferenceLabel(myRecordPtr->myReferenceID);
02232 myMateReferenceName =
02233 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
02234
02235
02236 myReadName.SetLength(0);
02237 myCigar.SetLength(0);
02238 mySequence.SetLength(0);
02239 myQuality.SetLength(0);
02240 myNeedToSetTagsFromBuffer = true;
02241 myNeedToSetTagsInBuffer = false;
02242
02243
02244 myIsBufferSynced = true;
02245
02246 myIsReadNameBufferValid = true;
02247 myIsCigarBufferValid = true;
02248 myIsSequenceBufferValid = true;
02249 myIsQualityBufferValid = true;
02250 myIsTagsBufferValid = true;
02251 myIsBinValid = true;
02252 }
02253
02254
02255
02256 void SamRecord::getVtype(int key, char& vtype) const
02257 {
02258
02259 vtype = (key >> 16) & 0xFF;
02260 }
02261
02262
02263 void SamRecord::getTag(int key, char* tag) const
02264 {
02265
02266 tag[0] = key & 0xFF;
02267 tag[1] = (key >> 8) & 0xFF;
02268 tag[2] = 0;
02269 }
02270
02271
02272
02273 String & SamRecord::getString(int index)
02274 {
02275 int value = extras[index];
02276
02277 return strings[value];
02278 }
02279
02280 int & SamRecord::getInteger(int offset)
02281 {
02282 int value = extras[offset];
02283
02284 return integers[value];
02285 }
02286
02287 double & SamRecord::getDouble(int offset)
02288 {
02289 int value = extras[offset];
02290
02291 return doubles[value];
02292 }
02293
02294