00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021
00022 #include "bam.h"
00023
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026
00027 #include "BaseUtilities.h"
00028 #include "SamQuerySeqWithRefHelper.h"
00029
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032
00033
00034 SamRecord::SamRecord()
00035 : myStatus(),
00036 myRefPtr(NULL),
00037 mySequenceTranslation(NONE)
00038 {
00039 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00040
00041 myRecordPtr =
00042 (bamRecordStruct *) malloc(defaultAllocSize);
00043
00044 myCigarTempBuffer = NULL;
00045 myCigarTempBufferAllocatedSize = 0;
00046
00047 allocatedSize = defaultAllocSize;
00048
00049 resetRecord();
00050 }
00051
00052
00053 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00054 : myStatus(errorHandlingType),
00055 myRefPtr(NULL),
00056 mySequenceTranslation(NONE)
00057 {
00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00059
00060 myRecordPtr =
00061 (bamRecordStruct *) malloc(defaultAllocSize);
00062
00063 myCigarTempBuffer = NULL;
00064 myCigarTempBufferAllocatedSize = 0;
00065
00066 allocatedSize = defaultAllocSize;
00067
00068 resetRecord();
00069 }
00070
00071
00072 SamRecord::~SamRecord()
00073 {
00074 resetRecord();
00075
00076 if(myRecordPtr != NULL)
00077 {
00078 free(myRecordPtr);
00079 myRecordPtr = NULL;
00080 }
00081 if(myCigarTempBuffer != NULL)
00082 {
00083 free(myCigarTempBuffer);
00084 myCigarTempBuffer = NULL;
00085 myCigarTempBufferAllocatedSize = 0;
00086 }
00087 }
00088
00089
00090
00091 void SamRecord::resetRecord()
00092 {
00093 myIsBufferSynced = true;
00094
00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00096 myRecordPtr->myReferenceID = -1;
00097 myRecordPtr->myPosition = -1;
00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00099 myRecordPtr->myMapQuality = 0;
00100 myRecordPtr->myBin = DEFAULT_BIN;
00101 myRecordPtr->myCigarLength = 0;
00102 myRecordPtr->myFlag = 0;
00103 myRecordPtr->myReadLength = 0;
00104 myRecordPtr->myMateReferenceID = -1;
00105 myRecordPtr->myMatePosition = -1;
00106 myRecordPtr->myInsertSize = 0;
00107
00108
00109
00110
00111 myReadName = DEFAULT_READ_NAME;
00112 myReferenceName = "*";
00113 myMateReferenceName = "*";
00114 myCigar = "*";
00115 mySequence = "*";
00116 mySeqWithEq.clear();
00117 mySeqWithoutEq.clear();
00118 myQuality = "*";
00119 myNeedToSetTagsFromBuffer = false;
00120 myNeedToSetTagsInBuffer = false;
00121
00122
00123 myAlignmentLength = -1;
00124 myUnclippedStartOffset = -1;
00125 myUnclippedEndOffset = -1;
00126
00127 clearTags();
00128
00129
00130
00131
00132
00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
00134 myRecordPtr->myReadNameLength);
00135
00136
00137 myIsReadNameBufferValid = true;
00138 myIsCigarBufferValid = true;
00139 myIsSequenceBufferValid = true;
00140 myBufferSequenceTranslation = NONE;
00141 myIsQualityBufferValid = true;
00142 myIsTagsBufferValid = true;
00143 myIsBinValid = true;
00144
00145 myCigarTempBufferLength = -1;
00146
00147 myStatus = SamStatus::SUCCESS;
00148
00149 NOT_FOUND_TAG_STRING = "";
00150 NOT_FOUND_TAG_INT = -1;
00151 NOT_FOUND_TAG_DOUBLE = -1;
00152 }
00153
00154
00155
00156 void SamRecord::resetTagIter()
00157 {
00158 myLastTagIndex = -1;
00159 }
00160
00161
00162
00163
00164 bool SamRecord::isValid(SamFileHeader& header)
00165 {
00166 myStatus = SamStatus::SUCCESS;
00167 SamValidationErrors invalidSamErrors;
00168 if(!SamValidator::isValid(header, *this, invalidSamErrors))
00169 {
00170
00171 std::string errorMessage = "";
00172 invalidSamErrors.getErrorString(errorMessage);
00173 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00174 return(false);
00175 }
00176
00177 return(true);
00178 }
00179
00180
00181
00182 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr,
00183 SamFileHeader& header)
00184 {
00185 myStatus = SamStatus::SUCCESS;
00186 if((filePtr == NULL) || (filePtr->isOpen() == false))
00187 {
00188
00189 myStatus.setStatus(SamStatus::FAIL_ORDER,
00190 "Can't read from an unopened file.");
00191 return(SamStatus::FAIL_ORDER);
00192 }
00193
00194
00195 resetRecord();
00196
00197
00198 int numBytes =
00199 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00200
00201 if(ifeof(filePtr))
00202 {
00203 if(numBytes == 0)
00204 {
00205
00206 myStatus.setStatus(SamStatus::NO_MORE_RECS,
00207 "No more records left to read.");
00208 return(SamStatus::NO_MORE_RECS);
00209 }
00210 else
00211 {
00212
00213
00214 myStatus.setStatus(SamStatus::FAIL_PARSE,
00215 "EOF reached in the middle of a record.");
00216 return(SamStatus::FAIL_PARSE);
00217 }
00218 }
00219
00220
00221 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00222 {
00223
00224
00225 return(SamStatus::FAIL_MEM);
00226 }
00227
00228
00229 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00230 != (unsigned int)myRecordPtr->myBlockSize)
00231 {
00232
00233 resetRecord();
00234 myStatus.setStatus(SamStatus::FAIL_IO,
00235 "Failed to read the record");
00236 return(SamStatus::FAIL_IO);
00237 }
00238
00239 setVariablesForNewBuffer(header);
00240
00241
00242 return(SamStatus::SUCCESS);
00243 }
00244
00245
00246 void SamRecord::setReference(GenomeSequence* reference)
00247 {
00248 myRefPtr = reference;
00249 }
00250
00251
00252
00253
00254
00255 void SamRecord::setSequenceTranslation(SequenceTranslation translation)
00256 {
00257 mySequenceTranslation = translation;
00258 }
00259
00260
00261 bool SamRecord::setReadName(const char* readName)
00262 {
00263 myReadName = readName;
00264 myIsBufferSynced = false;
00265 myIsReadNameBufferValid = false;
00266 myStatus = SamStatus::SUCCESS;
00267
00268
00269
00270 if(myReadName.Length() == 0)
00271 {
00272
00273 myReadName = DEFAULT_READ_NAME;
00274 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00275 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00276 return(false);
00277 }
00278
00279 return true;
00280 }
00281
00282
00283 bool SamRecord::setFlag(uint16_t flag)
00284 {
00285 myStatus = SamStatus::SUCCESS;
00286 myRecordPtr->myFlag = flag;
00287 return true;
00288 }
00289
00290
00291 bool SamRecord::setReferenceName(SamFileHeader& header,
00292 const char* referenceName)
00293 {
00294 myStatus = SamStatus::SUCCESS;
00295
00296 myReferenceName = referenceName;
00297 myRecordPtr->myReferenceID = header.getReferenceID(referenceName);
00298
00299 return true;
00300 }
00301
00302
00303 bool SamRecord::set1BasedPosition(int32_t position)
00304 {
00305 return(set0BasedPosition(position - 1));
00306 }
00307
00308
00309 bool SamRecord::set0BasedPosition(int32_t position)
00310 {
00311 myStatus = SamStatus::SUCCESS;
00312 myRecordPtr->myPosition = position;
00313 myIsBinValid = false;
00314 return true;
00315 }
00316
00317
00318 bool SamRecord::setMapQuality(uint8_t mapQuality)
00319 {
00320 myStatus = SamStatus::SUCCESS;
00321 myRecordPtr->myMapQuality = mapQuality;
00322 return true;
00323 }
00324
00325
00326 bool SamRecord::setCigar(const char* cigar)
00327 {
00328 myStatus = SamStatus::SUCCESS;
00329 myCigar = cigar;
00330
00331 myIsBufferSynced = false;
00332 myIsCigarBufferValid = false;
00333 myCigarTempBufferLength = -1;
00334 myIsBinValid = false;
00335
00336
00337 myAlignmentLength = -1;
00338 myUnclippedStartOffset = -1;
00339 myUnclippedEndOffset = -1;
00340
00341 return true;
00342 }
00343
00344
00345 bool SamRecord::setCigar(const Cigar& cigar)
00346 {
00347 myStatus = SamStatus::SUCCESS;
00348 cigar.getCigarString(myCigar);
00349
00350 myIsBufferSynced = false;
00351 myIsCigarBufferValid = false;
00352 myCigarTempBufferLength = -1;
00353 myIsBinValid = false;
00354
00355
00356 myAlignmentLength = -1;
00357 myUnclippedStartOffset = -1;
00358 myUnclippedEndOffset = -1;
00359
00360 return true;
00361 }
00362
00363
00364 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00365 const char* mateReferenceName)
00366 {
00367 myStatus = SamStatus::SUCCESS;
00368
00369
00370
00371 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00372 {
00373 myMateReferenceName = myReferenceName;
00374 }
00375 else
00376 {
00377 myMateReferenceName = mateReferenceName;
00378 }
00379
00380
00381 myRecordPtr->myMateReferenceID =
00382 header.getReferenceID(myMateReferenceName);
00383
00384 return true;
00385 }
00386
00387
00388 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00389 {
00390 return(set0BasedMatePosition(matePosition - 1));
00391 }
00392
00393
00394 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00395 {
00396 myStatus = SamStatus::SUCCESS;
00397 myRecordPtr->myMatePosition = matePosition;
00398 return true;
00399 }
00400
00401
00402 bool SamRecord::setInsertSize(int32_t insertSize)
00403 {
00404 myStatus = SamStatus::SUCCESS;
00405 myRecordPtr->myInsertSize = insertSize;
00406 return true;
00407 }
00408
00409
00410 bool SamRecord::setSequence(const char* seq)
00411 {
00412 myStatus = SamStatus::SUCCESS;
00413 mySequence = seq;
00414 mySeqWithEq.clear();
00415 mySeqWithoutEq.clear();
00416
00417 myIsBufferSynced = false;
00418 myIsSequenceBufferValid = false;
00419 return true;
00420 }
00421
00422
00423 bool SamRecord::setQuality(const char* quality)
00424 {
00425 myStatus = SamStatus::SUCCESS;
00426 myQuality = quality;
00427 myIsBufferSynced = false;
00428 myIsQualityBufferValid = false;
00429 return true;
00430 }
00431
00432
00433
00434
00435 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00436 uint32_t fromBufferSize,
00437 SamFileHeader& header)
00438 {
00439 myStatus = SamStatus::SUCCESS;
00440 if((fromBuffer == NULL) || (fromBufferSize == 0))
00441 {
00442
00443 myStatus.setStatus(SamStatus::FAIL_PARSE,
00444 "Cannot parse an empty file.");
00445 return(SamStatus::FAIL_PARSE);
00446 }
00447
00448
00449 resetRecord();
00450
00451
00452 if(!allocateRecordStructure(fromBufferSize))
00453 {
00454
00455 return(SamStatus::FAIL_MEM);
00456 }
00457
00458 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00459
00460 setVariablesForNewBuffer(header);
00461
00462
00463 return(SamStatus::SUCCESS);
00464 }
00465
00466
00467
00468
00469 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00470 {
00471 myStatus = SamStatus::SUCCESS;
00472 bool status = true;
00473 int key = 0;
00474 int intVal = 0;
00475 int index = 0;
00476 char bamvtype;
00477
00478 int tagBufferSize = 0;
00479
00480
00481 if(myNeedToSetTagsFromBuffer)
00482 {
00483 if(!setTagsFromBuffer())
00484 {
00485
00486 return(false);
00487 }
00488 }
00489
00490 switch (vtype)
00491 {
00492 case 'A' :
00493 index = integers.Length();
00494 bamvtype = vtype;
00495 integers.Push((const int)*(valuePtr));
00496 tagBufferSize += 4;
00497 break;
00498 case 'i' :
00499 index = integers.Length();
00500 intVal = atoi((const char *)valuePtr);
00501
00502
00503
00504
00505 if(intVal < 0)
00506 {
00507
00508
00509 if(intVal > std::numeric_limits<char>::min())
00510 {
00511
00512 bamvtype = 'c';
00513 tagBufferSize += 4;
00514 }
00515 else if(intVal > std::numeric_limits<short>::min())
00516 {
00517
00518 bamvtype = 's';
00519 tagBufferSize += 5;
00520 }
00521 else
00522 {
00523
00524 bamvtype = 'i';
00525 tagBufferSize += 7;
00526 }
00527 }
00528 else
00529 {
00530
00531 if(intVal < std::numeric_limits<unsigned char>::max())
00532 {
00533
00534 bamvtype = 'C';
00535 tagBufferSize += 4;
00536 }
00537 else if(intVal < std::numeric_limits<unsigned short>::max())
00538 {
00539
00540 bamvtype = 'S';
00541 tagBufferSize += 5;
00542 }
00543 else
00544 {
00545
00546 bamvtype = 'I';
00547 tagBufferSize += 7;
00548 }
00549 }
00550 integers.Push(intVal);
00551 break;
00552 case 'Z' :
00553 index = strings.Length();
00554 bamvtype = vtype;
00555 strings.Push((const char *)valuePtr);
00556 tagBufferSize += 4 + strings.Last().Length();
00557 break;
00558 case 'f' :
00559 index = doubles.Length();
00560 bamvtype = vtype;
00561 doubles.Push(atof((const char *)valuePtr));
00562 tagBufferSize += 7;
00563 break;
00564 default :
00565 fprintf(stderr,
00566 "samFile::ReadSAM() - Unknown custom field of type %c\n",
00567 vtype);
00568 myStatus.setStatus(SamStatus::FAIL_PARSE,
00569 "Unknown custom field in a tag");
00570 status = false;
00571 }
00572
00573
00574 if(status)
00575 {
00576
00577 myNeedToSetTagsInBuffer = true;
00578 myIsTagsBufferValid = false;
00579 myIsBufferSynced = false;
00580
00581 key = MAKEKEY(tag[0], tag[1], bamvtype);
00582 extras.Add(key, index);
00583 myTagBufferSize += tagBufferSize;
00584 }
00585 return(status);
00586 }
00587
00588
00589
00590 const void* SamRecord::getRecordBuffer()
00591 {
00592 return(getRecordBuffer(mySequenceTranslation));
00593 }
00594
00595
00596
00597 const void* SamRecord::getRecordBuffer(SequenceTranslation translation)
00598 {
00599 myStatus = SamStatus::SUCCESS;
00600 bool status = true;
00601
00602
00603 if((myIsBufferSynced == false) ||
00604 (myBufferSequenceTranslation != translation))
00605 {
00606 status &= fixBuffer(translation);
00607 }
00608
00609 if(myNeedToSetTagsInBuffer)
00610 {
00611 status &= setTagsInBuffer();
00612 }
00613 if(!status)
00614 {
00615 return(NULL);
00616 }
00617 return (const void *)myRecordPtr;
00618 }
00619
00620
00621
00622
00623 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
00624 {
00625 return(writeRecordBuffer(filePtr, mySequenceTranslation));
00626 }
00627
00628
00629
00630 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr,
00631 SequenceTranslation translation)
00632 {
00633 myStatus = SamStatus::SUCCESS;
00634 if((filePtr == NULL) || (filePtr->isOpen() == false))
00635 {
00636
00637 myStatus.setStatus(SamStatus::FAIL_ORDER,
00638 "Can't write to an unopened file.");
00639 return(SamStatus::FAIL_ORDER);
00640 }
00641
00642 if((myIsBufferSynced == false) ||
00643 (myBufferSequenceTranslation != translation))
00644 {
00645 if(!fixBuffer(translation))
00646 {
00647 return(myStatus.getStatus());
00648 }
00649 }
00650
00651
00652 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
00653 unsigned int numBytesWritten =
00654 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
00655
00656
00657 if(numBytesToWrite == numBytesWritten)
00658 {
00659 return(SamStatus::SUCCESS);
00660 }
00661
00662 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
00663 return(SamStatus::FAIL_IO);
00664 }
00665
00666
00667 int32_t SamRecord::getBlockSize()
00668 {
00669 myStatus = SamStatus::SUCCESS;
00670
00671
00672 if(myIsBufferSynced == false)
00673 {
00674
00675
00676
00677 fixBuffer(myBufferSequenceTranslation);
00678 }
00679 return myRecordPtr->myBlockSize;
00680 }
00681
00682
00683
00684 const char* SamRecord::getReferenceName()
00685 {
00686 myStatus = SamStatus::SUCCESS;
00687 return myReferenceName.c_str();
00688 }
00689
00690
00691 int32_t SamRecord::getReferenceID()
00692 {
00693 myStatus = SamStatus::SUCCESS;
00694 return myRecordPtr->myReferenceID;
00695 }
00696
00697
00698 int32_t SamRecord::get1BasedPosition()
00699 {
00700 myStatus = SamStatus::SUCCESS;
00701 return (myRecordPtr->myPosition + 1);
00702 }
00703
00704
00705 int32_t SamRecord::get0BasedPosition()
00706 {
00707 myStatus = SamStatus::SUCCESS;
00708 return myRecordPtr->myPosition;
00709 }
00710
00711
00712 uint8_t SamRecord::getReadNameLength()
00713 {
00714 myStatus = SamStatus::SUCCESS;
00715
00716
00717 if(myIsReadNameBufferValid)
00718 {
00719 return(myRecordPtr->myReadNameLength);
00720 }
00721
00722 return(myReadName.Length() + 1);
00723 }
00724
00725
00726 uint8_t SamRecord::getMapQuality()
00727 {
00728 myStatus = SamStatus::SUCCESS;
00729 return myRecordPtr->myMapQuality;
00730 }
00731
00732
00733 uint16_t SamRecord::getBin()
00734 {
00735 myStatus = SamStatus::SUCCESS;
00736 if(!myIsBinValid)
00737 {
00738
00739
00740 myRecordPtr->myBin =
00741 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
00742 myIsBinValid = true;
00743 }
00744 return(myRecordPtr->myBin);
00745 }
00746
00747
00748 uint16_t SamRecord::getCigarLength()
00749 {
00750 myStatus = SamStatus::SUCCESS;
00751
00752
00753 if(myIsCigarBufferValid)
00754 {
00755 return myRecordPtr->myCigarLength;
00756 }
00757
00758 if(myCigarTempBufferLength == -1)
00759 {
00760
00761
00762 parseCigarString();
00763 }
00764
00765
00766 return(myCigarTempBufferLength);
00767 }
00768
00769
00770 uint16_t SamRecord::getFlag()
00771 {
00772 myStatus = SamStatus::SUCCESS;
00773 return myRecordPtr->myFlag;
00774 }
00775
00776
00777 int32_t SamRecord::getReadLength()
00778 {
00779 myStatus = SamStatus::SUCCESS;
00780 if(myIsSequenceBufferValid == false)
00781 {
00782
00783 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
00784 {
00785 return(0);
00786 }
00787
00788 return(mySequence.Length());
00789 }
00790 return(myRecordPtr->myReadLength);
00791 }
00792
00793
00794
00795
00796 const char* SamRecord::getMateReferenceName()
00797 {
00798 myStatus = SamStatus::SUCCESS;
00799 return myMateReferenceName.c_str();
00800 }
00801
00802
00803
00804
00805
00806 const char* SamRecord::getMateReferenceNameOrEqual()
00807 {
00808 myStatus = SamStatus::SUCCESS;
00809 if(myMateReferenceName == "*")
00810 {
00811 return(myMateReferenceName);
00812 }
00813 if(myMateReferenceName == getReferenceName())
00814 {
00815 return(FIELD_ABSENT_STRING);
00816 }
00817 else
00818 {
00819 return(myMateReferenceName);
00820 }
00821 }
00822
00823
00824 int32_t SamRecord::getMateReferenceID()
00825 {
00826 myStatus = SamStatus::SUCCESS;
00827 return myRecordPtr->myMateReferenceID;
00828 }
00829
00830
00831 int32_t SamRecord::get1BasedMatePosition()
00832 {
00833 myStatus = SamStatus::SUCCESS;
00834 return (myRecordPtr->myMatePosition + 1);
00835 }
00836
00837
00838 int32_t SamRecord::get0BasedMatePosition()
00839 {
00840 myStatus = SamStatus::SUCCESS;
00841 return myRecordPtr->myMatePosition;
00842 }
00843
00844
00845 int32_t SamRecord::getInsertSize()
00846 {
00847 myStatus = SamStatus::SUCCESS;
00848 return myRecordPtr->myInsertSize;
00849 }
00850
00851
00852
00853 int32_t SamRecord::get0BasedAlignmentEnd()
00854 {
00855 myStatus = SamStatus::SUCCESS;
00856 if(myAlignmentLength == -1)
00857 {
00858
00859 parseCigar();
00860 }
00861
00862 if(myAlignmentLength == 0)
00863 {
00864
00865 return(myRecordPtr->myPosition);
00866 }
00867 return(myRecordPtr->myPosition + myAlignmentLength - 1);
00868 }
00869
00870
00871
00872 int32_t SamRecord::get1BasedAlignmentEnd()
00873 {
00874 return(get0BasedAlignmentEnd() + 1);
00875 }
00876
00877
00878
00879 int32_t SamRecord::getAlignmentLength()
00880 {
00881 myStatus = SamStatus::SUCCESS;
00882 if(myAlignmentLength == -1)
00883 {
00884
00885 parseCigar();
00886 }
00887
00888 return(myAlignmentLength);
00889 }
00890
00891
00892 int32_t SamRecord::get0BasedUnclippedStart()
00893 {
00894 myStatus = SamStatus::SUCCESS;
00895 if(myUnclippedStartOffset == -1)
00896 {
00897
00898 parseCigar();
00899 }
00900 return(myRecordPtr->myPosition - myUnclippedStartOffset);
00901 }
00902
00903
00904
00905 int32_t SamRecord::get1BasedUnclippedStart()
00906 {
00907 return(get0BasedUnclippedStart() + 1);
00908 }
00909
00910
00911
00912 int32_t SamRecord::get0BasedUnclippedEnd()
00913 {
00914
00915
00916 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
00917 }
00918
00919
00920
00921 int32_t SamRecord::get1BasedUnclippedEnd()
00922 {
00923 return(get0BasedUnclippedEnd() + 1);
00924 }
00925
00926
00927
00928 const char* SamRecord::getReadName()
00929 {
00930 myStatus = SamStatus::SUCCESS;
00931 if(myReadName.Length() == 0)
00932 {
00933
00934
00935 myReadName = (char*)&(myRecordPtr->myData);
00936 }
00937 return myReadName.c_str();
00938 }
00939
00940
00941 const char* SamRecord::getCigar()
00942 {
00943 myStatus = SamStatus::SUCCESS;
00944 if(myCigar.Length() == 0)
00945 {
00946
00947
00948 parseCigarBinary();
00949 }
00950 return myCigar.c_str();
00951 }
00952
00953
00954 const char* SamRecord::getSequence()
00955 {
00956 return(getSequence(mySequenceTranslation));
00957 }
00958
00959
00960 const char* SamRecord::getSequence(SequenceTranslation translation)
00961 {
00962 myStatus = SamStatus::SUCCESS;
00963 if(mySequence.Length() == 0)
00964 {
00965
00966
00967 setSequenceAndQualityFromBuffer();
00968 }
00969
00970
00971 if((translation == NONE) || (myRefPtr == NULL))
00972 {
00973 return mySequence.c_str();
00974 }
00975 else if(translation == EQUAL)
00976 {
00977 if(mySeqWithEq.length() == 0)
00978 {
00979
00980 if(mySequence == "*")
00981 {
00982
00983 mySeqWithEq = '*';
00984 }
00985 else
00986 {
00987
00988 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
00989 myRecordPtr->myPosition,
00990 myCigarRoller,
00991 getReferenceName(),
00992 *myRefPtr,
00993 mySeqWithEq);
00994 }
00995 }
00996 return(mySeqWithEq.c_str());
00997 }
00998 else
00999 {
01000
01001 if(mySeqWithoutEq.length() == 0)
01002 {
01003 if(mySequence == "*")
01004 {
01005
01006 mySeqWithoutEq = '*';
01007 }
01008 else
01009 {
01010
01011 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01012 myRecordPtr->myPosition,
01013 myCigarRoller,
01014 getReferenceName(),
01015 *myRefPtr,
01016 mySeqWithoutEq);
01017 }
01018 }
01019 return(mySeqWithoutEq.c_str());
01020 }
01021 }
01022
01023
01024 const char* SamRecord::getQuality()
01025 {
01026 myStatus = SamStatus::SUCCESS;
01027 if(myQuality.Length() == 0)
01028 {
01029
01030
01031 setSequenceAndQualityFromBuffer();
01032 }
01033 return myQuality.c_str();
01034 }
01035
01036
01037 char SamRecord::getSequence(int index)
01038 {
01039 return(getSequence(index, mySequenceTranslation));
01040 }
01041
01042
01043 char SamRecord::getSequence(int index, SequenceTranslation translation)
01044 {
01045 static const char * asciiBases = "=AC.G...T......N";
01046
01047
01048 int32_t readLen = getReadLength();
01049
01050
01051 if(readLen == 0)
01052 {
01053 String exceptionString = "SamRecord::getSequence(";
01054 exceptionString += index;
01055 exceptionString += ") is not allowed since sequence = '*'";
01056 throw std::runtime_error(exceptionString.c_str());
01057 }
01058 else if((index < 0) || (index >= readLen))
01059 {
01060
01061 String exceptionString = "SamRecord::getSequence(";
01062 exceptionString += index;
01063 exceptionString += ") is out of range. Index must be between 0 and ";
01064 exceptionString += (readLen - 1);
01065 throw std::runtime_error(exceptionString.c_str());
01066 }
01067
01068
01069 if((translation == NONE) || (myRefPtr == NULL))
01070 {
01071
01072 if(mySequence.Length() == 0)
01073 {
01074
01075
01076
01077 unsigned char * packedSequence =
01078 (unsigned char *)myRecordPtr->myData +
01079 myRecordPtr->myReadNameLength +
01080 myRecordPtr->myCigarLength * sizeof(int);
01081
01082 return(index & 1 ?
01083 asciiBases[packedSequence[index / 2] & 0xF] :
01084 asciiBases[packedSequence[index / 2] >> 4]);
01085 }
01086
01087 return(mySequence[index]);
01088 }
01089 else
01090 {
01091
01092
01093
01094 if(mySequence.Length() == 0)
01095 {
01096
01097
01098 setSequenceAndQualityFromBuffer();
01099 }
01100
01101
01102 if(translation == EQUAL)
01103 {
01104
01105
01106 if(mySeqWithEq.length() == 0)
01107 {
01108
01109
01110
01111 if(mySequence == "*")
01112 {
01113
01114 mySeqWithEq = '*';
01115 }
01116 else
01117 {
01118
01119 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01120 myRecordPtr->myPosition,
01121 myCigarRoller,
01122 getReferenceName(),
01123 *myRefPtr,
01124 mySeqWithEq);
01125 }
01126 }
01127
01128 return(mySeqWithEq[index]);
01129 }
01130 else
01131 {
01132
01133
01134
01135 if(mySeqWithoutEq.length() == 0)
01136 {
01137
01138
01139
01140 if(mySequence == "*")
01141 {
01142
01143 mySeqWithoutEq = '*';
01144 }
01145 else
01146 {
01147
01148
01149
01150 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01151 myRecordPtr->myPosition,
01152 myCigarRoller,
01153 getReferenceName(),
01154 *myRefPtr,
01155 mySeqWithoutEq);
01156 }
01157 }
01158
01159 return(mySeqWithoutEq[index]);
01160 }
01161 }
01162 }
01163
01164
01165 char SamRecord::getQuality(int index)
01166 {
01167
01168 int32_t readLen = getReadLength();
01169
01170
01171
01172 if(readLen == 0)
01173 {
01174 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01175 }
01176 else if((index < 0) || (index >= readLen))
01177 {
01178
01179 String exceptionString = "SamRecord::getQuality(";
01180 exceptionString += index;
01181 exceptionString += ") is out of range. Index must be between 0 and ";
01182 exceptionString += (readLen - 1);
01183 throw std::runtime_error(exceptionString.c_str());
01184 }
01185
01186 if(myQuality.Length() == 0)
01187 {
01188
01189 unsigned char * packedQuality =
01190 (unsigned char *)myRecordPtr->myData +
01191 myRecordPtr->myReadNameLength +
01192 myRecordPtr->myCigarLength * sizeof(int) +
01193 (myRecordPtr->myReadLength + 1) / 2;
01194 return(packedQuality[index] + 33);
01195 }
01196 else
01197 {
01198
01199 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01200 {
01201
01202 return(0xFF);
01203 }
01204 else
01205 {
01206 return(myQuality[index]);
01207 }
01208 }
01209 }
01210
01211
01212 Cigar* SamRecord::getCigarInfo()
01213 {
01214
01215
01216
01217
01218 if(myAlignmentLength == -1)
01219 {
01220
01221 parseCigar();
01222 }
01223 return(&myCigarRoller);
01224 }
01225
01226
01227 uint32_t SamRecord::getTagLength()
01228 {
01229 myStatus = SamStatus::SUCCESS;
01230 if(myNeedToSetTagsFromBuffer)
01231 {
01232
01233
01234 unsigned char * tagStart =
01235 (unsigned char *)myRecordPtr->myData
01236 + myRecordPtr->myReadNameLength
01237 + myRecordPtr->myCigarLength * sizeof(int)
01238 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01239
01240
01241
01242
01243 uint32_t nonTagSize =
01244 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01245
01246 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01247 return(tagSize);
01248 }
01249
01250
01251 return(myTagBufferSize);
01252 }
01253
01254
01255
01256
01257
01258
01259
01260 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01261 {
01262 myStatus = SamStatus::SUCCESS;
01263 if(myNeedToSetTagsFromBuffer)
01264 {
01265 if(!setTagsFromBuffer())
01266 {
01267
01268
01269 return(false);
01270 }
01271 }
01272
01273
01274
01275 myLastTagIndex++;
01276 int maxTagIndex = extras.Capacity();
01277 if(myLastTagIndex >= maxTagIndex)
01278 {
01279
01280
01281
01282 return(false);
01283 }
01284
01285 bool tagFound = false;
01286
01287 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01288 {
01289 if(extras.SlotInUse(myLastTagIndex))
01290 {
01291
01292 int key = extras.GetKey(myLastTagIndex);
01293 getTag(key, tag);
01294 getVtype(key, vtype);
01295 tagFound = true;
01296
01297 switch (vtype)
01298 {
01299 case 'A' :
01300 *value = getIntegerPtr(myLastTagIndex);
01301 break;
01302 case 'f' :
01303 *value = getDoublePtr(myLastTagIndex);
01304 break;
01305 case 'c' :
01306 case 'C' :
01307 case 's' :
01308 case 'S' :
01309 case 'i' :
01310 case 'I' :
01311 vtype = 'i';
01312 *value = getIntegerPtr(myLastTagIndex);
01313 break;
01314 case 'Z' :
01315 *value = getStringPtr(myLastTagIndex);
01316 break;
01317 default:
01318 myStatus.setStatus(SamStatus::FAIL_PARSE,
01319 "Unknown tag type");
01320 tagFound = false;
01321 break;
01322 }
01323 }
01324 if(!tagFound)
01325 {
01326
01327 myLastTagIndex++;
01328 }
01329 }
01330 return(tagFound);
01331 }
01332
01333
01334
01335 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01336 String& cigar, String& sequence, String& quality)
01337 {
01338 return(getFields(recStruct, readName, cigar, sequence, quality,
01339 mySequenceTranslation));
01340 }
01341
01342
01343
01344 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01345 String& cigar, String& sequence, String& quality,
01346 SequenceTranslation translation)
01347 {
01348 myStatus = SamStatus::SUCCESS;
01349 if(myIsBufferSynced == false)
01350 {
01351 if(!fixBuffer(translation))
01352 {
01353
01354 return(false);
01355 }
01356 }
01357 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01358
01359 readName = getReadName();
01360
01361 if(myStatus != SamStatus::SUCCESS)
01362 {
01363
01364 return(false);
01365 }
01366 cigar = getCigar();
01367
01368 if(myStatus != SamStatus::SUCCESS)
01369 {
01370
01371 return(false);
01372 }
01373 sequence = getSequence(translation);
01374
01375 if(myStatus != SamStatus::SUCCESS)
01376 {
01377
01378 return(false);
01379 }
01380 quality = getQuality();
01381
01382 if(myStatus != SamStatus::SUCCESS)
01383 {
01384
01385 return(false);
01386 }
01387 return(true);
01388 }
01389
01390
01391 bool SamRecord::isIntegerType(char vtype) const
01392 {
01393 if((vtype == 'c') || (vtype == 'C') ||
01394 (vtype == 's') || (vtype == 'S') ||
01395 (vtype == 'i') || (vtype == 'I'))
01396 {
01397 return(true);
01398 }
01399 return(false);
01400 }
01401
01402
01403 bool SamRecord::isDoubleType(char vtype) const
01404 {
01405 if(vtype == 'f')
01406 {
01407 return(true);
01408 }
01409 return(false);
01410 }
01411
01412
01413 bool SamRecord::isCharType(char vtype) const
01414 {
01415 if(vtype == 'A')
01416 {
01417 return(true);
01418 }
01419 return(false);
01420 }
01421
01422
01423 bool SamRecord::isStringType(char vtype) const
01424 {
01425 if(vtype == 'Z')
01426 {
01427 return(true);
01428 }
01429 return(false);
01430 }
01431
01432
01433 void SamRecord::clearTags()
01434 {
01435 if(extras.Entries() != 0)
01436 {
01437 extras.Clear();
01438 }
01439 strings.Clear();
01440 integers.Clear();
01441 doubles.Clear();
01442 myTagBufferSize = 0;
01443 resetTagIter();
01444 }
01445
01446
01447
01448 const SamStatus& SamRecord::getStatus()
01449 {
01450 return(myStatus);
01451 }
01452
01453
01454 String & SamRecord::getString(const char * tag)
01455 {
01456
01457 myStatus = SamStatus::SUCCESS;
01458
01459 if(myNeedToSetTagsFromBuffer)
01460 {
01461 if(!setTagsFromBuffer())
01462 {
01463
01464
01465
01466 }
01467 }
01468
01469 int key = MAKEKEY(tag[0], tag[1], 'Z');
01470 int offset = extras.Find(key);
01471
01472 int value;
01473 if (offset < 0)
01474 {
01475
01476 return(NOT_FOUND_TAG_STRING);
01477 }
01478 else
01479 value = extras[offset];
01480
01481 return strings[value];
01482 }
01483
01484 int & SamRecord::getInteger(const char * tag)
01485 {
01486
01487 myStatus = SamStatus::SUCCESS;
01488
01489 if(myNeedToSetTagsFromBuffer)
01490 {
01491 if(!setTagsFromBuffer())
01492 {
01493
01494
01495
01496 }
01497 }
01498
01499 int key = MAKEKEY(tag[0], tag[1], 'i');
01500 int offset = extras.Find(key);
01501
01502 int value;
01503 if (offset < 0)
01504 {
01505
01506 return NOT_FOUND_TAG_INT;
01507 }
01508 else
01509 value = extras[offset];
01510
01511 return integers[value];
01512 }
01513
01514 double & SamRecord::getDouble(const char * tag)
01515 {
01516
01517 myStatus = SamStatus::SUCCESS;
01518
01519 if(myNeedToSetTagsFromBuffer)
01520 {
01521 if(!setTagsFromBuffer())
01522 {
01523
01524
01525
01526 }
01527 }
01528
01529 int key = MAKEKEY(tag[0], tag[1], 'f');
01530 int offset = extras.Find(key);
01531
01532 int value;
01533 if (offset < 0)
01534 {
01535
01536 return NOT_FOUND_TAG_DOUBLE;
01537 }
01538 else
01539 value = extras[offset];
01540
01541 return doubles[value];
01542 }
01543
01544
01545 bool SamRecord::checkTag(const char * tag, char type)
01546 {
01547
01548 myStatus = SamStatus::SUCCESS;
01549
01550 if(myNeedToSetTagsFromBuffer)
01551 {
01552 if(!setTagsFromBuffer())
01553 {
01554
01555
01556 return("");
01557 }
01558 }
01559
01560 int key = MAKEKEY(tag[0], tag[1], type);
01561
01562 return (extras.Find(key) != LH_NOTFOUND);
01563 }
01564
01565
01566
01567
01568 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01569 {
01570
01571
01572 if(myAlignmentLength == -1)
01573 {
01574 parseCigar();
01575 }
01576 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01577 }
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609
01610
01611
01612
01613
01614
01615
01616
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
01628
01629
01630
01631
01632
01633
01634 bool SamRecord::allocateRecordStructure(int size)
01635 {
01636 if (allocatedSize < size)
01637 {
01638 bamRecordStruct* tmpRecordPtr =
01639 (bamRecordStruct *)realloc(myRecordPtr, size);
01640 if(tmpRecordPtr == NULL)
01641 {
01642
01643 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01644 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
01645 return(false);
01646 }
01647
01648 myRecordPtr = tmpRecordPtr;
01649 allocatedSize = size;
01650 }
01651 return(true);
01652 }
01653
01654
01655
01656 void* SamRecord::getStringPtr(int index)
01657 {
01658 int value = extras[index];
01659
01660 return &(strings[value]);
01661 }
01662
01663 void* SamRecord::getIntegerPtr(int offset)
01664 {
01665 int value = extras[offset];
01666
01667 return &(integers[value]);
01668 }
01669
01670 void* SamRecord::getDoublePtr(int offset)
01671 {
01672 int value = extras[offset];
01673
01674 return &(doubles[value]);
01675 }
01676
01677
01678
01679 bool SamRecord::fixBuffer(SequenceTranslation translation)
01680 {
01681
01682 if(myIsBufferSynced &&
01683 (myBufferSequenceTranslation == translation))
01684 {
01685
01686 return(true);
01687 }
01688
01689
01690 if(!myIsBinValid)
01691 {
01692
01693
01694 myRecordPtr->myBin =
01695 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
01696 myIsBinValid = true;
01697 }
01698
01699
01700 bool status = true;
01701
01702
01703 uint8_t newReadNameLen = getReadNameLength();
01704 uint16_t newCigarLen = getCigarLength();
01705 int32_t newReadLen = getReadLength();
01706 uint32_t newTagLen = getTagLength();
01707 uint32_t bamSequenceLen = (newReadLen+1)/2;
01708
01709
01710
01711
01712 int newBufferSize =
01713 ((unsigned char*)(&(myRecordPtr->myData)) -
01714 (unsigned char*)myRecordPtr) +
01715 newReadNameLen + ((newCigarLen)*4) +
01716 newReadLen + bamSequenceLen + newTagLen;
01717
01718 if(!allocateRecordStructure(newBufferSize))
01719 {
01720
01721 return(false);
01722 }
01723
01724
01725
01726
01727
01728
01729 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
01730 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
01731 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
01732
01733
01734
01735 if(myIsTagsBufferValid &&
01736 (readNameLenChange | cigarLenChange | readLenChange))
01737 {
01738 status &= setTagsFromBuffer();
01739
01740
01741 myIsTagsBufferValid = false;
01742 }
01743
01744
01745
01746
01747 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
01748 (readNameLenChange | cigarLenChange | readLenChange))
01749 {
01750 setSequenceAndQualityFromBuffer();
01751
01752
01753 myIsQualityBufferValid = false;
01754 myIsSequenceBufferValid = false;
01755 }
01756
01757
01758
01759 if((myIsCigarBufferValid) &&
01760 (readNameLenChange))
01761 {
01762 status &= parseCigarBinary();
01763 myIsCigarBufferValid = false;
01764 }
01765
01766
01767 if(!myIsReadNameBufferValid)
01768 {
01769 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
01770 newReadNameLen);
01771
01772
01773 myRecordPtr->myReadNameLength = newReadNameLen;
01774 myIsReadNameBufferValid = true;
01775 }
01776
01777 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
01778 myRecordPtr->myReadNameLength;
01779
01780
01781 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
01782
01783 if(!myIsCigarBufferValid)
01784 {
01785
01786
01787 myRecordPtr->myCigarLength = newCigarLen;
01788 memcpy(packedCigar, myCigarTempBuffer,
01789 myRecordPtr->myCigarLength * sizeof(uint32_t));
01790
01791 myIsCigarBufferValid = true;
01792 }
01793
01794 unsigned char * packedSequence = readNameEnds +
01795 myRecordPtr->myCigarLength * sizeof(int);
01796 unsigned char * packedQuality = packedSequence + bamSequenceLen;
01797
01798 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
01799 (myBufferSequenceTranslation != translation))
01800 {
01801 myRecordPtr->myReadLength = newReadLen;
01802
01803
01804 bool noQuality = false;
01805 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01806 {
01807 noQuality = true;
01808 }
01809
01810 const char* translatedSeq = NULL;
01811
01812
01813
01814 if((!myIsSequenceBufferValid) ||
01815 (translation != myBufferSequenceTranslation))
01816 {
01817 translatedSeq = getSequence(translation);
01818 }
01819
01820 for (int i = 0; i < myRecordPtr->myReadLength; i++)
01821 {
01822 if((!myIsSequenceBufferValid) ||
01823 (translation != myBufferSequenceTranslation))
01824 {
01825
01826 int seqVal = 0;
01827 switch(translatedSeq[i])
01828 {
01829 case '=':
01830 seqVal = 0;
01831 break;
01832 case 'A':
01833 case 'a':
01834 seqVal = 1;
01835 break;
01836 case 'C':
01837 case 'c':
01838 seqVal = 2;
01839 break;
01840 case 'G':
01841 case 'g':
01842 seqVal = 4;
01843 break;
01844 case 'T':
01845 case 't':
01846 seqVal = 8;
01847 break;
01848 case 'N':
01849 case 'n':
01850 case '.':
01851 seqVal = 15;
01852 break;
01853 default:
01854 myStatus.addError(SamStatus::FAIL_PARSE,
01855 "Unknown Sequence character found.");
01856 status = false;
01857 break;
01858 };
01859
01860 if(i & 1)
01861 {
01862
01863
01864 packedSequence[i/2] |= seqVal;
01865 }
01866 else
01867 {
01868
01869 packedSequence[i/2] = seqVal << 4;
01870 }
01871 }
01872
01873 if(!myIsQualityBufferValid)
01874 {
01875
01876 if((noQuality) || (myQuality.Length() <= i))
01877 {
01878
01879
01880 packedQuality[i] = 0xFF;
01881 }
01882 else
01883 {
01884
01885 packedQuality[i] = myQuality[i] - 33;
01886 }
01887 }
01888 }
01889 myIsQualityBufferValid = true;
01890 myIsSequenceBufferValid = true;
01891 myBufferSequenceTranslation = translation;
01892 }
01893
01894 if(!myIsTagsBufferValid)
01895 {
01896 status &= setTagsInBuffer();
01897 }
01898
01899
01900 myRecordPtr->myReadNameLength = newReadNameLen;
01901 myRecordPtr->myCigarLength = newCigarLen;
01902 myRecordPtr->myReadLength = newReadLen;
01903
01904
01905
01906 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
01907
01908 if(status)
01909 {
01910 myIsBufferSynced = true;
01911 }
01912
01913 return(status);
01914 }
01915
01916
01917
01918
01919
01920 void SamRecord::setSequenceAndQualityFromBuffer()
01921 {
01922
01923
01924
01925
01926
01927
01928 bool extractSequence = false;
01929 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
01930 {
01931 extractSequence = true;
01932 }
01933
01934
01935 bool extractQuality = false;
01936 if(myIsQualityBufferValid && (myQuality.Length() == 0))
01937 {
01938 extractQuality = true;
01939 }
01940
01941
01942
01943 if(!extractSequence && !extractQuality)
01944 {
01945 return;
01946 }
01947
01948
01949 if(extractSequence)
01950 {
01951 mySequence.SetLength(myRecordPtr->myReadLength);
01952 }
01953 if(extractQuality)
01954 {
01955 myQuality.SetLength(myRecordPtr->myReadLength);
01956 }
01957
01958 unsigned char * readNameEnds =
01959 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
01960 unsigned char * packedSequence =
01961 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
01962 unsigned char * packedQuality =
01963 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
01964
01965 const char * asciiBases = "=AC.G...T......N";
01966
01967
01968
01969 bool qualitySpecified = false;
01970
01971 for (int i = 0; i < myRecordPtr->myReadLength; i++)
01972 {
01973 if(extractSequence)
01974 {
01975 mySequence[i] = i & 1 ?
01976 asciiBases[packedSequence[i / 2] & 0xF] :
01977 asciiBases[packedSequence[i / 2] >> 4];
01978 }
01979
01980 if(extractQuality)
01981 {
01982 if(packedQuality[i] != 0xFF)
01983 {
01984
01985 qualitySpecified = true;
01986 }
01987
01988 myQuality[i] = packedQuality[i] + 33;
01989 }
01990 }
01991
01992
01993 if(myRecordPtr->myReadLength == 0)
01994 {
01995 if(extractSequence)
01996 {
01997 mySequence = "*";
01998 }
01999 if(extractQuality)
02000 {
02001 myQuality = "*";
02002 }
02003 }
02004 else if(extractQuality && !qualitySpecified)
02005 {
02006
02007 myQuality = "*";
02008 }
02009 }
02010
02011
02012
02013 bool SamRecord::parseCigar()
02014 {
02015
02016 if(myCigar.Length() == 0)
02017 {
02018
02019 return(parseCigarBinary());
02020 }
02021 return(parseCigarString());
02022 }
02023
02024
02025 bool SamRecord::parseCigarBinary()
02026 {
02027
02028
02029
02030 if(myCigar.Length() != 0)
02031 {
02032
02033 return(true);
02034 }
02035
02036 unsigned char * readNameEnds =
02037 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02038
02039 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02040
02041 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
02042
02043 myCigarRoller.getCigarString(myCigar);
02044
02045 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02046
02047 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02048 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02049
02050
02051 if(myRecordPtr->myCigarLength == 0)
02052 {
02053 myCigar = "*";
02054 return(true);
02055 }
02056
02057
02058 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
02059 if(newBufferSize > myCigarTempBufferAllocatedSize)
02060 {
02061 uint32_t* tempBufferPtr =
02062 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02063 if(tempBufferPtr == NULL)
02064 {
02065
02066
02067 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02068 myStatus.addError(SamStatus::FAIL_MEM,
02069 "Failed to Allocate Memory.");
02070 return(false);
02071 }
02072 myCigarTempBuffer = tempBufferPtr;
02073 myCigarTempBufferAllocatedSize = newBufferSize;
02074 }
02075
02076 memcpy(myCigarTempBuffer, packedCigar,
02077 myRecordPtr->myCigarLength * sizeof(uint32_t));
02078
02079
02080 myCigarTempBufferLength = myRecordPtr->myCigarLength;
02081
02082 return(true);
02083 }
02084
02085
02086 bool SamRecord::parseCigarString()
02087 {
02088 myCigarTempBufferLength = 0;
02089 if(myCigar == "*")
02090 {
02091
02092 myAlignmentLength = 0;
02093 myUnclippedStartOffset = 0;
02094 myUnclippedEndOffset = 0;
02095 myCigarRoller.clear();
02096 return(true);
02097 }
02098
02099 myCigarRoller.Set(myCigar);
02100
02101 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02102
02103 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02104 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02105
02106
02107
02108
02109 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
02110 if(newBufferSize > myCigarTempBufferAllocatedSize)
02111 {
02112 uint32_t* tempBufferPtr =
02113 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02114 if(tempBufferPtr == NULL)
02115 {
02116
02117
02118 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02119 myStatus.addError(SamStatus::FAIL_MEM,
02120 "Failed to Allocate Memory.");
02121 return(false);
02122 }
02123 myCigarTempBuffer = tempBufferPtr;
02124 myCigarTempBufferAllocatedSize = newBufferSize;
02125 }
02126
02127
02128 bool status = true;
02129
02130
02131 char *cigarOp;
02132 const char* cigarEntryStart = myCigar.c_str();
02133 int opLen = 0;
02134 int op = 0;
02135
02136 unsigned int * packedCigar = myCigarTempBuffer;
02137
02138
02139 const char* endCigarString = cigarEntryStart + myCigar.Length();
02140 while(cigarEntryStart < endCigarString)
02141 {
02142 bool validCigarEntry = true;
02143
02144
02145 opLen = strtol(cigarEntryStart, &cigarOp, 10);
02146
02147 switch(*cigarOp)
02148 {
02149 case('M'):
02150 op = 0;
02151 break;
02152 case('I'):
02153
02154
02155 op = 1;
02156 break;
02157 case('D'):
02158 op = 2;
02159 break;
02160 case('N'):
02161 op = 3;
02162 break;
02163 case('S'):
02164 op = 4;
02165 break;
02166 case('H'):
02167 op = 5;
02168 break;
02169 case('P'):
02170 op = 6;
02171 break;
02172 default:
02173 fprintf(stderr, "ERROR parsing cigar\n");
02174 validCigarEntry = false;
02175 status = false;
02176 myStatus.addError(SamStatus::FAIL_PARSE,
02177 "Unknown operation found when parsing the Cigar.");
02178 break;
02179 };
02180 if(validCigarEntry)
02181 {
02182
02183 ++myCigarTempBufferLength;
02184 *packedCigar = (opLen << 4) | op;
02185 packedCigar++;
02186 }
02187
02188 cigarEntryStart = ++cigarOp;
02189 }
02190
02191
02192 return(status);
02193 }
02194
02195
02196 bool SamRecord::setTagsFromBuffer()
02197 {
02198
02199 if(myNeedToSetTagsFromBuffer == false)
02200 {
02201
02202 return(true);
02203 }
02204
02205
02206 myNeedToSetTagsFromBuffer = false;
02207
02208 unsigned char * readNameEnds =
02209 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02210 unsigned char * packedSequence =
02211 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02212 unsigned char * packedQuality =
02213 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02214
02215 unsigned char * extraPtr = packedQuality + myRecordPtr->myReadLength;
02216
02217
02218 bool status = true;
02219
02220
02221 clearTags();
02222 while (myRecordPtr->myBlockSize + 4 -
02223 (extraPtr - (unsigned char *)myRecordPtr) > 0)
02224 {
02225 int key;
02226 int value;
02227 void * content = extraPtr + 3;
02228 int tagBufferSize = 0;
02229
02230 switch (extraPtr[2])
02231 {
02232 case 'A' :
02233 key = MAKEKEY(extraPtr[0], extraPtr[1], 'A');
02234 value = integers.Length();
02235 integers.Push(* (char *) content);
02236 extraPtr += 4;
02237 tagBufferSize += 4;
02238 break;
02239 case 'c' :
02240 key = MAKEKEY(extraPtr[0], extraPtr[1], 'c');
02241 value = integers.Length();
02242 integers.Push(* (char *) content);
02243 extraPtr += 4;
02244 tagBufferSize += 4;
02245 break;
02246 case 'C' :
02247 key = MAKEKEY(extraPtr[0], extraPtr[1], 'C');
02248 value = integers.Length();
02249 integers.Push(* (unsigned char *) content);
02250 extraPtr += 4;
02251 tagBufferSize += 4;
02252 break;
02253 case 's' :
02254 key = MAKEKEY(extraPtr[0], extraPtr[1], 's');
02255 value = integers.Length();
02256 integers.Push(* (short *) content);
02257 extraPtr += 5;
02258 tagBufferSize += 5;
02259 break;
02260 case 'S' :
02261 key = MAKEKEY(extraPtr[0], extraPtr[1], 'S');
02262 value = integers.Length();
02263 integers.Push(* (unsigned short *) content);
02264 extraPtr += 5;
02265 tagBufferSize += 5;
02266 break;
02267 case 'i' :
02268 key = MAKEKEY(extraPtr[0], extraPtr[1], 'i');
02269 value = integers.Length();
02270 integers.Push(* (int *) content);
02271 extraPtr += 7;
02272 tagBufferSize += 7;
02273 break;
02274 case 'I' :
02275 key = MAKEKEY(extraPtr[0], extraPtr[1], 'I');
02276 value = integers.Length();
02277 integers.Push((int) * (unsigned int *) content);
02278 extraPtr += 7;
02279 tagBufferSize += 7;
02280 break;
02281 case 'Z' :
02282 key = MAKEKEY(extraPtr[0], extraPtr[1], 'Z');
02283 value = strings.Length();
02284 strings.Push((const char *) content);
02285 extraPtr += 4 + strings.Last().Length();
02286 tagBufferSize += 4 + strings.Last().Length();
02287 break;
02288 case 'f' :
02289 key = MAKEKEY(extraPtr[0], extraPtr[1], 'f');
02290 value = doubles.Length();
02291 doubles.Push(* (float *) content);
02292 fprintf(stderr, "\n\nFLOAT!!!\n\n");
02293 extraPtr += 7;
02294 tagBufferSize += 7;
02295 break;
02296 default :
02297 fprintf(stderr,
02298 "parsing BAM - Unknown custom field of type %c%c:%c\n",
02299 extraPtr[0], extraPtr[1], extraPtr[2]);
02300
02301
02302 extraPtr += 3;
02303 myStatus.addError(SamStatus::FAIL_PARSE,
02304 "Unknown tag type.");
02305 status = false;
02306 }
02307
02308
02309 if(status)
02310 {
02311 extras.Add(key, value);
02312 myTagBufferSize += tagBufferSize;
02313 }
02314 }
02315 return(status);
02316 }
02317
02318
02319 bool SamRecord::setTagsInBuffer()
02320 {
02321
02322
02323
02324 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
02325 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
02326 (unsigned char*)myRecordPtr) +
02327 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
02328 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
02329
02330
02331 if(!allocateRecordStructure(newBufferSize))
02332 {
02333
02334 return(false);
02335 }
02336
02337 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02338 myRecordPtr->myReadNameLength;
02339 unsigned char * packedSequence = readNameEnds +
02340 myRecordPtr->myCigarLength * sizeof(int);
02341 unsigned char * packedQuality =
02342 packedSequence + bamSequenceLength;
02343
02344 char * extraPtr = (char*)packedQuality + myRecordPtr->myReadLength;
02345
02346 bool status = true;
02347
02348
02349 if (extras.Entries())
02350 {
02351 for (int i = 0; i < extras.Capacity(); i++)
02352 {
02353 if (extras.SlotInUse(i))
02354 {
02355 int key = extras.GetKey(i);
02356 getTag(key, extraPtr);
02357 extraPtr += 2;
02358 getVtype(key, extraPtr[0]);
02359 char vtype = extraPtr[0];
02360
02361
02362 extraPtr += 1;
02363
02364 switch (vtype)
02365 {
02366 case 'A' :
02367 *(char*)extraPtr = (char)getInteger(i);
02368
02369 extraPtr += 1;
02370 break;
02371 case 'c' :
02372 *(int8_t*)extraPtr = (int8_t)getInteger(i);
02373
02374 extraPtr += 1;
02375 break;
02376 case 'C' :
02377 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
02378
02379 extraPtr += 1;
02380 break;
02381 case 's' :
02382 *(int16_t*)extraPtr = (int16_t)getInteger(i);
02383
02384 extraPtr += 2;
02385 break;
02386 case 'S' :
02387 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
02388
02389 extraPtr += 2;
02390 break;
02391 case 'i' :
02392 *(int32_t*)extraPtr = (int32_t)getInteger(i);
02393
02394 extraPtr += 4;
02395 break;
02396 case 'I' :
02397 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
02398
02399 extraPtr += 4;
02400 break;
02401 case 'Z' :
02402 sprintf(extraPtr, "%s", getString(i).c_str());
02403 extraPtr += getString(i).Length() + 1;
02404 break;
02405 case 'f' :
02406
02407 sprintf(extraPtr, "%f", getDouble(i));
02408 extraPtr += 4;
02409 break;
02410 default :
02411 myStatus.addError(SamStatus::FAIL_PARSE,
02412 "Unknown tag type.");
02413 status = false;
02414 break;
02415 }
02416 }
02417 }
02418 }
02419
02420
02421
02422 if(extraPtr != (char*)myRecordPtr + newBufferSize)
02423 {
02424 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
02425 myStatus.addError(SamStatus::FAIL_PARSE,
02426 "ERROR updating the buffer. Incorrect size.");
02427 status = false;
02428 }
02429
02430
02431
02432 myNeedToSetTagsInBuffer = false;
02433 myIsTagsBufferValid = true;
02434 return(status);
02435 }
02436
02437
02438
02439
02440
02441 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
02442 {
02443
02444
02445 myReferenceName =
02446 header.getReferenceLabel(myRecordPtr->myReferenceID);
02447 myMateReferenceName =
02448 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
02449
02450
02451 myReadName.SetLength(0);
02452 myCigar.SetLength(0);
02453 mySequence.SetLength(0);
02454 mySeqWithEq.clear();
02455 mySeqWithoutEq.clear();
02456 myQuality.SetLength(0);
02457 myNeedToSetTagsFromBuffer = true;
02458 myNeedToSetTagsInBuffer = false;
02459
02460
02461 myIsBufferSynced = true;
02462
02463 myIsReadNameBufferValid = true;
02464 myIsCigarBufferValid = true;
02465 myIsSequenceBufferValid = true;
02466 myBufferSequenceTranslation = NONE;
02467 myIsQualityBufferValid = true;
02468 myIsTagsBufferValid = true;
02469 myIsBinValid = true;
02470 }
02471
02472
02473
02474 void SamRecord::getVtype(int key, char& vtype) const
02475 {
02476
02477 vtype = (key >> 16) & 0xFF;
02478 }
02479
02480
02481 void SamRecord::getTag(int key, char* tag) const
02482 {
02483
02484 tag[0] = key & 0xFF;
02485 tag[1] = (key >> 8) & 0xFF;
02486 tag[2] = 0;
02487 }
02488
02489
02490
02491 String & SamRecord::getString(int index)
02492 {
02493 int value = extras[index];
02494
02495 return strings[value];
02496 }
02497
02498 int & SamRecord::getInteger(int offset)
02499 {
02500 int value = extras[offset];
02501
02502 return integers[value];
02503 }
02504
02505 double & SamRecord::getDouble(int offset)
02506 {
02507 int value = extras[offset];
02508
02509 return doubles[value];
02510 }
02511
02512