00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021
00022 #include "bam.h"
00023
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026
00027 #include "BaseUtilities.h"
00028 #include "SamQuerySeqWithRefHelper.h"
00029
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032
00033
00034 SamRecord::SamRecord()
00035 : myStatus(),
00036 myRefPtr(NULL),
00037 mySequenceTranslation(NONE)
00038 {
00039 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00040
00041 myRecordPtr =
00042 (bamRecordStruct *) malloc(defaultAllocSize);
00043
00044 myCigarTempBuffer = NULL;
00045 myCigarTempBufferAllocatedSize = 0;
00046
00047 allocatedSize = defaultAllocSize;
00048
00049 resetRecord();
00050 }
00051
00052
00053 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00054 : myStatus(errorHandlingType),
00055 myRefPtr(NULL),
00056 mySequenceTranslation(NONE)
00057 {
00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00059
00060 myRecordPtr =
00061 (bamRecordStruct *) malloc(defaultAllocSize);
00062
00063 myCigarTempBuffer = NULL;
00064 myCigarTempBufferAllocatedSize = 0;
00065
00066 allocatedSize = defaultAllocSize;
00067
00068 resetRecord();
00069 }
00070
00071
00072 SamRecord::~SamRecord()
00073 {
00074 resetRecord();
00075
00076 if(myRecordPtr != NULL)
00077 {
00078 free(myRecordPtr);
00079 myRecordPtr = NULL;
00080 }
00081 if(myCigarTempBuffer != NULL)
00082 {
00083 free(myCigarTempBuffer);
00084 myCigarTempBuffer = NULL;
00085 myCigarTempBufferAllocatedSize = 0;
00086 }
00087 }
00088
00089
00090
00091 void SamRecord::resetRecord()
00092 {
00093 myIsBufferSynced = true;
00094
00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00096 myRecordPtr->myReferenceID = -1;
00097 myRecordPtr->myPosition = -1;
00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00099 myRecordPtr->myMapQuality = 0;
00100 myRecordPtr->myBin = DEFAULT_BIN;
00101 myRecordPtr->myCigarLength = 0;
00102 myRecordPtr->myFlag = 0;
00103 myRecordPtr->myReadLength = 0;
00104 myRecordPtr->myMateReferenceID = -1;
00105 myRecordPtr->myMatePosition = -1;
00106 myRecordPtr->myInsertSize = 0;
00107
00108
00109
00110
00111 myReadName = DEFAULT_READ_NAME;
00112 myReferenceName = "*";
00113 myMateReferenceName = "*";
00114 myCigar = "*";
00115 mySequence = "*";
00116 mySeqWithEq.clear();
00117 mySeqWithoutEq.clear();
00118 myQuality = "*";
00119 myNeedToSetTagsFromBuffer = false;
00120 myNeedToSetTagsInBuffer = false;
00121
00122
00123 myAlignmentLength = -1;
00124 myUnclippedStartOffset = -1;
00125 myUnclippedEndOffset = -1;
00126
00127 clearTags();
00128
00129
00130
00131
00132
00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
00134 myRecordPtr->myReadNameLength);
00135
00136
00137 myIsReadNameBufferValid = true;
00138 myIsCigarBufferValid = true;
00139 myIsSequenceBufferValid = true;
00140 myBufferSequenceTranslation = NONE;
00141 myIsQualityBufferValid = true;
00142 myIsTagsBufferValid = true;
00143 myIsBinValid = true;
00144
00145 myCigarTempBufferLength = -1;
00146
00147 myStatus = SamStatus::SUCCESS;
00148
00149 NOT_FOUND_TAG_STRING = "";
00150 NOT_FOUND_TAG_INT = -1;
00151 NOT_FOUND_TAG_DOUBLE = -1;
00152 }
00153
00154
00155
00156 void SamRecord::resetTagIter()
00157 {
00158 myLastTagIndex = -1;
00159 }
00160
00161
00162
00163
00164 bool SamRecord::isValid(SamFileHeader& header)
00165 {
00166 myStatus = SamStatus::SUCCESS;
00167 SamValidationErrors invalidSamErrors;
00168 if(!SamValidator::isValid(header, *this, invalidSamErrors))
00169 {
00170
00171 std::string errorMessage = "";
00172 invalidSamErrors.getErrorString(errorMessage);
00173 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00174 return(false);
00175 }
00176
00177 return(true);
00178 }
00179
00180
00181
00182 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr,
00183 SamFileHeader& header)
00184 {
00185 myStatus = SamStatus::SUCCESS;
00186 if((filePtr == NULL) || (filePtr->isOpen() == false))
00187 {
00188
00189 myStatus.setStatus(SamStatus::FAIL_ORDER,
00190 "Can't read from an unopened file.");
00191 return(SamStatus::FAIL_ORDER);
00192 }
00193
00194
00195 resetRecord();
00196
00197
00198 int numBytes =
00199 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00200
00201 if(ifeof(filePtr))
00202 {
00203 if(numBytes == 0)
00204 {
00205
00206 myStatus.setStatus(SamStatus::NO_MORE_RECS,
00207 "No more records left to read.");
00208 return(SamStatus::NO_MORE_RECS);
00209 }
00210 else
00211 {
00212
00213
00214 myStatus.setStatus(SamStatus::FAIL_PARSE,
00215 "EOF reached in the middle of a record.");
00216 return(SamStatus::FAIL_PARSE);
00217 }
00218 }
00219
00220
00221 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00222 {
00223
00224
00225 return(SamStatus::FAIL_MEM);
00226 }
00227
00228
00229 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00230 != (unsigned int)myRecordPtr->myBlockSize)
00231 {
00232
00233 resetRecord();
00234 myStatus.setStatus(SamStatus::FAIL_IO,
00235 "Failed to read the record");
00236 return(SamStatus::FAIL_IO);
00237 }
00238
00239 setVariablesForNewBuffer(header);
00240
00241
00242 return(SamStatus::SUCCESS);
00243 }
00244
00245
00246 void SamRecord::setReference(GenomeSequence* reference)
00247 {
00248 myRefPtr = reference;
00249 }
00250
00251
00252
00253
00254
00255 void SamRecord::setSequenceTranslation(SequenceTranslation translation)
00256 {
00257 mySequenceTranslation = translation;
00258 }
00259
00260
00261 bool SamRecord::setReadName(const char* readName)
00262 {
00263 myReadName = readName;
00264 myIsBufferSynced = false;
00265 myIsReadNameBufferValid = false;
00266 myStatus = SamStatus::SUCCESS;
00267
00268
00269
00270 if(myReadName.Length() == 0)
00271 {
00272
00273 myReadName = DEFAULT_READ_NAME;
00274 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00275 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00276 return(false);
00277 }
00278
00279 return true;
00280 }
00281
00282
00283 bool SamRecord::setFlag(uint16_t flag)
00284 {
00285 myStatus = SamStatus::SUCCESS;
00286 myRecordPtr->myFlag = flag;
00287 return true;
00288 }
00289
00290
00291 bool SamRecord::setReferenceName(SamFileHeader& header,
00292 const char* referenceName)
00293 {
00294 myStatus = SamStatus::SUCCESS;
00295
00296 myReferenceName = referenceName;
00297 myRecordPtr->myReferenceID = header.getReferenceID(referenceName);
00298
00299 return true;
00300 }
00301
00302
00303 bool SamRecord::set1BasedPosition(int32_t position)
00304 {
00305 return(set0BasedPosition(position - 1));
00306 }
00307
00308
00309 bool SamRecord::set0BasedPosition(int32_t position)
00310 {
00311 myStatus = SamStatus::SUCCESS;
00312 myRecordPtr->myPosition = position;
00313 myIsBinValid = false;
00314 return true;
00315 }
00316
00317
00318 bool SamRecord::setMapQuality(uint8_t mapQuality)
00319 {
00320 myStatus = SamStatus::SUCCESS;
00321 myRecordPtr->myMapQuality = mapQuality;
00322 return true;
00323 }
00324
00325
00326 bool SamRecord::setCigar(const char* cigar)
00327 {
00328 myStatus = SamStatus::SUCCESS;
00329 myCigar = cigar;
00330
00331 myIsBufferSynced = false;
00332 myIsCigarBufferValid = false;
00333 myCigarTempBufferLength = -1;
00334 myIsBinValid = false;
00335
00336
00337 myAlignmentLength = -1;
00338 myUnclippedStartOffset = -1;
00339 myUnclippedEndOffset = -1;
00340
00341 return true;
00342 }
00343
00344
00345 bool SamRecord::setCigar(const Cigar& cigar)
00346 {
00347 myStatus = SamStatus::SUCCESS;
00348 cigar.getCigarString(myCigar);
00349
00350 myIsBufferSynced = false;
00351 myIsCigarBufferValid = false;
00352 myCigarTempBufferLength = -1;
00353 myIsBinValid = false;
00354
00355
00356 myAlignmentLength = -1;
00357 myUnclippedStartOffset = -1;
00358 myUnclippedEndOffset = -1;
00359
00360 return true;
00361 }
00362
00363
00364 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00365 const char* mateReferenceName)
00366 {
00367 myStatus = SamStatus::SUCCESS;
00368
00369
00370
00371 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00372 {
00373 myMateReferenceName = myReferenceName;
00374 }
00375 else
00376 {
00377 myMateReferenceName = mateReferenceName;
00378 }
00379
00380
00381 myRecordPtr->myMateReferenceID =
00382 header.getReferenceID(myMateReferenceName);
00383
00384 return true;
00385 }
00386
00387
00388 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00389 {
00390 return(set0BasedMatePosition(matePosition - 1));
00391 }
00392
00393
00394 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00395 {
00396 myStatus = SamStatus::SUCCESS;
00397 myRecordPtr->myMatePosition = matePosition;
00398 return true;
00399 }
00400
00401
00402 bool SamRecord::setInsertSize(int32_t insertSize)
00403 {
00404 myStatus = SamStatus::SUCCESS;
00405 myRecordPtr->myInsertSize = insertSize;
00406 return true;
00407 }
00408
00409
00410 bool SamRecord::setSequence(const char* seq)
00411 {
00412 myStatus = SamStatus::SUCCESS;
00413 mySequence = seq;
00414 mySeqWithEq.clear();
00415 mySeqWithoutEq.clear();
00416
00417 myIsBufferSynced = false;
00418 myIsSequenceBufferValid = false;
00419 return true;
00420 }
00421
00422
00423 bool SamRecord::setQuality(const char* quality)
00424 {
00425 myStatus = SamStatus::SUCCESS;
00426 myQuality = quality;
00427 myIsBufferSynced = false;
00428 myIsQualityBufferValid = false;
00429 return true;
00430 }
00431
00432
00433
00434
00435 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00436 uint32_t fromBufferSize,
00437 SamFileHeader& header)
00438 {
00439 myStatus = SamStatus::SUCCESS;
00440 if((fromBuffer == NULL) || (fromBufferSize == 0))
00441 {
00442
00443 myStatus.setStatus(SamStatus::FAIL_PARSE,
00444 "Cannot parse an empty file.");
00445 return(SamStatus::FAIL_PARSE);
00446 }
00447
00448
00449 resetRecord();
00450
00451
00452 if(!allocateRecordStructure(fromBufferSize))
00453 {
00454
00455 return(SamStatus::FAIL_MEM);
00456 }
00457
00458 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00459
00460 setVariablesForNewBuffer(header);
00461
00462
00463 return(SamStatus::SUCCESS);
00464 }
00465
00466
00467
00468
00469 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00470 {
00471 myStatus = SamStatus::SUCCESS;
00472 bool status = true;
00473 int key = 0;
00474 int intVal = 0;
00475 int index = 0;
00476 char bamvtype;
00477
00478 int tagBufferSize = 0;
00479
00480
00481 if(myNeedToSetTagsFromBuffer)
00482 {
00483 if(!setTagsFromBuffer())
00484 {
00485
00486 return(false);
00487 }
00488 }
00489
00490 switch (vtype)
00491 {
00492 case 'A' :
00493 index = integers.Length();
00494 bamvtype = vtype;
00495 integers.Push((const int)*(valuePtr));
00496 tagBufferSize += 4;
00497 break;
00498 case 'i' :
00499 index = integers.Length();
00500 intVal = atoi((const char *)valuePtr);
00501
00502
00503
00504
00505 if(intVal < 0)
00506 {
00507
00508
00509 if(intVal > std::numeric_limits<char>::min())
00510 {
00511
00512 bamvtype = 'c';
00513 tagBufferSize += 4;
00514 }
00515 else if(intVal > std::numeric_limits<short>::min())
00516 {
00517
00518 bamvtype = 's';
00519 tagBufferSize += 5;
00520 }
00521 else
00522 {
00523
00524 bamvtype = 'i';
00525 tagBufferSize += 7;
00526 }
00527 }
00528 else
00529 {
00530
00531 if(intVal < std::numeric_limits<unsigned char>::max())
00532 {
00533
00534 bamvtype = 'C';
00535 tagBufferSize += 4;
00536 }
00537 else if(intVal < std::numeric_limits<unsigned short>::max())
00538 {
00539
00540 bamvtype = 'S';
00541 tagBufferSize += 5;
00542 }
00543 else
00544 {
00545
00546 bamvtype = 'I';
00547 tagBufferSize += 7;
00548 }
00549 }
00550 integers.Push(intVal);
00551 break;
00552 case 'Z' :
00553 index = strings.Length();
00554 bamvtype = vtype;
00555 strings.Push((const char *)valuePtr);
00556 tagBufferSize += 4 + strings.Last().Length();
00557 break;
00558 case 'f' :
00559 index = doubles.Length();
00560 bamvtype = vtype;
00561 doubles.Push(atof((const char *)valuePtr));
00562 tagBufferSize += 7;
00563 break;
00564 default :
00565 fprintf(stderr,
00566 "samFile::ReadSAM() - Unknown custom field of type %c\n",
00567 vtype);
00568 myStatus.setStatus(SamStatus::FAIL_PARSE,
00569 "Unknown custom field in a tag");
00570 status = false;
00571 }
00572
00573
00574 if(status)
00575 {
00576
00577 myNeedToSetTagsInBuffer = true;
00578 myIsTagsBufferValid = false;
00579 myIsBufferSynced = false;
00580
00581 key = MAKEKEY(tag[0], tag[1], bamvtype);
00582 extras.Add(key, index);
00583 myTagBufferSize += tagBufferSize;
00584 }
00585 return(status);
00586 }
00587
00588
00589
00590 const void* SamRecord::getRecordBuffer()
00591 {
00592 return(getRecordBuffer(mySequenceTranslation));
00593 }
00594
00595
00596
00597 const void* SamRecord::getRecordBuffer(SequenceTranslation translation)
00598 {
00599 myStatus = SamStatus::SUCCESS;
00600 bool status = true;
00601
00602
00603 if((myIsBufferSynced == false) ||
00604 (myBufferSequenceTranslation != translation))
00605 {
00606 status &= fixBuffer(translation);
00607 }
00608
00609 if(myNeedToSetTagsInBuffer)
00610 {
00611 status &= setTagsInBuffer();
00612 }
00613 if(!status)
00614 {
00615 return(NULL);
00616 }
00617 return (const void *)myRecordPtr;
00618 }
00619
00620
00621
00622
00623 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
00624 {
00625 return(writeRecordBuffer(filePtr, mySequenceTranslation));
00626 }
00627
00628
00629
00630 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr,
00631 SequenceTranslation translation)
00632 {
00633 myStatus = SamStatus::SUCCESS;
00634 if((filePtr == NULL) || (filePtr->isOpen() == false))
00635 {
00636
00637 myStatus.setStatus(SamStatus::FAIL_ORDER,
00638 "Can't write to an unopened file.");
00639 return(SamStatus::FAIL_ORDER);
00640 }
00641
00642 if((myIsBufferSynced == false) ||
00643 (myBufferSequenceTranslation != translation))
00644 {
00645 if(!fixBuffer(translation))
00646 {
00647 return(myStatus.getStatus());
00648 }
00649 }
00650
00651
00652 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
00653 unsigned int numBytesWritten =
00654 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
00655
00656
00657 if(numBytesToWrite == numBytesWritten)
00658 {
00659 return(SamStatus::SUCCESS);
00660 }
00661
00662 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
00663 return(SamStatus::FAIL_IO);
00664 }
00665
00666
00667 int32_t SamRecord::getBlockSize()
00668 {
00669 myStatus = SamStatus::SUCCESS;
00670
00671
00672 if(myIsBufferSynced == false)
00673 {
00674
00675
00676
00677 fixBuffer(myBufferSequenceTranslation);
00678 }
00679 return myRecordPtr->myBlockSize;
00680 }
00681
00682
00683
00684 const char* SamRecord::getReferenceName()
00685 {
00686 myStatus = SamStatus::SUCCESS;
00687 return myReferenceName.c_str();
00688 }
00689
00690
00691 int32_t SamRecord::getReferenceID()
00692 {
00693 myStatus = SamStatus::SUCCESS;
00694 return myRecordPtr->myReferenceID;
00695 }
00696
00697
00698 int32_t SamRecord::get1BasedPosition()
00699 {
00700 myStatus = SamStatus::SUCCESS;
00701 return (myRecordPtr->myPosition + 1);
00702 }
00703
00704
00705 int32_t SamRecord::get0BasedPosition()
00706 {
00707 myStatus = SamStatus::SUCCESS;
00708 return myRecordPtr->myPosition;
00709 }
00710
00711
00712 uint8_t SamRecord::getReadNameLength()
00713 {
00714 myStatus = SamStatus::SUCCESS;
00715
00716
00717 if(myIsReadNameBufferValid)
00718 {
00719 return(myRecordPtr->myReadNameLength);
00720 }
00721
00722 return(myReadName.Length() + 1);
00723 }
00724
00725
00726 uint8_t SamRecord::getMapQuality()
00727 {
00728 myStatus = SamStatus::SUCCESS;
00729 return myRecordPtr->myMapQuality;
00730 }
00731
00732
00733 uint16_t SamRecord::getBin()
00734 {
00735 myStatus = SamStatus::SUCCESS;
00736 if(!myIsBinValid)
00737 {
00738
00739
00740 myRecordPtr->myBin =
00741 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
00742 myIsBinValid = true;
00743 }
00744 return(myRecordPtr->myBin);
00745 }
00746
00747
00748 uint16_t SamRecord::getCigarLength()
00749 {
00750 myStatus = SamStatus::SUCCESS;
00751
00752
00753 if(myIsCigarBufferValid)
00754 {
00755 return myRecordPtr->myCigarLength;
00756 }
00757
00758 if(myCigarTempBufferLength == -1)
00759 {
00760
00761
00762 parseCigarString();
00763 }
00764
00765
00766 return(myCigarTempBufferLength);
00767 }
00768
00769
00770 uint16_t SamRecord::getFlag()
00771 {
00772 myStatus = SamStatus::SUCCESS;
00773 return myRecordPtr->myFlag;
00774 }
00775
00776
00777 int32_t SamRecord::getReadLength()
00778 {
00779 myStatus = SamStatus::SUCCESS;
00780 if(myIsSequenceBufferValid == false)
00781 {
00782
00783 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
00784 {
00785 return(0);
00786 }
00787
00788 return(mySequence.Length());
00789 }
00790 return(myRecordPtr->myReadLength);
00791 }
00792
00793
00794
00795
00796 const char* SamRecord::getMateReferenceName()
00797 {
00798 myStatus = SamStatus::SUCCESS;
00799 return myMateReferenceName.c_str();
00800 }
00801
00802
00803
00804
00805
00806 const char* SamRecord::getMateReferenceNameOrEqual()
00807 {
00808 myStatus = SamStatus::SUCCESS;
00809 if(myMateReferenceName == "*")
00810 {
00811 return(myMateReferenceName);
00812 }
00813 if(myMateReferenceName == getReferenceName())
00814 {
00815 return(FIELD_ABSENT_STRING);
00816 }
00817 else
00818 {
00819 return(myMateReferenceName);
00820 }
00821 }
00822
00823
00824 int32_t SamRecord::getMateReferenceID()
00825 {
00826 myStatus = SamStatus::SUCCESS;
00827 return myRecordPtr->myMateReferenceID;
00828 }
00829
00830
00831 int32_t SamRecord::get1BasedMatePosition()
00832 {
00833 myStatus = SamStatus::SUCCESS;
00834 return (myRecordPtr->myMatePosition + 1);
00835 }
00836
00837
00838 int32_t SamRecord::get0BasedMatePosition()
00839 {
00840 myStatus = SamStatus::SUCCESS;
00841 return myRecordPtr->myMatePosition;
00842 }
00843
00844
00845 int32_t SamRecord::getInsertSize()
00846 {
00847 myStatus = SamStatus::SUCCESS;
00848 return myRecordPtr->myInsertSize;
00849 }
00850
00851
00852
00853 int32_t SamRecord::get0BasedAlignmentEnd()
00854 {
00855 myStatus = SamStatus::SUCCESS;
00856 if(myAlignmentLength == -1)
00857 {
00858
00859 parseCigar();
00860 }
00861
00862 if(myAlignmentLength == 0)
00863 {
00864
00865 return(myRecordPtr->myPosition);
00866 }
00867 return(myRecordPtr->myPosition + myAlignmentLength - 1);
00868 }
00869
00870
00871
00872 int32_t SamRecord::get1BasedAlignmentEnd()
00873 {
00874 return(get0BasedAlignmentEnd() + 1);
00875 }
00876
00877
00878
00879 int32_t SamRecord::getAlignmentLength()
00880 {
00881 myStatus = SamStatus::SUCCESS;
00882 if(myAlignmentLength == -1)
00883 {
00884
00885 parseCigar();
00886 }
00887
00888 return(myAlignmentLength);
00889 }
00890
00891
00892 int32_t SamRecord::get0BasedUnclippedStart()
00893 {
00894 myStatus = SamStatus::SUCCESS;
00895 if(myUnclippedStartOffset == -1)
00896 {
00897
00898 parseCigar();
00899 }
00900 return(myRecordPtr->myPosition - myUnclippedStartOffset);
00901 }
00902
00903
00904
00905 int32_t SamRecord::get1BasedUnclippedStart()
00906 {
00907 return(get0BasedUnclippedStart() + 1);
00908 }
00909
00910
00911
00912 int32_t SamRecord::get0BasedUnclippedEnd()
00913 {
00914
00915
00916 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
00917 }
00918
00919
00920
00921 int32_t SamRecord::get1BasedUnclippedEnd()
00922 {
00923 return(get0BasedUnclippedEnd() + 1);
00924 }
00925
00926
00927
00928 const char* SamRecord::getReadName()
00929 {
00930 myStatus = SamStatus::SUCCESS;
00931 if(myReadName.Length() == 0)
00932 {
00933
00934
00935 myReadName = (char*)&(myRecordPtr->myData);
00936 }
00937 return myReadName.c_str();
00938 }
00939
00940
00941 const char* SamRecord::getCigar()
00942 {
00943 myStatus = SamStatus::SUCCESS;
00944 if(myCigar.Length() == 0)
00945 {
00946
00947
00948 parseCigarBinary();
00949 }
00950 return myCigar.c_str();
00951 }
00952
00953
00954 const char* SamRecord::getSequence()
00955 {
00956 return(getSequence(mySequenceTranslation));
00957 }
00958
00959
00960 const char* SamRecord::getSequence(SequenceTranslation translation)
00961 {
00962 myStatus = SamStatus::SUCCESS;
00963 if(mySequence.Length() == 0)
00964 {
00965
00966
00967 setSequenceAndQualityFromBuffer();
00968 }
00969
00970
00971 if((translation == NONE) || (myRefPtr == NULL))
00972 {
00973 return mySequence.c_str();
00974 }
00975 else if(translation == EQUAL)
00976 {
00977 if(mySeqWithEq.length() == 0)
00978 {
00979
00980 if(mySequence == "*")
00981 {
00982
00983 mySeqWithEq = '*';
00984 }
00985 else
00986 {
00987
00988 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
00989 myRecordPtr->myPosition,
00990 *(getCigarInfo()),
00991 getReferenceName(),
00992 *myRefPtr,
00993 mySeqWithEq);
00994 }
00995 }
00996 return(mySeqWithEq.c_str());
00997 }
00998 else
00999 {
01000
01001 if(mySeqWithoutEq.length() == 0)
01002 {
01003 if(mySequence == "*")
01004 {
01005
01006 mySeqWithoutEq = '*';
01007 }
01008 else
01009 {
01010
01011 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01012 myRecordPtr->myPosition,
01013 *(getCigarInfo()),
01014 getReferenceName(),
01015 *myRefPtr,
01016 mySeqWithoutEq);
01017 }
01018 }
01019 return(mySeqWithoutEq.c_str());
01020 }
01021 }
01022
01023
01024 const char* SamRecord::getQuality()
01025 {
01026 myStatus = SamStatus::SUCCESS;
01027 if(myQuality.Length() == 0)
01028 {
01029
01030
01031 setSequenceAndQualityFromBuffer();
01032 }
01033 return myQuality.c_str();
01034 }
01035
01036
01037 char SamRecord::getSequence(int index)
01038 {
01039 return(getSequence(index, mySequenceTranslation));
01040 }
01041
01042
01043 char SamRecord::getSequence(int index, SequenceTranslation translation)
01044 {
01045 static const char * asciiBases = "=AC.G...T......N";
01046
01047
01048 int32_t readLen = getReadLength();
01049
01050
01051 if(readLen == 0)
01052 {
01053 String exceptionString = "SamRecord::getSequence(";
01054 exceptionString += index;
01055 exceptionString += ") is not allowed since sequence = '*'";
01056 throw std::runtime_error(exceptionString.c_str());
01057 }
01058 else if((index < 0) || (index >= readLen))
01059 {
01060
01061 String exceptionString = "SamRecord::getSequence(";
01062 exceptionString += index;
01063 exceptionString += ") is out of range. Index must be between 0 and ";
01064 exceptionString += (readLen - 1);
01065 throw std::runtime_error(exceptionString.c_str());
01066 }
01067
01068
01069 if((translation == NONE) || (myRefPtr == NULL))
01070 {
01071
01072 if(mySequence.Length() == 0)
01073 {
01074
01075
01076
01077 unsigned char * packedSequence =
01078 (unsigned char *)myRecordPtr->myData +
01079 myRecordPtr->myReadNameLength +
01080 myRecordPtr->myCigarLength * sizeof(int);
01081
01082 return(index & 1 ?
01083 asciiBases[packedSequence[index / 2] & 0xF] :
01084 asciiBases[packedSequence[index / 2] >> 4]);
01085 }
01086
01087 return(mySequence[index]);
01088 }
01089 else
01090 {
01091
01092
01093
01094 if(mySequence.Length() == 0)
01095 {
01096
01097
01098 setSequenceAndQualityFromBuffer();
01099 }
01100
01101
01102 if(translation == EQUAL)
01103 {
01104
01105
01106 if(mySeqWithEq.length() == 0)
01107 {
01108
01109
01110
01111 if(mySequence == "*")
01112 {
01113
01114 mySeqWithEq = '*';
01115 }
01116 else
01117 {
01118
01119 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01120 myRecordPtr->myPosition,
01121 *(getCigarInfo()),
01122 getReferenceName(),
01123 *myRefPtr,
01124 mySeqWithEq);
01125 }
01126 }
01127
01128 return(mySeqWithEq[index]);
01129 }
01130 else
01131 {
01132
01133
01134
01135 if(mySeqWithoutEq.length() == 0)
01136 {
01137
01138
01139
01140 if(mySequence == "*")
01141 {
01142
01143 mySeqWithoutEq = '*';
01144 }
01145 else
01146 {
01147
01148
01149
01150 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01151 myRecordPtr->myPosition,
01152 *(getCigarInfo()),
01153 getReferenceName(),
01154 *myRefPtr,
01155 mySeqWithoutEq);
01156 }
01157 }
01158
01159 return(mySeqWithoutEq[index]);
01160 }
01161 }
01162 }
01163
01164
01165 char SamRecord::getQuality(int index)
01166 {
01167
01168 int32_t readLen = getReadLength();
01169
01170
01171
01172 if(readLen == 0)
01173 {
01174 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01175 }
01176 else if((index < 0) || (index >= readLen))
01177 {
01178
01179 String exceptionString = "SamRecord::getQuality(";
01180 exceptionString += index;
01181 exceptionString += ") is out of range. Index must be between 0 and ";
01182 exceptionString += (readLen - 1);
01183 throw std::runtime_error(exceptionString.c_str());
01184 }
01185
01186 if(myQuality.Length() == 0)
01187 {
01188
01189 unsigned char * packedQuality =
01190 (unsigned char *)myRecordPtr->myData +
01191 myRecordPtr->myReadNameLength +
01192 myRecordPtr->myCigarLength * sizeof(int) +
01193 (myRecordPtr->myReadLength + 1) / 2;
01194 return(packedQuality[index] + 33);
01195 }
01196 else
01197 {
01198
01199 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01200 {
01201
01202 return(0xFF);
01203 }
01204 else
01205 {
01206 return(myQuality[index]);
01207 }
01208 }
01209 }
01210
01211
01212 Cigar* SamRecord::getCigarInfo()
01213 {
01214
01215
01216
01217
01218 if(myAlignmentLength == -1)
01219 {
01220
01221 parseCigar();
01222 }
01223 return(&myCigarRoller);
01224 }
01225
01226
01227 uint32_t SamRecord::getTagLength()
01228 {
01229 myStatus = SamStatus::SUCCESS;
01230 if(myNeedToSetTagsFromBuffer)
01231 {
01232
01233
01234 unsigned char * tagStart =
01235 (unsigned char *)myRecordPtr->myData
01236 + myRecordPtr->myReadNameLength
01237 + myRecordPtr->myCigarLength * sizeof(int)
01238 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01239
01240
01241
01242
01243 uint32_t nonTagSize =
01244 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01245
01246 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01247 return(tagSize);
01248 }
01249
01250
01251 return(myTagBufferSize);
01252 }
01253
01254
01255
01256
01257
01258
01259
01260 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01261 {
01262 myStatus = SamStatus::SUCCESS;
01263 if(myNeedToSetTagsFromBuffer)
01264 {
01265 if(!setTagsFromBuffer())
01266 {
01267
01268
01269 return(false);
01270 }
01271 }
01272
01273
01274
01275 myLastTagIndex++;
01276 int maxTagIndex = extras.Capacity();
01277 if(myLastTagIndex >= maxTagIndex)
01278 {
01279
01280
01281
01282 return(false);
01283 }
01284
01285 bool tagFound = false;
01286
01287 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01288 {
01289 if(extras.SlotInUse(myLastTagIndex))
01290 {
01291
01292 int key = extras.GetKey(myLastTagIndex);
01293 getTag(key, tag);
01294 getVtype(key, vtype);
01295 tagFound = true;
01296
01297 switch (vtype)
01298 {
01299 case 'A' :
01300 *value = getIntegerPtr(myLastTagIndex);
01301 break;
01302 case 'f' :
01303 *value = getDoublePtr(myLastTagIndex);
01304 break;
01305 case 'c' :
01306 case 'C' :
01307 case 's' :
01308 case 'S' :
01309 case 'i' :
01310 case 'I' :
01311 vtype = 'i';
01312 *value = getIntegerPtr(myLastTagIndex);
01313 break;
01314 case 'Z' :
01315 *value = getStringPtr(myLastTagIndex);
01316 break;
01317 default:
01318 myStatus.setStatus(SamStatus::FAIL_PARSE,
01319 "Unknown tag type");
01320 tagFound = false;
01321 break;
01322 }
01323 }
01324 if(!tagFound)
01325 {
01326
01327 myLastTagIndex++;
01328 }
01329 }
01330 return(tagFound);
01331 }
01332
01333
01334
01335 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01336 String& cigar, String& sequence, String& quality)
01337 {
01338 return(getFields(recStruct, readName, cigar, sequence, quality,
01339 mySequenceTranslation));
01340 }
01341
01342
01343
01344 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01345 String& cigar, String& sequence, String& quality,
01346 SequenceTranslation translation)
01347 {
01348 myStatus = SamStatus::SUCCESS;
01349 if(myIsBufferSynced == false)
01350 {
01351 if(!fixBuffer(translation))
01352 {
01353
01354 return(false);
01355 }
01356 }
01357 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01358
01359 readName = getReadName();
01360
01361 if(myStatus != SamStatus::SUCCESS)
01362 {
01363
01364 return(false);
01365 }
01366 cigar = getCigar();
01367
01368 if(myStatus != SamStatus::SUCCESS)
01369 {
01370
01371 return(false);
01372 }
01373 sequence = getSequence(translation);
01374
01375 if(myStatus != SamStatus::SUCCESS)
01376 {
01377
01378 return(false);
01379 }
01380 quality = getQuality();
01381
01382 if(myStatus != SamStatus::SUCCESS)
01383 {
01384
01385 return(false);
01386 }
01387 return(true);
01388 }
01389
01390
01391 bool SamRecord::isIntegerType(char vtype) const
01392 {
01393 if((vtype == 'c') || (vtype == 'C') ||
01394 (vtype == 's') || (vtype == 'S') ||
01395 (vtype == 'i') || (vtype == 'I'))
01396 {
01397 return(true);
01398 }
01399 return(false);
01400 }
01401
01402
01403 bool SamRecord::isDoubleType(char vtype) const
01404 {
01405 if(vtype == 'f')
01406 {
01407 return(true);
01408 }
01409 return(false);
01410 }
01411
01412
01413 bool SamRecord::isCharType(char vtype) const
01414 {
01415 if(vtype == 'A')
01416 {
01417 return(true);
01418 }
01419 return(false);
01420 }
01421
01422
01423 bool SamRecord::isStringType(char vtype) const
01424 {
01425 if(vtype == 'Z')
01426 {
01427 return(true);
01428 }
01429 return(false);
01430 }
01431
01432
01433 void SamRecord::clearTags()
01434 {
01435 if(extras.Entries() != 0)
01436 {
01437 extras.Clear();
01438 }
01439 strings.Clear();
01440 integers.Clear();
01441 doubles.Clear();
01442 myTagBufferSize = 0;
01443 resetTagIter();
01444 }
01445
01446
01447 bool SamRecord::rmTag(const char* tag, char type)
01448 {
01449
01450 if(strlen(tag) != 2)
01451 {
01452
01453 myStatus.setStatus(SamStatus::INVALID,
01454 "rmTag called with tag that is not 2 characters\n");
01455 return(false);
01456 }
01457
01458 myStatus = SamStatus::SUCCESS;
01459 if(myNeedToSetTagsFromBuffer)
01460 {
01461 if(!setTagsFromBuffer())
01462 {
01463
01464
01465 return(false);
01466 }
01467 }
01468
01469
01470 int key = MAKEKEY(tag[0], tag[1], type);
01471
01472 int offset = extras.Find(key);
01473
01474 if(offset < 0)
01475 {
01476
01477
01478 return(true);
01479 }
01480
01481 int rmBuffSize = 0;
01482
01483
01484
01485
01486
01487 switch(type)
01488 {
01489 case 'A':
01490 case 'c':
01491 case 'C':
01492 rmBuffSize = 4;
01493 break;
01494 case 's':
01495 case 'S':
01496 rmBuffSize = 5;
01497 break;
01498 case 'i':
01499 case 'I':
01500 rmBuffSize = 7;
01501 break;
01502 case 'f':
01503 rmBuffSize = 7;
01504 break;
01505 case 'Z':
01506 rmBuffSize = 4 + getString(offset).Length();
01507 break;
01508 default:
01509 myStatus.setStatus(SamStatus::INVALID,
01510 "rmTag called with unknown type.\n");
01511 return(false);
01512 break;
01513 };
01514
01515
01516 myNeedToSetTagsInBuffer = true;
01517 myIsTagsBufferValid = false;
01518 myIsBufferSynced = false;
01519 myTagBufferSize -= rmBuffSize;
01520
01521
01522 extras.Delete(offset);
01523 return(true);
01524 }
01525
01526
01527
01528 const SamStatus& SamRecord::getStatus()
01529 {
01530 return(myStatus);
01531 }
01532
01533
01534 String & SamRecord::getString(const char * tag)
01535 {
01536
01537 myStatus = SamStatus::SUCCESS;
01538
01539 if(myNeedToSetTagsFromBuffer)
01540 {
01541 if(!setTagsFromBuffer())
01542 {
01543
01544
01545
01546 }
01547 }
01548
01549 int key = MAKEKEY(tag[0], tag[1], 'Z');
01550 int offset = extras.Find(key);
01551
01552 int value;
01553 if (offset < 0)
01554 {
01555
01556 return(NOT_FOUND_TAG_STRING);
01557 }
01558 else
01559 value = extras[offset];
01560
01561 return strings[value];
01562 }
01563
01564 int & SamRecord::getInteger(const char * tag)
01565 {
01566
01567 myStatus = SamStatus::SUCCESS;
01568
01569 if(myNeedToSetTagsFromBuffer)
01570 {
01571 if(!setTagsFromBuffer())
01572 {
01573
01574
01575
01576 }
01577 }
01578
01579 int key = MAKEKEY(tag[0], tag[1], 'i');
01580 int offset = extras.Find(key);
01581
01582 int value;
01583 if (offset < 0)
01584 {
01585
01586 return NOT_FOUND_TAG_INT;
01587 }
01588 else
01589 value = extras[offset];
01590
01591 return integers[value];
01592 }
01593
01594 double & SamRecord::getDouble(const char * tag)
01595 {
01596
01597 myStatus = SamStatus::SUCCESS;
01598
01599 if(myNeedToSetTagsFromBuffer)
01600 {
01601 if(!setTagsFromBuffer())
01602 {
01603
01604
01605
01606 }
01607 }
01608
01609 int key = MAKEKEY(tag[0], tag[1], 'f');
01610 int offset = extras.Find(key);
01611
01612 int value;
01613 if (offset < 0)
01614 {
01615
01616 return NOT_FOUND_TAG_DOUBLE;
01617 }
01618 else
01619 value = extras[offset];
01620
01621 return doubles[value];
01622 }
01623
01624
01625 bool SamRecord::checkTag(const char * tag, char type)
01626 {
01627
01628 myStatus = SamStatus::SUCCESS;
01629
01630 if(myNeedToSetTagsFromBuffer)
01631 {
01632 if(!setTagsFromBuffer())
01633 {
01634
01635
01636 return("");
01637 }
01638 }
01639
01640 int key = MAKEKEY(tag[0], tag[1], type);
01641
01642 return (extras.Find(key) != LH_NOTFOUND);
01643 }
01644
01645
01646
01647
01648 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01649 {
01650
01651
01652 if(myAlignmentLength == -1)
01653 {
01654 parseCigar();
01655 }
01656 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01657 }
01658
01659
01660
01661
01662
01663
01664
01665
01666
01667
01668
01669
01670
01671
01672
01673
01674
01675
01676
01677
01678
01679
01680
01681
01682
01683
01684
01685
01686
01687
01688
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698
01699
01700
01701
01702
01703
01704
01705
01706
01707
01708
01709
01710
01711
01712
01713
01714 bool SamRecord::allocateRecordStructure(int size)
01715 {
01716 if (allocatedSize < size)
01717 {
01718 bamRecordStruct* tmpRecordPtr =
01719 (bamRecordStruct *)realloc(myRecordPtr, size);
01720 if(tmpRecordPtr == NULL)
01721 {
01722
01723 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
01724 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
01725 return(false);
01726 }
01727
01728 myRecordPtr = tmpRecordPtr;
01729 allocatedSize = size;
01730 }
01731 return(true);
01732 }
01733
01734
01735
01736 void* SamRecord::getStringPtr(int index)
01737 {
01738 int value = extras[index];
01739
01740 return &(strings[value]);
01741 }
01742
01743 void* SamRecord::getIntegerPtr(int offset)
01744 {
01745 int value = extras[offset];
01746
01747 return &(integers[value]);
01748 }
01749
01750 void* SamRecord::getDoublePtr(int offset)
01751 {
01752 int value = extras[offset];
01753
01754 return &(doubles[value]);
01755 }
01756
01757
01758
01759 bool SamRecord::fixBuffer(SequenceTranslation translation)
01760 {
01761
01762 if(myIsBufferSynced &&
01763 (myBufferSequenceTranslation == translation))
01764 {
01765
01766 return(true);
01767 }
01768
01769
01770 if(!myIsBinValid)
01771 {
01772
01773
01774 myRecordPtr->myBin =
01775 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
01776 myIsBinValid = true;
01777 }
01778
01779
01780 bool status = true;
01781
01782
01783 uint8_t newReadNameLen = getReadNameLength();
01784 uint16_t newCigarLen = getCigarLength();
01785 int32_t newReadLen = getReadLength();
01786 uint32_t newTagLen = getTagLength();
01787 uint32_t bamSequenceLen = (newReadLen+1)/2;
01788
01789
01790
01791
01792 int newBufferSize =
01793 ((unsigned char*)(&(myRecordPtr->myData)) -
01794 (unsigned char*)myRecordPtr) +
01795 newReadNameLen + ((newCigarLen)*4) +
01796 newReadLen + bamSequenceLen + newTagLen;
01797
01798 if(!allocateRecordStructure(newBufferSize))
01799 {
01800
01801 return(false);
01802 }
01803
01804
01805
01806
01807
01808
01809 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
01810 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
01811 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
01812
01813
01814
01815 if(myIsTagsBufferValid &&
01816 (readNameLenChange | cigarLenChange | readLenChange))
01817 {
01818 status &= setTagsFromBuffer();
01819
01820
01821 myIsTagsBufferValid = false;
01822 }
01823
01824
01825
01826
01827 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
01828 (readNameLenChange | cigarLenChange | readLenChange))
01829 {
01830 setSequenceAndQualityFromBuffer();
01831
01832
01833 myIsQualityBufferValid = false;
01834 myIsSequenceBufferValid = false;
01835 }
01836
01837
01838
01839 if((myIsCigarBufferValid) &&
01840 (readNameLenChange))
01841 {
01842 status &= parseCigarBinary();
01843 myIsCigarBufferValid = false;
01844 }
01845
01846
01847 if(!myIsReadNameBufferValid)
01848 {
01849 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
01850 newReadNameLen);
01851
01852
01853 myRecordPtr->myReadNameLength = newReadNameLen;
01854 myIsReadNameBufferValid = true;
01855 }
01856
01857 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
01858 myRecordPtr->myReadNameLength;
01859
01860
01861 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
01862
01863 if(!myIsCigarBufferValid)
01864 {
01865
01866
01867 myRecordPtr->myCigarLength = newCigarLen;
01868 memcpy(packedCigar, myCigarTempBuffer,
01869 myRecordPtr->myCigarLength * sizeof(uint32_t));
01870
01871 myIsCigarBufferValid = true;
01872 }
01873
01874 unsigned char * packedSequence = readNameEnds +
01875 myRecordPtr->myCigarLength * sizeof(int);
01876 unsigned char * packedQuality = packedSequence + bamSequenceLen;
01877
01878 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
01879 (myBufferSequenceTranslation != translation))
01880 {
01881 myRecordPtr->myReadLength = newReadLen;
01882
01883
01884 bool noQuality = false;
01885 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01886 {
01887 noQuality = true;
01888 }
01889
01890 const char* translatedSeq = NULL;
01891
01892
01893
01894 if((!myIsSequenceBufferValid) ||
01895 (translation != myBufferSequenceTranslation))
01896 {
01897 translatedSeq = getSequence(translation);
01898 }
01899
01900 for (int i = 0; i < myRecordPtr->myReadLength; i++)
01901 {
01902 if((!myIsSequenceBufferValid) ||
01903 (translation != myBufferSequenceTranslation))
01904 {
01905
01906 int seqVal = 0;
01907 switch(translatedSeq[i])
01908 {
01909 case '=':
01910 seqVal = 0;
01911 break;
01912 case 'A':
01913 case 'a':
01914 seqVal = 1;
01915 break;
01916 case 'C':
01917 case 'c':
01918 seqVal = 2;
01919 break;
01920 case 'G':
01921 case 'g':
01922 seqVal = 4;
01923 break;
01924 case 'T':
01925 case 't':
01926 seqVal = 8;
01927 break;
01928 case 'N':
01929 case 'n':
01930 case '.':
01931 seqVal = 15;
01932 break;
01933 default:
01934 myStatus.addError(SamStatus::FAIL_PARSE,
01935 "Unknown Sequence character found.");
01936 status = false;
01937 break;
01938 };
01939
01940 if(i & 1)
01941 {
01942
01943
01944 packedSequence[i/2] |= seqVal;
01945 }
01946 else
01947 {
01948
01949 packedSequence[i/2] = seqVal << 4;
01950 }
01951 }
01952
01953 if(!myIsQualityBufferValid)
01954 {
01955
01956 if((noQuality) || (myQuality.Length() <= i))
01957 {
01958
01959
01960 packedQuality[i] = 0xFF;
01961 }
01962 else
01963 {
01964
01965 packedQuality[i] = myQuality[i] - 33;
01966 }
01967 }
01968 }
01969 myIsQualityBufferValid = true;
01970 myIsSequenceBufferValid = true;
01971 myBufferSequenceTranslation = translation;
01972 }
01973
01974 if(!myIsTagsBufferValid)
01975 {
01976 status &= setTagsInBuffer();
01977 }
01978
01979
01980 myRecordPtr->myReadNameLength = newReadNameLen;
01981 myRecordPtr->myCigarLength = newCigarLen;
01982 myRecordPtr->myReadLength = newReadLen;
01983
01984
01985
01986 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
01987
01988 if(status)
01989 {
01990 myIsBufferSynced = true;
01991 }
01992
01993 return(status);
01994 }
01995
01996
01997
01998
01999
02000 void SamRecord::setSequenceAndQualityFromBuffer()
02001 {
02002
02003
02004
02005
02006
02007
02008 bool extractSequence = false;
02009 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
02010 {
02011 extractSequence = true;
02012 }
02013
02014
02015 bool extractQuality = false;
02016 if(myIsQualityBufferValid && (myQuality.Length() == 0))
02017 {
02018 extractQuality = true;
02019 }
02020
02021
02022
02023 if(!extractSequence && !extractQuality)
02024 {
02025 return;
02026 }
02027
02028
02029 if(extractSequence)
02030 {
02031 mySequence.SetLength(myRecordPtr->myReadLength);
02032 }
02033 if(extractQuality)
02034 {
02035 myQuality.SetLength(myRecordPtr->myReadLength);
02036 }
02037
02038 unsigned char * readNameEnds =
02039 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02040 unsigned char * packedSequence =
02041 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02042 unsigned char * packedQuality =
02043 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02044
02045 const char * asciiBases = "=AC.G...T......N";
02046
02047
02048
02049 bool qualitySpecified = false;
02050
02051 for (int i = 0; i < myRecordPtr->myReadLength; i++)
02052 {
02053 if(extractSequence)
02054 {
02055 mySequence[i] = i & 1 ?
02056 asciiBases[packedSequence[i / 2] & 0xF] :
02057 asciiBases[packedSequence[i / 2] >> 4];
02058 }
02059
02060 if(extractQuality)
02061 {
02062 if(packedQuality[i] != 0xFF)
02063 {
02064
02065 qualitySpecified = true;
02066 }
02067
02068 myQuality[i] = packedQuality[i] + 33;
02069 }
02070 }
02071
02072
02073 if(myRecordPtr->myReadLength == 0)
02074 {
02075 if(extractSequence)
02076 {
02077 mySequence = "*";
02078 }
02079 if(extractQuality)
02080 {
02081 myQuality = "*";
02082 }
02083 }
02084 else if(extractQuality && !qualitySpecified)
02085 {
02086
02087 myQuality = "*";
02088 }
02089 }
02090
02091
02092
02093 bool SamRecord::parseCigar()
02094 {
02095
02096 if(myCigar.Length() == 0)
02097 {
02098
02099 return(parseCigarBinary());
02100 }
02101 return(parseCigarString());
02102 }
02103
02104
02105 bool SamRecord::parseCigarBinary()
02106 {
02107
02108
02109
02110 if(myCigar.Length() != 0)
02111 {
02112
02113 return(true);
02114 }
02115
02116 unsigned char * readNameEnds =
02117 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02118
02119 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02120
02121 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
02122
02123 myCigarRoller.getCigarString(myCigar);
02124
02125 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02126
02127 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02128 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02129
02130
02131 if(myRecordPtr->myCigarLength == 0)
02132 {
02133 myCigar = "*";
02134 return(true);
02135 }
02136
02137
02138 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
02139 if(newBufferSize > myCigarTempBufferAllocatedSize)
02140 {
02141 uint32_t* tempBufferPtr =
02142 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02143 if(tempBufferPtr == NULL)
02144 {
02145
02146
02147 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02148 myStatus.addError(SamStatus::FAIL_MEM,
02149 "Failed to Allocate Memory.");
02150 return(false);
02151 }
02152 myCigarTempBuffer = tempBufferPtr;
02153 myCigarTempBufferAllocatedSize = newBufferSize;
02154 }
02155
02156 memcpy(myCigarTempBuffer, packedCigar,
02157 myRecordPtr->myCigarLength * sizeof(uint32_t));
02158
02159
02160 myCigarTempBufferLength = myRecordPtr->myCigarLength;
02161
02162 return(true);
02163 }
02164
02165
02166 bool SamRecord::parseCigarString()
02167 {
02168 myCigarTempBufferLength = 0;
02169 if(myCigar == "*")
02170 {
02171
02172 myAlignmentLength = 0;
02173 myUnclippedStartOffset = 0;
02174 myUnclippedEndOffset = 0;
02175 myCigarRoller.clear();
02176 return(true);
02177 }
02178
02179 myCigarRoller.Set(myCigar);
02180
02181 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02182
02183 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02184 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02185
02186
02187
02188
02189 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
02190 if(newBufferSize > myCigarTempBufferAllocatedSize)
02191 {
02192 uint32_t* tempBufferPtr =
02193 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02194 if(tempBufferPtr == NULL)
02195 {
02196
02197
02198 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02199 myStatus.addError(SamStatus::FAIL_MEM,
02200 "Failed to Allocate Memory.");
02201 return(false);
02202 }
02203 myCigarTempBuffer = tempBufferPtr;
02204 myCigarTempBufferAllocatedSize = newBufferSize;
02205 }
02206
02207
02208 bool status = true;
02209
02210
02211 char *cigarOp;
02212 const char* cigarEntryStart = myCigar.c_str();
02213 int opLen = 0;
02214 int op = 0;
02215
02216 unsigned int * packedCigar = myCigarTempBuffer;
02217
02218
02219 const char* endCigarString = cigarEntryStart + myCigar.Length();
02220 while(cigarEntryStart < endCigarString)
02221 {
02222 bool validCigarEntry = true;
02223
02224
02225 opLen = strtol(cigarEntryStart, &cigarOp, 10);
02226
02227 switch(*cigarOp)
02228 {
02229 case('M'):
02230 op = 0;
02231 break;
02232 case('I'):
02233
02234
02235 op = 1;
02236 break;
02237 case('D'):
02238 op = 2;
02239 break;
02240 case('N'):
02241 op = 3;
02242 break;
02243 case('S'):
02244 op = 4;
02245 break;
02246 case('H'):
02247 op = 5;
02248 break;
02249 case('P'):
02250 op = 6;
02251 break;
02252 default:
02253 fprintf(stderr, "ERROR parsing cigar\n");
02254 validCigarEntry = false;
02255 status = false;
02256 myStatus.addError(SamStatus::FAIL_PARSE,
02257 "Unknown operation found when parsing the Cigar.");
02258 break;
02259 };
02260 if(validCigarEntry)
02261 {
02262
02263 ++myCigarTempBufferLength;
02264 *packedCigar = (opLen << 4) | op;
02265 packedCigar++;
02266 }
02267
02268 cigarEntryStart = ++cigarOp;
02269 }
02270
02271
02272 return(status);
02273 }
02274
02275
02276 bool SamRecord::setTagsFromBuffer()
02277 {
02278
02279 if(myNeedToSetTagsFromBuffer == false)
02280 {
02281
02282 return(true);
02283 }
02284
02285
02286 myNeedToSetTagsFromBuffer = false;
02287
02288 unsigned char * readNameEnds =
02289 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02290 unsigned char * packedSequence =
02291 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02292 unsigned char * packedQuality =
02293 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02294
02295 unsigned char * extraPtr = packedQuality + myRecordPtr->myReadLength;
02296
02297
02298 bool status = true;
02299
02300
02301 clearTags();
02302 while (myRecordPtr->myBlockSize + 4 -
02303 (extraPtr - (unsigned char *)myRecordPtr) > 0)
02304 {
02305 int key;
02306 int value;
02307 void * content = extraPtr + 3;
02308 int tagBufferSize = 0;
02309
02310 switch (extraPtr[2])
02311 {
02312 case 'A' :
02313 key = MAKEKEY(extraPtr[0], extraPtr[1], 'A');
02314 value = integers.Length();
02315 integers.Push(* (char *) content);
02316 extraPtr += 4;
02317 tagBufferSize += 4;
02318 break;
02319 case 'c' :
02320 key = MAKEKEY(extraPtr[0], extraPtr[1], 'c');
02321 value = integers.Length();
02322 integers.Push(* (char *) content);
02323 extraPtr += 4;
02324 tagBufferSize += 4;
02325 break;
02326 case 'C' :
02327 key = MAKEKEY(extraPtr[0], extraPtr[1], 'C');
02328 value = integers.Length();
02329 integers.Push(* (unsigned char *) content);
02330 extraPtr += 4;
02331 tagBufferSize += 4;
02332 break;
02333 case 's' :
02334 key = MAKEKEY(extraPtr[0], extraPtr[1], 's');
02335 value = integers.Length();
02336 integers.Push(* (short *) content);
02337 extraPtr += 5;
02338 tagBufferSize += 5;
02339 break;
02340 case 'S' :
02341 key = MAKEKEY(extraPtr[0], extraPtr[1], 'S');
02342 value = integers.Length();
02343 integers.Push(* (unsigned short *) content);
02344 extraPtr += 5;
02345 tagBufferSize += 5;
02346 break;
02347 case 'i' :
02348 key = MAKEKEY(extraPtr[0], extraPtr[1], 'i');
02349 value = integers.Length();
02350 integers.Push(* (int *) content);
02351 extraPtr += 7;
02352 tagBufferSize += 7;
02353 break;
02354 case 'I' :
02355 key = MAKEKEY(extraPtr[0], extraPtr[1], 'I');
02356 value = integers.Length();
02357 integers.Push((int) * (unsigned int *) content);
02358 extraPtr += 7;
02359 tagBufferSize += 7;
02360 break;
02361 case 'Z' :
02362 key = MAKEKEY(extraPtr[0], extraPtr[1], 'Z');
02363 value = strings.Length();
02364 strings.Push((const char *) content);
02365 extraPtr += 4 + strings.Last().Length();
02366 tagBufferSize += 4 + strings.Last().Length();
02367 break;
02368 case 'f' :
02369 key = MAKEKEY(extraPtr[0], extraPtr[1], 'f');
02370 value = doubles.Length();
02371 doubles.Push(* (float *) content);
02372 fprintf(stderr, "\n\nFLOAT!!!\n\n");
02373 extraPtr += 7;
02374 tagBufferSize += 7;
02375 break;
02376 default :
02377 fprintf(stderr,
02378 "parsing BAM - Unknown custom field of type %c%c:%c\n",
02379 extraPtr[0], extraPtr[1], extraPtr[2]);
02380
02381
02382 extraPtr += 3;
02383 myStatus.addError(SamStatus::FAIL_PARSE,
02384 "Unknown tag type.");
02385 status = false;
02386 }
02387
02388
02389 if(status)
02390 {
02391 extras.Add(key, value);
02392 myTagBufferSize += tagBufferSize;
02393 }
02394 }
02395 return(status);
02396 }
02397
02398
02399 bool SamRecord::setTagsInBuffer()
02400 {
02401
02402
02403
02404 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
02405 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
02406 (unsigned char*)myRecordPtr) +
02407 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
02408 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
02409
02410
02411 if(!allocateRecordStructure(newBufferSize))
02412 {
02413
02414 return(false);
02415 }
02416
02417 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02418 myRecordPtr->myReadNameLength;
02419 unsigned char * packedSequence = readNameEnds +
02420 myRecordPtr->myCigarLength * sizeof(int);
02421 unsigned char * packedQuality =
02422 packedSequence + bamSequenceLength;
02423
02424 char * extraPtr = (char*)packedQuality + myRecordPtr->myReadLength;
02425
02426 bool status = true;
02427
02428
02429 if (extras.Entries())
02430 {
02431 for (int i = 0; i < extras.Capacity(); i++)
02432 {
02433 if (extras.SlotInUse(i))
02434 {
02435 int key = extras.GetKey(i);
02436 getTag(key, extraPtr);
02437 extraPtr += 2;
02438 getVtype(key, extraPtr[0]);
02439 char vtype = extraPtr[0];
02440
02441
02442 extraPtr += 1;
02443
02444 switch (vtype)
02445 {
02446 case 'A' :
02447 *(char*)extraPtr = (char)getInteger(i);
02448
02449 extraPtr += 1;
02450 break;
02451 case 'c' :
02452 *(int8_t*)extraPtr = (int8_t)getInteger(i);
02453
02454 extraPtr += 1;
02455 break;
02456 case 'C' :
02457 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
02458
02459 extraPtr += 1;
02460 break;
02461 case 's' :
02462 *(int16_t*)extraPtr = (int16_t)getInteger(i);
02463
02464 extraPtr += 2;
02465 break;
02466 case 'S' :
02467 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
02468
02469 extraPtr += 2;
02470 break;
02471 case 'i' :
02472 *(int32_t*)extraPtr = (int32_t)getInteger(i);
02473
02474 extraPtr += 4;
02475 break;
02476 case 'I' :
02477 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
02478
02479 extraPtr += 4;
02480 break;
02481 case 'Z' :
02482 sprintf(extraPtr, "%s", getString(i).c_str());
02483 extraPtr += getString(i).Length() + 1;
02484 break;
02485 case 'f' :
02486
02487 sprintf(extraPtr, "%f", getDouble(i));
02488 extraPtr += 4;
02489 break;
02490 default :
02491 myStatus.addError(SamStatus::FAIL_PARSE,
02492 "Unknown tag type.");
02493 status = false;
02494 break;
02495 }
02496 }
02497 }
02498 }
02499
02500
02501
02502 if(extraPtr != (char*)myRecordPtr + newBufferSize)
02503 {
02504 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
02505 myStatus.addError(SamStatus::FAIL_PARSE,
02506 "ERROR updating the buffer. Incorrect size.");
02507 status = false;
02508 }
02509
02510
02511
02512 myNeedToSetTagsInBuffer = false;
02513 myIsTagsBufferValid = true;
02514 return(status);
02515 }
02516
02517
02518
02519
02520
02521 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
02522 {
02523
02524
02525 myReferenceName =
02526 header.getReferenceLabel(myRecordPtr->myReferenceID);
02527 myMateReferenceName =
02528 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
02529
02530
02531 myReadName.SetLength(0);
02532 myCigar.SetLength(0);
02533 mySequence.SetLength(0);
02534 mySeqWithEq.clear();
02535 mySeqWithoutEq.clear();
02536 myQuality.SetLength(0);
02537 myNeedToSetTagsFromBuffer = true;
02538 myNeedToSetTagsInBuffer = false;
02539
02540
02541 myIsBufferSynced = true;
02542
02543 myIsReadNameBufferValid = true;
02544 myIsCigarBufferValid = true;
02545 myIsSequenceBufferValid = true;
02546 myBufferSequenceTranslation = NONE;
02547 myIsQualityBufferValid = true;
02548 myIsTagsBufferValid = true;
02549 myIsBinValid = true;
02550 }
02551
02552
02553
02554 void SamRecord::getVtype(int key, char& vtype) const
02555 {
02556
02557 vtype = (key >> 16) & 0xFF;
02558 }
02559
02560
02561 void SamRecord::getTag(int key, char* tag) const
02562 {
02563
02564 tag[0] = key & 0xFF;
02565 tag[1] = (key >> 8) & 0xFF;
02566 tag[2] = 0;
02567 }
02568
02569
02570
02571 String & SamRecord::getString(int index)
02572 {
02573 int value = extras[index];
02574
02575 return strings[value];
02576 }
02577
02578 int & SamRecord::getInteger(int offset)
02579 {
02580 int value = extras[offset];
02581
02582 return integers[value];
02583 }
02584
02585 double & SamRecord::getDouble(int offset)
02586 {
02587 int value = extras[offset];
02588
02589 return doubles[value];
02590 }
02591
02592