00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021
00022 #include "bam.h"
00023
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026
00027 #include "BaseUtilities.h"
00028 #include "SamQuerySeqWithRefHelper.h"
00029
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032
00033
00034 SamRecord::SamRecord()
00035 : myStatus(),
00036 myRefPtr(NULL),
00037 mySequenceTranslation(NONE)
00038 {
00039 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00040
00041 myRecordPtr =
00042 (bamRecordStruct *) malloc(defaultAllocSize);
00043
00044 myCigarTempBuffer = NULL;
00045 myCigarTempBufferAllocatedSize = 0;
00046
00047 allocatedSize = defaultAllocSize;
00048
00049 resetRecord();
00050 }
00051
00052
00053 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00054 : myStatus(errorHandlingType),
00055 myRefPtr(NULL),
00056 mySequenceTranslation(NONE)
00057 {
00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00059
00060 myRecordPtr =
00061 (bamRecordStruct *) malloc(defaultAllocSize);
00062
00063 myCigarTempBuffer = NULL;
00064 myCigarTempBufferAllocatedSize = 0;
00065
00066 allocatedSize = defaultAllocSize;
00067
00068 resetRecord();
00069 }
00070
00071
00072 SamRecord::~SamRecord()
00073 {
00074 resetRecord();
00075
00076 if(myRecordPtr != NULL)
00077 {
00078 free(myRecordPtr);
00079 myRecordPtr = NULL;
00080 }
00081 if(myCigarTempBuffer != NULL)
00082 {
00083 free(myCigarTempBuffer);
00084 myCigarTempBuffer = NULL;
00085 myCigarTempBufferAllocatedSize = 0;
00086 }
00087 }
00088
00089
00090
00091 void SamRecord::resetRecord()
00092 {
00093 myIsBufferSynced = true;
00094
00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00096 myRecordPtr->myReferenceID = -1;
00097 myRecordPtr->myPosition = -1;
00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00099 myRecordPtr->myMapQuality = 0;
00100 myRecordPtr->myBin = DEFAULT_BIN;
00101 myRecordPtr->myCigarLength = 0;
00102 myRecordPtr->myFlag = 0;
00103 myRecordPtr->myReadLength = 0;
00104 myRecordPtr->myMateReferenceID = -1;
00105 myRecordPtr->myMatePosition = -1;
00106 myRecordPtr->myInsertSize = 0;
00107
00108
00109
00110
00111 myReadName = DEFAULT_READ_NAME;
00112 myReferenceName = "*";
00113 myMateReferenceName = "*";
00114 myCigar = "*";
00115 mySequence = "*";
00116 mySeqWithEq.clear();
00117 mySeqWithoutEq.clear();
00118 myQuality = "*";
00119 myNeedToSetTagsFromBuffer = false;
00120 myNeedToSetTagsInBuffer = false;
00121
00122
00123 myAlignmentLength = -1;
00124 myUnclippedStartOffset = -1;
00125 myUnclippedEndOffset = -1;
00126
00127 clearTags();
00128
00129
00130
00131
00132
00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
00134 myRecordPtr->myReadNameLength);
00135
00136
00137 myIsReadNameBufferValid = true;
00138 myIsCigarBufferValid = true;
00139 myIsSequenceBufferValid = true;
00140 myBufferSequenceTranslation = NONE;
00141 myIsQualityBufferValid = true;
00142 myIsTagsBufferValid = true;
00143 myIsBinValid = true;
00144
00145 myCigarTempBufferLength = -1;
00146
00147 myStatus = SamStatus::SUCCESS;
00148
00149 NOT_FOUND_TAG_STRING = "";
00150 NOT_FOUND_TAG_INT = -1;
00151 NOT_FOUND_TAG_DOUBLE = -1;
00152 }
00153
00154
00155
00156 void SamRecord::resetTagIter()
00157 {
00158 myLastTagIndex = -1;
00159 }
00160
00161
00162
00163
00164 bool SamRecord::isValid(SamFileHeader& header)
00165 {
00166 myStatus = SamStatus::SUCCESS;
00167 SamValidationErrors invalidSamErrors;
00168 if(!SamValidator::isValid(header, *this, invalidSamErrors))
00169 {
00170
00171 std::string errorMessage = "";
00172 invalidSamErrors.getErrorString(errorMessage);
00173 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00174 return(false);
00175 }
00176
00177 return(true);
00178 }
00179
00180
00181
00182 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr,
00183 SamFileHeader& header)
00184 {
00185 myStatus = SamStatus::SUCCESS;
00186 if((filePtr == NULL) || (filePtr->isOpen() == false))
00187 {
00188
00189 myStatus.setStatus(SamStatus::FAIL_ORDER,
00190 "Can't read from an unopened file.");
00191 return(SamStatus::FAIL_ORDER);
00192 }
00193
00194
00195 resetRecord();
00196
00197
00198 int numBytes =
00199 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00200
00201
00202 if(ifeof(filePtr) && (numBytes == 0))
00203 {
00204
00205 myStatus.setStatus(SamStatus::NO_MORE_RECS,
00206 "No more records left to read.");
00207 return(SamStatus::NO_MORE_RECS);
00208 }
00209
00210 if(numBytes != sizeof(int32_t))
00211 {
00212
00213
00214 if(ifeof(filePtr))
00215 {
00216
00217
00218 myStatus.setStatus(SamStatus::FAIL_PARSE,
00219 "EOF reached in the middle of a record.");
00220 return(SamStatus::FAIL_PARSE);
00221 }
00222 else
00223 {
00224
00225 myStatus.setStatus(SamStatus::FAIL_IO,
00226 "Failed to read the record size.");
00227 return(SamStatus::FAIL_IO);
00228 }
00229 }
00230
00231
00232 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00233 {
00234
00235
00236 return(SamStatus::FAIL_MEM);
00237 }
00238
00239
00240 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00241 != (unsigned int)myRecordPtr->myBlockSize)
00242 {
00243
00244 resetRecord();
00245 myStatus.setStatus(SamStatus::FAIL_IO,
00246 "Failed to read the record");
00247 return(SamStatus::FAIL_IO);
00248 }
00249
00250 setVariablesForNewBuffer(header);
00251
00252
00253 return(SamStatus::SUCCESS);
00254 }
00255
00256
00257 void SamRecord::setReference(GenomeSequence* reference)
00258 {
00259 myRefPtr = reference;
00260 }
00261
00262
00263
00264
00265
00266 void SamRecord::setSequenceTranslation(SequenceTranslation translation)
00267 {
00268 mySequenceTranslation = translation;
00269 }
00270
00271
00272 bool SamRecord::setReadName(const char* readName)
00273 {
00274 myReadName = readName;
00275 myIsBufferSynced = false;
00276 myIsReadNameBufferValid = false;
00277 myStatus = SamStatus::SUCCESS;
00278
00279
00280
00281 if(myReadName.Length() == 0)
00282 {
00283
00284 myReadName = DEFAULT_READ_NAME;
00285 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00286 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00287 return(false);
00288 }
00289
00290 return true;
00291 }
00292
00293
00294 bool SamRecord::setFlag(uint16_t flag)
00295 {
00296 myStatus = SamStatus::SUCCESS;
00297 myRecordPtr->myFlag = flag;
00298 return true;
00299 }
00300
00301
00302 bool SamRecord::setReferenceName(SamFileHeader& header,
00303 const char* referenceName)
00304 {
00305 myStatus = SamStatus::SUCCESS;
00306
00307 myReferenceName = referenceName;
00308
00309 myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
00310
00311 return true;
00312 }
00313
00314
00315 bool SamRecord::set1BasedPosition(int32_t position)
00316 {
00317 return(set0BasedPosition(position - 1));
00318 }
00319
00320
00321 bool SamRecord::set0BasedPosition(int32_t position)
00322 {
00323 myStatus = SamStatus::SUCCESS;
00324 myRecordPtr->myPosition = position;
00325 myIsBinValid = false;
00326 return true;
00327 }
00328
00329
00330 bool SamRecord::setMapQuality(uint8_t mapQuality)
00331 {
00332 myStatus = SamStatus::SUCCESS;
00333 myRecordPtr->myMapQuality = mapQuality;
00334 return true;
00335 }
00336
00337
00338 bool SamRecord::setCigar(const char* cigar)
00339 {
00340 myStatus = SamStatus::SUCCESS;
00341 myCigar = cigar;
00342
00343 myIsBufferSynced = false;
00344 myIsCigarBufferValid = false;
00345 myCigarTempBufferLength = -1;
00346 myIsBinValid = false;
00347
00348
00349 myAlignmentLength = -1;
00350 myUnclippedStartOffset = -1;
00351 myUnclippedEndOffset = -1;
00352
00353 return true;
00354 }
00355
00356
00357 bool SamRecord::setCigar(const Cigar& cigar)
00358 {
00359 myStatus = SamStatus::SUCCESS;
00360 cigar.getCigarString(myCigar);
00361
00362 myIsBufferSynced = false;
00363 myIsCigarBufferValid = false;
00364 myCigarTempBufferLength = -1;
00365 myIsBinValid = false;
00366
00367
00368 myAlignmentLength = -1;
00369 myUnclippedStartOffset = -1;
00370 myUnclippedEndOffset = -1;
00371
00372 return true;
00373 }
00374
00375
00376 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00377 const char* mateReferenceName)
00378 {
00379 myStatus = SamStatus::SUCCESS;
00380
00381
00382
00383 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00384 {
00385 myMateReferenceName = myReferenceName;
00386 }
00387 else
00388 {
00389 myMateReferenceName = mateReferenceName;
00390 }
00391
00392
00393
00394 myRecordPtr->myMateReferenceID =
00395 header.getReferenceID(myMateReferenceName, true);
00396
00397 return true;
00398 }
00399
00400
00401 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00402 {
00403 return(set0BasedMatePosition(matePosition - 1));
00404 }
00405
00406
00407 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00408 {
00409 myStatus = SamStatus::SUCCESS;
00410 myRecordPtr->myMatePosition = matePosition;
00411 return true;
00412 }
00413
00414
00415 bool SamRecord::setInsertSize(int32_t insertSize)
00416 {
00417 myStatus = SamStatus::SUCCESS;
00418 myRecordPtr->myInsertSize = insertSize;
00419 return true;
00420 }
00421
00422
00423 bool SamRecord::setSequence(const char* seq)
00424 {
00425 myStatus = SamStatus::SUCCESS;
00426 mySequence = seq;
00427 mySeqWithEq.clear();
00428 mySeqWithoutEq.clear();
00429
00430 myIsBufferSynced = false;
00431 myIsSequenceBufferValid = false;
00432 return true;
00433 }
00434
00435
00436 bool SamRecord::setQuality(const char* quality)
00437 {
00438 myStatus = SamStatus::SUCCESS;
00439 myQuality = quality;
00440 myIsBufferSynced = false;
00441 myIsQualityBufferValid = false;
00442 return true;
00443 }
00444
00445
00446
00447
00448 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00449 uint32_t fromBufferSize,
00450 SamFileHeader& header)
00451 {
00452 myStatus = SamStatus::SUCCESS;
00453 if((fromBuffer == NULL) || (fromBufferSize == 0))
00454 {
00455
00456 myStatus.setStatus(SamStatus::FAIL_PARSE,
00457 "Cannot parse an empty file.");
00458 return(SamStatus::FAIL_PARSE);
00459 }
00460
00461
00462 resetRecord();
00463
00464
00465 if(!allocateRecordStructure(fromBufferSize))
00466 {
00467
00468 return(SamStatus::FAIL_MEM);
00469 }
00470
00471 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00472
00473 setVariablesForNewBuffer(header);
00474
00475
00476 return(SamStatus::SUCCESS);
00477 }
00478
00479
00480
00481
00482 bool SamRecord::addIntTag(const char* tag, int32_t value)
00483 {
00484 myStatus = SamStatus::SUCCESS;
00485 int key = 0;
00486 int index = 0;
00487 char bamvtype;
00488
00489 int tagBufferSize = 0;
00490
00491
00492 if(myNeedToSetTagsFromBuffer)
00493 {
00494 if(!setTagsFromBuffer())
00495 {
00496
00497 return(false);
00498 }
00499 }
00500
00501
00502
00503
00504
00505 if(value < 0)
00506 {
00507
00508
00509 if(value > std::numeric_limits<char>::min())
00510 {
00511
00512 bamvtype = 'c';
00513 tagBufferSize += 4;
00514 }
00515 else if(value > std::numeric_limits<short>::min())
00516 {
00517
00518 bamvtype = 's';
00519 tagBufferSize += 5;
00520 }
00521 else
00522 {
00523
00524 bamvtype = 'i';
00525 tagBufferSize += 7;
00526 }
00527 }
00528 else
00529 {
00530
00531 if(value < std::numeric_limits<unsigned char>::max())
00532 {
00533
00534 bamvtype = 'C';
00535 tagBufferSize += 4;
00536 }
00537 else if(value < std::numeric_limits<unsigned short>::max())
00538 {
00539
00540 bamvtype = 'S';
00541 tagBufferSize += 5;
00542 }
00543 else
00544 {
00545
00546 bamvtype = 'I';
00547 tagBufferSize += 7;
00548 }
00549 }
00550
00551
00552 key = MAKEKEY(tag[0], tag[1], bamvtype);
00553 unsigned int hashIndex = extras.Find(key);
00554 if(hashIndex != LH_NOTFOUND)
00555 {
00556
00557 index = extras[hashIndex];
00558
00559
00560 if((integers[index] == value) && (intType[index] == bamvtype))
00561 {
00562
00563 return(true);
00564 }
00565 else
00566 {
00567
00568
00569
00570 switch(intType[index])
00571 {
00572 case 'c':
00573 case 'C':
00574 tagBufferSize -= 4;
00575 break;
00576 case 's':
00577 case 'S':
00578 tagBufferSize -= 5;
00579 break;
00580 case 'i':
00581 case 'I':
00582 tagBufferSize -= 7;
00583 break;
00584 default:
00585 myStatus.setStatus(SamStatus::INVALID,
00586 "unknown tag inttype type found.\n");
00587 return(false);
00588 }
00589
00590
00591 integers[index] = value;
00592 intType[index] = bamvtype;
00593 }
00594 }
00595 else
00596 {
00597
00598 index = integers.Length();
00599
00600 integers.Push(value);
00601 intType.push_back(bamvtype);
00602
00603 extras.Add(key, index);
00604 }
00605
00606
00607 myNeedToSetTagsInBuffer = true;
00608 myIsTagsBufferValid = false;
00609 myIsBufferSynced = false;
00610 myTagBufferSize += tagBufferSize;
00611
00612 return(true);
00613 }
00614
00615
00616
00617
00618
00619 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00620 {
00621 if(vtype == 'i')
00622 {
00623
00624 int intVal = atoi(valuePtr);
00625 return(addIntTag(tag, intVal));
00626 }
00627
00628
00629 myStatus = SamStatus::SUCCESS;
00630 bool status = true;
00631 int key = 0;
00632 int index = 0;
00633
00634 int tagBufferSize = 0;
00635
00636
00637 if(myNeedToSetTagsFromBuffer)
00638 {
00639 if(!setTagsFromBuffer())
00640 {
00641
00642 return(false);
00643 }
00644 }
00645
00646
00647 key = MAKEKEY(tag[0], tag[1], vtype);
00648 unsigned int hashIndex = extras.Find(key);
00649 if(hashIndex != LH_NOTFOUND)
00650 {
00651
00652 index = extras[hashIndex];
00653
00654
00655 switch (vtype)
00656 {
00657 case 'A' :
00658
00659 if(integers[index] == (const int)*(valuePtr))
00660 {
00661
00662 return(true);
00663 }
00664 else
00665 {
00666
00667 integers[index] = (const int)*(valuePtr);
00668 intType[index] = vtype;
00669 }
00670 break;
00671 case 'Z' :
00672
00673 if(strings[index] == valuePtr)
00674 {
00675
00676 return(true);
00677 }
00678 else
00679 {
00680
00681 tagBufferSize -= strings[index].Length();
00682 strings[index] = valuePtr;
00683
00684 tagBufferSize += strings[index].Length();
00685 }
00686 break;
00687 case 'f' :
00688
00689 if(doubles[index] == atof(valuePtr))
00690 {
00691
00692 return(true);
00693 }
00694 else
00695 {
00696
00697 doubles[index] = atof(valuePtr);
00698 }
00699 break;
00700 default :
00701 fprintf(stderr,
00702 "samFile::ReadSAM() - Unknown custom field of type %c\n",
00703 vtype);
00704 myStatus.setStatus(SamStatus::FAIL_PARSE,
00705 "Unknown custom field in a tag");
00706 status = false;
00707 break;
00708 }
00709 }
00710 else
00711 {
00712
00713 switch (vtype)
00714 {
00715 case 'A' :
00716 index = integers.Length();
00717 integers.Push((const int)*(valuePtr));
00718 intType.push_back(vtype);
00719 tagBufferSize += 4;
00720 break;
00721 case 'Z' :
00722 index = strings.Length();
00723 strings.Push(valuePtr);
00724 tagBufferSize += 4 + strings.Last().Length();
00725 break;
00726 case 'f' :
00727 index = doubles.Length();
00728 doubles.Push(atof(valuePtr));
00729 tagBufferSize += 7;
00730 break;
00731 default :
00732 fprintf(stderr,
00733 "samFile::ReadSAM() - Unknown custom field of type %c\n",
00734 vtype);
00735 myStatus.setStatus(SamStatus::FAIL_PARSE,
00736 "Unknown custom field in a tag");
00737 status = false;
00738 break;
00739 }
00740 if(status)
00741 {
00742
00743 extras.Add(key, index);
00744 }
00745 }
00746
00747
00748 if(status)
00749 {
00750
00751 myNeedToSetTagsInBuffer = true;
00752 myIsTagsBufferValid = false;
00753 myIsBufferSynced = false;
00754 myTagBufferSize += tagBufferSize;
00755 }
00756 return(status);
00757 }
00758
00759
00760
00761 bool SamRecord::shiftIndelsLeft()
00762 {
00763
00764
00765
00766
00767 if(myAlignmentLength == -1)
00768 {
00769
00770 parseCigar();
00771 }
00772
00773
00774 bool shifted = false;
00775
00776
00777
00778 uint32_t currentPos = 0;
00779
00780
00781
00782 if(Cigar::foundInQuery(myCigarRoller[0]))
00783 {
00784
00785 currentPos += myCigarRoller[0].count;
00786 }
00787
00788 int numOps = myCigarRoller.size();
00789
00790
00791
00792 for(int currentOp = 1; currentOp < numOps; currentOp++)
00793 {
00794 if(myCigarRoller[currentOp].operation == Cigar::insert)
00795 {
00796
00797 int prevOpIndex = currentOp-1;
00798
00799
00800 int nextOpIndex = currentOp+1;
00801 if(nextOpIndex == numOps)
00802 {
00803
00804 nextOpIndex = currentOp;
00805 }
00806
00807
00808 uint32_t prevOpStart =
00809 currentPos - myCigarRoller[prevOpIndex].count;
00810
00811
00812 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
00813 {
00814
00815
00816 currentPos += myCigarRoller[currentOp].count;
00817
00818 continue;
00819 }
00820
00821
00822
00823
00824
00825
00826 uint32_t insertEndPos =
00827 currentPos + myCigarRoller[currentOp].count - 1;
00828
00829
00830 uint32_t insertStartPos = currentPos;
00831
00832
00833
00834
00835
00836
00837
00838
00839 while((insertStartPos > prevOpStart) &&
00840 (getSequence(insertEndPos,BASES) ==
00841 getSequence(insertStartPos - 1, BASES)))
00842 {
00843
00844 --insertEndPos;
00845 --insertStartPos;
00846 }
00847
00848
00849 int shiftLen = currentPos - insertStartPos;
00850 if(shiftLen > 0)
00851 {
00852
00853
00854
00855
00856
00857
00858
00859
00860 if(myCigarRoller[nextOpIndex].operation ==
00861 myCigarRoller[prevOpIndex].operation)
00862 {
00863
00864
00865 myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
00866 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
00867
00868
00869
00870 if(myCigarRoller[prevOpIndex].count == 0)
00871 {
00872 myCigarRoller.Remove(prevOpIndex);
00873 }
00874 shifted = true;
00875 }
00876 else
00877 {
00878
00879
00880
00881 if(insertStartPos == prevOpStart)
00882 {
00883
00884
00885 myCigarRoller.Update(currentOp,
00886 myCigarRoller[prevOpIndex].operation,
00887 myCigarRoller[prevOpIndex].count);
00888
00889
00890 myCigarRoller.Update(prevOpIndex,
00891 Cigar::insert,
00892 shiftLen);
00893 shifted = true;
00894 }
00895 }
00896 }
00897
00898 currentPos += myCigarRoller[currentOp].count;
00899 }
00900 else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
00901 {
00902
00903 currentPos += myCigarRoller[currentOp].count;
00904 }
00905 }
00906 if(shifted)
00907 {
00908
00909
00910 setCigar(myCigarRoller);
00911 }
00912 return(shifted);
00913 }
00914
00915
00916
00917 const void* SamRecord::getRecordBuffer()
00918 {
00919 return(getRecordBuffer(mySequenceTranslation));
00920 }
00921
00922
00923
00924 const void* SamRecord::getRecordBuffer(SequenceTranslation translation)
00925 {
00926 myStatus = SamStatus::SUCCESS;
00927 bool status = true;
00928
00929
00930 if((myIsBufferSynced == false) ||
00931 (myBufferSequenceTranslation != translation))
00932 {
00933 status &= fixBuffer(translation);
00934 }
00935
00936 if(myNeedToSetTagsInBuffer)
00937 {
00938 status &= setTagsInBuffer();
00939 }
00940 if(!status)
00941 {
00942 return(NULL);
00943 }
00944 return (const void *)myRecordPtr;
00945 }
00946
00947
00948
00949
00950 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
00951 {
00952 return(writeRecordBuffer(filePtr, mySequenceTranslation));
00953 }
00954
00955
00956
00957 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr,
00958 SequenceTranslation translation)
00959 {
00960 myStatus = SamStatus::SUCCESS;
00961 if((filePtr == NULL) || (filePtr->isOpen() == false))
00962 {
00963
00964 myStatus.setStatus(SamStatus::FAIL_ORDER,
00965 "Can't write to an unopened file.");
00966 return(SamStatus::FAIL_ORDER);
00967 }
00968
00969 if((myIsBufferSynced == false) ||
00970 (myBufferSequenceTranslation != translation))
00971 {
00972 if(!fixBuffer(translation))
00973 {
00974 return(myStatus.getStatus());
00975 }
00976 }
00977
00978
00979 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
00980 unsigned int numBytesWritten =
00981 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
00982
00983
00984 if(numBytesToWrite == numBytesWritten)
00985 {
00986 return(SamStatus::SUCCESS);
00987 }
00988
00989 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
00990 return(SamStatus::FAIL_IO);
00991 }
00992
00993
00994 int32_t SamRecord::getBlockSize()
00995 {
00996 myStatus = SamStatus::SUCCESS;
00997
00998
00999 if(myIsBufferSynced == false)
01000 {
01001
01002
01003
01004 fixBuffer(myBufferSequenceTranslation);
01005 }
01006 return myRecordPtr->myBlockSize;
01007 }
01008
01009
01010
01011 const char* SamRecord::getReferenceName()
01012 {
01013 myStatus = SamStatus::SUCCESS;
01014 return myReferenceName.c_str();
01015 }
01016
01017
01018 int32_t SamRecord::getReferenceID()
01019 {
01020 myStatus = SamStatus::SUCCESS;
01021 return myRecordPtr->myReferenceID;
01022 }
01023
01024
01025 int32_t SamRecord::get1BasedPosition()
01026 {
01027 myStatus = SamStatus::SUCCESS;
01028 return (myRecordPtr->myPosition + 1);
01029 }
01030
01031
01032 int32_t SamRecord::get0BasedPosition()
01033 {
01034 myStatus = SamStatus::SUCCESS;
01035 return myRecordPtr->myPosition;
01036 }
01037
01038
01039 uint8_t SamRecord::getReadNameLength()
01040 {
01041 myStatus = SamStatus::SUCCESS;
01042
01043
01044 if(myIsReadNameBufferValid)
01045 {
01046 return(myRecordPtr->myReadNameLength);
01047 }
01048
01049 return(myReadName.Length() + 1);
01050 }
01051
01052
01053 uint8_t SamRecord::getMapQuality()
01054 {
01055 myStatus = SamStatus::SUCCESS;
01056 return myRecordPtr->myMapQuality;
01057 }
01058
01059
01060 uint16_t SamRecord::getBin()
01061 {
01062 myStatus = SamStatus::SUCCESS;
01063 if(!myIsBinValid)
01064 {
01065
01066
01067 myRecordPtr->myBin =
01068 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
01069 myIsBinValid = true;
01070 }
01071 return(myRecordPtr->myBin);
01072 }
01073
01074
01075 uint16_t SamRecord::getCigarLength()
01076 {
01077 myStatus = SamStatus::SUCCESS;
01078
01079
01080 if(myIsCigarBufferValid)
01081 {
01082 return myRecordPtr->myCigarLength;
01083 }
01084
01085 if(myCigarTempBufferLength == -1)
01086 {
01087
01088
01089 parseCigarString();
01090 }
01091
01092
01093 return(myCigarTempBufferLength);
01094 }
01095
01096
01097 uint16_t SamRecord::getFlag()
01098 {
01099 myStatus = SamStatus::SUCCESS;
01100 return myRecordPtr->myFlag;
01101 }
01102
01103
01104 int32_t SamRecord::getReadLength()
01105 {
01106 myStatus = SamStatus::SUCCESS;
01107 if(myIsSequenceBufferValid == false)
01108 {
01109
01110 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
01111 {
01112 return(0);
01113 }
01114
01115 return(mySequence.Length());
01116 }
01117 return(myRecordPtr->myReadLength);
01118 }
01119
01120
01121
01122
01123 const char* SamRecord::getMateReferenceName()
01124 {
01125 myStatus = SamStatus::SUCCESS;
01126 return myMateReferenceName.c_str();
01127 }
01128
01129
01130
01131
01132
01133 const char* SamRecord::getMateReferenceNameOrEqual()
01134 {
01135 myStatus = SamStatus::SUCCESS;
01136 if(myMateReferenceName == "*")
01137 {
01138 return(myMateReferenceName);
01139 }
01140 if(myMateReferenceName == getReferenceName())
01141 {
01142 return(FIELD_ABSENT_STRING);
01143 }
01144 else
01145 {
01146 return(myMateReferenceName);
01147 }
01148 }
01149
01150
01151 int32_t SamRecord::getMateReferenceID()
01152 {
01153 myStatus = SamStatus::SUCCESS;
01154 return myRecordPtr->myMateReferenceID;
01155 }
01156
01157
01158 int32_t SamRecord::get1BasedMatePosition()
01159 {
01160 myStatus = SamStatus::SUCCESS;
01161 return (myRecordPtr->myMatePosition + 1);
01162 }
01163
01164
01165 int32_t SamRecord::get0BasedMatePosition()
01166 {
01167 myStatus = SamStatus::SUCCESS;
01168 return myRecordPtr->myMatePosition;
01169 }
01170
01171
01172 int32_t SamRecord::getInsertSize()
01173 {
01174 myStatus = SamStatus::SUCCESS;
01175 return myRecordPtr->myInsertSize;
01176 }
01177
01178
01179
01180 int32_t SamRecord::get0BasedAlignmentEnd()
01181 {
01182 myStatus = SamStatus::SUCCESS;
01183 if(myAlignmentLength == -1)
01184 {
01185
01186 parseCigar();
01187 }
01188
01189 if(myAlignmentLength == 0)
01190 {
01191
01192 return(myRecordPtr->myPosition);
01193 }
01194 return(myRecordPtr->myPosition + myAlignmentLength - 1);
01195 }
01196
01197
01198
01199 int32_t SamRecord::get1BasedAlignmentEnd()
01200 {
01201 return(get0BasedAlignmentEnd() + 1);
01202 }
01203
01204
01205
01206 int32_t SamRecord::getAlignmentLength()
01207 {
01208 myStatus = SamStatus::SUCCESS;
01209 if(myAlignmentLength == -1)
01210 {
01211
01212 parseCigar();
01213 }
01214
01215 return(myAlignmentLength);
01216 }
01217
01218
01219 int32_t SamRecord::get0BasedUnclippedStart()
01220 {
01221 myStatus = SamStatus::SUCCESS;
01222 if(myUnclippedStartOffset == -1)
01223 {
01224
01225 parseCigar();
01226 }
01227 return(myRecordPtr->myPosition - myUnclippedStartOffset);
01228 }
01229
01230
01231
01232 int32_t SamRecord::get1BasedUnclippedStart()
01233 {
01234 return(get0BasedUnclippedStart() + 1);
01235 }
01236
01237
01238
01239 int32_t SamRecord::get0BasedUnclippedEnd()
01240 {
01241
01242
01243 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
01244 }
01245
01246
01247
01248 int32_t SamRecord::get1BasedUnclippedEnd()
01249 {
01250 return(get0BasedUnclippedEnd() + 1);
01251 }
01252
01253
01254
01255 const char* SamRecord::getReadName()
01256 {
01257 myStatus = SamStatus::SUCCESS;
01258 if(myReadName.Length() == 0)
01259 {
01260
01261
01262 myReadName = (char*)&(myRecordPtr->myData);
01263 }
01264 return myReadName.c_str();
01265 }
01266
01267
01268 const char* SamRecord::getCigar()
01269 {
01270 myStatus = SamStatus::SUCCESS;
01271 if(myCigar.Length() == 0)
01272 {
01273
01274
01275 parseCigarBinary();
01276 }
01277 return myCigar.c_str();
01278 }
01279
01280
01281 const char* SamRecord::getSequence()
01282 {
01283 return(getSequence(mySequenceTranslation));
01284 }
01285
01286
01287 const char* SamRecord::getSequence(SequenceTranslation translation)
01288 {
01289 myStatus = SamStatus::SUCCESS;
01290 if(mySequence.Length() == 0)
01291 {
01292
01293
01294 setSequenceAndQualityFromBuffer();
01295 }
01296
01297
01298 if((translation == NONE) || (myRefPtr == NULL))
01299 {
01300 return mySequence.c_str();
01301 }
01302 else if(translation == EQUAL)
01303 {
01304 if(mySeqWithEq.length() == 0)
01305 {
01306
01307 if(mySequence == "*")
01308 {
01309
01310 mySeqWithEq = '*';
01311 }
01312 else
01313 {
01314
01315 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01316 myRecordPtr->myPosition,
01317 *(getCigarInfo()),
01318 getReferenceName(),
01319 *myRefPtr,
01320 mySeqWithEq);
01321 }
01322 }
01323 return(mySeqWithEq.c_str());
01324 }
01325 else
01326 {
01327
01328 if(mySeqWithoutEq.length() == 0)
01329 {
01330 if(mySequence == "*")
01331 {
01332
01333 mySeqWithoutEq = '*';
01334 }
01335 else
01336 {
01337
01338 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01339 myRecordPtr->myPosition,
01340 *(getCigarInfo()),
01341 getReferenceName(),
01342 *myRefPtr,
01343 mySeqWithoutEq);
01344 }
01345 }
01346 return(mySeqWithoutEq.c_str());
01347 }
01348 }
01349
01350
01351 const char* SamRecord::getQuality()
01352 {
01353 myStatus = SamStatus::SUCCESS;
01354 if(myQuality.Length() == 0)
01355 {
01356
01357
01358 setSequenceAndQualityFromBuffer();
01359 }
01360 return myQuality.c_str();
01361 }
01362
01363
01364 char SamRecord::getSequence(int index)
01365 {
01366 return(getSequence(index, mySequenceTranslation));
01367 }
01368
01369
01370 char SamRecord::getSequence(int index, SequenceTranslation translation)
01371 {
01372 static const char * asciiBases = "=AC.G...T......N";
01373
01374
01375 int32_t readLen = getReadLength();
01376
01377
01378 if(readLen == 0)
01379 {
01380 String exceptionString = "SamRecord::getSequence(";
01381 exceptionString += index;
01382 exceptionString += ") is not allowed since sequence = '*'";
01383 throw std::runtime_error(exceptionString.c_str());
01384 }
01385 else if((index < 0) || (index >= readLen))
01386 {
01387
01388 String exceptionString = "SamRecord::getSequence(";
01389 exceptionString += index;
01390 exceptionString += ") is out of range. Index must be between 0 and ";
01391 exceptionString += (readLen - 1);
01392 throw std::runtime_error(exceptionString.c_str());
01393 }
01394
01395
01396 if((translation == NONE) || (myRefPtr == NULL))
01397 {
01398
01399 if(mySequence.Length() == 0)
01400 {
01401
01402
01403
01404 unsigned char * packedSequence =
01405 (unsigned char *)myRecordPtr->myData +
01406 myRecordPtr->myReadNameLength +
01407 myRecordPtr->myCigarLength * sizeof(int);
01408
01409 return(index & 1 ?
01410 asciiBases[packedSequence[index / 2] & 0xF] :
01411 asciiBases[packedSequence[index / 2] >> 4]);
01412 }
01413
01414 return(mySequence[index]);
01415 }
01416 else
01417 {
01418
01419
01420
01421 if(mySequence.Length() == 0)
01422 {
01423
01424
01425 setSequenceAndQualityFromBuffer();
01426 }
01427
01428
01429 if(translation == EQUAL)
01430 {
01431
01432
01433 if(mySeqWithEq.length() == 0)
01434 {
01435
01436
01437
01438 if(mySequence == "*")
01439 {
01440
01441 mySeqWithEq = '*';
01442 }
01443 else
01444 {
01445
01446 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01447 myRecordPtr->myPosition,
01448 *(getCigarInfo()),
01449 getReferenceName(),
01450 *myRefPtr,
01451 mySeqWithEq);
01452 }
01453 }
01454
01455 return(mySeqWithEq[index]);
01456 }
01457 else
01458 {
01459
01460
01461
01462 if(mySeqWithoutEq.length() == 0)
01463 {
01464
01465
01466
01467 if(mySequence == "*")
01468 {
01469
01470 mySeqWithoutEq = '*';
01471 }
01472 else
01473 {
01474
01475
01476
01477 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01478 myRecordPtr->myPosition,
01479 *(getCigarInfo()),
01480 getReferenceName(),
01481 *myRefPtr,
01482 mySeqWithoutEq);
01483 }
01484 }
01485
01486 return(mySeqWithoutEq[index]);
01487 }
01488 }
01489 }
01490
01491
01492 char SamRecord::getQuality(int index)
01493 {
01494
01495 int32_t readLen = getReadLength();
01496
01497
01498
01499 if(readLen == 0)
01500 {
01501 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01502 }
01503 else if((index < 0) || (index >= readLen))
01504 {
01505
01506 String exceptionString = "SamRecord::getQuality(";
01507 exceptionString += index;
01508 exceptionString += ") is out of range. Index must be between 0 and ";
01509 exceptionString += (readLen - 1);
01510 throw std::runtime_error(exceptionString.c_str());
01511 }
01512
01513 if(myQuality.Length() == 0)
01514 {
01515
01516 unsigned char * packedQuality =
01517 (unsigned char *)myRecordPtr->myData +
01518 myRecordPtr->myReadNameLength +
01519 myRecordPtr->myCigarLength * sizeof(int) +
01520 (myRecordPtr->myReadLength + 1) / 2;
01521 return(packedQuality[index] + 33);
01522 }
01523 else
01524 {
01525
01526 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01527 {
01528
01529 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01530 }
01531 else
01532 {
01533 return(myQuality[index]);
01534 }
01535 }
01536 }
01537
01538
01539 Cigar* SamRecord::getCigarInfo()
01540 {
01541
01542
01543
01544
01545 if(myAlignmentLength == -1)
01546 {
01547
01548 parseCigar();
01549 }
01550 return(&myCigarRoller);
01551 }
01552
01553
01554 uint32_t SamRecord::getTagLength()
01555 {
01556 myStatus = SamStatus::SUCCESS;
01557 if(myNeedToSetTagsFromBuffer)
01558 {
01559
01560
01561 unsigned char * tagStart =
01562 (unsigned char *)myRecordPtr->myData
01563 + myRecordPtr->myReadNameLength
01564 + myRecordPtr->myCigarLength * sizeof(int)
01565 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01566
01567
01568
01569
01570 uint32_t nonTagSize =
01571 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01572
01573 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01574 return(tagSize);
01575 }
01576
01577
01578 return(myTagBufferSize);
01579 }
01580
01581
01582
01583
01584
01585
01586
01587 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01588 {
01589 myStatus = SamStatus::SUCCESS;
01590 if(myNeedToSetTagsFromBuffer)
01591 {
01592 if(!setTagsFromBuffer())
01593 {
01594
01595
01596 return(false);
01597 }
01598 }
01599
01600
01601
01602 myLastTagIndex++;
01603 int maxTagIndex = extras.Capacity();
01604 if(myLastTagIndex >= maxTagIndex)
01605 {
01606
01607
01608
01609 return(false);
01610 }
01611
01612 bool tagFound = false;
01613
01614 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01615 {
01616 if(extras.SlotInUse(myLastTagIndex))
01617 {
01618
01619 int key = extras.GetKey(myLastTagIndex);
01620 getTag(key, tag);
01621 getTypeFromKey(key, vtype);
01622 tagFound = true;
01623
01624 switch (vtype)
01625 {
01626 case 'f' :
01627 *value = getDoublePtr(myLastTagIndex);
01628 break;
01629 case 'i' :
01630 *value = getIntegerPtr(myLastTagIndex, vtype);
01631 if(vtype != 'A')
01632 {
01633
01634 vtype = 'i';
01635 }
01636 break;
01637 case 'Z' :
01638 *value = getStringPtr(myLastTagIndex);
01639 break;
01640 default:
01641 myStatus.setStatus(SamStatus::FAIL_PARSE,
01642 "Unknown tag type");
01643 tagFound = false;
01644 break;
01645 }
01646 }
01647 if(!tagFound)
01648 {
01649
01650 myLastTagIndex++;
01651 }
01652 }
01653 return(tagFound);
01654 }
01655
01656
01657
01658 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01659 String& cigar, String& sequence, String& quality)
01660 {
01661 return(getFields(recStruct, readName, cigar, sequence, quality,
01662 mySequenceTranslation));
01663 }
01664
01665
01666
01667 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01668 String& cigar, String& sequence, String& quality,
01669 SequenceTranslation translation)
01670 {
01671 myStatus = SamStatus::SUCCESS;
01672 if(myIsBufferSynced == false)
01673 {
01674 if(!fixBuffer(translation))
01675 {
01676
01677 return(false);
01678 }
01679 }
01680 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01681
01682 readName = getReadName();
01683
01684 if(myStatus != SamStatus::SUCCESS)
01685 {
01686
01687 return(false);
01688 }
01689 cigar = getCigar();
01690
01691 if(myStatus != SamStatus::SUCCESS)
01692 {
01693
01694 return(false);
01695 }
01696 sequence = getSequence(translation);
01697
01698 if(myStatus != SamStatus::SUCCESS)
01699 {
01700
01701 return(false);
01702 }
01703 quality = getQuality();
01704
01705 if(myStatus != SamStatus::SUCCESS)
01706 {
01707
01708 return(false);
01709 }
01710 return(true);
01711 }
01712
01713
01714
01715 GenomeSequence* SamRecord::getReference()
01716 {
01717 return(myRefPtr);
01718 }
01719
01720
01721 bool SamRecord::isIntegerType(char vtype) const
01722 {
01723 if((vtype == 'c') || (vtype == 'C') ||
01724 (vtype == 's') || (vtype == 'S') ||
01725 (vtype == 'i') || (vtype == 'I'))
01726 {
01727 return(true);
01728 }
01729 return(false);
01730 }
01731
01732
01733 bool SamRecord::isDoubleType(char vtype) const
01734 {
01735 if(vtype == 'f')
01736 {
01737 return(true);
01738 }
01739 return(false);
01740 }
01741
01742
01743 bool SamRecord::isCharType(char vtype) const
01744 {
01745 if(vtype == 'A')
01746 {
01747 return(true);
01748 }
01749 return(false);
01750 }
01751
01752
01753 bool SamRecord::isStringType(char vtype) const
01754 {
01755 if(vtype == 'Z')
01756 {
01757 return(true);
01758 }
01759 return(false);
01760 }
01761
01762
01763 void SamRecord::clearTags()
01764 {
01765 if(extras.Entries() != 0)
01766 {
01767 extras.Clear();
01768 }
01769 strings.Clear();
01770 integers.Clear();
01771 intType.clear();
01772 doubles.Clear();
01773 myTagBufferSize = 0;
01774 resetTagIter();
01775 }
01776
01777
01778 bool SamRecord::rmTag(const char* tag, char type)
01779 {
01780
01781 if(strlen(tag) != 2)
01782 {
01783
01784 myStatus.setStatus(SamStatus::INVALID,
01785 "rmTag called with tag that is not 2 characters\n");
01786 return(false);
01787 }
01788
01789 myStatus = SamStatus::SUCCESS;
01790 if(myNeedToSetTagsFromBuffer)
01791 {
01792 if(!setTagsFromBuffer())
01793 {
01794
01795
01796 return(false);
01797 }
01798 }
01799
01800
01801 int key = MAKEKEY(tag[0], tag[1], type);
01802
01803 int offset = extras.Find(key);
01804
01805 if(offset < 0)
01806 {
01807
01808
01809 return(true);
01810 }
01811
01812
01813
01814 char vtype;
01815 getTypeFromKey(key, vtype);
01816 if(vtype == 'i')
01817 {
01818 vtype = getIntegerType(offset);
01819 }
01820
01821
01822
01823
01824
01825 int rmBuffSize = 0;
01826 switch(vtype)
01827 {
01828 case 'A':
01829 case 'c':
01830 case 'C':
01831 rmBuffSize = 4;
01832 break;
01833 case 's':
01834 case 'S':
01835 rmBuffSize = 5;
01836 break;
01837 case 'i':
01838 case 'I':
01839 rmBuffSize = 7;
01840 break;
01841 case 'f':
01842 rmBuffSize = 7;
01843 break;
01844 case 'Z':
01845 rmBuffSize = 4 + getString(offset).Length();
01846 break;
01847 default:
01848 myStatus.setStatus(SamStatus::INVALID,
01849 "rmTag called with unknown type.\n");
01850 return(false);
01851 break;
01852 };
01853
01854
01855 myNeedToSetTagsInBuffer = true;
01856 myIsTagsBufferValid = false;
01857 myIsBufferSynced = false;
01858 myTagBufferSize -= rmBuffSize;
01859
01860
01861 extras.Delete(offset);
01862 return(true);
01863 }
01864
01865
01866 bool SamRecord::rmTags(const char* tags)
01867 {
01868 const char* currentTagPtr = tags;
01869
01870 myStatus = SamStatus::SUCCESS;
01871 if(myNeedToSetTagsFromBuffer)
01872 {
01873 if(!setTagsFromBuffer())
01874 {
01875
01876
01877 return(false);
01878 }
01879 }
01880
01881 bool returnStatus = true;
01882
01883 int rmBuffSize = 0;
01884 while(*currentTagPtr != '\0')
01885 {
01886
01887
01888
01889
01890 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
01891 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
01892 {
01893 myStatus.setStatus(SamStatus::INVALID,
01894 "rmTags called with improperly formatted tags.\n");
01895 returnStatus = false;
01896 break;
01897 }
01898
01899
01900 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
01901 currentTagPtr[3]);
01902
01903 int offset = extras.Find(key);
01904
01905 if(offset >= 0)
01906 {
01907
01908
01909 char vtype;
01910 getTypeFromKey(key, vtype);
01911 if(vtype == 'i')
01912 {
01913 vtype = getIntegerType(offset);
01914 }
01915
01916
01917
01918
01919
01920 switch(vtype)
01921 {
01922 case 'A':
01923 case 'c':
01924 case 'C':
01925 rmBuffSize += 4;
01926 break;
01927 case 's':
01928 case 'S':
01929 rmBuffSize += 5;
01930 break;
01931 case 'i':
01932 case 'I':
01933 rmBuffSize += 7;
01934 break;
01935 case 'f':
01936 rmBuffSize += 7;
01937 break;
01938 case 'Z':
01939 rmBuffSize += 4 + getString(offset).Length();
01940 break;
01941 default:
01942 myStatus.setStatus(SamStatus::INVALID,
01943 "rmTag called with unknown type.\n");
01944 returnStatus = false;
01945 break;
01946 };
01947
01948
01949 extras.Delete(offset);
01950 }
01951
01952 if(currentTagPtr[4] == ';')
01953 {
01954
01955 currentTagPtr += 5;
01956 }
01957 else if(currentTagPtr[4] != '\0')
01958 {
01959
01960 myStatus.setStatus(SamStatus::INVALID,
01961 "rmTags called with improperly formatted tags.\n");
01962 returnStatus = false;
01963 break;
01964 }
01965 else
01966 {
01967
01968 currentTagPtr += 4;
01969 }
01970 }
01971
01972
01973 myNeedToSetTagsInBuffer = true;
01974 myIsTagsBufferValid = false;
01975 myIsBufferSynced = false;
01976 myTagBufferSize -= rmBuffSize;
01977
01978
01979 return(returnStatus);
01980 }
01981
01982
01983
01984 const SamStatus& SamRecord::getStatus()
01985 {
01986 return(myStatus);
01987 }
01988
01989
01990 bool SamRecord::getTagsString(const char* tags, String& returnString, char delim)
01991 {
01992 const char* currentTagPtr = tags;
01993
01994 returnString.Clear();
01995 myStatus = SamStatus::SUCCESS;
01996 if(myNeedToSetTagsFromBuffer)
01997 {
01998 if(!setTagsFromBuffer())
01999 {
02000
02001
02002 return(false);
02003 }
02004 }
02005
02006 bool returnStatus = true;
02007
02008 while(*currentTagPtr != '\0')
02009 {
02010
02011
02012
02013 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
02014 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
02015 {
02016 myStatus.setStatus(SamStatus::INVALID,
02017 "getTagsString called with improperly formatted tags.\n");
02018 returnStatus = false;
02019 break;
02020 }
02021
02022
02023 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
02024 currentTagPtr[3]);
02025
02026 int offset = extras.Find(key);
02027
02028 if(offset >= 0)
02029 {
02030
02031 if(!returnString.IsEmpty())
02032 {
02033 returnString += delim;
02034 }
02035 returnString += currentTagPtr[0];
02036 returnString += currentTagPtr[1];
02037 returnString += ':';
02038 returnString += currentTagPtr[3];
02039 returnString += ':';
02040
02041
02042 char vtype;
02043 getTypeFromKey(key, vtype);
02044
02045
02046
02047
02048
02049
02050 switch(vtype)
02051 {
02052 case 'i':
02053 returnString += *(int*)getIntegerPtr(offset, vtype);
02054 break;
02055 case 'f':
02056 returnString += *(double*)getDoublePtr(offset);
02057 break;
02058 case 'Z':
02059 returnString += *(String*)getStringPtr(offset);
02060 break;
02061 default:
02062 myStatus.setStatus(SamStatus::INVALID,
02063 "rmTag called with unknown type.\n");
02064 returnStatus = false;
02065 break;
02066 };
02067 }
02068
02069 if(currentTagPtr[4] == ';')
02070 {
02071
02072 currentTagPtr += 5;
02073 }
02074 else if(currentTagPtr[4] != '\0')
02075 {
02076
02077 myStatus.setStatus(SamStatus::INVALID,
02078 "rmTags called with improperly formatted tags.\n");
02079 returnStatus = false;
02080 break;
02081 }
02082 else
02083 {
02084
02085 currentTagPtr += 4;
02086 }
02087 }
02088 return(returnStatus);
02089 }
02090
02091
02092 String* SamRecord::getStringTag(const char * tag)
02093 {
02094
02095 if(myNeedToSetTagsFromBuffer)
02096 {
02097 if(!setTagsFromBuffer())
02098 {
02099
02100
02101
02102 return(NULL);
02103 }
02104 }
02105
02106 int key = MAKEKEY(tag[0], tag[1], 'Z');
02107 int offset = extras.Find(key);
02108
02109 int value;
02110 if (offset < 0)
02111 {
02112
02113 return(NULL);
02114 }
02115
02116
02117 value = extras[offset];
02118 return(&(strings[value]));
02119 }
02120
02121
02122 int* SamRecord::getIntegerTag(const char * tag)
02123 {
02124
02125 myStatus = SamStatus::SUCCESS;
02126
02127 if(myNeedToSetTagsFromBuffer)
02128 {
02129 if(!setTagsFromBuffer())
02130 {
02131
02132
02133
02134 return(NULL);
02135 }
02136 }
02137
02138 int key = MAKEKEY(tag[0], tag[1], 'i');
02139 int offset = extras.Find(key);
02140
02141 int value;
02142 if (offset < 0)
02143 {
02144
02145 return(NULL);
02146 }
02147 else
02148 value = extras[offset];
02149
02150 return(&(integers[value]));
02151 }
02152
02153
02154 double* SamRecord::getDoubleTag(const char * tag)
02155 {
02156
02157 myStatus = SamStatus::SUCCESS;
02158
02159 if(myNeedToSetTagsFromBuffer)
02160 {
02161 if(!setTagsFromBuffer())
02162 {
02163
02164
02165
02166 return(NULL);
02167 }
02168 }
02169
02170 int key = MAKEKEY(tag[0], tag[1], 'f');
02171 int offset = extras.Find(key);
02172
02173 int value;
02174 if (offset < 0)
02175 {
02176
02177 return(NULL);
02178 }
02179 else
02180 value = extras[offset];
02181
02182 return(&(doubles[value]));
02183 }
02184
02185
02186 String & SamRecord::getString(const char * tag)
02187 {
02188
02189 myStatus = SamStatus::SUCCESS;
02190
02191 if(myNeedToSetTagsFromBuffer)
02192 {
02193 if(!setTagsFromBuffer())
02194 {
02195
02196
02197
02198 }
02199 }
02200
02201 int key = MAKEKEY(tag[0], tag[1], 'Z');
02202 int offset = extras.Find(key);
02203
02204 int value;
02205 if (offset < 0)
02206 {
02207
02208 return(NOT_FOUND_TAG_STRING);
02209 }
02210 else
02211 value = extras[offset];
02212
02213 return strings[value];
02214 }
02215
02216 int & SamRecord::getInteger(const char * tag)
02217 {
02218
02219 myStatus = SamStatus::SUCCESS;
02220
02221 if(myNeedToSetTagsFromBuffer)
02222 {
02223 if(!setTagsFromBuffer())
02224 {
02225
02226
02227
02228 }
02229 }
02230
02231 int key = MAKEKEY(tag[0], tag[1], 'i');
02232 int offset = extras.Find(key);
02233
02234 int value;
02235 if (offset < 0)
02236 {
02237
02238 return NOT_FOUND_TAG_INT;
02239 }
02240 else
02241 value = extras[offset];
02242
02243 return integers[value];
02244 }
02245
02246
02247 double & SamRecord::getDouble(const char * tag)
02248 {
02249
02250 myStatus = SamStatus::SUCCESS;
02251
02252 if(myNeedToSetTagsFromBuffer)
02253 {
02254 if(!setTagsFromBuffer())
02255 {
02256
02257
02258
02259 }
02260 }
02261
02262 int key = MAKEKEY(tag[0], tag[1], 'f');
02263 int offset = extras.Find(key);
02264
02265 int value;
02266 if (offset < 0)
02267 {
02268
02269 return NOT_FOUND_TAG_DOUBLE;
02270 }
02271 else
02272 value = extras[offset];
02273
02274 return doubles[value];
02275 }
02276
02277
02278 bool SamRecord::checkTag(const char * tag, char type)
02279 {
02280
02281 myStatus = SamStatus::SUCCESS;
02282
02283 if(myNeedToSetTagsFromBuffer)
02284 {
02285 if(!setTagsFromBuffer())
02286 {
02287
02288
02289 return("");
02290 }
02291 }
02292
02293 int key = MAKEKEY(tag[0], tag[1], type);
02294
02295 return (extras.Find(key) != LH_NOTFOUND);
02296 }
02297
02298
02299
02300
02301 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
02302 {
02303
02304
02305 if(myAlignmentLength == -1)
02306 {
02307 parseCigar();
02308 }
02309 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
02310 }
02311
02312
02313
02314
02315
02316
02317
02318
02319
02320
02321
02322
02323
02324
02325
02326
02327
02328
02329
02330
02331
02332
02333
02334
02335
02336
02337
02338
02339
02340
02341
02342
02343
02344
02345
02346
02347
02348
02349
02350
02351
02352
02353
02354
02355
02356
02357
02358
02359
02360
02361
02362
02363
02364
02365
02366
02367 bool SamRecord::allocateRecordStructure(int size)
02368 {
02369 if (allocatedSize < size)
02370 {
02371 bamRecordStruct* tmpRecordPtr =
02372 (bamRecordStruct *)realloc(myRecordPtr, size);
02373 if(tmpRecordPtr == NULL)
02374 {
02375
02376 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02377 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
02378 return(false);
02379 }
02380
02381 myRecordPtr = tmpRecordPtr;
02382 allocatedSize = size;
02383 }
02384 return(true);
02385 }
02386
02387
02388
02389 void* SamRecord::getStringPtr(int index)
02390 {
02391 int value = extras[index];
02392
02393 return &(strings[value]);
02394 }
02395
02396 void* SamRecord::getIntegerPtr(int offset, char& type)
02397 {
02398 int value = extras[offset];
02399
02400 type = intType[value];
02401
02402 return &(integers[value]);
02403 }
02404
02405 void* SamRecord::getDoublePtr(int offset)
02406 {
02407 int value = extras[offset];
02408
02409 return &(doubles[value]);
02410 }
02411
02412
02413
02414 bool SamRecord::fixBuffer(SequenceTranslation translation)
02415 {
02416
02417 if(myIsBufferSynced &&
02418 (myBufferSequenceTranslation == translation))
02419 {
02420
02421 return(true);
02422 }
02423
02424
02425 if(!myIsBinValid)
02426 {
02427
02428
02429 myRecordPtr->myBin =
02430 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
02431 myIsBinValid = true;
02432 }
02433
02434
02435 bool status = true;
02436
02437
02438 uint8_t newReadNameLen = getReadNameLength();
02439 uint16_t newCigarLen = getCigarLength();
02440 int32_t newReadLen = getReadLength();
02441 uint32_t newTagLen = getTagLength();
02442 uint32_t bamSequenceLen = (newReadLen+1)/2;
02443
02444
02445
02446
02447 int newBufferSize =
02448 ((unsigned char*)(&(myRecordPtr->myData)) -
02449 (unsigned char*)myRecordPtr) +
02450 newReadNameLen + ((newCigarLen)*4) +
02451 newReadLen + bamSequenceLen + newTagLen;
02452
02453 if(!allocateRecordStructure(newBufferSize))
02454 {
02455
02456 return(false);
02457 }
02458
02459
02460
02461
02462
02463
02464 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
02465 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
02466 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
02467
02468
02469
02470 if(myIsTagsBufferValid &&
02471 (readNameLenChange | cigarLenChange | readLenChange))
02472 {
02473 status &= setTagsFromBuffer();
02474
02475
02476 myIsTagsBufferValid = false;
02477 }
02478
02479
02480
02481
02482 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
02483 (readNameLenChange | cigarLenChange | readLenChange))
02484 {
02485 setSequenceAndQualityFromBuffer();
02486
02487
02488 myIsQualityBufferValid = false;
02489 myIsSequenceBufferValid = false;
02490 }
02491
02492
02493
02494 if((myIsCigarBufferValid) &&
02495 (readNameLenChange))
02496 {
02497 status &= parseCigarBinary();
02498 myIsCigarBufferValid = false;
02499 }
02500
02501
02502 if(!myIsReadNameBufferValid)
02503 {
02504 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
02505 newReadNameLen);
02506
02507
02508 myRecordPtr->myReadNameLength = newReadNameLen;
02509 myIsReadNameBufferValid = true;
02510 }
02511
02512 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02513 myRecordPtr->myReadNameLength;
02514
02515
02516 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02517
02518 if(!myIsCigarBufferValid)
02519 {
02520
02521
02522 myRecordPtr->myCigarLength = newCigarLen;
02523 memcpy(packedCigar, myCigarTempBuffer,
02524 myRecordPtr->myCigarLength * sizeof(uint32_t));
02525
02526 myIsCigarBufferValid = true;
02527 }
02528
02529 unsigned char * packedSequence = readNameEnds +
02530 myRecordPtr->myCigarLength * sizeof(int);
02531 unsigned char * packedQuality = packedSequence + bamSequenceLen;
02532
02533 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
02534 (myBufferSequenceTranslation != translation))
02535 {
02536 myRecordPtr->myReadLength = newReadLen;
02537
02538
02539 bool noQuality = false;
02540 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
02541 {
02542 noQuality = true;
02543 }
02544
02545 const char* translatedSeq = NULL;
02546
02547
02548
02549 if((!myIsSequenceBufferValid) ||
02550 (translation != myBufferSequenceTranslation))
02551 {
02552 translatedSeq = getSequence(translation);
02553 }
02554
02555 for (int i = 0; i < myRecordPtr->myReadLength; i++)
02556 {
02557 if((!myIsSequenceBufferValid) ||
02558 (translation != myBufferSequenceTranslation))
02559 {
02560
02561 int seqVal = 0;
02562 switch(translatedSeq[i])
02563 {
02564 case '=':
02565 seqVal = 0;
02566 break;
02567 case 'A':
02568 case 'a':
02569 seqVal = 1;
02570 break;
02571 case 'C':
02572 case 'c':
02573 seqVal = 2;
02574 break;
02575 case 'G':
02576 case 'g':
02577 seqVal = 4;
02578 break;
02579 case 'T':
02580 case 't':
02581 seqVal = 8;
02582 break;
02583 case 'N':
02584 case 'n':
02585 case '.':
02586 seqVal = 15;
02587 break;
02588 default:
02589 myStatus.addError(SamStatus::FAIL_PARSE,
02590 "Unknown Sequence character found.");
02591 status = false;
02592 break;
02593 };
02594
02595 if(i & 1)
02596 {
02597
02598
02599 packedSequence[i/2] |= seqVal;
02600 }
02601 else
02602 {
02603
02604 packedSequence[i/2] = seqVal << 4;
02605 }
02606 }
02607
02608 if(!myIsQualityBufferValid)
02609 {
02610
02611 if((noQuality) || (myQuality.Length() <= i))
02612 {
02613
02614
02615 packedQuality[i] = 0xFF;
02616 }
02617 else
02618 {
02619
02620 packedQuality[i] = myQuality[i] - 33;
02621 }
02622 }
02623 }
02624 myIsQualityBufferValid = true;
02625 myIsSequenceBufferValid = true;
02626 myBufferSequenceTranslation = translation;
02627 }
02628
02629 if(!myIsTagsBufferValid)
02630 {
02631 status &= setTagsInBuffer();
02632 }
02633
02634
02635 myRecordPtr->myReadNameLength = newReadNameLen;
02636 myRecordPtr->myCigarLength = newCigarLen;
02637 myRecordPtr->myReadLength = newReadLen;
02638
02639
02640
02641 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
02642
02643 if(status)
02644 {
02645 myIsBufferSynced = true;
02646 }
02647
02648 return(status);
02649 }
02650
02651
02652
02653
02654
02655 void SamRecord::setSequenceAndQualityFromBuffer()
02656 {
02657
02658
02659
02660
02661
02662
02663 bool extractSequence = false;
02664 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
02665 {
02666 extractSequence = true;
02667 }
02668
02669
02670 bool extractQuality = false;
02671 if(myIsQualityBufferValid && (myQuality.Length() == 0))
02672 {
02673 extractQuality = true;
02674 }
02675
02676
02677
02678 if(!extractSequence && !extractQuality)
02679 {
02680 return;
02681 }
02682
02683
02684 if(extractSequence)
02685 {
02686 mySequence.SetLength(myRecordPtr->myReadLength);
02687 }
02688 if(extractQuality)
02689 {
02690 myQuality.SetLength(myRecordPtr->myReadLength);
02691 }
02692
02693 unsigned char * readNameEnds =
02694 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02695 unsigned char * packedSequence =
02696 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02697 unsigned char * packedQuality =
02698 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02699
02700 const char * asciiBases = "=AC.G...T......N";
02701
02702
02703
02704 bool qualitySpecified = false;
02705
02706 for (int i = 0; i < myRecordPtr->myReadLength; i++)
02707 {
02708 if(extractSequence)
02709 {
02710 mySequence[i] = i & 1 ?
02711 asciiBases[packedSequence[i / 2] & 0xF] :
02712 asciiBases[packedSequence[i / 2] >> 4];
02713 }
02714
02715 if(extractQuality)
02716 {
02717 if(packedQuality[i] != 0xFF)
02718 {
02719
02720 qualitySpecified = true;
02721 }
02722
02723 myQuality[i] = packedQuality[i] + 33;
02724 }
02725 }
02726
02727
02728 if(myRecordPtr->myReadLength == 0)
02729 {
02730 if(extractSequence)
02731 {
02732 mySequence = "*";
02733 }
02734 if(extractQuality)
02735 {
02736 myQuality = "*";
02737 }
02738 }
02739 else if(extractQuality && !qualitySpecified)
02740 {
02741
02742 myQuality = "*";
02743 }
02744 }
02745
02746
02747
02748 bool SamRecord::parseCigar()
02749 {
02750
02751 if(myCigar.Length() == 0)
02752 {
02753
02754 return(parseCigarBinary());
02755 }
02756 return(parseCigarString());
02757 }
02758
02759
02760 bool SamRecord::parseCigarBinary()
02761 {
02762
02763
02764
02765 if(myCigar.Length() != 0)
02766 {
02767
02768 return(true);
02769 }
02770
02771 unsigned char * readNameEnds =
02772 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02773
02774 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02775
02776 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
02777
02778 myCigarRoller.getCigarString(myCigar);
02779
02780 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02781
02782 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02783 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02784
02785
02786 if(myRecordPtr->myCigarLength == 0)
02787 {
02788 myCigar = "*";
02789 return(true);
02790 }
02791
02792
02793 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
02794 if(newBufferSize > myCigarTempBufferAllocatedSize)
02795 {
02796 uint32_t* tempBufferPtr =
02797 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02798 if(tempBufferPtr == NULL)
02799 {
02800
02801
02802 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02803 myStatus.addError(SamStatus::FAIL_MEM,
02804 "Failed to Allocate Memory.");
02805 return(false);
02806 }
02807 myCigarTempBuffer = tempBufferPtr;
02808 myCigarTempBufferAllocatedSize = newBufferSize;
02809 }
02810
02811 memcpy(myCigarTempBuffer, packedCigar,
02812 myRecordPtr->myCigarLength * sizeof(uint32_t));
02813
02814
02815 myCigarTempBufferLength = myRecordPtr->myCigarLength;
02816
02817 return(true);
02818 }
02819
02820
02821 bool SamRecord::parseCigarString()
02822 {
02823 myCigarTempBufferLength = 0;
02824 if(myCigar == "*")
02825 {
02826
02827 myAlignmentLength = 0;
02828 myUnclippedStartOffset = 0;
02829 myUnclippedEndOffset = 0;
02830 myCigarRoller.clear();
02831 return(true);
02832 }
02833
02834 myCigarRoller.Set(myCigar);
02835
02836 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02837
02838 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02839 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02840
02841
02842
02843
02844 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
02845 if(newBufferSize > myCigarTempBufferAllocatedSize)
02846 {
02847 uint32_t* tempBufferPtr =
02848 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02849 if(tempBufferPtr == NULL)
02850 {
02851
02852
02853 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02854 myStatus.addError(SamStatus::FAIL_MEM,
02855 "Failed to Allocate Memory.");
02856 return(false);
02857 }
02858 myCigarTempBuffer = tempBufferPtr;
02859 myCigarTempBufferAllocatedSize = newBufferSize;
02860 }
02861
02862
02863 bool status = true;
02864
02865
02866 char *cigarOp;
02867 const char* cigarEntryStart = myCigar.c_str();
02868 int opLen = 0;
02869 int op = 0;
02870
02871 unsigned int * packedCigar = myCigarTempBuffer;
02872
02873
02874 const char* endCigarString = cigarEntryStart + myCigar.Length();
02875 while(cigarEntryStart < endCigarString)
02876 {
02877 bool validCigarEntry = true;
02878
02879
02880 opLen = strtol(cigarEntryStart, &cigarOp, 10);
02881
02882 switch(*cigarOp)
02883 {
02884 case('M'):
02885 op = 0;
02886 break;
02887 case('I'):
02888
02889
02890 op = 1;
02891 break;
02892 case('D'):
02893 op = 2;
02894 break;
02895 case('N'):
02896 op = 3;
02897 break;
02898 case('S'):
02899 op = 4;
02900 break;
02901 case('H'):
02902 op = 5;
02903 break;
02904 case('P'):
02905 op = 6;
02906 break;
02907 default:
02908 fprintf(stderr, "ERROR parsing cigar\n");
02909 validCigarEntry = false;
02910 status = false;
02911 myStatus.addError(SamStatus::FAIL_PARSE,
02912 "Unknown operation found when parsing the Cigar.");
02913 break;
02914 };
02915 if(validCigarEntry)
02916 {
02917
02918 ++myCigarTempBufferLength;
02919 *packedCigar = (opLen << 4) | op;
02920 packedCigar++;
02921 }
02922
02923 cigarEntryStart = ++cigarOp;
02924 }
02925
02926
02927 return(status);
02928 }
02929
02930
02931 bool SamRecord::setTagsFromBuffer()
02932 {
02933
02934 if(myNeedToSetTagsFromBuffer == false)
02935 {
02936
02937 return(true);
02938 }
02939
02940
02941 myNeedToSetTagsFromBuffer = false;
02942
02943 unsigned char * readNameEnds =
02944 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02945 unsigned char * packedSequence =
02946 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02947 unsigned char * packedQuality =
02948 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02949
02950 unsigned char * extraPtr = packedQuality + myRecordPtr->myReadLength;
02951
02952
02953 bool status = true;
02954
02955
02956 clearTags();
02957 while (myRecordPtr->myBlockSize + 4 -
02958 (extraPtr - (unsigned char *)myRecordPtr) > 0)
02959 {
02960 int key;
02961 int value;
02962 void * content = extraPtr + 3;
02963 int tagBufferSize = 0;
02964
02965 key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
02966
02967 switch (extraPtr[2])
02968 {
02969 case 'A' :
02970 value = integers.Length();
02971 integers.Push(* (char *) content);
02972 intType.push_back(extraPtr[2]);
02973 extraPtr += 4;
02974 tagBufferSize += 4;
02975 break;
02976 case 'c' :
02977 value = integers.Length();
02978 integers.Push(* (char *) content);
02979 intType.push_back(extraPtr[2]);
02980 extraPtr += 4;
02981 tagBufferSize += 4;
02982 break;
02983 case 'C' :
02984 value = integers.Length();
02985 integers.Push(* (unsigned char *) content);
02986 intType.push_back(extraPtr[2]);
02987 extraPtr += 4;
02988 tagBufferSize += 4;
02989 break;
02990 case 's' :
02991 value = integers.Length();
02992 integers.Push(* (short *) content);
02993 intType.push_back(extraPtr[2]);
02994 extraPtr += 5;
02995 tagBufferSize += 5;
02996 break;
02997 case 'S' :
02998 value = integers.Length();
02999 integers.Push(* (unsigned short *) content);
03000 intType.push_back(extraPtr[2]);
03001 extraPtr += 5;
03002 tagBufferSize += 5;
03003 break;
03004 case 'i' :
03005 value = integers.Length();
03006 integers.Push(* (int *) content);
03007 intType.push_back(extraPtr[2]);
03008 extraPtr += 7;
03009 tagBufferSize += 7;
03010 break;
03011 case 'I' :
03012 value = integers.Length();
03013 integers.Push((int) * (unsigned int *) content);
03014 intType.push_back(extraPtr[2]);
03015 extraPtr += 7;
03016 tagBufferSize += 7;
03017 break;
03018 case 'Z' :
03019 value = strings.Length();
03020 strings.Push((const char *) content);
03021 extraPtr += 4 + strings.Last().Length();
03022 tagBufferSize += 4 + strings.Last().Length();
03023 break;
03024 case 'f' :
03025 value = doubles.Length();
03026 doubles.Push(* (float *) content);
03027 fprintf(stderr, "\n\nFLOAT!!!\n\n");
03028 extraPtr += 7;
03029 tagBufferSize += 7;
03030 break;
03031 default :
03032 fprintf(stderr,
03033 "parsing BAM - Unknown custom field of type %c%c:%c\n",
03034 extraPtr[0], extraPtr[1], extraPtr[2]);
03035
03036
03037 extraPtr += 3;
03038 myStatus.addError(SamStatus::FAIL_PARSE,
03039 "Unknown tag type.");
03040 status = false;
03041 }
03042
03043
03044 if(status)
03045 {
03046 extras.Add(key, value);
03047 myTagBufferSize += tagBufferSize;
03048 }
03049 }
03050 return(status);
03051 }
03052
03053
03054 bool SamRecord::setTagsInBuffer()
03055 {
03056
03057
03058
03059 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
03060 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
03061 (unsigned char*)myRecordPtr) +
03062 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
03063 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
03064
03065
03066 if(!allocateRecordStructure(newBufferSize))
03067 {
03068
03069 return(false);
03070 }
03071
03072 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
03073 myRecordPtr->myReadNameLength;
03074 unsigned char * packedSequence = readNameEnds +
03075 myRecordPtr->myCigarLength * sizeof(int);
03076 unsigned char * packedQuality =
03077 packedSequence + bamSequenceLength;
03078
03079 char * extraPtr = (char*)packedQuality + myRecordPtr->myReadLength;
03080
03081 bool status = true;
03082
03083
03084 if (extras.Entries())
03085 {
03086 for (int i = 0; i < extras.Capacity(); i++)
03087 {
03088 if (extras.SlotInUse(i))
03089 {
03090 int key = extras.GetKey(i);
03091 getTag(key, extraPtr);
03092 extraPtr += 2;
03093 char vtype;
03094 getTypeFromKey(key, vtype);
03095
03096 if(vtype == 'i')
03097 {
03098 vtype = getIntegerType(i);
03099 }
03100
03101 extraPtr[0] = vtype;
03102
03103
03104 extraPtr += 1;
03105
03106 switch (vtype)
03107 {
03108 case 'A' :
03109 *(char*)extraPtr = (char)getInteger(i);
03110
03111 extraPtr += 1;
03112 break;
03113 case 'c' :
03114 *(int8_t*)extraPtr = (int8_t)getInteger(i);
03115
03116 extraPtr += 1;
03117 break;
03118 case 'C' :
03119 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
03120
03121 extraPtr += 1;
03122 break;
03123 case 's' :
03124 *(int16_t*)extraPtr = (int16_t)getInteger(i);
03125
03126 extraPtr += 2;
03127 break;
03128 case 'S' :
03129 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
03130
03131 extraPtr += 2;
03132 break;
03133 case 'i' :
03134 *(int32_t*)extraPtr = (int32_t)getInteger(i);
03135
03136 extraPtr += 4;
03137 break;
03138 case 'I' :
03139 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
03140
03141 extraPtr += 4;
03142 break;
03143 case 'Z' :
03144 sprintf(extraPtr, "%s", getString(i).c_str());
03145 extraPtr += getString(i).Length() + 1;
03146 break;
03147 case 'f' :
03148
03149 sprintf(extraPtr, "%f", getDouble(i));
03150 extraPtr += 4;
03151 break;
03152 default :
03153 myStatus.addError(SamStatus::FAIL_PARSE,
03154 "Unknown tag type.");
03155 status = false;
03156 break;
03157 }
03158 }
03159 }
03160 }
03161
03162
03163
03164 if(extraPtr != (char*)myRecordPtr + newBufferSize)
03165 {
03166 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
03167 myStatus.addError(SamStatus::FAIL_PARSE,
03168 "ERROR updating the buffer. Incorrect size.");
03169 status = false;
03170 }
03171
03172
03173
03174 myNeedToSetTagsInBuffer = false;
03175 myIsTagsBufferValid = true;
03176 return(status);
03177 }
03178
03179
03180
03181
03182
03183 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
03184 {
03185
03186
03187 myReferenceName =
03188 header.getReferenceLabel(myRecordPtr->myReferenceID);
03189 myMateReferenceName =
03190 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
03191
03192
03193 myReadName.SetLength(0);
03194 myCigar.SetLength(0);
03195 mySequence.SetLength(0);
03196 mySeqWithEq.clear();
03197 mySeqWithoutEq.clear();
03198 myQuality.SetLength(0);
03199 myNeedToSetTagsFromBuffer = true;
03200 myNeedToSetTagsInBuffer = false;
03201
03202
03203 myIsBufferSynced = true;
03204
03205 myIsReadNameBufferValid = true;
03206 myIsCigarBufferValid = true;
03207 myIsSequenceBufferValid = true;
03208 myBufferSequenceTranslation = NONE;
03209 myIsQualityBufferValid = true;
03210 myIsTagsBufferValid = true;
03211 myIsBinValid = true;
03212 }
03213
03214
03215
03216 void SamRecord::getTypeFromKey(int key, char& type) const
03217 {
03218
03219 type = (key >> 16) & 0xFF;
03220 }
03221
03222
03223
03224 void SamRecord::getTag(int key, char* tag) const
03225 {
03226
03227 tag[0] = key & 0xFF;
03228 tag[1] = (key >> 8) & 0xFF;
03229 tag[2] = 0;
03230 }
03231
03232
03233
03234 String & SamRecord::getString(int index)
03235 {
03236 int value = extras[index];
03237
03238 return strings[value];
03239 }
03240
03241 int & SamRecord::getInteger(int offset)
03242 {
03243 int value = extras[offset];
03244
03245 return integers[value];
03246 }
03247
03248 char & SamRecord::getIntegerType(int offset)
03249 {
03250 int value = extras[offset];
03251
03252 return intType[value];
03253 }
03254
03255 double & SamRecord::getDouble(int offset)
03256 {
03257 int value = extras[offset];
03258
03259 return doubles[value];
03260 }
03261
03262