00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021
00022 #include "bam.h"
00023
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026
00027 #include "BaseUtilities.h"
00028 #include "SamQuerySeqWithRefHelper.h"
00029
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032
00033
00034 SamRecord::SamRecord()
00035 : myStatus(),
00036 myRefPtr(NULL),
00037 mySequenceTranslation(NONE)
00038 {
00039 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00040
00041 myRecordPtr =
00042 (bamRecordStruct *) malloc(defaultAllocSize);
00043
00044 myCigarTempBuffer = NULL;
00045 myCigarTempBufferAllocatedSize = 0;
00046
00047 allocatedSize = defaultAllocSize;
00048
00049 resetRecord();
00050 }
00051
00052
00053 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00054 : myStatus(errorHandlingType),
00055 myRefPtr(NULL),
00056 mySequenceTranslation(NONE)
00057 {
00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00059
00060 myRecordPtr =
00061 (bamRecordStruct *) malloc(defaultAllocSize);
00062
00063 myCigarTempBuffer = NULL;
00064 myCigarTempBufferAllocatedSize = 0;
00065
00066 allocatedSize = defaultAllocSize;
00067
00068 resetRecord();
00069 }
00070
00071
00072 SamRecord::~SamRecord()
00073 {
00074 resetRecord();
00075
00076 if(myRecordPtr != NULL)
00077 {
00078 free(myRecordPtr);
00079 myRecordPtr = NULL;
00080 }
00081 if(myCigarTempBuffer != NULL)
00082 {
00083 free(myCigarTempBuffer);
00084 myCigarTempBuffer = NULL;
00085 myCigarTempBufferAllocatedSize = 0;
00086 }
00087 }
00088
00089
00090
00091 void SamRecord::resetRecord()
00092 {
00093 myIsBufferSynced = true;
00094
00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00096 myRecordPtr->myReferenceID = -1;
00097 myRecordPtr->myPosition = -1;
00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00099 myRecordPtr->myMapQuality = 0;
00100 myRecordPtr->myBin = DEFAULT_BIN;
00101 myRecordPtr->myCigarLength = 0;
00102 myRecordPtr->myFlag = 0;
00103 myRecordPtr->myReadLength = 0;
00104 myRecordPtr->myMateReferenceID = -1;
00105 myRecordPtr->myMatePosition = -1;
00106 myRecordPtr->myInsertSize = 0;
00107
00108
00109
00110
00111 myReadName = DEFAULT_READ_NAME;
00112 myReferenceName = "*";
00113 myMateReferenceName = "*";
00114 myCigar = "*";
00115 mySequence = "*";
00116 mySeqWithEq.clear();
00117 mySeqWithoutEq.clear();
00118 myQuality = "*";
00119 myNeedToSetTagsFromBuffer = false;
00120 myNeedToSetTagsInBuffer = false;
00121
00122
00123 myAlignmentLength = -1;
00124 myUnclippedStartOffset = -1;
00125 myUnclippedEndOffset = -1;
00126
00127 clearTags();
00128
00129
00130
00131
00132
00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
00134 myRecordPtr->myReadNameLength);
00135
00136
00137 myIsReadNameBufferValid = true;
00138 myIsCigarBufferValid = true;
00139 myIsSequenceBufferValid = true;
00140 myBufferSequenceTranslation = NONE;
00141 myIsQualityBufferValid = true;
00142 myIsTagsBufferValid = true;
00143 myIsBinValid = true;
00144
00145 myCigarTempBufferLength = -1;
00146
00147 myStatus = SamStatus::SUCCESS;
00148
00149 NOT_FOUND_TAG_STRING = "";
00150 NOT_FOUND_TAG_INT = -1;
00151 NOT_FOUND_TAG_DOUBLE = -1;
00152 }
00153
00154
00155
00156
00157 bool SamRecord::isValid(SamFileHeader& header)
00158 {
00159 myStatus = SamStatus::SUCCESS;
00160 SamValidationErrors invalidSamErrors;
00161 if(!SamValidator::isValid(header, *this, invalidSamErrors))
00162 {
00163
00164 std::string errorMessage = "";
00165 invalidSamErrors.getErrorString(errorMessage);
00166 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00167 return(false);
00168 }
00169
00170 return(true);
00171 }
00172
00173
00174 void SamRecord::setReference(GenomeSequence* reference)
00175 {
00176 myRefPtr = reference;
00177 }
00178
00179
00180
00181
00182
00183 void SamRecord::setSequenceTranslation(SequenceTranslation translation)
00184 {
00185 mySequenceTranslation = translation;
00186 }
00187
00188
00189 bool SamRecord::setReadName(const char* readName)
00190 {
00191 myReadName = readName;
00192 myIsBufferSynced = false;
00193 myIsReadNameBufferValid = false;
00194 myStatus = SamStatus::SUCCESS;
00195
00196
00197
00198 if(myReadName.Length() == 0)
00199 {
00200
00201 myReadName = DEFAULT_READ_NAME;
00202 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00203 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00204 return(false);
00205 }
00206
00207 return true;
00208 }
00209
00210
00211 bool SamRecord::setFlag(uint16_t flag)
00212 {
00213 myStatus = SamStatus::SUCCESS;
00214 myRecordPtr->myFlag = flag;
00215 return true;
00216 }
00217
00218
00219 bool SamRecord::setReferenceName(SamFileHeader& header,
00220 const char* referenceName)
00221 {
00222 myStatus = SamStatus::SUCCESS;
00223
00224 myReferenceName = referenceName;
00225
00226 myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
00227
00228 return true;
00229 }
00230
00231
00232 bool SamRecord::set1BasedPosition(int32_t position)
00233 {
00234 return(set0BasedPosition(position - 1));
00235 }
00236
00237
00238 bool SamRecord::set0BasedPosition(int32_t position)
00239 {
00240 myStatus = SamStatus::SUCCESS;
00241 myRecordPtr->myPosition = position;
00242 myIsBinValid = false;
00243 return true;
00244 }
00245
00246
00247 bool SamRecord::setMapQuality(uint8_t mapQuality)
00248 {
00249 myStatus = SamStatus::SUCCESS;
00250 myRecordPtr->myMapQuality = mapQuality;
00251 return true;
00252 }
00253
00254
00255 bool SamRecord::setCigar(const char* cigar)
00256 {
00257 myStatus = SamStatus::SUCCESS;
00258 myCigar = cigar;
00259
00260 myIsBufferSynced = false;
00261 myIsCigarBufferValid = false;
00262 myCigarTempBufferLength = -1;
00263 myIsBinValid = false;
00264
00265
00266 myAlignmentLength = -1;
00267 myUnclippedStartOffset = -1;
00268 myUnclippedEndOffset = -1;
00269
00270 return true;
00271 }
00272
00273
00274 bool SamRecord::setCigar(const Cigar& cigar)
00275 {
00276 myStatus = SamStatus::SUCCESS;
00277 cigar.getCigarString(myCigar);
00278
00279 myIsBufferSynced = false;
00280 myIsCigarBufferValid = false;
00281 myCigarTempBufferLength = -1;
00282 myIsBinValid = false;
00283
00284
00285 myAlignmentLength = -1;
00286 myUnclippedStartOffset = -1;
00287 myUnclippedEndOffset = -1;
00288
00289 return true;
00290 }
00291
00292
00293 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00294 const char* mateReferenceName)
00295 {
00296 myStatus = SamStatus::SUCCESS;
00297
00298
00299
00300 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00301 {
00302 myMateReferenceName = myReferenceName;
00303 }
00304 else
00305 {
00306 myMateReferenceName = mateReferenceName;
00307 }
00308
00309
00310
00311 myRecordPtr->myMateReferenceID =
00312 header.getReferenceID(myMateReferenceName, true);
00313
00314 return true;
00315 }
00316
00317
00318 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00319 {
00320 return(set0BasedMatePosition(matePosition - 1));
00321 }
00322
00323
00324 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00325 {
00326 myStatus = SamStatus::SUCCESS;
00327 myRecordPtr->myMatePosition = matePosition;
00328 return true;
00329 }
00330
00331
00332 bool SamRecord::setInsertSize(int32_t insertSize)
00333 {
00334 myStatus = SamStatus::SUCCESS;
00335 myRecordPtr->myInsertSize = insertSize;
00336 return true;
00337 }
00338
00339
00340 bool SamRecord::setSequence(const char* seq)
00341 {
00342 myStatus = SamStatus::SUCCESS;
00343 mySequence = seq;
00344 mySeqWithEq.clear();
00345 mySeqWithoutEq.clear();
00346
00347 myIsBufferSynced = false;
00348 myIsSequenceBufferValid = false;
00349 return true;
00350 }
00351
00352
00353 bool SamRecord::setQuality(const char* quality)
00354 {
00355 myStatus = SamStatus::SUCCESS;
00356 myQuality = quality;
00357 myIsBufferSynced = false;
00358 myIsQualityBufferValid = false;
00359 return true;
00360 }
00361
00362
00363
00364 bool SamRecord::shiftIndelsLeft()
00365 {
00366
00367
00368
00369
00370 if(myAlignmentLength == -1)
00371 {
00372
00373 parseCigar();
00374 }
00375
00376
00377 bool shifted = false;
00378
00379
00380
00381 uint32_t currentPos = 0;
00382
00383
00384
00385 if(Cigar::foundInQuery(myCigarRoller[0]))
00386 {
00387
00388 currentPos += myCigarRoller[0].count;
00389 }
00390
00391 int numOps = myCigarRoller.size();
00392
00393
00394
00395 for(int currentOp = 1; currentOp < numOps; currentOp++)
00396 {
00397 if(myCigarRoller[currentOp].operation == Cigar::insert)
00398 {
00399
00400 int prevOpIndex = currentOp-1;
00401
00402
00403 int nextOpIndex = currentOp+1;
00404 if(nextOpIndex == numOps)
00405 {
00406
00407 nextOpIndex = currentOp;
00408 }
00409
00410
00411 uint32_t prevOpStart =
00412 currentPos - myCigarRoller[prevOpIndex].count;
00413
00414
00415 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
00416 {
00417
00418
00419 currentPos += myCigarRoller[currentOp].count;
00420
00421 continue;
00422 }
00423
00424
00425
00426
00427
00428
00429 uint32_t insertEndPos =
00430 currentPos + myCigarRoller[currentOp].count - 1;
00431
00432
00433 uint32_t insertStartPos = currentPos;
00434
00435
00436
00437
00438
00439
00440
00441
00442 while((insertStartPos > prevOpStart) &&
00443 (getSequence(insertEndPos,BASES) ==
00444 getSequence(insertStartPos - 1, BASES)))
00445 {
00446
00447 --insertEndPos;
00448 --insertStartPos;
00449 }
00450
00451
00452 int shiftLen = currentPos - insertStartPos;
00453 if(shiftLen > 0)
00454 {
00455
00456
00457
00458
00459
00460
00461
00462
00463 if(myCigarRoller[nextOpIndex].operation ==
00464 myCigarRoller[prevOpIndex].operation)
00465 {
00466
00467
00468 myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
00469 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
00470
00471
00472
00473 if(myCigarRoller[prevOpIndex].count == 0)
00474 {
00475 myCigarRoller.Remove(prevOpIndex);
00476 }
00477 shifted = true;
00478 }
00479 else
00480 {
00481
00482
00483
00484 if(insertStartPos == prevOpStart)
00485 {
00486
00487
00488 myCigarRoller.Update(currentOp,
00489 myCigarRoller[prevOpIndex].operation,
00490 myCigarRoller[prevOpIndex].count);
00491
00492
00493 myCigarRoller.Update(prevOpIndex,
00494 Cigar::insert,
00495 shiftLen);
00496 shifted = true;
00497 }
00498 }
00499 }
00500
00501 currentPos += myCigarRoller[currentOp].count;
00502 }
00503 else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
00504 {
00505
00506 currentPos += myCigarRoller[currentOp].count;
00507 }
00508 }
00509 if(shifted)
00510 {
00511
00512
00513 setCigar(myCigarRoller);
00514 }
00515 return(shifted);
00516 }
00517
00518
00519
00520
00521 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00522 uint32_t fromBufferSize,
00523 SamFileHeader& header)
00524 {
00525 myStatus = SamStatus::SUCCESS;
00526 if((fromBuffer == NULL) || (fromBufferSize == 0))
00527 {
00528
00529 myStatus.setStatus(SamStatus::FAIL_PARSE,
00530 "Cannot parse an empty file.");
00531 return(SamStatus::FAIL_PARSE);
00532 }
00533
00534
00535 resetRecord();
00536
00537
00538 if(!allocateRecordStructure(fromBufferSize))
00539 {
00540
00541 return(SamStatus::FAIL_MEM);
00542 }
00543
00544 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00545
00546 setVariablesForNewBuffer(header);
00547
00548
00549 return(SamStatus::SUCCESS);
00550 }
00551
00552
00553
00554 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr,
00555 SamFileHeader& header)
00556 {
00557 myStatus = SamStatus::SUCCESS;
00558 if((filePtr == NULL) || (filePtr->isOpen() == false))
00559 {
00560
00561 myStatus.setStatus(SamStatus::FAIL_ORDER,
00562 "Can't read from an unopened file.");
00563 return(SamStatus::FAIL_ORDER);
00564 }
00565
00566
00567 resetRecord();
00568
00569
00570 int numBytes =
00571 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00572
00573
00574 if(ifeof(filePtr) && (numBytes == 0))
00575 {
00576
00577 myStatus.setStatus(SamStatus::NO_MORE_RECS,
00578 "No more records left to read.");
00579 return(SamStatus::NO_MORE_RECS);
00580 }
00581
00582 if(numBytes != sizeof(int32_t))
00583 {
00584
00585
00586 if(ifeof(filePtr))
00587 {
00588
00589
00590 myStatus.setStatus(SamStatus::FAIL_PARSE,
00591 "EOF reached in the middle of a record.");
00592 return(SamStatus::FAIL_PARSE);
00593 }
00594 else
00595 {
00596
00597 myStatus.setStatus(SamStatus::FAIL_IO,
00598 "Failed to read the record size.");
00599 return(SamStatus::FAIL_IO);
00600 }
00601 }
00602
00603
00604 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00605 {
00606
00607
00608 return(SamStatus::FAIL_MEM);
00609 }
00610
00611
00612 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00613 != (unsigned int)myRecordPtr->myBlockSize)
00614 {
00615
00616 resetRecord();
00617 myStatus.setStatus(SamStatus::FAIL_IO,
00618 "Failed to read the record");
00619 return(SamStatus::FAIL_IO);
00620 }
00621
00622 setVariablesForNewBuffer(header);
00623
00624
00625 return(SamStatus::SUCCESS);
00626 }
00627
00628
00629
00630
00631 bool SamRecord::addIntTag(const char* tag, int32_t value)
00632 {
00633 myStatus = SamStatus::SUCCESS;
00634 int key = 0;
00635 int index = 0;
00636 char bamvtype;
00637
00638 int tagBufferSize = 0;
00639
00640
00641 if(myNeedToSetTagsFromBuffer)
00642 {
00643 if(!setTagsFromBuffer())
00644 {
00645
00646 return(false);
00647 }
00648 }
00649
00650
00651
00652
00653
00654 if(value < 0)
00655 {
00656
00657
00658 if(value > std::numeric_limits<char>::min())
00659 {
00660
00661 bamvtype = 'c';
00662 tagBufferSize += 4;
00663 }
00664 else if(value > std::numeric_limits<short>::min())
00665 {
00666
00667 bamvtype = 's';
00668 tagBufferSize += 5;
00669 }
00670 else
00671 {
00672
00673 bamvtype = 'i';
00674 tagBufferSize += 7;
00675 }
00676 }
00677 else
00678 {
00679
00680 if(value < std::numeric_limits<unsigned char>::max())
00681 {
00682
00683 bamvtype = 'C';
00684 tagBufferSize += 4;
00685 }
00686 else if(value < std::numeric_limits<unsigned short>::max())
00687 {
00688
00689 bamvtype = 'S';
00690 tagBufferSize += 5;
00691 }
00692 else
00693 {
00694
00695 bamvtype = 'I';
00696 tagBufferSize += 7;
00697 }
00698 }
00699
00700
00701 key = MAKEKEY(tag[0], tag[1], bamvtype);
00702 unsigned int hashIndex = extras.Find(key);
00703 if(hashIndex != LH_NOTFOUND)
00704 {
00705
00706 index = extras[hashIndex];
00707
00708
00709 if((integers[index] == value) && (intType[index] == bamvtype))
00710 {
00711
00712 return(true);
00713 }
00714 else
00715 {
00716
00717
00718
00719 switch(intType[index])
00720 {
00721 case 'c':
00722 case 'C':
00723 tagBufferSize -= 4;
00724 break;
00725 case 's':
00726 case 'S':
00727 tagBufferSize -= 5;
00728 break;
00729 case 'i':
00730 case 'I':
00731 tagBufferSize -= 7;
00732 break;
00733 default:
00734 myStatus.setStatus(SamStatus::INVALID,
00735 "unknown tag inttype type found.\n");
00736 return(false);
00737 }
00738
00739
00740 integers[index] = value;
00741 intType[index] = bamvtype;
00742 }
00743 }
00744 else
00745 {
00746
00747 index = integers.Length();
00748
00749 integers.Push(value);
00750 intType.push_back(bamvtype);
00751
00752 extras.Add(key, index);
00753 }
00754
00755
00756 myNeedToSetTagsInBuffer = true;
00757 myIsTagsBufferValid = false;
00758 myIsBufferSynced = false;
00759 myTagBufferSize += tagBufferSize;
00760
00761 return(true);
00762 }
00763
00764
00765
00766
00767
00768 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00769 {
00770 if(vtype == 'i')
00771 {
00772
00773 int intVal = atoi(valuePtr);
00774 return(addIntTag(tag, intVal));
00775 }
00776
00777
00778 myStatus = SamStatus::SUCCESS;
00779 bool status = true;
00780 int key = 0;
00781 int index = 0;
00782
00783 int tagBufferSize = 0;
00784
00785
00786 if(myNeedToSetTagsFromBuffer)
00787 {
00788 if(!setTagsFromBuffer())
00789 {
00790
00791 return(false);
00792 }
00793 }
00794
00795
00796 key = MAKEKEY(tag[0], tag[1], vtype);
00797 unsigned int hashIndex = extras.Find(key);
00798 if(hashIndex != LH_NOTFOUND)
00799 {
00800
00801 index = extras[hashIndex];
00802
00803
00804 switch (vtype)
00805 {
00806 case 'A' :
00807
00808 if(integers[index] == (const int)*(valuePtr))
00809 {
00810
00811 return(true);
00812 }
00813 else
00814 {
00815
00816 integers[index] = (const int)*(valuePtr);
00817 intType[index] = vtype;
00818 }
00819 break;
00820 case 'Z' :
00821
00822 if(strings[index] == valuePtr)
00823 {
00824
00825 return(true);
00826 }
00827 else
00828 {
00829
00830 tagBufferSize -= strings[index].Length();
00831 strings[index] = valuePtr;
00832
00833 tagBufferSize += strings[index].Length();
00834 }
00835 break;
00836 case 'f' :
00837
00838 if(doubles[index] == atof(valuePtr))
00839 {
00840
00841 return(true);
00842 }
00843 else
00844 {
00845
00846 doubles[index] = atof(valuePtr);
00847 }
00848 break;
00849 default :
00850 fprintf(stderr,
00851 "samFile::ReadSAM() - Unknown custom field of type %c\n",
00852 vtype);
00853 myStatus.setStatus(SamStatus::FAIL_PARSE,
00854 "Unknown custom field in a tag");
00855 status = false;
00856 break;
00857 }
00858 }
00859 else
00860 {
00861
00862 switch (vtype)
00863 {
00864 case 'A' :
00865 index = integers.Length();
00866 integers.Push((const int)*(valuePtr));
00867 intType.push_back(vtype);
00868 tagBufferSize += 4;
00869 break;
00870 case 'Z' :
00871 index = strings.Length();
00872 strings.Push(valuePtr);
00873 tagBufferSize += 4 + strings.Last().Length();
00874 break;
00875 case 'f' :
00876 index = doubles.Length();
00877 doubles.Push(atof(valuePtr));
00878 tagBufferSize += 7;
00879 break;
00880 default :
00881 fprintf(stderr,
00882 "samFile::ReadSAM() - Unknown custom field of type %c\n",
00883 vtype);
00884 myStatus.setStatus(SamStatus::FAIL_PARSE,
00885 "Unknown custom field in a tag");
00886 status = false;
00887 break;
00888 }
00889 if(status)
00890 {
00891
00892 extras.Add(key, index);
00893 }
00894 }
00895
00896
00897 if(status)
00898 {
00899
00900 myNeedToSetTagsInBuffer = true;
00901 myIsTagsBufferValid = false;
00902 myIsBufferSynced = false;
00903 myTagBufferSize += tagBufferSize;
00904 }
00905 return(status);
00906 }
00907
00908
00909 void SamRecord::clearTags()
00910 {
00911 if(extras.Entries() != 0)
00912 {
00913 extras.Clear();
00914 }
00915 strings.Clear();
00916 integers.Clear();
00917 intType.clear();
00918 doubles.Clear();
00919 myTagBufferSize = 0;
00920 resetTagIter();
00921 }
00922
00923
00924 bool SamRecord::rmTag(const char* tag, char type)
00925 {
00926
00927 if(strlen(tag) != 2)
00928 {
00929
00930 myStatus.setStatus(SamStatus::INVALID,
00931 "rmTag called with tag that is not 2 characters\n");
00932 return(false);
00933 }
00934
00935 myStatus = SamStatus::SUCCESS;
00936 if(myNeedToSetTagsFromBuffer)
00937 {
00938 if(!setTagsFromBuffer())
00939 {
00940
00941
00942 return(false);
00943 }
00944 }
00945
00946
00947 int key = MAKEKEY(tag[0], tag[1], type);
00948
00949 int offset = extras.Find(key);
00950
00951 if(offset < 0)
00952 {
00953
00954
00955 return(true);
00956 }
00957
00958
00959
00960 char vtype;
00961 getTypeFromKey(key, vtype);
00962 if(vtype == 'i')
00963 {
00964 vtype = getIntegerType(offset);
00965 }
00966
00967
00968
00969
00970
00971 int rmBuffSize = 0;
00972 switch(vtype)
00973 {
00974 case 'A':
00975 case 'c':
00976 case 'C':
00977 rmBuffSize = 4;
00978 break;
00979 case 's':
00980 case 'S':
00981 rmBuffSize = 5;
00982 break;
00983 case 'i':
00984 case 'I':
00985 rmBuffSize = 7;
00986 break;
00987 case 'f':
00988 rmBuffSize = 7;
00989 break;
00990 case 'Z':
00991 rmBuffSize = 4 + getString(offset).Length();
00992 break;
00993 default:
00994 myStatus.setStatus(SamStatus::INVALID,
00995 "rmTag called with unknown type.\n");
00996 return(false);
00997 break;
00998 };
00999
01000
01001 myNeedToSetTagsInBuffer = true;
01002 myIsTagsBufferValid = false;
01003 myIsBufferSynced = false;
01004 myTagBufferSize -= rmBuffSize;
01005
01006
01007 extras.Delete(offset);
01008 return(true);
01009 }
01010
01011
01012 bool SamRecord::rmTags(const char* tags)
01013 {
01014 const char* currentTagPtr = tags;
01015
01016 myStatus = SamStatus::SUCCESS;
01017 if(myNeedToSetTagsFromBuffer)
01018 {
01019 if(!setTagsFromBuffer())
01020 {
01021
01022
01023 return(false);
01024 }
01025 }
01026
01027 bool returnStatus = true;
01028
01029 int rmBuffSize = 0;
01030 while(*currentTagPtr != '\0')
01031 {
01032
01033
01034
01035
01036 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
01037 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
01038 {
01039 myStatus.setStatus(SamStatus::INVALID,
01040 "rmTags called with improperly formatted tags.\n");
01041 returnStatus = false;
01042 break;
01043 }
01044
01045
01046 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
01047 currentTagPtr[3]);
01048
01049 int offset = extras.Find(key);
01050
01051 if(offset >= 0)
01052 {
01053
01054
01055 char vtype;
01056 getTypeFromKey(key, vtype);
01057 if(vtype == 'i')
01058 {
01059 vtype = getIntegerType(offset);
01060 }
01061
01062
01063
01064
01065
01066 switch(vtype)
01067 {
01068 case 'A':
01069 case 'c':
01070 case 'C':
01071 rmBuffSize += 4;
01072 break;
01073 case 's':
01074 case 'S':
01075 rmBuffSize += 5;
01076 break;
01077 case 'i':
01078 case 'I':
01079 rmBuffSize += 7;
01080 break;
01081 case 'f':
01082 rmBuffSize += 7;
01083 break;
01084 case 'Z':
01085 rmBuffSize += 4 + getString(offset).Length();
01086 break;
01087 default:
01088 myStatus.setStatus(SamStatus::INVALID,
01089 "rmTag called with unknown type.\n");
01090 returnStatus = false;
01091 break;
01092 };
01093
01094
01095 extras.Delete(offset);
01096 }
01097
01098 if(currentTagPtr[4] == ';')
01099 {
01100
01101 currentTagPtr += 5;
01102 }
01103 else if(currentTagPtr[4] != '\0')
01104 {
01105
01106 myStatus.setStatus(SamStatus::INVALID,
01107 "rmTags called with improperly formatted tags.\n");
01108 returnStatus = false;
01109 break;
01110 }
01111 else
01112 {
01113
01114 currentTagPtr += 4;
01115 }
01116 }
01117
01118
01119 myNeedToSetTagsInBuffer = true;
01120 myIsTagsBufferValid = false;
01121 myIsBufferSynced = false;
01122 myTagBufferSize -= rmBuffSize;
01123
01124
01125 return(returnStatus);
01126 }
01127
01128
01129
01130 const void* SamRecord::getRecordBuffer()
01131 {
01132 return(getRecordBuffer(mySequenceTranslation));
01133 }
01134
01135
01136
01137 const void* SamRecord::getRecordBuffer(SequenceTranslation translation)
01138 {
01139 myStatus = SamStatus::SUCCESS;
01140 bool status = true;
01141
01142
01143 if((myIsBufferSynced == false) ||
01144 (myBufferSequenceTranslation != translation))
01145 {
01146 status &= fixBuffer(translation);
01147 }
01148
01149 if(myNeedToSetTagsInBuffer)
01150 {
01151 status &= setTagsInBuffer();
01152 }
01153 if(!status)
01154 {
01155 return(NULL);
01156 }
01157 return (const void *)myRecordPtr;
01158 }
01159
01160
01161
01162
01163 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
01164 {
01165 return(writeRecordBuffer(filePtr, mySequenceTranslation));
01166 }
01167
01168
01169
01170 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr,
01171 SequenceTranslation translation)
01172 {
01173 myStatus = SamStatus::SUCCESS;
01174 if((filePtr == NULL) || (filePtr->isOpen() == false))
01175 {
01176
01177 myStatus.setStatus(SamStatus::FAIL_ORDER,
01178 "Can't write to an unopened file.");
01179 return(SamStatus::FAIL_ORDER);
01180 }
01181
01182 if((myIsBufferSynced == false) ||
01183 (myBufferSequenceTranslation != translation))
01184 {
01185 if(!fixBuffer(translation))
01186 {
01187 return(myStatus.getStatus());
01188 }
01189 }
01190
01191
01192 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
01193 unsigned int numBytesWritten =
01194 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
01195
01196
01197 if(numBytesToWrite == numBytesWritten)
01198 {
01199 return(SamStatus::SUCCESS);
01200 }
01201
01202 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
01203 return(SamStatus::FAIL_IO);
01204 }
01205
01206
01207 int32_t SamRecord::getBlockSize()
01208 {
01209 myStatus = SamStatus::SUCCESS;
01210
01211
01212 if(myIsBufferSynced == false)
01213 {
01214
01215
01216
01217 fixBuffer(myBufferSequenceTranslation);
01218 }
01219 return myRecordPtr->myBlockSize;
01220 }
01221
01222
01223
01224 const char* SamRecord::getReferenceName()
01225 {
01226 myStatus = SamStatus::SUCCESS;
01227 return myReferenceName.c_str();
01228 }
01229
01230
01231 int32_t SamRecord::getReferenceID()
01232 {
01233 myStatus = SamStatus::SUCCESS;
01234 return myRecordPtr->myReferenceID;
01235 }
01236
01237
01238 int32_t SamRecord::get1BasedPosition()
01239 {
01240 myStatus = SamStatus::SUCCESS;
01241 return (myRecordPtr->myPosition + 1);
01242 }
01243
01244
01245 int32_t SamRecord::get0BasedPosition()
01246 {
01247 myStatus = SamStatus::SUCCESS;
01248 return myRecordPtr->myPosition;
01249 }
01250
01251
01252 uint8_t SamRecord::getReadNameLength()
01253 {
01254 myStatus = SamStatus::SUCCESS;
01255
01256
01257 if(myIsReadNameBufferValid)
01258 {
01259 return(myRecordPtr->myReadNameLength);
01260 }
01261
01262 return(myReadName.Length() + 1);
01263 }
01264
01265
01266 uint8_t SamRecord::getMapQuality()
01267 {
01268 myStatus = SamStatus::SUCCESS;
01269 return myRecordPtr->myMapQuality;
01270 }
01271
01272
01273 uint16_t SamRecord::getBin()
01274 {
01275 myStatus = SamStatus::SUCCESS;
01276 if(!myIsBinValid)
01277 {
01278
01279
01280 myRecordPtr->myBin =
01281 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
01282 myIsBinValid = true;
01283 }
01284 return(myRecordPtr->myBin);
01285 }
01286
01287
01288 uint16_t SamRecord::getCigarLength()
01289 {
01290 myStatus = SamStatus::SUCCESS;
01291
01292
01293 if(myIsCigarBufferValid)
01294 {
01295 return myRecordPtr->myCigarLength;
01296 }
01297
01298 if(myCigarTempBufferLength == -1)
01299 {
01300
01301
01302 parseCigarString();
01303 }
01304
01305
01306 return(myCigarTempBufferLength);
01307 }
01308
01309
01310 uint16_t SamRecord::getFlag()
01311 {
01312 myStatus = SamStatus::SUCCESS;
01313 return myRecordPtr->myFlag;
01314 }
01315
01316
01317 int32_t SamRecord::getReadLength()
01318 {
01319 myStatus = SamStatus::SUCCESS;
01320 if(myIsSequenceBufferValid == false)
01321 {
01322
01323 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
01324 {
01325 return(0);
01326 }
01327
01328 return(mySequence.Length());
01329 }
01330 return(myRecordPtr->myReadLength);
01331 }
01332
01333
01334
01335
01336 const char* SamRecord::getMateReferenceName()
01337 {
01338 myStatus = SamStatus::SUCCESS;
01339 return myMateReferenceName.c_str();
01340 }
01341
01342
01343
01344
01345
01346 const char* SamRecord::getMateReferenceNameOrEqual()
01347 {
01348 myStatus = SamStatus::SUCCESS;
01349 if(myMateReferenceName == "*")
01350 {
01351 return(myMateReferenceName);
01352 }
01353 if(myMateReferenceName == getReferenceName())
01354 {
01355 return(FIELD_ABSENT_STRING);
01356 }
01357 else
01358 {
01359 return(myMateReferenceName);
01360 }
01361 }
01362
01363
01364 int32_t SamRecord::getMateReferenceID()
01365 {
01366 myStatus = SamStatus::SUCCESS;
01367 return myRecordPtr->myMateReferenceID;
01368 }
01369
01370
01371 int32_t SamRecord::get1BasedMatePosition()
01372 {
01373 myStatus = SamStatus::SUCCESS;
01374 return (myRecordPtr->myMatePosition + 1);
01375 }
01376
01377
01378 int32_t SamRecord::get0BasedMatePosition()
01379 {
01380 myStatus = SamStatus::SUCCESS;
01381 return myRecordPtr->myMatePosition;
01382 }
01383
01384
01385 int32_t SamRecord::getInsertSize()
01386 {
01387 myStatus = SamStatus::SUCCESS;
01388 return myRecordPtr->myInsertSize;
01389 }
01390
01391
01392
01393 int32_t SamRecord::get0BasedAlignmentEnd()
01394 {
01395 myStatus = SamStatus::SUCCESS;
01396 if(myAlignmentLength == -1)
01397 {
01398
01399 parseCigar();
01400 }
01401
01402 if(myAlignmentLength == 0)
01403 {
01404
01405 return(myRecordPtr->myPosition);
01406 }
01407 return(myRecordPtr->myPosition + myAlignmentLength - 1);
01408 }
01409
01410
01411
01412 int32_t SamRecord::get1BasedAlignmentEnd()
01413 {
01414 return(get0BasedAlignmentEnd() + 1);
01415 }
01416
01417
01418
01419 int32_t SamRecord::getAlignmentLength()
01420 {
01421 myStatus = SamStatus::SUCCESS;
01422 if(myAlignmentLength == -1)
01423 {
01424
01425 parseCigar();
01426 }
01427
01428 return(myAlignmentLength);
01429 }
01430
01431
01432 int32_t SamRecord::get0BasedUnclippedStart()
01433 {
01434 myStatus = SamStatus::SUCCESS;
01435 if(myUnclippedStartOffset == -1)
01436 {
01437
01438 parseCigar();
01439 }
01440 return(myRecordPtr->myPosition - myUnclippedStartOffset);
01441 }
01442
01443
01444
01445 int32_t SamRecord::get1BasedUnclippedStart()
01446 {
01447 return(get0BasedUnclippedStart() + 1);
01448 }
01449
01450
01451
01452 int32_t SamRecord::get0BasedUnclippedEnd()
01453 {
01454
01455
01456 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
01457 }
01458
01459
01460
01461 int32_t SamRecord::get1BasedUnclippedEnd()
01462 {
01463 return(get0BasedUnclippedEnd() + 1);
01464 }
01465
01466
01467
01468 const char* SamRecord::getReadName()
01469 {
01470 myStatus = SamStatus::SUCCESS;
01471 if(myReadName.Length() == 0)
01472 {
01473
01474
01475 myReadName = (char*)&(myRecordPtr->myData);
01476 }
01477 return myReadName.c_str();
01478 }
01479
01480
01481 const char* SamRecord::getCigar()
01482 {
01483 myStatus = SamStatus::SUCCESS;
01484 if(myCigar.Length() == 0)
01485 {
01486
01487
01488 parseCigarBinary();
01489 }
01490 return myCigar.c_str();
01491 }
01492
01493
01494 const char* SamRecord::getSequence()
01495 {
01496 return(getSequence(mySequenceTranslation));
01497 }
01498
01499
01500 const char* SamRecord::getSequence(SequenceTranslation translation)
01501 {
01502 myStatus = SamStatus::SUCCESS;
01503 if(mySequence.Length() == 0)
01504 {
01505
01506
01507 setSequenceAndQualityFromBuffer();
01508 }
01509
01510
01511 if((translation == NONE) || (myRefPtr == NULL))
01512 {
01513 return mySequence.c_str();
01514 }
01515 else if(translation == EQUAL)
01516 {
01517 if(mySeqWithEq.length() == 0)
01518 {
01519
01520 if(mySequence == "*")
01521 {
01522
01523 mySeqWithEq = '*';
01524 }
01525 else
01526 {
01527
01528 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01529 myRecordPtr->myPosition,
01530 *(getCigarInfo()),
01531 getReferenceName(),
01532 *myRefPtr,
01533 mySeqWithEq);
01534 }
01535 }
01536 return(mySeqWithEq.c_str());
01537 }
01538 else
01539 {
01540
01541 if(mySeqWithoutEq.length() == 0)
01542 {
01543 if(mySequence == "*")
01544 {
01545
01546 mySeqWithoutEq = '*';
01547 }
01548 else
01549 {
01550
01551 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01552 myRecordPtr->myPosition,
01553 *(getCigarInfo()),
01554 getReferenceName(),
01555 *myRefPtr,
01556 mySeqWithoutEq);
01557 }
01558 }
01559 return(mySeqWithoutEq.c_str());
01560 }
01561 }
01562
01563
01564 const char* SamRecord::getQuality()
01565 {
01566 myStatus = SamStatus::SUCCESS;
01567 if(myQuality.Length() == 0)
01568 {
01569
01570
01571 setSequenceAndQualityFromBuffer();
01572 }
01573 return myQuality.c_str();
01574 }
01575
01576
01577 char SamRecord::getSequence(int index)
01578 {
01579 return(getSequence(index, mySequenceTranslation));
01580 }
01581
01582
01583 char SamRecord::getSequence(int index, SequenceTranslation translation)
01584 {
01585 static const char * asciiBases = "=AC.G...T......N";
01586
01587
01588 int32_t readLen = getReadLength();
01589
01590
01591 if(readLen == 0)
01592 {
01593 String exceptionString = "SamRecord::getSequence(";
01594 exceptionString += index;
01595 exceptionString += ") is not allowed since sequence = '*'";
01596 throw std::runtime_error(exceptionString.c_str());
01597 }
01598 else if((index < 0) || (index >= readLen))
01599 {
01600
01601 String exceptionString = "SamRecord::getSequence(";
01602 exceptionString += index;
01603 exceptionString += ") is out of range. Index must be between 0 and ";
01604 exceptionString += (readLen - 1);
01605 throw std::runtime_error(exceptionString.c_str());
01606 }
01607
01608
01609 if((translation == NONE) || (myRefPtr == NULL))
01610 {
01611
01612 if(mySequence.Length() == 0)
01613 {
01614
01615
01616
01617 unsigned char * packedSequence =
01618 (unsigned char *)myRecordPtr->myData +
01619 myRecordPtr->myReadNameLength +
01620 myRecordPtr->myCigarLength * sizeof(int);
01621
01622 return(index & 1 ?
01623 asciiBases[packedSequence[index / 2] & 0xF] :
01624 asciiBases[packedSequence[index / 2] >> 4]);
01625 }
01626
01627 return(mySequence[index]);
01628 }
01629 else
01630 {
01631
01632
01633
01634 if(mySequence.Length() == 0)
01635 {
01636
01637
01638 setSequenceAndQualityFromBuffer();
01639 }
01640
01641
01642 if(translation == EQUAL)
01643 {
01644
01645
01646 if(mySeqWithEq.length() == 0)
01647 {
01648
01649
01650
01651 if(mySequence == "*")
01652 {
01653
01654 mySeqWithEq = '*';
01655 }
01656 else
01657 {
01658
01659 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01660 myRecordPtr->myPosition,
01661 *(getCigarInfo()),
01662 getReferenceName(),
01663 *myRefPtr,
01664 mySeqWithEq);
01665 }
01666 }
01667
01668 return(mySeqWithEq[index]);
01669 }
01670 else
01671 {
01672
01673
01674
01675 if(mySeqWithoutEq.length() == 0)
01676 {
01677
01678
01679
01680 if(mySequence == "*")
01681 {
01682
01683 mySeqWithoutEq = '*';
01684 }
01685 else
01686 {
01687
01688
01689
01690 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01691 myRecordPtr->myPosition,
01692 *(getCigarInfo()),
01693 getReferenceName(),
01694 *myRefPtr,
01695 mySeqWithoutEq);
01696 }
01697 }
01698
01699 return(mySeqWithoutEq[index]);
01700 }
01701 }
01702 }
01703
01704
01705 char SamRecord::getQuality(int index)
01706 {
01707
01708 int32_t readLen = getReadLength();
01709
01710
01711
01712 if(readLen == 0)
01713 {
01714 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01715 }
01716 else if((index < 0) || (index >= readLen))
01717 {
01718
01719 String exceptionString = "SamRecord::getQuality(";
01720 exceptionString += index;
01721 exceptionString += ") is out of range. Index must be between 0 and ";
01722 exceptionString += (readLen - 1);
01723 throw std::runtime_error(exceptionString.c_str());
01724 }
01725
01726 if(myQuality.Length() == 0)
01727 {
01728
01729 unsigned char * packedQuality =
01730 (unsigned char *)myRecordPtr->myData +
01731 myRecordPtr->myReadNameLength +
01732 myRecordPtr->myCigarLength * sizeof(int) +
01733 (myRecordPtr->myReadLength + 1) / 2;
01734 return(packedQuality[index] + 33);
01735 }
01736 else
01737 {
01738
01739 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01740 {
01741
01742 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01743 }
01744 else
01745 {
01746 return(myQuality[index]);
01747 }
01748 }
01749 }
01750
01751
01752 Cigar* SamRecord::getCigarInfo()
01753 {
01754
01755
01756
01757
01758 if(myAlignmentLength == -1)
01759 {
01760
01761 parseCigar();
01762 }
01763 return(&myCigarRoller);
01764 }
01765
01766
01767
01768
01769 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01770 {
01771
01772
01773 if(myAlignmentLength == -1)
01774 {
01775 parseCigar();
01776 }
01777 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01778 }
01779
01780
01781
01782 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01783 String& cigar, String& sequence, String& quality)
01784 {
01785 return(getFields(recStruct, readName, cigar, sequence, quality,
01786 mySequenceTranslation));
01787 }
01788
01789
01790
01791 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01792 String& cigar, String& sequence, String& quality,
01793 SequenceTranslation translation)
01794 {
01795 myStatus = SamStatus::SUCCESS;
01796 if(myIsBufferSynced == false)
01797 {
01798 if(!fixBuffer(translation))
01799 {
01800
01801 return(false);
01802 }
01803 }
01804 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01805
01806 readName = getReadName();
01807
01808 if(myStatus != SamStatus::SUCCESS)
01809 {
01810
01811 return(false);
01812 }
01813 cigar = getCigar();
01814
01815 if(myStatus != SamStatus::SUCCESS)
01816 {
01817
01818 return(false);
01819 }
01820 sequence = getSequence(translation);
01821
01822 if(myStatus != SamStatus::SUCCESS)
01823 {
01824
01825 return(false);
01826 }
01827 quality = getQuality();
01828
01829 if(myStatus != SamStatus::SUCCESS)
01830 {
01831
01832 return(false);
01833 }
01834 return(true);
01835 }
01836
01837
01838
01839 GenomeSequence* SamRecord::getReference()
01840 {
01841 return(myRefPtr);
01842 }
01843
01844
01845 uint32_t SamRecord::getTagLength()
01846 {
01847 myStatus = SamStatus::SUCCESS;
01848 if(myNeedToSetTagsFromBuffer)
01849 {
01850
01851
01852 unsigned char * tagStart =
01853 (unsigned char *)myRecordPtr->myData
01854 + myRecordPtr->myReadNameLength
01855 + myRecordPtr->myCigarLength * sizeof(int)
01856 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01857
01858
01859
01860
01861 uint32_t nonTagSize =
01862 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01863
01864 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01865 return(tagSize);
01866 }
01867
01868
01869 return(myTagBufferSize);
01870 }
01871
01872
01873
01874
01875
01876
01877
01878 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01879 {
01880 myStatus = SamStatus::SUCCESS;
01881 if(myNeedToSetTagsFromBuffer)
01882 {
01883 if(!setTagsFromBuffer())
01884 {
01885
01886
01887 return(false);
01888 }
01889 }
01890
01891
01892
01893 myLastTagIndex++;
01894 int maxTagIndex = extras.Capacity();
01895 if(myLastTagIndex >= maxTagIndex)
01896 {
01897
01898
01899
01900 return(false);
01901 }
01902
01903 bool tagFound = false;
01904
01905 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01906 {
01907 if(extras.SlotInUse(myLastTagIndex))
01908 {
01909
01910 int key = extras.GetKey(myLastTagIndex);
01911 getTag(key, tag);
01912 getTypeFromKey(key, vtype);
01913 tagFound = true;
01914
01915 switch (vtype)
01916 {
01917 case 'f' :
01918 *value = getDoublePtr(myLastTagIndex);
01919 break;
01920 case 'i' :
01921 *value = getIntegerPtr(myLastTagIndex, vtype);
01922 if(vtype != 'A')
01923 {
01924
01925 vtype = 'i';
01926 }
01927 break;
01928 case 'Z' :
01929 *value = getStringPtr(myLastTagIndex);
01930 break;
01931 default:
01932 myStatus.setStatus(SamStatus::FAIL_PARSE,
01933 "Unknown tag type");
01934 tagFound = false;
01935 break;
01936 }
01937 }
01938 if(!tagFound)
01939 {
01940
01941 myLastTagIndex++;
01942 }
01943 }
01944 return(tagFound);
01945 }
01946
01947
01948
01949 void SamRecord::resetTagIter()
01950 {
01951 myLastTagIndex = -1;
01952 }
01953
01954
01955 bool SamRecord::isIntegerType(char vtype) const
01956 {
01957 if((vtype == 'c') || (vtype == 'C') ||
01958 (vtype == 's') || (vtype == 'S') ||
01959 (vtype == 'i') || (vtype == 'I'))
01960 {
01961 return(true);
01962 }
01963 return(false);
01964 }
01965
01966
01967 bool SamRecord::isDoubleType(char vtype) const
01968 {
01969 if(vtype == 'f')
01970 {
01971 return(true);
01972 }
01973 return(false);
01974 }
01975
01976
01977 bool SamRecord::isCharType(char vtype) const
01978 {
01979 if(vtype == 'A')
01980 {
01981 return(true);
01982 }
01983 return(false);
01984 }
01985
01986
01987 bool SamRecord::isStringType(char vtype) const
01988 {
01989 if(vtype == 'Z')
01990 {
01991 return(true);
01992 }
01993 return(false);
01994 }
01995
01996
01997 bool SamRecord::getTagsString(const char* tags, String& returnString, char delim)
01998 {
01999 const char* currentTagPtr = tags;
02000
02001 returnString.Clear();
02002 myStatus = SamStatus::SUCCESS;
02003 if(myNeedToSetTagsFromBuffer)
02004 {
02005 if(!setTagsFromBuffer())
02006 {
02007
02008
02009 return(false);
02010 }
02011 }
02012
02013 bool returnStatus = true;
02014
02015 while(*currentTagPtr != '\0')
02016 {
02017
02018
02019
02020 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
02021 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
02022 {
02023 myStatus.setStatus(SamStatus::INVALID,
02024 "getTagsString called with improperly formatted tags.\n");
02025 returnStatus = false;
02026 break;
02027 }
02028
02029
02030 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
02031 currentTagPtr[3]);
02032
02033 int offset = extras.Find(key);
02034
02035 if(offset >= 0)
02036 {
02037
02038 if(!returnString.IsEmpty())
02039 {
02040 returnString += delim;
02041 }
02042 returnString += currentTagPtr[0];
02043 returnString += currentTagPtr[1];
02044 returnString += ':';
02045 returnString += currentTagPtr[3];
02046 returnString += ':';
02047
02048
02049 char vtype;
02050 getTypeFromKey(key, vtype);
02051
02052
02053
02054
02055
02056
02057 switch(vtype)
02058 {
02059 case 'i':
02060 returnString += *(int*)getIntegerPtr(offset, vtype);
02061 break;
02062 case 'f':
02063 returnString += *(double*)getDoublePtr(offset);
02064 break;
02065 case 'Z':
02066 returnString += *(String*)getStringPtr(offset);
02067 break;
02068 default:
02069 myStatus.setStatus(SamStatus::INVALID,
02070 "rmTag called with unknown type.\n");
02071 returnStatus = false;
02072 break;
02073 };
02074 }
02075
02076 if(currentTagPtr[4] == ';')
02077 {
02078
02079 currentTagPtr += 5;
02080 }
02081 else if(currentTagPtr[4] != '\0')
02082 {
02083
02084 myStatus.setStatus(SamStatus::INVALID,
02085 "rmTags called with improperly formatted tags.\n");
02086 returnStatus = false;
02087 break;
02088 }
02089 else
02090 {
02091
02092 currentTagPtr += 4;
02093 }
02094 }
02095 return(returnStatus);
02096 }
02097
02098
02099 String* SamRecord::getStringTag(const char * tag)
02100 {
02101
02102 if(myNeedToSetTagsFromBuffer)
02103 {
02104 if(!setTagsFromBuffer())
02105 {
02106
02107
02108
02109 return(NULL);
02110 }
02111 }
02112
02113 int key = MAKEKEY(tag[0], tag[1], 'Z');
02114 int offset = extras.Find(key);
02115
02116 int value;
02117 if (offset < 0)
02118 {
02119
02120 return(NULL);
02121 }
02122
02123
02124 value = extras[offset];
02125 return(&(strings[value]));
02126 }
02127
02128
02129 int* SamRecord::getIntegerTag(const char * tag)
02130 {
02131
02132 myStatus = SamStatus::SUCCESS;
02133
02134 if(myNeedToSetTagsFromBuffer)
02135 {
02136 if(!setTagsFromBuffer())
02137 {
02138
02139
02140
02141 return(NULL);
02142 }
02143 }
02144
02145 int key = MAKEKEY(tag[0], tag[1], 'i');
02146 int offset = extras.Find(key);
02147
02148 int value;
02149 if (offset < 0)
02150 {
02151
02152 return(NULL);
02153 }
02154 else
02155 value = extras[offset];
02156
02157 return(&(integers[value]));
02158 }
02159
02160
02161 double* SamRecord::getDoubleTag(const char * tag)
02162 {
02163
02164 myStatus = SamStatus::SUCCESS;
02165
02166 if(myNeedToSetTagsFromBuffer)
02167 {
02168 if(!setTagsFromBuffer())
02169 {
02170
02171
02172
02173 return(NULL);
02174 }
02175 }
02176
02177 int key = MAKEKEY(tag[0], tag[1], 'f');
02178 int offset = extras.Find(key);
02179
02180 int value;
02181 if (offset < 0)
02182 {
02183
02184 return(NULL);
02185 }
02186 else
02187 value = extras[offset];
02188
02189 return(&(doubles[value]));
02190 }
02191
02192
02193 String & SamRecord::getString(const char * tag)
02194 {
02195
02196 myStatus = SamStatus::SUCCESS;
02197
02198 if(myNeedToSetTagsFromBuffer)
02199 {
02200 if(!setTagsFromBuffer())
02201 {
02202
02203
02204
02205 }
02206 }
02207
02208 int key = MAKEKEY(tag[0], tag[1], 'Z');
02209 int offset = extras.Find(key);
02210
02211 int value;
02212 if (offset < 0)
02213 {
02214
02215 return(NOT_FOUND_TAG_STRING);
02216 }
02217 else
02218 value = extras[offset];
02219
02220 return strings[value];
02221 }
02222
02223 int & SamRecord::getInteger(const char * tag)
02224 {
02225
02226 myStatus = SamStatus::SUCCESS;
02227
02228 if(myNeedToSetTagsFromBuffer)
02229 {
02230 if(!setTagsFromBuffer())
02231 {
02232
02233
02234
02235 }
02236 }
02237
02238 int key = MAKEKEY(tag[0], tag[1], 'i');
02239 int offset = extras.Find(key);
02240
02241 int value;
02242 if (offset < 0)
02243 {
02244
02245 return NOT_FOUND_TAG_INT;
02246 }
02247 else
02248 value = extras[offset];
02249
02250 return integers[value];
02251 }
02252
02253
02254 double & SamRecord::getDouble(const char * tag)
02255 {
02256
02257 myStatus = SamStatus::SUCCESS;
02258
02259 if(myNeedToSetTagsFromBuffer)
02260 {
02261 if(!setTagsFromBuffer())
02262 {
02263
02264
02265
02266 }
02267 }
02268
02269 int key = MAKEKEY(tag[0], tag[1], 'f');
02270 int offset = extras.Find(key);
02271
02272 int value;
02273 if (offset < 0)
02274 {
02275
02276 return NOT_FOUND_TAG_DOUBLE;
02277 }
02278 else
02279 value = extras[offset];
02280
02281 return doubles[value];
02282 }
02283
02284
02285 bool SamRecord::checkTag(const char * tag, char type)
02286 {
02287
02288 myStatus = SamStatus::SUCCESS;
02289
02290 if(myNeedToSetTagsFromBuffer)
02291 {
02292 if(!setTagsFromBuffer())
02293 {
02294
02295
02296 return("");
02297 }
02298 }
02299
02300 int key = MAKEKEY(tag[0], tag[1], type);
02301
02302 return (extras.Find(key) != LH_NOTFOUND);
02303 }
02304
02305
02306
02307 const SamStatus& SamRecord::getStatus()
02308 {
02309 return(myStatus);
02310 }
02311
02312
02313
02314
02315
02316 bool SamRecord::allocateRecordStructure(int size)
02317 {
02318 if (allocatedSize < size)
02319 {
02320 bamRecordStruct* tmpRecordPtr =
02321 (bamRecordStruct *)realloc(myRecordPtr, size);
02322 if(tmpRecordPtr == NULL)
02323 {
02324
02325 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02326 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
02327 return(false);
02328 }
02329
02330 myRecordPtr = tmpRecordPtr;
02331 allocatedSize = size;
02332 }
02333 return(true);
02334 }
02335
02336
02337
02338 void* SamRecord::getStringPtr(int index)
02339 {
02340 int value = extras[index];
02341
02342 return &(strings[value]);
02343 }
02344
02345 void* SamRecord::getIntegerPtr(int offset, char& type)
02346 {
02347 int value = extras[offset];
02348
02349 type = intType[value];
02350
02351 return &(integers[value]);
02352 }
02353
02354 void* SamRecord::getDoublePtr(int offset)
02355 {
02356 int value = extras[offset];
02357
02358 return &(doubles[value]);
02359 }
02360
02361
02362
02363 bool SamRecord::fixBuffer(SequenceTranslation translation)
02364 {
02365
02366 if(myIsBufferSynced &&
02367 (myBufferSequenceTranslation == translation))
02368 {
02369
02370 return(true);
02371 }
02372
02373
02374 if(!myIsBinValid)
02375 {
02376
02377
02378 myRecordPtr->myBin =
02379 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
02380 myIsBinValid = true;
02381 }
02382
02383
02384 bool status = true;
02385
02386
02387 uint8_t newReadNameLen = getReadNameLength();
02388 uint16_t newCigarLen = getCigarLength();
02389 int32_t newReadLen = getReadLength();
02390 uint32_t newTagLen = getTagLength();
02391 uint32_t bamSequenceLen = (newReadLen+1)/2;
02392
02393
02394
02395
02396 int newBufferSize =
02397 ((unsigned char*)(&(myRecordPtr->myData)) -
02398 (unsigned char*)myRecordPtr) +
02399 newReadNameLen + ((newCigarLen)*4) +
02400 newReadLen + bamSequenceLen + newTagLen;
02401
02402 if(!allocateRecordStructure(newBufferSize))
02403 {
02404
02405 return(false);
02406 }
02407
02408
02409
02410
02411
02412
02413 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
02414 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
02415 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
02416
02417
02418
02419 if(myIsTagsBufferValid &&
02420 (readNameLenChange | cigarLenChange | readLenChange))
02421 {
02422 status &= setTagsFromBuffer();
02423
02424
02425 myIsTagsBufferValid = false;
02426 }
02427
02428
02429
02430
02431 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
02432 (readNameLenChange | cigarLenChange | readLenChange))
02433 {
02434 setSequenceAndQualityFromBuffer();
02435
02436
02437 myIsQualityBufferValid = false;
02438 myIsSequenceBufferValid = false;
02439 }
02440
02441
02442
02443 if((myIsCigarBufferValid) &&
02444 (readNameLenChange))
02445 {
02446 status &= parseCigarBinary();
02447 myIsCigarBufferValid = false;
02448 }
02449
02450
02451 if(!myIsReadNameBufferValid)
02452 {
02453 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
02454 newReadNameLen);
02455
02456
02457 myRecordPtr->myReadNameLength = newReadNameLen;
02458 myIsReadNameBufferValid = true;
02459 }
02460
02461 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02462 myRecordPtr->myReadNameLength;
02463
02464
02465 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02466
02467 if(!myIsCigarBufferValid)
02468 {
02469
02470
02471 myRecordPtr->myCigarLength = newCigarLen;
02472 memcpy(packedCigar, myCigarTempBuffer,
02473 myRecordPtr->myCigarLength * sizeof(uint32_t));
02474
02475 myIsCigarBufferValid = true;
02476 }
02477
02478 unsigned char * packedSequence = readNameEnds +
02479 myRecordPtr->myCigarLength * sizeof(int);
02480 unsigned char * packedQuality = packedSequence + bamSequenceLen;
02481
02482 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
02483 (myBufferSequenceTranslation != translation))
02484 {
02485 myRecordPtr->myReadLength = newReadLen;
02486
02487
02488 bool noQuality = false;
02489 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
02490 {
02491 noQuality = true;
02492 }
02493
02494 const char* translatedSeq = NULL;
02495
02496
02497
02498 if((!myIsSequenceBufferValid) ||
02499 (translation != myBufferSequenceTranslation))
02500 {
02501 translatedSeq = getSequence(translation);
02502 }
02503
02504 for (int i = 0; i < myRecordPtr->myReadLength; i++)
02505 {
02506 if((!myIsSequenceBufferValid) ||
02507 (translation != myBufferSequenceTranslation))
02508 {
02509
02510 int seqVal = 0;
02511 switch(translatedSeq[i])
02512 {
02513 case '=':
02514 seqVal = 0;
02515 break;
02516 case 'A':
02517 case 'a':
02518 seqVal = 1;
02519 break;
02520 case 'C':
02521 case 'c':
02522 seqVal = 2;
02523 break;
02524 case 'G':
02525 case 'g':
02526 seqVal = 4;
02527 break;
02528 case 'T':
02529 case 't':
02530 seqVal = 8;
02531 break;
02532 case 'N':
02533 case 'n':
02534 case '.':
02535 seqVal = 15;
02536 break;
02537 default:
02538 myStatus.addError(SamStatus::FAIL_PARSE,
02539 "Unknown Sequence character found.");
02540 status = false;
02541 break;
02542 };
02543
02544 if(i & 1)
02545 {
02546
02547
02548 packedSequence[i/2] |= seqVal;
02549 }
02550 else
02551 {
02552
02553 packedSequence[i/2] = seqVal << 4;
02554 }
02555 }
02556
02557 if(!myIsQualityBufferValid)
02558 {
02559
02560 if((noQuality) || (myQuality.Length() <= i))
02561 {
02562
02563
02564 packedQuality[i] = 0xFF;
02565 }
02566 else
02567 {
02568
02569 packedQuality[i] = myQuality[i] - 33;
02570 }
02571 }
02572 }
02573 myIsQualityBufferValid = true;
02574 myIsSequenceBufferValid = true;
02575 myBufferSequenceTranslation = translation;
02576 }
02577
02578 if(!myIsTagsBufferValid)
02579 {
02580 status &= setTagsInBuffer();
02581 }
02582
02583
02584 myRecordPtr->myReadNameLength = newReadNameLen;
02585 myRecordPtr->myCigarLength = newCigarLen;
02586 myRecordPtr->myReadLength = newReadLen;
02587
02588
02589
02590 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
02591
02592 if(status)
02593 {
02594 myIsBufferSynced = true;
02595 }
02596
02597 return(status);
02598 }
02599
02600
02601
02602
02603
02604 void SamRecord::setSequenceAndQualityFromBuffer()
02605 {
02606
02607
02608
02609
02610
02611
02612 bool extractSequence = false;
02613 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
02614 {
02615 extractSequence = true;
02616 }
02617
02618
02619 bool extractQuality = false;
02620 if(myIsQualityBufferValid && (myQuality.Length() == 0))
02621 {
02622 extractQuality = true;
02623 }
02624
02625
02626
02627 if(!extractSequence && !extractQuality)
02628 {
02629 return;
02630 }
02631
02632
02633 if(extractSequence)
02634 {
02635 mySequence.SetLength(myRecordPtr->myReadLength);
02636 }
02637 if(extractQuality)
02638 {
02639 myQuality.SetLength(myRecordPtr->myReadLength);
02640 }
02641
02642 unsigned char * readNameEnds =
02643 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02644 unsigned char * packedSequence =
02645 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02646 unsigned char * packedQuality =
02647 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02648
02649 const char * asciiBases = "=AC.G...T......N";
02650
02651
02652
02653 bool qualitySpecified = false;
02654
02655 for (int i = 0; i < myRecordPtr->myReadLength; i++)
02656 {
02657 if(extractSequence)
02658 {
02659 mySequence[i] = i & 1 ?
02660 asciiBases[packedSequence[i / 2] & 0xF] :
02661 asciiBases[packedSequence[i / 2] >> 4];
02662 }
02663
02664 if(extractQuality)
02665 {
02666 if(packedQuality[i] != 0xFF)
02667 {
02668
02669 qualitySpecified = true;
02670 }
02671
02672 myQuality[i] = packedQuality[i] + 33;
02673 }
02674 }
02675
02676
02677 if(myRecordPtr->myReadLength == 0)
02678 {
02679 if(extractSequence)
02680 {
02681 mySequence = "*";
02682 }
02683 if(extractQuality)
02684 {
02685 myQuality = "*";
02686 }
02687 }
02688 else if(extractQuality && !qualitySpecified)
02689 {
02690
02691 myQuality = "*";
02692 }
02693 }
02694
02695
02696
02697 bool SamRecord::parseCigar()
02698 {
02699
02700 if(myCigar.Length() == 0)
02701 {
02702
02703 return(parseCigarBinary());
02704 }
02705 return(parseCigarString());
02706 }
02707
02708
02709 bool SamRecord::parseCigarBinary()
02710 {
02711
02712
02713
02714 if(myCigar.Length() != 0)
02715 {
02716
02717 return(true);
02718 }
02719
02720 unsigned char * readNameEnds =
02721 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02722
02723 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02724
02725 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
02726
02727 myCigarRoller.getCigarString(myCigar);
02728
02729 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02730
02731 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02732 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02733
02734
02735 if(myRecordPtr->myCigarLength == 0)
02736 {
02737 myCigar = "*";
02738 return(true);
02739 }
02740
02741
02742 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
02743 if(newBufferSize > myCigarTempBufferAllocatedSize)
02744 {
02745 uint32_t* tempBufferPtr =
02746 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02747 if(tempBufferPtr == NULL)
02748 {
02749
02750
02751 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02752 myStatus.addError(SamStatus::FAIL_MEM,
02753 "Failed to Allocate Memory.");
02754 return(false);
02755 }
02756 myCigarTempBuffer = tempBufferPtr;
02757 myCigarTempBufferAllocatedSize = newBufferSize;
02758 }
02759
02760 memcpy(myCigarTempBuffer, packedCigar,
02761 myRecordPtr->myCigarLength * sizeof(uint32_t));
02762
02763
02764 myCigarTempBufferLength = myRecordPtr->myCigarLength;
02765
02766 return(true);
02767 }
02768
02769
02770 bool SamRecord::parseCigarString()
02771 {
02772 myCigarTempBufferLength = 0;
02773 if(myCigar == "*")
02774 {
02775
02776 myAlignmentLength = 0;
02777 myUnclippedStartOffset = 0;
02778 myUnclippedEndOffset = 0;
02779 myCigarRoller.clear();
02780 return(true);
02781 }
02782
02783 myCigarRoller.Set(myCigar);
02784
02785 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02786
02787 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02788 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02789
02790
02791
02792
02793 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
02794 if(newBufferSize > myCigarTempBufferAllocatedSize)
02795 {
02796 uint32_t* tempBufferPtr =
02797 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02798 if(tempBufferPtr == NULL)
02799 {
02800
02801
02802 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02803 myStatus.addError(SamStatus::FAIL_MEM,
02804 "Failed to Allocate Memory.");
02805 return(false);
02806 }
02807 myCigarTempBuffer = tempBufferPtr;
02808 myCigarTempBufferAllocatedSize = newBufferSize;
02809 }
02810
02811
02812 bool status = true;
02813
02814
02815 char *cigarOp;
02816 const char* cigarEntryStart = myCigar.c_str();
02817 int opLen = 0;
02818 int op = 0;
02819
02820 unsigned int * packedCigar = myCigarTempBuffer;
02821
02822
02823 const char* endCigarString = cigarEntryStart + myCigar.Length();
02824 while(cigarEntryStart < endCigarString)
02825 {
02826 bool validCigarEntry = true;
02827
02828
02829 opLen = strtol(cigarEntryStart, &cigarOp, 10);
02830
02831 switch(*cigarOp)
02832 {
02833 case('M'):
02834 op = 0;
02835 break;
02836 case('I'):
02837
02838
02839 op = 1;
02840 break;
02841 case('D'):
02842 op = 2;
02843 break;
02844 case('N'):
02845 op = 3;
02846 break;
02847 case('S'):
02848 op = 4;
02849 break;
02850 case('H'):
02851 op = 5;
02852 break;
02853 case('P'):
02854 op = 6;
02855 break;
02856 default:
02857 fprintf(stderr, "ERROR parsing cigar\n");
02858 validCigarEntry = false;
02859 status = false;
02860 myStatus.addError(SamStatus::FAIL_PARSE,
02861 "Unknown operation found when parsing the Cigar.");
02862 break;
02863 };
02864 if(validCigarEntry)
02865 {
02866
02867 ++myCigarTempBufferLength;
02868 *packedCigar = (opLen << 4) | op;
02869 packedCigar++;
02870 }
02871
02872 cigarEntryStart = ++cigarOp;
02873 }
02874
02875
02876 return(status);
02877 }
02878
02879
02880 bool SamRecord::setTagsFromBuffer()
02881 {
02882
02883 if(myNeedToSetTagsFromBuffer == false)
02884 {
02885
02886 return(true);
02887 }
02888
02889
02890 myNeedToSetTagsFromBuffer = false;
02891
02892 unsigned char * readNameEnds =
02893 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02894 unsigned char * packedSequence =
02895 readNameEnds + myRecordPtr->myCigarLength * sizeof(int);
02896 unsigned char * packedQuality =
02897 packedSequence + (myRecordPtr->myReadLength + 1) / 2;
02898
02899 unsigned char * extraPtr = packedQuality + myRecordPtr->myReadLength;
02900
02901
02902 bool status = true;
02903
02904
02905 clearTags();
02906 while (myRecordPtr->myBlockSize + 4 -
02907 (extraPtr - (unsigned char *)myRecordPtr) > 0)
02908 {
02909 int key;
02910 int value;
02911 void * content = extraPtr + 3;
02912 int tagBufferSize = 0;
02913
02914 key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
02915
02916 switch (extraPtr[2])
02917 {
02918 case 'A' :
02919 value = integers.Length();
02920 integers.Push(* (char *) content);
02921 intType.push_back(extraPtr[2]);
02922 extraPtr += 4;
02923 tagBufferSize += 4;
02924 break;
02925 case 'c' :
02926 value = integers.Length();
02927 integers.Push(* (char *) content);
02928 intType.push_back(extraPtr[2]);
02929 extraPtr += 4;
02930 tagBufferSize += 4;
02931 break;
02932 case 'C' :
02933 value = integers.Length();
02934 integers.Push(* (unsigned char *) content);
02935 intType.push_back(extraPtr[2]);
02936 extraPtr += 4;
02937 tagBufferSize += 4;
02938 break;
02939 case 's' :
02940 value = integers.Length();
02941 integers.Push(* (short *) content);
02942 intType.push_back(extraPtr[2]);
02943 extraPtr += 5;
02944 tagBufferSize += 5;
02945 break;
02946 case 'S' :
02947 value = integers.Length();
02948 integers.Push(* (unsigned short *) content);
02949 intType.push_back(extraPtr[2]);
02950 extraPtr += 5;
02951 tagBufferSize += 5;
02952 break;
02953 case 'i' :
02954 value = integers.Length();
02955 integers.Push(* (int *) content);
02956 intType.push_back(extraPtr[2]);
02957 extraPtr += 7;
02958 tagBufferSize += 7;
02959 break;
02960 case 'I' :
02961 value = integers.Length();
02962 integers.Push((int) * (unsigned int *) content);
02963 intType.push_back(extraPtr[2]);
02964 extraPtr += 7;
02965 tagBufferSize += 7;
02966 break;
02967 case 'Z' :
02968 value = strings.Length();
02969 strings.Push((const char *) content);
02970 extraPtr += 4 + strings.Last().Length();
02971 tagBufferSize += 4 + strings.Last().Length();
02972 break;
02973 case 'f' :
02974 value = doubles.Length();
02975 doubles.Push(* (float *) content);
02976 fprintf(stderr, "\n\nFLOAT!!!\n\n");
02977 extraPtr += 7;
02978 tagBufferSize += 7;
02979 break;
02980 default :
02981 fprintf(stderr,
02982 "parsing BAM - Unknown custom field of type %c%c:%c\n",
02983 extraPtr[0], extraPtr[1], extraPtr[2]);
02984
02985
02986 extraPtr += 3;
02987 myStatus.addError(SamStatus::FAIL_PARSE,
02988 "Unknown tag type.");
02989 status = false;
02990 }
02991
02992
02993 if(status)
02994 {
02995 extras.Add(key, value);
02996 myTagBufferSize += tagBufferSize;
02997 }
02998 }
02999 return(status);
03000 }
03001
03002
03003 bool SamRecord::setTagsInBuffer()
03004 {
03005
03006
03007
03008 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
03009 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
03010 (unsigned char*)myRecordPtr) +
03011 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
03012 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
03013
03014
03015 if(!allocateRecordStructure(newBufferSize))
03016 {
03017
03018 return(false);
03019 }
03020
03021 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
03022 myRecordPtr->myReadNameLength;
03023 unsigned char * packedSequence = readNameEnds +
03024 myRecordPtr->myCigarLength * sizeof(int);
03025 unsigned char * packedQuality =
03026 packedSequence + bamSequenceLength;
03027
03028 char * extraPtr = (char*)packedQuality + myRecordPtr->myReadLength;
03029
03030 bool status = true;
03031
03032
03033 if (extras.Entries())
03034 {
03035 for (int i = 0; i < extras.Capacity(); i++)
03036 {
03037 if (extras.SlotInUse(i))
03038 {
03039 int key = extras.GetKey(i);
03040 getTag(key, extraPtr);
03041 extraPtr += 2;
03042 char vtype;
03043 getTypeFromKey(key, vtype);
03044
03045 if(vtype == 'i')
03046 {
03047 vtype = getIntegerType(i);
03048 }
03049
03050 extraPtr[0] = vtype;
03051
03052
03053 extraPtr += 1;
03054
03055 switch (vtype)
03056 {
03057 case 'A' :
03058 *(char*)extraPtr = (char)getInteger(i);
03059
03060 extraPtr += 1;
03061 break;
03062 case 'c' :
03063 *(int8_t*)extraPtr = (int8_t)getInteger(i);
03064
03065 extraPtr += 1;
03066 break;
03067 case 'C' :
03068 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
03069
03070 extraPtr += 1;
03071 break;
03072 case 's' :
03073 *(int16_t*)extraPtr = (int16_t)getInteger(i);
03074
03075 extraPtr += 2;
03076 break;
03077 case 'S' :
03078 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
03079
03080 extraPtr += 2;
03081 break;
03082 case 'i' :
03083 *(int32_t*)extraPtr = (int32_t)getInteger(i);
03084
03085 extraPtr += 4;
03086 break;
03087 case 'I' :
03088 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
03089
03090 extraPtr += 4;
03091 break;
03092 case 'Z' :
03093 sprintf(extraPtr, "%s", getString(i).c_str());
03094 extraPtr += getString(i).Length() + 1;
03095 break;
03096 case 'f' :
03097
03098 sprintf(extraPtr, "%f", getDouble(i));
03099 extraPtr += 4;
03100 break;
03101 default :
03102 myStatus.addError(SamStatus::FAIL_PARSE,
03103 "Unknown tag type.");
03104 status = false;
03105 break;
03106 }
03107 }
03108 }
03109 }
03110
03111
03112
03113 if(extraPtr != (char*)myRecordPtr + newBufferSize)
03114 {
03115 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
03116 myStatus.addError(SamStatus::FAIL_PARSE,
03117 "ERROR updating the buffer. Incorrect size.");
03118 status = false;
03119 }
03120
03121
03122
03123 myNeedToSetTagsInBuffer = false;
03124 myIsTagsBufferValid = true;
03125 return(status);
03126 }
03127
03128
03129
03130
03131
03132 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
03133 {
03134
03135
03136 myReferenceName =
03137 header.getReferenceLabel(myRecordPtr->myReferenceID);
03138 myMateReferenceName =
03139 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
03140
03141
03142 myReadName.SetLength(0);
03143 myCigar.SetLength(0);
03144 mySequence.SetLength(0);
03145 mySeqWithEq.clear();
03146 mySeqWithoutEq.clear();
03147 myQuality.SetLength(0);
03148 myNeedToSetTagsFromBuffer = true;
03149 myNeedToSetTagsInBuffer = false;
03150
03151
03152 myIsBufferSynced = true;
03153
03154 myIsReadNameBufferValid = true;
03155 myIsCigarBufferValid = true;
03156 myIsSequenceBufferValid = true;
03157 myBufferSequenceTranslation = NONE;
03158 myIsQualityBufferValid = true;
03159 myIsTagsBufferValid = true;
03160 myIsBinValid = true;
03161 }
03162
03163
03164
03165 void SamRecord::getTypeFromKey(int key, char& type) const
03166 {
03167
03168 type = (key >> 16) & 0xFF;
03169 }
03170
03171
03172
03173 void SamRecord::getTag(int key, char* tag) const
03174 {
03175
03176 tag[0] = key & 0xFF;
03177 tag[1] = (key >> 8) & 0xFF;
03178 tag[2] = 0;
03179 }
03180
03181
03182
03183 String & SamRecord::getString(int index)
03184 {
03185 int value = extras[index];
03186
03187 return strings[value];
03188 }
03189
03190 int & SamRecord::getInteger(int offset)
03191 {
03192 int value = extras[offset];
03193
03194 return integers[value];
03195 }
03196
03197 char & SamRecord::getIntegerType(int offset)
03198 {
03199 int value = extras[offset];
03200
03201 return intType[value];
03202 }
03203
03204 double & SamRecord::getDouble(int offset)
03205 {
03206 int value = extras[offset];
03207
03208 return doubles[value];
03209 }
03210
03211