00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdlib.h>
00019 #include <limits>
00020 #include <stdexcept>
00021
00022 #include "bam.h"
00023
00024 #include "SamRecord.h"
00025 #include "SamValidation.h"
00026
00027 #include "BaseUtilities.h"
00028 #include "SamQuerySeqWithRefHelper.h"
00029
00030 const char* SamRecord::DEFAULT_READ_NAME = "UNKNOWN";
00031 const char* SamRecord::FIELD_ABSENT_STRING = "=";
00032
00033
00034 SamRecord::SamRecord()
00035 : myStatus(),
00036 myRefPtr(NULL),
00037 mySequenceTranslation(NONE)
00038 {
00039 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00040
00041 myRecordPtr =
00042 (bamRecordStruct *) malloc(defaultAllocSize);
00043
00044 myCigarTempBuffer = NULL;
00045 myCigarTempBufferAllocatedSize = 0;
00046
00047 allocatedSize = defaultAllocSize;
00048
00049 resetRecord();
00050 }
00051
00052
00053 SamRecord::SamRecord(ErrorHandler::HandlingType errorHandlingType)
00054 : myStatus(errorHandlingType),
00055 myRefPtr(NULL),
00056 mySequenceTranslation(NONE)
00057 {
00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
00059
00060 myRecordPtr =
00061 (bamRecordStruct *) malloc(defaultAllocSize);
00062
00063 myCigarTempBuffer = NULL;
00064 myCigarTempBufferAllocatedSize = 0;
00065
00066 allocatedSize = defaultAllocSize;
00067
00068 resetRecord();
00069 }
00070
00071
00072 SamRecord::~SamRecord()
00073 {
00074 resetRecord();
00075
00076 if(myRecordPtr != NULL)
00077 {
00078 free(myRecordPtr);
00079 myRecordPtr = NULL;
00080 }
00081 if(myCigarTempBuffer != NULL)
00082 {
00083 free(myCigarTempBuffer);
00084 myCigarTempBuffer = NULL;
00085 myCigarTempBufferAllocatedSize = 0;
00086 }
00087 }
00088
00089
00090
00091 void SamRecord::resetRecord()
00092 {
00093 myIsBufferSynced = true;
00094
00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
00096 myRecordPtr->myReferenceID = -1;
00097 myRecordPtr->myPosition = -1;
00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00099 myRecordPtr->myMapQuality = 0;
00100 myRecordPtr->myBin = DEFAULT_BIN;
00101 myRecordPtr->myCigarLength = 0;
00102 myRecordPtr->myFlag = 0;
00103 myRecordPtr->myReadLength = 0;
00104 myRecordPtr->myMateReferenceID = -1;
00105 myRecordPtr->myMatePosition = -1;
00106 myRecordPtr->myInsertSize = 0;
00107
00108
00109
00110
00111 myReadName = DEFAULT_READ_NAME;
00112 myReferenceName = "*";
00113 myMateReferenceName = "*";
00114 myCigar = "*";
00115 mySequence = "*";
00116 mySeqWithEq.clear();
00117 mySeqWithoutEq.clear();
00118 myQuality = "*";
00119 myNeedToSetTagsFromBuffer = false;
00120 myNeedToSetTagsInBuffer = false;
00121
00122
00123 myAlignmentLength = -1;
00124 myUnclippedStartOffset = -1;
00125 myUnclippedEndOffset = -1;
00126
00127 clearTags();
00128
00129
00130
00131
00132
00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
00134 myRecordPtr->myReadNameLength);
00135
00136
00137 myIsReadNameBufferValid = true;
00138 myIsCigarBufferValid = true;
00139 myPackedSequence =
00140 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
00141 myRecordPtr->myCigarLength * sizeof(int);
00142 myIsSequenceBufferValid = true;
00143 myBufferSequenceTranslation = NONE;
00144
00145 myPackedQuality = myPackedSequence;
00146 myIsQualityBufferValid = true;
00147 myIsTagsBufferValid = true;
00148 myIsBinValid = true;
00149
00150 myCigarTempBufferLength = -1;
00151
00152 myStatus = SamStatus::SUCCESS;
00153
00154 NOT_FOUND_TAG_STRING = "";
00155 NOT_FOUND_TAG_INT = -1;
00156 NOT_FOUND_TAG_DOUBLE = -1;
00157 }
00158
00159
00160
00161
00162 bool SamRecord::isValid(SamFileHeader& header)
00163 {
00164 myStatus = SamStatus::SUCCESS;
00165 SamValidationErrors invalidSamErrors;
00166 if(!SamValidator::isValid(header, *this, invalidSamErrors))
00167 {
00168
00169 std::string errorMessage = "";
00170 invalidSamErrors.getErrorString(errorMessage);
00171 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
00172 return(false);
00173 }
00174
00175 return(true);
00176 }
00177
00178
00179 void SamRecord::setReference(GenomeSequence* reference)
00180 {
00181 myRefPtr = reference;
00182 }
00183
00184
00185
00186
00187
00188 void SamRecord::setSequenceTranslation(SequenceTranslation translation)
00189 {
00190 mySequenceTranslation = translation;
00191 }
00192
00193
00194 bool SamRecord::setReadName(const char* readName)
00195 {
00196 myReadName = readName;
00197 myIsBufferSynced = false;
00198 myIsReadNameBufferValid = false;
00199 myStatus = SamStatus::SUCCESS;
00200
00201
00202
00203 if(myReadName.Length() == 0)
00204 {
00205
00206 myReadName = DEFAULT_READ_NAME;
00207 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
00208 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
00209 return(false);
00210 }
00211
00212 return true;
00213 }
00214
00215
00216 bool SamRecord::setFlag(uint16_t flag)
00217 {
00218 myStatus = SamStatus::SUCCESS;
00219 myRecordPtr->myFlag = flag;
00220 return true;
00221 }
00222
00223
00224 bool SamRecord::setReferenceName(SamFileHeader& header,
00225 const char* referenceName)
00226 {
00227 myStatus = SamStatus::SUCCESS;
00228
00229 myReferenceName = referenceName;
00230
00231 myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
00232
00233 return true;
00234 }
00235
00236
00237 bool SamRecord::set1BasedPosition(int32_t position)
00238 {
00239 return(set0BasedPosition(position - 1));
00240 }
00241
00242
00243 bool SamRecord::set0BasedPosition(int32_t position)
00244 {
00245 myStatus = SamStatus::SUCCESS;
00246 myRecordPtr->myPosition = position;
00247 myIsBinValid = false;
00248 return true;
00249 }
00250
00251
00252 bool SamRecord::setMapQuality(uint8_t mapQuality)
00253 {
00254 myStatus = SamStatus::SUCCESS;
00255 myRecordPtr->myMapQuality = mapQuality;
00256 return true;
00257 }
00258
00259
00260 bool SamRecord::setCigar(const char* cigar)
00261 {
00262 myStatus = SamStatus::SUCCESS;
00263 myCigar = cigar;
00264
00265 myIsBufferSynced = false;
00266 myIsCigarBufferValid = false;
00267 myCigarTempBufferLength = -1;
00268 myIsBinValid = false;
00269
00270
00271 myAlignmentLength = -1;
00272 myUnclippedStartOffset = -1;
00273 myUnclippedEndOffset = -1;
00274
00275 return true;
00276 }
00277
00278
00279 bool SamRecord::setCigar(const Cigar& cigar)
00280 {
00281 myStatus = SamStatus::SUCCESS;
00282 cigar.getCigarString(myCigar);
00283
00284 myIsBufferSynced = false;
00285 myIsCigarBufferValid = false;
00286 myCigarTempBufferLength = -1;
00287 myIsBinValid = false;
00288
00289
00290 myAlignmentLength = -1;
00291 myUnclippedStartOffset = -1;
00292 myUnclippedEndOffset = -1;
00293
00294 return true;
00295 }
00296
00297
00298 bool SamRecord::setMateReferenceName(SamFileHeader& header,
00299 const char* mateReferenceName)
00300 {
00301 myStatus = SamStatus::SUCCESS;
00302
00303
00304
00305 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
00306 {
00307 myMateReferenceName = myReferenceName;
00308 }
00309 else
00310 {
00311 myMateReferenceName = mateReferenceName;
00312 }
00313
00314
00315
00316 myRecordPtr->myMateReferenceID =
00317 header.getReferenceID(myMateReferenceName, true);
00318
00319 return true;
00320 }
00321
00322
00323 bool SamRecord::set1BasedMatePosition(int32_t matePosition)
00324 {
00325 return(set0BasedMatePosition(matePosition - 1));
00326 }
00327
00328
00329 bool SamRecord::set0BasedMatePosition(int32_t matePosition)
00330 {
00331 myStatus = SamStatus::SUCCESS;
00332 myRecordPtr->myMatePosition = matePosition;
00333 return true;
00334 }
00335
00336
00337 bool SamRecord::setInsertSize(int32_t insertSize)
00338 {
00339 myStatus = SamStatus::SUCCESS;
00340 myRecordPtr->myInsertSize = insertSize;
00341 return true;
00342 }
00343
00344
00345 bool SamRecord::setSequence(const char* seq)
00346 {
00347 myStatus = SamStatus::SUCCESS;
00348 mySequence = seq;
00349 mySeqWithEq.clear();
00350 mySeqWithoutEq.clear();
00351
00352 myIsBufferSynced = false;
00353 myIsSequenceBufferValid = false;
00354 return true;
00355 }
00356
00357
00358 bool SamRecord::setQuality(const char* quality)
00359 {
00360 myStatus = SamStatus::SUCCESS;
00361 myQuality = quality;
00362 myIsBufferSynced = false;
00363 myIsQualityBufferValid = false;
00364 return true;
00365 }
00366
00367
00368
00369 bool SamRecord::shiftIndelsLeft()
00370 {
00371
00372
00373
00374
00375 if(myAlignmentLength == -1)
00376 {
00377
00378 parseCigar();
00379 }
00380
00381
00382 bool shifted = false;
00383
00384
00385
00386 uint32_t currentPos = 0;
00387
00388
00389
00390 if(Cigar::foundInQuery(myCigarRoller[0]))
00391 {
00392
00393 currentPos += myCigarRoller[0].count;
00394 }
00395
00396 int numOps = myCigarRoller.size();
00397
00398
00399
00400 for(int currentOp = 1; currentOp < numOps; currentOp++)
00401 {
00402 if(myCigarRoller[currentOp].operation == Cigar::insert)
00403 {
00404
00405 int prevOpIndex = currentOp-1;
00406
00407
00408 int nextOpIndex = currentOp+1;
00409 if(nextOpIndex == numOps)
00410 {
00411
00412 nextOpIndex = currentOp;
00413 }
00414
00415
00416 uint32_t prevOpStart =
00417 currentPos - myCigarRoller[prevOpIndex].count;
00418
00419
00420 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
00421 {
00422
00423
00424 currentPos += myCigarRoller[currentOp].count;
00425
00426 continue;
00427 }
00428
00429
00430
00431
00432
00433
00434 uint32_t insertEndPos =
00435 currentPos + myCigarRoller[currentOp].count - 1;
00436
00437
00438 uint32_t insertStartPos = currentPos;
00439
00440
00441
00442
00443
00444
00445
00446
00447 while((insertStartPos > prevOpStart) &&
00448 (getSequence(insertEndPos,BASES) ==
00449 getSequence(insertStartPos - 1, BASES)))
00450 {
00451
00452 --insertEndPos;
00453 --insertStartPos;
00454 }
00455
00456
00457 int shiftLen = currentPos - insertStartPos;
00458 if(shiftLen > 0)
00459 {
00460
00461
00462
00463
00464
00465
00466
00467
00468 if(myCigarRoller[nextOpIndex].operation ==
00469 myCigarRoller[prevOpIndex].operation)
00470 {
00471
00472
00473 myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
00474 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
00475
00476
00477
00478 if(myCigarRoller[prevOpIndex].count == 0)
00479 {
00480 myCigarRoller.Remove(prevOpIndex);
00481 }
00482 shifted = true;
00483 }
00484 else
00485 {
00486
00487
00488
00489 if(insertStartPos == prevOpStart)
00490 {
00491
00492
00493 myCigarRoller.Update(currentOp,
00494 myCigarRoller[prevOpIndex].operation,
00495 myCigarRoller[prevOpIndex].count);
00496
00497
00498 myCigarRoller.Update(prevOpIndex,
00499 Cigar::insert,
00500 shiftLen);
00501 shifted = true;
00502 }
00503 }
00504 }
00505
00506 currentPos += myCigarRoller[currentOp].count;
00507 }
00508 else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
00509 {
00510
00511 currentPos += myCigarRoller[currentOp].count;
00512 }
00513 }
00514 if(shifted)
00515 {
00516
00517
00518 setCigar(myCigarRoller);
00519 }
00520 return(shifted);
00521 }
00522
00523
00524
00525
00526 SamStatus::Status SamRecord::setBuffer(const char* fromBuffer,
00527 uint32_t fromBufferSize,
00528 SamFileHeader& header)
00529 {
00530 myStatus = SamStatus::SUCCESS;
00531 if((fromBuffer == NULL) || (fromBufferSize == 0))
00532 {
00533
00534 myStatus.setStatus(SamStatus::FAIL_PARSE,
00535 "Cannot parse an empty file.");
00536 return(SamStatus::FAIL_PARSE);
00537 }
00538
00539
00540 resetRecord();
00541
00542
00543 if(!allocateRecordStructure(fromBufferSize))
00544 {
00545
00546 return(SamStatus::FAIL_MEM);
00547 }
00548
00549 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
00550
00551 setVariablesForNewBuffer(header);
00552
00553
00554 return(SamStatus::SUCCESS);
00555 }
00556
00557
00558
00559 SamStatus::Status SamRecord::setBufferFromFile(IFILE filePtr,
00560 SamFileHeader& header)
00561 {
00562 myStatus = SamStatus::SUCCESS;
00563 if((filePtr == NULL) || (filePtr->isOpen() == false))
00564 {
00565
00566 myStatus.setStatus(SamStatus::FAIL_ORDER,
00567 "Can't read from an unopened file.");
00568 return(SamStatus::FAIL_ORDER);
00569 }
00570
00571
00572 resetRecord();
00573
00574
00575 int numBytes =
00576 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
00577
00578
00579 if(ifeof(filePtr) && (numBytes == 0))
00580 {
00581
00582 myStatus.setStatus(SamStatus::NO_MORE_RECS,
00583 "No more records left to read.");
00584 return(SamStatus::NO_MORE_RECS);
00585 }
00586
00587 if(numBytes != sizeof(int32_t))
00588 {
00589
00590
00591 if(ifeof(filePtr))
00592 {
00593
00594
00595 myStatus.setStatus(SamStatus::FAIL_PARSE,
00596 "EOF reached in the middle of a record.");
00597 return(SamStatus::FAIL_PARSE);
00598 }
00599 else
00600 {
00601
00602 myStatus.setStatus(SamStatus::FAIL_IO,
00603 "Failed to read the record size.");
00604 return(SamStatus::FAIL_IO);
00605 }
00606 }
00607
00608
00609 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
00610 {
00611
00612
00613 return(SamStatus::FAIL_MEM);
00614 }
00615
00616
00617 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
00618 != (unsigned int)myRecordPtr->myBlockSize)
00619 {
00620
00621 resetRecord();
00622 myStatus.setStatus(SamStatus::FAIL_IO,
00623 "Failed to read the record");
00624 return(SamStatus::FAIL_IO);
00625 }
00626
00627 setVariablesForNewBuffer(header);
00628
00629
00630 return(SamStatus::SUCCESS);
00631 }
00632
00633
00634
00635
00636 bool SamRecord::addIntTag(const char* tag, int32_t value)
00637 {
00638 myStatus = SamStatus::SUCCESS;
00639 int key = 0;
00640 int index = 0;
00641 char bamvtype;
00642
00643 int tagBufferSize = 0;
00644
00645
00646 if(myNeedToSetTagsFromBuffer)
00647 {
00648 if(!setTagsFromBuffer())
00649 {
00650
00651 return(false);
00652 }
00653 }
00654
00655
00656
00657
00658
00659 if(value < 0)
00660 {
00661
00662
00663 if(value > std::numeric_limits<char>::min())
00664 {
00665
00666 bamvtype = 'c';
00667 tagBufferSize += 4;
00668 }
00669 else if(value > std::numeric_limits<short>::min())
00670 {
00671
00672 bamvtype = 's';
00673 tagBufferSize += 5;
00674 }
00675 else
00676 {
00677
00678 bamvtype = 'i';
00679 tagBufferSize += 7;
00680 }
00681 }
00682 else
00683 {
00684
00685 if(value < std::numeric_limits<unsigned char>::max())
00686 {
00687
00688 bamvtype = 'C';
00689 tagBufferSize += 4;
00690 }
00691 else if(value < std::numeric_limits<unsigned short>::max())
00692 {
00693
00694 bamvtype = 'S';
00695 tagBufferSize += 5;
00696 }
00697 else
00698 {
00699
00700 bamvtype = 'I';
00701 tagBufferSize += 7;
00702 }
00703 }
00704
00705
00706 key = MAKEKEY(tag[0], tag[1], bamvtype);
00707 unsigned int hashIndex = extras.Find(key);
00708 if(hashIndex != LH_NOTFOUND)
00709 {
00710
00711 index = extras[hashIndex];
00712
00713
00714 if((integers[index] == value) && (intType[index] == bamvtype))
00715 {
00716
00717 return(true);
00718 }
00719 else
00720 {
00721
00722
00723
00724 switch(intType[index])
00725 {
00726 case 'c':
00727 case 'C':
00728 tagBufferSize -= 4;
00729 break;
00730 case 's':
00731 case 'S':
00732 tagBufferSize -= 5;
00733 break;
00734 case 'i':
00735 case 'I':
00736 tagBufferSize -= 7;
00737 break;
00738 default:
00739 myStatus.setStatus(SamStatus::INVALID,
00740 "unknown tag inttype type found.\n");
00741 return(false);
00742 }
00743
00744
00745 integers[index] = value;
00746 intType[index] = bamvtype;
00747 }
00748 }
00749 else
00750 {
00751
00752 index = integers.Length();
00753
00754 integers.Push(value);
00755 intType.push_back(bamvtype);
00756
00757 extras.Add(key, index);
00758 }
00759
00760
00761 myNeedToSetTagsInBuffer = true;
00762 myIsTagsBufferValid = false;
00763 myIsBufferSynced = false;
00764 myTagBufferSize += tagBufferSize;
00765
00766 return(true);
00767 }
00768
00769
00770
00771
00772
00773 bool SamRecord::addTag(const char* tag, char vtype, const char* valuePtr)
00774 {
00775 if(vtype == 'i')
00776 {
00777
00778 int intVal = atoi(valuePtr);
00779 return(addIntTag(tag, intVal));
00780 }
00781
00782
00783 myStatus = SamStatus::SUCCESS;
00784 bool status = true;
00785 int key = 0;
00786 int index = 0;
00787
00788 int tagBufferSize = 0;
00789
00790
00791 if(myNeedToSetTagsFromBuffer)
00792 {
00793 if(!setTagsFromBuffer())
00794 {
00795
00796 return(false);
00797 }
00798 }
00799
00800
00801 key = MAKEKEY(tag[0], tag[1], vtype);
00802 unsigned int hashIndex = extras.Find(key);
00803 if(hashIndex == LH_NOTFOUND)
00804 {
00805
00806
00807 if(vtype == 'Z')
00808 {
00809 int tmpkey = MAKEKEY(tag[0], tag[1], 'B');
00810 hashIndex = extras.Find(tmpkey);
00811 }
00812 else if(vtype == 'B')
00813 {
00814 int tmpkey = MAKEKEY(tag[0], tag[1], 'Z');
00815 hashIndex = extras.Find(tmpkey);
00816 }
00817 }
00818 if(hashIndex != LH_NOTFOUND)
00819 {
00820
00821 index = extras[hashIndex];
00822
00823
00824 switch (vtype)
00825 {
00826 case 'A' :
00827
00828 if(integers[index] == (const int)*(valuePtr))
00829 {
00830
00831 return(true);
00832 }
00833 else
00834 {
00835
00836 integers[index] = (const int)*(valuePtr);
00837 intType[index] = vtype;
00838 }
00839 break;
00840 case 'Z' :
00841 case 'B' :
00842
00843 if(strings[index] == valuePtr)
00844 {
00845
00846 return(true);
00847 }
00848 else
00849 {
00850
00851 tagBufferSize -= strings[index].Length();
00852 strings[index] = valuePtr;
00853
00854 tagBufferSize += strings[index].Length();
00855 }
00856 break;
00857 case 'f' :
00858
00859 if(doubles[index] == atof(valuePtr))
00860 {
00861
00862 return(true);
00863 }
00864 else
00865 {
00866
00867 doubles[index] = atof(valuePtr);
00868 }
00869 break;
00870 default :
00871 fprintf(stderr,
00872 "samRecord::addTag() - Unknown custom field of type %c\n",
00873 vtype);
00874 myStatus.setStatus(SamStatus::FAIL_PARSE,
00875 "Unknown custom field in a tag");
00876 status = false;
00877 break;
00878 }
00879 }
00880 else
00881 {
00882
00883 switch (vtype)
00884 {
00885 case 'A' :
00886 index = integers.Length();
00887 integers.Push((const int)*(valuePtr));
00888 intType.push_back(vtype);
00889 tagBufferSize += 4;
00890 break;
00891 case 'Z' :
00892 case 'B' :
00893 index = strings.Length();
00894 strings.Push(valuePtr);
00895 tagBufferSize += 4 + strings.Last().Length();
00896 break;
00897 case 'f' :
00898 index = doubles.Length();
00899 doubles.Push(atof(valuePtr));
00900 tagBufferSize += 7;
00901 break;
00902 default :
00903 fprintf(stderr,
00904 "samRecord::addTag() - Unknown custom field of type %c\n",
00905 vtype);
00906 myStatus.setStatus(SamStatus::FAIL_PARSE,
00907 "Unknown custom field in a tag");
00908 status = false;
00909 break;
00910 }
00911 if(status)
00912 {
00913
00914 extras.Add(key, index);
00915 }
00916 }
00917
00918
00919 if(status)
00920 {
00921
00922 myNeedToSetTagsInBuffer = true;
00923 myIsTagsBufferValid = false;
00924 myIsBufferSynced = false;
00925 myTagBufferSize += tagBufferSize;
00926 }
00927 return(status);
00928 }
00929
00930
00931 void SamRecord::clearTags()
00932 {
00933 if(extras.Entries() != 0)
00934 {
00935 extras.Clear();
00936 }
00937 strings.Clear();
00938 integers.Clear();
00939 intType.clear();
00940 doubles.Clear();
00941 myTagBufferSize = 0;
00942 resetTagIter();
00943 }
00944
00945
00946 bool SamRecord::rmTag(const char* tag, char type)
00947 {
00948
00949 if(strlen(tag) != 2)
00950 {
00951
00952 myStatus.setStatus(SamStatus::INVALID,
00953 "rmTag called with tag that is not 2 characters\n");
00954 return(false);
00955 }
00956
00957 myStatus = SamStatus::SUCCESS;
00958 if(myNeedToSetTagsFromBuffer)
00959 {
00960 if(!setTagsFromBuffer())
00961 {
00962
00963
00964 return(false);
00965 }
00966 }
00967
00968
00969 int key = MAKEKEY(tag[0], tag[1], type);
00970
00971 int offset = extras.Find(key);
00972
00973 if(offset < 0)
00974 {
00975
00976
00977 return(true);
00978 }
00979
00980
00981
00982 char vtype;
00983 getTypeFromKey(key, vtype);
00984 if(vtype == 'i')
00985 {
00986 vtype = getIntegerType(offset);
00987 }
00988
00989
00990
00991
00992
00993 int rmBuffSize = 0;
00994 switch(vtype)
00995 {
00996 case 'A':
00997 case 'c':
00998 case 'C':
00999 rmBuffSize = 4;
01000 break;
01001 case 's':
01002 case 'S':
01003 rmBuffSize = 5;
01004 break;
01005 case 'i':
01006 case 'I':
01007 rmBuffSize = 7;
01008 break;
01009 case 'f':
01010 rmBuffSize = 7;
01011 break;
01012 case 'Z':
01013 case 'B':
01014 rmBuffSize = 4 + getString(offset).Length();
01015 break;
01016 default:
01017 myStatus.setStatus(SamStatus::INVALID,
01018 "rmTag called with unknown type.\n");
01019 return(false);
01020 break;
01021 };
01022
01023
01024 myNeedToSetTagsInBuffer = true;
01025 myIsTagsBufferValid = false;
01026 myIsBufferSynced = false;
01027 myTagBufferSize -= rmBuffSize;
01028
01029
01030 extras.Delete(offset);
01031 return(true);
01032 }
01033
01034
01035 bool SamRecord::rmTags(const char* tags)
01036 {
01037 const char* currentTagPtr = tags;
01038
01039 myStatus = SamStatus::SUCCESS;
01040 if(myNeedToSetTagsFromBuffer)
01041 {
01042 if(!setTagsFromBuffer())
01043 {
01044
01045
01046 return(false);
01047 }
01048 }
01049
01050 bool returnStatus = true;
01051
01052 int rmBuffSize = 0;
01053 while(*currentTagPtr != '\0')
01054 {
01055
01056
01057
01058
01059 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
01060 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
01061 {
01062 myStatus.setStatus(SamStatus::INVALID,
01063 "rmTags called with improperly formatted tags.\n");
01064 returnStatus = false;
01065 break;
01066 }
01067
01068
01069 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
01070 currentTagPtr[3]);
01071
01072 int offset = extras.Find(key);
01073
01074 if(offset >= 0)
01075 {
01076
01077
01078 char vtype;
01079 getTypeFromKey(key, vtype);
01080 if(vtype == 'i')
01081 {
01082 vtype = getIntegerType(offset);
01083 }
01084
01085
01086
01087
01088
01089 switch(vtype)
01090 {
01091 case 'A':
01092 case 'c':
01093 case 'C':
01094 rmBuffSize += 4;
01095 break;
01096 case 's':
01097 case 'S':
01098 rmBuffSize += 5;
01099 break;
01100 case 'i':
01101 case 'I':
01102 rmBuffSize += 7;
01103 break;
01104 case 'f':
01105 rmBuffSize += 7;
01106 break;
01107 case 'Z':
01108 case 'B':
01109 rmBuffSize += 4 + getString(offset).Length();
01110 break;
01111 default:
01112 myStatus.setStatus(SamStatus::INVALID,
01113 "rmTag called with unknown type.\n");
01114 returnStatus = false;
01115 break;
01116 };
01117
01118
01119 extras.Delete(offset);
01120 }
01121
01122 if(currentTagPtr[4] == ';')
01123 {
01124
01125 currentTagPtr += 5;
01126 }
01127 else if(currentTagPtr[4] != '\0')
01128 {
01129
01130 myStatus.setStatus(SamStatus::INVALID,
01131 "rmTags called with improperly formatted tags.\n");
01132 returnStatus = false;
01133 break;
01134 }
01135 else
01136 {
01137
01138 currentTagPtr += 4;
01139 }
01140 }
01141
01142
01143 myNeedToSetTagsInBuffer = true;
01144 myIsTagsBufferValid = false;
01145 myIsBufferSynced = false;
01146 myTagBufferSize -= rmBuffSize;
01147
01148
01149 return(returnStatus);
01150 }
01151
01152
01153
01154 const void* SamRecord::getRecordBuffer()
01155 {
01156 return(getRecordBuffer(mySequenceTranslation));
01157 }
01158
01159
01160
01161 const void* SamRecord::getRecordBuffer(SequenceTranslation translation)
01162 {
01163 myStatus = SamStatus::SUCCESS;
01164 bool status = true;
01165
01166
01167 if((myIsBufferSynced == false) ||
01168 (myBufferSequenceTranslation != translation))
01169 {
01170 status &= fixBuffer(translation);
01171 }
01172
01173 if(myNeedToSetTagsInBuffer)
01174 {
01175 status &= setTagsInBuffer();
01176 }
01177 if(!status)
01178 {
01179 return(NULL);
01180 }
01181 return (const void *)myRecordPtr;
01182 }
01183
01184
01185
01186
01187 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr)
01188 {
01189 return(writeRecordBuffer(filePtr, mySequenceTranslation));
01190 }
01191
01192
01193
01194 SamStatus::Status SamRecord::writeRecordBuffer(IFILE filePtr,
01195 SequenceTranslation translation)
01196 {
01197 myStatus = SamStatus::SUCCESS;
01198 if((filePtr == NULL) || (filePtr->isOpen() == false))
01199 {
01200
01201 myStatus.setStatus(SamStatus::FAIL_ORDER,
01202 "Can't write to an unopened file.");
01203 return(SamStatus::FAIL_ORDER);
01204 }
01205
01206 if((myIsBufferSynced == false) ||
01207 (myBufferSequenceTranslation != translation))
01208 {
01209 if(!fixBuffer(translation))
01210 {
01211 return(myStatus.getStatus());
01212 }
01213 }
01214
01215
01216 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
01217 unsigned int numBytesWritten =
01218 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
01219
01220
01221 if(numBytesToWrite == numBytesWritten)
01222 {
01223 return(SamStatus::SUCCESS);
01224 }
01225
01226 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
01227 return(SamStatus::FAIL_IO);
01228 }
01229
01230
01231 int32_t SamRecord::getBlockSize()
01232 {
01233 myStatus = SamStatus::SUCCESS;
01234
01235
01236 if(myIsBufferSynced == false)
01237 {
01238
01239
01240
01241 fixBuffer(myBufferSequenceTranslation);
01242 }
01243 return myRecordPtr->myBlockSize;
01244 }
01245
01246
01247
01248 const char* SamRecord::getReferenceName()
01249 {
01250 myStatus = SamStatus::SUCCESS;
01251 return myReferenceName.c_str();
01252 }
01253
01254
01255 int32_t SamRecord::getReferenceID()
01256 {
01257 myStatus = SamStatus::SUCCESS;
01258 return myRecordPtr->myReferenceID;
01259 }
01260
01261
01262 int32_t SamRecord::get1BasedPosition()
01263 {
01264 myStatus = SamStatus::SUCCESS;
01265 return (myRecordPtr->myPosition + 1);
01266 }
01267
01268
01269 int32_t SamRecord::get0BasedPosition()
01270 {
01271 myStatus = SamStatus::SUCCESS;
01272 return myRecordPtr->myPosition;
01273 }
01274
01275
01276 uint8_t SamRecord::getReadNameLength()
01277 {
01278 myStatus = SamStatus::SUCCESS;
01279
01280
01281 if(myIsReadNameBufferValid)
01282 {
01283 return(myRecordPtr->myReadNameLength);
01284 }
01285
01286 return(myReadName.Length() + 1);
01287 }
01288
01289
01290 uint8_t SamRecord::getMapQuality()
01291 {
01292 myStatus = SamStatus::SUCCESS;
01293 return myRecordPtr->myMapQuality;
01294 }
01295
01296
01297 uint16_t SamRecord::getBin()
01298 {
01299 myStatus = SamStatus::SUCCESS;
01300 if(!myIsBinValid)
01301 {
01302
01303
01304 myRecordPtr->myBin =
01305 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
01306 myIsBinValid = true;
01307 }
01308 return(myRecordPtr->myBin);
01309 }
01310
01311
01312 uint16_t SamRecord::getCigarLength()
01313 {
01314 myStatus = SamStatus::SUCCESS;
01315
01316
01317 if(myIsCigarBufferValid)
01318 {
01319 return myRecordPtr->myCigarLength;
01320 }
01321
01322 if(myCigarTempBufferLength == -1)
01323 {
01324
01325
01326 parseCigarString();
01327 }
01328
01329
01330 return(myCigarTempBufferLength);
01331 }
01332
01333
01334 uint16_t SamRecord::getFlag()
01335 {
01336 myStatus = SamStatus::SUCCESS;
01337 return myRecordPtr->myFlag;
01338 }
01339
01340
01341 int32_t SamRecord::getReadLength()
01342 {
01343 myStatus = SamStatus::SUCCESS;
01344 if(myIsSequenceBufferValid == false)
01345 {
01346
01347 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
01348 {
01349 return(0);
01350 }
01351
01352 return(mySequence.Length());
01353 }
01354 return(myRecordPtr->myReadLength);
01355 }
01356
01357
01358
01359
01360 const char* SamRecord::getMateReferenceName()
01361 {
01362 myStatus = SamStatus::SUCCESS;
01363 return myMateReferenceName.c_str();
01364 }
01365
01366
01367
01368
01369
01370 const char* SamRecord::getMateReferenceNameOrEqual()
01371 {
01372 myStatus = SamStatus::SUCCESS;
01373 if(myMateReferenceName == "*")
01374 {
01375 return(myMateReferenceName);
01376 }
01377 if(myMateReferenceName == getReferenceName())
01378 {
01379 return(FIELD_ABSENT_STRING);
01380 }
01381 else
01382 {
01383 return(myMateReferenceName);
01384 }
01385 }
01386
01387
01388 int32_t SamRecord::getMateReferenceID()
01389 {
01390 myStatus = SamStatus::SUCCESS;
01391 return myRecordPtr->myMateReferenceID;
01392 }
01393
01394
01395 int32_t SamRecord::get1BasedMatePosition()
01396 {
01397 myStatus = SamStatus::SUCCESS;
01398 return (myRecordPtr->myMatePosition + 1);
01399 }
01400
01401
01402 int32_t SamRecord::get0BasedMatePosition()
01403 {
01404 myStatus = SamStatus::SUCCESS;
01405 return myRecordPtr->myMatePosition;
01406 }
01407
01408
01409 int32_t SamRecord::getInsertSize()
01410 {
01411 myStatus = SamStatus::SUCCESS;
01412 return myRecordPtr->myInsertSize;
01413 }
01414
01415
01416
01417 int32_t SamRecord::get0BasedAlignmentEnd()
01418 {
01419 myStatus = SamStatus::SUCCESS;
01420 if(myAlignmentLength == -1)
01421 {
01422
01423 parseCigar();
01424 }
01425
01426 if(myAlignmentLength == 0)
01427 {
01428
01429 return(myRecordPtr->myPosition);
01430 }
01431 return(myRecordPtr->myPosition + myAlignmentLength - 1);
01432 }
01433
01434
01435
01436 int32_t SamRecord::get1BasedAlignmentEnd()
01437 {
01438 return(get0BasedAlignmentEnd() + 1);
01439 }
01440
01441
01442
01443 int32_t SamRecord::getAlignmentLength()
01444 {
01445 myStatus = SamStatus::SUCCESS;
01446 if(myAlignmentLength == -1)
01447 {
01448
01449 parseCigar();
01450 }
01451
01452 return(myAlignmentLength);
01453 }
01454
01455
01456 int32_t SamRecord::get0BasedUnclippedStart()
01457 {
01458 myStatus = SamStatus::SUCCESS;
01459 if(myUnclippedStartOffset == -1)
01460 {
01461
01462 parseCigar();
01463 }
01464 return(myRecordPtr->myPosition - myUnclippedStartOffset);
01465 }
01466
01467
01468
01469 int32_t SamRecord::get1BasedUnclippedStart()
01470 {
01471 return(get0BasedUnclippedStart() + 1);
01472 }
01473
01474
01475
01476 int32_t SamRecord::get0BasedUnclippedEnd()
01477 {
01478
01479
01480 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
01481 }
01482
01483
01484
01485 int32_t SamRecord::get1BasedUnclippedEnd()
01486 {
01487 return(get0BasedUnclippedEnd() + 1);
01488 }
01489
01490
01491
01492 const char* SamRecord::getReadName()
01493 {
01494 myStatus = SamStatus::SUCCESS;
01495 if(myReadName.Length() == 0)
01496 {
01497
01498
01499 myReadName = (char*)&(myRecordPtr->myData);
01500 }
01501 return myReadName.c_str();
01502 }
01503
01504
01505 const char* SamRecord::getCigar()
01506 {
01507 myStatus = SamStatus::SUCCESS;
01508 if(myCigar.Length() == 0)
01509 {
01510
01511
01512 parseCigarBinary();
01513 }
01514 return myCigar.c_str();
01515 }
01516
01517
01518 const char* SamRecord::getSequence()
01519 {
01520 return(getSequence(mySequenceTranslation));
01521 }
01522
01523
01524 const char* SamRecord::getSequence(SequenceTranslation translation)
01525 {
01526 myStatus = SamStatus::SUCCESS;
01527 if(mySequence.Length() == 0)
01528 {
01529
01530
01531 setSequenceAndQualityFromBuffer();
01532 }
01533
01534
01535 if((translation == NONE) || (myRefPtr == NULL))
01536 {
01537 return mySequence.c_str();
01538 }
01539 else if(translation == EQUAL)
01540 {
01541 if(mySeqWithEq.length() == 0)
01542 {
01543
01544 if(mySequence == "*")
01545 {
01546
01547 mySeqWithEq = '*';
01548 }
01549 else
01550 {
01551
01552 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01553 myRecordPtr->myPosition,
01554 *(getCigarInfo()),
01555 getReferenceName(),
01556 *myRefPtr,
01557 mySeqWithEq);
01558 }
01559 }
01560 return(mySeqWithEq.c_str());
01561 }
01562 else
01563 {
01564
01565 if(mySeqWithoutEq.length() == 0)
01566 {
01567 if(mySequence == "*")
01568 {
01569
01570 mySeqWithoutEq = '*';
01571 }
01572 else
01573 {
01574
01575 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01576 myRecordPtr->myPosition,
01577 *(getCigarInfo()),
01578 getReferenceName(),
01579 *myRefPtr,
01580 mySeqWithoutEq);
01581 }
01582 }
01583 return(mySeqWithoutEq.c_str());
01584 }
01585 }
01586
01587
01588 const char* SamRecord::getQuality()
01589 {
01590 myStatus = SamStatus::SUCCESS;
01591 if(myQuality.Length() == 0)
01592 {
01593
01594
01595 setSequenceAndQualityFromBuffer();
01596 }
01597 return myQuality.c_str();
01598 }
01599
01600
01601 char SamRecord::getSequence(int index)
01602 {
01603 return(getSequence(index, mySequenceTranslation));
01604 }
01605
01606
01607 char SamRecord::getSequence(int index, SequenceTranslation translation)
01608 {
01609 static const char * asciiBases = "=AC.G...T......N";
01610
01611
01612 int32_t readLen = getReadLength();
01613
01614
01615 if(readLen == 0)
01616 {
01617 String exceptionString = "SamRecord::getSequence(";
01618 exceptionString += index;
01619 exceptionString += ") is not allowed since sequence = '*'";
01620 throw std::runtime_error(exceptionString.c_str());
01621 }
01622 else if((index < 0) || (index >= readLen))
01623 {
01624
01625 String exceptionString = "SamRecord::getSequence(";
01626 exceptionString += index;
01627 exceptionString += ") is out of range. Index must be between 0 and ";
01628 exceptionString += (readLen - 1);
01629 throw std::runtime_error(exceptionString.c_str());
01630 }
01631
01632
01633 if((translation == NONE) || (myRefPtr == NULL))
01634 {
01635
01636 if(mySequence.Length() == 0)
01637 {
01638
01639 if(myIsSequenceBufferValid)
01640 {
01641 return(index & 1 ?
01642 asciiBases[myPackedSequence[index / 2] & 0xF] :
01643 asciiBases[myPackedSequence[index / 2] >> 4]);
01644 }
01645 else
01646 {
01647 String exceptionString = "SamRecord::getSequence(";
01648 exceptionString += index;
01649 exceptionString += ") called with no sequence set";
01650 throw std::runtime_error(exceptionString.c_str());
01651 }
01652 }
01653
01654 return(mySequence[index]);
01655 }
01656 else
01657 {
01658
01659
01660
01661 if(mySequence.Length() == 0)
01662 {
01663
01664
01665 setSequenceAndQualityFromBuffer();
01666 }
01667
01668
01669 if(translation == EQUAL)
01670 {
01671
01672
01673 if(mySeqWithEq.length() == 0)
01674 {
01675
01676
01677
01678 if(mySequence == "*")
01679 {
01680
01681 mySeqWithEq = '*';
01682 }
01683 else
01684 {
01685
01686 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
01687 myRecordPtr->myPosition,
01688 *(getCigarInfo()),
01689 getReferenceName(),
01690 *myRefPtr,
01691 mySeqWithEq);
01692 }
01693 }
01694
01695 return(mySeqWithEq[index]);
01696 }
01697 else
01698 {
01699
01700
01701
01702 if(mySeqWithoutEq.length() == 0)
01703 {
01704
01705
01706
01707 if(mySequence == "*")
01708 {
01709
01710 mySeqWithoutEq = '*';
01711 }
01712 else
01713 {
01714
01715
01716
01717 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
01718 myRecordPtr->myPosition,
01719 *(getCigarInfo()),
01720 getReferenceName(),
01721 *myRefPtr,
01722 mySeqWithoutEq);
01723 }
01724 }
01725
01726 return(mySeqWithoutEq[index]);
01727 }
01728 }
01729 }
01730
01731
01732 char SamRecord::getQuality(int index)
01733 {
01734
01735 int32_t readLen = getReadLength();
01736
01737
01738
01739 if(readLen == 0)
01740 {
01741 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01742 }
01743 else if((index < 0) || (index >= readLen))
01744 {
01745
01746 String exceptionString = "SamRecord::getQuality(";
01747 exceptionString += index;
01748 exceptionString += ") is out of range. Index must be between 0 and ";
01749 exceptionString += (readLen - 1);
01750 throw std::runtime_error(exceptionString.c_str());
01751 }
01752
01753 if(myQuality.Length() == 0)
01754 {
01755
01756
01757 return(myPackedQuality[index] + 33);
01758 }
01759 else
01760 {
01761
01762 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
01763 {
01764
01765 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
01766 }
01767 else if(index >= myQuality.Length())
01768 {
01769
01770
01771
01772 String exceptionString = "SamRecord::getQuality(";
01773 exceptionString += index;
01774 exceptionString += ") is out of range. Index must be between 0 and ";
01775 exceptionString += (myQuality.Length() - 1);
01776 throw std::runtime_error(exceptionString.c_str());
01777 }
01778 else
01779 {
01780 return(myQuality[index]);
01781 }
01782 }
01783 }
01784
01785
01786 Cigar* SamRecord::getCigarInfo()
01787 {
01788
01789
01790
01791
01792 if(myAlignmentLength == -1)
01793 {
01794
01795 parseCigar();
01796 }
01797 return(&myCigarRoller);
01798 }
01799
01800
01801
01802
01803 uint32_t SamRecord::getNumOverlaps(int32_t start, int32_t end)
01804 {
01805
01806
01807 if(myAlignmentLength == -1)
01808 {
01809 parseCigar();
01810 }
01811 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
01812 }
01813
01814
01815
01816 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01817 String& cigar, String& sequence, String& quality)
01818 {
01819 return(getFields(recStruct, readName, cigar, sequence, quality,
01820 mySequenceTranslation));
01821 }
01822
01823
01824
01825 bool SamRecord::getFields(bamRecordStruct& recStruct, String& readName,
01826 String& cigar, String& sequence, String& quality,
01827 SequenceTranslation translation)
01828 {
01829 myStatus = SamStatus::SUCCESS;
01830 if(myIsBufferSynced == false)
01831 {
01832 if(!fixBuffer(translation))
01833 {
01834
01835 return(false);
01836 }
01837 }
01838 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
01839
01840 readName = getReadName();
01841
01842 if(myStatus != SamStatus::SUCCESS)
01843 {
01844
01845 return(false);
01846 }
01847 cigar = getCigar();
01848
01849 if(myStatus != SamStatus::SUCCESS)
01850 {
01851
01852 return(false);
01853 }
01854 sequence = getSequence(translation);
01855
01856 if(myStatus != SamStatus::SUCCESS)
01857 {
01858
01859 return(false);
01860 }
01861 quality = getQuality();
01862
01863 if(myStatus != SamStatus::SUCCESS)
01864 {
01865
01866 return(false);
01867 }
01868 return(true);
01869 }
01870
01871
01872
01873 GenomeSequence* SamRecord::getReference()
01874 {
01875 return(myRefPtr);
01876 }
01877
01878
01879 uint32_t SamRecord::getTagLength()
01880 {
01881 myStatus = SamStatus::SUCCESS;
01882 if(myNeedToSetTagsFromBuffer)
01883 {
01884
01885
01886 unsigned char * tagStart =
01887 (unsigned char *)myRecordPtr->myData
01888 + myRecordPtr->myReadNameLength
01889 + myRecordPtr->myCigarLength * sizeof(int)
01890 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
01891
01892
01893
01894
01895 uint32_t nonTagSize =
01896 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
01897
01898 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
01899 return(tagSize);
01900 }
01901
01902
01903 return(myTagBufferSize);
01904 }
01905
01906
01907
01908
01909
01910
01911
01912 bool SamRecord::getNextSamTag(char* tag, char& vtype, void** value)
01913 {
01914 myStatus = SamStatus::SUCCESS;
01915 if(myNeedToSetTagsFromBuffer)
01916 {
01917 if(!setTagsFromBuffer())
01918 {
01919
01920
01921 return(false);
01922 }
01923 }
01924
01925
01926
01927 myLastTagIndex++;
01928 int maxTagIndex = extras.Capacity();
01929 if(myLastTagIndex >= maxTagIndex)
01930 {
01931
01932
01933
01934 return(false);
01935 }
01936
01937 bool tagFound = false;
01938
01939 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
01940 {
01941 if(extras.SlotInUse(myLastTagIndex))
01942 {
01943
01944 int key = extras.GetKey(myLastTagIndex);
01945 getTag(key, tag);
01946 getTypeFromKey(key, vtype);
01947 tagFound = true;
01948
01949 switch (vtype)
01950 {
01951 case 'f' :
01952 *value = getDoublePtr(myLastTagIndex);
01953 break;
01954 case 'i' :
01955 *value = getIntegerPtr(myLastTagIndex, vtype);
01956 if(vtype != 'A')
01957 {
01958
01959 vtype = 'i';
01960 }
01961 break;
01962 case 'Z' :
01963 case 'B' :
01964 *value = getStringPtr(myLastTagIndex);
01965 break;
01966 default:
01967 myStatus.setStatus(SamStatus::FAIL_PARSE,
01968 "Unknown tag type");
01969 tagFound = false;
01970 break;
01971 }
01972 }
01973 if(!tagFound)
01974 {
01975
01976 myLastTagIndex++;
01977 }
01978 }
01979 return(tagFound);
01980 }
01981
01982
01983
01984 void SamRecord::resetTagIter()
01985 {
01986 myLastTagIndex = -1;
01987 }
01988
01989
01990 bool SamRecord::isIntegerType(char vtype)
01991 {
01992 if((vtype == 'c') || (vtype == 'C') ||
01993 (vtype == 's') || (vtype == 'S') ||
01994 (vtype == 'i') || (vtype == 'I'))
01995 {
01996 return(true);
01997 }
01998 return(false);
01999 }
02000
02001
02002 bool SamRecord::isDoubleType(char vtype)
02003 {
02004 if(vtype == 'f')
02005 {
02006 return(true);
02007 }
02008 return(false);
02009 }
02010
02011
02012 bool SamRecord::isCharType(char vtype)
02013 {
02014 if(vtype == 'A')
02015 {
02016 return(true);
02017 }
02018 return(false);
02019 }
02020
02021
02022 bool SamRecord::isStringType(char vtype)
02023 {
02024 if((vtype == 'Z') || (vtype == 'B'))
02025 {
02026 return(true);
02027 }
02028 return(false);
02029 }
02030
02031
02032 bool SamRecord::getTagsString(const char* tags, String& returnString, char delim)
02033 {
02034 const char* currentTagPtr = tags;
02035
02036 returnString.Clear();
02037 myStatus = SamStatus::SUCCESS;
02038 if(myNeedToSetTagsFromBuffer)
02039 {
02040 if(!setTagsFromBuffer())
02041 {
02042
02043
02044 return(false);
02045 }
02046 }
02047
02048 bool returnStatus = true;
02049
02050 while(*currentTagPtr != '\0')
02051 {
02052
02053
02054
02055 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
02056 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
02057 {
02058 myStatus.setStatus(SamStatus::INVALID,
02059 "getTagsString called with improperly formatted tags.\n");
02060 returnStatus = false;
02061 break;
02062 }
02063
02064
02065 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
02066 currentTagPtr[3]);
02067
02068 int offset = extras.Find(key);
02069
02070 if(offset >= 0)
02071 {
02072
02073 if(!returnString.IsEmpty())
02074 {
02075 returnString += delim;
02076 }
02077 returnString += currentTagPtr[0];
02078 returnString += currentTagPtr[1];
02079 returnString += ':';
02080 returnString += currentTagPtr[3];
02081 returnString += ':';
02082
02083
02084 char vtype;
02085 getTypeFromKey(key, vtype);
02086
02087 switch(vtype)
02088 {
02089 case 'i':
02090 returnString += *(int*)getIntegerPtr(offset, vtype);
02091 break;
02092 case 'f':
02093 returnString += *(double*)getDoublePtr(offset);
02094 break;
02095 case 'Z':
02096 case 'B':
02097 returnString += *(String*)getStringPtr(offset);
02098 break;
02099 default:
02100 myStatus.setStatus(SamStatus::INVALID,
02101 "rmTag called with unknown type.\n");
02102 returnStatus = false;
02103 break;
02104 };
02105 }
02106
02107 if(currentTagPtr[4] == ';')
02108 {
02109
02110 currentTagPtr += 5;
02111 }
02112 else if(currentTagPtr[4] != '\0')
02113 {
02114
02115 myStatus.setStatus(SamStatus::INVALID,
02116 "rmTags called with improperly formatted tags.\n");
02117 returnStatus = false;
02118 break;
02119 }
02120 else
02121 {
02122
02123 currentTagPtr += 4;
02124 }
02125 }
02126 return(returnStatus);
02127 }
02128
02129
02130 const String* SamRecord::getStringTag(const char * tag)
02131 {
02132
02133 if(myNeedToSetTagsFromBuffer)
02134 {
02135 if(!setTagsFromBuffer())
02136 {
02137
02138
02139
02140 return(NULL);
02141 }
02142 }
02143
02144 int key = MAKEKEY(tag[0], tag[1], 'Z');
02145 int offset = extras.Find(key);
02146
02147 int value;
02148 if (offset < 0)
02149 {
02150
02151 key = MAKEKEY(tag[0], tag[1], 'B');
02152 offset = extras.Find(key);
02153 if(offset < 0)
02154 {
02155
02156 return(NULL);
02157 }
02158 }
02159
02160
02161 value = extras[offset];
02162 return(&(strings[value]));
02163 }
02164
02165
02166 int* SamRecord::getIntegerTag(const char * tag)
02167 {
02168
02169 myStatus = SamStatus::SUCCESS;
02170
02171 if(myNeedToSetTagsFromBuffer)
02172 {
02173 if(!setTagsFromBuffer())
02174 {
02175
02176
02177
02178 return(NULL);
02179 }
02180 }
02181
02182 int key = MAKEKEY(tag[0], tag[1], 'i');
02183 int offset = extras.Find(key);
02184
02185 int value;
02186 if (offset < 0)
02187 {
02188
02189 return(NULL);
02190 }
02191 else
02192 value = extras[offset];
02193
02194 return(&(integers[value]));
02195 }
02196
02197
02198 double* SamRecord::getDoubleTag(const char * tag)
02199 {
02200
02201 myStatus = SamStatus::SUCCESS;
02202
02203 if(myNeedToSetTagsFromBuffer)
02204 {
02205 if(!setTagsFromBuffer())
02206 {
02207
02208
02209
02210 return(NULL);
02211 }
02212 }
02213
02214 int key = MAKEKEY(tag[0], tag[1], 'f');
02215 int offset = extras.Find(key);
02216
02217 int value;
02218 if (offset < 0)
02219 {
02220
02221 return(NULL);
02222 }
02223 else
02224 value = extras[offset];
02225
02226 return(&(doubles[value]));
02227 }
02228
02229
02230 String & SamRecord::getString(const char * tag)
02231 {
02232
02233 myStatus = SamStatus::SUCCESS;
02234
02235 if(myNeedToSetTagsFromBuffer)
02236 {
02237 if(!setTagsFromBuffer())
02238 {
02239
02240
02241
02242 }
02243 }
02244
02245 int key = MAKEKEY(tag[0], tag[1], 'Z');
02246 int offset = extras.Find(key);
02247
02248 int value;
02249 if (offset < 0)
02250 {
02251
02252 key = MAKEKEY(tag[0], tag[1], 'B');
02253 offset = extras.Find(key);
02254 if (offset < 0)
02255 {
02256
02257 return(NOT_FOUND_TAG_STRING);
02258 }
02259 }
02260 value = extras[offset];
02261
02262 return strings[value];
02263 }
02264
02265 int & SamRecord::getInteger(const char * tag)
02266 {
02267
02268 myStatus = SamStatus::SUCCESS;
02269
02270 if(myNeedToSetTagsFromBuffer)
02271 {
02272 if(!setTagsFromBuffer())
02273 {
02274
02275
02276
02277 }
02278 }
02279
02280 int key = MAKEKEY(tag[0], tag[1], 'i');
02281 int offset = extras.Find(key);
02282
02283 int value;
02284 if (offset < 0)
02285 {
02286
02287 return NOT_FOUND_TAG_INT;
02288 }
02289 else
02290 value = extras[offset];
02291
02292 return integers[value];
02293 }
02294
02295
02296 double & SamRecord::getDouble(const char * tag)
02297 {
02298
02299 myStatus = SamStatus::SUCCESS;
02300
02301 if(myNeedToSetTagsFromBuffer)
02302 {
02303 if(!setTagsFromBuffer())
02304 {
02305
02306
02307
02308 }
02309 }
02310
02311 int key = MAKEKEY(tag[0], tag[1], 'f');
02312 int offset = extras.Find(key);
02313
02314 int value;
02315 if (offset < 0)
02316 {
02317
02318 return NOT_FOUND_TAG_DOUBLE;
02319 }
02320 else
02321 value = extras[offset];
02322
02323 return doubles[value];
02324 }
02325
02326
02327 bool SamRecord::checkTag(const char * tag, char type)
02328 {
02329
02330 myStatus = SamStatus::SUCCESS;
02331
02332 if(myNeedToSetTagsFromBuffer)
02333 {
02334 if(!setTagsFromBuffer())
02335 {
02336
02337
02338 return("");
02339 }
02340 }
02341
02342 int key = MAKEKEY(tag[0], tag[1], type);
02343
02344 return (extras.Find(key) != LH_NOTFOUND);
02345 }
02346
02347
02348
02349 const SamStatus& SamRecord::getStatus()
02350 {
02351 return(myStatus);
02352 }
02353
02354
02355
02356
02357
02358 bool SamRecord::allocateRecordStructure(int size)
02359 {
02360 if (allocatedSize < size)
02361 {
02362 bamRecordStruct* tmpRecordPtr =
02363 (bamRecordStruct *)realloc(myRecordPtr, size);
02364 if(tmpRecordPtr == NULL)
02365 {
02366
02367 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02368 myStatus.addError(SamStatus::FAIL_MEM, "Failed Memory Allocation.");
02369 return(false);
02370 }
02371
02372 myRecordPtr = tmpRecordPtr;
02373
02374
02375 if(myIsSequenceBufferValid)
02376 {
02377 myPackedSequence = (unsigned char *)myRecordPtr->myData +
02378 myRecordPtr->myReadNameLength +
02379 myRecordPtr->myCigarLength * sizeof(int);
02380 }
02381 if(myIsQualityBufferValid)
02382 {
02383 myPackedQuality = (unsigned char *)myRecordPtr->myData +
02384 myRecordPtr->myReadNameLength +
02385 myRecordPtr->myCigarLength * sizeof(int) +
02386 (myRecordPtr->myReadLength + 1) / 2;
02387 }
02388
02389 allocatedSize = size;
02390 }
02391 return(true);
02392 }
02393
02394
02395
02396 void* SamRecord::getStringPtr(int index)
02397 {
02398 int value = extras[index];
02399
02400 return &(strings[value]);
02401 }
02402
02403 void* SamRecord::getIntegerPtr(int offset, char& type)
02404 {
02405 int value = extras[offset];
02406
02407 type = intType[value];
02408
02409 return &(integers[value]);
02410 }
02411
02412 void* SamRecord::getDoublePtr(int offset)
02413 {
02414 int value = extras[offset];
02415
02416 return &(doubles[value]);
02417 }
02418
02419
02420
02421 bool SamRecord::fixBuffer(SequenceTranslation translation)
02422 {
02423
02424 if(myIsBufferSynced &&
02425 (myBufferSequenceTranslation == translation))
02426 {
02427
02428 return(true);
02429 }
02430
02431
02432 if(!myIsBinValid)
02433 {
02434
02435
02436 myRecordPtr->myBin =
02437 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
02438 myIsBinValid = true;
02439 }
02440
02441
02442 bool status = true;
02443
02444
02445 uint8_t newReadNameLen = getReadNameLength();
02446 uint16_t newCigarLen = getCigarLength();
02447 int32_t newReadLen = getReadLength();
02448 uint32_t newTagLen = getTagLength();
02449 uint32_t bamSequenceLen = (newReadLen+1)/2;
02450
02451
02452
02453
02454 int newBufferSize =
02455 ((unsigned char*)(&(myRecordPtr->myData)) -
02456 (unsigned char*)myRecordPtr) +
02457 newReadNameLen + ((newCigarLen)*4) +
02458 newReadLen + bamSequenceLen + newTagLen;
02459
02460 if(!allocateRecordStructure(newBufferSize))
02461 {
02462
02463 return(false);
02464 }
02465
02466
02467
02468
02469
02470
02471 bool readNameLenChange = (newReadNameLen != myRecordPtr->myReadNameLength);
02472 bool cigarLenChange = (newCigarLen != myRecordPtr->myCigarLength);
02473 bool readLenChange = (newReadLen != myRecordPtr->myReadLength);
02474
02475
02476
02477 if(myIsTagsBufferValid &&
02478 (readNameLenChange | cigarLenChange | readLenChange))
02479 {
02480 status &= setTagsFromBuffer();
02481
02482
02483 myIsTagsBufferValid = false;
02484 }
02485
02486
02487
02488
02489 if((myIsQualityBufferValid | myIsSequenceBufferValid) &&
02490 (readNameLenChange | cigarLenChange | readLenChange))
02491 {
02492 setSequenceAndQualityFromBuffer();
02493
02494
02495 myIsQualityBufferValid = false;
02496 myIsSequenceBufferValid = false;
02497 }
02498
02499
02500
02501 if((myIsCigarBufferValid) &&
02502 (readNameLenChange))
02503 {
02504 status &= parseCigarBinary();
02505 myIsCigarBufferValid = false;
02506 }
02507
02508
02509 if(!myIsReadNameBufferValid)
02510 {
02511 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
02512 newReadNameLen);
02513
02514
02515 myRecordPtr->myReadNameLength = newReadNameLen;
02516 myIsReadNameBufferValid = true;
02517 }
02518
02519 unsigned char * readNameEnds = (unsigned char*)(&(myRecordPtr->myData)) +
02520 myRecordPtr->myReadNameLength;
02521
02522
02523 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02524
02525 if(!myIsCigarBufferValid)
02526 {
02527
02528
02529 myRecordPtr->myCigarLength = newCigarLen;
02530 memcpy(packedCigar, myCigarTempBuffer,
02531 myRecordPtr->myCigarLength * sizeof(uint32_t));
02532
02533 myIsCigarBufferValid = true;
02534 }
02535
02536 unsigned char * packedSequence = readNameEnds +
02537 myRecordPtr->myCigarLength * sizeof(int);
02538 unsigned char * packedQuality = packedSequence + bamSequenceLen;
02539
02540 if(!myIsSequenceBufferValid || !myIsQualityBufferValid ||
02541 (myBufferSequenceTranslation != translation))
02542 {
02543 myRecordPtr->myReadLength = newReadLen;
02544
02545
02546 bool noQuality = false;
02547 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
02548 {
02549 noQuality = true;
02550 }
02551
02552 const char* translatedSeq = NULL;
02553
02554
02555
02556 if((!myIsSequenceBufferValid) ||
02557 (translation != myBufferSequenceTranslation))
02558 {
02559 translatedSeq = getSequence(translation);
02560 }
02561
02562 for (int i = 0; i < myRecordPtr->myReadLength; i++)
02563 {
02564 if((!myIsSequenceBufferValid) ||
02565 (translation != myBufferSequenceTranslation))
02566 {
02567
02568 int seqVal = 0;
02569 switch(translatedSeq[i])
02570 {
02571 case '=':
02572 seqVal = 0;
02573 break;
02574 case 'A':
02575 case 'a':
02576 seqVal = 1;
02577 break;
02578 case 'C':
02579 case 'c':
02580 seqVal = 2;
02581 break;
02582 case 'G':
02583 case 'g':
02584 seqVal = 4;
02585 break;
02586 case 'T':
02587 case 't':
02588 seqVal = 8;
02589 break;
02590 case 'N':
02591 case 'n':
02592 case '.':
02593 seqVal = 15;
02594 break;
02595 default:
02596 myStatus.addError(SamStatus::FAIL_PARSE,
02597 "Unknown Sequence character found.");
02598 status = false;
02599 break;
02600 };
02601
02602 if(i & 1)
02603 {
02604
02605
02606 packedSequence[i/2] |= seqVal;
02607 }
02608 else
02609 {
02610
02611 packedSequence[i/2] = seqVal << 4;
02612 }
02613 }
02614
02615 if(!myIsQualityBufferValid)
02616 {
02617
02618 if((noQuality) || (myQuality.Length() <= i))
02619 {
02620
02621
02622 packedQuality[i] = 0xFF;
02623 }
02624 else
02625 {
02626
02627 packedQuality[i] = myQuality[i] - 33;
02628 }
02629 }
02630 }
02631 myPackedSequence = (unsigned char *)myRecordPtr->myData +
02632 myRecordPtr->myReadNameLength +
02633 myRecordPtr->myCigarLength * sizeof(int);
02634 myPackedQuality = myPackedSequence +
02635 (myRecordPtr->myReadLength + 1) / 2;
02636 myIsSequenceBufferValid = true;
02637 myIsQualityBufferValid = true;
02638 myBufferSequenceTranslation = translation;
02639 }
02640
02641 if(!myIsTagsBufferValid)
02642 {
02643 status &= setTagsInBuffer();
02644 }
02645
02646
02647 myRecordPtr->myReadNameLength = newReadNameLen;
02648 myRecordPtr->myCigarLength = newCigarLen;
02649 myRecordPtr->myReadLength = newReadLen;
02650
02651
02652
02653 myRecordPtr->myBlockSize = newBufferSize - sizeof(int32_t);
02654
02655 if(status)
02656 {
02657 myIsBufferSynced = true;
02658 }
02659
02660 return(status);
02661 }
02662
02663
02664
02665
02666
02667 void SamRecord::setSequenceAndQualityFromBuffer()
02668 {
02669
02670
02671
02672
02673
02674
02675 bool extractSequence = false;
02676 if(myIsSequenceBufferValid && (mySequence.Length() == 0))
02677 {
02678 extractSequence = true;
02679 }
02680
02681
02682 bool extractQuality = false;
02683 if(myIsQualityBufferValid && (myQuality.Length() == 0))
02684 {
02685 extractQuality = true;
02686 }
02687
02688
02689
02690 if(!extractSequence && !extractQuality)
02691 {
02692 return;
02693 }
02694
02695
02696 if(extractSequence)
02697 {
02698 mySequence.SetLength(myRecordPtr->myReadLength);
02699 }
02700 if(extractQuality)
02701 {
02702 myQuality.SetLength(myRecordPtr->myReadLength);
02703 }
02704
02705 const char * asciiBases = "=AC.G...T......N";
02706
02707
02708
02709 bool qualitySpecified = false;
02710
02711 for (int i = 0; i < myRecordPtr->myReadLength; i++)
02712 {
02713 if(extractSequence)
02714 {
02715 mySequence[i] = i & 1 ?
02716 asciiBases[myPackedSequence[i / 2] & 0xF] :
02717 asciiBases[myPackedSequence[i / 2] >> 4];
02718 }
02719
02720 if(extractQuality)
02721 {
02722 if(myPackedQuality[i] != 0xFF)
02723 {
02724
02725 qualitySpecified = true;
02726 }
02727
02728 myQuality[i] = myPackedQuality[i] + 33;
02729 }
02730 }
02731
02732
02733 if(myRecordPtr->myReadLength == 0)
02734 {
02735 if(extractSequence)
02736 {
02737 mySequence = "*";
02738 }
02739 if(extractQuality)
02740 {
02741 myQuality = "*";
02742 }
02743 }
02744 else if(extractQuality && !qualitySpecified)
02745 {
02746
02747 myQuality = "*";
02748 }
02749 }
02750
02751
02752
02753 bool SamRecord::parseCigar()
02754 {
02755
02756 if(myCigar.Length() == 0)
02757 {
02758
02759 return(parseCigarBinary());
02760 }
02761 return(parseCigarString());
02762 }
02763
02764
02765 bool SamRecord::parseCigarBinary()
02766 {
02767
02768
02769
02770 if(myCigar.Length() != 0)
02771 {
02772
02773 return(true);
02774 }
02775
02776 unsigned char * readNameEnds =
02777 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength;
02778
02779 unsigned int * packedCigar = (unsigned int *) (void *) readNameEnds;
02780
02781 myCigarRoller.Set(packedCigar, myRecordPtr->myCigarLength);
02782
02783 myCigarRoller.getCigarString(myCigar);
02784
02785 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02786
02787 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02788 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02789
02790
02791 if(myRecordPtr->myCigarLength == 0)
02792 {
02793 myCigar = "*";
02794 return(true);
02795 }
02796
02797
02798 int newBufferSize = myRecordPtr->myCigarLength * sizeof(uint32_t);
02799 if(newBufferSize > myCigarTempBufferAllocatedSize)
02800 {
02801 uint32_t* tempBufferPtr =
02802 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02803 if(tempBufferPtr == NULL)
02804 {
02805
02806
02807 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02808 myStatus.addError(SamStatus::FAIL_MEM,
02809 "Failed to Allocate Memory.");
02810 return(false);
02811 }
02812 myCigarTempBuffer = tempBufferPtr;
02813 myCigarTempBufferAllocatedSize = newBufferSize;
02814 }
02815
02816 memcpy(myCigarTempBuffer, packedCigar,
02817 myRecordPtr->myCigarLength * sizeof(uint32_t));
02818
02819
02820 myCigarTempBufferLength = myRecordPtr->myCigarLength;
02821
02822 return(true);
02823 }
02824
02825
02826 bool SamRecord::parseCigarString()
02827 {
02828 myCigarTempBufferLength = 0;
02829 if(myCigar == "*")
02830 {
02831
02832 myAlignmentLength = 0;
02833 myUnclippedStartOffset = 0;
02834 myUnclippedEndOffset = 0;
02835 myCigarRoller.clear();
02836 return(true);
02837 }
02838
02839 myCigarRoller.Set(myCigar);
02840
02841 myAlignmentLength = myCigarRoller.getExpectedReferenceBaseCount();
02842
02843 myUnclippedStartOffset = myCigarRoller.getNumBeginClips();
02844 myUnclippedEndOffset = myCigarRoller.getNumEndClips();
02845
02846
02847
02848
02849 int newBufferSize = myCigar.Length() * sizeof(uint32_t);
02850 if(newBufferSize > myCigarTempBufferAllocatedSize)
02851 {
02852 uint32_t* tempBufferPtr =
02853 (uint32_t*)realloc(myCigarTempBuffer, newBufferSize);
02854 if(tempBufferPtr == NULL)
02855 {
02856
02857
02858 fprintf(stderr, "FAILED TO ALLOCATE MEMORY!!!");
02859 myStatus.addError(SamStatus::FAIL_MEM,
02860 "Failed to Allocate Memory.");
02861 return(false);
02862 }
02863 myCigarTempBuffer = tempBufferPtr;
02864 myCigarTempBufferAllocatedSize = newBufferSize;
02865 }
02866
02867
02868 bool status = true;
02869
02870
02871 char *cigarOp;
02872 const char* cigarEntryStart = myCigar.c_str();
02873 int opLen = 0;
02874 int op = 0;
02875
02876 unsigned int * packedCigar = myCigarTempBuffer;
02877
02878
02879 const char* endCigarString = cigarEntryStart + myCigar.Length();
02880 while(cigarEntryStart < endCigarString)
02881 {
02882 bool validCigarEntry = true;
02883
02884
02885 opLen = strtol(cigarEntryStart, &cigarOp, 10);
02886
02887 switch(*cigarOp)
02888 {
02889 case('M'):
02890 op = 0;
02891 break;
02892 case('I'):
02893
02894
02895 op = 1;
02896 break;
02897 case('D'):
02898 op = 2;
02899 break;
02900 case('N'):
02901 op = 3;
02902 break;
02903 case('S'):
02904 op = 4;
02905 break;
02906 case('H'):
02907 op = 5;
02908 break;
02909 case('P'):
02910 op = 6;
02911 break;
02912 default:
02913 fprintf(stderr, "ERROR parsing cigar\n");
02914 validCigarEntry = false;
02915 status = false;
02916 myStatus.addError(SamStatus::FAIL_PARSE,
02917 "Unknown operation found when parsing the Cigar.");
02918 break;
02919 };
02920 if(validCigarEntry)
02921 {
02922
02923 ++myCigarTempBufferLength;
02924 *packedCigar = (opLen << 4) | op;
02925 packedCigar++;
02926 }
02927
02928 cigarEntryStart = ++cigarOp;
02929 }
02930
02931
02932 return(status);
02933 }
02934
02935
02936 bool SamRecord::setTagsFromBuffer()
02937 {
02938
02939 if(myNeedToSetTagsFromBuffer == false)
02940 {
02941
02942 return(true);
02943 }
02944
02945
02946 myNeedToSetTagsFromBuffer = false;
02947
02948 unsigned char * extraPtr = myPackedQuality + myRecordPtr->myReadLength;
02949
02950
02951 bool status = true;
02952
02953
02954 clearTags();
02955 while (myRecordPtr->myBlockSize + 4 -
02956 (extraPtr - (unsigned char *)myRecordPtr) > 0)
02957 {
02958 int key = 0;
02959 int value = 0;
02960 void * content = extraPtr + 3;
02961 int tagBufferSize = 0;
02962
02963 key = MAKEKEY(extraPtr[0], extraPtr[1], extraPtr[2]);
02964
02965 switch (extraPtr[2])
02966 {
02967 case 'A' :
02968 value = integers.Length();
02969 integers.Push(* (char *) content);
02970 intType.push_back(extraPtr[2]);
02971 extraPtr += 4;
02972 tagBufferSize += 4;
02973 break;
02974 case 'c' :
02975 value = integers.Length();
02976 integers.Push(* (char *) content);
02977 intType.push_back(extraPtr[2]);
02978 extraPtr += 4;
02979 tagBufferSize += 4;
02980 break;
02981 case 'C' :
02982 value = integers.Length();
02983 integers.Push(* (unsigned char *) content);
02984 intType.push_back(extraPtr[2]);
02985 extraPtr += 4;
02986 tagBufferSize += 4;
02987 break;
02988 case 's' :
02989 value = integers.Length();
02990 integers.Push(* (short *) content);
02991 intType.push_back(extraPtr[2]);
02992 extraPtr += 5;
02993 tagBufferSize += 5;
02994 break;
02995 case 'S' :
02996 value = integers.Length();
02997 integers.Push(* (unsigned short *) content);
02998 intType.push_back(extraPtr[2]);
02999 extraPtr += 5;
03000 tagBufferSize += 5;
03001 break;
03002 case 'i' :
03003 value = integers.Length();
03004 integers.Push(* (int *) content);
03005 intType.push_back(extraPtr[2]);
03006 extraPtr += 7;
03007 tagBufferSize += 7;
03008 break;
03009 case 'I' :
03010 value = integers.Length();
03011 integers.Push((int) * (unsigned int *) content);
03012 intType.push_back(extraPtr[2]);
03013 extraPtr += 7;
03014 tagBufferSize += 7;
03015 break;
03016 case 'Z' :
03017 case 'B' :
03018 value = strings.Length();
03019 strings.Push((const char *) content);
03020 extraPtr += 4 + strings.Last().Length();
03021 tagBufferSize += 4 + strings.Last().Length();
03022 break;
03023 case 'f' :
03024 value = doubles.Length();
03025 doubles.Push(* (float *) content);
03026 fprintf(stderr, "\n\nFLOAT!!!\n\n");
03027 extraPtr += 7;
03028 tagBufferSize += 7;
03029 break;
03030 default :
03031 fprintf(stderr,
03032 "parsing BAM - Unknown custom field of type %c%c:%c\n",
03033 extraPtr[0], extraPtr[1], extraPtr[2]);
03034
03035
03036 extraPtr += 3;
03037 myStatus.addError(SamStatus::FAIL_PARSE,
03038 "Unknown tag type.");
03039 status = false;
03040 }
03041
03042
03043 if(status)
03044 {
03045 extras.Add(key, value);
03046 myTagBufferSize += tagBufferSize;
03047 }
03048 }
03049 return(status);
03050 }
03051
03052
03053 bool SamRecord::setTagsInBuffer()
03054 {
03055
03056
03057
03058 int bamSequenceLength = (myRecordPtr->myReadLength+1)/2;
03059 int newBufferSize = ((unsigned char*)(&(myRecordPtr->myData)) -
03060 (unsigned char*)myRecordPtr) +
03061 myRecordPtr->myReadNameLength + ((myRecordPtr->myCigarLength)*4) +
03062 myRecordPtr->myReadLength + bamSequenceLength + myTagBufferSize;
03063
03064
03065 if(!allocateRecordStructure(newBufferSize))
03066 {
03067
03068 return(false);
03069 }
03070
03071 char * extraPtr = (char*)myPackedQuality + myRecordPtr->myReadLength;
03072
03073 bool status = true;
03074
03075
03076 if (extras.Entries())
03077 {
03078 for (int i = 0; i < extras.Capacity(); i++)
03079 {
03080 if (extras.SlotInUse(i))
03081 {
03082 int key = extras.GetKey(i);
03083 getTag(key, extraPtr);
03084 extraPtr += 2;
03085 char vtype;
03086 getTypeFromKey(key, vtype);
03087
03088 if(vtype == 'i')
03089 {
03090 vtype = getIntegerType(i);
03091 }
03092
03093 extraPtr[0] = vtype;
03094
03095
03096 extraPtr += 1;
03097
03098 switch (vtype)
03099 {
03100 case 'A' :
03101 *(char*)extraPtr = (char)getInteger(i);
03102
03103 extraPtr += 1;
03104 break;
03105 case 'c' :
03106 *(int8_t*)extraPtr = (int8_t)getInteger(i);
03107
03108 extraPtr += 1;
03109 break;
03110 case 'C' :
03111 *(uint8_t*)extraPtr = (uint8_t)getInteger(i);
03112
03113 extraPtr += 1;
03114 break;
03115 case 's' :
03116 *(int16_t*)extraPtr = (int16_t)getInteger(i);
03117
03118 extraPtr += 2;
03119 break;
03120 case 'S' :
03121 *(uint16_t*)extraPtr = (uint16_t)getInteger(i);
03122
03123 extraPtr += 2;
03124 break;
03125 case 'i' :
03126 *(int32_t*)extraPtr = (int32_t)getInteger(i);
03127
03128 extraPtr += 4;
03129 break;
03130 case 'I' :
03131 *(uint32_t*)extraPtr = (uint32_t)getInteger(i);
03132
03133 extraPtr += 4;
03134 break;
03135 case 'Z' :
03136 case 'B' :
03137 sprintf(extraPtr, "%s", getString(i).c_str());
03138 extraPtr += getString(i).Length() + 1;
03139 break;
03140 case 'f' :
03141
03142 sprintf(extraPtr, "%f", getDouble(i));
03143 extraPtr += 4;
03144 break;
03145 default :
03146 myStatus.addError(SamStatus::FAIL_PARSE,
03147 "Unknown tag type.");
03148 status = false;
03149 break;
03150 }
03151 }
03152 }
03153 }
03154
03155
03156
03157 if(extraPtr != (char*)myRecordPtr + newBufferSize)
03158 {
03159 fprintf(stderr, "ERROR updating the buffer. Incorrect size.");
03160 myStatus.addError(SamStatus::FAIL_PARSE,
03161 "ERROR updating the buffer. Incorrect size.");
03162 status = false;
03163 }
03164
03165
03166
03167 myNeedToSetTagsInBuffer = false;
03168 myIsTagsBufferValid = true;
03169 return(status);
03170 }
03171
03172
03173
03174
03175
03176 void SamRecord::setVariablesForNewBuffer(SamFileHeader& header)
03177 {
03178
03179
03180 myReferenceName =
03181 header.getReferenceLabel(myRecordPtr->myReferenceID);
03182 myMateReferenceName =
03183 header.getReferenceLabel(myRecordPtr->myMateReferenceID);
03184
03185
03186 myReadName.SetLength(0);
03187 myCigar.SetLength(0);
03188 mySequence.SetLength(0);
03189 mySeqWithEq.clear();
03190 mySeqWithoutEq.clear();
03191 myQuality.SetLength(0);
03192 myNeedToSetTagsFromBuffer = true;
03193 myNeedToSetTagsInBuffer = false;
03194
03195
03196 myIsBufferSynced = true;
03197
03198 myIsReadNameBufferValid = true;
03199 myIsCigarBufferValid = true;
03200 myPackedSequence = (unsigned char *)myRecordPtr->myData +
03201 myRecordPtr->myReadNameLength + myRecordPtr->myCigarLength * sizeof(int);
03202 myIsSequenceBufferValid = true;
03203 myBufferSequenceTranslation = NONE;
03204 myPackedQuality = myPackedSequence +
03205 (myRecordPtr->myReadLength + 1) / 2;
03206 myIsQualityBufferValid = true;
03207 myIsTagsBufferValid = true;
03208 myIsBinValid = true;
03209 }
03210
03211
03212
03213 void SamRecord::getTypeFromKey(int key, char& type) const
03214 {
03215
03216 type = (key >> 16) & 0xFF;
03217 }
03218
03219
03220
03221 void SamRecord::getTag(int key, char* tag) const
03222 {
03223
03224 tag[0] = key & 0xFF;
03225 tag[1] = (key >> 8) & 0xFF;
03226 tag[2] = 0;
03227 }
03228
03229
03230
03231 String & SamRecord::getString(int index)
03232 {
03233 int value = extras[index];
03234
03235 return strings[value];
03236 }
03237
03238 int & SamRecord::getInteger(int offset)
03239 {
03240 int value = extras[offset];
03241
03242 return integers[value];
03243 }
03244
03245 char & SamRecord::getIntegerType(int offset)
03246 {
03247 int value = extras[offset];
03248
03249 return intType[value];
03250 }
03251
03252 double & SamRecord::getDouble(int offset)
03253 {
03254 int value = extras[offset];
03255
03256 return doubles[value];
03257 }
03258
03259