00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include <stdexcept>
00018 #include "SamFile.h"
00019 #include "SamFileHeader.h"
00020 #include "SamRecord.h"
00021 #include "BamInterface.h"
00022 #include "SamInterface.h"
00023 #include "BgzfFileType.h"
00024
00025
00026 SamFile::SamFile()
00027 : myStatus()
00028 {
00029 init();
00030 resetFile();
00031 }
00032
00033
00034
00035 SamFile::SamFile(ErrorHandler::HandlingType errorHandlingType)
00036 : myStatus(errorHandlingType)
00037 {
00038 init();
00039 resetFile();
00040 }
00041
00042
00043
00044
00045 SamFile::SamFile(const char* filename, OpenType mode)
00046 : myStatus()
00047 {
00048 init(filename, mode, NULL);
00049 }
00050
00051
00052
00053
00054 SamFile::SamFile(const char* filename, OpenType mode,
00055 ErrorHandler::HandlingType errorHandlingType)
00056 : myStatus(errorHandlingType)
00057 {
00058 init(filename, mode, NULL);
00059 }
00060
00061
00062
00063
00064 SamFile::SamFile(const char* filename, OpenType mode, SamFileHeader* header)
00065 : myStatus()
00066 {
00067 init(filename, mode, header);
00068 }
00069
00070
00071
00072
00073 SamFile::SamFile(const char* filename, OpenType mode,
00074 ErrorHandler::HandlingType errorHandlingType,
00075 SamFileHeader* header)
00076 : myStatus(errorHandlingType)
00077 {
00078 init(filename, mode, header);
00079 }
00080
00081
00082 SamFile::~SamFile()
00083 {
00084 resetFile();
00085 if(myStatistics != NULL)
00086 {
00087 delete myStatistics;
00088 }
00089 }
00090
00091
00092
00093 bool SamFile::OpenForRead(const char * filename, SamFileHeader* header)
00094 {
00095
00096 resetFile();
00097
00098 int lastchar = 0;
00099
00100 while (filename[lastchar] != 0) lastchar++;
00101
00102
00103 if((lastchar >= 1) && (filename[0] == '-'))
00104 {
00105
00106
00107 if(strcmp(filename, "-.bam") == 0)
00108 {
00109
00110
00111 filename = "-";
00112
00113 myFilePtr = new InputFile;
00114
00115
00116 myFilePtr->setAttemptRecovery(myAttemptRecovery);
00117 myFilePtr->openFile(filename, "rb", InputFile::BGZF);
00118
00119 myInterfacePtr = new BamInterface;
00120
00121
00122 char magic[4];
00123 ifread(myFilePtr, magic, 4);
00124 }
00125 else if(strcmp(filename, "-.ubam") == 0)
00126 {
00127
00128
00129
00130
00131
00132 filename = "-";
00133
00134
00135 #ifdef __ZLIB_AVAILABLE__
00136 BgzfFileType::setRequireEofBlock(false);
00137 #endif
00138 myFilePtr = ifopen(filename, "rb", InputFile::BGZF);
00139
00140 myInterfacePtr = new BamInterface;
00141
00142
00143 char magic[4];
00144 ifread(myFilePtr, magic, 4);
00145 }
00146 else
00147 {
00148
00149
00150 filename = "-";
00151 myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED);
00152 myInterfacePtr = new SamInterface;
00153 }
00154 }
00155 else
00156 {
00157
00158
00159 myFilePtr = new InputFile;
00160
00161
00162
00163 myFilePtr->setAttemptRecovery(myAttemptRecovery);
00164 bool rc = myFilePtr->openFile(filename, "rb", InputFile::DEFAULT);
00165
00166 if (rc == false)
00167 {
00168 std::string errorMessage = "Failed to Open ";
00169 errorMessage += filename;
00170 errorMessage += " for reading";
00171 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
00172 delete myFilePtr;
00173 myFilePtr = NULL;
00174 return(false);
00175 }
00176
00177 char magic[4];
00178 ifread(myFilePtr, magic, 4);
00179
00180 if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' &&
00181 magic[3] == 1)
00182 {
00183 myInterfacePtr = new BamInterface;
00184
00185
00186 myIsBamOpenForRead = true;
00187 }
00188 else
00189 {
00190
00191
00192 ifrewind(myFilePtr);
00193 myInterfacePtr = new SamInterface;
00194 }
00195 }
00196
00197
00198 myIsOpenForRead = true;
00199
00200
00201 if(header != NULL)
00202 {
00203 return(ReadHeader(*header));
00204 }
00205
00206
00207 myStatus = SamStatus::SUCCESS;
00208 return(true);
00209 }
00210
00211
00212
00213 bool SamFile::OpenForWrite(const char * filename, SamFileHeader* header)
00214 {
00215
00216 resetFile();
00217
00218 int lastchar = 0;
00219 while (filename[lastchar] != 0) lastchar++;
00220 if (lastchar >= 4 &&
00221 filename[lastchar - 4] == 'u' &&
00222 filename[lastchar - 3] == 'b' &&
00223 filename[lastchar - 2] == 'a' &&
00224 filename[lastchar - 1] == 'm')
00225 {
00226
00227
00228 if((lastchar == 6) && (filename[0] == '-') && (filename[1] == '.'))
00229 {
00230 filename = "-";
00231 }
00232
00233 myFilePtr = ifopen(filename, "wb0", InputFile::BGZF);
00234
00235 myInterfacePtr = new BamInterface;
00236 }
00237 else if (lastchar >= 3 &&
00238 filename[lastchar - 3] == 'b' &&
00239 filename[lastchar - 2] == 'a' &&
00240 filename[lastchar - 1] == 'm')
00241 {
00242
00243
00244 if((lastchar == 5) && (filename[0] == '-') && (filename[1] == '.'))
00245 {
00246 filename = "-";
00247 }
00248 myFilePtr = ifopen(filename, "wb", InputFile::BGZF);
00249
00250 myInterfacePtr = new BamInterface;
00251 }
00252 else
00253 {
00254
00255
00256
00257 if((lastchar >= 1) && (filename[0] == '-'))
00258 {
00259 filename = "-";
00260 }
00261 myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
00262
00263 myInterfacePtr = new SamInterface;
00264 }
00265
00266 if (myFilePtr == NULL)
00267 {
00268 std::string errorMessage = "Failed to Open ";
00269 errorMessage += filename;
00270 errorMessage += " for writing";
00271 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
00272 return(false);
00273 }
00274
00275 myIsOpenForWrite = true;
00276
00277
00278 if(header != NULL)
00279 {
00280 return(WriteHeader(*header));
00281 }
00282
00283
00284 myStatus = SamStatus::SUCCESS;
00285 return(true);
00286 }
00287
00288
00289
00290 bool SamFile::ReadBamIndex(const char* bamIndexFilename)
00291 {
00292
00293 if(myBamIndex != NULL)
00294 {
00295 delete myBamIndex;
00296 myBamIndex = NULL;
00297 }
00298
00299
00300 myBamIndex = new BamIndex();
00301 SamStatus::Status indexStat = myBamIndex->readIndex(bamIndexFilename);
00302
00303 if(indexStat != SamStatus::SUCCESS)
00304 {
00305 std::string errorMessage = "Failed to read the bam Index file: ";
00306 errorMessage += bamIndexFilename;
00307 myStatus.setStatus(indexStat, errorMessage.c_str());
00308 delete myBamIndex;
00309 myBamIndex = NULL;
00310 return(false);
00311 }
00312 myStatus = SamStatus::SUCCESS;
00313 return(true);
00314 }
00315
00316
00317
00318 bool SamFile::ReadBamIndex()
00319 {
00320 if(myFilePtr == NULL)
00321 {
00322
00323
00324 std::string errorMessage = "Failed to read the bam Index file -"
00325 " the BAM file needs to be read first in order to determine"
00326 " the index filename.";
00327 myStatus.setStatus(SamStatus::FAIL_ORDER, errorMessage.c_str());
00328 return(false);
00329 }
00330
00331 const char* bamBaseName = myFilePtr->getFileName();
00332
00333 std::string indexName = bamBaseName;
00334 indexName += ".bai";
00335
00336 bool foundFile = true;
00337 try
00338 {
00339 if(ReadBamIndex(indexName.c_str()) == false)
00340 {
00341 foundFile = false;
00342 }
00343 }
00344 catch (std::exception& e)
00345 {
00346 foundFile = false;
00347 }
00348
00349
00350 if(!foundFile)
00351 {
00352
00353
00354 size_t startExt = indexName.find(".bam");
00355 if(startExt == std::string::npos)
00356 {
00357
00358
00359 return(false);
00360 }
00361
00362 indexName.erase(startExt, 4);
00363 return(ReadBamIndex(indexName.c_str()));
00364 }
00365 return(true);
00366 }
00367
00368
00369
00370 void SamFile::SetReference(GenomeSequence* reference)
00371 {
00372 myRefPtr = reference;
00373 }
00374
00375
00376
00377 void SamFile::SetReadSequenceTranslation(SamRecord::SequenceTranslation translation)
00378 {
00379 myReadTranslation = translation;
00380 }
00381
00382
00383
00384 void SamFile::SetWriteSequenceTranslation(SamRecord::SequenceTranslation translation)
00385 {
00386 myWriteTranslation = translation;
00387 }
00388
00389
00390 void SamFile::Close()
00391 {
00392
00393
00394 resetFile();
00395 }
00396
00397
00398
00399
00400 bool SamFile::IsOpen()
00401 {
00402 if (myFilePtr != NULL)
00403 {
00404
00405 return(myFilePtr->isOpen());
00406 }
00407
00408 return false;
00409 }
00410
00411
00412
00413
00414 bool SamFile::IsEOF()
00415 {
00416 if (myFilePtr != NULL)
00417 {
00418
00419 return(ifeof(myFilePtr));
00420 }
00421
00422 return true;
00423 }
00424
00425
00426
00427 bool SamFile::ReadHeader(SamFileHeader& header)
00428 {
00429 myStatus = SamStatus::SUCCESS;
00430 if(myIsOpenForRead == false)
00431 {
00432
00433 myStatus.setStatus(SamStatus::FAIL_ORDER,
00434 "Cannot read header since the file is not open for reading");
00435 return(false);
00436 }
00437
00438 if(myHasHeader == true)
00439 {
00440
00441 myStatus.setStatus(SamStatus::FAIL_ORDER,
00442 "Cannot read header since it has already been read.");
00443 return(false);
00444 }
00445
00446 if(myInterfacePtr->readHeader(myFilePtr, header, myStatus))
00447 {
00448
00449 myHasHeader = true;
00450 return(true);
00451 }
00452 return(false);
00453 }
00454
00455
00456
00457 bool SamFile::WriteHeader(SamFileHeader& header)
00458 {
00459 myStatus = SamStatus::SUCCESS;
00460 if(myIsOpenForWrite == false)
00461 {
00462
00463
00464
00465 myStatus.setStatus(SamStatus::FAIL_ORDER,
00466 "Cannot write header since the file is not open for writing");
00467 return(false);
00468 }
00469
00470 if(myHasHeader == true)
00471 {
00472
00473 myStatus.setStatus(SamStatus::FAIL_ORDER,
00474 "Cannot write header since it has already been written");
00475 return(false);
00476 }
00477
00478 if(myInterfacePtr->writeHeader(myFilePtr, header, myStatus))
00479 {
00480
00481 myHasHeader = true;
00482 return(true);
00483 }
00484
00485
00486 return(false);
00487 }
00488
00489
00490
00491 bool SamFile::ReadRecord(SamFileHeader& header,
00492 SamRecord& record)
00493 {
00494 myStatus = SamStatus::SUCCESS;
00495
00496 if(myIsOpenForRead == false)
00497 {
00498
00499 myStatus.setStatus(SamStatus::FAIL_ORDER,
00500 "Cannot read record since the file is not open for reading");
00501 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file."));
00502 return(false);
00503 }
00504
00505 if(myHasHeader == false)
00506 {
00507
00508
00509 myStatus.setStatus(SamStatus::FAIL_ORDER,
00510 "Cannot read record since the header has not been read.");
00511 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header."));
00512 return(false);
00513 }
00514
00515
00516
00517 if(myNewSection)
00518 {
00519 if(!processNewSection(header))
00520 {
00521
00522
00523
00524
00525 return(false);
00526 }
00527 }
00528
00529
00530
00531 while(myStatus == SamStatus::SUCCESS)
00532 {
00533 record.setReference(myRefPtr);
00534 record.setSequenceTranslation(myReadTranslation);
00535
00536
00537
00538
00539
00540 if(!ensureIndexedReadPosition())
00541 {
00542
00543
00544
00545 break;
00546 }
00547
00548
00549
00550 myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
00551 if(myStatus != SamStatus::SUCCESS)
00552 {
00553
00554 break;
00555 }
00556
00557
00558
00559
00560
00561
00562 if(!checkRecordInSection(record))
00563 {
00564
00565
00566 continue;
00567 }
00568
00569
00570 uint16_t flag = record.getFlag();
00571 if((flag & myRequiredFlags) != myRequiredFlags)
00572 {
00573
00574
00575 continue;
00576 }
00577 if((flag & myExcludedFlags) != 0)
00578 {
00579
00580 continue;
00581 }
00582
00583
00584 myRecordCount++;
00585
00586 if(myStatistics != NULL)
00587 {
00588
00589 myStatistics->updateStatistics(record);
00590 }
00591
00592
00593 if(!validateSortOrder(record, header))
00594 {
00595
00596 return(false);
00597 }
00598 return(true);
00599
00600 }
00601
00602
00603 return(myStatus == SamStatus::SUCCESS);
00604 }
00605
00606
00607
00608
00609 bool SamFile::WriteRecord(SamFileHeader& header,
00610 SamRecord& record)
00611 {
00612 if(myIsOpenForWrite == false)
00613 {
00614
00615 myStatus.setStatus(SamStatus::FAIL_ORDER,
00616 "Cannot write record since the file is not open for writing");
00617 return(false);
00618 }
00619
00620 if(myHasHeader == false)
00621 {
00622
00623 myStatus.setStatus(SamStatus::FAIL_ORDER,
00624 "Cannot write record since the header has not been written");
00625 return(false);
00626 }
00627
00628
00629 if(!validateSortOrder(record, header))
00630 {
00631
00632 myStatus.setStatus(SamStatus::INVALID_SORT,
00633 "Cannot write the record since the file is not properly sorted.");
00634 return(false);
00635 }
00636
00637 if(myRefPtr != NULL)
00638 {
00639 record.setReference(myRefPtr);
00640 }
00641
00642
00643
00644 myStatus = myInterfacePtr->writeRecord(myFilePtr, header, record,
00645 myWriteTranslation);
00646
00647 if(myStatus == SamStatus::SUCCESS)
00648 {
00649
00650 myRecordCount++;
00651 return(true);
00652 }
00653 return(false);
00654 }
00655
00656
00657
00658
00659 void SamFile::setSortedValidation(SortedType sortType)
00660 {
00661 mySortedType = sortType;
00662 }
00663
00664
00665
00666 uint32_t SamFile::GetCurrentRecordCount()
00667 {
00668 return(myRecordCount);
00669 }
00670
00671
00672
00673 bool SamFile::SetReadSection(int32_t refID)
00674 {
00675
00676 return(SetReadSection(refID, -1, -1));
00677 }
00678
00679
00680
00681
00682 bool SamFile::SetReadSection(const char* refName)
00683 {
00684
00685 return(SetReadSection(refName, -1, -1));
00686 }
00687
00688
00689
00690 bool SamFile::SetReadSection(int32_t refID, int32_t start, int32_t end,
00691 bool overlap)
00692 {
00693
00694
00695
00696 if(!myIsBamOpenForRead)
00697 {
00698
00699 myStatus.setStatus(SamStatus::FAIL_ORDER,
00700 "Cannot set section since there is no bam file open");
00701 return(false);
00702 }
00703
00704 myNewSection = true;
00705 myOverlapSection = overlap;
00706 myStartPos = start;
00707 myEndPos = end;
00708 myRefID = refID;
00709 myRefName.clear();
00710 myChunksToRead.clear();
00711
00712
00713 myCurrentChunkEnd = 0;
00714 myStatus = SamStatus::SUCCESS;
00715
00716
00717 myPrevCoord = -1;
00718 myPrevRefID = 0;
00719 myPrevReadName.clear();
00720
00721 return(true);
00722 }
00723
00724
00725
00726 bool SamFile::SetReadSection(const char* refName, int32_t start, int32_t end,
00727 bool overlap)
00728 {
00729
00730
00731
00732 if(!myIsBamOpenForRead)
00733 {
00734
00735 myStatus.setStatus(SamStatus::FAIL_ORDER,
00736 "Cannot set section since there is no bam file open");
00737 return(false);
00738 }
00739
00740 myNewSection = true;
00741 myOverlapSection = overlap;
00742 myStartPos = start;
00743 myEndPos = end;
00744 if((strcmp(refName, "") == 0) || (strcmp(refName, "*") == 0))
00745 {
00746
00747 myRefID = BamIndex::REF_ID_UNMAPPED;
00748 }
00749 else
00750 {
00751
00752
00753 myRefName = refName;
00754 myRefID = BamIndex::REF_ID_ALL;
00755 }
00756 myChunksToRead.clear();
00757
00758
00759 myCurrentChunkEnd = 0;
00760 myStatus = SamStatus::SUCCESS;
00761
00762
00763 myPrevCoord = -1;
00764 myPrevRefID = 0;
00765 myPrevReadName.clear();
00766
00767 return(true);
00768 }
00769
00770
00771 void SamFile::SetReadFlags(uint16_t requiredFlags, uint16_t excludedFlags)
00772 {
00773 myRequiredFlags = requiredFlags;
00774 myExcludedFlags = excludedFlags;
00775 }
00776
00777
00778
00779
00780 int32_t SamFile::getNumMappedReadsFromIndex(int32_t refID)
00781 {
00782
00783 if(myBamIndex == NULL)
00784 {
00785 myStatus.setStatus(SamStatus::FAIL_ORDER,
00786 "Cannot get num mapped reads from the index until it has been read.");
00787 return(false);
00788 }
00789 return(myBamIndex->getNumMappedReads(refID));
00790 }
00791
00792
00793
00794
00795 int32_t SamFile::getNumUnMappedReadsFromIndex(int32_t refID)
00796 {
00797
00798 if(myBamIndex == NULL)
00799 {
00800 myStatus.setStatus(SamStatus::FAIL_ORDER,
00801 "Cannot get num unmapped reads from the index until it has been read.");
00802 return(false);
00803 }
00804 return(myBamIndex->getNumUnMappedReads(refID));
00805 }
00806
00807
00808
00809
00810 int32_t SamFile::getNumMappedReadsFromIndex(const char* refName,
00811 SamFileHeader& header)
00812 {
00813
00814 if(myBamIndex == NULL)
00815 {
00816 myStatus.setStatus(SamStatus::FAIL_ORDER,
00817 "Cannot get num mapped reads from the index until it has been read.");
00818 return(false);
00819 }
00820 int32_t refID = BamIndex::REF_ID_UNMAPPED;
00821 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
00822 {
00823
00824 refID = header.getReferenceID(refName);
00825 }
00826 return(myBamIndex->getNumMappedReads(refID));
00827 }
00828
00829
00830
00831
00832 int32_t SamFile::getNumUnMappedReadsFromIndex(const char* refName,
00833 SamFileHeader& header)
00834 {
00835
00836 if(myBamIndex == NULL)
00837 {
00838 myStatus.setStatus(SamStatus::FAIL_ORDER,
00839 "Cannot get num unmapped reads from the index until it has been read.");
00840 return(false);
00841 }
00842 int32_t refID = BamIndex::REF_ID_UNMAPPED;
00843 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
00844 {
00845
00846 refID = header.getReferenceID(refName);
00847 }
00848 return(myBamIndex->getNumUnMappedReads(refID));
00849 }
00850
00851
00852
00853
00854 uint32_t SamFile::GetNumOverlaps(SamRecord& samRecord)
00855 {
00856 if(myRefPtr != NULL)
00857 {
00858 samRecord.setReference(myRefPtr);
00859 }
00860 samRecord.setSequenceTranslation(myReadTranslation);
00861
00862
00863
00864 return(samRecord.getNumOverlaps(myStartPos, myEndPos));
00865 }
00866
00867
00868 void SamFile::GenerateStatistics(bool genStats)
00869 {
00870 if(genStats)
00871 {
00872 if(myStatistics == NULL)
00873 {
00874
00875
00876 myStatistics = new SamStatistics();
00877 }
00878 }
00879 else
00880 {
00881
00882
00883 if(myStatistics != NULL)
00884 {
00885 delete myStatistics;
00886 myStatistics = NULL;
00887 }
00888 }
00889
00890 }
00891
00892
00893 const BamIndex* SamFile::GetBamIndex()
00894 {
00895 return(myBamIndex);
00896 }
00897
00898
00899
00900 void SamFile::init()
00901 {
00902 myFilePtr = NULL;
00903 myInterfacePtr = NULL;
00904 myStatistics = NULL;
00905 myBamIndex = NULL;
00906 myRefPtr = NULL;
00907 myReadTranslation = SamRecord::NONE;
00908 myWriteTranslation = SamRecord::NONE;
00909 myAttemptRecovery = false;
00910 myRequiredFlags = 0;
00911 myExcludedFlags = 0;
00912 }
00913
00914
00915 void SamFile::init(const char* filename, OpenType mode, SamFileHeader* header)
00916 {
00917 init();
00918
00919 resetFile();
00920
00921 bool openStatus = true;
00922 if(mode == READ)
00923 {
00924
00925 openStatus = OpenForRead(filename, header);
00926 }
00927 else
00928 {
00929
00930 openStatus = OpenForWrite(filename, header);
00931 }
00932 if(!openStatus)
00933 {
00934
00935 fprintf(stderr, "%s\n", GetStatusMessage());
00936 std::cerr << "FAILURE - EXITING!!!" << std::endl;
00937 exit(-1);
00938 }
00939 }
00940
00941
00942
00943 void SamFile::resetFile()
00944 {
00945
00946 if (myFilePtr != NULL)
00947 {
00948
00949 ifclose(myFilePtr);
00950 myFilePtr = NULL;
00951 }
00952 if(myInterfacePtr != NULL)
00953 {
00954 delete myInterfacePtr;
00955 myInterfacePtr = NULL;
00956 }
00957
00958 myIsOpenForRead = false;
00959 myIsOpenForWrite = false;
00960 myHasHeader = false;
00961 mySortedType = UNSORTED;
00962 myPrevReadName.clear();
00963 myPrevCoord = -1;
00964 myPrevRefID = 0;
00965 myRecordCount = 0;
00966 myStatus = SamStatus::SUCCESS;
00967
00968
00969 myIsBamOpenForRead = false;
00970 myRefID = BamIndex::REF_ID_ALL;
00971 myStartPos = -1;
00972 myEndPos = -1;
00973 myNewSection = false;
00974 myOverlapSection = true;
00975 myCurrentChunkEnd = 0;
00976 myChunksToRead.clear();
00977 if(myBamIndex != NULL)
00978 {
00979 delete myBamIndex;
00980 myBamIndex = NULL;
00981 }
00982
00983
00984 if(myStatistics != NULL)
00985 {
00986 myStatistics->reset();
00987 }
00988
00989 myRefName.clear();
00990 }
00991
00992
00993
00994
00995
00996 bool SamFile::validateSortOrder(SamRecord& record, SamFileHeader& header)
00997 {
00998 if(myRefPtr != NULL)
00999 {
01000 record.setReference(myRefPtr);
01001 }
01002 record.setSequenceTranslation(myReadTranslation);
01003
01004 bool status = false;
01005 if(mySortedType == UNSORTED)
01006 {
01007
01008 status = true;
01009 }
01010 else
01011 {
01012
01013 if(mySortedType == FLAG)
01014 {
01015
01016 mySortedType = getSortOrderFromHeader(header);
01017 }
01018
01019 if(mySortedType == QUERY_NAME)
01020 {
01021
01022
01023 const char* readName = record.getReadName();
01024 if(myPrevReadName.compare(readName) > 0)
01025 {
01026
01027
01028 String errorMessage = "ERROR: File is not sorted at record ";
01029 errorMessage += myRecordCount;
01030 myStatus.setStatus(SamStatus::INVALID_SORT,
01031 errorMessage.c_str());
01032 status = false;
01033 }
01034 else
01035 {
01036 myPrevReadName = readName;
01037 status = true;
01038 }
01039 }
01040 else
01041 {
01042
01043
01044 int32_t refID = record.getReferenceID();
01045 int32_t coord = record.get0BasedPosition();
01046
01047 if(refID == BamIndex::REF_ID_UNMAPPED)
01048 {
01049
01050
01051 status = true;
01052 myPrevRefID = refID;
01053 myPrevCoord = coord;
01054 }
01055 else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED)
01056 {
01057
01058
01059 String errorMessage = "ERROR: File is not sorted at record ";
01060 errorMessage += myRecordCount;
01061 myStatus.setStatus(SamStatus::INVALID_SORT,
01062 errorMessage.c_str());
01063 status = false;
01064 }
01065 else if(refID < myPrevRefID)
01066 {
01067
01068
01069 String errorMessage = "ERROR: File is not sorted at record ";
01070 errorMessage += myRecordCount;
01071 myStatus.setStatus(SamStatus::INVALID_SORT,
01072 errorMessage.c_str());
01073 status = false;
01074 }
01075 else
01076 {
01077
01078 if(refID > myPrevRefID)
01079 {
01080
01081 myPrevCoord = -1;
01082 }
01083
01084
01085 if(coord < myPrevCoord)
01086 {
01087
01088 String errorMessage = "ERROR: File is not sorted at record ";
01089 errorMessage += myRecordCount;
01090 myStatus.setStatus(SamStatus::INVALID_SORT,
01091 errorMessage.c_str());
01092 status = false;
01093 }
01094 else
01095 {
01096 myPrevRefID = refID;
01097 myPrevCoord = coord;
01098 status = true;
01099 }
01100 }
01101 }
01102 }
01103
01104 return(status);
01105 }
01106
01107
01108 SamFile::SortedType SamFile::getSortOrderFromHeader(SamFileHeader& header)
01109 {
01110 const char* tag = header.getSortOrder();
01111
01112
01113
01114 SortedType headerSortOrder = UNSORTED;
01115 if(strcmp(tag, "queryname") == 0)
01116 {
01117 headerSortOrder = QUERY_NAME;
01118 }
01119 else if(strcmp(tag, "coordinate") == 0)
01120 {
01121 headerSortOrder = COORDINATE;
01122 }
01123 return(headerSortOrder);
01124 }
01125
01126
01127 bool SamFile::ensureIndexedReadPosition()
01128 {
01129
01130 if(myRefID == BamIndex::REF_ID_ALL)
01131 {
01132 return(true);
01133 }
01134
01135
01136
01137
01138
01139 uint64_t currentPos = iftell(myFilePtr);
01140 if(currentPos >= myCurrentChunkEnd)
01141 {
01142
01143 if(myChunksToRead.empty())
01144 {
01145 myStatus = SamStatus::NO_MORE_RECS;
01146 return(false);
01147 }
01148
01149
01150 Chunk newChunk = myChunksToRead.pop();
01151
01152
01153
01154 if(newChunk.chunk_beg != currentPos)
01155 {
01156
01157 if(ifseek(myFilePtr, newChunk.chunk_beg, SEEK_SET) != true)
01158 {
01159
01160 myStatus.setStatus(SamStatus::FAIL_IO,
01161 "Failed to seek to the next record");
01162 return(false);
01163 }
01164 }
01165
01166 myCurrentChunkEnd = newChunk.chunk_end;
01167 }
01168 return(true);
01169 }
01170
01171
01172 bool SamFile::checkRecordInSection(SamRecord& record)
01173 {
01174 bool recordFound = true;
01175 if(myRefID == BamIndex::REF_ID_ALL)
01176 {
01177 return(true);
01178 }
01179
01180 if(record.getReferenceID() != myRefID)
01181 {
01182
01183 myStatus = SamStatus::NO_MORE_RECS;
01184 return(false);
01185 }
01186
01187
01188 recordFound = true;
01189
01190
01191
01192
01193
01194
01195
01196 if((myEndPos != -1) && (record.get0BasedPosition() >= myEndPos))
01197 {
01198 myStatus = SamStatus::NO_MORE_RECS;
01199 return(false);
01200 }
01201
01202
01203
01204
01205 if((myStartPos != -1) && (record.get0BasedAlignmentEnd() < myStartPos))
01206 {
01207
01208
01209 recordFound = false;
01210 }
01211
01212 if(!myOverlapSection)
01213 {
01214
01215
01216
01217
01218
01219
01220 if((record.get0BasedPosition() < myStartPos) ||
01221 ((myEndPos != -1) &&
01222 (record.get0BasedAlignmentEnd() >= myEndPos)))
01223 {
01224
01225
01226 recordFound = false;
01227 }
01228 }
01229
01230 return(recordFound);
01231 }
01232
01233
01234 bool SamFile::processNewSection(SamFileHeader &header)
01235 {
01236 myNewSection = false;
01237
01238
01239 if(myBamIndex == NULL)
01240 {
01241
01242 myStatus.setStatus(SamStatus::FAIL_ORDER,
01243 "Cannot read section since there is no index file open");
01244 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the BAM Index file."));
01245 return(false);
01246 }
01247
01248
01249 if(!myIsBamOpenForRead)
01250 {
01251
01252 myStatus.setStatus(SamStatus::FAIL_ORDER,
01253 "Cannot read section since there is no bam file open");
01254 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section without opening a BAM file."));
01255 return(false);
01256 }
01257
01258 if(myHasHeader == false)
01259 {
01260
01261 myStatus.setStatus(SamStatus::FAIL_ORDER,
01262 "Cannot read record since the header has not been read.");
01263 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the header."));
01264 return(false);
01265 }
01266
01267
01268
01269
01270
01271 myFilePtr->disableBuffering();
01272
01273 myChunksToRead.clear();
01274
01275
01276 myCurrentChunkEnd = 0;
01277
01278
01279
01280 if(!myRefName.empty())
01281 {
01282 myRefID = header.getReferenceID(myRefName.c_str());
01283
01284 myRefName.clear();
01285
01286
01287 if(myRefID == SamReferenceInfo::NO_REF_ID)
01288 {
01289 myStatus = SamStatus::NO_MORE_RECS;
01290 return(false);
01291 }
01292 }
01293
01294
01295 if(myBamIndex->getChunksForRegion(myRefID, myStartPos, myEndPos,
01296 myChunksToRead) == true)
01297 {
01298 myStatus = SamStatus::SUCCESS;
01299 }
01300 else
01301 {
01302 String errorMsg = "Failed to get the specified region, refID = ";
01303 errorMsg += myRefID;
01304 errorMsg += "; startPos = ";
01305 errorMsg += myStartPos;
01306 errorMsg += "; endPos = ";
01307 errorMsg += myEndPos;
01308 myStatus.setStatus(SamStatus::FAIL_PARSE,
01309 errorMsg);
01310 }
01311 return(true);
01312 }
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326 bool SamFile::attemptRecoverySync(bool (*checkSignature)(void *data) , int length)
01327 {
01328 if(myFilePtr==NULL) return false;
01329
01330 return myFilePtr->attemptRecoverySync(checkSignature, length);
01331 }
01332
01333
01334 SamFileReader::SamFileReader()
01335 : SamFile()
01336 {
01337 }
01338
01339
01340
01341 SamFileReader::SamFileReader(const char* filename)
01342 : SamFile(filename, READ)
01343 {
01344 }
01345
01346
01347
01348
01349
01350 SamFileReader::SamFileReader(const char* filename,
01351 ErrorHandler::HandlingType errorHandlingType)
01352 : SamFile(filename, READ, errorHandlingType)
01353 {
01354 }
01355
01356
01357
01358 SamFileReader::SamFileReader(const char* filename,
01359 SamFileHeader* header)
01360 : SamFile(filename, READ, header)
01361 {
01362 }
01363
01364
01365
01366 SamFileReader::SamFileReader(const char* filename,
01367 ErrorHandler::HandlingType errorHandlingType,
01368 SamFileHeader* header)
01369 : SamFile(filename, READ, errorHandlingType, header)
01370 {
01371 }
01372
01373
01374 SamFileReader::~SamFileReader()
01375 {
01376 }
01377
01378
01379
01380 SamFileWriter::SamFileWriter()
01381 : SamFile()
01382 {
01383 }
01384
01385
01386
01387 SamFileWriter::SamFileWriter(const char* filename)
01388 : SamFile(filename, WRITE)
01389 {
01390 }
01391
01392
01393
01394 SamFileWriter::SamFileWriter(const char* filename,
01395 ErrorHandler::HandlingType errorHandlingType)
01396 : SamFile(filename, WRITE, errorHandlingType)
01397 {
01398 }
01399
01400
01401
01402 SamFileWriter::SamFileWriter(const char* filename,
01403 SamFileHeader* header)
01404 : SamFile(filename, WRITE, header)
01405 {
01406 }
01407
01408
01409
01410 SamFileWriter::SamFileWriter(const char* filename,
01411 ErrorHandler::HandlingType errorHandlingType,
01412 SamFileHeader* header)
01413 : SamFile(filename, WRITE, errorHandlingType, header)
01414 {
01415 }
01416
01417
01418 SamFileWriter::~SamFileWriter()
01419 {
01420 }