00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include <stdexcept>
00018 #include "SamFile.h"
00019 #include "SamFileHeader.h"
00020 #include "SamRecord.h"
00021 #include "BamInterface.h"
00022 #include "SamInterface.h"
00023 #include "BgzfFileType.h"
00024
00025
00026 SamFile::SamFile()
00027 : myFilePtr(NULL),
00028 myInterfacePtr(NULL),
00029 myStatistics(NULL),
00030 myStatus(),
00031 myBamIndex(NULL),
00032 myRefPtr(NULL),
00033 myReadTranslation(SamRecord::NONE),
00034 myWriteTranslation(SamRecord::NONE),
00035 myAttemptRecovery(false)
00036 {
00037 resetFile();
00038 }
00039
00040
00041
00042 SamFile::SamFile(ErrorHandler::HandlingType errorHandlingType)
00043 : myFilePtr(NULL),
00044 myInterfacePtr(NULL),
00045 myStatistics(NULL),
00046 myStatus(errorHandlingType),
00047 myBamIndex(NULL),
00048 myRefPtr(NULL),
00049 myReadTranslation(SamRecord::NONE),
00050 myWriteTranslation(SamRecord::NONE)
00051 {
00052 resetFile();
00053 }
00054
00055
00056
00057
00058 SamFile::SamFile(const char* filename, OpenType mode)
00059 : myStatus()
00060 {
00061 init(filename, mode, NULL);
00062 }
00063
00064
00065
00066
00067 SamFile::SamFile(const char* filename, OpenType mode,
00068 ErrorHandler::HandlingType errorHandlingType)
00069 : myStatus(errorHandlingType)
00070 {
00071 init(filename, mode, NULL);
00072 }
00073
00074
00075
00076
00077 SamFile::SamFile(const char* filename, OpenType mode, SamFileHeader* header)
00078 : myStatus()
00079 {
00080 init(filename, mode, header);
00081 }
00082
00083
00084
00085
00086 SamFile::SamFile(const char* filename, OpenType mode,
00087 ErrorHandler::HandlingType errorHandlingType,
00088 SamFileHeader* header)
00089 : myStatus(errorHandlingType)
00090 {
00091 init(filename, mode, header);
00092 }
00093
00094
00095 SamFile::~SamFile()
00096 {
00097 resetFile();
00098 if(myStatistics != NULL)
00099 {
00100 delete myStatistics;
00101 }
00102 }
00103
00104
00105
00106 bool SamFile::OpenForRead(const char * filename, SamFileHeader* header)
00107 {
00108
00109 resetFile();
00110
00111 int lastchar = 0;
00112
00113 while (filename[lastchar] != 0) lastchar++;
00114
00115
00116 if((lastchar >= 1) && (filename[0] == '-'))
00117 {
00118
00119
00120 if(strcmp(filename, "-.bam") == 0)
00121 {
00122
00123
00124 filename = "-";
00125
00126 myFilePtr = new InputFile;
00127
00128
00129 myFilePtr->setAttemptRecovery(myAttemptRecovery);
00130 myFilePtr->openFile(filename, "rb", InputFile::BGZF);
00131
00132 myInterfacePtr = new BamInterface;
00133
00134
00135 char magic[4];
00136 ifread(myFilePtr, magic, 4);
00137 }
00138 else if(strcmp(filename, "-.ubam") == 0)
00139 {
00140
00141
00142
00143
00144
00145 filename = "-";
00146
00147
00148 BgzfFileType::setRequireEofBlock(false);
00149
00150 myFilePtr = ifopen(filename, "rb", InputFile::BGZF);
00151
00152 myInterfacePtr = new BamInterface;
00153
00154
00155 char magic[4];
00156 ifread(myFilePtr, magic, 4);
00157 }
00158 else
00159 {
00160
00161
00162 filename = "-";
00163 myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED);
00164 myInterfacePtr = new SamInterface;
00165 }
00166 }
00167 else
00168 {
00169
00170
00171 myFilePtr = new InputFile;
00172
00173
00174
00175 myFilePtr->setAttemptRecovery(myAttemptRecovery);
00176 bool rc = myFilePtr->openFile(filename, "rb", InputFile::DEFAULT);
00177
00178 if (rc == false)
00179 {
00180 std::string errorMessage = "Failed to Open ";
00181 errorMessage += filename;
00182 errorMessage += " for reading";
00183 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
00184 delete myFilePtr;
00185 return(false);
00186 }
00187
00188 char magic[4];
00189 ifread(myFilePtr, magic, 4);
00190
00191 if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' &&
00192 magic[3] == 1)
00193 {
00194 myInterfacePtr = new BamInterface;
00195
00196
00197 myIsBamOpenForRead = true;
00198 }
00199 else
00200 {
00201
00202
00203 ifrewind(myFilePtr);
00204 myInterfacePtr = new SamInterface;
00205 }
00206 }
00207
00208
00209 myIsOpenForRead = true;
00210
00211
00212 if(header != NULL)
00213 {
00214 return(ReadHeader(*header));
00215 }
00216
00217
00218 myStatus = SamStatus::SUCCESS;
00219 return(true);
00220 }
00221
00222
00223
00224 bool SamFile::OpenForWrite(const char * filename, SamFileHeader* header)
00225 {
00226
00227 resetFile();
00228
00229 int lastchar = 0;
00230 while (filename[lastchar] != 0) lastchar++;
00231 if (lastchar >= 4 &&
00232 filename[lastchar - 4] == 'u' &&
00233 filename[lastchar - 3] == 'b' &&
00234 filename[lastchar - 2] == 'a' &&
00235 filename[lastchar - 1] == 'm')
00236 {
00237
00238
00239 if((lastchar == 6) && (filename[0] == '-') && (filename[1] == '.'))
00240 {
00241 filename = "-";
00242 }
00243
00244 myFilePtr = ifopen(filename, "wb0", InputFile::BGZF);
00245
00246 myInterfacePtr = new BamInterface;
00247 }
00248 else if (lastchar >= 3 &&
00249 filename[lastchar - 3] == 'b' &&
00250 filename[lastchar - 2] == 'a' &&
00251 filename[lastchar - 1] == 'm')
00252 {
00253
00254
00255 if((lastchar == 5) && (filename[0] == '-') && (filename[1] == '.'))
00256 {
00257 filename = "-";
00258 }
00259 myFilePtr = ifopen(filename, "wb", InputFile::BGZF);
00260
00261 myInterfacePtr = new BamInterface;
00262 }
00263 else
00264 {
00265
00266
00267
00268 if((lastchar >= 1) && (filename[0] == '-'))
00269 {
00270 filename = "-";
00271 }
00272 myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
00273
00274 myInterfacePtr = new SamInterface;
00275 }
00276
00277 if (myFilePtr == NULL)
00278 {
00279 std::string errorMessage = "Failed to Open ";
00280 errorMessage += filename;
00281 errorMessage += " for writing";
00282 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
00283 return(false);
00284 }
00285
00286 myIsOpenForWrite = true;
00287
00288
00289 if(header != NULL)
00290 {
00291 return(WriteHeader(*header));
00292 }
00293
00294
00295 myStatus = SamStatus::SUCCESS;
00296 return(true);
00297 }
00298
00299
00300
00301 bool SamFile::ReadBamIndex(const char* bamIndexFilename)
00302 {
00303
00304 if(myBamIndex != NULL)
00305 {
00306 delete myBamIndex;
00307 myBamIndex = NULL;
00308 }
00309
00310
00311 myBamIndex = new BamIndex();
00312 SamStatus::Status indexStat = myBamIndex->readIndex(bamIndexFilename);
00313
00314 if(indexStat != SamStatus::SUCCESS)
00315 {
00316 std::string errorMessage = "Failed to read the bam Index file: ";
00317 errorMessage += bamIndexFilename;
00318 myStatus.setStatus(indexStat, errorMessage.c_str());
00319 delete myBamIndex;
00320 myBamIndex = NULL;
00321 return(false);
00322 }
00323 myStatus = SamStatus::SUCCESS;
00324 return(true);
00325 }
00326
00327
00328
00329 bool SamFile::ReadBamIndex()
00330 {
00331 if(myFilePtr == NULL)
00332 {
00333
00334
00335 std::string errorMessage = "Failed to read the bam Index file -"
00336 " the BAM file needs to be read first in order to determine"
00337 " the index filename.";
00338 myStatus.setStatus(SamStatus::FAIL_ORDER, errorMessage.c_str());
00339 return(false);
00340 }
00341
00342 const char* bamBaseName = myFilePtr->getFileName();
00343
00344 std::string indexName = bamBaseName;
00345 indexName += ".bai";
00346
00347 bool foundFile = true;
00348 try
00349 {
00350 if(ReadBamIndex(indexName.c_str()) == false)
00351 {
00352 foundFile = false;
00353 }
00354 }
00355 catch (std::exception& e)
00356 {
00357 foundFile = false;
00358 }
00359
00360
00361 if(!foundFile)
00362 {
00363
00364
00365 size_t startExt = indexName.find(".bam");
00366 if(startExt == std::string::npos)
00367 {
00368
00369
00370 return(false);
00371 }
00372
00373 indexName.erase(startExt, 4);
00374 return(ReadBamIndex(indexName.c_str()));
00375 }
00376 return(true);
00377 }
00378
00379
00380
00381 void SamFile::SetReference(GenomeSequence* reference)
00382 {
00383 myRefPtr = reference;
00384 }
00385
00386
00387
00388 void SamFile::SetReadSequenceTranslation(SamRecord::SequenceTranslation translation)
00389 {
00390 myReadTranslation = translation;
00391 }
00392
00393
00394
00395 void SamFile::SetWriteSequenceTranslation(SamRecord::SequenceTranslation translation)
00396 {
00397 myWriteTranslation = translation;
00398 }
00399
00400
00401 void SamFile::Close()
00402 {
00403
00404
00405 resetFile();
00406 }
00407
00408
00409
00410
00411 bool SamFile::IsOpen()
00412 {
00413 if (myFilePtr != NULL)
00414 {
00415
00416 return(myFilePtr->isOpen());
00417 }
00418
00419 return false;
00420 }
00421
00422
00423
00424
00425 bool SamFile::IsEOF()
00426 {
00427 if (myFilePtr != NULL)
00428 {
00429
00430 return(ifeof(myFilePtr));
00431 }
00432
00433 return true;
00434 }
00435
00436
00437
00438 bool SamFile::ReadHeader(SamFileHeader& header)
00439 {
00440 if(myIsOpenForRead == false)
00441 {
00442
00443 myStatus.setStatus(SamStatus::FAIL_ORDER,
00444 "Cannot read header since the file is not open for reading");
00445 return(false);
00446 }
00447
00448 if(myHasHeader == true)
00449 {
00450
00451 myStatus.setStatus(SamStatus::FAIL_ORDER,
00452 "Cannot read header since it has already been read.");
00453 return(false);
00454 }
00455
00456 myStatus = myInterfacePtr->readHeader(myFilePtr, header);
00457 if(myStatus == SamStatus::SUCCESS)
00458 {
00459
00460 myHasHeader = true;
00461 return(true);
00462 }
00463 return(false);
00464 }
00465
00466
00467
00468 bool SamFile::WriteHeader(SamFileHeader& header)
00469 {
00470 if(myIsOpenForWrite == false)
00471 {
00472
00473
00474
00475 myStatus.setStatus(SamStatus::FAIL_ORDER,
00476 "Cannot write header since the file is not open for writing");
00477 return(false);
00478 }
00479
00480 if(myHasHeader == true)
00481 {
00482
00483 myStatus.setStatus(SamStatus::FAIL_ORDER,
00484 "Cannot write header since it has already been written");
00485 return(false);
00486 }
00487
00488 myStatus = myInterfacePtr->writeHeader(myFilePtr, header);
00489 if(myStatus == SamStatus::SUCCESS)
00490 {
00491
00492 myHasHeader = true;
00493 return(true);
00494 }
00495
00496
00497 return(false);
00498 }
00499
00500
00501
00502 bool SamFile::ReadRecord(SamFileHeader& header,
00503 SamRecord& record)
00504 {
00505 myStatus = SamStatus::SUCCESS;
00506
00507 if(myIsOpenForRead == false)
00508 {
00509
00510 myStatus.setStatus(SamStatus::FAIL_ORDER,
00511 "Cannot read record since the file is not open for reading");
00512 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file."));
00513 return(false);
00514 }
00515
00516 if(myHasHeader == false)
00517 {
00518
00519
00520 myStatus.setStatus(SamStatus::FAIL_ORDER,
00521 "Cannot read record since the header has not been read.");
00522 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header."));
00523 return(false);
00524 }
00525
00526
00527
00528 if(myNewSection)
00529 {
00530 if(!processNewSection(header))
00531 {
00532
00533
00534
00535
00536 return(false);
00537 }
00538 }
00539
00540
00541 if(myRefID != BamIndex::REF_ID_ALL)
00542 {
00543
00544 return(readIndexedRecord(header, record));
00545 }
00546
00547 record.setReference(myRefPtr);
00548 record.setSequenceTranslation(myReadTranslation);
00549
00550
00551
00552 myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
00553 if(myStatus == SamStatus::SUCCESS)
00554 {
00555
00556 myRecordCount++;
00557
00558 if(myStatistics != NULL)
00559 {
00560
00561 myStatistics->updateStatistics(record);
00562 }
00563
00564
00565 if(!validateSortOrder(record, header))
00566 {
00567
00568 return(false);
00569 }
00570 return(true);
00571 }
00572
00573 return(false);
00574 }
00575
00576
00577
00578
00579 bool SamFile::WriteRecord(SamFileHeader& header,
00580 SamRecord& record)
00581 {
00582 if(myIsOpenForWrite == false)
00583 {
00584
00585 myStatus.setStatus(SamStatus::FAIL_ORDER,
00586 "Cannot write record since the file is not open for writing");
00587 return(false);
00588 }
00589
00590 if(myHasHeader == false)
00591 {
00592
00593 myStatus.setStatus(SamStatus::FAIL_ORDER,
00594 "Cannot write record since the header has not been written");
00595 return(false);
00596 }
00597
00598
00599 if(!validateSortOrder(record, header))
00600 {
00601
00602 myStatus.setStatus(SamStatus::INVALID_SORT,
00603 "Cannot write the record since the file is not properly sorted.");
00604 return(false);
00605 }
00606
00607 if(myRefPtr != NULL)
00608 {
00609 record.setReference(myRefPtr);
00610 }
00611
00612
00613
00614 myStatus = myInterfacePtr->writeRecord(myFilePtr, header, record,
00615 myWriteTranslation);
00616
00617 if(myStatus == SamStatus::SUCCESS)
00618 {
00619
00620 myRecordCount++;
00621 return(true);
00622 }
00623 return(false);
00624 }
00625
00626
00627
00628
00629 void SamFile::setSortedValidation(SortedType sortType)
00630 {
00631 mySortedType = sortType;
00632 }
00633
00634
00635
00636 uint32_t SamFile::GetCurrentRecordCount()
00637 {
00638 return(myRecordCount);
00639 }
00640
00641
00642
00643 bool SamFile::SetReadSection(int32_t refID)
00644 {
00645
00646 return(SetReadSection(refID, -1, -1));
00647 }
00648
00649
00650
00651
00652 bool SamFile::SetReadSection(const char* refName)
00653 {
00654
00655 return(SetReadSection(refName, -1, -1));
00656 }
00657
00658
00659
00660 bool SamFile::SetReadSection(int32_t refID, int32_t start, int32_t end,
00661 bool overlap)
00662 {
00663
00664
00665
00666 if(!myIsBamOpenForRead)
00667 {
00668
00669 myStatus.setStatus(SamStatus::FAIL_ORDER,
00670 "Cannot set section since there is no bam file open");
00671 return(false);
00672 }
00673
00674 myNewSection = true;
00675 myOverlapSection = overlap;
00676 myStartPos = start;
00677 myEndPos = end;
00678 myRefID = refID;
00679 myRefName.clear();
00680 myChunksToRead.clear();
00681
00682
00683 myCurrentChunkEnd = 0;
00684 myStatus = SamStatus::SUCCESS;
00685
00686
00687 myPrevCoord = -1;
00688 myPrevRefID = 0;
00689 myPrevReadName.clear();
00690
00691 return(true);
00692 }
00693
00694
00695
00696 bool SamFile::SetReadSection(const char* refName, int32_t start, int32_t end,
00697 bool overlap)
00698 {
00699
00700
00701
00702 if(!myIsBamOpenForRead)
00703 {
00704
00705 myStatus.setStatus(SamStatus::FAIL_ORDER,
00706 "Cannot set section since there is no bam file open");
00707 return(false);
00708 }
00709
00710 myNewSection = true;
00711 myOverlapSection = overlap;
00712 myStartPos = start;
00713 myEndPos = end;
00714 if((strcmp(refName, "") == 0) || (strcmp(refName, "*") == 0))
00715 {
00716
00717 myRefID = BamIndex::REF_ID_UNMAPPED;
00718 }
00719 else
00720 {
00721
00722
00723 myRefName = refName;
00724 myRefID = BamIndex::REF_ID_ALL;
00725 }
00726 myChunksToRead.clear();
00727
00728
00729 myCurrentChunkEnd = 0;
00730 myStatus = SamStatus::SUCCESS;
00731
00732
00733 myPrevCoord = -1;
00734 myPrevRefID = 0;
00735 myPrevReadName.clear();
00736
00737 return(true);
00738 }
00739
00740
00741
00742
00743
00744 int32_t SamFile::getNumMappedReadsFromIndex(int32_t refID)
00745 {
00746
00747 if(myBamIndex == NULL)
00748 {
00749 myStatus.setStatus(SamStatus::FAIL_ORDER,
00750 "Cannot get num mapped reads from the index until it has been read.");
00751 return(false);
00752 }
00753 return(myBamIndex->getNumMappedReads(refID));
00754 }
00755
00756
00757
00758
00759 int32_t SamFile::getNumUnMappedReadsFromIndex(int32_t refID)
00760 {
00761
00762 if(myBamIndex == NULL)
00763 {
00764 myStatus.setStatus(SamStatus::FAIL_ORDER,
00765 "Cannot get num unmapped reads from the index until it has been read.");
00766 return(false);
00767 }
00768 return(myBamIndex->getNumUnMappedReads(refID));
00769 }
00770
00771
00772
00773
00774 int32_t SamFile::getNumMappedReadsFromIndex(const char* refName,
00775 SamFileHeader& header)
00776 {
00777
00778 if(myBamIndex == NULL)
00779 {
00780 myStatus.setStatus(SamStatus::FAIL_ORDER,
00781 "Cannot get num mapped reads from the index until it has been read.");
00782 return(false);
00783 }
00784 int32_t refID = BamIndex::REF_ID_UNMAPPED;
00785 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
00786 {
00787
00788 refID = header.getReferenceID(refName);
00789 }
00790 return(myBamIndex->getNumMappedReads(refID));
00791 }
00792
00793
00794
00795
00796 int32_t SamFile::getNumUnMappedReadsFromIndex(const char* refName,
00797 SamFileHeader& header)
00798 {
00799
00800 if(myBamIndex == NULL)
00801 {
00802 myStatus.setStatus(SamStatus::FAIL_ORDER,
00803 "Cannot get num unmapped reads from the index until it has been read.");
00804 return(false);
00805 }
00806 int32_t refID = BamIndex::REF_ID_UNMAPPED;
00807 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
00808 {
00809
00810 refID = header.getReferenceID(refName);
00811 }
00812 return(myBamIndex->getNumUnMappedReads(refID));
00813 }
00814
00815
00816
00817
00818 uint32_t SamFile::GetNumOverlaps(SamRecord& samRecord)
00819 {
00820 if(myRefPtr != NULL)
00821 {
00822 samRecord.setReference(myRefPtr);
00823 }
00824 samRecord.setSequenceTranslation(myReadTranslation);
00825
00826
00827
00828 return(samRecord.getNumOverlaps(myStartPos, myEndPos));
00829 }
00830
00831
00832 void SamFile::GenerateStatistics(bool genStats)
00833 {
00834 if(genStats)
00835 {
00836 if(myStatistics == NULL)
00837 {
00838
00839
00840 myStatistics = new SamStatistics();
00841 }
00842 }
00843 else
00844 {
00845
00846
00847 if(myStatistics != NULL)
00848 {
00849 delete myStatistics;
00850 myStatistics = NULL;
00851 }
00852 }
00853
00854 }
00855
00856
00857 const BamIndex* SamFile::GetBamIndex()
00858 {
00859 return(myBamIndex);
00860 }
00861
00862
00863
00864 void SamFile::init(const char* filename, OpenType mode, SamFileHeader* header)
00865 {
00866 myFilePtr = NULL;
00867 myInterfacePtr = NULL;
00868 myStatistics = NULL;
00869 myBamIndex = NULL;
00870 myRefPtr = NULL;
00871 myReadTranslation = SamRecord::NONE;
00872 myWriteTranslation = SamRecord::NONE;
00873
00874 resetFile();
00875
00876 bool openStatus = true;
00877 if(mode == READ)
00878 {
00879
00880 openStatus = OpenForRead(filename, header);
00881 }
00882 else
00883 {
00884
00885 openStatus = OpenForWrite(filename, header);
00886 }
00887 if(!openStatus)
00888 {
00889
00890 fprintf(stderr, "%s\n", GetStatusMessage());
00891 std::cerr << "FAILURE - EXITING!!!" << std::endl;
00892 exit(-1);
00893 }
00894 }
00895
00896
00897
00898 void SamFile::resetFile()
00899 {
00900
00901 if (myFilePtr != NULL)
00902 {
00903
00904 ifclose(myFilePtr);
00905 myFilePtr = NULL;
00906 }
00907 if(myInterfacePtr != NULL)
00908 {
00909 delete myInterfacePtr;
00910 myInterfacePtr = NULL;
00911 }
00912
00913 myIsOpenForRead = false;
00914 myIsOpenForWrite = false;
00915 myHasHeader = false;
00916 mySortedType = UNSORTED;
00917 myPrevReadName.clear();
00918 myPrevCoord = -1;
00919 myPrevRefID = 0;
00920 myRecordCount = 0;
00921 myStatus = SamStatus::SUCCESS;
00922
00923
00924 myIsBamOpenForRead = false;
00925 myRefID = BamIndex::REF_ID_ALL;
00926 myStartPos = -1;
00927 myEndPos = -1;
00928 myNewSection = false;
00929 myOverlapSection = true;
00930 myCurrentChunkEnd = 0;
00931 myChunksToRead.clear();
00932 if(myBamIndex != NULL)
00933 {
00934 delete myBamIndex;
00935 myBamIndex = NULL;
00936 }
00937
00938
00939 if(myStatistics != NULL)
00940 {
00941 myStatistics->reset();
00942 }
00943
00944 myRefName.clear();
00945 }
00946
00947
00948
00949
00950
00951 bool SamFile::validateSortOrder(SamRecord& record, SamFileHeader& header)
00952 {
00953 if(myRefPtr != NULL)
00954 {
00955 record.setReference(myRefPtr);
00956 }
00957 record.setSequenceTranslation(myReadTranslation);
00958
00959 bool status = false;
00960 if(mySortedType == UNSORTED)
00961 {
00962
00963 status = true;
00964 }
00965 else
00966 {
00967
00968 if(mySortedType == FLAG)
00969 {
00970
00971 mySortedType = getSortOrderFromHeader(header);
00972 }
00973
00974 if(mySortedType == QUERY_NAME)
00975 {
00976
00977
00978 const char* readName = record.getReadName();
00979 if(myPrevReadName.compare(readName) > 0)
00980 {
00981
00982
00983 String errorMessage = "ERROR: File is not sorted at record ";
00984 errorMessage += myRecordCount;
00985 myStatus.setStatus(SamStatus::INVALID_SORT,
00986 errorMessage.c_str());
00987 status = false;
00988 }
00989 else
00990 {
00991 myPrevReadName = readName;
00992 status = true;
00993 }
00994 }
00995 else
00996 {
00997
00998
00999 int32_t refID = record.getReferenceID();
01000 int32_t coord = record.get0BasedPosition();
01001
01002 if(refID == BamIndex::REF_ID_UNMAPPED)
01003 {
01004
01005
01006 status = true;
01007 myPrevRefID = refID;
01008 myPrevCoord = coord;
01009 }
01010 else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED)
01011 {
01012
01013
01014 String errorMessage = "ERROR: File is not sorted at record ";
01015 errorMessage += myRecordCount;
01016 myStatus.setStatus(SamStatus::INVALID_SORT,
01017 errorMessage.c_str());
01018 status = false;
01019 }
01020 else if(refID < myPrevRefID)
01021 {
01022
01023
01024 String errorMessage = "ERROR: File is not sorted at record ";
01025 errorMessage += myRecordCount;
01026 myStatus.setStatus(SamStatus::INVALID_SORT,
01027 errorMessage.c_str());
01028 status = false;
01029 }
01030 else
01031 {
01032
01033 if(refID > myPrevRefID)
01034 {
01035
01036 myPrevCoord = -1;
01037 }
01038
01039
01040 if(coord < myPrevCoord)
01041 {
01042
01043 String errorMessage = "ERROR: File is not sorted at record ";
01044 errorMessage += myRecordCount;
01045 myStatus.setStatus(SamStatus::INVALID_SORT,
01046 errorMessage.c_str());
01047 status = false;
01048 }
01049 else
01050 {
01051 myPrevRefID = refID;
01052 myPrevCoord = coord;
01053 status = true;
01054 }
01055 }
01056 }
01057 }
01058
01059 return(status);
01060 }
01061
01062
01063 SamFile::SortedType SamFile::getSortOrderFromHeader(SamFileHeader& header)
01064 {
01065 const char* tag = header.getSortOrder();
01066
01067
01068
01069 SortedType headerSortOrder = UNSORTED;
01070 if(strcmp(tag, "queryname") == 0)
01071 {
01072 headerSortOrder = QUERY_NAME;
01073 }
01074 else if(strcmp(tag, "coordinate") == 0)
01075 {
01076 headerSortOrder = COORDINATE;
01077 }
01078 return(headerSortOrder);
01079 }
01080
01081
01082
01083 bool SamFile::readIndexedRecord(SamFileHeader& header,
01084 SamRecord& record)
01085 {
01086 record.setReference(myRefPtr);
01087 record.setSequenceTranslation(myReadTranslation);
01088
01089
01090
01091 bool recordFound = false;
01092 while(!recordFound)
01093 {
01094
01095
01096
01097
01098 uint64_t currentPos = iftell(myFilePtr);
01099 if(currentPos >= myCurrentChunkEnd)
01100 {
01101
01102 if(myChunksToRead.empty())
01103 {
01104 myStatus = SamStatus::NO_MORE_RECS;
01105 return(false);
01106 }
01107
01108
01109 Chunk newChunk = myChunksToRead.pop();
01110
01111
01112
01113 if(newChunk.chunk_beg != currentPos)
01114 {
01115
01116 if(ifseek(myFilePtr, newChunk.chunk_beg, SEEK_SET) != true)
01117 {
01118
01119 myStatus.setStatus(SamStatus::FAIL_IO,
01120 "Failed to seek to the next record");
01121 return(false);
01122 }
01123 }
01124
01125 myCurrentChunkEnd = newChunk.chunk_end;
01126 }
01127
01128
01129 myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
01130 if(myStatus != SamStatus::SUCCESS)
01131 {
01132
01133 return(false);
01134 }
01135
01136
01137
01138 if(record.getReferenceID() != myRefID)
01139 {
01140
01141 myStatus = SamStatus::NO_MORE_RECS;
01142 return(false);
01143 }
01144
01145
01146 recordFound = true;
01147
01148
01149
01150
01151
01152
01153
01154 if((myEndPos != -1) && (record.get0BasedPosition() >= myEndPos))
01155 {
01156 myStatus = SamStatus::NO_MORE_RECS;
01157 return(false);
01158 }
01159
01160
01161
01162
01163 if((myStartPos != -1) && (record.get0BasedAlignmentEnd() < myStartPos))
01164 {
01165
01166
01167 recordFound = false;
01168 }
01169
01170 if(!myOverlapSection)
01171 {
01172
01173
01174
01175
01176
01177
01178 if((record.get0BasedPosition() < myStartPos) ||
01179 ((myEndPos != -1) &&
01180 (record.get0BasedAlignmentEnd() >= myEndPos)))
01181 {
01182
01183
01184 recordFound = false;
01185 }
01186 }
01187 }
01188
01189
01190 if(!validateSortOrder(record, header))
01191 {
01192 myStatus.setStatus(SamStatus::INVALID_SORT,
01193 "The file is not properly sorted.");
01194 return(false);
01195 }
01196
01197
01198
01199 myRecordCount++;
01200
01201 if(myStatistics != NULL)
01202 {
01203
01204 myStatistics->updateStatistics(record);
01205 }
01206
01207 return(true);
01208 }
01209
01210
01211 bool SamFile::processNewSection(SamFileHeader &header)
01212 {
01213 myNewSection = false;
01214
01215
01216 if(myBamIndex == NULL)
01217 {
01218
01219 myStatus.setStatus(SamStatus::FAIL_ORDER,
01220 "Cannot read section since there is no index file open");
01221 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the BAM Index file."));
01222 return(false);
01223 }
01224
01225
01226 if(!myIsBamOpenForRead)
01227 {
01228
01229 myStatus.setStatus(SamStatus::FAIL_ORDER,
01230 "Cannot read section since there is no bam file open");
01231 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section without opening a BAM file."));
01232 return(false);
01233 }
01234
01235 if(myHasHeader == false)
01236 {
01237
01238 myStatus.setStatus(SamStatus::FAIL_ORDER,
01239 "Cannot read record since the header has not been read.");
01240 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the header."));
01241 return(false);
01242 }
01243
01244
01245
01246
01247
01248 myFilePtr->disableBuffering();
01249
01250 myChunksToRead.clear();
01251
01252
01253 myCurrentChunkEnd = 0;
01254
01255
01256
01257 if(!myRefName.empty())
01258 {
01259 myRefID = header.getReferenceID(myRefName.c_str());
01260
01261 myRefName.clear();
01262
01263
01264 if(myRefID == SamReferenceInfo::NO_REF_ID)
01265 {
01266 myStatus = SamStatus::NO_MORE_RECS;
01267 return(false);
01268 }
01269 }
01270
01271
01272 if(myBamIndex->getChunksForRegion(myRefID, myStartPos, myEndPos,
01273 myChunksToRead) == true)
01274 {
01275 myStatus = SamStatus::SUCCESS;
01276 }
01277 else
01278 {
01279 String errorMsg = "Failed to get the specified region, refID = ";
01280 errorMsg += myRefID;
01281 errorMsg += "; startPos = ";
01282 errorMsg += myStartPos;
01283 errorMsg += "; endPos = ";
01284 errorMsg += myEndPos;
01285 myStatus.setStatus(SamStatus::FAIL_PARSE,
01286 errorMsg);
01287 }
01288 return(true);
01289 }
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303 bool SamFile::attemptRecoverySync(bool (*checkSignature)(void *data) , int length)
01304 {
01305 if(myFilePtr==NULL) return false;
01306
01307 return myFilePtr->attemptRecoverySync(checkSignature, length);
01308 }
01309
01310
01311 SamFileReader::SamFileReader()
01312 : SamFile()
01313 {
01314 }
01315
01316
01317
01318 SamFileReader::SamFileReader(const char* filename)
01319 : SamFile(filename, READ)
01320 {
01321 }
01322
01323
01324
01325
01326
01327 SamFileReader::SamFileReader(const char* filename,
01328 ErrorHandler::HandlingType errorHandlingType)
01329 : SamFile(filename, READ, errorHandlingType)
01330 {
01331 }
01332
01333
01334
01335 SamFileReader::SamFileReader(const char* filename,
01336 SamFileHeader* header)
01337 : SamFile(filename, READ, header)
01338 {
01339 }
01340
01341
01342
01343 SamFileReader::SamFileReader(const char* filename,
01344 ErrorHandler::HandlingType errorHandlingType,
01345 SamFileHeader* header)
01346 : SamFile(filename, READ, errorHandlingType, header)
01347 {
01348 }
01349
01350
01351 SamFileReader::~SamFileReader()
01352 {
01353 }
01354
01355
01356
01357 SamFileWriter::SamFileWriter()
01358 : SamFile()
01359 {
01360 }
01361
01362
01363
01364 SamFileWriter::SamFileWriter(const char* filename)
01365 : SamFile(filename, WRITE)
01366 {
01367 }
01368
01369
01370
01371 SamFileWriter::SamFileWriter(const char* filename,
01372 ErrorHandler::HandlingType errorHandlingType)
01373 : SamFile(filename, WRITE, errorHandlingType)
01374 {
01375 }
01376
01377
01378
01379 SamFileWriter::SamFileWriter(const char* filename,
01380 SamFileHeader* header)
01381 : SamFile(filename, WRITE, header)
01382 {
01383 }
01384
01385
01386
01387 SamFileWriter::SamFileWriter(const char* filename,
01388 ErrorHandler::HandlingType errorHandlingType,
01389 SamFileHeader* header)
01390 : SamFile(filename, WRITE, errorHandlingType, header)
01391 {
01392 }
01393
01394
01395 SamFileWriter::~SamFileWriter()
01396 {
01397 }