00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #include <stdexcept>
00018 #include "SamFile.h"
00019 #include "SamFileHeader.h"
00020 #include "SamRecord.h"
00021 #include "BamInterface.h"
00022 #include "SamInterface.h"
00023
00024
00025 SamFile::SamFile()
00026 : myFilePtr(NULL),
00027 myInterfacePtr(NULL),
00028 myStatistics(NULL),
00029 myStatus(),
00030 myBamIndex(NULL),
00031 myRefPtr(NULL),
00032 myReadTranslation(SamRecord::NONE),
00033 myWriteTranslation(SamRecord::NONE)
00034 {
00035 resetFile();
00036 }
00037
00038
00039
00040 SamFile::SamFile(ErrorHandler::HandlingType errorHandlingType)
00041 : myFilePtr(NULL),
00042 myInterfacePtr(NULL),
00043 myStatistics(NULL),
00044 myStatus(errorHandlingType),
00045 myBamIndex(NULL),
00046 myRefPtr(NULL),
00047 myReadTranslation(SamRecord::NONE),
00048 myWriteTranslation(SamRecord::NONE)
00049 {
00050 resetFile();
00051 }
00052
00053
00054
00055
00056 SamFile::SamFile(const char* filename, OpenType mode)
00057 : myStatus()
00058 {
00059 init(filename, mode, NULL);
00060 }
00061
00062
00063
00064
00065 SamFile::SamFile(const char* filename, OpenType mode,
00066 ErrorHandler::HandlingType errorHandlingType)
00067 : myStatus(errorHandlingType)
00068 {
00069 init(filename, mode, NULL);
00070 }
00071
00072
00073
00074
00075 SamFile::SamFile(const char* filename, OpenType mode, SamFileHeader* header)
00076 : myStatus()
00077 {
00078 init(filename, mode, header);
00079 }
00080
00081
00082
00083
00084 SamFile::SamFile(const char* filename, OpenType mode,
00085 ErrorHandler::HandlingType errorHandlingType,
00086 SamFileHeader* header)
00087 : myStatus(errorHandlingType)
00088 {
00089 init(filename, mode, header);
00090 }
00091
00092
00093 SamFile::~SamFile()
00094 {
00095 resetFile();
00096 if(myStatistics != NULL)
00097 {
00098 delete myStatistics;
00099 }
00100 }
00101
00102
00103
00104 bool SamFile::OpenForRead(const char * filename, SamFileHeader* header)
00105 {
00106
00107 resetFile();
00108
00109 int lastchar = 0;
00110
00111 while (filename[lastchar] != 0) lastchar++;
00112
00113
00114 if((lastchar >= 1) && (filename[0] == '-'))
00115 {
00116
00117
00118 if(strcmp(filename, "-.bam") == 0)
00119 {
00120
00121
00122 filename = "-";
00123 myFilePtr = ifopen(filename, "rb", InputFile::BGZF);
00124 myFilePtr->disableBuffering();
00125 myInterfacePtr = new BamInterface;
00126
00127
00128 char magic[4];
00129 ifread(myFilePtr, magic, 4);
00130 }
00131 else if(strcmp(filename, "-.ubam") == 0)
00132 {
00133
00134
00135 filename = "-";
00136 myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED);
00137
00138 myInterfacePtr = new BamInterface;
00139
00140
00141 char magic[4];
00142 ifread(myFilePtr, magic, 4);
00143 }
00144 else
00145 {
00146
00147
00148 filename = "-";
00149 myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED);
00150 myInterfacePtr = new SamInterface;
00151 }
00152 }
00153 else
00154 {
00155
00156 myFilePtr = ifopen(filename, "rb");
00157
00158 if (myFilePtr == NULL)
00159 {
00160 std::string errorMessage = "Failed to Open ";
00161 errorMessage += filename;
00162 errorMessage += " for reading";
00163 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
00164 return(false);
00165 }
00166
00167 char magic[4];
00168 ifread(myFilePtr, magic, 4);
00169
00170 if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' &&
00171 magic[3] == 1)
00172 {
00173 myInterfacePtr = new BamInterface;
00174
00175
00176 myIsBamOpenForRead = true;
00177 }
00178 else
00179 {
00180
00181
00182 ifrewind(myFilePtr);
00183 myInterfacePtr = new SamInterface;
00184 }
00185 }
00186
00187
00188 myIsOpenForRead = true;
00189
00190
00191 if(header != NULL)
00192 {
00193 return(ReadHeader(*header));
00194 }
00195
00196
00197 myStatus = SamStatus::SUCCESS;
00198 return(true);
00199 }
00200
00201
00202
00203 bool SamFile::OpenForWrite(const char * filename, SamFileHeader* header)
00204 {
00205
00206 resetFile();
00207
00208 int lastchar = 0;
00209 while (filename[lastchar] != 0) lastchar++;
00210 if (lastchar >= 4 &&
00211 filename[lastchar - 4] == 'u' &&
00212 filename[lastchar - 3] == 'b' &&
00213 filename[lastchar - 2] == 'a' &&
00214 filename[lastchar - 1] == 'm')
00215 {
00216
00217
00218 if((lastchar == 6) && (filename[0] == '-') && (filename[1] == '.'))
00219 {
00220 filename = "-";
00221 }
00222 myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
00223
00224 myInterfacePtr = new BamInterface;
00225 }
00226 else if (lastchar >= 3 &&
00227 filename[lastchar - 3] == 'b' &&
00228 filename[lastchar - 2] == 'a' &&
00229 filename[lastchar - 1] == 'm')
00230 {
00231
00232
00233 if((lastchar == 5) && (filename[0] == '-') && (filename[1] == '.'))
00234 {
00235 filename = "-";
00236 }
00237 myFilePtr = ifopen(filename, "wb", InputFile::BGZF);
00238
00239 myInterfacePtr = new BamInterface;
00240 }
00241 else
00242 {
00243
00244
00245
00246 if((lastchar >= 1) && (filename[0] == '-'))
00247 {
00248 filename = "-";
00249 }
00250 myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
00251
00252 myInterfacePtr = new SamInterface;
00253 }
00254
00255 if (myFilePtr == NULL)
00256 {
00257 std::string errorMessage = "Failed to Open ";
00258 errorMessage += filename;
00259 errorMessage += " for writing";
00260 myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
00261 return(false);
00262 }
00263
00264 myIsOpenForWrite = true;
00265
00266
00267 if(header != NULL)
00268 {
00269 return(WriteHeader(*header));
00270 }
00271
00272
00273 myStatus = SamStatus::SUCCESS;
00274 return(true);
00275 }
00276
00277
00278
00279 bool SamFile::ReadBamIndex(const char* bamIndexFilename)
00280 {
00281
00282 if(myBamIndex != NULL)
00283 {
00284 delete myBamIndex;
00285 myBamIndex = NULL;
00286 }
00287
00288
00289 myBamIndex = new BamIndex();
00290 SamStatus::Status indexStat = myBamIndex->readIndex(bamIndexFilename);
00291
00292 if(indexStat != SamStatus::SUCCESS)
00293 {
00294 std::string errorMessage = "Failed to read the bam Index file: ";
00295 errorMessage += bamIndexFilename;
00296 myStatus.setStatus(indexStat, errorMessage.c_str());
00297 delete myBamIndex;
00298 myBamIndex = NULL;
00299 return(false);
00300 }
00301 myStatus = SamStatus::SUCCESS;
00302 return(true);
00303 }
00304
00305
00306
00307 bool SamFile::ReadBamIndex()
00308 {
00309 if(myFilePtr == NULL)
00310 {
00311
00312
00313 std::string errorMessage = "Failed to read the bam Index file -"
00314 " the BAM file needs to be read first in order to determine"
00315 " the index filename.";
00316 myStatus.setStatus(SamStatus::FAIL_ORDER, errorMessage.c_str());
00317 return(false);
00318 }
00319
00320 const char* bamBaseName = myFilePtr->getFileName();
00321
00322 std::string indexName = bamBaseName;
00323 indexName += ".bai";
00324
00325 bool foundFile = true;
00326 try
00327 {
00328 if(ReadBamIndex(indexName.c_str()) == false)
00329 {
00330 foundFile = false;
00331 }
00332 }
00333 catch (std::exception& e)
00334 {
00335 foundFile = false;
00336 }
00337
00338
00339 if(!foundFile)
00340 {
00341
00342
00343 size_t startExt = indexName.find(".bam");
00344 if(startExt == std::string::npos)
00345 {
00346
00347
00348 return(false);
00349 }
00350
00351 indexName.erase(startExt, 4);
00352 return(ReadBamIndex(indexName.c_str()));
00353 }
00354 return(true);
00355 }
00356
00357
00358
00359 void SamFile::SetReference(GenomeSequence* reference)
00360 {
00361 myRefPtr = reference;
00362 }
00363
00364
00365
00366 void SamFile::SetReadSequenceTranslation(SamRecord::SequenceTranslation translation)
00367 {
00368 myReadTranslation = translation;
00369 }
00370
00371
00372
00373 void SamFile::SetWriteSequenceTranslation(SamRecord::SequenceTranslation translation)
00374 {
00375 myWriteTranslation = translation;
00376 }
00377
00378
00379 void SamFile::Close()
00380 {
00381
00382
00383 resetFile();
00384 }
00385
00386
00387
00388
00389 bool SamFile::IsEOF()
00390 {
00391 if (myFilePtr != NULL)
00392 {
00393
00394 return(ifeof(myFilePtr));
00395 }
00396
00397 return true;
00398 }
00399
00400
00401
00402 bool SamFile::ReadHeader(SamFileHeader& header)
00403 {
00404 if(myIsOpenForRead == false)
00405 {
00406
00407 myStatus.setStatus(SamStatus::FAIL_ORDER,
00408 "Cannot read header since the file is not open for reading");
00409 return(false);
00410 }
00411
00412 if(myHasHeader == true)
00413 {
00414
00415 myStatus.setStatus(SamStatus::FAIL_ORDER,
00416 "Cannot read header since it has already been read.");
00417 return(false);
00418 }
00419
00420 myStatus = myInterfacePtr->readHeader(myFilePtr, header);
00421 if(myStatus == SamStatus::SUCCESS)
00422 {
00423
00424 myHasHeader = true;
00425 return(true);
00426 }
00427 return(false);
00428 }
00429
00430
00431
00432 bool SamFile::WriteHeader(SamFileHeader& header)
00433 {
00434 if(myIsOpenForWrite == false)
00435 {
00436
00437
00438
00439 myStatus.setStatus(SamStatus::FAIL_ORDER,
00440 "Cannot write header since the file is not open for writing");
00441 return(false);
00442 }
00443
00444 if(myHasHeader == true)
00445 {
00446
00447 myStatus.setStatus(SamStatus::FAIL_ORDER,
00448 "Cannot write header since it has already been written");
00449 return(false);
00450 }
00451
00452 myStatus = myInterfacePtr->writeHeader(myFilePtr, header);
00453 if(myStatus == SamStatus::SUCCESS)
00454 {
00455
00456 myHasHeader = true;
00457 return(true);
00458 }
00459
00460
00461 return(false);
00462 }
00463
00464
00465
00466 bool SamFile::ReadRecord(SamFileHeader& header,
00467 SamRecord& record)
00468 {
00469 myStatus = SamStatus::SUCCESS;
00470
00471 if(myIsOpenForRead == false)
00472 {
00473
00474 myStatus.setStatus(SamStatus::FAIL_ORDER,
00475 "Cannot read record since the file is not open for reading");
00476 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file."));
00477 return(false);
00478 }
00479
00480 if(myHasHeader == false)
00481 {
00482
00483
00484 myStatus.setStatus(SamStatus::FAIL_ORDER,
00485 "Cannot read record since the header has not been read.");
00486 throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header."));
00487 return(false);
00488 }
00489
00490
00491
00492 if(myNewSection)
00493 {
00494 if(!processNewSection(header))
00495 {
00496
00497
00498
00499
00500 return(false);
00501 }
00502 }
00503
00504
00505 if(myRefID != BamIndex::REF_ID_ALL)
00506 {
00507
00508 return(readIndexedRecord(header, record));
00509 }
00510
00511 record.setReference(myRefPtr);
00512 record.setSequenceTranslation(myReadTranslation);
00513
00514
00515
00516 myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
00517 if(myStatus == SamStatus::SUCCESS)
00518 {
00519
00520 myRecordCount++;
00521
00522 if(myStatistics != NULL)
00523 {
00524
00525 myStatistics->updateStatistics(record);
00526 }
00527
00528
00529 if(!validateSortOrder(record, header))
00530 {
00531
00532 return(false);
00533 }
00534 return(true);
00535 }
00536
00537 return(false);
00538 }
00539
00540
00541
00542
00543 bool SamFile::WriteRecord(SamFileHeader& header,
00544 SamRecord& record)
00545 {
00546 if(myIsOpenForWrite == false)
00547 {
00548
00549 myStatus.setStatus(SamStatus::FAIL_ORDER,
00550 "Cannot write record since the file is not open for writing");
00551 return(false);
00552 }
00553
00554 if(myHasHeader == false)
00555 {
00556
00557 myStatus.setStatus(SamStatus::FAIL_ORDER,
00558 "Cannot write record since the header has not been written");
00559 return(false);
00560 }
00561
00562
00563 if(!validateSortOrder(record, header))
00564 {
00565
00566 myStatus.setStatus(SamStatus::INVALID_SORT,
00567 "Cannot write the record since the file is not properly sorted.");
00568 return(false);
00569 }
00570
00571 if(myRefPtr != NULL)
00572 {
00573 record.setReference(myRefPtr);
00574 }
00575
00576
00577
00578 myStatus = myInterfacePtr->writeRecord(myFilePtr, header, record,
00579 myWriteTranslation);
00580
00581 if(myStatus == SamStatus::SUCCESS)
00582 {
00583
00584 myRecordCount++;
00585 return(true);
00586 }
00587 return(false);
00588 }
00589
00590
00591
00592
00593 void SamFile::setSortedValidation(SortedType sortType)
00594 {
00595 mySortedType = sortType;
00596 }
00597
00598
00599
00600 uint32_t SamFile::GetCurrentRecordCount()
00601 {
00602 return(myRecordCount);
00603 }
00604
00605
00606
00607 bool SamFile::SetReadSection(int32_t refID)
00608 {
00609
00610 return(SetReadSection(refID, -1, -1));
00611 }
00612
00613
00614
00615
00616 bool SamFile::SetReadSection(const char* refName)
00617 {
00618
00619 return(SetReadSection(refName, -1, -1));
00620 }
00621
00622
00623
00624 bool SamFile::SetReadSection(int32_t refID, int32_t start, int32_t end)
00625 {
00626
00627
00628
00629 if(!myIsBamOpenForRead)
00630 {
00631
00632 myStatus.setStatus(SamStatus::FAIL_ORDER,
00633 "Canot set section since there is no bam file open");
00634 return(false);
00635 }
00636
00637
00638 myFilePtr->disableBuffering();
00639
00640 myNewSection = true;
00641 myStartPos = start;
00642 myEndPos = end;
00643 myRefID = refID;
00644 myRefName.clear();
00645 myChunksToRead.clear();
00646
00647
00648 myCurrentChunkEnd = 0;
00649 myStatus = SamStatus::SUCCESS;
00650
00651
00652 myPrevCoord = -1;
00653 myPrevRefID = 0;
00654 myPrevReadName.clear();
00655
00656 return(true);
00657 }
00658
00659
00660
00661 bool SamFile::SetReadSection(const char* refName, int32_t start, int32_t end)
00662 {
00663
00664
00665
00666 if(!myIsBamOpenForRead)
00667 {
00668
00669 myStatus.setStatus(SamStatus::FAIL_ORDER,
00670 "Canot set section since there is no bam file open");
00671 return(false);
00672 }
00673
00674
00675 myFilePtr->disableBuffering();
00676
00677 myNewSection = true;
00678 myStartPos = start;
00679 myEndPos = end;
00680 if((strcmp(refName, "") == 0) || (strcmp(refName, "*") == 0))
00681 {
00682
00683 myRefID = BamIndex::REF_ID_UNMAPPED;
00684 }
00685 else
00686 {
00687
00688
00689 myRefName = refName;
00690 myRefID = BamIndex::REF_ID_ALL;
00691 }
00692 myChunksToRead.clear();
00693
00694
00695 myCurrentChunkEnd = 0;
00696 myStatus = SamStatus::SUCCESS;
00697
00698
00699 myPrevCoord = -1;
00700 myPrevRefID = 0;
00701 myPrevReadName.clear();
00702
00703 return(true);
00704 }
00705
00706
00707
00708
00709
00710 int32_t SamFile::getNumMappedReadsFromIndex(int32_t refID)
00711 {
00712
00713 if(myBamIndex == NULL)
00714 {
00715 myStatus.setStatus(SamStatus::FAIL_ORDER,
00716 "Canot get num mapped reads from the index until it has been read.");
00717 return(false);
00718 }
00719 return(myBamIndex->getNumMappedReads(refID));
00720 }
00721
00722
00723
00724
00725 int32_t SamFile::getNumUnMappedReadsFromIndex(int32_t refID)
00726 {
00727
00728 if(myBamIndex == NULL)
00729 {
00730 myStatus.setStatus(SamStatus::FAIL_ORDER,
00731 "Canot get num unmapped reads from the index until it has been read.");
00732 return(false);
00733 }
00734 return(myBamIndex->getNumUnMappedReads(refID));
00735 }
00736
00737
00738
00739
00740 int32_t SamFile::getNumMappedReadsFromIndex(const char* refName,
00741 SamFileHeader& header)
00742 {
00743
00744 if(myBamIndex == NULL)
00745 {
00746 myStatus.setStatus(SamStatus::FAIL_ORDER,
00747 "Canot get num mapped reads from the index until it has been read.");
00748 return(false);
00749 }
00750 int32_t refID = BamIndex::REF_ID_UNMAPPED;
00751 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
00752 {
00753
00754 refID = header.getReferenceID(refName);
00755 }
00756 return(myBamIndex->getNumMappedReads(refID));
00757 }
00758
00759
00760
00761
00762 int32_t SamFile::getNumUnMappedReadsFromIndex(const char* refName,
00763 SamFileHeader& header)
00764 {
00765
00766 if(myBamIndex == NULL)
00767 {
00768 myStatus.setStatus(SamStatus::FAIL_ORDER,
00769 "Canot get num unmapped reads from the index until it has been read.");
00770 return(false);
00771 }
00772 int32_t refID = BamIndex::REF_ID_UNMAPPED;
00773 if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
00774 {
00775
00776 refID = header.getReferenceID(refName);
00777 }
00778 return(myBamIndex->getNumUnMappedReads(refID));
00779 }
00780
00781
00782
00783
00784 uint32_t SamFile::GetNumOverlaps(SamRecord& samRecord)
00785 {
00786 if(myRefPtr != NULL)
00787 {
00788 samRecord.setReference(myRefPtr);
00789 }
00790 samRecord.setSequenceTranslation(myReadTranslation);
00791
00792
00793
00794 return(samRecord.getNumOverlaps(myStartPos, myEndPos));
00795 }
00796
00797
00798 void SamFile::GenerateStatistics(bool genStats)
00799 {
00800 if(genStats)
00801 {
00802 if(myStatistics == NULL)
00803 {
00804
00805
00806 myStatistics = new SamStatistics();
00807 }
00808 }
00809 else
00810 {
00811
00812
00813 if(myStatistics != NULL)
00814 {
00815 delete myStatistics;
00816 myStatistics = NULL;
00817 }
00818 }
00819
00820 }
00821
00822
00823
00824 void SamFile::init(const char* filename, OpenType mode, SamFileHeader* header)
00825 {
00826 myFilePtr = NULL;
00827 myInterfacePtr = NULL;
00828 myStatistics = NULL;
00829 myBamIndex = NULL;
00830 myRefPtr = NULL;
00831 myReadTranslation = SamRecord::NONE;
00832 myWriteTranslation = SamRecord::NONE;
00833
00834 resetFile();
00835
00836 bool openStatus = true;
00837 if(mode == READ)
00838 {
00839
00840 openStatus = OpenForRead(filename, header);
00841 }
00842 else
00843 {
00844
00845 openStatus = OpenForWrite(filename, header);
00846 }
00847 if(!openStatus)
00848 {
00849
00850 fprintf(stderr, "%s\n", GetStatusMessage());
00851 std::cerr << "FAILURE - EXITING!!!" << std::endl;
00852 exit(-1);
00853 }
00854 }
00855
00856
00857
00858 void SamFile::resetFile()
00859 {
00860
00861 if (myFilePtr != NULL)
00862 {
00863
00864 ifclose(myFilePtr);
00865 myFilePtr = NULL;
00866 }
00867 if(myInterfacePtr != NULL)
00868 {
00869 delete myInterfacePtr;
00870 myInterfacePtr = NULL;
00871 }
00872
00873 myIsOpenForRead = false;
00874 myIsOpenForWrite = false;
00875 myHasHeader = false;
00876 mySortedType = UNSORTED;
00877 myPrevReadName.clear();
00878 myPrevCoord = -1;
00879 myPrevRefID = 0;
00880 myRecordCount = 0;
00881 myStatus = SamStatus::SUCCESS;
00882
00883
00884 myIsBamOpenForRead = false;
00885 myRefID = BamIndex::REF_ID_ALL;
00886 myStartPos = -1;
00887 myEndPos = -1;
00888 myNewSection = false;
00889 myCurrentChunkEnd = 0;
00890 myChunksToRead.clear();
00891 if(myBamIndex != NULL)
00892 {
00893 delete myBamIndex;
00894 myBamIndex = NULL;
00895 }
00896
00897
00898 if(myStatistics != NULL)
00899 {
00900 myStatistics->reset();
00901 }
00902
00903 myRefName.clear();
00904 }
00905
00906
00907
00908
00909
00910 bool SamFile::validateSortOrder(SamRecord& record, SamFileHeader& header)
00911 {
00912 if(myRefPtr != NULL)
00913 {
00914 record.setReference(myRefPtr);
00915 }
00916 record.setSequenceTranslation(myReadTranslation);
00917
00918 bool status = false;
00919 if(mySortedType == UNSORTED)
00920 {
00921
00922 status = true;
00923 }
00924 else
00925 {
00926
00927 if(mySortedType == FLAG)
00928 {
00929
00930 mySortedType = getSortOrderFromHeader(header);
00931 }
00932
00933 if(mySortedType == QUERY_NAME)
00934 {
00935
00936
00937 const char* readName = record.getReadName();
00938 if(myPrevReadName.compare(readName) > 0)
00939 {
00940
00941
00942 String errorMessage = "ERROR: File is not sorted at record ";
00943 errorMessage += myRecordCount;
00944 myStatus.setStatus(SamStatus::INVALID_SORT,
00945 errorMessage.c_str());
00946 status = false;
00947 }
00948 else
00949 {
00950 myPrevReadName = readName;
00951 status = true;
00952 }
00953 }
00954 else
00955 {
00956
00957
00958 int32_t refID = record.getReferenceID();
00959 int32_t coord = record.get0BasedPosition();
00960
00961 if(refID == BamIndex::REF_ID_UNMAPPED)
00962 {
00963
00964
00965 status = true;
00966 myPrevRefID = refID;
00967 myPrevCoord = coord;
00968 }
00969 else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED)
00970 {
00971
00972
00973 String errorMessage = "ERROR: File is not sorted at record ";
00974 errorMessage += myRecordCount;
00975 myStatus.setStatus(SamStatus::INVALID_SORT,
00976 errorMessage.c_str());
00977 status = false;
00978 }
00979 else if(refID < myPrevRefID)
00980 {
00981
00982
00983 String errorMessage = "ERROR: File is not sorted at record ";
00984 errorMessage += myRecordCount;
00985 myStatus.setStatus(SamStatus::INVALID_SORT,
00986 errorMessage.c_str());
00987 status = false;
00988 }
00989 else
00990 {
00991
00992 if(refID > myPrevRefID)
00993 {
00994
00995 myPrevCoord = -1;
00996 }
00997
00998
00999 if(coord < myPrevCoord)
01000 {
01001
01002 String errorMessage = "ERROR: File is not sorted at record ";
01003 errorMessage += myRecordCount;
01004 myStatus.setStatus(SamStatus::INVALID_SORT,
01005 errorMessage.c_str());
01006 status = false;
01007 }
01008 else
01009 {
01010 myPrevRefID = refID;
01011 myPrevCoord = coord;
01012 status = true;
01013 }
01014 }
01015 }
01016 }
01017
01018 return(status);
01019 }
01020
01021
01022 SamFile::SortedType SamFile::getSortOrderFromHeader(SamFileHeader& header)
01023 {
01024 const char* tag = header.getSortOrder();
01025
01026
01027
01028 SortedType headerSortOrder = UNSORTED;
01029 if(strcmp(tag, "queryname") == 0)
01030 {
01031 headerSortOrder = QUERY_NAME;
01032 }
01033 else if(strcmp(tag, "coordinate") == 0)
01034 {
01035 headerSortOrder = COORDINATE;
01036 }
01037 return(headerSortOrder);
01038 }
01039
01040
01041
01042 bool SamFile::readIndexedRecord(SamFileHeader& header,
01043 SamRecord& record)
01044 {
01045 record.setReference(myRefPtr);
01046 record.setSequenceTranslation(myReadTranslation);
01047
01048
01049
01050 bool recordFound = false;
01051 while(!recordFound)
01052 {
01053
01054
01055
01056
01057 uint64_t currentPos = iftell(myFilePtr);
01058 if(currentPos >= myCurrentChunkEnd)
01059 {
01060
01061 if(myChunksToRead.empty())
01062 {
01063 myStatus = SamStatus::NO_MORE_RECS;
01064 return(false);
01065 }
01066
01067
01068 Chunk newChunk = myChunksToRead.pop();
01069
01070
01071
01072 if(newChunk.chunk_beg != currentPos)
01073 {
01074
01075 if(ifseek(myFilePtr, newChunk.chunk_beg, SEEK_SET) != true)
01076 {
01077
01078 myStatus.setStatus(SamStatus::FAIL_IO,
01079 "Failed to seek to the next record");
01080 return(false);
01081 }
01082 }
01083
01084 myCurrentChunkEnd = newChunk.chunk_end;
01085 }
01086
01087
01088 myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
01089 if(myStatus != SamStatus::SUCCESS)
01090 {
01091
01092 return(false);
01093 }
01094
01095
01096
01097 if(record.getReferenceID() != myRefID)
01098 {
01099
01100 myStatus = SamStatus::NO_MORE_RECS;
01101 return(false);
01102 }
01103
01104
01105 recordFound = true;
01106
01107
01108
01109 if((myStartPos != -1) && (myEndPos != -1))
01110 {
01111
01112
01113
01114
01115
01116 if(record.get0BasedPosition() >= myEndPos)
01117 {
01118 myStatus = SamStatus::NO_MORE_RECS;
01119 return(false);
01120 }
01121
01122
01123
01124
01125 if(record.get0BasedAlignmentEnd() < myStartPos)
01126 {
01127
01128
01129 recordFound = false;
01130 }
01131 }
01132 }
01133
01134
01135 if(!validateSortOrder(record, header))
01136 {
01137 myStatus.setStatus(SamStatus::INVALID_SORT,
01138 "The file is not properly sorted.");
01139 return(false);
01140 }
01141
01142
01143
01144 myRecordCount++;
01145
01146 if(myStatistics != NULL)
01147 {
01148
01149 myStatistics->updateStatistics(record);
01150 }
01151
01152 return(true);
01153 }
01154
01155
01156 bool SamFile::processNewSection(SamFileHeader &header)
01157 {
01158 myNewSection = false;
01159
01160
01161 if(myBamIndex == NULL)
01162 {
01163
01164 myStatus.setStatus(SamStatus::FAIL_ORDER,
01165 "Canot read section since there is no index file open");
01166 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the BAM Index file."));
01167 return(false);
01168 }
01169
01170
01171 if(!myIsBamOpenForRead)
01172 {
01173
01174 myStatus.setStatus(SamStatus::FAIL_ORDER,
01175 "Canot read section since there is no bam file open");
01176 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section without opening a BAM file."));
01177 return(false);
01178 }
01179
01180 if(myHasHeader == false)
01181 {
01182
01183 myStatus.setStatus(SamStatus::FAIL_ORDER,
01184 "Cannot read record since the header has not been read.");
01185 throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the header."));
01186 return(false);
01187 }
01188
01189 myChunksToRead.clear();
01190
01191
01192 myCurrentChunkEnd = 0;
01193
01194
01195
01196 if(!myRefName.empty())
01197 {
01198 myRefID = header.getReferenceID(myRefName.c_str());
01199
01200 myRefName.clear();
01201 }
01202
01203
01204 if(myBamIndex->getChunksForRegion(myRefID, myStartPos, myEndPos,
01205 myChunksToRead) == true)
01206 {
01207 myStatus = SamStatus::SUCCESS;
01208 }
01209 else
01210 {
01211 myStatus.setStatus(SamStatus::FAIL_PARSE,
01212 "Failed to get the specified region.");
01213 }
01214 return(true);
01215 }
01216
01217
01218
01219 SamFileReader::SamFileReader()
01220 : SamFile()
01221 {
01222 }
01223
01224
01225
01226 SamFileReader::SamFileReader(const char* filename)
01227 : SamFile(filename, READ)
01228 {
01229 }
01230
01231
01232
01233
01234
01235 SamFileReader::SamFileReader(const char* filename,
01236 ErrorHandler::HandlingType errorHandlingType)
01237 : SamFile(filename, READ, errorHandlingType)
01238 {
01239 }
01240
01241
01242
01243 SamFileReader::SamFileReader(const char* filename,
01244 SamFileHeader* header)
01245 : SamFile(filename, READ, header)
01246 {
01247 }
01248
01249
01250
01251 SamFileReader::SamFileReader(const char* filename,
01252 ErrorHandler::HandlingType errorHandlingType,
01253 SamFileHeader* header)
01254 : SamFile(filename, READ, errorHandlingType, header)
01255 {
01256 }
01257
01258
01259 SamFileReader::~SamFileReader()
01260 {
01261 }
01262
01263
01264
01265 SamFileWriter::SamFileWriter()
01266 : SamFile()
01267 {
01268 }
01269
01270
01271
01272 SamFileWriter::SamFileWriter(const char* filename)
01273 : SamFile(filename, WRITE)
01274 {
01275 }
01276
01277
01278
01279 SamFileWriter::SamFileWriter(const char* filename,
01280 ErrorHandler::HandlingType errorHandlingType)
01281 : SamFile(filename, WRITE, errorHandlingType)
01282 {
01283 }
01284
01285
01286
01287 SamFileWriter::SamFileWriter(const char* filename,
01288 SamFileHeader* header)
01289 : SamFile(filename, WRITE, header)
01290 {
01291 }
01292
01293
01294
01295 SamFileWriter::SamFileWriter(const char* filename,
01296 ErrorHandler::HandlingType errorHandlingType,
01297 SamFileHeader* header)
01298 : SamFile(filename, WRITE, errorHandlingType, header)
01299 {
01300 }
01301
01302
01303 SamFileWriter::~SamFileWriter()
01304 {
01305 }