SamFileHeader.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "SamFileHeader.h"
00019 #include "SamHeaderSQ.h"
00020 #include "SamHeaderRG.h"
00021
00022
00023 const std::string SamFileHeader::EMPTY_RETURN = "";
00024
00025 SamFileHeader::SamFileHeader()
00026 : myHD(NULL),
00027 myReferenceInfo()
00028 {
00029 resetHeader();
00030 }
00031
00032
00033 SamFileHeader::~SamFileHeader()
00034 {
00035 resetHeader();
00036 }
00037
00038
00039
00040 SamFileHeader::SamFileHeader(const SamFileHeader& header)
00041 {
00042 copy(header);
00043 }
00044
00045
00046
00047 SamFileHeader & SamFileHeader::operator = (const SamFileHeader& header)
00048 {
00049 copy(header);
00050 return(*this);
00051 }
00052
00053
00054 bool SamFileHeader::copy(const SamFileHeader& header)
00055 {
00056
00057 if(this == &header)
00058 {
00059 return(true);
00060 }
00061
00062 resetHeader();
00063
00064 myReferenceInfo = header.myReferenceInfo;
00065
00066
00067
00068 std::string newString;
00069 bool status = header.getHeaderString(newString);
00070 String newHeaderString = newString.c_str();
00071
00072 status &= parseHeader(newHeaderString);
00073
00074 myCurrentHeaderIndex = header.myCurrentHeaderIndex;
00075 myCurrentCommentIndex = header.myCurrentCommentIndex;
00076
00077 return(status);
00078 }
00079
00080
00081
00082 void SamFileHeader::resetHeader()
00083 {
00084 myReferenceInfo.clear();
00085
00086
00087
00088 myHD = NULL;
00089 mySQs.Clear();
00090 myRGs.Clear();
00091 myPGs.Clear();
00092
00093
00094 for(unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size();
00095 headerIndex++)
00096 {
00097 delete myHeaderRecords[headerIndex];
00098 myHeaderRecords[headerIndex] = NULL;
00099 }
00100 myHeaderRecords.clear();
00101
00102
00103 resetHeaderRecordIter();
00104
00105
00106 resetCommentIter();
00107
00108
00109 resetSQRecordIter();
00110 resetRGRecordIter();
00111 resetPGRecordIter();
00112
00113
00114 myComments.clear();
00115 }
00116
00117
00118
00119
00120 bool SamFileHeader::getHeaderString(std::string& header) const
00121 {
00122 header = "";
00123
00124
00125 unsigned int index = 0;
00126 while(getHeaderLine(index, header) != false)
00127 {
00128 ++index;
00129 }
00130
00131 return(true);
00132 }
00133
00134
00135 int SamFileHeader::getReferenceID(const String & referenceName)
00136 {
00137 return(myReferenceInfo.getReferenceID(referenceName));
00138 }
00139
00140
00141 int SamFileHeader::getReferenceID(const char* referenceName)
00142 {
00143 return(myReferenceInfo.getReferenceID(referenceName));
00144 }
00145
00146
00147 const String & SamFileHeader::getReferenceLabel(int id) const
00148 {
00149 return(myReferenceInfo.getReferenceLabel(id));
00150 }
00151
00152
00153
00154 const SamReferenceInfo* SamFileHeader::getReferenceInfo() const
00155 {
00156 return(&myReferenceInfo);
00157 }
00158
00159
00160
00161 void SamFileHeader::addReferenceInfo(const char* referenceSequenceName,
00162 int32_t referenceSequenceLength)
00163 {
00164 myReferenceInfo.add(referenceSequenceName, referenceSequenceLength);
00165 }
00166
00167
00168 bool SamFileHeader::addHeaderLine(const char* type, const char* tag,
00169 const char* value)
00170 {
00171 String headerLine;
00172 headerLine += "@";
00173 headerLine += type;
00174 headerLine += "\t";
00175 headerLine += tag;
00176 headerLine += ":";
00177 headerLine += value;
00178 return(addHeaderLine(headerLine.c_str()));
00179 }
00180
00181
00182
00183
00184 bool SamFileHeader::addHeaderLine(const char* headerLine)
00185 {
00186
00187 String headerString = headerLine;
00188 if(parseHeader(headerString))
00189 {
00190
00191 return(true);
00192 }
00193
00194 return(false);
00195 }
00196
00197
00198
00199 bool SamFileHeader::setHDTag(const char* tag, const char* value)
00200 {
00201 if(myHD == NULL)
00202 {
00203
00204 myHD = new SamHeaderHD();
00205 if(myHD == NULL)
00206 {
00207
00208 return(false);
00209 }
00210
00211
00212 myHeaderRecords.push_back(myHD);
00213 }
00214 return(myHD->setTag(tag, value));
00215 }
00216
00217
00218
00219
00220 bool SamFileHeader::setSQTag(const char* tag, const char* value,
00221 const char* name)
00222 {
00223
00224 SamHeaderSQ* sq = getSQ(name);
00225 if(sq == NULL)
00226 {
00227
00228
00229 sq = new SamHeaderSQ();
00230
00231 if(sq == NULL)
00232 {
00233
00234 return(false);
00235 }
00236
00237
00238 mySQs.Add(name, sq);
00239 myHeaderRecords.push_back(sq);
00240
00241
00242 if(!sq->addKey(name))
00243 {
00244
00245 return(false);
00246 }
00247 }
00248
00249 return(sq->setTag(tag, value));
00250 }
00251
00252
00253
00254
00255 bool SamFileHeader::setRGTag(const char* tag, const char* value, const char* id)
00256 {
00257
00258 SamHeaderRG* rg = getRG(id);
00259 if(rg == NULL)
00260 {
00261
00262
00263 rg = new SamHeaderRG();
00264
00265 if(rg == NULL)
00266 {
00267
00268 return(false);
00269 }
00270
00271
00272 myRGs.Add(id, rg);
00273 myHeaderRecords.push_back(rg);
00274
00275
00276 if(!rg->addKey(id))
00277 {
00278
00279 return(false);
00280 }
00281 }
00282
00283 return(rg->setTag(tag, value));
00284 }
00285
00286
00287
00288
00289
00290 bool SamFileHeader::setPGTag(const char* tag, const char* value, const char* id)
00291 {
00292
00293 SamHeaderPG* pg = getPG(id);
00294 if(pg == NULL)
00295 {
00296
00297
00298 pg = new SamHeaderPG();
00299
00300 if(pg == NULL)
00301 {
00302
00303 return(false);
00304 }
00305
00306
00307 myPGs.Add(id, pg);
00308 myHeaderRecords.push_back(pg);
00309
00310
00311 if(!pg->addKey(id))
00312 {
00313
00314 return(false);
00315 }
00316 }
00317
00318 return(pg->setTag(tag, value));
00319 }
00320
00321
00322
00323 bool SamFileHeader::addHD(SamHeaderHD* hd)
00324 {
00325
00326
00327 if((myHD != NULL) || (hd == NULL))
00328 {
00329 return(false);
00330 }
00331 myHD = hd;
00332
00333 myHeaderRecords.push_back(myHD);
00334 return(true);
00335 }
00336
00337
00338
00339 bool SamFileHeader::addSQ(SamHeaderSQ* sq)
00340 {
00341 if(sq == NULL)
00342 {
00343
00344 return(false);
00345 }
00346 const char* name = sq->getTagValue("SN");
00347 if(strcmp(name, EMPTY_RETURN.c_str()) == 0)
00348 {
00349
00350 return(false);
00351 }
00352
00353
00354
00355 if(mySQs.Find(name) < 0)
00356 {
00357
00358
00359 mySQs.Add(name, sq);
00360 myHeaderRecords.push_back(sq);
00361 return(true);
00362 }
00363
00364
00365 return(false);
00366 }
00367
00368
00369
00370 bool SamFileHeader::addRG(SamHeaderRG* rg)
00371 {
00372 if(rg == NULL)
00373 {
00374
00375 return(false);
00376 }
00377 const char* id = rg->getTagValue("ID");
00378 if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00379 {
00380
00381 return(false);
00382 }
00383
00384
00385
00386 if(myRGs.Find(id) < 0)
00387 {
00388
00389
00390 myRGs.Add(id, rg);
00391 myHeaderRecords.push_back(rg);
00392 return(true);
00393 }
00394
00395
00396 return(false);
00397 }
00398
00399
00400
00401 bool SamFileHeader::addPG(SamHeaderPG* pg)
00402 {
00403
00404 if(pg == NULL)
00405 {
00406 return(false);
00407 }
00408 const char* id = pg->getTagValue("ID");
00409 if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00410 {
00411
00412 return(false);
00413 }
00414
00415
00416
00417 if(myPGs.Find(id) < 0)
00418 {
00419
00420
00421 myPGs.Add(id, pg);
00422 myHeaderRecords.push_back(pg);
00423 return(true);
00424 }
00425
00426
00427 return(false);
00428 }
00429
00430
00431
00432 bool SamFileHeader::removeHD()
00433 {
00434 if(myHD != NULL)
00435 {
00436
00437
00438
00439
00440 myHD->reset();
00441
00442
00443 myHD = NULL;
00444 }
00445
00446 return(true);
00447 }
00448
00449
00450
00451 bool SamFileHeader::removeSQ(const char* name)
00452 {
00453
00454 int hashIndex = mySQs.Find(name);
00455 if(hashIndex < 0)
00456 {
00457
00458
00459
00460 return(true);
00461 }
00462
00463
00464 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(hashIndex));
00465
00466 if(sq == NULL)
00467 {
00468
00469
00470 return(false);
00471 }
00472
00473
00474
00475
00476
00477 sq->reset();
00478
00479
00480 mySQs.Delete(hashIndex);
00481
00482 return(true);
00483 }
00484
00485
00486
00487 bool SamFileHeader::removeRG(const char* id)
00488 {
00489
00490 int hashIndex = myRGs.Find(id);
00491 if(hashIndex < 0)
00492 {
00493
00494
00495
00496 return(true);
00497 }
00498
00499
00500 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(hashIndex));
00501
00502 if(rg == NULL)
00503 {
00504
00505
00506 return(false);
00507 }
00508
00509
00510
00511
00512
00513 rg->reset();
00514
00515
00516 myRGs.Delete(hashIndex);
00517
00518 return(true);
00519 }
00520
00521
00522
00523 bool SamFileHeader::removePG(const char* id)
00524 {
00525
00526 int hashIndex = myPGs.Find(id);
00527 if(hashIndex < 0)
00528 {
00529
00530
00531
00532 return(true);
00533 }
00534
00535
00536 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(hashIndex));
00537
00538 if(pg == NULL)
00539 {
00540
00541
00542 return(false);
00543 }
00544
00545
00546
00547
00548
00549 pg->reset();
00550
00551
00552 myPGs.Delete(hashIndex);
00553
00554 return(true);
00555 }
00556
00557
00558 SamStatus::Status SamFileHeader::setHeaderFromBamFile(IFILE filePtr)
00559 {
00560 if((filePtr == NULL) || (filePtr->isOpen() == false))
00561 {
00562
00563 return(SamStatus::FAIL_ORDER);
00564 }
00565
00566 int headerLength;
00567
00568 int readSize = ifread(filePtr, &headerLength, sizeof(int));
00569
00570 if(readSize != sizeof(int))
00571 {
00572
00573 return(SamStatus::FAIL_IO);
00574 }
00575
00576 String header;
00577 if (headerLength > 0)
00578 {
00579
00580 readSize =
00581 ifread(filePtr, header.LockBuffer(headerLength + 1), headerLength);
00582 header[headerLength] = 0;
00583 header.UnlockBuffer();
00584 if(readSize != headerLength)
00585 {
00586
00587 return(SamStatus::FAIL_IO);
00588 }
00589 }
00590
00591
00592 parseHeader(header);
00593 return(SamStatus::SUCCESS);
00594 }
00595
00596
00597 const char* SamFileHeader::getHDTagValue(const char* tag)
00598 {
00599 if(myHD == NULL)
00600 {
00601
00602 return(EMPTY_RETURN.c_str());
00603 }
00604 return(myHD->getTagValue(tag));
00605 }
00606
00607
00608
00609
00610 const char* SamFileHeader::getSQTagValue(const char* tag, const char* name)
00611 {
00612
00613 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(name));
00614
00615
00616 if(sq == NULL)
00617 {
00618 return(EMPTY_RETURN.c_str());
00619 }
00620
00621
00622 return(sq->getTagValue(tag));
00623 }
00624
00625
00626
00627
00628 const char* SamFileHeader::getRGTagValue(const char* tag, const char* id)
00629 {
00630
00631 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(id));
00632
00633
00634 if(rg == NULL)
00635 {
00636 return(EMPTY_RETURN.c_str());
00637 }
00638
00639
00640 return(rg->getTagValue(tag));
00641 }
00642
00643
00644 const char* SamFileHeader::getPGTagValue(const char* tag, const char* id)
00645 {
00646
00647 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(id));
00648
00649
00650 if(pg == NULL)
00651 {
00652 return(EMPTY_RETURN.c_str());
00653 }
00654
00655
00656 return(pg->getTagValue(tag));
00657 }
00658
00659
00660
00661 int SamFileHeader::getNumSQs()
00662 {
00663 return(mySQs.Entries());
00664 }
00665
00666
00667
00668 int SamFileHeader::getNumRGs()
00669 {
00670 return(myRGs.Entries());
00671 }
00672
00673
00674
00675 int SamFileHeader::getNumPGs()
00676 {
00677 return(myPGs.Entries());
00678 }
00679
00680
00681
00682 SamHeaderHD* SamFileHeader::getHD()
00683 {
00684 return(myHD);
00685 }
00686
00687
00688
00689 SamHeaderSQ* SamFileHeader::getSQ(const char* name)
00690 {
00691 return((SamHeaderSQ*)(mySQs.Object(name)));
00692 }
00693
00694
00695
00696 SamHeaderRG* SamFileHeader::getRG(const char* id)
00697 {
00698 return((SamHeaderRG*)(myRGs.Object(id)));
00699 }
00700
00701
00702
00703 SamHeaderPG* SamFileHeader::getPG(const char* id)
00704 {
00705 return((SamHeaderPG*)(myPGs.Object(id)));
00706 }
00707
00708
00709
00710
00711 const char* SamFileHeader::getSortOrder()
00712 {
00713 if(myHD == NULL)
00714 {
00715
00716 return(EMPTY_RETURN.c_str());
00717 }
00718 return(myHD->getSortOrder());
00719 }
00720
00721
00722
00723 const char* SamFileHeader::getTagSO()
00724 {
00725 return(getSortOrder());
00726 }
00727
00728
00729
00730
00731 SamHeaderRecord* SamFileHeader::getNextSQRecord()
00732 {
00733 return(getNextHeaderRecord(myCurrentSQIndex,
00734 SamHeaderRecord::SQ));
00735 }
00736
00737
00738
00739
00740 SamHeaderRecord* SamFileHeader::getNextRGRecord()
00741 {
00742 return(getNextHeaderRecord(myCurrentRGIndex,
00743 SamHeaderRecord::RG));
00744 }
00745
00746
00747
00748
00749 SamHeaderRecord* SamFileHeader::getNextPGRecord()
00750 {
00751 return(getNextHeaderRecord(myCurrentPGIndex,
00752 SamHeaderRecord::PG));
00753 }
00754
00755
00756
00757
00758 void SamFileHeader::resetSQRecordIter()
00759 {
00760 myCurrentSQIndex = 0;
00761 }
00762
00763
00764
00765
00766 void SamFileHeader::resetRGRecordIter()
00767 {
00768 myCurrentRGIndex = 0;
00769 }
00770
00771
00772
00773
00774 void SamFileHeader::resetPGRecordIter()
00775 {
00776 myCurrentPGIndex = 0;
00777 }
00778
00779
00780
00781
00782
00783
00784
00785 SamHeaderRecord* SamFileHeader::getNextHeaderRecord(uint32_t& index,
00786 SamHeaderRecord::SamHeaderRecordType headerType)
00787 {
00788 SamHeaderRecord* foundRecord = NULL;
00789
00790
00791 while((index < myHeaderRecords.size())
00792 && (foundRecord == NULL))
00793 {
00794
00795 foundRecord = myHeaderRecords[index];
00796
00797 ++index;
00798
00799 if(!foundRecord->isActiveHeaderRecord())
00800 {
00801
00802 foundRecord = NULL;
00803 }
00804
00805 else if(foundRecord->getType() != headerType)
00806 {
00807
00808 foundRecord = NULL;
00809 }
00810 }
00811
00812
00813 return(foundRecord);
00814 }
00815
00816
00817
00818
00819
00820
00821
00822 SamHeaderRecord* SamFileHeader::getNextHeaderRecord()
00823 {
00824
00825 SamHeaderRecord* foundRecord = NULL;
00826
00827
00828 while((myCurrentHeaderIndex < myHeaderRecords.size())
00829 && (foundRecord == NULL))
00830 {
00831
00832 foundRecord = myHeaderRecords[myCurrentHeaderIndex];
00833
00834 ++myCurrentHeaderIndex;
00835
00836 if(!foundRecord->isActiveHeaderRecord())
00837 {
00838
00839 foundRecord = NULL;
00840 }
00841 }
00842
00843
00844 return(foundRecord);
00845 }
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855 bool SamFileHeader::getNextHeaderLine(std::string &headerLine)
00856 {
00857 headerLine = EMPTY_RETURN.c_str();
00858
00859
00860
00861 while(headerLine == EMPTY_RETURN.c_str())
00862 {
00863 if(getHeaderLine(myCurrentHeaderIndex, headerLine) == false)
00864 {
00865
00866 return(false);
00867 }
00868 else
00869 {
00870
00871 ++myCurrentHeaderIndex;
00872 }
00873 }
00874 return(true);
00875 }
00876
00877
00878
00879
00880 void SamFileHeader::resetHeaderRecordIter()
00881 {
00882 myCurrentHeaderIndex = 0;
00883 }
00884
00885
00886
00887
00888 const char* SamFileHeader::getNextComment()
00889 {
00890 if(myCurrentCommentIndex < myComments.size())
00891 {
00892 return(myComments[myCurrentCommentIndex++].c_str());
00893 }
00894
00895 return(EMPTY_RETURN.c_str());
00896 }
00897
00898
00899
00900
00901 void SamFileHeader::resetCommentIter()
00902 {
00903 myCurrentCommentIndex = 0;
00904 }
00905
00906
00907
00908 bool SamFileHeader::addComment(const char* comment)
00909 {
00910 if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0))
00911 {
00912
00913 myComments.push_back(comment);
00914 }
00915 return(true);
00916 }
00917
00918
00919
00920 void SamFileHeader::generateReferenceInfo()
00921 {
00922
00923 uint32_t sqIndex = 0;
00924 SamHeaderRecord* hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00925 while(hdrRec != NULL)
00926 {
00927
00928 String refName = hdrRec->getTagValue("SN");
00929 String refLen = hdrRec->getTagValue("LN");
00930 long refLenInt = 0;
00931 if(refLen.AsInteger(refLenInt))
00932 {
00933
00934
00935 myReferenceInfo.add(refName, refLen);
00936 }
00937 hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00938 }
00939 }
00940
00941
00942
00943 bool SamFileHeader::parseHeader(String& header)
00944 {
00945
00946
00947
00948 bool status = true;
00949
00950
00951 std::vector<String>* types = header.Split('\n');
00952
00953
00954 for(uint32_t index = 0; index < types->size(); index++)
00955 {
00956
00957 status &= parseHeaderLine(types->at(index));
00958 }
00959
00960
00961 delete types;
00962 types = NULL;
00963
00964 return(status);
00965 }
00966
00967
00968
00969 bool SamFileHeader::parseHeaderLine(const String& headerLine)
00970 {
00971 StringArray tokens;
00972
00973
00974 tokens.ReplaceColumns(headerLine, '\t');
00975
00976 if(tokens.Length() < 1)
00977 {
00978
00979 return(true);
00980 }
00981
00982
00983 if((tokens[0].Length() != 3) || (tokens[0][0] != '@'))
00984 {
00985
00986
00987 return(false);
00988 }
00989
00990 bool status = true;
00991 if(tokens[0] == "@HD")
00992 {
00993 if(myHD == NULL)
00994 {
00995
00996 myHD = new SamHeaderHD();
00997 if(myHD == NULL)
00998 {
00999
01000 return(false);
01001 }
01002 myHeaderRecords.push_back(myHD);
01003 status &= myHD->setFields(tokens);
01004 }
01005 else
01006 {
01007
01008 status = false;
01009 }
01010 }
01011 else if(tokens[0] == "@SQ")
01012 {
01013
01014 SamHeaderSQ* sq = new SamHeaderSQ();
01015
01016 if(sq->setFields(tokens))
01017 {
01018
01019
01020 status &= addSQ(sq);
01021 }
01022 else
01023 {
01024 status = false;
01025 }
01026 }
01027 else if(tokens[0] == "@RG")
01028 {
01029
01030 SamHeaderRG* rg = new SamHeaderRG();
01031
01032 if(rg->setFields(tokens))
01033 {
01034
01035
01036 status &= addRG(rg);
01037 }
01038 else
01039 {
01040 status = false;
01041 }
01042 }
01043 else if(tokens[0] == "@PG")
01044 {
01045
01046 SamHeaderPG* pg = new SamHeaderPG();
01047
01048 if(pg->setFields(tokens))
01049 {
01050
01051
01052 status &= addPG(pg);
01053 }
01054 else
01055 {
01056 status = false;
01057 }
01058 }
01059 else if(tokens[0] == "@CO")
01060 {
01061 addComment(tokens[1]);
01062 }
01063 else
01064 {
01065
01066 status = false;
01067 }
01068
01069 return(status);
01070 }
01071
01072
01073
01074
01075
01076
01077 bool SamFileHeader::getHeaderLine(unsigned int index, std::string& header) const
01078 {
01079
01080 if(index < myHeaderRecords.size())
01081 {
01082
01083
01084 SamHeaderRecord* hdrRec = myHeaderRecords[index];
01085 hdrRec->appendString(header);
01086 return(true);
01087 }
01088 else
01089 {
01090 unsigned int commentIndex = index - myHeaderRecords.size();
01091
01092 if(commentIndex < myComments.size())
01093 {
01094
01095 header += "@CO\t";
01096
01097 header += myComments[commentIndex];
01098
01099 header += "\n";
01100 return(true);
01101 }
01102 }
01103
01104 return(false);
01105 }