00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "SamFileHeader.h"
00019 #include "SamHeaderSQ.h"
00020 #include "SamHeaderRG.h"
00021
00022
00023 const std::string SamFileHeader::EMPTY_RETURN = "";
00024
00025 SamFileHeader::SamFileHeader()
00026 : myHD(NULL),
00027 myReferenceInfo()
00028 {
00029 resetHeader();
00030 }
00031
00032
00033 SamFileHeader::~SamFileHeader()
00034 {
00035 resetHeader();
00036 }
00037
00038
00039
00040 SamFileHeader::SamFileHeader(const SamFileHeader& header)
00041 {
00042 copy(header);
00043 }
00044
00045
00046
00047 SamFileHeader & SamFileHeader::operator = (const SamFileHeader& header)
00048 {
00049 copy(header);
00050 return(*this);
00051 }
00052
00053
00054 bool SamFileHeader::copy(const SamFileHeader& header)
00055 {
00056
00057 if(this == &header)
00058 {
00059 return(true);
00060 }
00061
00062 resetHeader();
00063
00064 myReferenceInfo = header.myReferenceInfo;
00065
00066
00067
00068 std::string newString;
00069 bool status = header.getHeaderString(newString);
00070 String newHeaderString = newString.c_str();
00071
00072 status &= parseHeader(newHeaderString);
00073
00074 myCurrentHeaderIndex = header.myCurrentHeaderIndex;
00075 myCurrentCommentIndex = header.myCurrentCommentIndex;
00076
00077 return(status);
00078 }
00079
00080
00081
00082 void SamFileHeader::resetHeader()
00083 {
00084 myReferenceInfo.clear();
00085
00086
00087
00088 myHD = NULL;
00089 mySQs.Clear();
00090 myRGs.Clear();
00091 myPGs.Clear();
00092
00093
00094 for(unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size();
00095 headerIndex++)
00096 {
00097 delete myHeaderRecords[headerIndex];
00098 myHeaderRecords[headerIndex] = NULL;
00099 }
00100 myHeaderRecords.clear();
00101
00102
00103 resetHeaderRecordIter();
00104
00105
00106 resetCommentIter();
00107
00108
00109 resetSQRecordIter();
00110 resetRGRecordIter();
00111 resetPGRecordIter();
00112
00113
00114 myComments.clear();
00115 }
00116
00117
00118
00119
00120 bool SamFileHeader::getHeaderString(std::string& header) const
00121 {
00122 header.clear();
00123
00124
00125 unsigned int index = 0;
00126 while(getHeaderLine(index, header) != false)
00127 {
00128 ++index;
00129 }
00130
00131 return(true);
00132 }
00133
00134
00135 int SamFileHeader::getReferenceID(const String & referenceName, bool addID)
00136 {
00137 return(myReferenceInfo.getReferenceID(referenceName, addID));
00138 }
00139
00140
00141 int SamFileHeader::getReferenceID(const char* referenceName, bool addID)
00142 {
00143 return(myReferenceInfo.getReferenceID(referenceName, addID));
00144 }
00145
00146
00147 const String & SamFileHeader::getReferenceLabel(int id) const
00148 {
00149 return(myReferenceInfo.getReferenceLabel(id));
00150 }
00151
00152
00153
00154 const SamReferenceInfo* SamFileHeader::getReferenceInfo() const
00155 {
00156 return(&myReferenceInfo);
00157 }
00158
00159
00160
00161 void SamFileHeader::addReferenceInfo(const char* referenceSequenceName,
00162 int32_t referenceSequenceLength)
00163 {
00164 myReferenceInfo.add(referenceSequenceName, referenceSequenceLength);
00165 }
00166
00167
00168
00169 void SamFileHeader::generateReferenceInfo()
00170 {
00171
00172 uint32_t sqIndex = 0;
00173 SamHeaderRecord* hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00174 while(hdrRec != NULL)
00175 {
00176
00177 String refName = hdrRec->getTagValue("SN");
00178 String refLen = hdrRec->getTagValue("LN");
00179 long refLenInt = 0;
00180 if(refLen.AsInteger(refLenInt))
00181 {
00182
00183
00184 myReferenceInfo.add(refName, refLen);
00185 }
00186 hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00187 }
00188 }
00189
00190
00191
00192 bool SamFileHeader::addHeaderLine(const char* type, const char* tag,
00193 const char* value)
00194 {
00195 String headerLine;
00196 headerLine += "@";
00197 headerLine += type;
00198 headerLine += "\t";
00199 headerLine += tag;
00200 headerLine += ":";
00201 headerLine += value;
00202 return(addHeaderLine(headerLine.c_str()));
00203 }
00204
00205
00206
00207
00208 bool SamFileHeader::addHeaderLine(const char* headerLine)
00209 {
00210
00211 String headerString = headerLine;
00212 if(parseHeader(headerString))
00213 {
00214
00215 return(true);
00216 }
00217
00218 return(false);
00219 }
00220
00221
00222
00223 bool SamFileHeader::addComment(const char* comment)
00224 {
00225 if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0))
00226 {
00227
00228 myComments.push_back(comment);
00229 }
00230 return(true);
00231 }
00232
00233
00234
00235 bool SamFileHeader::setHDTag(const char* tag, const char* value)
00236 {
00237 if(myHD == NULL)
00238 {
00239
00240 myHD = new SamHeaderHD();
00241 if(myHD == NULL)
00242 {
00243
00244 return(false);
00245 }
00246
00247
00248 myHeaderRecords.push_back(myHD);
00249 }
00250 return(myHD->setTag(tag, value));
00251 }
00252
00253
00254
00255
00256 bool SamFileHeader::setSQTag(const char* tag, const char* value,
00257 const char* name)
00258 {
00259
00260 SamHeaderSQ* sq = getSQ(name);
00261 if(sq == NULL)
00262 {
00263
00264
00265 sq = new SamHeaderSQ();
00266
00267 if(sq == NULL)
00268 {
00269
00270 return(false);
00271 }
00272
00273
00274 mySQs.Add(name, sq);
00275 myHeaderRecords.push_back(sq);
00276
00277
00278 if(!sq->addKey(name))
00279 {
00280
00281 return(false);
00282 }
00283 }
00284
00285 return(sq->setTag(tag, value));
00286 }
00287
00288
00289
00290
00291 bool SamFileHeader::setRGTag(const char* tag, const char* value, const char* id)
00292 {
00293
00294 SamHeaderRG* rg = getRG(id);
00295 if(rg == NULL)
00296 {
00297
00298
00299 rg = new SamHeaderRG();
00300
00301 if(rg == NULL)
00302 {
00303
00304 return(false);
00305 }
00306
00307
00308 myRGs.Add(id, rg);
00309 myHeaderRecords.push_back(rg);
00310
00311
00312 if(!rg->addKey(id))
00313 {
00314
00315 return(false);
00316 }
00317 }
00318
00319 return(rg->setTag(tag, value));
00320 }
00321
00322
00323
00324
00325
00326 bool SamFileHeader::setPGTag(const char* tag, const char* value, const char* id)
00327 {
00328
00329 SamHeaderPG* pg = getPG(id);
00330 if(pg == NULL)
00331 {
00332
00333
00334 pg = new SamHeaderPG();
00335
00336 if(pg == NULL)
00337 {
00338
00339 return(false);
00340 }
00341
00342
00343 myPGs.Add(id, pg);
00344 myHeaderRecords.push_back(pg);
00345
00346
00347 if(!pg->addKey(id))
00348 {
00349
00350 return(false);
00351 }
00352 }
00353
00354 return(pg->setTag(tag, value));
00355 }
00356
00357
00358
00359 bool SamFileHeader::addHD(SamHeaderHD* hd)
00360 {
00361
00362
00363 if((myHD != NULL) || (hd == NULL))
00364 {
00365 return(false);
00366 }
00367 myHD = hd;
00368
00369 myHeaderRecords.push_back(myHD);
00370 return(true);
00371 }
00372
00373
00374
00375 bool SamFileHeader::addSQ(SamHeaderSQ* sq)
00376 {
00377 if(sq == NULL)
00378 {
00379
00380 return(false);
00381 }
00382 const char* name = sq->getTagValue("SN");
00383 if(strcmp(name, EMPTY_RETURN.c_str()) == 0)
00384 {
00385
00386 return(false);
00387 }
00388
00389
00390
00391 if(mySQs.Find(name) < 0)
00392 {
00393
00394
00395 mySQs.Add(name, sq);
00396 myHeaderRecords.push_back(sq);
00397 return(true);
00398 }
00399
00400
00401 return(false);
00402 }
00403
00404
00405
00406 bool SamFileHeader::addRG(SamHeaderRG* rg)
00407 {
00408 if(rg == NULL)
00409 {
00410
00411 return(false);
00412 }
00413 const char* id = rg->getTagValue("ID");
00414 if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00415 {
00416
00417 return(false);
00418 }
00419
00420
00421
00422 if(myRGs.Find(id) < 0)
00423 {
00424
00425
00426 myRGs.Add(id, rg);
00427 myHeaderRecords.push_back(rg);
00428 return(true);
00429 }
00430
00431
00432 return(false);
00433 }
00434
00435
00436
00437 bool SamFileHeader::addPG(SamHeaderPG* pg)
00438 {
00439
00440 if(pg == NULL)
00441 {
00442 return(false);
00443 }
00444 const char* id = pg->getTagValue("ID");
00445 if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00446 {
00447
00448 return(false);
00449 }
00450
00451
00452
00453 if(myPGs.Find(id) < 0)
00454 {
00455
00456
00457 myPGs.Add(id, pg);
00458 myHeaderRecords.push_back(pg);
00459 return(true);
00460 }
00461
00462
00463 return(false);
00464 }
00465
00466
00467
00468 bool SamFileHeader::removeHD()
00469 {
00470 if(myHD != NULL)
00471 {
00472
00473
00474
00475
00476 myHD->reset();
00477
00478
00479 myHD = NULL;
00480 }
00481
00482 return(true);
00483 }
00484
00485
00486
00487 bool SamFileHeader::removeSQ(const char* name)
00488 {
00489
00490 int hashIndex = mySQs.Find(name);
00491 if(hashIndex < 0)
00492 {
00493
00494
00495
00496 return(true);
00497 }
00498
00499
00500 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(hashIndex));
00501
00502 if(sq == NULL)
00503 {
00504
00505
00506 return(false);
00507 }
00508
00509
00510
00511
00512
00513 sq->reset();
00514
00515
00516 mySQs.Delete(hashIndex);
00517
00518 return(true);
00519 }
00520
00521
00522
00523 bool SamFileHeader::removeRG(const char* id)
00524 {
00525
00526 int hashIndex = myRGs.Find(id);
00527 if(hashIndex < 0)
00528 {
00529
00530
00531
00532 return(true);
00533 }
00534
00535
00536 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(hashIndex));
00537
00538 if(rg == NULL)
00539 {
00540
00541
00542 return(false);
00543 }
00544
00545
00546
00547
00548
00549 rg->reset();
00550
00551
00552 myRGs.Delete(hashIndex);
00553
00554 return(true);
00555 }
00556
00557
00558
00559 bool SamFileHeader::removePG(const char* id)
00560 {
00561
00562 int hashIndex = myPGs.Find(id);
00563 if(hashIndex < 0)
00564 {
00565
00566
00567
00568 return(true);
00569 }
00570
00571
00572 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(hashIndex));
00573
00574 if(pg == NULL)
00575 {
00576
00577
00578 return(false);
00579 }
00580
00581
00582
00583
00584
00585 pg->reset();
00586
00587
00588 myPGs.Delete(hashIndex);
00589
00590 return(true);
00591 }
00592
00593
00594 SamStatus::Status SamFileHeader::setHeaderFromBamFile(IFILE filePtr)
00595 {
00596 if((filePtr == NULL) || (filePtr->isOpen() == false))
00597 {
00598
00599 return(SamStatus::FAIL_ORDER);
00600 }
00601
00602 int headerLength;
00603
00604 int readSize = ifread(filePtr, &headerLength, sizeof(int));
00605
00606 if(readSize != sizeof(int))
00607 {
00608
00609 return(SamStatus::FAIL_IO);
00610 }
00611
00612 String header;
00613 if (headerLength > 0)
00614 {
00615
00616 readSize =
00617 ifread(filePtr, header.LockBuffer(headerLength + 1), headerLength);
00618 header[headerLength] = 0;
00619 header.UnlockBuffer();
00620 if(readSize != headerLength)
00621 {
00622
00623 return(SamStatus::FAIL_IO);
00624 }
00625 }
00626
00627
00628 parseHeader(header);
00629 return(SamStatus::SUCCESS);
00630 }
00631
00632
00633 const char* SamFileHeader::getHDTagValue(const char* tag)
00634 {
00635 if(myHD == NULL)
00636 {
00637
00638 return(EMPTY_RETURN.c_str());
00639 }
00640 return(myHD->getTagValue(tag));
00641 }
00642
00643
00644
00645
00646 const char* SamFileHeader::getSQTagValue(const char* tag, const char* name)
00647 {
00648
00649 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(name));
00650
00651
00652 if(sq == NULL)
00653 {
00654 return(EMPTY_RETURN.c_str());
00655 }
00656
00657
00658 return(sq->getTagValue(tag));
00659 }
00660
00661
00662
00663
00664 const char* SamFileHeader::getRGTagValue(const char* tag, const char* id)
00665 {
00666
00667 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(id));
00668
00669
00670 if(rg == NULL)
00671 {
00672 return(EMPTY_RETURN.c_str());
00673 }
00674
00675
00676 return(rg->getTagValue(tag));
00677 }
00678
00679
00680 const char* SamFileHeader::getPGTagValue(const char* tag, const char* id)
00681 {
00682
00683 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(id));
00684
00685
00686 if(pg == NULL)
00687 {
00688 return(EMPTY_RETURN.c_str());
00689 }
00690
00691
00692 return(pg->getTagValue(tag));
00693 }
00694
00695
00696
00697 int SamFileHeader::getNumSQs()
00698 {
00699 return(mySQs.Entries());
00700 }
00701
00702
00703
00704 int SamFileHeader::getNumRGs()
00705 {
00706 return(myRGs.Entries());
00707 }
00708
00709
00710
00711 int SamFileHeader::getNumPGs()
00712 {
00713 return(myPGs.Entries());
00714 }
00715
00716
00717
00718 SamHeaderHD* SamFileHeader::getHD()
00719 {
00720 return(myHD);
00721 }
00722
00723
00724
00725 SamHeaderSQ* SamFileHeader::getSQ(const char* name)
00726 {
00727 return((SamHeaderSQ*)(mySQs.Object(name)));
00728 }
00729
00730
00731
00732 SamHeaderRG* SamFileHeader::getRG(const char* id)
00733 {
00734 return((SamHeaderRG*)(myRGs.Object(id)));
00735 }
00736
00737
00738
00739 SamHeaderPG* SamFileHeader::getPG(const char* id)
00740 {
00741 return((SamHeaderPG*)(myPGs.Object(id)));
00742 }
00743
00744
00745
00746
00747 const char* SamFileHeader::getSortOrder()
00748 {
00749 if(myHD == NULL)
00750 {
00751
00752 return(EMPTY_RETURN.c_str());
00753 }
00754 return(myHD->getSortOrder());
00755 }
00756
00757
00758
00759 const char* SamFileHeader::getTagSO()
00760 {
00761 return(getSortOrder());
00762 }
00763
00764
00765
00766
00767 SamHeaderRecord* SamFileHeader::getNextSQRecord()
00768 {
00769 return(getNextHeaderRecord(myCurrentSQIndex,
00770 SamHeaderRecord::SQ));
00771 }
00772
00773
00774
00775
00776 SamHeaderRecord* SamFileHeader::getNextRGRecord()
00777 {
00778 return(getNextHeaderRecord(myCurrentRGIndex,
00779 SamHeaderRecord::RG));
00780 }
00781
00782
00783
00784
00785 SamHeaderRecord* SamFileHeader::getNextPGRecord()
00786 {
00787 return(getNextHeaderRecord(myCurrentPGIndex,
00788 SamHeaderRecord::PG));
00789 }
00790
00791
00792
00793
00794 void SamFileHeader::resetSQRecordIter()
00795 {
00796 myCurrentSQIndex = 0;
00797 }
00798
00799
00800
00801
00802 void SamFileHeader::resetRGRecordIter()
00803 {
00804 myCurrentRGIndex = 0;
00805 }
00806
00807
00808
00809
00810 void SamFileHeader::resetPGRecordIter()
00811 {
00812 myCurrentPGIndex = 0;
00813 }
00814
00815
00816
00817
00818
00819
00820
00821 SamHeaderRecord* SamFileHeader::getNextHeaderRecord(uint32_t& index,
00822 SamHeaderRecord::SamHeaderRecordType headerType)
00823 {
00824 SamHeaderRecord* foundRecord = NULL;
00825
00826
00827 while((index < myHeaderRecords.size())
00828 && (foundRecord == NULL))
00829 {
00830
00831 foundRecord = myHeaderRecords[index];
00832
00833 ++index;
00834
00835 if(!foundRecord->isActiveHeaderRecord())
00836 {
00837
00838 foundRecord = NULL;
00839 }
00840
00841 else if(foundRecord->getType() != headerType)
00842 {
00843
00844 foundRecord = NULL;
00845 }
00846 }
00847
00848
00849 return(foundRecord);
00850 }
00851
00852
00853
00854
00855
00856
00857
00858 SamHeaderRecord* SamFileHeader::getNextHeaderRecord()
00859 {
00860
00861 SamHeaderRecord* foundRecord = NULL;
00862
00863
00864 while((myCurrentHeaderIndex < myHeaderRecords.size())
00865 && (foundRecord == NULL))
00866 {
00867
00868 foundRecord = myHeaderRecords[myCurrentHeaderIndex];
00869
00870 ++myCurrentHeaderIndex;
00871
00872 if(!foundRecord->isActiveHeaderRecord())
00873 {
00874
00875 foundRecord = NULL;
00876 }
00877 }
00878
00879
00880 return(foundRecord);
00881 }
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891 bool SamFileHeader::getNextHeaderLine(std::string &headerLine)
00892 {
00893 headerLine = EMPTY_RETURN.c_str();
00894
00895
00896
00897 while(headerLine == EMPTY_RETURN.c_str())
00898 {
00899 if(getHeaderLine(myCurrentHeaderIndex, headerLine) == false)
00900 {
00901
00902 return(false);
00903 }
00904 else
00905 {
00906
00907 ++myCurrentHeaderIndex;
00908 }
00909 }
00910 return(true);
00911 }
00912
00913
00914
00915
00916 void SamFileHeader::resetHeaderRecordIter()
00917 {
00918 myCurrentHeaderIndex = 0;
00919 }
00920
00921
00922
00923
00924 const char* SamFileHeader::getNextComment()
00925 {
00926 if(myCurrentCommentIndex < myComments.size())
00927 {
00928 return(myComments[myCurrentCommentIndex++].c_str());
00929 }
00930
00931 return(EMPTY_RETURN.c_str());
00932 }
00933
00934
00935
00936
00937 void SamFileHeader::resetCommentIter()
00938 {
00939 myCurrentCommentIndex = 0;
00940 }
00941
00942
00943
00944 bool SamFileHeader::parseHeader(String& header)
00945 {
00946
00947
00948
00949 bool status = true;
00950
00951
00952 std::vector<String>* types = header.Split('\n');
00953
00954
00955 for(uint32_t index = 0; index < types->size(); index++)
00956 {
00957
00958 status &= parseHeaderLine(types->at(index));
00959 }
00960
00961
00962 delete types;
00963 types = NULL;
00964
00965 return(status);
00966 }
00967
00968
00969
00970 bool SamFileHeader::parseHeaderLine(const String& headerLine)
00971 {
00972 StringArray tokens;
00973
00974
00975 tokens.ReplaceColumns(headerLine, '\t');
00976
00977 if(tokens.Length() < 1)
00978 {
00979
00980 return(true);
00981 }
00982
00983
00984 if((tokens[0].Length() != 3) || (tokens[0][0] != '@'))
00985 {
00986
00987
00988 return(false);
00989 }
00990
00991 bool status = true;
00992 if(tokens[0] == "@HD")
00993 {
00994 if(myHD == NULL)
00995 {
00996
00997 myHD = new SamHeaderHD();
00998 if(myHD == NULL)
00999 {
01000
01001 return(false);
01002 }
01003 myHeaderRecords.push_back(myHD);
01004 status &= myHD->setFields(tokens);
01005 }
01006 else
01007 {
01008
01009 status = false;
01010 }
01011 }
01012 else if(tokens[0] == "@SQ")
01013 {
01014
01015 SamHeaderSQ* sq = new SamHeaderSQ();
01016
01017 if(sq->setFields(tokens))
01018 {
01019
01020
01021 status &= addSQ(sq);
01022 }
01023 else
01024 {
01025 status = false;
01026 }
01027 }
01028 else if(tokens[0] == "@RG")
01029 {
01030
01031 SamHeaderRG* rg = new SamHeaderRG();
01032
01033 if(rg->setFields(tokens))
01034 {
01035
01036
01037 status &= addRG(rg);
01038 }
01039 else
01040 {
01041 status = false;
01042 }
01043 }
01044 else if(tokens[0] == "@PG")
01045 {
01046
01047 SamHeaderPG* pg = new SamHeaderPG();
01048
01049 if(pg->setFields(tokens))
01050 {
01051
01052
01053 status &= addPG(pg);
01054 }
01055 else
01056 {
01057 status = false;
01058 }
01059 }
01060 else if(tokens[0] == "@CO")
01061 {
01062 addComment(tokens[1]);
01063 }
01064 else
01065 {
01066
01067 status = false;
01068 }
01069
01070 return(status);
01071 }
01072
01073
01074
01075
01076
01077
01078 bool SamFileHeader::getHeaderLine(unsigned int index, std::string& header) const
01079 {
01080
01081 if(index < myHeaderRecords.size())
01082 {
01083
01084
01085 SamHeaderRecord* hdrRec = myHeaderRecords[index];
01086 hdrRec->appendString(header);
01087 return(true);
01088 }
01089 else
01090 {
01091 unsigned int commentIndex = index - myHeaderRecords.size();
01092
01093 if(commentIndex < myComments.size())
01094 {
01095
01096 header += "@CO\t";
01097
01098 header += myComments[commentIndex];
01099
01100 header += "\n";
01101 return(true);
01102 }
01103 }
01104
01105 return(false);
01106 }