libStatGen Software
1
|
00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #include "SamFileHeader.h" 00019 #include "SamHeaderSQ.h" 00020 #include "SamHeaderRG.h" 00021 00022 00023 const std::string SamFileHeader::EMPTY_RETURN = ""; 00024 00025 SamFileHeader::SamFileHeader() 00026 : myHD(NULL), 00027 myReferenceInfo(), 00028 myErrorMessage("") 00029 { 00030 resetHeader(); 00031 00032 mySQs.setCaseSensitive(true); 00033 myRGs.setCaseSensitive(true); 00034 myPGs.setCaseSensitive(true); 00035 } 00036 00037 00038 SamFileHeader::~SamFileHeader() 00039 { 00040 resetHeader(); 00041 } 00042 00043 00044 // Copy Constructor 00045 SamFileHeader::SamFileHeader(const SamFileHeader& header) 00046 { 00047 copy(header); 00048 } 00049 00050 00051 // Overload operator = to copy the passed in header into this header. 00052 SamFileHeader & SamFileHeader::operator = (const SamFileHeader& header) 00053 { 00054 copy(header); 00055 return(*this); 00056 } 00057 00058 00059 bool SamFileHeader::copy(const SamFileHeader& header) 00060 { 00061 // Check to see if the passed in value is the same as this. 00062 if(this == &header) 00063 { 00064 return(true); 00065 } 00066 00067 resetHeader(); 00068 00069 // Copy the records by getting the other header's header string 00070 // and parsing it. 00071 std::string newString; 00072 bool status = header.getHeaderString(newString); 00073 String newHeaderString = newString.c_str(); 00074 00075 status &= parseHeader(newHeaderString); 00076 00077 myCurrentHeaderIndex = header.myCurrentHeaderIndex; 00078 myCurrentCommentIndex = header.myCurrentCommentIndex; 00079 00080 // Clear the reference info and copy it to ensure it is the same. 00081 myReferenceInfo.clear(); 00082 // Copy Reference contigs, hash, lengths. 00083 myReferenceInfo = header.myReferenceInfo; 00084 00085 return(status); 00086 } 00087 00088 00089 // Reset the header for a new entry, clearing out previous values. 00090 void SamFileHeader::resetHeader() 00091 { 00092 myReferenceInfo.clear(); 00093 00094 // Clear the pointers to the header records. They are deleted when the 00095 // vector is cleaned up. 00096 myHD = NULL; 00097 mySQs.Clear(); 00098 myRGs.Clear(); 00099 myPGs.Clear(); 00100 00101 // Delete the header records and clear the vector. 00102 for(unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size(); 00103 headerIndex++) 00104 { 00105 if(myHeaderRecords[headerIndex] != NULL) 00106 { 00107 delete myHeaderRecords[headerIndex]; 00108 myHeaderRecords[headerIndex] = NULL; 00109 } 00110 } 00111 myHeaderRecords.clear(); 00112 00113 // Reset the iterator for the header lines. 00114 resetHeaderRecordIter(); 00115 00116 // Reset the comment iterator. 00117 resetCommentIter(); 00118 00119 // Reset the individual type header iterators. 00120 resetSQRecordIter(); 00121 resetRGRecordIter(); 00122 resetPGRecordIter(); 00123 00124 // Clear the comments 00125 myComments.clear(); 00126 } 00127 00128 00129 // Set the passed in string to the entire header string. Clearing its 00130 // current contents. 00131 bool SamFileHeader::getHeaderString(std::string& header) const 00132 { 00133 header.clear(); 00134 00135 // Keep getting header lines until there are no more - false returned. 00136 unsigned int index = 0; 00137 while(getHeaderLine(index, header) != false) 00138 { 00139 ++index; 00140 } 00141 00142 return(true); 00143 } 00144 00145 00146 int SamFileHeader::getReferenceID(const String & referenceName, bool addID) 00147 { 00148 return(myReferenceInfo.getReferenceID(referenceName, addID)); 00149 } 00150 00151 00152 int SamFileHeader::getReferenceID(const char* referenceName, bool addID) 00153 { 00154 return(myReferenceInfo.getReferenceID(referenceName, addID)); 00155 } 00156 00157 00158 const String & SamFileHeader::getReferenceLabel(int id) const 00159 { 00160 return(myReferenceInfo.getReferenceLabel(id)); 00161 } 00162 00163 00164 // Get the Reference Information 00165 const SamReferenceInfo& SamFileHeader::getReferenceInfo() const 00166 { 00167 return(myReferenceInfo); 00168 } 00169 00170 00171 // Get the Reference Information for updating separately when reading 00172 // BAMs...should only be called by BamInterface. 00173 SamReferenceInfo& SamFileHeader::getReferenceInfoForBamInterface() 00174 { 00175 return(myReferenceInfo); 00176 } 00177 00178 00179 // Add a header line that has an const char* value. 00180 bool SamFileHeader::addHeaderLine(const char* type, const char* tag, 00181 const char* value) 00182 { 00183 String headerLine; 00184 headerLine += "@"; 00185 headerLine += type; 00186 headerLine += "\t"; 00187 headerLine += tag; 00188 headerLine += ":"; 00189 headerLine += value; 00190 return(addHeaderLine(headerLine.c_str())); 00191 } 00192 00193 00194 // Add a header line that is already preformatted in a const char*. 00195 bool SamFileHeader::addHeaderLine(const char* headerLine) 00196 { 00197 // Parse the added header line. 00198 String headerString = headerLine; 00199 return(parseHeader(headerString)); 00200 } 00201 00202 00203 // Add a header line that is already preformatted in a const char*. 00204 bool SamFileHeader::addHeader(const char* header) 00205 { 00206 // Parse the added header line. 00207 String headerString = header; 00208 return(parseHeader(headerString)); 00209 } 00210 00211 00212 // Add a comment. 00213 bool SamFileHeader::addComment(const char* comment) 00214 { 00215 if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0)) 00216 { 00217 // Valid comment, so add it. 00218 myComments.push_back(comment); 00219 } 00220 return(true); 00221 } 00222 00223 00224 // Add the specified tag and value to the HD header. 00225 bool SamFileHeader::setHDTag(const char* tag, const char* value) 00226 { 00227 if(myHD == NULL) 00228 { 00229 // Need to create the HD line. 00230 myHD = new SamHeaderHD(); 00231 if(myHD == NULL) 00232 { 00233 // New failed, return false. 00234 myErrorMessage = "SamFileHeader: Failed to allocate a new HD tag"; 00235 return(false); 00236 } 00237 // Succeeded to create the line, add it to the 00238 // list. 00239 myHeaderRecords.push_back(myHD); 00240 } 00241 if(!myHD->setTag(tag, value)) 00242 { 00243 myErrorMessage = "SamFileHeader: Failed to set the specified HD tag"; 00244 return(false); 00245 } 00246 return(true); 00247 } 00248 00249 00250 // Add the specified tag and value to the SQ header with the specified name. 00251 // If the header does not yet exist, the header is added. 00252 bool SamFileHeader::setSQTag(const char* tag, const char* value, 00253 const char* name) 00254 { 00255 // Get the SQ record for the specified name. 00256 SamHeaderSQ* sq = getSQ(name); 00257 if(sq == NULL) 00258 { 00259 // The SQ does not yet exist. 00260 // Make sure the tag is LN. 00261 if(strcmp(tag, "LN") != 0) 00262 { 00263 // LN is required so must be the first tag added 00264 myErrorMessage = 00265 "SamFileHeader:Failed to add the specified SQ key, LN not specified."; 00266 return(false); 00267 } 00268 00269 // Add it. 00270 sq = new SamHeaderSQ(); 00271 00272 if(sq == NULL) 00273 { 00274 // Could not create the header record. 00275 myErrorMessage = "SamFileHeader: Failed to allocate a new SQ tag"; 00276 return(false); 00277 } 00278 00279 // Created the header record, so add it to the list of SQ lines. 00280 mySQs.Add(name, sq); 00281 myHeaderRecords.push_back(sq); 00282 // value is the length, so update the reference info. 00283 myReferenceInfo.add(name, atoi(value)); 00284 00285 // Add the key tag 00286 if(!sq->addKey(name)) 00287 { 00288 // Failed to add the key tag, return false. 00289 myErrorMessage = "SamFileHeader:Failed to add the specified SQ key"; 00290 return(false); 00291 } 00292 } 00293 else if(strcmp(tag, "LN") == 0) 00294 { 00295 // Cannot modify/remove the LN tag. 00296 myErrorMessage = "SamFileHeader:Cannot modify/remove the SQ's LN tag"; 00297 return(false); 00298 } 00299 00300 if(!sq->setTag(tag, value)) 00301 { 00302 myErrorMessage = "Failed to set the specified SQ tag"; 00303 return(false); 00304 } 00305 return(true); 00306 } 00307 00308 00309 // Add the specified tag and value to the RG header with the read group 00310 // identifier. If the header does not yet exist, the header is added. 00311 bool SamFileHeader::setRGTag(const char* tag, const char* value, const char* id) 00312 { 00313 // Get the RG record for the specified name. 00314 SamHeaderRG* rg = getRG(id); 00315 if(rg == NULL) 00316 { 00317 // The RG does not yet exist. 00318 // Add it. 00319 rg = new SamHeaderRG(); 00320 00321 if(rg == NULL) 00322 { 00323 // Could not create the header record. 00324 myErrorMessage = "Failed to allocate a new RG tag"; 00325 return(false); 00326 } 00327 00328 // Created the header record, so add it to the list of RG lines. 00329 myRGs.Add(id, rg); 00330 myHeaderRecords.push_back(rg); 00331 00332 // Add the key tag 00333 if(!rg->addKey(id)) 00334 { 00335 // Failed to add the key tag, return false. 00336 myErrorMessage = "Failed to add the specified RG key"; 00337 return(false); 00338 } 00339 } 00340 00341 if(!rg->setTag(tag, value)) 00342 { 00343 myErrorMessage = "Failed to set the specified RG tag"; 00344 return(false); 00345 } 00346 return(true); 00347 } 00348 00349 00350 // Add the specified tag and value to the PG header with the specified id. 00351 // If the header does not yet exist, the header is added. 00352 // Add the specified tag and value to the PG header. 00353 bool SamFileHeader::setPGTag(const char* tag, const char* value, const char* id) 00354 { 00355 // Get the PG record for the specified name. 00356 SamHeaderPG* pg = getPG(id); 00357 if(pg == NULL) 00358 { 00359 // The PG does not yet exist. 00360 // Add it. 00361 pg = new SamHeaderPG(); 00362 00363 if(pg == NULL) 00364 { 00365 // Could not create the header record. 00366 myErrorMessage = "Failed to allocate a new PG tag"; 00367 return(false); 00368 } 00369 00370 // Created the header record, so add it to the list of PG lines. 00371 myPGs.Add(id, pg); 00372 myHeaderRecords.push_back(pg); 00373 00374 // Add the key tag 00375 if(!pg->addKey(id)) 00376 { 00377 // Failed to add the key tag, return false. 00378 myErrorMessage = "Failed to add the specified PG key"; 00379 return(false); 00380 } 00381 } 00382 00383 if(!pg->setTag(tag, value)) 00384 { 00385 myErrorMessage = "Failed to set the specified PG tag"; 00386 return(false); 00387 } 00388 return(true); 00389 } 00390 00391 00392 // Add the HD record to the header. 00393 bool SamFileHeader::addHD(SamHeaderHD* hd) 00394 { 00395 // If there is already an HD header or if null 00396 // was passed in, return false. 00397 if(myHD != NULL) 00398 { 00399 myErrorMessage = "Failed add an HD tag - there is already one"; 00400 return(false); 00401 } 00402 if(hd == NULL) 00403 { 00404 myErrorMessage = "Failed add an HD tag - no tag specified"; 00405 return(false); 00406 } 00407 myHD = hd; 00408 00409 myHeaderRecords.push_back(myHD); 00410 return(true); 00411 } 00412 00413 00414 // Add the SQ record to the header. 00415 bool SamFileHeader::addSQ(SamHeaderSQ* sq) 00416 { 00417 if(sq == NULL) 00418 { 00419 // null pointer passed in, can't add it. 00420 myErrorMessage = "SAM/BAM Header line failed to allocate SQ."; 00421 return(false); 00422 } 00423 const char* name = sq->getTagValue("SN"); 00424 const char* length = sq->getTagValue("LN"); 00425 if(strcmp(name, EMPTY_RETURN.c_str()) == 0) 00426 { 00427 // SN is not set, so can't add it. 00428 myErrorMessage = 00429 "SAM/BAM Header line failure: Skipping SQ line that is missing the SN field."; 00430 return(false); 00431 } 00432 if(strcmp(length, EMPTY_RETURN.c_str()) == 0) 00433 { 00434 // LN is not set, so can't add it. 00435 myErrorMessage = 00436 "SAM/BAM Header line failure: Skipping SQ line that is missing the LN field."; 00437 return(false); 00438 } 00439 00440 // Determine whether or not a record with this 00441 // key is already in the hash. 00442 if(mySQs.Find(name) < 0) 00443 { 00444 // It is not already in the hash so add it. 00445 mySQs.Add(name, sq); 00446 myHeaderRecords.push_back(sq); 00447 myReferenceInfo.add(name, atoi(length)); 00448 return(true); 00449 } 00450 00451 // It is already in the hash, so cannot be added. 00452 myErrorMessage = "SAM/BAM Header line failure: Skipping SQ line that has a repeated SN field."; 00453 return(false); 00454 } 00455 00456 00457 // Add the RG record to the header. 00458 bool SamFileHeader::addRG(SamHeaderRG* rg) 00459 { 00460 if(rg == NULL) 00461 { 00462 // null pointer passed in, can't add it. 00463 myErrorMessage = "SAM/BAM Header line failed to allocate RG."; 00464 return(false); 00465 } 00466 const char* id = rg->getTagValue("ID"); 00467 if(strcmp(id, EMPTY_RETURN.c_str()) == 0) 00468 { 00469 // ID is not set, so can't add it. 00470 myErrorMessage = "SAM/BAM Header line failure: Skipping RG line that is missing the ID field."; 00471 return(false); 00472 } 00473 00474 // Determine whether or not a record with this 00475 // key is already in the hash. 00476 if(myRGs.Find(id) < 0) 00477 { 00478 // It is not already in the hash so 00479 // add it. 00480 myRGs.Add(id, rg); 00481 myHeaderRecords.push_back(rg); 00482 return(true); 00483 } 00484 00485 // It is already in the hash, so cannot be added. 00486 myErrorMessage = "SAM/BAM Header line failure: Skipping RG line that has a repeated ID field."; 00487 return(false); 00488 } 00489 00490 00491 // Add the PG record to the header. 00492 bool SamFileHeader::addPG(SamHeaderPG* pg) 00493 { 00494 // If a null pointer was passed in, return false. 00495 if(pg == NULL) 00496 { 00497 myErrorMessage = "SAM/BAM Header line failed to allocate PG."; 00498 return(false); 00499 } 00500 const char* id = pg->getTagValue("ID"); 00501 if(strcmp(id, EMPTY_RETURN.c_str()) == 0) 00502 { 00503 // ID is not set, so can't add the header record. 00504 myErrorMessage = "SAM/BAM Header line failure: Skipping PG line that is missing the ID field."; 00505 return(false); 00506 } 00507 00508 // Determine whether or not a record with this 00509 // key is already in the hash. 00510 if(myPGs.Find(id) < 0) 00511 { 00512 // It is not already in the hash so 00513 // add it. 00514 myPGs.Add(id, pg); 00515 myHeaderRecords.push_back(pg); 00516 return(true); 00517 } 00518 00519 // It is already in the hash, so cannot be added. 00520 myErrorMessage = "SAM/BAM Header line failure: Skipping PG line that has a repeated ID field."; 00521 return(false); 00522 } 00523 00524 00525 // Add the RG record to the header. 00526 bool SamFileHeader::addRecordCopy(const SamHeaderRecord& hdrRec) 00527 { 00528 SamHeaderRecord* newRec = hdrRec.createCopy(); 00529 bool returnVal = true; 00530 switch(newRec->getType()) 00531 { 00532 case SamHeaderRecord::HD: 00533 returnVal = addHD((SamHeaderHD*)newRec); 00534 break; 00535 case SamHeaderRecord::PG: 00536 returnVal = addPG((SamHeaderPG*)newRec); 00537 break; 00538 case SamHeaderRecord::RG: 00539 returnVal = addRG((SamHeaderRG*)newRec); 00540 break; 00541 case SamHeaderRecord::SQ: 00542 returnVal = addSQ((SamHeaderSQ*)newRec); 00543 break; 00544 default: 00545 myErrorMessage = "Failed to copy a header record, unknown type."; 00546 returnVal = false; 00547 break; 00548 } 00549 return(returnVal); 00550 } 00551 00552 00553 // Remove the HD record. 00554 bool SamFileHeader::removeHD() 00555 { 00556 if(myHD != NULL) 00557 { 00558 // Reset the record. Do not delete it since it is in the headerRecords 00559 // vector and it is not worth the time to remove it from the middle of 00560 // that vector since this is the header and the space does not need 00561 // to be conserved. 00562 myHD->reset(); 00563 00564 // Set myHD to null so a new HD could be added. 00565 myHD = NULL; 00566 } 00567 00568 return(true); 00569 } 00570 00571 00572 // Remove the SQ record associated with the specified name. 00573 bool SamFileHeader::removeSQ(const char* name) 00574 { 00575 // Look up the name in the hash. 00576 int hashIndex = mySQs.Find(name); 00577 if(hashIndex < 0) 00578 { 00579 // Not found in the hash, so nothing to 00580 // delete, return true it does not exist 00581 // in the hash. 00582 return(true); 00583 } 00584 00585 // Get the SQ. 00586 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(hashIndex)); 00587 00588 if(sq == NULL) 00589 { 00590 // sq is null, this is an error since hashIndex was greater than 0, 00591 // so it should have been found. 00592 myErrorMessage = "SAM/BAM Header line failed to get SQ object."; 00593 return(false); 00594 } 00595 00596 // Reset the record. Do not delete it since it is in the headerRecords 00597 // vector and it is not worth the time to remove it from the middle of 00598 // that vector since this is the header and the space does not need 00599 // to be conserved. 00600 sq->reset(); 00601 00602 // Delete the entry from the hash. 00603 mySQs.Delete(hashIndex); 00604 00605 return(true); 00606 } 00607 00608 00609 // Remove the RG record associated with the specified id. 00610 bool SamFileHeader::removeRG(const char* id) 00611 { 00612 // Look up the id in the hash. 00613 int hashIndex = myRGs.Find(id); 00614 if(hashIndex < 0) 00615 { 00616 // Not found in the hash, so nothing to 00617 // delete, return true it does not exist 00618 // in the hash. 00619 return(true); 00620 } 00621 00622 // Get the RG. 00623 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(hashIndex)); 00624 00625 if(rg == NULL) 00626 { 00627 // rg is null, this is an error since hashIndex was greater than 0, 00628 // so it should have been found. 00629 myErrorMessage = "SAM/BAM Header line failed to get RG object."; 00630 return(false); 00631 } 00632 00633 // Reset the record. Do not delete it since it is in the headerRecords 00634 // vector and it is not worth the time to remove it from the middle of 00635 // that vector since this is the header and the space does not need 00636 // to be conserved. 00637 rg->reset(); 00638 00639 // Delete the entry from the hash. 00640 myRGs.Delete(hashIndex); 00641 00642 return(true); 00643 } 00644 00645 00646 // Remove the PG record associated with the specified id. 00647 bool SamFileHeader::removePG(const char* id) 00648 { 00649 // Look up the id in the hash. 00650 int hashIndex = myPGs.Find(id); 00651 if(hashIndex < 0) 00652 { 00653 // Not found in the hash, so nothing to 00654 // delete, return true it does not exist 00655 // in the hash. 00656 return(true); 00657 } 00658 00659 // Get the PG. 00660 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(hashIndex)); 00661 00662 if(pg == NULL) 00663 { 00664 // pg is null, this is an error since hashIndex was greater than 0, 00665 // so it should have been found. 00666 myErrorMessage = "SAM/BAM Header line failed to get PG object."; 00667 return(false); 00668 } 00669 00670 // Reset the record. Do not delete it since it is in the headerRecords 00671 // vector and it is not worth the time to remove it from the middle of 00672 // that vector since this is the header and the space does not need 00673 // to be conserved. 00674 pg->reset(); 00675 00676 // Delete the entry from the hash. 00677 myPGs.Delete(hashIndex); 00678 00679 return(true); 00680 } 00681 00682 00683 const char* SamFileHeader::getHDTagValue(const char* tag) 00684 { 00685 if(myHD == NULL) 00686 { 00687 // return blank since there is no HD type. 00688 return(EMPTY_RETURN.c_str()); 00689 } 00690 return(myHD->getTagValue(tag)); 00691 } 00692 00693 00694 // Get the value associated with the specified tag on the SQ line with 00695 // the specified sequence name. 00696 const char* SamFileHeader::getSQTagValue(const char* tag, const char* name) 00697 { 00698 // Look up the name in the hash to get the associated SQ object. 00699 SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(name)); 00700 00701 // If it is NULL - the tag was not found, so return 00702 if(sq == NULL) 00703 { 00704 return(EMPTY_RETURN.c_str()); 00705 } 00706 00707 // Found the object, so return the SQ Tag. 00708 return(sq->getTagValue(tag)); 00709 } 00710 00711 00712 // Get the value associated with the specified tag on the RG line with 00713 // the specified read group identifier. 00714 const char* SamFileHeader::getRGTagValue(const char* tag, const char* id) 00715 { 00716 // Look up the id in the hash to get the associated RG object. 00717 SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(id)); 00718 00719 // If it is NULL - the tag was not found, so return 00720 if(rg == NULL) 00721 { 00722 return(EMPTY_RETURN.c_str()); 00723 } 00724 00725 // Found the object, so return the RG Tag. 00726 return(rg->getTagValue(tag)); 00727 } 00728 00729 00730 const char* SamFileHeader::getPGTagValue(const char* tag, const char* id) 00731 { 00732 // Look up the id in the hash to get the associated PG object. 00733 SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(id)); 00734 00735 // If it is NULL - the tag was not found, so return 00736 if(pg == NULL) 00737 { 00738 return(EMPTY_RETURN.c_str()); 00739 } 00740 00741 // Found the object, so return the PG Tag. 00742 return(pg->getTagValue(tag)); 00743 } 00744 00745 00746 // Get the number of SQ objects. 00747 int SamFileHeader::getNumSQs() 00748 { 00749 return(mySQs.Entries()); 00750 } 00751 00752 00753 // Get the number of RG objects. 00754 int SamFileHeader::getNumRGs() 00755 { 00756 return(myRGs.Entries()); 00757 } 00758 00759 00760 // Get the number of PG objects. 00761 int SamFileHeader::getNumPGs() 00762 { 00763 return(myPGs.Entries()); 00764 } 00765 00766 00767 // Get the HD object. 00768 SamHeaderHD* SamFileHeader::getHD() 00769 { 00770 return(myHD); 00771 } 00772 00773 00774 // Get the SQ object with the specified sequence name. 00775 SamHeaderSQ* SamFileHeader::getSQ(const char* name) 00776 { 00777 return((SamHeaderSQ*)(mySQs.Object(name))); 00778 } 00779 00780 00781 // Get the RG object with the specified read group identifier. 00782 SamHeaderRG* SamFileHeader::getRG(const char* id) 00783 { 00784 return((SamHeaderRG*)(myRGs.Object(id))); 00785 } 00786 00787 00788 // Get the PG object. 00789 SamHeaderPG* SamFileHeader::getPG(const char* id) 00790 { 00791 return((SamHeaderPG*)(myPGs.Object(id))); 00792 } 00793 00794 00795 // Return the value of the SO tag. 00796 // If this field does not exist, EMPTY_RETURN.c_str() is returned. 00797 const char* SamFileHeader::getSortOrder() 00798 { 00799 if(myHD == NULL) 00800 { 00801 // No HD, so return blank EMPTY_RETURN.c_str() 00802 return(EMPTY_RETURN.c_str()); 00803 } 00804 return(myHD->getSortOrder()); 00805 } 00806 00807 00808 // Deprecated way of getting the sort order from the file. 00809 const char* SamFileHeader::getTagSO() 00810 { 00811 return(getSortOrder()); 00812 } 00813 00814 00815 // Get the next SQ header record. After all SQ headers have been retrieved, 00816 // NULL is returned until a reset is called. 00817 SamHeaderRecord* SamFileHeader::getNextSQRecord() 00818 { 00819 return(getNextHeaderRecord(myCurrentSQIndex, 00820 SamHeaderRecord::SQ)); 00821 } 00822 00823 00824 // Get the next RG header record. After all RG headers have been retrieved, 00825 // NULL is returned until a reset is called. 00826 SamHeaderRecord* SamFileHeader::getNextRGRecord() 00827 { 00828 return(getNextHeaderRecord(myCurrentRGIndex, 00829 SamHeaderRecord::RG)); 00830 } 00831 00832 00833 // Get the next PG header record. After all PG headers have been retrieved, 00834 // NULL is returned until a reset is called. 00835 SamHeaderRecord* SamFileHeader::getNextPGRecord() 00836 { 00837 return(getNextHeaderRecord(myCurrentPGIndex, 00838 SamHeaderRecord::PG)); 00839 } 00840 00841 00842 // Reset to the beginning of the header records so the next call 00843 // to getNextSQRecord returns the first SQ header record. 00844 void SamFileHeader::resetSQRecordIter() 00845 { 00846 myCurrentSQIndex = 0; 00847 } 00848 00849 00850 // Reset to the beginning of the header records so the next call 00851 // to getNextRGRecord returns the first RG header record. 00852 void SamFileHeader::resetRGRecordIter() 00853 { 00854 myCurrentRGIndex = 0; 00855 } 00856 00857 00858 // Reset to the beginning of the header records so the next call 00859 // to getNextPGRecord returns the first PG header record. 00860 void SamFileHeader::resetPGRecordIter() 00861 { 00862 myCurrentPGIndex = 0; 00863 } 00864 00865 00866 // Get the next header record of the specified type. 00867 // Pass in the index to start looking at and the type to look for. 00868 // Update the index. 00869 // After all headers of that type have been retrieved, 00870 // NULL is returned until a reset is called for that type. 00871 SamHeaderRecord* SamFileHeader::getNextHeaderRecord(uint32_t& index, 00872 SamHeaderRecord::SamHeaderRecordType headerType) 00873 { 00874 SamHeaderRecord* foundRecord = NULL; 00875 // Loop until a record is found or until out of range of the 00876 // headerRecord vector. 00877 while((index < myHeaderRecords.size()) 00878 && (foundRecord == NULL)) 00879 { 00880 // Get the next record. 00881 foundRecord = myHeaderRecords[index]; 00882 // Either way, increment the index. 00883 ++index; 00884 // Check to see if the next record is active. 00885 if(!foundRecord->isActiveHeaderRecord()) 00886 { 00887 // Not active, so clear the pointer. 00888 foundRecord = NULL; 00889 } 00890 // Check to see if the record is the right type. 00891 else if(foundRecord->getType() != headerType) 00892 { 00893 // Not the right type, so clear the pointer. 00894 foundRecord = NULL; 00895 } 00896 } 00897 00898 // Return the record if it was found. Will be null if none were found. 00899 return(foundRecord); 00900 } 00901 00902 00903 // Get the next header record. After all headers have been retrieved, 00904 // NULL is returned until a reset is called. Does not return the 00905 // Comment lines. 00906 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the 00907 // same iterator. 00908 SamHeaderRecord* SamFileHeader::getNextHeaderRecord() 00909 { 00910 // Get the next header record 00911 SamHeaderRecord* foundRecord = NULL; 00912 // Loop until a record is found or until out of range of the 00913 // headerRecord vector. 00914 while((myCurrentHeaderIndex < myHeaderRecords.size()) 00915 && (foundRecord == NULL)) 00916 { 00917 // Get the next record. 00918 foundRecord = myHeaderRecords[myCurrentHeaderIndex]; 00919 // Either way, increment the index. 00920 ++myCurrentHeaderIndex; 00921 // Check to see if the next record is active. 00922 if(!foundRecord->isActiveHeaderRecord()) 00923 { 00924 // Not active, so clear the pointer. 00925 foundRecord = NULL; 00926 } 00927 } 00928 00929 // Return the record if it was found. Will be null if none were found. 00930 return(foundRecord); 00931 } 00932 00933 00934 // Set the passed in string to the next header line. The passed in 00935 // string will be overwritten. If there are no more header lines or there 00936 // is an error, false is returned and the passed in string is set to EMPTY_RETURN.c_str() 00937 // until a rest is called. 00938 // Will also return the comment lines. 00939 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the 00940 // same iterator. 00941 bool SamFileHeader::getNextHeaderLine(std::string &headerLine) 00942 { 00943 headerLine = EMPTY_RETURN.c_str(); 00944 00945 // Until the header is set, keep reading. 00946 // Header could return EMPTY_RETURN.c_str() if the header line is blank. 00947 while(headerLine == EMPTY_RETURN.c_str()) 00948 { 00949 if(getHeaderLine(myCurrentHeaderIndex, headerLine) == false) 00950 { 00951 // getHeaderLine failed, so stop processing, and return false. 00952 return(false); 00953 } 00954 else 00955 { 00956 // In range, increment the index. 00957 ++myCurrentHeaderIndex; 00958 } 00959 } 00960 return(true); 00961 } 00962 00963 00964 // Reset to the beginning of the header records so the next call 00965 // to getNextHeaderRecord returns the first header line. 00966 void SamFileHeader::resetHeaderRecordIter() 00967 { 00968 myCurrentHeaderIndex = 0; 00969 } 00970 00971 00972 void SamFileHeader::appendCommentLines(std::string &commentLines) 00973 { 00974 for(unsigned int i = 0; i < myComments.size(); i++) 00975 { 00976 commentLines += "@CO\t";; 00977 commentLines += myComments[i]; 00978 commentLines += "\n"; 00979 } 00980 } 00981 00982 00983 // Returns the comment on the next comment line. Returns EMPTY_RETURN.c_str() if all comment 00984 // lines have been returned, until resetCommentIter is called. 00985 const char* SamFileHeader::getNextComment() 00986 { 00987 if(myCurrentCommentIndex < myComments.size()) 00988 { 00989 return(myComments[myCurrentCommentIndex++].c_str()); 00990 } 00991 // Already gone through all the comments, return EMPTY_RETURN.c_str(). 00992 return(EMPTY_RETURN.c_str()); 00993 } 00994 00995 00996 // Resets to the beginning of the comments so getNextComment returns 00997 // the first comment. 00998 void SamFileHeader::resetCommentIter() 00999 { 01000 myCurrentCommentIndex = 0; 01001 } 01002 01003 01004 // Parse the header. 01005 bool SamFileHeader::parseHeader(String& header) 01006 { 01007 std::string errorMessage = ""; 01008 int numErrors = 0; 01009 int numValid = 0; 01010 01011 // Split the header into lines. 01012 std::vector<String>* types = header.Split('\n'); 01013 01014 // Loop through each header line, parsing that line. 01015 for(uint32_t index = 0; index < types->size(); index++) 01016 { 01017 // Parse the header line. 01018 if(!parseHeaderLine(types->at(index))) 01019 { 01020 errorMessage += myErrorMessage; 01021 errorMessage += "\n"; 01022 ++numErrors; 01023 } 01024 else 01025 { 01026 // valid header line 01027 ++numValid; 01028 } 01029 } 01030 01031 // Delete the types vector. 01032 delete types; 01033 types = NULL; 01034 01035 myErrorMessage = errorMessage; 01036 if((numErrors > 0) && (numValid == 0)) 01037 { 01038 // Only errors. 01039 std::cerr << numErrors 01040 << " invalid SAM/BAM Header lines were skipped due to:\n" 01041 << errorMessage << std::endl; 01042 return(false); 01043 } 01044 else if(numErrors > 0) 01045 { 01046 // Some valid & some invalid. 01047 // Going to return true, but add note about the invalid lines. 01048 std::cerr << numErrors 01049 << " invalid SAM/BAM Header lines were skipped due to:\n" 01050 << errorMessage << std::endl; 01051 } 01052 01053 return(true); 01054 } 01055 01056 01057 // Parse one line of the header. 01058 bool SamFileHeader::parseHeaderLine(const String& headerLine) 01059 { 01060 // Check if the line starts with @CO. 01061 if((headerLine.Length() >= 4) && (headerLine[0] == '@') && 01062 (headerLine[1] == 'C') && (headerLine[2] == 'O') && 01063 (headerLine[3] == '\t')) 01064 { 01065 // Comment line. 01066 String comment = headerLine.SubStr(4); 01067 return(addComment(comment)); 01068 } 01069 01070 StringArray tokens; 01071 01072 // Split the line by tabs. 01073 tokens.ReplaceColumns(headerLine, '\t'); 01074 01075 if(tokens.Length() < 1) 01076 { 01077 // Nothing on this line, just return true. 01078 return(true); 01079 } 01080 01081 // Get the header type, the first column. 01082 if((tokens[0].Length() != 3) || (tokens[0][0] != '@')) 01083 { 01084 // The header type string is incorrect. Should be 3 characters 01085 // with the first one @. 01086 myErrorMessage = "SAM/BAM Header line does not start with @ & at least 2 chars."; 01087 return(false); 01088 } 01089 01090 bool status = true; 01091 if(tokens[0] == "@HD") 01092 { 01093 if(myHD == NULL) 01094 { 01095 // Create a new hd. 01096 myHD = new SamHeaderHD(); 01097 if(myHD == NULL) 01098 { 01099 // Failed to allocate HD, so return false. 01100 myErrorMessage = "SAM/BAM Header line failed to allocate HD."; 01101 return(false); 01102 } 01103 myHeaderRecords.push_back(myHD); 01104 if(!myHD->setFields(tokens)) 01105 { 01106 myErrorMessage = "SAM/BAM Header line failed to store HD record."; 01107 status = false; 01108 } 01109 } 01110 else 01111 { 01112 // HD already set, so return false. 01113 myErrorMessage = "SAM/BAM Header line failure: multiple HD records."; 01114 status = false; 01115 } 01116 } 01117 else if(tokens[0] == "@SQ") 01118 { 01119 // Create a new SQ record. 01120 SamHeaderSQ* sq = new SamHeaderSQ(); 01121 01122 if(sq->setFields(tokens)) 01123 { 01124 // sq fields were properly set, so add it to the list of 01125 // SQ lines. 01126 // myStatus set in the method. 01127 status &= addSQ(sq); 01128 } 01129 else 01130 { 01131 myErrorMessage = "SAM/BAM Header line failed to store SQ record."; 01132 status = false; 01133 } 01134 } 01135 else if(tokens[0] == "@RG") 01136 { 01137 // Create a new RG record. 01138 SamHeaderRG* rg = new SamHeaderRG(); 01139 01140 if(rg->setFields(tokens)) 01141 { 01142 // rg fields were properly set, so add it to the list of 01143 // RG lines. 01144 // myStatus set in the method. 01145 status &= addRG(rg); 01146 } 01147 else 01148 { 01149 myErrorMessage = "SAM/BAM Header line failed to store RG record."; 01150 status = false; 01151 } 01152 } 01153 else if(tokens[0] == "@PG") 01154 { 01155 // Create a new PG record. 01156 SamHeaderPG* pg = new SamHeaderPG(); 01157 01158 if(pg->setFields(tokens)) 01159 { 01160 // pg fields were properly set, so add it to the list of 01161 // PG lines. 01162 // myStatus set in the method. 01163 status &= addPG(pg); 01164 } 01165 else 01166 { 01167 myErrorMessage = "SAM/BAM Header line failed to store PG record."; 01168 status = false; 01169 } 01170 } 01171 else 01172 { 01173 // Unknown header type. 01174 myErrorMessage = 01175 "SAM/BAM Header line failure: Skipping unknown header type, "; 01176 myErrorMessage += (const char*)(tokens[0]); 01177 status = false; 01178 } 01179 return(status); 01180 } 01181 01182 01183 01184 // Set the passed in string to the header line at the specified index. 01185 // It does NOT clear the current contents of header. 01186 // NOTE: some indexes will return blank if the entry was deleted. 01187 bool SamFileHeader::getHeaderLine(unsigned int index, std::string& header) const 01188 { 01189 // Check to see if the index is in range of the header records vector. 01190 if(index < myHeaderRecords.size()) 01191 { 01192 // In range of the header records vector, so get the string for 01193 // that record. 01194 SamHeaderRecord* hdrRec = myHeaderRecords[index]; 01195 hdrRec->appendString(header); 01196 return(true); 01197 } 01198 else 01199 { 01200 unsigned int commentIndex = index - myHeaderRecords.size(); 01201 // Check to see if it is in range of the comments. 01202 if(commentIndex < myComments.size()) 01203 { 01204 // It is in range of the comments, so add the type. 01205 header += "@CO\t"; 01206 // Add the comment. 01207 header += myComments[commentIndex]; 01208 // Add the new line. 01209 header += "\n"; 01210 return(true); 01211 } 01212 } 01213 // Invalid index. 01214 return(false); 01215 }