libStatGen Software  1
SamFileHeader.cpp
00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "SamFileHeader.h"
00019 #include "SamHeaderSQ.h"
00020 #include "SamHeaderRG.h"
00021 
00022 
00023 const std::string SamFileHeader::EMPTY_RETURN = "";
00024 
00025 SamFileHeader::SamFileHeader()
00026     : myHD(NULL),
00027       myReferenceInfo(),
00028       myErrorMessage("")
00029 {
00030     resetHeader();
00031 
00032     mySQs.setCaseSensitive(true);
00033     myRGs.setCaseSensitive(true);
00034     myPGs.setCaseSensitive(true);
00035 }
00036 
00037 
00038 SamFileHeader::~SamFileHeader()
00039 {
00040     resetHeader();
00041 }
00042 
00043 
00044 // Copy Constructor   
00045 SamFileHeader::SamFileHeader(const SamFileHeader& header)
00046 {
00047     copy(header);
00048 }
00049 
00050 
00051 // Overload operator = to copy the passed in header into this header.
00052 SamFileHeader & SamFileHeader::operator = (const SamFileHeader& header)
00053 {
00054     copy(header);
00055     return(*this);
00056 }
00057 
00058 
00059 bool SamFileHeader::copy(const SamFileHeader& header)
00060 {
00061     // Check to see if the passed in value is the same as this.
00062     if(this == &header)
00063     {
00064         return(true);
00065     }
00066 
00067     resetHeader();
00068 
00069     // Copy the records by getting the other header's header string
00070     // and parsing it.
00071     std::string newString;
00072     bool status = header.getHeaderString(newString);
00073     String newHeaderString = newString.c_str();
00074     
00075     status &= parseHeader(newHeaderString);
00076 
00077     myCurrentHeaderIndex = header.myCurrentHeaderIndex;
00078     myCurrentCommentIndex = header.myCurrentCommentIndex;
00079 
00080     // Clear the reference info and copy it to ensure it is the same.
00081     myReferenceInfo.clear();
00082     // Copy Reference contigs, hash, lengths.
00083     myReferenceInfo = header.myReferenceInfo;
00084 
00085     return(status);
00086 }
00087 
00088 
00089 // Reset the header for a new entry, clearing out previous values.
00090 void SamFileHeader::resetHeader()
00091 {
00092     myReferenceInfo.clear();
00093 
00094     // Clear the pointers to the header records.  They are deleted when the
00095     // vector is cleaned up.
00096     myHD = NULL;
00097     mySQs.Clear();
00098     myRGs.Clear();
00099     myPGs.Clear();
00100 
00101     // Delete the header records and clear the vector.
00102     for(unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size(); 
00103         headerIndex++)
00104     {
00105         if(myHeaderRecords[headerIndex] != NULL)
00106         {
00107             delete myHeaderRecords[headerIndex];
00108             myHeaderRecords[headerIndex] = NULL;
00109         }
00110     }
00111     myHeaderRecords.clear();
00112 
00113     // Reset the iterator for the header lines.
00114     resetHeaderRecordIter();
00115 
00116     // Reset the comment iterator.
00117     resetCommentIter();
00118 
00119     // Reset the individual type header iterators.
00120     resetSQRecordIter();
00121     resetRGRecordIter();
00122     resetPGRecordIter();
00123 
00124     // Clear the comments
00125     myComments.clear();
00126 }
00127 
00128 
00129 // Set the passed in string to the entire header string.  Clearing its
00130 // current contents.
00131 bool SamFileHeader::getHeaderString(std::string& header) const
00132 {
00133     header.clear();
00134    
00135     // Keep getting header lines until there are no more - false returned.
00136     unsigned int index = 0;
00137     while(getHeaderLine(index, header) != false)
00138     {
00139         ++index;
00140     }
00141 
00142     return(true);
00143 }
00144 
00145 
00146 int SamFileHeader::getReferenceID(const String & referenceName, bool addID)
00147 {
00148     return(myReferenceInfo.getReferenceID(referenceName, addID));
00149 }
00150 
00151 
00152 int SamFileHeader::getReferenceID(const char* referenceName, bool addID)
00153 {
00154     return(myReferenceInfo.getReferenceID(referenceName, addID));
00155 }
00156 
00157 
00158 const String & SamFileHeader::getReferenceLabel(int id) const
00159 {
00160     return(myReferenceInfo.getReferenceLabel(id));
00161 }
00162 
00163 
00164 // Get the Reference Information
00165 const SamReferenceInfo& SamFileHeader::getReferenceInfo() const
00166 {
00167     return(myReferenceInfo);
00168 }
00169 
00170 
00171 // Get the Reference Information for updating separately when reading
00172 // BAMs...should only be called by BamInterface.
00173 SamReferenceInfo& SamFileHeader::getReferenceInfoForBamInterface()
00174 {
00175     return(myReferenceInfo);
00176 }
00177 
00178 
00179 // Add a header line that has an const char* value.
00180 bool SamFileHeader::addHeaderLine(const char* type, const char* tag, 
00181                                   const char* value)
00182 {
00183     String headerLine;
00184     headerLine += "@";
00185     headerLine += type;
00186     headerLine += "\t";
00187     headerLine += tag;
00188     headerLine += ":";
00189     headerLine += value;
00190     return(addHeaderLine(headerLine.c_str()));
00191 }
00192 
00193 
00194 // Add a header line that is already preformatted in a const char*.
00195 bool SamFileHeader::addHeaderLine(const char* headerLine)
00196 {
00197     // Parse the added header line.
00198     String headerString = headerLine;
00199     return(parseHeader(headerString));
00200 }
00201 
00202 
00203 // Add a header line that is already preformatted in a const char*.
00204 bool SamFileHeader::addHeader(const char* header)
00205 {
00206     // Parse the added header line.
00207     String headerString = header;
00208     return(parseHeader(headerString));
00209 }
00210 
00211 
00212 // Add a comment.
00213 bool SamFileHeader::addComment(const char* comment)
00214 {
00215     if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0))
00216     {
00217         // Valid comment, so add it.
00218         myComments.push_back(comment);
00219     }
00220     return(true);
00221 }
00222 
00223 
00224 // Add the specified tag and value to the HD header.
00225 bool SamFileHeader::setHDTag(const char* tag, const char* value)
00226 {
00227     if(myHD == NULL)
00228     {
00229         // Need to create the HD line.
00230         myHD = new SamHeaderHD();
00231         if(myHD == NULL)
00232         {
00233             // New failed, return false.
00234             myErrorMessage = "SamFileHeader: Failed to allocate a new HD tag";
00235             return(false);
00236         }
00237         // Succeeded to create the line, add it to the
00238         // list.
00239         myHeaderRecords.push_back(myHD);
00240     }
00241     if(!myHD->setTag(tag, value))
00242     {
00243         myErrorMessage = "SamFileHeader: Failed to set the specified HD tag";
00244         return(false);
00245     }
00246     return(true);
00247 }
00248 
00249 
00250 // Add the specified tag and value to the SQ header with the specified name.
00251 // If the header does not yet exist, the header is added.
00252 bool SamFileHeader::setSQTag(const char* tag, const char* value,
00253                              const char* name)
00254 {
00255     // Get the SQ record for the specified name.
00256     SamHeaderSQ* sq = getSQ(name);
00257     if(sq == NULL)
00258     {
00259         // The SQ does not yet exist.
00260         // Make sure the tag is LN.
00261         if(strcmp(tag, "LN") != 0)
00262         {
00263             // LN is required so must be the first tag added
00264             myErrorMessage = 
00265                 "SamFileHeader:Failed to add the specified SQ key, LN not specified.";
00266             return(false);
00267         }
00268 
00269         // Add it.
00270         sq = new SamHeaderSQ();
00271 
00272         if(sq == NULL)
00273         {
00274             // Could not create the header record.
00275             myErrorMessage = "SamFileHeader: Failed to allocate a new SQ tag";
00276             return(false);
00277         }
00278 
00279         // Created the header record, so add it to the list of SQ lines.
00280         mySQs.Add(name, sq);
00281         myHeaderRecords.push_back(sq);
00282         // value is the length, so update the reference info.
00283         myReferenceInfo.add(name, atoi(value));
00284 
00285         // Add the key tag 
00286         if(!sq->addKey(name))
00287         {
00288             // Failed to add the key tag, return false.
00289             myErrorMessage = "SamFileHeader:Failed to add the specified SQ key";
00290             return(false);
00291         }
00292     }
00293     else if(strcmp(tag, "LN") == 0)
00294     {
00295         // Cannot modify/remove the LN tag.
00296         myErrorMessage = "SamFileHeader:Cannot modify/remove the SQ's LN tag";
00297         return(false);
00298     }
00299 
00300     if(!sq->setTag(tag, value))
00301     {
00302         myErrorMessage = "Failed to set the specified SQ tag";
00303         return(false);
00304     }
00305     return(true);
00306 }
00307 
00308 
00309 // Add the specified tag and value to the RG header with the read group
00310 // identifier.  If the header does not yet exist, the header is added.
00311 bool SamFileHeader::setRGTag(const char* tag, const char* value, const char* id)
00312 {
00313     // Get the RG record for the specified name.
00314     SamHeaderRG* rg = getRG(id);
00315     if(rg == NULL)
00316     {
00317         // The RG does not yet exist.
00318         // Add it.
00319         rg = new SamHeaderRG();
00320 
00321         if(rg == NULL)
00322         {
00323             // Could not create the header record.
00324             myErrorMessage = "Failed to allocate a new RG tag";
00325             return(false);
00326         }
00327 
00328         // Created the header record, so add it to the list of RG lines.
00329         myRGs.Add(id, rg);
00330         myHeaderRecords.push_back(rg);
00331 
00332         // Add the key tag 
00333         if(!rg->addKey(id))
00334         {
00335             // Failed to add the key tag, return false.
00336             myErrorMessage = "Failed to add the specified RG key";
00337             return(false);
00338         }
00339     }
00340 
00341     if(!rg->setTag(tag, value))
00342     {
00343         myErrorMessage = "Failed to set the specified RG tag";
00344         return(false);
00345     }
00346     return(true);
00347 }
00348 
00349 
00350 // Add the specified tag and value to the PG header with the specified id.
00351 // If the header does not yet exist, the header is added.
00352 // Add the specified tag and value to the PG header.
00353 bool SamFileHeader::setPGTag(const char* tag, const char* value, const char* id)
00354 {
00355     // Get the PG record for the specified name.
00356     SamHeaderPG* pg = getPG(id);
00357     if(pg == NULL)
00358     {
00359         // The PG does not yet exist.
00360         // Add it.
00361         pg = new SamHeaderPG();
00362 
00363         if(pg == NULL)
00364         {
00365             // Could not create the header record.
00366             myErrorMessage = "Failed to allocate a new PG tag";
00367             return(false);
00368         }
00369 
00370         // Created the header record, so add it to the list of PG lines.
00371         myPGs.Add(id, pg);
00372         myHeaderRecords.push_back(pg);
00373 
00374         // Add the key tag 
00375         if(!pg->addKey(id))
00376         {
00377             // Failed to add the key tag, return false.
00378             myErrorMessage = "Failed to add the specified PG key";
00379             return(false);
00380         }
00381     }
00382 
00383     if(!pg->setTag(tag, value))
00384     {
00385         myErrorMessage = "Failed to set the specified PG tag";
00386         return(false);
00387     }
00388     return(true);
00389 }
00390 
00391 
00392 // Add the HD record to the header.
00393 bool SamFileHeader::addHD(SamHeaderHD* hd)
00394 {
00395     // If there is already an HD header or if null
00396     // was passed in, return false.
00397     if(myHD != NULL)
00398     {
00399         myErrorMessage = "Failed add an HD tag - there is already one";
00400         return(false);
00401     }
00402     if(hd == NULL)
00403     {
00404         myErrorMessage = "Failed add an HD tag - no tag specified";
00405         return(false);
00406     }
00407     myHD = hd;
00408    
00409     myHeaderRecords.push_back(myHD);
00410     return(true);
00411 }
00412 
00413 
00414 // Add the SQ record to the header.
00415 bool SamFileHeader::addSQ(SamHeaderSQ* sq)
00416 {
00417     if(sq == NULL)
00418     {
00419         // null pointer passed in, can't add it.
00420         myErrorMessage = "SAM/BAM Header line failed to allocate SQ.";
00421         return(false);
00422     }
00423     const char* name = sq->getTagValue("SN");
00424     const char* length = sq->getTagValue("LN");
00425     if(strcmp(name, EMPTY_RETURN.c_str()) == 0)
00426     {
00427         // SN is not set, so can't add it.
00428         myErrorMessage = 
00429             "SAM/BAM Header line failure: Skipping SQ line that is missing the SN field.";
00430         return(false);
00431     }
00432     if(strcmp(length, EMPTY_RETURN.c_str()) == 0)
00433     {
00434         // LN is not set, so can't add it.
00435         myErrorMessage = 
00436             "SAM/BAM Header line failure: Skipping SQ line that is missing the LN field.";
00437         return(false);
00438     }
00439 
00440     // Determine whether or not a record with this
00441     // key is already in the hash.
00442     if(mySQs.Find(name) < 0)
00443     {
00444         // It is not already in the hash so add it.
00445         mySQs.Add(name, sq);
00446         myHeaderRecords.push_back(sq);
00447         myReferenceInfo.add(name, atoi(length));
00448         return(true);
00449     }
00450 
00451     // It is already in the hash, so cannot be added.
00452     myErrorMessage = "SAM/BAM Header line failure: Skipping SQ line that has a repeated SN field.";
00453     return(false);
00454 }
00455 
00456 
00457 // Add the RG record to the header.
00458 bool SamFileHeader::addRG(SamHeaderRG* rg)
00459 {
00460     if(rg == NULL)
00461     {
00462         // null pointer passed in, can't add it.
00463         myErrorMessage = "SAM/BAM Header line failed to allocate RG.";
00464         return(false);
00465     }
00466     const char* id = rg->getTagValue("ID");
00467     if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00468     {
00469         // ID is not set, so can't add it.
00470         myErrorMessage = "SAM/BAM Header line failure: Skipping RG line that is missing the ID field.";
00471         return(false);
00472     }
00473 
00474     // Determine whether or not a record with this
00475     // key is already in the hash.
00476     if(myRGs.Find(id) < 0)
00477     {
00478         // It is not already in the hash so
00479         // add it.
00480         myRGs.Add(id, rg);
00481         myHeaderRecords.push_back(rg);
00482         return(true);
00483     }
00484 
00485     // It is already in the hash, so cannot be added.
00486     myErrorMessage = "SAM/BAM Header line failure: Skipping RG line that has a repeated ID field.";
00487     return(false);
00488 }
00489 
00490 
00491 // Add the PG record to the header.
00492 bool SamFileHeader::addPG(SamHeaderPG* pg)
00493 {
00494     // If a null pointer was passed in, return false.
00495     if(pg == NULL)
00496     {
00497         myErrorMessage = "SAM/BAM Header line failed to allocate PG.";
00498         return(false);
00499     }
00500     const char* id = pg->getTagValue("ID");
00501     if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00502     {
00503         // ID is not set, so can't add the header record.
00504         myErrorMessage = "SAM/BAM Header line failure: Skipping PG line that is missing the ID field.";
00505         return(false);
00506     }
00507 
00508     // Determine whether or not a record with this
00509     // key is already in the hash.
00510     if(myPGs.Find(id) < 0)
00511     {
00512         // It is not already in the hash so
00513         // add it.
00514         myPGs.Add(id, pg);
00515         myHeaderRecords.push_back(pg);
00516         return(true);
00517     }
00518 
00519     // It is already in the hash, so cannot be added.
00520     myErrorMessage = "SAM/BAM Header line failure: Skipping PG line that has a repeated ID field.";
00521     return(false);
00522 }
00523 
00524 
00525 // Add the RG record to the header.
00526 bool SamFileHeader::addRecordCopy(const SamHeaderRecord& hdrRec)
00527 {
00528     SamHeaderRecord* newRec = hdrRec.createCopy();
00529     bool returnVal = true;
00530     switch(newRec->getType())
00531     {
00532         case SamHeaderRecord::HD:
00533             returnVal = addHD((SamHeaderHD*)newRec);
00534             break;
00535         case SamHeaderRecord::PG:
00536             returnVal = addPG((SamHeaderPG*)newRec);
00537             break;
00538         case SamHeaderRecord::RG:
00539             returnVal = addRG((SamHeaderRG*)newRec);
00540             break;
00541         case SamHeaderRecord::SQ:
00542             returnVal = addSQ((SamHeaderSQ*)newRec);
00543             break;
00544         default:
00545             myErrorMessage = "Failed to copy a header record, unknown type.";
00546             returnVal = false;
00547             break;
00548     }
00549     return(returnVal);
00550 }
00551 
00552 
00553 // Remove the HD record.
00554 bool SamFileHeader::removeHD()
00555 {
00556     if(myHD != NULL)
00557     {
00558         // Reset the record.  Do not delete it since it is in the headerRecords
00559         // vector and it is not worth the time to remove it from the middle of
00560         // that vector since this is the header and the space does not need
00561         // to be conserved.
00562         myHD->reset();
00563 
00564         // Set myHD to null so a new HD could be added.
00565         myHD = NULL;
00566     }
00567 
00568     return(true);
00569 }
00570 
00571 
00572 // Remove the SQ record associated with the specified name.
00573 bool SamFileHeader::removeSQ(const char* name)
00574 {
00575     // Look up the name in the hash.
00576     int hashIndex = mySQs.Find(name);
00577     if(hashIndex < 0)
00578     {
00579         // Not found in the hash, so nothing to
00580         // delete, return true it does not exist
00581         // in the hash.
00582         return(true);
00583     }
00584    
00585     // Get the SQ.
00586     SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(hashIndex));
00587 
00588     if(sq == NULL)
00589     {
00590         // sq is null, this is an error since hashIndex was greater than 0,
00591         // so it should have been found.
00592         myErrorMessage = "SAM/BAM Header line failed to get SQ object.";
00593        return(false);
00594     }
00595 
00596     // Reset the record.  Do not delete it since it is in the headerRecords
00597     // vector and it is not worth the time to remove it from the middle of
00598     // that vector since this is the header and the space does not need
00599     // to be conserved.
00600     sq->reset();
00601 
00602     // Delete the entry from the hash.
00603     mySQs.Delete(hashIndex);
00604 
00605     return(true);
00606 }
00607 
00608 
00609 // Remove the RG record associated with the specified id.
00610 bool SamFileHeader::removeRG(const char* id)
00611 {
00612     // Look up the id in the hash.
00613     int hashIndex = myRGs.Find(id);
00614     if(hashIndex < 0)
00615     {
00616         // Not found in the hash, so nothing to
00617         // delete, return true it does not exist
00618         // in the hash.
00619         return(true);
00620     }
00621    
00622     // Get the RG.
00623     SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(hashIndex));
00624 
00625     if(rg == NULL)
00626     {
00627         // rg is null, this is an error since hashIndex was greater than 0,
00628         // so it should have been found.
00629         myErrorMessage = "SAM/BAM Header line failed to get RG object.";
00630        return(false);
00631     }
00632 
00633     // Reset the record.  Do not delete it since it is in the headerRecords
00634     // vector and it is not worth the time to remove it from the middle of
00635     // that vector since this is the header and the space does not need
00636     // to be conserved.
00637     rg->reset();
00638 
00639     // Delete the entry from the hash.
00640     myRGs.Delete(hashIndex);
00641 
00642     return(true);
00643 }
00644 
00645 
00646 // Remove the PG record associated with the specified id.
00647 bool SamFileHeader::removePG(const char* id)
00648 {
00649     // Look up the id in the hash.
00650     int hashIndex = myPGs.Find(id);
00651     if(hashIndex < 0)
00652     {
00653         // Not found in the hash, so nothing to
00654         // delete, return true it does not exist
00655         // in the hash.
00656         return(true);
00657     }
00658    
00659     // Get the PG.
00660     SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(hashIndex));
00661 
00662     if(pg == NULL)
00663     {
00664         // pg is null, this is an error since hashIndex was greater than 0,
00665         // so it should have been found.
00666         myErrorMessage = "SAM/BAM Header line failed to get PG object.";
00667         return(false);
00668     }
00669 
00670     // Reset the record.  Do not delete it since it is in the headerRecords
00671     // vector and it is not worth the time to remove it from the middle of
00672     // that vector since this is the header and the space does not need
00673     // to be conserved.
00674     pg->reset();
00675 
00676     // Delete the entry from the hash.
00677     myPGs.Delete(hashIndex);
00678 
00679     return(true);
00680 }
00681 
00682 
00683 const char* SamFileHeader::getHDTagValue(const char* tag)
00684 {
00685     if(myHD == NULL)
00686     {
00687         // return blank since there is no HD type.
00688         return(EMPTY_RETURN.c_str());
00689     }
00690     return(myHD->getTagValue(tag));
00691 }
00692 
00693 
00694 // Get the value associated with the specified tag on the SQ line with
00695 // the specified sequence name.
00696 const char* SamFileHeader::getSQTagValue(const char* tag, const char* name)
00697 {
00698     // Look up the name in the hash to get the associated SQ object.
00699     SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(name));
00700    
00701     // If it is NULL - the tag was not found, so return
00702     if(sq == NULL)
00703     {
00704         return(EMPTY_RETURN.c_str());
00705     }
00706 
00707     // Found the object, so return the SQ Tag.
00708     return(sq->getTagValue(tag));
00709 }
00710 
00711 
00712 // Get the value associated with the specified tag on the RG line with
00713 // the specified read group identifier.
00714 const char* SamFileHeader::getRGTagValue(const char* tag, const char* id)
00715 {
00716     // Look up the id in the hash to get the associated RG object.
00717     SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(id));
00718    
00719     // If it is NULL - the tag was not found, so return
00720     if(rg == NULL)
00721     {
00722         return(EMPTY_RETURN.c_str());
00723     }
00724 
00725     // Found the object, so return the RG Tag.
00726     return(rg->getTagValue(tag));
00727 }
00728 
00729 
00730 const char* SamFileHeader::getPGTagValue(const char* tag, const char* id)
00731 {
00732     // Look up the id in the hash to get the associated PG object.
00733     SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(id));
00734    
00735     // If it is NULL - the tag was not found, so return
00736     if(pg == NULL)
00737     {
00738         return(EMPTY_RETURN.c_str());
00739     }
00740 
00741     // Found the object, so return the PG Tag.
00742     return(pg->getTagValue(tag));
00743 }
00744 
00745 
00746 // Get the number of SQ objects.
00747 int SamFileHeader::getNumSQs()
00748 {
00749     return(mySQs.Entries());
00750 }
00751 
00752 
00753 // Get the number of RG objects.
00754 int SamFileHeader::getNumRGs()
00755 {
00756     return(myRGs.Entries());
00757 }
00758 
00759 
00760 // Get the number of PG objects.
00761 int SamFileHeader::getNumPGs()
00762 {
00763     return(myPGs.Entries());
00764 }
00765 
00766 
00767 // Get the HD object.
00768 SamHeaderHD* SamFileHeader::getHD()
00769 {
00770     return(myHD);
00771 }
00772 
00773 
00774 // Get the SQ object with the specified sequence name.
00775 SamHeaderSQ* SamFileHeader::getSQ(const char* name)
00776 {
00777     return((SamHeaderSQ*)(mySQs.Object(name)));
00778 }
00779 
00780 
00781 // Get the RG object with the specified read group identifier.
00782 SamHeaderRG* SamFileHeader::getRG(const char* id)
00783 {
00784     return((SamHeaderRG*)(myRGs.Object(id)));
00785 }
00786 
00787 
00788 // Get the PG object.
00789 SamHeaderPG* SamFileHeader::getPG(const char* id)
00790 {
00791     return((SamHeaderPG*)(myPGs.Object(id)));
00792 }
00793 
00794 
00795 // Return the value of the SO tag.  
00796 // If this field does not exist, EMPTY_RETURN.c_str() is returned.
00797 const char* SamFileHeader::getSortOrder()
00798 {
00799     if(myHD == NULL)
00800     {
00801         // No HD, so return blank EMPTY_RETURN.c_str()
00802         return(EMPTY_RETURN.c_str());
00803     }
00804     return(myHD->getSortOrder());   
00805 }
00806 
00807 
00808 // Deprecated way of getting the sort order from the file.
00809 const char* SamFileHeader::getTagSO()
00810 {
00811     return(getSortOrder());
00812 }
00813 
00814 
00815 // Get the next SQ header record.  After all SQ headers have been retrieved,
00816 // NULL is returned until a reset is called.
00817 SamHeaderRecord* SamFileHeader::getNextSQRecord()
00818 {
00819     return(getNextHeaderRecord(myCurrentSQIndex, 
00820                                SamHeaderRecord::SQ));
00821 }
00822 
00823 
00824 // Get the next RG header record.  After all RG headers have been retrieved,
00825 // NULL is returned until a reset is called.
00826 SamHeaderRecord* SamFileHeader::getNextRGRecord()
00827 {
00828     return(getNextHeaderRecord(myCurrentRGIndex, 
00829                                SamHeaderRecord::RG));
00830 }
00831 
00832 
00833 // Get the next PG header record.  After all PG headers have been retrieved,
00834 // NULL is returned until a reset is called.
00835 SamHeaderRecord* SamFileHeader::getNextPGRecord()
00836 {
00837     return(getNextHeaderRecord(myCurrentPGIndex, 
00838                                SamHeaderRecord::PG));
00839 }
00840 
00841 
00842 // Reset to the beginning of the header records so the next call
00843 // to getNextSQRecord returns the first SQ header record.
00844 void SamFileHeader::resetSQRecordIter()
00845 {
00846     myCurrentSQIndex = 0;
00847 }
00848 
00849 
00850 // Reset to the beginning of the header records so the next call
00851 // to getNextRGRecord returns the first RG header record.
00852 void SamFileHeader::resetRGRecordIter()
00853 {
00854     myCurrentRGIndex = 0;
00855 }
00856 
00857 
00858 // Reset to the beginning of the header records so the next call
00859 // to getNextPGRecord returns the first PG header record.
00860 void SamFileHeader::resetPGRecordIter()
00861 {
00862     myCurrentPGIndex = 0;
00863 }
00864 
00865 
00866 // Get the next header record of the specified type.
00867 // Pass in the index to start looking at and the type to look for.
00868 // Update the index.
00869 // After all headers of that type have been retrieved,
00870 // NULL is returned until a reset is called for that type.
00871 SamHeaderRecord* SamFileHeader::getNextHeaderRecord(uint32_t& index, 
00872                                                     SamHeaderRecord::SamHeaderRecordType headerType)
00873 {
00874     SamHeaderRecord* foundRecord = NULL;
00875     // Loop until a record is found or until out of range of the 
00876     // headerRecord vector.
00877     while((index < myHeaderRecords.size()) 
00878           && (foundRecord == NULL))
00879     {
00880         // Get the next record.
00881         foundRecord = myHeaderRecords[index];
00882         // Either way, increment the index.
00883         ++index;
00884         // Check to see if the next record is active.
00885         if(!foundRecord->isActiveHeaderRecord())
00886         {
00887             // Not active, so clear the pointer.
00888             foundRecord = NULL;
00889         }
00890         // Check to see if the record is the right type.
00891         else if(foundRecord->getType() != headerType)
00892         {
00893             // Not the right type, so clear the pointer.
00894             foundRecord = NULL;
00895         }
00896     }
00897 
00898     // Return the record if it was found.  Will be null if none were found.
00899     return(foundRecord);
00900 }
00901 
00902 
00903 // Get the next header record.  After all headers have been retrieved,
00904 // NULL is returned until a reset is called.  Does not return the
00905 // Comment lines.
00906 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00907 // same iterator.
00908 SamHeaderRecord* SamFileHeader::getNextHeaderRecord()
00909 {
00910     // Get the next header record
00911     SamHeaderRecord* foundRecord = NULL;
00912     // Loop until a record is found or until out of range of the 
00913     // headerRecord vector.
00914     while((myCurrentHeaderIndex < myHeaderRecords.size()) 
00915           && (foundRecord == NULL))
00916     {
00917         // Get the next record.
00918         foundRecord = myHeaderRecords[myCurrentHeaderIndex];
00919         // Either way, increment the index.
00920         ++myCurrentHeaderIndex;
00921         // Check to see if the next record is active.
00922         if(!foundRecord->isActiveHeaderRecord())
00923         {
00924             // Not active, so clear the pointer.
00925             foundRecord = NULL;
00926         }
00927     }
00928 
00929     // Return the record if it was found.  Will be null if none were found.
00930     return(foundRecord);
00931 }
00932 
00933 
00934 // Set the passed in string to the next header line.  The passed in 
00935 // string will be overwritten.  If there are no more header lines or there
00936 // is an error, false is returned and the passed in string is set to EMPTY_RETURN.c_str()
00937 // until a rest is called.
00938 // Will also return the comment lines.
00939 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00940 // same iterator.
00941 bool SamFileHeader::getNextHeaderLine(std::string &headerLine)
00942 {
00943     headerLine = EMPTY_RETURN.c_str();
00944 
00945     // Until the header is set, keep reading.
00946     // Header could return EMPTY_RETURN.c_str() if the header line is blank.
00947     while(headerLine == EMPTY_RETURN.c_str())
00948     {
00949         if(getHeaderLine(myCurrentHeaderIndex, headerLine) == false)
00950         {
00951             // getHeaderLine failed, so stop processing, and return false.
00952             return(false);
00953         }
00954         else
00955         {
00956             // In range, increment the index.
00957             ++myCurrentHeaderIndex;
00958         }
00959     }
00960     return(true);
00961 }
00962 
00963 
00964 // Reset to the beginning of the header records so the next call
00965 // to getNextHeaderRecord returns the first header line.
00966 void SamFileHeader::resetHeaderRecordIter()
00967 {
00968     myCurrentHeaderIndex = 0;
00969 }
00970 
00971 
00972 void SamFileHeader::appendCommentLines(std::string &commentLines)
00973 {
00974     for(unsigned int i = 0; i < myComments.size(); i++)
00975     {
00976         commentLines += "@CO\t";;
00977         commentLines += myComments[i];
00978         commentLines += "\n";
00979     }
00980 }
00981 
00982 
00983 // Returns the comment on the next comment line.  Returns EMPTY_RETURN.c_str() if all comment
00984 // lines have been returned, until resetCommentIter is called.
00985 const char* SamFileHeader::getNextComment()
00986 {
00987     if(myCurrentCommentIndex < myComments.size())
00988     {
00989         return(myComments[myCurrentCommentIndex++].c_str());
00990     }
00991     // Already gone through all the comments, return EMPTY_RETURN.c_str().
00992     return(EMPTY_RETURN.c_str());
00993 }
00994 
00995 
00996 // Resets to the beginning of the comments so getNextComment returns
00997 // the first comment.
00998 void SamFileHeader::resetCommentIter()
00999 {
01000     myCurrentCommentIndex = 0;
01001 }
01002 
01003 
01004 // Parse the header.
01005 bool SamFileHeader::parseHeader(String& header)
01006 {    
01007     std::string errorMessage = "";
01008     int numErrors = 0;
01009     int numValid = 0;
01010 
01011     // Split the header into lines.
01012     std::vector<String>* types = header.Split('\n');
01013 
01014     // Loop through each header line, parsing that line.
01015     for(uint32_t index = 0; index < types->size(); index++)
01016     {
01017         // Parse the header line.
01018         if(!parseHeaderLine(types->at(index)))
01019         {
01020             errorMessage += myErrorMessage;
01021             errorMessage += "\n";
01022             ++numErrors;
01023         }
01024         else
01025         {
01026             // valid header line
01027             ++numValid;
01028         }
01029     }
01030 
01031     // Delete the types vector.
01032     delete types;
01033     types = NULL;
01034 
01035     myErrorMessage = errorMessage;
01036     if((numErrors > 0) && (numValid == 0))
01037     {
01038         // Only errors.
01039         std::cerr << numErrors
01040                   << " invalid SAM/BAM Header lines were skipped due to:\n"
01041                   << errorMessage << std::endl;
01042         return(false);
01043     }
01044     else if(numErrors > 0)
01045     {
01046         // Some valid & some invalid.
01047         // Going to return true, but add note about the invalid lines.
01048         std::cerr << numErrors
01049                   << " invalid SAM/BAM Header lines were skipped due to:\n"
01050                   << errorMessage << std::endl;
01051     }
01052 
01053     return(true);
01054 }
01055 
01056 
01057 // Parse one line of the header.
01058 bool SamFileHeader::parseHeaderLine(const String& headerLine)
01059 {
01060     // Check if the line starts with @CO.
01061     if((headerLine.Length() >= 4) && (headerLine[0] == '@') &&
01062        (headerLine[1] == 'C') && (headerLine[2] == 'O') &&
01063        (headerLine[3] == '\t'))
01064     {
01065         // Comment line.
01066         String comment = headerLine.SubStr(4);
01067         return(addComment(comment));
01068     }
01069 
01070     StringArray tokens;
01071 
01072     // Split the line by tabs.
01073     tokens.ReplaceColumns(headerLine, '\t');
01074    
01075     if(tokens.Length() < 1)
01076     {
01077         // Nothing on this line, just return true.
01078         return(true);
01079     }
01080    
01081     // Get the header type, the first column.
01082     if((tokens[0].Length() != 3) || (tokens[0][0] != '@'))
01083     {
01084         // The header type string is incorrect.  Should be 3 characters
01085         // with the first one @.
01086         myErrorMessage = "SAM/BAM Header line does not start with @ & at least 2 chars.";
01087         return(false);
01088     }
01089    
01090     bool status = true;
01091     if(tokens[0] == "@HD")
01092     {
01093         if(myHD == NULL)
01094         {
01095             // Create a new hd.
01096             myHD = new SamHeaderHD();
01097             if(myHD == NULL)
01098             {
01099                 // Failed to allocate HD, so return false.
01100                 myErrorMessage = "SAM/BAM Header line failed to allocate HD.";
01101                 return(false);
01102             }
01103             myHeaderRecords.push_back(myHD);
01104             if(!myHD->setFields(tokens))
01105             {
01106                 myErrorMessage = "SAM/BAM Header line failed to store HD record.";
01107                 status = false;
01108             }
01109         }
01110         else
01111         {
01112             // HD already set, so return false.
01113             myErrorMessage = "SAM/BAM Header line failure: multiple HD records.";
01114             status = false;
01115         }
01116     }
01117     else if(tokens[0] == "@SQ")
01118     {
01119         // Create a new SQ record.
01120         SamHeaderSQ* sq = new SamHeaderSQ();
01121       
01122         if(sq->setFields(tokens))
01123         {
01124             // sq fields were properly set, so add it to the list of
01125             // SQ lines.
01126             // myStatus set in the method.
01127             status &= addSQ(sq);
01128         }
01129         else
01130         {
01131             myErrorMessage = "SAM/BAM Header line failed to store SQ record.";
01132             status = false;
01133         }
01134     }
01135     else if(tokens[0] == "@RG")
01136     {
01137         // Create a new RG record.
01138         SamHeaderRG* rg = new SamHeaderRG();
01139       
01140         if(rg->setFields(tokens))
01141         {
01142             // rg fields were properly set, so add it to the list of
01143             // RG lines.
01144             // myStatus set in the method.
01145             status &= addRG(rg);
01146         }
01147         else
01148         {
01149             myErrorMessage = "SAM/BAM Header line failed to store RG record.";
01150             status = false;
01151         }
01152     }
01153     else if(tokens[0] == "@PG")
01154     {
01155         // Create a new PG record.
01156         SamHeaderPG* pg = new SamHeaderPG();
01157       
01158         if(pg->setFields(tokens))
01159         {
01160             // pg fields were properly set, so add it to the list of
01161             // PG lines.
01162             // myStatus set in the method.
01163             status &= addPG(pg);
01164         }
01165         else
01166         {
01167             myErrorMessage = "SAM/BAM Header line failed to store PG record.";
01168             status = false;
01169         }
01170     }
01171     else
01172     {
01173         // Unknown header type.
01174         myErrorMessage = 
01175             "SAM/BAM Header line failure: Skipping unknown header type, ";
01176         myErrorMessage += (const char*)(tokens[0]);
01177         status = false;
01178     }
01179     return(status);
01180 }
01181 
01182 
01183 
01184 // Set the passed in string to the header line at the specified index.
01185 // It does NOT clear the current contents of header.
01186 // NOTE: some indexes will return blank if the entry was deleted.
01187 bool SamFileHeader::getHeaderLine(unsigned int index, std::string& header) const
01188 {
01189     // Check to see if the index is in range of the header records vector.
01190     if(index < myHeaderRecords.size())
01191     {
01192         // In range of the header records vector, so get the string for
01193         // that record.
01194         SamHeaderRecord* hdrRec = myHeaderRecords[index];
01195         hdrRec->appendString(header);
01196         return(true);
01197     }
01198     else
01199     {
01200         unsigned int commentIndex = index - myHeaderRecords.size();
01201         // Check to see if it is in range of the comments.
01202         if(commentIndex < myComments.size())
01203         {
01204             // It is in range of the comments, so add the type.
01205             header += "@CO\t";
01206             // Add the comment.
01207             header += myComments[commentIndex];
01208             // Add the new line.
01209             header += "\n";
01210             return(true);
01211         }
01212     }
01213     // Invalid index.
01214     return(false);
01215 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends