SamFileHeader.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "SamFileHeader.h"
00019 #include "SamHeaderSQ.h"
00020 #include "SamHeaderRG.h"
00021 
00022 
00023 const std::string SamFileHeader::EMPTY_RETURN = "";
00024 
00025 SamFileHeader::SamFileHeader()
00026     : myHD(NULL),
00027       myReferenceInfo()
00028 {
00029     resetHeader();
00030 }
00031 
00032 
00033 SamFileHeader::~SamFileHeader()
00034 {
00035     resetHeader();
00036 }
00037 
00038 
00039 // Copy Constructor   
00040 SamFileHeader::SamFileHeader(const SamFileHeader& header)
00041 {
00042     copy(header);
00043 }
00044 
00045 
00046 // Overload operator = to copy the passed in header into this header.
00047 SamFileHeader & SamFileHeader::operator = (const SamFileHeader& header)
00048 {
00049     copy(header);
00050     return(*this);
00051 }
00052 
00053 
00054 bool SamFileHeader::copy(const SamFileHeader& header)
00055 {
00056     // Check to see if the passed in value is the same as this.
00057     if(this == &header)
00058     {
00059         return(true);
00060     }
00061 
00062     resetHeader();
00063     // Copy Reference contigs, hash, lengths.
00064     myReferenceInfo = header.myReferenceInfo;
00065 
00066     // Copy the records by getting the other header's header string
00067     // and parsing it.
00068     std::string newString;
00069     bool status = header.getHeaderString(newString);
00070     String newHeaderString = newString.c_str();
00071     
00072     status &= parseHeader(newHeaderString);
00073 
00074     myCurrentHeaderIndex = header.myCurrentHeaderIndex;
00075     myCurrentCommentIndex = header.myCurrentCommentIndex;
00076 
00077     return(status);
00078 }
00079 
00080 
00081 // Reset the header for a new entry, clearing out previous values.
00082 void SamFileHeader::resetHeader()
00083 {
00084     myReferenceInfo.clear();
00085 
00086     // Clear the pointers to the header records.  They are deleted when the
00087     // vector is cleaned up.
00088     myHD = NULL;
00089     mySQs.Clear();
00090     myRGs.Clear();
00091     myPGs.Clear();
00092 
00093     // Delete the header records and clear the vector.
00094     for(unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size(); 
00095         headerIndex++)
00096     {
00097         delete myHeaderRecords[headerIndex];
00098         myHeaderRecords[headerIndex] = NULL;
00099     }
00100     myHeaderRecords.clear();
00101 
00102     // Reset the iterator for the header lines.
00103     resetHeaderRecordIter();
00104 
00105     // Reset the comment iterator.
00106     resetCommentIter();
00107 
00108     // Reset the individual type header iterators.
00109     resetSQRecordIter();
00110     resetRGRecordIter();
00111     resetPGRecordIter();
00112 
00113     // Clear the comments
00114     myComments.clear();
00115 }
00116 
00117 
00118 // Set the passed in string to the entire header string.  Clearing its
00119 // current contents.
00120 bool SamFileHeader::getHeaderString(std::string& header) const
00121 {
00122     header = "";
00123    
00124     // Keep getting header lines until there are no more - false returned.
00125     unsigned int index = 0;
00126     while(getHeaderLine(index, header) != false)
00127     {
00128         ++index;
00129     }
00130 
00131     return(true);
00132 }
00133 
00134 
00135 int SamFileHeader::getReferenceID(const String & referenceName)
00136 {
00137     return(myReferenceInfo.getReferenceID(referenceName));
00138 }
00139 
00140 
00141 int SamFileHeader::getReferenceID(const char* referenceName)
00142 {
00143     return(myReferenceInfo.getReferenceID(referenceName));
00144 }
00145 
00146 
00147 const String & SamFileHeader::getReferenceLabel(int id) const
00148 {
00149     return(myReferenceInfo.getReferenceLabel(id));
00150 }
00151 
00152 
00153 // Get the Reference Information
00154 const SamReferenceInfo* SamFileHeader::getReferenceInfo() const
00155 {
00156     return(&myReferenceInfo);
00157 }
00158 
00159 
00160 // Add reference sequence name and reference sequence length to the header.
00161 void SamFileHeader::addReferenceInfo(const char* referenceSequenceName, 
00162                                      int32_t referenceSequenceLength)
00163 {
00164     myReferenceInfo.add(referenceSequenceName, referenceSequenceLength);
00165 }
00166 
00167 // Add a header line that has an const char* value.
00168 bool SamFileHeader::addHeaderLine(const char* type, const char* tag, 
00169                                   const char* value)
00170 {
00171     String headerLine;
00172     headerLine += "@";
00173     headerLine += type;
00174     headerLine += "\t";
00175     headerLine += tag;
00176     headerLine += ":";
00177     headerLine += value;
00178     return(addHeaderLine(headerLine.c_str()));
00179 }
00180 
00181 
00182 // Add a header line that is already preformatted in a const char*.
00183 // It is assumed that the line does not contain a \n.
00184 bool SamFileHeader::addHeaderLine(const char* headerLine)
00185 {
00186     // Parse the added header line.
00187     String headerString = headerLine;
00188     if(parseHeader(headerString))
00189     {
00190         // Successfully parsed the header line.
00191         return(true);
00192     }
00193     // Failed to parse the header line, return false.
00194     return(false);
00195 }
00196 
00197 
00198 // Add the specified tag and value to the HD header.
00199 bool SamFileHeader::setHDTag(const char* tag, const char* value)
00200 {
00201     if(myHD == NULL)
00202     {
00203         // Need to create the HD line.
00204         myHD = new SamHeaderHD();
00205         if(myHD == NULL)
00206         {
00207             // New failed, return false.
00208             return(false);
00209         }
00210         // Succeeded to create the line, add it to the
00211         // list.
00212         myHeaderRecords.push_back(myHD);
00213     }
00214     return(myHD->setTag(tag, value));
00215 }
00216 
00217 
00218 // Add the specified tag and value to the SQ header with the specified name.
00219 // If the header does not yet exist, the header is added.
00220 bool SamFileHeader::setSQTag(const char* tag, const char* value,
00221                              const char* name)
00222 {
00223     // Get the SQ record for the specified name.
00224     SamHeaderSQ* sq = getSQ(name);
00225     if(sq == NULL)
00226     {
00227         // The SQ does not yet exist.
00228         // Add it.
00229         sq = new SamHeaderSQ();
00230 
00231         if(sq == NULL)
00232         {
00233             // Could not create the header record.
00234             return(false);
00235         }
00236 
00237         // Created the header record, so add it to the list of SQ lines.
00238         mySQs.Add(name, sq);
00239         myHeaderRecords.push_back(sq);
00240 
00241         // Add the key tag 
00242         if(!sq->addKey(name))
00243         {
00244             // Failed to add the key tag, return false.
00245             return(false);
00246         }
00247     }
00248 
00249     return(sq->setTag(tag, value));
00250 }
00251 
00252 
00253 // Add the specified tag and value to the RG header with the read group
00254 // identifier.  If the header does not yet exist, the header is added.
00255 bool SamFileHeader::setRGTag(const char* tag, const char* value, const char* id)
00256 {
00257     // Get the RG record for the specified name.
00258     SamHeaderRG* rg = getRG(id);
00259     if(rg == NULL)
00260     {
00261         // The RG does not yet exist.
00262         // Add it.
00263         rg = new SamHeaderRG();
00264 
00265         if(rg == NULL)
00266         {
00267             // Could not create the header record.
00268             return(false);
00269         }
00270 
00271         // Created the header record, so add it to the list of RG lines.
00272         myRGs.Add(id, rg);
00273         myHeaderRecords.push_back(rg);
00274 
00275         // Add the key tag 
00276         if(!rg->addKey(id))
00277         {
00278             // Failed to add the key tag, return false.
00279             return(false);
00280         }
00281     }
00282 
00283     return(rg->setTag(tag, value));
00284 }
00285 
00286 
00287 // Add the specified tag and value to the PG header with the specified id.
00288 // If the header does not yet exist, the header is added.
00289 // Add the specified tag and value to the PG header.
00290 bool SamFileHeader::setPGTag(const char* tag, const char* value, const char* id)
00291 {
00292     // Get the PG record for the specified name.
00293     SamHeaderPG* pg = getPG(id);
00294     if(pg == NULL)
00295     {
00296         // The PG does not yet exist.
00297         // Add it.
00298         pg = new SamHeaderPG();
00299 
00300         if(pg == NULL)
00301         {
00302             // Could not create the header record.
00303             return(false);
00304         }
00305 
00306         // Created the header record, so add it to the list of PG lines.
00307         myPGs.Add(id, pg);
00308         myHeaderRecords.push_back(pg);
00309 
00310         // Add the key tag 
00311         if(!pg->addKey(id))
00312         {
00313             // Failed to add the key tag, return false.
00314             return(false);
00315         }
00316     }
00317 
00318     return(pg->setTag(tag, value));
00319 }
00320 
00321 
00322 // Add the HD record to the header.
00323 bool SamFileHeader::addHD(SamHeaderHD* hd)
00324 {
00325     // If there is already an HD header or if null
00326     // was passed in, return false.
00327     if((myHD != NULL) || (hd == NULL))
00328     {
00329         return(false);
00330     }
00331     myHD = hd;
00332    
00333     myHeaderRecords.push_back(myHD);
00334     return(true);
00335 }
00336 
00337 
00338 // Add the SQ record to the header.
00339 bool SamFileHeader::addSQ(SamHeaderSQ* sq)
00340 {
00341     if(sq == NULL)
00342     {
00343         // null pointer passed in, can't add it.
00344         return(false);
00345     }
00346     const char* name = sq->getTagValue("SN");
00347     if(strcmp(name, EMPTY_RETURN.c_str()) == 0)
00348     {
00349         // SN is not set, so can't add it.
00350         return(false);
00351     }
00352 
00353     // Determine whether or not a record with this
00354     // key is already in the hash.
00355     if(mySQs.Find(name) < 0)
00356     {
00357         // It is not already in the hash so
00358         // add it.
00359         mySQs.Add(name, sq);
00360         myHeaderRecords.push_back(sq);
00361         return(true);
00362     }
00363 
00364     // It is already in the hash, so cannot be added.
00365     return(false);
00366 }
00367 
00368 
00369 // Add the RG record to the header.
00370 bool SamFileHeader::addRG(SamHeaderRG* rg)
00371 {
00372     if(rg == NULL)
00373     {
00374         // null pointer passed in, can't add it.
00375         return(false);
00376     }
00377     const char* id = rg->getTagValue("ID");
00378     if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00379     {
00380         // ID is not set, so can't add it.
00381         return(false);
00382     }
00383 
00384     // Determine whether or not a record with this
00385     // key is already in the hash.
00386     if(myRGs.Find(id) < 0)
00387     {
00388         // It is not already in the hash so
00389         // add it.
00390         myRGs.Add(id, rg);
00391         myHeaderRecords.push_back(rg);
00392         return(true);
00393     }
00394 
00395     // It is already in the hash, so cannot be added.
00396     return(false);
00397 }
00398 
00399 
00400 // Add the PG record to the header.
00401 bool SamFileHeader::addPG(SamHeaderPG* pg)
00402 {
00403     // If there is already an PG header, return false.
00404     if(pg == NULL)
00405     {
00406         return(false);
00407     }
00408     const char* id = pg->getTagValue("ID");
00409     if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00410     {
00411         // ID is not set, so can't add the header record.
00412         return(false);
00413     }
00414 
00415     // Determine whether or not a record with this
00416     // key is already in the hash.
00417     if(myPGs.Find(id) < 0)
00418     {
00419         // It is not already in the hash so
00420         // add it.
00421         myPGs.Add(id, pg);
00422         myHeaderRecords.push_back(pg);
00423         return(true);
00424     }
00425 
00426     // It is already in the hash, so cannot be added.
00427     return(false);
00428 }
00429 
00430 
00431 // Remove the HD record.
00432 bool SamFileHeader::removeHD()
00433 {
00434     if(myHD != NULL)
00435     {
00436         // Reset the record.  Do not delete it since it is in the headerRecords
00437         // vector and it is not worth the time to remove it from the middle of
00438         // that vector since this is the header and the space does not need
00439         // to be conserved.
00440         myHD->reset();
00441 
00442         // Set myHD to null so a new HD could be added.
00443         myHD = NULL;
00444     }
00445 
00446     return(true);
00447 }
00448 
00449 
00450 // Remove the SQ record associated with the specified name.
00451 bool SamFileHeader::removeSQ(const char* name)
00452 {
00453     // Look up the name in the hash.
00454     int hashIndex = mySQs.Find(name);
00455     if(hashIndex < 0)
00456     {
00457         // Not found in the hash, so nothing to
00458         // delete, return true it does not exist
00459         // in the hash.
00460         return(true);
00461     }
00462    
00463     // Get the SQ.
00464     SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(hashIndex));
00465 
00466     if(sq == NULL)
00467     {
00468         // sq is null, this is an error since hashIndex was greater than 0,
00469         // so it should have been found.
00470         return(false);
00471     }
00472 
00473     // Reset the record.  Do not delete it since it is in the headerRecords
00474     // vector and it is not worth the time to remove it from the middle of
00475     // that vector since this is the header and the space does not need
00476     // to be conserved.
00477     sq->reset();
00478 
00479     // Delete the entry from the hash.
00480     mySQs.Delete(hashIndex);
00481 
00482     return(true);
00483 }
00484 
00485 
00486 // Remove the RG record associated with the specified id.
00487 bool SamFileHeader::removeRG(const char* id)
00488 {
00489     // Look up the id in the hash.
00490     int hashIndex = myRGs.Find(id);
00491     if(hashIndex < 0)
00492     {
00493         // Not found in the hash, so nothing to
00494         // delete, return true it does not exist
00495         // in the hash.
00496         return(true);
00497     }
00498    
00499     // Get the RG.
00500     SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(hashIndex));
00501 
00502     if(rg == NULL)
00503     {
00504         // rg is null, this is an error since hashIndex was greater than 0,
00505         // so it should have been found.
00506         return(false);
00507     }
00508 
00509     // Reset the record.  Do not delete it since it is in the headerRecords
00510     // vector and it is not worth the time to remove it from the middle of
00511     // that vector since this is the header and the space does not need
00512     // to be conserved.
00513     rg->reset();
00514 
00515     // Delete the entry from the hash.
00516     myRGs.Delete(hashIndex);
00517 
00518     return(true);
00519 }
00520 
00521 
00522 // Remove the PG record associated with the specified id.
00523 bool SamFileHeader::removePG(const char* id)
00524 {
00525     // Look up the id in the hash.
00526     int hashIndex = myPGs.Find(id);
00527     if(hashIndex < 0)
00528     {
00529         // Not found in the hash, so nothing to
00530         // delete, return true it does not exist
00531         // in the hash.
00532         return(true);
00533     }
00534    
00535     // Get the PG.
00536     SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(hashIndex));
00537 
00538     if(pg == NULL)
00539     {
00540         // pg is null, this is an error since hashIndex was greater than 0,
00541         // so it should have been found.
00542         return(false);
00543     }
00544 
00545     // Reset the record.  Do not delete it since it is in the headerRecords
00546     // vector and it is not worth the time to remove it from the middle of
00547     // that vector since this is the header and the space does not need
00548     // to be conserved.
00549     pg->reset();
00550 
00551     // Delete the entry from the hash.
00552     myPGs.Delete(hashIndex);
00553 
00554     return(true);
00555 }
00556 
00557 
00558 SamStatus::Status SamFileHeader::setHeaderFromBamFile(IFILE filePtr)
00559 {
00560     if((filePtr == NULL) || (filePtr->isOpen() == false))
00561     {
00562         // File is not open, return failure.
00563         return(SamStatus::FAIL_ORDER);
00564     }
00565 
00566     int headerLength;
00567     // Read the header length.
00568     int readSize = ifread(filePtr, &headerLength, sizeof(int));
00569    
00570     if(readSize != sizeof(int))
00571     {
00572         // Failed to read the header length.
00573         return(SamStatus::FAIL_IO);
00574     }
00575    
00576     String header;
00577     if (headerLength > 0)
00578     {
00579         // Read the header.
00580         readSize = 
00581             ifread(filePtr, header.LockBuffer(headerLength + 1), headerLength);
00582         header[headerLength] = 0;
00583         header.UnlockBuffer();
00584         if(readSize != headerLength)
00585         {
00586             // Failed to read the header.
00587             return(SamStatus::FAIL_IO);
00588         }
00589     }
00590 
00591     // Parse the header that was read.
00592     parseHeader(header);
00593     return(SamStatus::SUCCESS);
00594 }
00595 
00596 
00597 const char* SamFileHeader::getHDTagValue(const char* tag)
00598 {
00599     if(myHD == NULL)
00600     {
00601         // return blank since there is no HD type.
00602         return(EMPTY_RETURN.c_str());
00603     }
00604     return(myHD->getTagValue(tag));
00605 }
00606 
00607 
00608 // Get the value associated with the specified tag on the SQ line with
00609 // the specified sequence name.
00610 const char* SamFileHeader::getSQTagValue(const char* tag, const char* name)
00611 {
00612     // Look up the name in the hash to get the associated SQ object.
00613     SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(name));
00614    
00615     // If it is NULL - the tag was not found, so return
00616     if(sq == NULL)
00617     {
00618         return(EMPTY_RETURN.c_str());
00619     }
00620 
00621     // Found the object, so return the SQ Tag.
00622     return(sq->getTagValue(tag));
00623 }
00624 
00625 
00626 // Get the value associated with the specified tag on the RG line with
00627 // the specified read group identifier.
00628 const char* SamFileHeader::getRGTagValue(const char* tag, const char* id)
00629 {
00630     // Look up the id in the hash to get the associated RG object.
00631     SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(id));
00632    
00633     // If it is NULL - the tag was not found, so return
00634     if(rg == NULL)
00635     {
00636         return(EMPTY_RETURN.c_str());
00637     }
00638 
00639     // Found the object, so return the RG Tag.
00640     return(rg->getTagValue(tag));
00641 }
00642 
00643 
00644 const char* SamFileHeader::getPGTagValue(const char* tag, const char* id)
00645 {
00646     // Look up the id in the hash to get the associated PG object.
00647     SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(id));
00648    
00649     // If it is NULL - the tag was not found, so return
00650     if(pg == NULL)
00651     {
00652         return(EMPTY_RETURN.c_str());
00653     }
00654 
00655     // Found the object, so return the PG Tag.
00656     return(pg->getTagValue(tag));
00657 }
00658 
00659 
00660 // Get the number of SQ objects.
00661 int SamFileHeader::getNumSQs()
00662 {
00663     return(mySQs.Entries());
00664 }
00665 
00666 
00667 // Get the number of RG objects.
00668 int SamFileHeader::getNumRGs()
00669 {
00670     return(myRGs.Entries());
00671 }
00672 
00673 
00674 // Get the number of PG objects.
00675 int SamFileHeader::getNumPGs()
00676 {
00677     return(myPGs.Entries());
00678 }
00679 
00680 
00681 // Get the HD object.
00682 SamHeaderHD* SamFileHeader::getHD()
00683 {
00684     return(myHD);
00685 }
00686 
00687 
00688 // Get the SQ object with the specified sequence name.
00689 SamHeaderSQ* SamFileHeader::getSQ(const char* name)
00690 {
00691     return((SamHeaderSQ*)(mySQs.Object(name)));
00692 }
00693 
00694 
00695 // Get the RG object with the specified read group identifier.
00696 SamHeaderRG* SamFileHeader::getRG(const char* id)
00697 {
00698     return((SamHeaderRG*)(myRGs.Object(id)));
00699 }
00700 
00701 
00702 // Get the PG object.
00703 SamHeaderPG* SamFileHeader::getPG(const char* id)
00704 {
00705     return((SamHeaderPG*)(myPGs.Object(id)));
00706 }
00707 
00708 
00709 // Return the value of the SO tag.  
00710 // If this field does not exist, EMPTY_RETURN.c_str() is returned.
00711 const char* SamFileHeader::getSortOrder()
00712 {
00713     if(myHD == NULL)
00714     {
00715         // No HD, so return blank EMPTY_RETURN.c_str()
00716         return(EMPTY_RETURN.c_str());
00717     }
00718     return(myHD->getSortOrder());   
00719 }
00720 
00721 
00722 // Deprecated way of getting the sort order from the file.
00723 const char* SamFileHeader::getTagSO()
00724 {
00725     return(getSortOrder());
00726 }
00727 
00728 
00729 // Get the next SQ header record.  After all SQ headers have been retrieved,
00730 // NULL is returned until a reset is called.
00731 SamHeaderRecord* SamFileHeader::getNextSQRecord()
00732 {
00733     return(getNextHeaderRecord(myCurrentSQIndex, 
00734                                SamHeaderRecord::SQ));
00735 }
00736 
00737 
00738 // Get the next RG header record.  After all RG headers have been retrieved,
00739 // NULL is returned until a reset is called.
00740 SamHeaderRecord* SamFileHeader::getNextRGRecord()
00741 {
00742     return(getNextHeaderRecord(myCurrentRGIndex, 
00743                                SamHeaderRecord::RG));
00744 }
00745 
00746 
00747 // Get the next PG header record.  After all PG headers have been retrieved,
00748 // NULL is returned until a reset is called.
00749 SamHeaderRecord* SamFileHeader::getNextPGRecord()
00750 {
00751     return(getNextHeaderRecord(myCurrentPGIndex, 
00752                                SamHeaderRecord::PG));
00753 }
00754 
00755 
00756 // Reset to the beginning of the header records so the next call
00757 // to getNextSQRecord returns the first SQ header record.
00758 void SamFileHeader::resetSQRecordIter()
00759 {
00760     myCurrentSQIndex = 0;
00761 }
00762 
00763 
00764 // Reset to the beginning of the header records so the next call
00765 // to getNextRGRecord returns the first RG header record.
00766 void SamFileHeader::resetRGRecordIter()
00767 {
00768     myCurrentRGIndex = 0;
00769 }
00770 
00771 
00772 // Reset to the beginning of the header records so the next call
00773 // to getNextPGRecord returns the first PG header record.
00774 void SamFileHeader::resetPGRecordIter()
00775 {
00776     myCurrentPGIndex = 0;
00777 }
00778 
00779 
00780 // Get the next header record of the specified type.
00781 // Pass in the index to start looking at and the type to look for.
00782 // Update the index.
00783 // After all headers of that type have been retrieved,
00784 // NULL is returned until a reset is called for that type.
00785 SamHeaderRecord* SamFileHeader::getNextHeaderRecord(uint32_t& index, 
00786                                                     SamHeaderRecord::SamHeaderRecordType headerType)
00787 {
00788     SamHeaderRecord* foundRecord = NULL;
00789     // Loop until a record is found or until out of range of the 
00790     // headerRecord vector.
00791     while((index < myHeaderRecords.size()) 
00792           && (foundRecord == NULL))
00793     {
00794         // Get the next record.
00795         foundRecord = myHeaderRecords[index];
00796         // Either way, increment the index.
00797         ++index;
00798         // Check to see if the next record is active.
00799         if(!foundRecord->isActiveHeaderRecord())
00800         {
00801             // Not active, so clear the pointer.
00802             foundRecord = NULL;
00803         }
00804         // Check to see if the record is the right type.
00805         else if(foundRecord->getType() != headerType)
00806         {
00807             // Not the right type, so clear the pointer.
00808             foundRecord = NULL;
00809         }
00810     }
00811 
00812     // Return the record if it was found.  Will be null if none were found.
00813     return(foundRecord);
00814 }
00815 
00816 
00817 // Get the next header record.  After all headers have been retrieved,
00818 // NULL is returned until a reset is called.  Does not return the
00819 // Comment lines.
00820 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00821 // same iterator.
00822 SamHeaderRecord* SamFileHeader::getNextHeaderRecord()
00823 {
00824     // Get the next header record
00825     SamHeaderRecord* foundRecord = NULL;
00826     // Loop until a record is found or until out of range of the 
00827     // headerRecord vector.
00828     while((myCurrentHeaderIndex < myHeaderRecords.size()) 
00829           && (foundRecord == NULL))
00830     {
00831         // Get the next record.
00832         foundRecord = myHeaderRecords[myCurrentHeaderIndex];
00833         // Either way, increment the index.
00834         ++myCurrentHeaderIndex;
00835         // Check to see if the next record is active.
00836         if(!foundRecord->isActiveHeaderRecord())
00837         {
00838             // Not active, so clear the pointer.
00839             foundRecord = NULL;
00840         }
00841     }
00842 
00843     // Return the record if it was found.  Will be null if none were found.
00844     return(foundRecord);
00845 }
00846 
00847 
00848 // Set the passed in string to the next header line.  The passed in 
00849 // string will be overwritten.  If there are no more header lines or there
00850 // is an error, false is returned and the passed in string is set to EMPTY_RETURN.c_str()
00851 // until a rest is called.
00852 // Will also return the comment lines.
00853 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00854 // same iterator.
00855 bool SamFileHeader::getNextHeaderLine(std::string &headerLine)
00856 {
00857     headerLine = EMPTY_RETURN.c_str();
00858 
00859     // Until the header is set, keep reading.
00860     // Header could return EMPTY_RETURN.c_str() if the header line is blank.
00861     while(headerLine == EMPTY_RETURN.c_str())
00862     {
00863         if(getHeaderLine(myCurrentHeaderIndex, headerLine) == false)
00864         {
00865             // getHeaderLine failed, so stop processing, and return false.
00866             return(false);
00867         }
00868         else
00869         {
00870             // In range, increment the index.
00871             ++myCurrentHeaderIndex;
00872         }
00873     }
00874     return(true);
00875 }
00876 
00877 
00878 // Reset to the beginning of the header records so the next call
00879 // to getNextHeaderRecord returns the first header line.
00880 void SamFileHeader::resetHeaderRecordIter()
00881 {
00882     myCurrentHeaderIndex = 0;
00883 }
00884 
00885 
00886 // Returns the comment on the next comment line.  Returns EMPTY_RETURN.c_str() if all comment
00887 // lines have been returned, until resetCommentIter is called.
00888 const char* SamFileHeader::getNextComment()
00889 {
00890     if(myCurrentCommentIndex < myComments.size())
00891     {
00892         return(myComments[myCurrentCommentIndex++].c_str());
00893     }
00894     // Already gone through all the comments, return EMPTY_RETURN.c_str().
00895     return(EMPTY_RETURN.c_str());
00896 }
00897 
00898 
00899 // Resets to the beginning of the comments so getNextComment returns
00900 // the first comment.
00901 void SamFileHeader::resetCommentIter()
00902 {
00903     myCurrentCommentIndex = 0;
00904 }
00905 
00906 
00907 // Add a comment.
00908 bool SamFileHeader::addComment(const char* comment)
00909 {
00910     if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0))
00911     {
00912         // Valid comment, so add it.
00913         myComments.push_back(comment);
00914     }
00915     return(true);
00916 }
00917 
00918 
00919 // Populate the reference info from the SQ fields.
00920 void SamFileHeader::generateReferenceInfo()
00921 {
00922     // Loop through the SQ fields.
00923     uint32_t sqIndex = 0;
00924     SamHeaderRecord* hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00925     while(hdrRec != NULL)
00926     {
00927         // Set the reference info based on this SQ record.
00928         String refName = hdrRec->getTagValue("SN");
00929         String refLen = hdrRec->getTagValue("LN");
00930         long refLenInt = 0;
00931         if(refLen.AsInteger(refLenInt))
00932         {
00933             // Successfully converted the reference to an integer
00934             // so add the reference information.
00935             myReferenceInfo.add(refName, refLen);
00936         }
00937         hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00938     }
00939 }
00940 
00941 
00942 // Parse the header.
00943 bool SamFileHeader::parseHeader(String& header)
00944 {
00945     // Track the parsing status.
00946     // If there are any badly formatted fields found in parsing
00947     // it will be set to false.
00948     bool status = true;
00949 
00950     // Split the header into lines.
00951     std::vector<String>* types = header.Split('\n');
00952 
00953     // Loop through each header line, parsing that line.
00954     for(uint32_t index = 0; index < types->size(); index++)
00955     {
00956         // Parse the header line.
00957         status &= parseHeaderLine(types->at(index));
00958     }
00959    
00960     // Delete the types vector.
00961     delete types;
00962     types = NULL;
00963 
00964     return(status);
00965 }
00966 
00967 
00968 // Parse one line of the header.
00969 bool SamFileHeader::parseHeaderLine(const String& headerLine)
00970 {
00971     StringArray tokens;
00972 
00973     // Split the line by tabs.
00974     tokens.ReplaceColumns(headerLine, '\t');
00975    
00976     if(tokens.Length() < 1)
00977     {
00978         // Nothing on this line, just return true.
00979         return(true);
00980     }
00981    
00982     // Get the header type, the first column.
00983     if((tokens[0].Length() != 3) || (tokens[0][0] != '@'))
00984     {
00985         // The header type string is incorrect.  Should be 3 characters
00986         // with the first one @.
00987         return(false);
00988     }
00989    
00990     bool status = true;
00991     if(tokens[0] == "@HD")
00992     {
00993         if(myHD == NULL)
00994         {
00995             // Create a new hd.
00996             myHD = new SamHeaderHD();
00997             if(myHD == NULL)
00998             {
00999                 // Failed to allocate HD, so return false.
01000                 return(false);
01001             }
01002             myHeaderRecords.push_back(myHD);
01003             status &= myHD->setFields(tokens);
01004         }
01005         else
01006         {
01007             // HD already set, so return false.
01008             status = false;
01009         }
01010     }
01011     else if(tokens[0] == "@SQ")
01012     {
01013         // Create a new SQ record.
01014         SamHeaderSQ* sq = new SamHeaderSQ();
01015       
01016         if(sq->setFields(tokens))
01017         {
01018             // sq fields were properly set, so add it to the list of
01019             // SQ lines.
01020             status &= addSQ(sq);
01021         }
01022         else
01023         {
01024             status = false;
01025         }
01026     }
01027     else if(tokens[0] == "@RG")
01028     {
01029         // Create a new RG record.
01030         SamHeaderRG* rg = new SamHeaderRG();
01031       
01032         if(rg->setFields(tokens))
01033         {
01034             // rg fields were properly set, so add it to the list of
01035             // RG lines.
01036             status &= addRG(rg);
01037         }
01038         else
01039         {
01040             status = false;
01041         }
01042     }
01043     else if(tokens[0] == "@PG")
01044     {
01045         // Create a new PG record.
01046         SamHeaderPG* pg = new SamHeaderPG();
01047       
01048         if(pg->setFields(tokens))
01049         {
01050             // pg fields were properly set, so add it to the list of
01051             // PG lines.
01052             status &= addPG(pg);
01053         }
01054         else
01055         {
01056             status = false;
01057         }
01058     }
01059     else if(tokens[0] == "@CO")
01060     {
01061         addComment(tokens[1]);
01062     }
01063     else
01064     {
01065         // Unknown header type.
01066         status = false;
01067     }
01068    
01069     return(status);
01070 }
01071 
01072 
01073 
01074 // Set the passed in string to the header line at the specified index.
01075 // It does NOT clear the current contents of header.
01076 // NOTE: some indexes will return blank if the entry was deleted.
01077 bool SamFileHeader::getHeaderLine(unsigned int index, std::string& header) const
01078 {
01079     // Check to see if the index is in range of the header records vector.
01080     if(index < myHeaderRecords.size())
01081     {
01082         // In range of the header records vector, so get the string for
01083         // that record.
01084         SamHeaderRecord* hdrRec = myHeaderRecords[index];
01085         hdrRec->appendString(header);
01086         return(true);
01087     }
01088     else
01089     {
01090         unsigned int commentIndex = index - myHeaderRecords.size();
01091         // Check to see if it is in range of the comments.
01092         if(commentIndex < myComments.size())
01093         {
01094             // It is in range of the comments, so add the type.
01095             header += "@CO\t";
01096             // Add the comment.
01097             header += myComments[commentIndex];
01098             // Add the new line.
01099             header += "\n";
01100             return(true);
01101         }
01102     }
01103     // Invalid index.
01104     return(false);
01105 }
Generated on Wed Nov 17 15:38:27 2010 for StatGen Software by  doxygen 1.6.3