SamFileHeader.cpp

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #include "SamFileHeader.h"
00019 #include "SamHeaderSQ.h"
00020 #include "SamHeaderRG.h"
00021 
00022 
00023 const std::string SamFileHeader::EMPTY_RETURN = "";
00024 
00025 SamFileHeader::SamFileHeader()
00026     : myHD(NULL),
00027       myReferenceInfo()
00028 {
00029     resetHeader();
00030 }
00031 
00032 
00033 SamFileHeader::~SamFileHeader()
00034 {
00035     resetHeader();
00036 }
00037 
00038 
00039 // Copy Constructor   
00040 SamFileHeader::SamFileHeader(const SamFileHeader& header)
00041 {
00042     copy(header);
00043 }
00044 
00045 
00046 // Overload operator = to copy the passed in header into this header.
00047 SamFileHeader & SamFileHeader::operator = (const SamFileHeader& header)
00048 {
00049     copy(header);
00050     return(*this);
00051 }
00052 
00053 
00054 bool SamFileHeader::copy(const SamFileHeader& header)
00055 {
00056     // Check to see if the passed in value is the same as this.
00057     if(this == &header)
00058     {
00059         return(true);
00060     }
00061 
00062     resetHeader();
00063     // Copy Reference contigs, hash, lengths.
00064     myReferenceInfo = header.myReferenceInfo;
00065 
00066     // Copy the records by getting the other header's header string
00067     // and parsing it.
00068     std::string newString;
00069     bool status = header.getHeaderString(newString);
00070     String newHeaderString = newString.c_str();
00071     
00072     status &= parseHeader(newHeaderString);
00073 
00074     myCurrentHeaderIndex = header.myCurrentHeaderIndex;
00075     myCurrentCommentIndex = header.myCurrentCommentIndex;
00076 
00077     return(status);
00078 }
00079 
00080 
00081 // Reset the header for a new entry, clearing out previous values.
00082 void SamFileHeader::resetHeader()
00083 {
00084     myReferenceInfo.clear();
00085 
00086     // Clear the pointers to the header records.  They are deleted when the
00087     // vector is cleaned up.
00088     myHD = NULL;
00089     mySQs.Clear();
00090     myRGs.Clear();
00091     myPGs.Clear();
00092 
00093     // Delete the header records and clear the vector.
00094     for(unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size(); 
00095         headerIndex++)
00096     {
00097         delete myHeaderRecords[headerIndex];
00098         myHeaderRecords[headerIndex] = NULL;
00099     }
00100     myHeaderRecords.clear();
00101 
00102     // Reset the iterator for the header lines.
00103     resetHeaderRecordIter();
00104 
00105     // Reset the comment iterator.
00106     resetCommentIter();
00107 
00108     // Reset the individual type header iterators.
00109     resetSQRecordIter();
00110     resetRGRecordIter();
00111     resetPGRecordIter();
00112 
00113     // Clear the comments
00114     myComments.clear();
00115 }
00116 
00117 
00118 // Set the passed in string to the entire header string.  Clearing its
00119 // current contents.
00120 bool SamFileHeader::getHeaderString(std::string& header) const
00121 {
00122     header.clear();
00123    
00124     // Keep getting header lines until there are no more - false returned.
00125     unsigned int index = 0;
00126     while(getHeaderLine(index, header) != false)
00127     {
00128         ++index;
00129     }
00130 
00131     return(true);
00132 }
00133 
00134 
00135 int SamFileHeader::getReferenceID(const String & referenceName, bool addID)
00136 {
00137     return(myReferenceInfo.getReferenceID(referenceName, addID));
00138 }
00139 
00140 
00141 int SamFileHeader::getReferenceID(const char* referenceName, bool addID)
00142 {
00143     return(myReferenceInfo.getReferenceID(referenceName, addID));
00144 }
00145 
00146 
00147 const String & SamFileHeader::getReferenceLabel(int id) const
00148 {
00149     return(myReferenceInfo.getReferenceLabel(id));
00150 }
00151 
00152 
00153 // Get the Reference Information
00154 const SamReferenceInfo* SamFileHeader::getReferenceInfo() const
00155 {
00156     return(&myReferenceInfo);
00157 }
00158 
00159 
00160 // Add reference sequence name and reference sequence length to the header.
00161 void SamFileHeader::addReferenceInfo(const char* referenceSequenceName, 
00162                                      int32_t referenceSequenceLength)
00163 {
00164     myReferenceInfo.add(referenceSequenceName, referenceSequenceLength);
00165 }
00166 
00167 
00168 // Populate the reference info from the SQ fields.
00169 void SamFileHeader::generateReferenceInfo()
00170 {
00171     // Loop through the SQ fields.
00172     uint32_t sqIndex = 0;
00173     SamHeaderRecord* hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00174     while(hdrRec != NULL)
00175     {
00176         // Set the reference info based on this SQ record.
00177         String refName = hdrRec->getTagValue("SN");
00178         String refLen = hdrRec->getTagValue("LN");
00179         long refLenInt = 0;
00180         if(refLen.AsInteger(refLenInt))
00181         {
00182             // Successfully converted the reference to an integer
00183             // so add the reference information.
00184             myReferenceInfo.add(refName, refLen);
00185         }
00186         hdrRec = getNextHeaderRecord(sqIndex, SamHeaderRecord::SQ);
00187     }
00188 }
00189 
00190 
00191 // Add a header line that has an const char* value.
00192 bool SamFileHeader::addHeaderLine(const char* type, const char* tag, 
00193                                   const char* value)
00194 {
00195     String headerLine;
00196     headerLine += "@";
00197     headerLine += type;
00198     headerLine += "\t";
00199     headerLine += tag;
00200     headerLine += ":";
00201     headerLine += value;
00202     return(addHeaderLine(headerLine.c_str()));
00203 }
00204 
00205 
00206 // Add a header line that is already preformatted in a const char*.
00207 // It is assumed that the line does not contain a \n.
00208 bool SamFileHeader::addHeaderLine(const char* headerLine)
00209 {
00210     // Parse the added header line.
00211     String headerString = headerLine;
00212     if(parseHeader(headerString))
00213     {
00214         // Successfully parsed the header line.
00215         return(true);
00216     }
00217     // Failed to parse the header line, return false.
00218     return(false);
00219 }
00220 
00221 
00222 // Add a comment.
00223 bool SamFileHeader::addComment(const char* comment)
00224 {
00225     if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0))
00226     {
00227         // Valid comment, so add it.
00228         myComments.push_back(comment);
00229     }
00230     return(true);
00231 }
00232 
00233 
00234 // Add the specified tag and value to the HD header.
00235 bool SamFileHeader::setHDTag(const char* tag, const char* value)
00236 {
00237     if(myHD == NULL)
00238     {
00239         // Need to create the HD line.
00240         myHD = new SamHeaderHD();
00241         if(myHD == NULL)
00242         {
00243             // New failed, return false.
00244             return(false);
00245         }
00246         // Succeeded to create the line, add it to the
00247         // list.
00248         myHeaderRecords.push_back(myHD);
00249     }
00250     return(myHD->setTag(tag, value));
00251 }
00252 
00253 
00254 // Add the specified tag and value to the SQ header with the specified name.
00255 // If the header does not yet exist, the header is added.
00256 bool SamFileHeader::setSQTag(const char* tag, const char* value,
00257                              const char* name)
00258 {
00259     // Get the SQ record for the specified name.
00260     SamHeaderSQ* sq = getSQ(name);
00261     if(sq == NULL)
00262     {
00263         // The SQ does not yet exist.
00264         // Add it.
00265         sq = new SamHeaderSQ();
00266 
00267         if(sq == NULL)
00268         {
00269             // Could not create the header record.
00270             return(false);
00271         }
00272 
00273         // Created the header record, so add it to the list of SQ lines.
00274         mySQs.Add(name, sq);
00275         myHeaderRecords.push_back(sq);
00276 
00277         // Add the key tag 
00278         if(!sq->addKey(name))
00279         {
00280             // Failed to add the key tag, return false.
00281             return(false);
00282         }
00283     }
00284 
00285     return(sq->setTag(tag, value));
00286 }
00287 
00288 
00289 // Add the specified tag and value to the RG header with the read group
00290 // identifier.  If the header does not yet exist, the header is added.
00291 bool SamFileHeader::setRGTag(const char* tag, const char* value, const char* id)
00292 {
00293     // Get the RG record for the specified name.
00294     SamHeaderRG* rg = getRG(id);
00295     if(rg == NULL)
00296     {
00297         // The RG does not yet exist.
00298         // Add it.
00299         rg = new SamHeaderRG();
00300 
00301         if(rg == NULL)
00302         {
00303             // Could not create the header record.
00304             return(false);
00305         }
00306 
00307         // Created the header record, so add it to the list of RG lines.
00308         myRGs.Add(id, rg);
00309         myHeaderRecords.push_back(rg);
00310 
00311         // Add the key tag 
00312         if(!rg->addKey(id))
00313         {
00314             // Failed to add the key tag, return false.
00315             return(false);
00316         }
00317     }
00318 
00319     return(rg->setTag(tag, value));
00320 }
00321 
00322 
00323 // Add the specified tag and value to the PG header with the specified id.
00324 // If the header does not yet exist, the header is added.
00325 // Add the specified tag and value to the PG header.
00326 bool SamFileHeader::setPGTag(const char* tag, const char* value, const char* id)
00327 {
00328     // Get the PG record for the specified name.
00329     SamHeaderPG* pg = getPG(id);
00330     if(pg == NULL)
00331     {
00332         // The PG does not yet exist.
00333         // Add it.
00334         pg = new SamHeaderPG();
00335 
00336         if(pg == NULL)
00337         {
00338             // Could not create the header record.
00339             return(false);
00340         }
00341 
00342         // Created the header record, so add it to the list of PG lines.
00343         myPGs.Add(id, pg);
00344         myHeaderRecords.push_back(pg);
00345 
00346         // Add the key tag 
00347         if(!pg->addKey(id))
00348         {
00349             // Failed to add the key tag, return false.
00350             return(false);
00351         }
00352     }
00353 
00354     return(pg->setTag(tag, value));
00355 }
00356 
00357 
00358 // Add the HD record to the header.
00359 bool SamFileHeader::addHD(SamHeaderHD* hd)
00360 {
00361     // If there is already an HD header or if null
00362     // was passed in, return false.
00363     if((myHD != NULL) || (hd == NULL))
00364     {
00365         return(false);
00366     }
00367     myHD = hd;
00368    
00369     myHeaderRecords.push_back(myHD);
00370     return(true);
00371 }
00372 
00373 
00374 // Add the SQ record to the header.
00375 bool SamFileHeader::addSQ(SamHeaderSQ* sq)
00376 {
00377     if(sq == NULL)
00378     {
00379         // null pointer passed in, can't add it.
00380         return(false);
00381     }
00382     const char* name = sq->getTagValue("SN");
00383     if(strcmp(name, EMPTY_RETURN.c_str()) == 0)
00384     {
00385         // SN is not set, so can't add it.
00386         return(false);
00387     }
00388 
00389     // Determine whether or not a record with this
00390     // key is already in the hash.
00391     if(mySQs.Find(name) < 0)
00392     {
00393         // It is not already in the hash so
00394         // add it.
00395         mySQs.Add(name, sq);
00396         myHeaderRecords.push_back(sq);
00397         return(true);
00398     }
00399 
00400     // It is already in the hash, so cannot be added.
00401     return(false);
00402 }
00403 
00404 
00405 // Add the RG record to the header.
00406 bool SamFileHeader::addRG(SamHeaderRG* rg)
00407 {
00408     if(rg == NULL)
00409     {
00410         // null pointer passed in, can't add it.
00411         return(false);
00412     }
00413     const char* id = rg->getTagValue("ID");
00414     if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00415     {
00416         // ID is not set, so can't add it.
00417         return(false);
00418     }
00419 
00420     // Determine whether or not a record with this
00421     // key is already in the hash.
00422     if(myRGs.Find(id) < 0)
00423     {
00424         // It is not already in the hash so
00425         // add it.
00426         myRGs.Add(id, rg);
00427         myHeaderRecords.push_back(rg);
00428         return(true);
00429     }
00430 
00431     // It is already in the hash, so cannot be added.
00432     return(false);
00433 }
00434 
00435 
00436 // Add the PG record to the header.
00437 bool SamFileHeader::addPG(SamHeaderPG* pg)
00438 {
00439     // If there is already an PG header, return false.
00440     if(pg == NULL)
00441     {
00442         return(false);
00443     }
00444     const char* id = pg->getTagValue("ID");
00445     if(strcmp(id, EMPTY_RETURN.c_str()) == 0)
00446     {
00447         // ID is not set, so can't add the header record.
00448         return(false);
00449     }
00450 
00451     // Determine whether or not a record with this
00452     // key is already in the hash.
00453     if(myPGs.Find(id) < 0)
00454     {
00455         // It is not already in the hash so
00456         // add it.
00457         myPGs.Add(id, pg);
00458         myHeaderRecords.push_back(pg);
00459         return(true);
00460     }
00461 
00462     // It is already in the hash, so cannot be added.
00463     return(false);
00464 }
00465 
00466 
00467 // Remove the HD record.
00468 bool SamFileHeader::removeHD()
00469 {
00470     if(myHD != NULL)
00471     {
00472         // Reset the record.  Do not delete it since it is in the headerRecords
00473         // vector and it is not worth the time to remove it from the middle of
00474         // that vector since this is the header and the space does not need
00475         // to be conserved.
00476         myHD->reset();
00477 
00478         // Set myHD to null so a new HD could be added.
00479         myHD = NULL;
00480     }
00481 
00482     return(true);
00483 }
00484 
00485 
00486 // Remove the SQ record associated with the specified name.
00487 bool SamFileHeader::removeSQ(const char* name)
00488 {
00489     // Look up the name in the hash.
00490     int hashIndex = mySQs.Find(name);
00491     if(hashIndex < 0)
00492     {
00493         // Not found in the hash, so nothing to
00494         // delete, return true it does not exist
00495         // in the hash.
00496         return(true);
00497     }
00498    
00499     // Get the SQ.
00500     SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(hashIndex));
00501 
00502     if(sq == NULL)
00503     {
00504         // sq is null, this is an error since hashIndex was greater than 0,
00505         // so it should have been found.
00506         return(false);
00507     }
00508 
00509     // Reset the record.  Do not delete it since it is in the headerRecords
00510     // vector and it is not worth the time to remove it from the middle of
00511     // that vector since this is the header and the space does not need
00512     // to be conserved.
00513     sq->reset();
00514 
00515     // Delete the entry from the hash.
00516     mySQs.Delete(hashIndex);
00517 
00518     return(true);
00519 }
00520 
00521 
00522 // Remove the RG record associated with the specified id.
00523 bool SamFileHeader::removeRG(const char* id)
00524 {
00525     // Look up the id in the hash.
00526     int hashIndex = myRGs.Find(id);
00527     if(hashIndex < 0)
00528     {
00529         // Not found in the hash, so nothing to
00530         // delete, return true it does not exist
00531         // in the hash.
00532         return(true);
00533     }
00534    
00535     // Get the RG.
00536     SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(hashIndex));
00537 
00538     if(rg == NULL)
00539     {
00540         // rg is null, this is an error since hashIndex was greater than 0,
00541         // so it should have been found.
00542         return(false);
00543     }
00544 
00545     // Reset the record.  Do not delete it since it is in the headerRecords
00546     // vector and it is not worth the time to remove it from the middle of
00547     // that vector since this is the header and the space does not need
00548     // to be conserved.
00549     rg->reset();
00550 
00551     // Delete the entry from the hash.
00552     myRGs.Delete(hashIndex);
00553 
00554     return(true);
00555 }
00556 
00557 
00558 // Remove the PG record associated with the specified id.
00559 bool SamFileHeader::removePG(const char* id)
00560 {
00561     // Look up the id in the hash.
00562     int hashIndex = myPGs.Find(id);
00563     if(hashIndex < 0)
00564     {
00565         // Not found in the hash, so nothing to
00566         // delete, return true it does not exist
00567         // in the hash.
00568         return(true);
00569     }
00570    
00571     // Get the PG.
00572     SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(hashIndex));
00573 
00574     if(pg == NULL)
00575     {
00576         // pg is null, this is an error since hashIndex was greater than 0,
00577         // so it should have been found.
00578         return(false);
00579     }
00580 
00581     // Reset the record.  Do not delete it since it is in the headerRecords
00582     // vector and it is not worth the time to remove it from the middle of
00583     // that vector since this is the header and the space does not need
00584     // to be conserved.
00585     pg->reset();
00586 
00587     // Delete the entry from the hash.
00588     myPGs.Delete(hashIndex);
00589 
00590     return(true);
00591 }
00592 
00593 
00594 SamStatus::Status SamFileHeader::setHeaderFromBamFile(IFILE filePtr)
00595 {
00596     if((filePtr == NULL) || (filePtr->isOpen() == false))
00597     {
00598         // File is not open, return failure.
00599         return(SamStatus::FAIL_ORDER);
00600     }
00601 
00602     int headerLength;
00603     // Read the header length.
00604     int readSize = ifread(filePtr, &headerLength, sizeof(int));
00605    
00606     if(readSize != sizeof(int))
00607     {
00608         // Failed to read the header length.
00609         return(SamStatus::FAIL_IO);
00610     }
00611    
00612     String header;
00613     if (headerLength > 0)
00614     {
00615         // Read the header.
00616         readSize = 
00617             ifread(filePtr, header.LockBuffer(headerLength + 1), headerLength);
00618         header[headerLength] = 0;
00619         header.UnlockBuffer();
00620         if(readSize != headerLength)
00621         {
00622             // Failed to read the header.
00623             return(SamStatus::FAIL_IO);
00624         }
00625     }
00626 
00627     // Parse the header that was read.
00628     parseHeader(header);
00629     return(SamStatus::SUCCESS);
00630 }
00631 
00632 
00633 const char* SamFileHeader::getHDTagValue(const char* tag)
00634 {
00635     if(myHD == NULL)
00636     {
00637         // return blank since there is no HD type.
00638         return(EMPTY_RETURN.c_str());
00639     }
00640     return(myHD->getTagValue(tag));
00641 }
00642 
00643 
00644 // Get the value associated with the specified tag on the SQ line with
00645 // the specified sequence name.
00646 const char* SamFileHeader::getSQTagValue(const char* tag, const char* name)
00647 {
00648     // Look up the name in the hash to get the associated SQ object.
00649     SamHeaderSQ* sq = (SamHeaderSQ*)(mySQs.Object(name));
00650    
00651     // If it is NULL - the tag was not found, so return
00652     if(sq == NULL)
00653     {
00654         return(EMPTY_RETURN.c_str());
00655     }
00656 
00657     // Found the object, so return the SQ Tag.
00658     return(sq->getTagValue(tag));
00659 }
00660 
00661 
00662 // Get the value associated with the specified tag on the RG line with
00663 // the specified read group identifier.
00664 const char* SamFileHeader::getRGTagValue(const char* tag, const char* id)
00665 {
00666     // Look up the id in the hash to get the associated RG object.
00667     SamHeaderRG* rg = (SamHeaderRG*)(myRGs.Object(id));
00668    
00669     // If it is NULL - the tag was not found, so return
00670     if(rg == NULL)
00671     {
00672         return(EMPTY_RETURN.c_str());
00673     }
00674 
00675     // Found the object, so return the RG Tag.
00676     return(rg->getTagValue(tag));
00677 }
00678 
00679 
00680 const char* SamFileHeader::getPGTagValue(const char* tag, const char* id)
00681 {
00682     // Look up the id in the hash to get the associated PG object.
00683     SamHeaderPG* pg = (SamHeaderPG*)(myPGs.Object(id));
00684    
00685     // If it is NULL - the tag was not found, so return
00686     if(pg == NULL)
00687     {
00688         return(EMPTY_RETURN.c_str());
00689     }
00690 
00691     // Found the object, so return the PG Tag.
00692     return(pg->getTagValue(tag));
00693 }
00694 
00695 
00696 // Get the number of SQ objects.
00697 int SamFileHeader::getNumSQs()
00698 {
00699     return(mySQs.Entries());
00700 }
00701 
00702 
00703 // Get the number of RG objects.
00704 int SamFileHeader::getNumRGs()
00705 {
00706     return(myRGs.Entries());
00707 }
00708 
00709 
00710 // Get the number of PG objects.
00711 int SamFileHeader::getNumPGs()
00712 {
00713     return(myPGs.Entries());
00714 }
00715 
00716 
00717 // Get the HD object.
00718 SamHeaderHD* SamFileHeader::getHD()
00719 {
00720     return(myHD);
00721 }
00722 
00723 
00724 // Get the SQ object with the specified sequence name.
00725 SamHeaderSQ* SamFileHeader::getSQ(const char* name)
00726 {
00727     return((SamHeaderSQ*)(mySQs.Object(name)));
00728 }
00729 
00730 
00731 // Get the RG object with the specified read group identifier.
00732 SamHeaderRG* SamFileHeader::getRG(const char* id)
00733 {
00734     return((SamHeaderRG*)(myRGs.Object(id)));
00735 }
00736 
00737 
00738 // Get the PG object.
00739 SamHeaderPG* SamFileHeader::getPG(const char* id)
00740 {
00741     return((SamHeaderPG*)(myPGs.Object(id)));
00742 }
00743 
00744 
00745 // Return the value of the SO tag.  
00746 // If this field does not exist, EMPTY_RETURN.c_str() is returned.
00747 const char* SamFileHeader::getSortOrder()
00748 {
00749     if(myHD == NULL)
00750     {
00751         // No HD, so return blank EMPTY_RETURN.c_str()
00752         return(EMPTY_RETURN.c_str());
00753     }
00754     return(myHD->getSortOrder());   
00755 }
00756 
00757 
00758 // Deprecated way of getting the sort order from the file.
00759 const char* SamFileHeader::getTagSO()
00760 {
00761     return(getSortOrder());
00762 }
00763 
00764 
00765 // Get the next SQ header record.  After all SQ headers have been retrieved,
00766 // NULL is returned until a reset is called.
00767 SamHeaderRecord* SamFileHeader::getNextSQRecord()
00768 {
00769     return(getNextHeaderRecord(myCurrentSQIndex, 
00770                                SamHeaderRecord::SQ));
00771 }
00772 
00773 
00774 // Get the next RG header record.  After all RG headers have been retrieved,
00775 // NULL is returned until a reset is called.
00776 SamHeaderRecord* SamFileHeader::getNextRGRecord()
00777 {
00778     return(getNextHeaderRecord(myCurrentRGIndex, 
00779                                SamHeaderRecord::RG));
00780 }
00781 
00782 
00783 // Get the next PG header record.  After all PG headers have been retrieved,
00784 // NULL is returned until a reset is called.
00785 SamHeaderRecord* SamFileHeader::getNextPGRecord()
00786 {
00787     return(getNextHeaderRecord(myCurrentPGIndex, 
00788                                SamHeaderRecord::PG));
00789 }
00790 
00791 
00792 // Reset to the beginning of the header records so the next call
00793 // to getNextSQRecord returns the first SQ header record.
00794 void SamFileHeader::resetSQRecordIter()
00795 {
00796     myCurrentSQIndex = 0;
00797 }
00798 
00799 
00800 // Reset to the beginning of the header records so the next call
00801 // to getNextRGRecord returns the first RG header record.
00802 void SamFileHeader::resetRGRecordIter()
00803 {
00804     myCurrentRGIndex = 0;
00805 }
00806 
00807 
00808 // Reset to the beginning of the header records so the next call
00809 // to getNextPGRecord returns the first PG header record.
00810 void SamFileHeader::resetPGRecordIter()
00811 {
00812     myCurrentPGIndex = 0;
00813 }
00814 
00815 
00816 // Get the next header record of the specified type.
00817 // Pass in the index to start looking at and the type to look for.
00818 // Update the index.
00819 // After all headers of that type have been retrieved,
00820 // NULL is returned until a reset is called for that type.
00821 SamHeaderRecord* SamFileHeader::getNextHeaderRecord(uint32_t& index, 
00822                                                     SamHeaderRecord::SamHeaderRecordType headerType)
00823 {
00824     SamHeaderRecord* foundRecord = NULL;
00825     // Loop until a record is found or until out of range of the 
00826     // headerRecord vector.
00827     while((index < myHeaderRecords.size()) 
00828           && (foundRecord == NULL))
00829     {
00830         // Get the next record.
00831         foundRecord = myHeaderRecords[index];
00832         // Either way, increment the index.
00833         ++index;
00834         // Check to see if the next record is active.
00835         if(!foundRecord->isActiveHeaderRecord())
00836         {
00837             // Not active, so clear the pointer.
00838             foundRecord = NULL;
00839         }
00840         // Check to see if the record is the right type.
00841         else if(foundRecord->getType() != headerType)
00842         {
00843             // Not the right type, so clear the pointer.
00844             foundRecord = NULL;
00845         }
00846     }
00847 
00848     // Return the record if it was found.  Will be null if none were found.
00849     return(foundRecord);
00850 }
00851 
00852 
00853 // Get the next header record.  After all headers have been retrieved,
00854 // NULL is returned until a reset is called.  Does not return the
00855 // Comment lines.
00856 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00857 // same iterator.
00858 SamHeaderRecord* SamFileHeader::getNextHeaderRecord()
00859 {
00860     // Get the next header record
00861     SamHeaderRecord* foundRecord = NULL;
00862     // Loop until a record is found or until out of range of the 
00863     // headerRecord vector.
00864     while((myCurrentHeaderIndex < myHeaderRecords.size()) 
00865           && (foundRecord == NULL))
00866     {
00867         // Get the next record.
00868         foundRecord = myHeaderRecords[myCurrentHeaderIndex];
00869         // Either way, increment the index.
00870         ++myCurrentHeaderIndex;
00871         // Check to see if the next record is active.
00872         if(!foundRecord->isActiveHeaderRecord())
00873         {
00874             // Not active, so clear the pointer.
00875             foundRecord = NULL;
00876         }
00877     }
00878 
00879     // Return the record if it was found.  Will be null if none were found.
00880     return(foundRecord);
00881 }
00882 
00883 
00884 // Set the passed in string to the next header line.  The passed in 
00885 // string will be overwritten.  If there are no more header lines or there
00886 // is an error, false is returned and the passed in string is set to EMPTY_RETURN.c_str()
00887 // until a rest is called.
00888 // Will also return the comment lines.
00889 // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00890 // same iterator.
00891 bool SamFileHeader::getNextHeaderLine(std::string &headerLine)
00892 {
00893     headerLine = EMPTY_RETURN.c_str();
00894 
00895     // Until the header is set, keep reading.
00896     // Header could return EMPTY_RETURN.c_str() if the header line is blank.
00897     while(headerLine == EMPTY_RETURN.c_str())
00898     {
00899         if(getHeaderLine(myCurrentHeaderIndex, headerLine) == false)
00900         {
00901             // getHeaderLine failed, so stop processing, and return false.
00902             return(false);
00903         }
00904         else
00905         {
00906             // In range, increment the index.
00907             ++myCurrentHeaderIndex;
00908         }
00909     }
00910     return(true);
00911 }
00912 
00913 
00914 // Reset to the beginning of the header records so the next call
00915 // to getNextHeaderRecord returns the first header line.
00916 void SamFileHeader::resetHeaderRecordIter()
00917 {
00918     myCurrentHeaderIndex = 0;
00919 }
00920 
00921 
00922 // Returns the comment on the next comment line.  Returns EMPTY_RETURN.c_str() if all comment
00923 // lines have been returned, until resetCommentIter is called.
00924 const char* SamFileHeader::getNextComment()
00925 {
00926     if(myCurrentCommentIndex < myComments.size())
00927     {
00928         return(myComments[myCurrentCommentIndex++].c_str());
00929     }
00930     // Already gone through all the comments, return EMPTY_RETURN.c_str().
00931     return(EMPTY_RETURN.c_str());
00932 }
00933 
00934 
00935 // Resets to the beginning of the comments so getNextComment returns
00936 // the first comment.
00937 void SamFileHeader::resetCommentIter()
00938 {
00939     myCurrentCommentIndex = 0;
00940 }
00941 
00942 
00943 // Parse the header.
00944 bool SamFileHeader::parseHeader(String& header)
00945 {
00946     // Track the parsing status.
00947     // If there are any badly formatted fields found in parsing
00948     // it will be set to false.
00949     bool status = true;
00950 
00951     // Split the header into lines.
00952     std::vector<String>* types = header.Split('\n');
00953 
00954     // Loop through each header line, parsing that line.
00955     for(uint32_t index = 0; index < types->size(); index++)
00956     {
00957         // Parse the header line.
00958         status &= parseHeaderLine(types->at(index));
00959     }
00960    
00961     // Delete the types vector.
00962     delete types;
00963     types = NULL;
00964 
00965     return(status);
00966 }
00967 
00968 
00969 // Parse one line of the header.
00970 bool SamFileHeader::parseHeaderLine(const String& headerLine)
00971 {
00972     StringArray tokens;
00973 
00974     // Split the line by tabs.
00975     tokens.ReplaceColumns(headerLine, '\t');
00976    
00977     if(tokens.Length() < 1)
00978     {
00979         // Nothing on this line, just return true.
00980         return(true);
00981     }
00982    
00983     // Get the header type, the first column.
00984     if((tokens[0].Length() != 3) || (tokens[0][0] != '@'))
00985     {
00986         // The header type string is incorrect.  Should be 3 characters
00987         // with the first one @.
00988         return(false);
00989     }
00990    
00991     bool status = true;
00992     if(tokens[0] == "@HD")
00993     {
00994         if(myHD == NULL)
00995         {
00996             // Create a new hd.
00997             myHD = new SamHeaderHD();
00998             if(myHD == NULL)
00999             {
01000                 // Failed to allocate HD, so return false.
01001                 return(false);
01002             }
01003             myHeaderRecords.push_back(myHD);
01004             status &= myHD->setFields(tokens);
01005         }
01006         else
01007         {
01008             // HD already set, so return false.
01009             status = false;
01010         }
01011     }
01012     else if(tokens[0] == "@SQ")
01013     {
01014         // Create a new SQ record.
01015         SamHeaderSQ* sq = new SamHeaderSQ();
01016       
01017         if(sq->setFields(tokens))
01018         {
01019             // sq fields were properly set, so add it to the list of
01020             // SQ lines.
01021             status &= addSQ(sq);
01022         }
01023         else
01024         {
01025             status = false;
01026         }
01027     }
01028     else if(tokens[0] == "@RG")
01029     {
01030         // Create a new RG record.
01031         SamHeaderRG* rg = new SamHeaderRG();
01032       
01033         if(rg->setFields(tokens))
01034         {
01035             // rg fields were properly set, so add it to the list of
01036             // RG lines.
01037             status &= addRG(rg);
01038         }
01039         else
01040         {
01041             status = false;
01042         }
01043     }
01044     else if(tokens[0] == "@PG")
01045     {
01046         // Create a new PG record.
01047         SamHeaderPG* pg = new SamHeaderPG();
01048       
01049         if(pg->setFields(tokens))
01050         {
01051             // pg fields were properly set, so add it to the list of
01052             // PG lines.
01053             status &= addPG(pg);
01054         }
01055         else
01056         {
01057             status = false;
01058         }
01059     }
01060     else if(tokens[0] == "@CO")
01061     {
01062         addComment(tokens[1]);
01063     }
01064     else
01065     {
01066         // Unknown header type.
01067         status = false;
01068     }
01069    
01070     return(status);
01071 }
01072 
01073 
01074 
01075 // Set the passed in string to the header line at the specified index.
01076 // It does NOT clear the current contents of header.
01077 // NOTE: some indexes will return blank if the entry was deleted.
01078 bool SamFileHeader::getHeaderLine(unsigned int index, std::string& header) const
01079 {
01080     // Check to see if the index is in range of the header records vector.
01081     if(index < myHeaderRecords.size())
01082     {
01083         // In range of the header records vector, so get the string for
01084         // that record.
01085         SamHeaderRecord* hdrRec = myHeaderRecords[index];
01086         hdrRec->appendString(header);
01087         return(true);
01088     }
01089     else
01090     {
01091         unsigned int commentIndex = index - myHeaderRecords.size();
01092         // Check to see if it is in range of the comments.
01093         if(commentIndex < myComments.size())
01094         {
01095             // It is in range of the comments, so add the type.
01096             header += "@CO\t";
01097             // Add the comment.
01098             header += myComments[commentIndex];
01099             // Add the new line.
01100             header += "\n";
01101             return(true);
01102         }
01103     }
01104     // Invalid index.
01105     return(false);
01106 }
Generated on Tue Sep 6 17:51:59 2011 for libStatGen Software by  doxygen 1.6.3