SamFileHeader.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __SAM_FILE_HEADER_H__
00019 #define __SAM_FILE_HEADER_H__
00020 
00021 #include <map>
00022 #include <stdint.h>
00023 
00024 #include "SamReferenceInfo.h"
00025 #include "SamHeaderHD.h"
00026 #include "SamHeaderSQ.h"
00027 #include "SamHeaderRG.h"
00028 #include "SamHeaderPG.h"
00029 #include "SamStatus.h"
00030 
00031 class SamFileHeader // Header of a SAM/BAM file: the HD, SQ, RG, and PG records,
00032 {                   // comment lines, and reference sequence information.
00033 public:
00034     SamFileHeader();
00035     ~SamFileHeader();
00036 
00037     // Copy constructor: create this header as a copy of the passed in header.
00038     SamFileHeader(const SamFileHeader& header);
00039 
00040     // Overload operator = to copy the passed in header into this header.
00041     SamFileHeader & operator = (const SamFileHeader& header);
00042 
00043     // Copy the passed in header into this header.  Presumably returns true on success (TODO confirm).
00044     bool copy(const SamFileHeader& header);
00045 
00046     void resetHeader();  // NOTE(review): presumably clears this header's contents - confirm.
00047 
00048     // Set the passed in string to the entire header string, clearing its
00049     // current contents.
00050     // Return true if successfully set (even if set to "")
00051     bool getHeaderString(std::string& header) const;
00052 
00053     // Get the reference ID for the specified name.
00054     // If addID is set to true, a reference id will be created for the
00055     // referenceName if one does not already exist.  If addID is set to false
00056     // (default) and no id exists, SamReferenceInfo::NO_REF_ID is returned.
00057     int   getReferenceID(const String & referenceName, bool addID = false);
00058     int   getReferenceID(const char* referenceName, bool addID = false);
00059     const String & getReferenceLabel(int id) const;  // Get the label for the specified reference id.
00060 
00061     // Get the Reference Information
00062     const SamReferenceInfo* getReferenceInfo() const;
00063 
00064     // Add reference sequence name and reference sequence length to the header.
00065     void addReferenceInfo(const char* referenceSequenceName, 
00066                           int32_t referenceSequenceLength);
00067 
00068     ////////////////////////////////////////////////////////////////////////
00069     // Set Values in the header
00070     ////////////////////////////////////////////////////////////////////////
00071 
00072     // Add a header line that is just one tag with a const char* value.
00073     bool addHeaderLine(const char* type, const char* tag, const char* value); 
00074     // Add a header line that is already preformatted in a const char*.
00075     // It is assumed that the line does not contain a \n.
00076     bool addHeaderLine(const char* headerLine);
00077 
00078 //     // Set the specified header type tag to the specified value in the 
00079 //     // header with the specified keyID.  keyID must be specified when
00080 //     // type = SQ, RG, or PG.
00081 //     bool setTag(SamHeaderRecord::SamHeaderRecordType type, const char* tag,
00082 //                 const char* value, const char* keyID = NULL);
00083 
00084     // Set the specified tag to the specified value in the HD header.
00085     bool setHDTag(const char* tag, const char* value);
00086 
00087     // Set the specified tag to the specified value in the SQ header with
00088     // the specified name.
00089     // If the header does not yet exist, the header is added.
00090     bool setSQTag(const char* tag, const char* value, const char* name);
00091 
00092     // Set the specified tag to the specified value in the RG header with
00093     // the read group identifier.
00094     // If the header does not yet exist, the header is added.
00095     bool setRGTag(const char* tag, const char* value, const char* id);
00096 
00097     // Set the specified tag to the specified value in the PG header with
00098     // the specified id.
00099     // If the header does not yet exist, the header is added.
00100     bool setPGTag(const char* tag, const char* value, const char* id);
00101 
00102     // Add the HD record to the header.
00103     // Note: it adds a pointer to the passed in header record.  The header
00104     // record will be deleted when it is cleaned up from this header.
00105     bool addHD(SamHeaderHD* hd);
00106 
00107     // Add the SQ record to the header.
00108     // Note: it adds a pointer to the passed in header record.  The header
00109     // record will be deleted when it is cleaned up from this header.
00110     bool addSQ(SamHeaderSQ* sq);
00111 
00112     // Add the RG record to the header.
00113     // Note: it adds a pointer to the passed in header record.  The header
00114     // record will be deleted when it is cleaned up from this header.
00115     bool addRG(SamHeaderRG* rg);
00116 
00117     // Add the PG record to the header.
00118     // Note: it adds a pointer to the passed in header record.  The header
00119     // record will be deleted when it is cleaned up from this header.
00120     bool addPG(SamHeaderPG* pg);
00121 
00122     ////////////////////////////////////////////////////////////////////////
00123     // Remove entries from the header
00124     ////////////////////////////////////////////////////////////////////////
00125     bool removeHD();  // Remove the HD record.
00126     bool removeSQ(const char* name); // Remove SQ record with the specified key.
00127     bool removeRG(const char* id); // Remove RG record with the specified key.
00128     bool removePG(const char* id); // Remove PG record with the specified key.
00129 
00130 
00131     ////////////////////////////////////////////////////////////////////////
00132     // Set the header from a BAM file & get values from the header
00133     ////////////////////////////////////////////////////////////////////////
00134     SamStatus::Status setHeaderFromBamFile(IFILE filePtr);
00135     // Get the value associated with the specified tag on the HD line.
00136     const char* getHDTagValue(const char* tag);
00137     // Get the value associated with the specified tag on the SQ line with
00138     // the specified sequence name.
00139     const char* getSQTagValue(const char* tag, const char* name);
00140     // Get the value associated with the specified tag on the RG line with
00141     // the specified read group identifier.
00142     const char* getRGTagValue(const char* tag, const char* id);
00143     // Get the value associated with the specified tag on the PG line with
00144     // the specified id.
00145     const char* getPGTagValue(const char* tag, const char* id);
00146 
00147     // Get the number of SQ objects.
00148     int getNumSQs();
00149 
00150     // Get the number of RG objects.
00151     int getNumRGs();
00152 
00153     // Get the number of PG objects.
00154     int getNumPGs();
00155 
00156     // Get the HD object.
00157     SamHeaderHD* getHD();
00158 
00159     // Get the SQ object with the specified sequence name.
00160     SamHeaderSQ* getSQ(const char* name);
00161 
00162     // Get the RG object with the specified read group identifier.
00163     SamHeaderRG* getRG(const char* id);
00164 
00165     // Get the PG object with the specified id.
00166     SamHeaderPG* getPG(const char* id);
00167 
00168 //     //////////////////////////////////
00169 //     // Set methods for header fields.
00170 //     bool setVersion(const char* version);
00171 //     bool setSortOrder(const char* sortOrder);
00172 //     bool addSequenceName(const char* sequenceName);
00173 //     bool setSequenceLength(const char* keyID, int sequenceLength);
00174 //     bool setGenomeAssemblyId(const char* keyID, const char* genomeAssemblyId);
00175 //     bool setMD5Checksum(const char* keyID, const char* md5sum);
00176 //     bool setURI(const char* keyID, const char* uri);
00177 //     bool setSpecies(const char* keyID, const char* species);
00178 //     bool addReadGroupID(const char* readGroupID);
00179 //     bool setSample(const char* keyID, const char* sample);
00180 //     bool setLibrary(const char* keyID, const char* library);
00181 //     bool setDescription(const char* keyID, const char* description);
00182 //     bool setPlatformUnit(const char* keyID, const char* platform);
00183 //     bool setPredictedMedianInsertSize(const char* keyID, const char* isize);
00184 //     bool setSequencingCenter(const char* keyID, const char* center);
00185 //     bool setRunDate(const char* keyID, const char* runDate);
00186 //     bool setTechnology(const char* keyID, const char* technology);
00187 //     bool addProgram(const char* programID);
00188 //     bool setProgramVersion(const char* keyID, const char* version);
00189 //     bool setCommandLine(const char* keyID, const char* commandLine);
00190     
00191 //     ///////////////////////////////////
00192 //     // Get methods for header fields.
00193 //     // Returns the number of SQ entries in the header.
00194 //     int32_t getSequenceDictionaryCount();
00195     // Return the Sort Order value that is set in the Header.
00196     // If this field does not exist, "" is returned.
00197     const char* getSortOrder();
00198 
00199 
00200     // DEPRECATED - NOTE(review): presumably replaced by getSortOrder(); confirm.
00201     const char* getTagSO();
00202 
00203     // Get the next SQ header record.  After all SQ headers have been retrieved,
00204     // NULL is returned until a reset is called.
00205     SamHeaderRecord* getNextSQRecord();
00206 
00207     // Get the next RG header record.  After all RG headers have been retrieved,
00208     // NULL is returned until a reset is called.
00209     SamHeaderRecord* getNextRGRecord();
00210 
00211     // Get the next PG header record.  After all PG headers have been retrieved,
00212     // NULL is returned until a reset is called.
00213     SamHeaderRecord* getNextPGRecord();
00214 
00215     // Reset to the beginning of the header records so the next call
00216     // to getNextSQRecord returns the first SQ header record.
00217     void resetSQRecordIter();
00218 
00219     // Reset to the beginning of the header records so the next call
00220     // to getNextRGRecord returns the first RG header record.
00221     void resetRGRecordIter();
00222 
00223     // Reset to the beginning of the header records so the next call
00224     // to getNextPGRecord returns the first PG header record.
00225     void resetPGRecordIter();
00226 
00227     // Get the next header record of the specified type.
00228     // Pass in the index to start looking at and the type to look for.
00229     // Update the index.
00230     // After all headers of that type have been retrieved,
00231     // NULL is returned until a reset is called for that type.
00232     SamHeaderRecord* getNextHeaderRecord(uint32_t& index, 
00233                                          SamHeaderRecord::SamHeaderRecordType headerType);
00234 
00235     // Get the next header record.  After all headers have been retrieved,
00236     // NULL is returned until a reset is called.  Does not return the
00237     // Comment lines.
00238     // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00239     // same iterator.
00240     SamHeaderRecord* getNextHeaderRecord();
00241 
00242 
00243     // Set the passed in string to the next header line.  The passed in 
00244     // string will be overwritten.  If there are no more header lines or there
00245     // is an error, false is returned and the passed in string is set to ""
00246     // until a reset is called.
00247     // Will also return the comment lines.
00248     // NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
00249     // same iterator.
00250     bool getNextHeaderLine(std::string &headerLine);
00251 
00252     // Reset to the beginning of the header records so the next call
00253     // to getNextHeaderRecord returns the first header line.
00254     void resetHeaderRecordIter();
00255    
00256     // Returns the comment on the next comment line.  Returns "" if all comment
00257     // lines have been returned, until resetCommentIter is called.
00258     const char* getNextComment();
00259 
00260     // Resets to the beginning of the comments so getNextComment returns
00261     // the first comment.
00262     void resetCommentIter();
00263 
00264     // Add a comment.
00265     bool addComment(const char* comment);
00266 
00267     // Populate the reference info from the SQ fields.
00268     void generateReferenceInfo();
00269 
00270 
00271 private:
00272     // Parse the header string.
00273     bool parseHeader(String& header);
00274 
00275     // Parse the specified line of the header.
00276     bool parseHeaderLine(const String& headerLine);
00277 
00278     // Set the passed in string to the header line at the specified index.
00279     // It does NOT clear the current contents of header.
00280     bool getHeaderLine(unsigned int index, std::string& header) const;
00281 
00282     int16_t makeKey(char ch1, char ch2)  // Pack two characters into one 16-bit key.
00283     {   // NOTE(review): if char is signed, a negative ch1 is left-shifted below; assumes ASCII tag characters - confirm.
00284         return((ch1 << 8) + ch2);  // ch1 shifted into the high byte, ch2 added into the low byte
00285     }
00286 
00287     // Only one HD type is allowed per file.
00288     SamHeaderHD* myHD;
00289 
00290     // There can be multiple SQ Types, indexed by SN.
00291     StringHash mySQs;
00292 
00293     // There can be multiple RG Types, indexed by ID.
00294     StringHash myRGs;
00295 
00296     // There can be multiple PG types, indexed by ID.
00297     StringHash myPGs;
00298 
00299     // Reference Name information
00300     SamReferenceInfo myReferenceInfo;
00301 
00302     // Vector of comments
00303     std::vector<std::string> myComments;
00304 
00305     std::vector<SamHeaderRecord*> myHeaderRecords;  // NOTE(review): presumably the records the getNext*Record calls index into - confirm.
00306 
00307     uint32_t myCurrentSQIndex;  // presumably the getNextSQRecord position (TODO confirm)
00308 
00309     uint32_t myCurrentRGIndex;  // presumably the getNextRGRecord position (TODO confirm)
00310 
00311     uint32_t myCurrentPGIndex;  // presumably the getNextPGRecord position (TODO confirm)
00312 
00313     uint32_t myCurrentHeaderIndex;  // presumably the iterator shared by getNextHeaderRecord/getNextHeaderLine (TODO confirm)
00314 
00315     uint32_t myCurrentCommentIndex;  // presumably the getNextComment position (TODO confirm)
00316 
00317     static const std::string EMPTY_RETURN;  // presumably the "" handed back when there is no value (TODO confirm)
00318 };
00319 
00320 #endif
00321 
Generated on Tue Aug 23 18:19:04 2011 for libStatGen Software by  doxygen 1.6.3