SamRecord.h

00001 /*
00002  *  Copyright (C) 2010  Regents of the University of Michigan
00003  *
00004  *   This program is free software: you can redistribute it and/or modify
00005  *   it under the terms of the GNU General Public License as published by
00006  *   the Free Software Foundation, either version 3 of the License, or
00007  *   (at your option) any later version.
00008  *
00009  *   This program is distributed in the hope that it will be useful,
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  *   GNU General Public License for more details.
00013  *
00014  *   You should have received a copy of the GNU General Public License
00015  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
00016  */
00017 
00018 #ifndef __SAM_RECORD_H__
00019 #define __SAM_RECORD_H__
00020 
00021 #include <stdint.h>
00022 
00023 #include "GenomeSequence.h"
00024 #include "SamStatus.h"
00025 #include "LongHash.h"
00026 #include "MathVector.h"
00027 #include "StringArray.h"
00028 #include "IntArray.h"
00029 #include "SamFileHeader.h"
00030 #include "CigarRoller.h"
00031 
00032 /// Structure of a BAM record: fixed-size fields followed by the record's data bytes.
00033 struct bamRecordStruct
00034 {
00035 public:
00036     int32_t      myBlockSize;              // BAM block size of the record.
00037     int32_t      myReferenceID;            // Reference sequence id.
00038     int32_t      myPosition;               // Leftmost position (presumably 0-based as in BAM - confirm).
00039     uint32_t     myReadNameLength : 8, myMapQuality : 8, myBin : 16; // Three bit-fields of 8, 8 and 16 bits.
00040     uint32_t     myCigarLength : 16, myFlag : 16; // Two bit-fields of 16 bits each.
00041     int32_t      myReadLength;             // Length of the read.
00042     int32_t      myMateReferenceID;        // Mate reference sequence id.
00043     int32_t      myMatePosition;           // Leftmost mate position (presumably 0-based as in BAM - confirm).
00044     int32_t      myInsertSize;             // Outer fragment length
00045     char  myData[1];                       // NOTE(review): looks like a placeholder marking the start of the variable-length data - confirm.
00046 };
00047 
00048 
00049 /// Class providing an easy to use interface to get/set/operate on the
00050 /// fields in a SAM/BAM record. 
00051 class SamRecord
00052 {
00053 public:
00054     /// Enum containing the settings on how to translate the sequence if a
00055     /// reference is available.  If no reference is available, no translation
00056     /// is done.
00057     enum SequenceTranslation { 
00058         NONE,   ///< Leave the sequence as is.
00059         EQUAL,  ///< Translate bases that match the reference to '='
00060         BASES,  ///< Translate '=' to the actual base.
00061     };
00062 
00063     /// Default Constructor.
00064     SamRecord();
00065 
00066     /// Constructor that sets the error handling type.
00067     /// \param errorHandlingType how to handle errors.
00068     SamRecord(ErrorHandler::HandlingType errorHandlingType);
00069 
00070     /// Destructor
00071     ~SamRecord();
00072 
00073     /// Reset the fields of the record to a default value.
00074     /// This is not necessary when you are reading a Sam/Bam file, 
00075     /// but if you are setting fields, it is a good idea to clean
00076     /// out a record before reusing it. Clearing it allows you to 
00077     /// not have to set any empty fields. 
00078     void resetRecord();
00079 
00080     /// Reset the tag iterator to the beginning of the tags.
00081     void resetTagIter();
00082  
00083     /// Returns whether or not the record is valid.
00084     /// Sets the status to indicate success or failure.
00085     /// \param header SAM Header associated with the record.  Used to perform
00086     /// some validation against the header.
00087     /// \return true if the record is valid, false if not.
00088     bool isValid(SamFileHeader& header);
00089 
00090     /// Read the BAM record from a file.
00091     /// \param filePtr file to read the buffer from.
00092     /// \param header BAM header for the record.
00093     /// \return status of reading the BAM record from the file.
00094     SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader& header);
00095 
00096     /// Set the reference to the specified genome sequence object.
00097     /// \param reference pointer to the GenomeSequence object.
00098     void setReference(GenomeSequence* reference);
00099 
00100     /// Set the type of sequence translation to use when getting
00101     /// the sequence.  The default type (if this method is never called) is
00102     /// NONE (the sequence is left as-is).  Can be over-ridden by using 
00103     /// the accessors that take a SequenceTranslation parameter.
00104     /// \param translation type of sequence translation to use.
00105     void setSequenceTranslation(SequenceTranslation translation);
00106 
00107     ///////////////////////
00108     /// @name  Set Alignment Data
00109     /// Set methods for record fields.  All of the "set" methods set the
00110     /// status to indicate success or the failure reason.
00111     //@{
00112 
00113     /// Set QNAME to the passed in name.
00114     /// \param readName the readname to set the QNAME to.
00115     /// \return true if successfully set, false if not.
00116     bool setReadName(const char* readName);
00117 
00118     /// Set the bitwise flag to the specified value.
00119     /// \param flag integer flag to use.
00120     /// \return true if successfully set, false if not.
00121     bool setFlag(uint16_t flag);
00122     
00123     /// Set the reference name to the specified name, using the header to
00124     /// determine the reference id.
00125     /// \param header SAM/BAM header to use to determine the reference id.
00126     /// \param referenceName reference name to use.
00127     /// \return true if successfully set, false if not
00128     bool setReferenceName(SamFileHeader& header, 
00129                           const char* referenceName);
00130 
00131     /// Set the leftmost position using the specified 1-based (SAM format)
00132     /// value.
00133     /// Internal processing handles the switching between SAM/BAM formats 
00134     /// when read/written.
00135     /// \param position 1-based start position
00136     /// \return true if successfully set, false if not.
00137     bool set1BasedPosition(int32_t position);
00138 
00139     /// Set the leftmost position using the specified 0-based (BAM format)
00140     /// value.
00141     /// Internal processing handles the switching between SAM/BAM formats 
00142     /// when read/written.
00143     /// \param position 0-based start position
00144     /// \return true if successfully set, false if not.
00145     bool set0BasedPosition(int32_t position);
00146 
00147     /// Set the mapping quality.
00148     /// \param mapQuality map quality to set in the record.
00149     /// \return true if successfully set, false if not.
00150     bool setMapQuality(uint8_t mapQuality);
00151 
00152     /// Set the CIGAR to the specified SAM formatted cigar string.
00153     /// Internal processing handles the switching between SAM/BAM formats 
00154     /// when read/written.
00155     /// \param cigar string containing the SAM formatted cigar.
00156     /// \return true if successfully set, false if not.
00157     bool setCigar(const char* cigar);
00158 
00159     /// Set the CIGAR to the specified Cigar object.
00160     /// Internal processing handles the switching between SAM/BAM formats 
00161     /// when read/written.
00162     /// \param cigar object to set this record's cigar to have.
00163     /// \return true if successfully set, false if not.
00164     bool setCigar(const Cigar& cigar);
00165 
00166 
00167     /// Set the mate reference sequence name to the specified name, using the
00168     /// header to determine the mate reference id.
00169     /// \param header SAM/BAM header to use to determine the mate reference id.
00170     /// \param mateReferenceName mate reference name to use.
00171     /// \return true if successfully set, false if not
00172     bool setMateReferenceName(SamFileHeader& header,
00173                               const char* mateReferenceName);
00174 
00175     /// Set the leftmost mate position using the specified 1-based (SAM format)
00176     /// value.
00177     /// Internal processing handles the switching between SAM/BAM formats 
00178     /// when read/written.
00179     /// \param matePosition 1-based mate start position
00180     /// \return true if successfully set, false if not.
00181     bool set1BasedMatePosition(int32_t matePosition);
00182 
00183     /// Set the leftmost mate position using the specified 0-based (BAM format)
00184     /// value.
00185     /// Internal processing handles the switching between SAM/BAM formats 
00186     /// when read/written.
00187     /// \param matePosition 0-based mate start position
00188     /// \return true if successfully set, false if not.
00189     bool set0BasedMatePosition(int32_t matePosition);
00190 
00191     /// Sets the inferred insert size.
00192     /// \param insertSize inferred insert size.
00193     /// \return true if successfully set, false if not.
00194     bool setInsertSize(int32_t insertSize);
00195 
00196     /// Sets the sequence to the specified sequence string.  This is a 
00197     /// SAM formatted sequence string.  Internal processing handles switching
00198     /// between SAM/BAM formats when read/written.
00199     /// \param seq SAM sequence string.  May contain '='.
00200     /// \return true if successfully set, false if not.
00201     bool setSequence(const char* seq);
00202 
00203     /// Sets the quality to the specified quality string.  This is a SAM 
00204     /// formatted quality string.  Internal processing handles switching 
00205     /// between SAM/BAM formats when read/written.
00206     /// \param quality SAM quality string.
00207     /// \return true if successfully set, false if not.
00208     bool setQuality(const char* quality);
00209 
00210     /// Sets the SamRecord to contain the information in BAM format
00211     /// found in fromBuffer.
00212     /// \param fromBuffer buffer to read the BAM record from.
00213     /// \param fromBufferSize size of the buffer containing the BAM record.
00214     /// \param header BAM header for the record.
00215     /// \return status of reading the BAM record from the buffer.
00216     SamStatus::Status setBuffer(const char* fromBuffer, uint32_t fromBufferSize,
00217                                 SamFileHeader& header);
00218 
00219     /// Add the specified tag to the record.  Internal processing handles 
00220     /// switching between SAM/BAM formats when read/written.
00221     /// \param tag two character tag to be added to the SAM/BAM record.
00222     /// \param vtype vtype of the specified value - either SAM/BAM vtypes.
00223     /// \param value value for the specified tag.
00224     /// \return true if the tag was successfully added, false otherwise.
00225     bool addTag(const char* tag, char vtype, const char* value);
00226 
00227     //@}
00228 
00229     ///////////////////////
00230     /// @name  Get Alignment Data
00231     /// Get methods for record fields.  All of the "get" methods set the
00232     /// status to indicate success or the failure reason.
00233     //@{
00234 
00235     /// Get a const pointer to the buffer that contains the BAM representation
00236     /// of the record.
00237     /// \return const pointer to the buffer that contains the BAM representation
00238     /// of the record.
00239     const void* getRecordBuffer();
00240 
00241     /// Get a const pointer to the buffer that contains the BAM representation
00242     /// of the record.
00243     /// \param translation type of sequence translation to use.
00244     /// \return const pointer to the buffer that contains the BAM representation
00245     /// of the record.
00246     const void* getRecordBuffer(SequenceTranslation translation);
00247 
00248     /// Write the record as a BAM into the specified file.
00249     /// \param filePtr file to write the BAM record into.
00250     /// \return status of the write.
00251     SamStatus::Status writeRecordBuffer(IFILE filePtr);
00252 
00253     /// Write the record as a BAM into the specified file.
00254     /// \param filePtr file to write the BAM record into.
00255     /// \param translation type of sequence translation to use.
00256     /// \return status of the write.
00257     SamStatus::Status writeRecordBuffer(IFILE filePtr, 
00258                                         SequenceTranslation translation);
00259 
00260     /// Get the block size of the record.
00261     /// \return BAM block size of the record.
00262     int32_t getBlockSize();
00263 
00264     /// Get the reference sequence name of the record.
00265     /// \return reference sequence name
00266     const char* getReferenceName();
00267 
00268     /// Get the reference sequence id of the record.
00269     /// \return reference sequence id
00270     int32_t getReferenceID();
00271 
00272     /// Get the 1-based(SAM) leftmost position of the record.
00273     /// \return 1-based leftmost position.
00274     int32_t get1BasedPosition();
00275  
00276     /// Get the 0-based(BAM) leftmost position of the record.
00277     /// \return 0-based leftmost position.
00278    int32_t get0BasedPosition();
00279 
00280     /// Get the length of the readname (QNAME) including the null.
00281     /// \return length of the read name (including null).
00282     uint8_t getReadNameLength();
00283 
00284     /// Get the mapping quality of the record.
00285     /// \return map quality.
00286     uint8_t getMapQuality();
00287 
00288     /// Get the BAM bin for the record.
00289     /// \return BAM bin
00290     uint16_t getBin();
00291 
00292     /// Get the length of the CIGAR in BAM format.
00293     /// \return length of BAM formatted cigar.
00294     uint16_t getCigarLength();
00295 
00296     /// Get the flag.
00297     /// \return flag.
00298     uint16_t getFlag();
00299 
00300     /// Get the length of the read.
00301     /// \return read length.
00302     int32_t getReadLength();
00303 
00304     /// Get the mate reference sequence name of the record.  If it is equal to
00305     /// the reference name, it still returns the reference name.
00306     /// \return reference sequence name
00307     const char* getMateReferenceName();
00308 
00309     /// Get the mate reference sequence name of the record, returning "=" if
00310     /// it is the same as the reference name, unless they are both "*" in
00311     /// which case "*" is returned.
00312     /// \return reference sequence name
00313     const char* getMateReferenceNameOrEqual();
00314 
00315     /// Get the mate reference id of the record.
00316     /// \return reference id
00317     int32_t getMateReferenceID();
00318 
00319     /// Get the 1-based(SAM) leftmost mate position of the record.
00320     /// \return 1-based leftmost position.
00321     int32_t get1BasedMatePosition();
00322 
00323     /// Get the 0-based(BAM) leftmost mate position of the record.
00324     /// \return 0-based leftmost position.
00325     int32_t get0BasedMatePosition();
00326 
00327     /// Get the inferred insert size of the read pair.
00328     /// \return inferred insert size.
00329     int32_t getInsertSize();
00330 
00331     /// Returns the 0-based inclusive rightmost position of the
00332     /// clipped sequence.
00333     /// \return 0-based inclusive rightmost position
00334     int32_t get0BasedAlignmentEnd();
00335 
00336     /// Returns the 1-based inclusive rightmost position of the
00337     /// clipped sequence.
00338     /// \return 1-based inclusive rightmost position
00339     int32_t get1BasedAlignmentEnd();
00340    
00341     /// Returns the length of the clipped sequence, returning 0 if the cigar
00342     /// is '*'.
00343     /// \return length of the clipped sequence.
00344     int32_t getAlignmentLength();
00345 
00346     /// Returns the 0-based inclusive left-most position adjusted for
00347     /// clipped bases.
00348     /// \return 0-based inclusive leftmost position including clips.
00349     int32_t get0BasedUnclippedStart();
00350 
00351     /// Returns the 1-based inclusive left-most position adjusted for
00352     /// clipped bases.
00353     /// \return 1-based inclusive leftmost position including clips.
00354     int32_t get1BasedUnclippedStart();
00355 
00356     /// Returns the 0-based inclusive right-most position adjusted for
00357     /// clipped bases.
00358     /// \return 0-based inclusive rightmost position including clips.
00359     int32_t get0BasedUnclippedEnd();
00360  
00361     /// Returns the 1-based inclusive right-most position adjusted for
00362     /// clipped bases.
00363     /// \return 1-based inclusive rightmost position including clips.
00364     int32_t get1BasedUnclippedEnd();
00365 
00366     /// Returns the SAM formatted Read Name (QNAME).
00367     /// \return read name.
00368     const char* getReadName();
00369 
00370     /// Returns the SAM formatted CIGAR string.
00371     /// \return cigar string.
00372     const char* getCigar();
00373 
00374     /// Returns the SAM formatted sequence string, translating the base as
00375     /// specified by setSequenceTranslation.
00376     /// \return sequence string.
00377     const char* getSequence();
00378 
00379     /// Returns the SAM formatted sequence string performing the specified
00380     /// sequence translation.
00381     /// \param translation type of sequence translation to use.
00382     /// \return sequence string.
00383     const char* getSequence(SequenceTranslation translation);
00384 
00385     /// Returns the SAM formatted quality string.
00386     /// \return quality string.
00387     const char* getQuality();
00388 
00389     /// Get the sequence base at the specified index into this sequence 0 to
00390     /// readLength - 1, translating the base as specified by
00391     /// setSequenceTranslation.
00392     /// \param index index into the sequence string (0 to readLength-1).
00393     /// \return the sequence base at the specified index into the sequence.
00394     char getSequence(int index);
00395     
00396     /// Get the sequence base at the specified index into this sequence 0 to
00397     /// readLength - 1, performing the specified sequence translation.
00398     /// \param index index into the sequence string (0 to readLength-1).
00399     /// \param translation type of sequence translation to use.
00400     /// \return the sequence base at the specified index into the sequence.
00401     char getSequence(int index, SequenceTranslation translation);
00402     
00403     /// Get the quality character at the specified index into the quality 0 to
00404     /// readLength - 1.
00405     /// \param index index into the quality string (0 to readLength-1).
00406     /// \return the quality character at the specified index into the quality.
00407     char getQuality(int index);
00408    
00409     /// Returns a pointer to the Cigar object associated with this record.  
00410     /// The object is essentially read-only, only allowing modifications 
00411     /// due to lazy evaluations.
00412     /// \return pointer to the Cigar object.
00413     // TODO - want this to be getCigar
00414     Cigar* getCigarInfo();
00415 
00416     /// Returns the length of the tags in BAM format.
00417     /// \return length of tags in BAM format.
00418     uint32_t getTagLength();
00419 
00420     /// Get the next tag from the record.
00421     /// Sets the Status to SUCCESS when a tag is successfully returned or
00422     /// when there are no more tags.  Otherwise the status is set to describe
00423     /// why it failed (parsing, etc).
00424     /// \param tag set to the tag when a tag is read.
00425     /// \param vtype set to the vtype when a tag is read.
00426     /// \param value pointer to the value of the tag (will need to cast
00427     /// to int, double, char, or string based on vtype).
00428     /// \return true if a tag was read, false if there are no more tags.
00429     bool getNextSamTag(char* tag, char& vtype, void** value);
00430 
00431     /// Returns the values of all fields except the tags.
00432     /// \param recStruct structure containing the contents of all 
00433     /// non-variable length fields.
00434     /// \param readName read name from the record (return param)
00435     /// \param cigar cigar string from the record (return param)
00436     /// \param sequence sequence string from the record (return param)
00437     /// \param quality quality string from the record (return param)
00438     /// \return true if all fields were successfully set, false otherwise.
00439     bool getFields(bamRecordStruct& recStruct, String& readName, 
00440                    String& cigar, String& sequence, String& quality);
00441 
00442     /// Returns the values of all fields except the tags.
00443     /// \param recStruct structure containing the contents of all 
00444     /// non-variable length fields.
00445     /// \param readName read name from the record (return param)
00446     /// \param cigar cigar string from the record (return param)
00447     /// \param sequence sequence string from the record (return param)
00448     /// \param quality quality string from the record (return param)
00449     /// \param translation type of sequence translation to use.
00450     /// \return true if all fields were successfully set, false otherwise.
00451     bool getFields(bamRecordStruct& recStruct, String& readName, 
00452                    String& cigar, String& sequence, String& quality,
00453                    SequenceTranslation translation);
00454 
00455     //@}
00456 
00457     /// Returns whether or not the specified vtype is an integer type.
00458     /// Does not set SamStatus.
00459     /// \param vtype value type to check.
00460     /// \return true if the passed in vtype is an integer ('c', 'C', 's',
00461     /// 'S', 'i', 'I'), false otherwise.
00462     bool isIntegerType(char vtype) const;
00463 
00464     /// Returns whether or not the specified vtype is a double type.
00465     /// Does not set SamStatus.
00466     /// \param vtype value type to check.
00467     /// \return true if the passed in vtype is a double ('f'), false otherwise.
00468     bool isDoubleType(char vtype) const;
00469 
00470     /// Returns whether or not the specified vtype is a char type.
00471     /// Does not set SamStatus.
00472     /// \param vtype value type to check.
00473     /// \return true if the passed in vtype is a char ('A'), false otherwise.
00474     bool isCharType(char vtype) const;
00475 
00476     /// Returns whether or not the specified vtype is a string type.
00477     /// Does not set SamStatus.
00478     /// \param vtype value type to check.
00479     /// \return true if the passed in vtype is a string ('Z'), false otherwise.
00480     bool isStringType(char vtype) const;
00481 
00482     /// Clear the tags in this record.
00483     /// Does not set SamStatus.
00484     void clearTags();
00485    
00486     /// Returns the status associated with the last method that sets the status.
00487     /// \return SamStatus of the last command that sets status.
00488     const SamStatus& getStatus();
00489     
00490     /// Get the string value for the specified tag.
00491     /// Does not set SamStatus.
00492     String & getString(const char * tag);
00493 
00494     /// Get the integer value for the specified tag.
00495     /// Does not set SamStatus.
00496     int &    getInteger(const char * tag);
00497 
00498     /// Get the double value for the specified tag.
00499     /// Does not set SamStatus.
00500     double & getDouble(const char * tag);
00501 
00502 
00503 //     void getSamExtraFieldFromKey(int key, String& extraField);
00504     
00505     /// Check if the specified tag contains a string.
00506     /// Does not set SamStatus.
00507     /// \param tag SAM tag to check contents of.
00508     /// \return true if the value associated with the tag is a string.
00509     bool checkString(const char * tag)    { return checkTag(tag, 'Z'); }
00510     
00511     /// Check if the specified tag contains an integer (checked as vtype 'i').
00512     /// Does not set SamStatus.
00513     /// \param tag SAM tag to check contents of.
00514     /// \return true if the value associated with the tag is an integer.
00515     bool checkInteger(const char * tag)   { return checkTag(tag, 'i'); }
00516     
00517     /// Check if the specified tag contains a double (checked as vtype 'f').
00518     /// Does not set SamStatus.
00519     /// \param tag SAM tag to check contents of.
00520     /// \return true if the value associated with the tag is a double.
00521     bool checkDouble(const char * tag)    { return checkTag(tag, 'f'); }
00522      
00523     /// Check if the specified tag contains a value of the specified vtype.
00524     /// Does not set SamStatus.
00525     /// \param tag SAM tag to check contents of.
00526     /// \param type value type to check if the SAM tag matches.
00527     /// \return true if the value associated with the tag is of the specified type.
00528    bool checkTag(const char * tag, char type);
00529 
00530     
00531     /// Return the number of bases in this read that overlap the passed in
00532     /// region.
00533     /// \param start inclusive 0-based start position (reference position) of
00534     ///              the region to check for overlaps in.
00535     ///              (-1 indicates to start at the beginning of the reference.)
00536     /// \param end   exclusive 0-based end position (reference position) of the
00537     ///              region to check for overlaps in.
00538     ///              (-1 indicates to go to the end of the reference.)
00539     /// \return number of overlapping bases
00540     /// (matches in the cigar - not skips/deletions)
00541     uint32_t getNumOverlaps(int32_t start, int32_t end);
00542 
00543 
00544 private:
00545     static int MAKEKEY(char ch1, char ch2, char type)  // Combines a tag's two characters and its type into a single int key.
00546     { return (type << 16) + (ch2 << 8) + ch1; }
00547 
00548     // Allocate space for the record - does a realloc.  
00549     // The passed in size is the size of the entire record including the
00550     // block size field.
00551     // Adds any errors to myStatus.
00552     bool allocateRecordStructure(int size);
00553 
00554 
00555     void* getStringPtr(int offset);   // NOTE(review): presumably points at the stored string for the tag at this offset - confirm.
00556     void* getIntegerPtr(int offset);  // NOTE(review): presumably points at the stored integer for the tag at this offset - confirm.
00557     void* getDoublePtr(int offset);   // NOTE(review): presumably points at the stored double for the tag at this offset - confirm.
00558 
00559     // Fixes the buffer to match the variable length fields.
00560     // Adds any errors to myStatus.
00561     bool fixBuffer(SequenceTranslation translation);
00562 
00563     // Sets the Sequence and Quality strings from the buffer.
00564     // They are done together in one method because they require the same
00565     // loop, so might as well be done at the same time.
00566     // Adds any errors to myStatus.
00567     void setSequenceAndQualityFromBuffer();
00568 
00569     // Parse the cigar to calculate the alignment/unclipped ends and convert
00570     // to SAM/BAM format.
00571     // Adds any errors to myStatus.
00572     bool parseCigar();
00573     // Parse the cigar string to calculate the cigar length and alignment end
00574     // and convert to SAM format.
00575     // Adds any errors to myStatus.
00576     bool parseCigarBinary();
00577     // Parse the cigar string to calculate the cigar length and alignment end
00578     // and convert to BAM format.
00579     // Adds any errors to myStatus.
00580     bool parseCigarString();
00581 
00582     // Set the tags from the buffer.
00583     // Adds any errors to myStatus.
00584     bool setTagsFromBuffer();
00585 
00586     // Set the tags in the buffer.
00587     // Adds any errors to myStatus.
00588     bool setTagsInBuffer();
00589 
00590     void setVariablesForNewBuffer(SamFileHeader& header);
00591 
00592     void getVtype(int key, char& vtype) const;  // NOTE(review): presumably extracts the type packed by MAKEKEY - confirm.
00593     void getTag(int key, char* tag) const;      // NOTE(review): presumably extracts the tag characters packed by MAKEKEY - confirm.
00594 
00595     String & getString(int offset);
00596     int &    getInteger(int offset);
00597     double & getDouble(int offset);
00598 
00599     static const int DEFAULT_BLOCK_SIZE = 40;
00600     static const int DEFAULT_BIN = 4680;
00601     static const int DEFAULT_READ_NAME_LENGTH = 8;
00602     static const char* DEFAULT_READ_NAME;
00603     static const char* FIELD_ABSENT_STRING;
00604 
00605     bamRecordStruct * myRecordPtr;  // NOTE(review): presumably the buffer sized by allocateRecordStructure - confirm.
00606     int allocatedSize;              // NOTE(review): presumably the size currently allocated for myRecordPtr - confirm.
00607 
00608     // Pointer to a temporary cigar buffer that can be used during string
00609     // parsing before it is ready to be copied into the actual record.
00610     uint32_t* myCigarTempBuffer;
00611 
00612     // Size of the currently allocated temporary cigar buffer.
00613     int myCigarTempBufferAllocatedSize;
00614 
00615     // Length of the cigar currently contained in the temporary buffer.
00616     int myCigarTempBufferLength;
00617 
00618     // Track if the buffer is in sync with the Strings/Tags.
00619     // Set to false if any of the variable length fields are modified.
00620     // Set to true when the buffer is updated to match the variable length
00621     // fields.
00622     bool myIsBufferSynced;
00623 
00624     // Track if the tags need to be set from the buffer.
00625     bool myNeedToSetTagsFromBuffer;
00626 
00627     // Track if the tags need to be set in the buffer.
00628     // Allows you to set just the tags if they are the only thing that changed
00629     // in the buffer.
00630     bool myNeedToSetTagsInBuffer;
00631 
00632     int myTagBufferSize;
00633     int myLastTagIndex;
00634 
00635     String myReadName;
00636     String myReferenceName;
00637     String myMateReferenceName;
00638     String myCigar;
00639     String mySequence;
00640     String myQuality;
00641 
00642     std::string mySeqWithEq;
00643     std::string mySeqWithoutEq;
00644 
00645     // The length of the alignment.
00646     int32_t myAlignmentLength;
00647     // Unclipped alignment positions.
00648     int32_t myUnclippedStartOffset;
00649     int32_t myUnclippedEndOffset;
00650     
00651     CigarRoller myCigarRoller;
00652 
00653     LongHash<int>  extras;    // NOTE(review): presumably maps a MAKEKEY key to an offset into the arrays below - confirm.
00654     StringArray    strings;   // NOTE(review): presumably the string tag values - confirm.
00655     IntArray       integers;  // NOTE(review): presumably the integer tag values - confirm.
00656     Vector         doubles;   // NOTE(review): presumably the double tag values - confirm.
00657 
00658 
00659     // Track whether or not the buffer values are correct for
00660     // each setting.
00661     bool myIsReadNameBufferValid;
00662     bool myIsCigarBufferValid;
00663     bool myIsSequenceBufferValid;
00664     bool myIsQualityBufferValid;
00665     bool myIsTagsBufferValid;
00666     bool myIsBinValid;
00667 
00668     SamStatus myStatus;
00669 
00670     // The current translation of the sequence as it occurs in the buffer.
00671     // Only applicable if myIsSequenceBufferValid == true.
00672     SequenceTranslation myBufferSequenceTranslation;
00673 
00674 
00675     // Track the Reference.
00676     GenomeSequence* myRefPtr;
00677 
00678     // The type of translation to do when getting a sequence.
00679     SequenceTranslation mySequenceTranslation;
00680 
00681     String NOT_FOUND_TAG_STRING;  // NOTE(review): presumably what getString returns when the tag is not found - confirm.
00682     int NOT_FOUND_TAG_INT;        // NOTE(review): presumably what getInteger returns when the tag is not found - confirm.
00683     double NOT_FOUND_TAG_DOUBLE;  // NOTE(review): presumably what getDouble returns when the tag is not found - confirm.
00684 };
00685 
00686 #endif
Generated on Thu Dec 9 12:22:13 2010 for StatGen Software by  doxygen 1.6.3