00001 /* 00002 * Copyright (C) 2010 Regents of the University of Michigan 00003 * 00004 * This program is free software: you can redistribute it and/or modify 00005 * it under the terms of the GNU General Public License as published by 00006 * the Free Software Foundation, either version 3 of the License, or 00007 * (at your option) any later version. 00008 * 00009 * This program is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 * GNU General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU General Public License 00015 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00016 */ 00017 00018 #ifndef __SAM_RECORD_H__ 00019 #define __SAM_RECORD_H__ 00020 00021 #include <stdint.h> 00022 00023 #include "GenomeSequence.h" 00024 #include "SamStatus.h" 00025 #include "LongHash.h" 00026 #include "MathVector.h" 00027 #include "StringArray.h" 00028 #include "IntArray.h" 00029 #include "SamFileHeader.h" 00030 #include "CigarRoller.h" 00031 00032 /// Structure of a BAM record. 00033 struct bamRecordStruct 00034 { 00035 public: 00036 int32_t myBlockSize; 00037 int32_t myReferenceID; 00038 int32_t myPosition; 00039 uint32_t myReadNameLength : 8, myMapQuality : 8, myBin : 16; 00040 uint32_t myCigarLength : 16, myFlag : 16; 00041 int32_t myReadLength; 00042 int32_t myMateReferenceID; 00043 int32_t myMatePosition; 00044 int32_t myInsertSize; // Outer fragment length 00045 char myData[1]; 00046 }; 00047 00048 00049 /// Class providing an easy to use interface to get/set/operate on the 00050 /// fields in a SAM/BAM record. 00051 class SamRecord 00052 { 00053 public: 00054 /// Enum containing the settings on how to translate the sequence if a 00055 /// reference is available. If no reference is available, no translation 00056 /// is done. 
00057 enum SequenceTranslation { 00058 NONE, ///< Leave the sequence as is. 00059 EQUAL, ///< Translate bases that match the reference to '=' 00060 BASES, ///< Translate '=' to the actual base. 00061 }; 00062 00063 /// Default Constructor. 00064 SamRecord(); 00065 00066 /// Constructor that sets the error handling type. 00067 /// \param errorHandlingType how to handle errors. 00068 SamRecord(ErrorHandler::HandlingType errorHandlingType); 00069 00070 /// Destructor 00071 ~SamRecord(); 00072 00073 /// Reset the fields of the record to a default value. 00074 /// This is not necessary when you are reading a Sam/Bam file, 00075 /// but if you are setting fields, it is a good idea to clean 00076 /// out a record before reusing it. Clearing it allows you to 00077 /// not have to set any empty fields. 00078 void resetRecord(); 00079 00080 /// Reset the tag iterator to the beginning of the tags. 00081 void resetTagIter(); 00082 00083 /// Returns whether or not the record is valid. 00084 /// Sets the status to indicate success or failure. 00085 /// \param header SAM Header associated with the record. Used to perform 00086 /// some validation against the header. 00087 /// \return true if the record is valid, false if not. 00088 bool isValid(SamFileHeader& header); 00089 00090 /// Read the BAM record from a file. 00091 /// \param filePtr file to read the buffer from. 00092 /// \param header BAM header for the record. 00093 /// \return status of the reading the BAM record from the file. 00094 SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader& header); 00095 00096 /// Set the reference to the specified genome sequence object. 00097 /// \param reference pointer to the GenomeSequence object. 00098 void setReference(GenomeSequence* reference); 00099 00100 /// Set the type of sequence translation to use when getting 00101 /// the sequence. The default type (if this method is never called) is 00102 /// NONE (the sequence is left as-is). 
Can be over-ridden by using 00103 /// the accessors that take a SequenceTranslation parameter. 00104 /// \param translation type of sequence translation to use. 00105 void setSequenceTranslation(SequenceTranslation translation); 00106 00107 /////////////////////// 00108 /// @name Set Alignment Data 00109 /// Set methods for record fields. All of the "set" methods set the 00110 /// status to indicate success or the failure reason. 00111 //@{ 00112 00113 /// Set QNAME to the passed in name. 00114 /// \param readName the readname to set the QNAME to. 00115 /// \return true if successfully set, false if not. 00116 bool setReadName(const char* readName); 00117 00118 /// Set the bitwise flag to the specified value. 00119 /// \param flag integer flag to use. 00120 /// \return true if successfully set, false if not. 00121 bool setFlag(uint16_t flag); 00122 00123 /// Set the reference name to the specified name, using the header to 00124 /// determine the reference id. 00125 /// \param header SAM/BAM header to use to determine the reference id. 00126 /// \param referenceName reference name to use. 00127 /// \return true if successfully set, false if not 00128 bool setReferenceName(SamFileHeader& header, 00129 const char* referenceName); 00130 00131 /// Set the leftmost position using the specified 1-based (SAM format) 00132 /// value. 00133 /// Internal processing handles the switching between SAM/BAM formats 00134 /// when read/written. 00135 /// \param position 1-based start position 00136 /// \return true if successfully set, false if not. 00137 bool set1BasedPosition(int32_t position); 00138 00139 /// Set the leftmost position using the specified 0-based (BAM format) 00140 /// value. 00141 /// Internal processing handles the switching between SAM/BAM formats 00142 /// when read/written. 00143 /// \param position 0-based start position 00144 /// \return true if successfully set, false if not. 
00145 bool set0BasedPosition(int32_t position); 00146 00147 /// Set the mapping quality. 00148 /// \param mapQuality map quality to set in the record. 00149 /// \return true if successfully set, false if not. 00150 bool setMapQuality(uint8_t mapQuality); 00151 00152 /// Set the CIGAR to the specified SAM formatted cigar string. 00153 /// Internal processing handles the switching between SAM/BAM formats 00154 /// when read/written. 00155 /// \param cigar string containing the SAM formatted cigar. 00156 /// \return true if successfully set, false if not. 00157 bool setCigar(const char* cigar); 00158 00159 /// Set the CIGAR to the specified Cigar object. 00160 /// Internal processing handles the switching between SAM/BAM formats 00161 /// when read/written. 00162 /// \param cigar object to set this record's cigar to have. 00163 /// \return true if successfully set, false if not. 00164 bool setCigar(const Cigar& cigar); 00165 00166 00167 /// Set the mate reference sequence name to the specified name, using the 00168 /// header to determine the matee reference id. 00169 /// \param header SAM/BAM header to use to determine the mate reference id. 00170 /// \param referenceName mate reference name to use. 00171 /// \return true if successfully set, false if not 00172 bool setMateReferenceName(SamFileHeader& header, 00173 const char* mateReferenceName); 00174 00175 /// Set the leftmost mate position using the specified 1-based (SAM format) 00176 /// value. 00177 /// Internal processing handles the switching between SAM/BAM formats 00178 /// when read/written. 00179 /// \param position 1-based start position 00180 /// \return true if successfully set, false if not. 00181 bool set1BasedMatePosition(int32_t matePosition); 00182 00183 /// Set the leftmost mate position using the specified 0-based (BAM format) 00184 /// value. 00185 /// Internal processing handles the switching between SAM/BAM formats 00186 /// when read/written. 
00187 /// \param position 0-based start position 00188 /// \return true if successfully set, false if not. 00189 bool set0BasedMatePosition(int32_t matePosition); 00190 00191 /// Sets the inferred insert size. 00192 /// \param insertSize inferred insert size. 00193 /// \return true if successfully set, false if not. 00194 bool setInsertSize(int32_t insertSize); 00195 00196 /// Sets the sequence to the specified sequence string. This is a 00197 /// SAM formatted sequence string. Internal processing handles switching 00198 /// between SAM/BAM formats when read/written. 00199 /// \param seq SAM sequence string. May contain '='. 00200 /// \return true if successfully set, false if not. 00201 bool setSequence(const char* seq); 00202 00203 /// Sets the quality to the specified quality string. This is a SAM 00204 /// formatted quality string. Internal processing handles switching 00205 /// between SAM/BAM formats when read/written. 00206 /// \param quality SAM quality string. 00207 /// \return true if successfully set, false if not. 00208 bool setQuality(const char* quality); 00209 00210 /// Sets the SamRecord to contain the information in BAM format 00211 /// found in fromBuffer. 00212 /// \param fromBuffer buffer to read the BAM record from. 00213 /// \param fromBufferSize size of the buffer containing the BAM record. 00214 /// \param header BAM header for the record. 00215 /// \return status of reading the BAM record from the buffer. 00216 SamStatus::Status setBuffer(const char* fromBuffer, uint32_t fromBufferSize, 00217 SamFileHeader& header); 00218 00219 /// Add the specified tag to the record. Internal processing handles 00220 /// switching between SAM/BAM formats when read/written. 00221 /// \param tag two character tag to be added to the SAM/BAM record. 00222 /// \param vtype vtype of the specified value - either SAM/BAM vtypes. 00223 /// \param value value for the specified tag. 00224 /// \return true if the tag was successfully added, false otherwise. 
00225 bool addTag(const char* tag, char vtype, const char* value); 00226 00227 //@} 00228 00229 /////////////////////// 00230 /// @name Get Alignment Data 00231 /// Get methods for record fields. All of the "get" methods set the 00232 /// status to indicate success or the failure reason. 00233 //@{ 00234 00235 /// Get a const pointer to the buffer that contains the BAM representation 00236 /// of the record. 00237 /// \return const pointer to the buffer that contains the BAM representation 00238 /// of the record. 00239 const void* getRecordBuffer(); 00240 00241 /// Get a const pointer to the buffer that contains the BAM representation 00242 /// of the record. 00243 /// \param translation type of sequence translation to use. 00244 /// \return const pointer to the buffer that contains the BAM representation 00245 /// of the record. 00246 const void* getRecordBuffer(SequenceTranslation translation); 00247 00248 /// Write the record as a BAM into the specified file. 00249 /// \param filePtr file to write the BAM record into. 00250 /// \return status of the write. 00251 SamStatus::Status writeRecordBuffer(IFILE filePtr); 00252 00253 /// Write the record as a BAM into the specified file. 00254 /// \param filePtr file to write the BAM record into. 00255 /// \param translation type of sequence translation to use. 00256 /// \return status of the write. 00257 SamStatus::Status writeRecordBuffer(IFILE filePtr, 00258 SequenceTranslation translation); 00259 00260 /// Get the block size of the record. 00261 /// \return BAM block size of the record. 00262 int32_t getBlockSize(); 00263 00264 /// Get the reference sequence name of the record. 00265 /// \return reference sequence name 00266 const char* getReferenceName(); 00267 00268 /// Get the reference sequence id of the record. 00269 /// \return reference sequence id 00270 int32_t getReferenceID(); 00271 00272 /// Get the 1-based(SAM) leftmost position of the record. 00273 /// \return 1-based leftmost position. 
00274 int32_t get1BasedPosition(); 00275 00276 /// Get the 0-based(BAM) leftmost position of the record. 00277 /// \return 0-based leftmost position. 00278 int32_t get0BasedPosition(); 00279 00280 /// Get the length of the readname (QNAME) including the null. 00281 /// \return length of the read name (including null). 00282 uint8_t getReadNameLength(); 00283 00284 /// Get the mapping quality of the record. 00285 /// \return map quality. 00286 uint8_t getMapQuality(); 00287 00288 /// Get the BAM bin for the record. 00289 /// \return BAM bin 00290 uint16_t getBin(); 00291 00292 /// Get the length of the CIGAR in BAM format. 00293 /// \return length of BAM formatted cigar. 00294 uint16_t getCigarLength(); 00295 00296 /// Get the flag. 00297 /// \return flag. 00298 uint16_t getFlag(); 00299 00300 /// Get the length of the read. 00301 /// \return read length. 00302 int32_t getReadLength(); 00303 00304 /// Get the mate reference sequence name of the record. If it is equal to 00305 /// the reference name, it still returns the reference name. 00306 /// \return reference sequence name 00307 const char* getMateReferenceName(); 00308 00309 /// Get the mate reference sequence name of the record, returning "=" if 00310 /// it is the same as the reference name, unless they are both "*" in 00311 /// which case "*" is returned. 00312 /// \return reference sequence name 00313 const char* getMateReferenceNameOrEqual(); 00314 00315 /// Get the mate reference id of the record. 00316 /// \return reference id 00317 int32_t getMateReferenceID(); 00318 00319 /// Get the 1-based(SAM) leftmost mate position of the record. 00320 /// \return 1-based leftmost position. 00321 int32_t get1BasedMatePosition(); 00322 00323 /// Get the 0-based(BAM) leftmost mate position of the record. 00324 /// \return 0-based leftmost position. 00325 int32_t get0BasedMatePosition(); 00326 00327 /// Get the inferred insert size of the read pair. 00328 /// \return inferred insert size. 
00329 int32_t getInsertSize(); 00330 00331 /// Returns the 0-based inclusive rightmost position of the 00332 /// clipped sequence. 00333 /// \return 0-based inclusive rightmost position 00334 int32_t get0BasedAlignmentEnd(); 00335 00336 /// Returns the 1-based inclusive rightmost position of the 00337 /// clipped sequence. 00338 /// \return 1-based inclusive rightmost position 00339 int32_t get1BasedAlignmentEnd(); 00340 00341 /// Returns the length of the clipped sequence, returning 0 if the cigar 00342 /// is '*'. 00343 /// \return length of the clipped sequence. 00344 int32_t getAlignmentLength(); 00345 00346 /// Returns the 0-based inclusive left-most position adjusted for 00347 /// clipped bases. 00348 /// \return 0-based inclusive leftmost position including clips. 00349 int32_t get0BasedUnclippedStart(); 00350 00351 /// Returns the 1-based inclusive left-most position adjusted for 00352 /// clipped bases. 00353 /// \return 1-based inclusive leftmost position including clips. 00354 int32_t get1BasedUnclippedStart(); 00355 00356 /// Returns the 0-based inclusive right-most position adjusted for 00357 /// clipped bases. 00358 /// \return 0-based inclusive rightmost position including clips. 00359 int32_t get0BasedUnclippedEnd(); 00360 00361 /// Returns the 1-based inclusive right-most position adjusted for 00362 /// clipped bases. 00363 /// \return 1-based inclusive rightmost position including clips. 00364 int32_t get1BasedUnclippedEnd(); 00365 00366 /// Returns the SAM formatted Read Name (QNAME). 00367 /// \return read name. 00368 const char* getReadName(); 00369 00370 /// Returns the SAM formatted CIGAR string. 00371 /// \return cigar string. 00372 const char* getCigar(); 00373 00374 /// Returns the SAM formatted sequence string, translating the base as 00375 /// specified by setSequenceTranslation. 00376 /// \return sequence string. 
00377 const char* getSequence(); 00378 00379 /// Returns the SAM formatted sequence string performing the specified 00380 /// sequence translation. 00381 /// \param translation type of sequence translation to use. 00382 /// \return sequence string. 00383 const char* getSequence(SequenceTranslation translation); 00384 00385 /// Returns the SAM formatted quality string. 00386 /// \return quality string. 00387 const char* getQuality(); 00388 00389 /// Get the sequence base at the specified index into this sequence 0 to 00390 /// readLength - 1, translating the base as specified by 00391 /// setSequenceTranslation. 00392 /// \param index index into the sequence string (0 to readLength-1). 00393 /// \return the sequence base at the specified index into the sequence. 00394 char getSequence(int index); 00395 00396 /// Get the sequence base at the specified index into this sequence 0 to 00397 /// readLength - performing the specified sequence translation1. 00398 /// \param index index into the sequence string (0 to readLength-1). 00399 /// \param translation type of sequence translation to use. 00400 /// \return the sequence base at the specified index into the sequence. 00401 char getSequence(int index, SequenceTranslation translation); 00402 00403 /// Get the quality character at the specified index into the quality 0 to 00404 /// readLength - 1. 00405 /// \param index index into the quality string (0 to readLength-1). 00406 /// \return the quality character at the specified index into the quality. 00407 char getQuality(int index); 00408 00409 /// Returns a pointer to the Cigar object associated with this record. 00410 /// The object is essentially read-only, only allowing modifications 00411 /// due to lazy evaluations. 00412 /// \return pointer to the Cigar object. 00413 // TODO - want this to be getCigar 00414 Cigar* getCigarInfo(); 00415 00416 /// Returns the length of the tags in BAM format. 00417 /// \return length of tags in BAM format. 
00418 uint32_t getTagLength(); 00419 00420 /// Get the next tag from the record. 00421 /// Sets the Status to SUCCESS when a tag is successfully returned or 00422 /// when there are no more tags. Otherwise the status is set to describe 00423 /// why it failed (parsing, etc). 00424 /// \param tag set to the tag when a tag is read. 00425 /// \param vtype set to the vtype when a tag is read. 00426 /// \param value pointer to the value of the tag (will need to cast 00427 /// to int, double, char, or string based on vtype). 00428 /// \return true if a tag was read, false if there are no more tags. 00429 bool getNextSamTag(char* tag, char& vtype, void** value); 00430 00431 /// Returns the values of all fields except the tags. 00432 /// \param recStruct structure containing the contents of all 00433 /// non-variable length fields. 00434 /// \param readName read name from the record (return param) 00435 /// \param cigar cigar string from the record (return param) 00436 /// \param sequence sequence string from the record (return param) 00437 /// \param quality quality string from the record (return param) 00438 /// \return true if all fields were successfully set, false otherwise. 00439 bool getFields(bamRecordStruct& recStruct, String& readName, 00440 String& cigar, String& sequence, String& quality); 00441 00442 /// Returns the values of all fields except the tags. 00443 /// \param recStruct structure containing the contents of all 00444 /// non-variable length fields. 00445 /// \param readName read name from the record (return param) 00446 /// \param cigar cigar string from the record (return param) 00447 /// \param sequence sequence string from the record (return param) 00448 /// \param quality quality string from the record (return param) 00449 /// \param translation type of sequence translation to use. 00450 /// \return true if all fields were successfully set, false otherwise. 
00451 bool getFields(bamRecordStruct& recStruct, String& readName, 00452 String& cigar, String& sequence, String& quality, 00453 SequenceTranslation translation); 00454 00455 //@} 00456 00457 /// Returns whether or not the specified vtype is an integer type. 00458 /// Does not set SamStatus. 00459 /// \param vtype value type to check. 00460 /// \return true if the passed in vtype is an integer ('c', 'C', 's', 00461 /// 'S', 'i', 'I'), false otherwise. 00462 bool isIntegerType(char vtype) const; 00463 00464 /// Returns whether or not the specified vtype is a double type. 00465 /// Does not set SamStatus. 00466 /// \param vtype value type to check. 00467 /// \return true if the passed in vtype is a double ('f'), false otherwise. 00468 bool isDoubleType(char vtype) const; 00469 00470 /// Returns whether or not the specified vtype is a char type. 00471 /// Does not set SamStatus. 00472 /// \param vtype value type to check. 00473 /// \return true if the passed in vtype is a char ('A'), false otherwise. 00474 bool isCharType(char vtype) const; 00475 00476 /// Returns whether or not the specified vtype is a string type. 00477 /// Does not set SamStatus. 00478 /// \param vtype value type to check. 00479 /// \return true if the passed in vtype is a string ('Z'), false othwerise. 00480 bool isStringType(char vtype) const; 00481 00482 /// Clear the tags in this record. 00483 /// Does not set SamStatus. 00484 void clearTags(); 00485 00486 /// Returns the status associated with the last method that sets the status. 00487 /// \return SamStatus of the last command that sets status. 00488 const SamStatus& getStatus(); 00489 00490 /// Get the string value for the specified tag. 00491 /// Does not set SamStatus. 00492 String & getString(const char * tag); 00493 00494 /// Get the integer value for the specified tag. 00495 /// Does not set SamStatus. 00496 int & getInteger(const char * tag); 00497 00498 /// Get the double value for the specified tag. 00499 /// Does not set SamStatus. 
00500 double & getDouble(const char * tag); 00501 00502 00503 // void getSamExtraFieldFromKey(int key, String& extraField); 00504 00505 /// Check if the specified tag contains a string. 00506 /// Does not set SamStatus. 00507 /// \param tag SAM tag to check contents of. 00508 /// \return true if the value associated with the tag is a string. 00509 bool checkString(const char * tag) { return checkTag(tag, 'Z'); } 00510 00511 /// Check if the specified tag contains a string. 00512 /// Does not set SamStatus. 00513 /// \param tag SAM tag to check contents of. 00514 /// \return true if the value associated with the tag is a string. 00515 bool checkInteger(const char * tag) { return checkTag(tag, 'i'); } 00516 00517 /// Check if the specified tag contains a string. 00518 /// Does not set SamStatus. 00519 /// \param tag SAM tag to check contents of. 00520 /// \return true if the value associated with the tag is a string. 00521 bool checkDouble(const char * tag) { return checkTag(tag, 'f'); } 00522 00523 /// Check if the specified tag contains a value of the specified vtype. 00524 /// Does not set SamStatus. 00525 /// \param tag SAM tag to check contents of. 00526 /// \param type value type to check if the SAM tag matches. 00527 /// \return true if the value associated with the tag is a string. 00528 bool checkTag(const char * tag, char type); 00529 00530 00531 /// Return the number of bases in this read that overlap the passed in 00532 /// region. 00533 /// \param start inclusive 0-based start position (reference position) of 00534 /// the region to check for overlaps in. 00535 /// (-1 indicates to start at the beginning of the reference.) 00536 /// \param end exclusive 0-based end position (reference position) of the 00537 /// region to check for overlaps in. 00538 /// (-1 indicates to go to the end of the reference.) 
00539 /// \return number of overlapping bases 00540 /// (matches in the cigar - not skips/deletions) 00541 uint32_t getNumOverlaps(int32_t start, int32_t end); 00542 00543 00544 private: 00545 static int MAKEKEY(char ch1, char ch2, char type) 00546 { return (type << 16) + (ch2 << 8) + ch1; } 00547 00548 // Allocate space for the record - does a realloc. 00549 // The passed in size is the size of the entire record including the 00550 // block size field. 00551 // Adds any errors to myStatus. 00552 bool allocateRecordStructure(int size); 00553 00554 00555 void* getStringPtr(int offset); 00556 void* getIntegerPtr(int offset); 00557 void* getDoublePtr(int offset); 00558 00559 // Fixes the buffer to match the variable length fields. 00560 // Adds any errors to myStatus. 00561 bool fixBuffer(SequenceTranslation translation); 00562 00563 // Sets the Sequence and Quality strings from the buffer. 00564 // They are done together in one method because they require the same 00565 // loop, so might as well be done at the same time. 00566 // Adds any errors to myStatus. 00567 void setSequenceAndQualityFromBuffer(); 00568 00569 // Parse the cigar to calculate the alignment/unclipped ends and convert 00570 // to SAM/BAM format. 00571 // Adds any errors to myStatus. 00572 bool parseCigar(); 00573 // Parse the cigar string to calculate the cigar length and alignment end 00574 // and convert to SAM format. 00575 // Adds any errors to myStatus. 00576 bool parseCigarBinary(); 00577 // Parse the cigar string to calculate the cigar length and alignment end 00578 // and convert to BAM format. 00579 // Adds any errors to myStatus. 00580 bool parseCigarString(); 00581 00582 // Set the tags from the buffer. 00583 // Adds any errors to myStatus. 00584 bool setTagsFromBuffer(); 00585 00586 // Set the tags in the buffer. 00587 // Adds any errors to myStatus. 
00588 bool setTagsInBuffer(); 00589 00590 void setVariablesForNewBuffer(SamFileHeader& header); 00591 00592 void getVtype(int key, char& vtype) const; 00593 void getTag(int key, char* tag) const; 00594 00595 String & getString(int offset); 00596 int & getInteger(int offset); 00597 double & getDouble(int offset); 00598 00599 static const int DEFAULT_BLOCK_SIZE = 40; 00600 static const int DEFAULT_BIN = 4680; 00601 static const int DEFAULT_READ_NAME_LENGTH = 8; 00602 static const char* DEFAULT_READ_NAME; 00603 static const char* FIELD_ABSENT_STRING; 00604 00605 bamRecordStruct * myRecordPtr; 00606 int allocatedSize; 00607 00608 // Pointer to a temporary cigar buffer that can be used during string 00609 // parsing before it is ready to be copied into the actual record. 00610 uint32_t* myCigarTempBuffer; 00611 00612 // Size of the currently allocated temporary cigar buffer. 00613 int myCigarTempBufferAllocatedSize; 00614 00615 // Length of the cigar currently contained in the temporary buffer. 00616 int myCigarTempBufferLength; 00617 00618 // Track if the buffer is in sync with the Strings/Tags. 00619 // Set to false if any of the variable length fields are modified. 00620 // Set to true when the buffer is updated to match the variable length 00621 // fields. 00622 bool myIsBufferSynced; 00623 00624 // Track if the tags need to be set from the buffer. 00625 bool myNeedToSetTagsFromBuffer; 00626 00627 // Trag if the tags need to be set in the buffer. 00628 // Allows you to set just the tags if they are the only thing that changed 00629 // in the buffer. 00630 bool myNeedToSetTagsInBuffer; 00631 00632 int myTagBufferSize; 00633 int myLastTagIndex; 00634 00635 String myReadName; 00636 String myReferenceName; 00637 String myMateReferenceName; 00638 String myCigar; 00639 String mySequence; 00640 String myQuality; 00641 00642 std::string mySeqWithEq; 00643 std::string mySeqWithoutEq; 00644 00645 // The length of the alignment. 
00646 int32_t myAlignmentLength; 00647 // Unclipped alignment positions. 00648 int32_t myUnclippedStartOffset; 00649 int32_t myUnclippedEndOffset; 00650 00651 CigarRoller myCigarRoller; 00652 00653 LongHash<int> extras; 00654 StringArray strings; 00655 IntArray integers; 00656 Vector doubles; 00657 00658 00659 // Track whether or not the buffer values are correct for 00660 // each setting. 00661 bool myIsReadNameBufferValid; 00662 bool myIsCigarBufferValid; 00663 bool myIsSequenceBufferValid; 00664 bool myIsQualityBufferValid; 00665 bool myIsTagsBufferValid; 00666 bool myIsBinValid; 00667 00668 SamStatus myStatus; 00669 00670 // The current translation of the sequence as it occurs in the buffer. 00671 // Only applicable if myIsSequenceBufferValid == true. 00672 SequenceTranslation myBufferSequenceTranslation; 00673 00674 00675 // Track the Reference. 00676 GenomeSequence* myRefPtr; 00677 00678 // The type of translation to do when getting a sequence. 00679 SequenceTranslation mySequenceTranslation; 00680 00681 String NOT_FOUND_TAG_STRING; 00682 int NOT_FOUND_TAG_INT; 00683 double NOT_FOUND_TAG_DOUBLE; 00684 }; 00685 00686 #endif
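// 1.6.3 -- presumably the version stamp of the tool that generated this listing; not part of the header itself (TODO confirm).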