Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record. More...
#include <SamRecord.h>

Public Types | |
| enum | SequenceTranslation { NONE, EQUAL, BASES } |
Enum containing the settings on how to translate the sequence if a reference is available. More... | |
Public Member Functions | |
| SamRecord () | |
| Default Constructor. | |
| SamRecord (ErrorHandler::HandlingType errorHandlingType) | |
| Constructor that sets the error handling type. | |
| ~SamRecord () | |
| Destructor. | |
| void | resetRecord () |
| Reset the fields of the record to a default value. | |
| void | resetTagIter () |
| Reset the tag iterator to the beginning of the tags. | |
| bool | isValid (SamFileHeader &header) |
| Returns whether or not the record is valid. | |
| SamStatus::Status | setBufferFromFile (IFILE filePtr, SamFileHeader &header) |
| Read the BAM record from a file. | |
| void | setReference (GenomeSequence *reference) |
| Set the reference to the specified genome sequence object. | |
| void | setSequenceTranslation (SequenceTranslation translation) |
| Set the type of sequence translation to use when getting the sequence. | |
| bool | isIntegerType (char vtype) const |
| Returns whether or not the specified vtype is an integer type. | |
| bool | isDoubleType (char vtype) const |
| Returns whether or not the specified vtype is a double type. | |
| bool | isCharType (char vtype) const |
| Returns whether or not the specified vtype is a char type. | |
| bool | isStringType (char vtype) const |
| Returns whether or not the specified vtype is a string type. | |
| void | clearTags () |
| Clear the tags in this record. | |
| const SamStatus & | getStatus () |
| Returns the status associated with the last method that sets the status. | |
| String & | getString (const char *tag) |
| Get the string value for the specified tag. | |
| int & | getInteger (const char *tag) |
| Get the integer value for the specified tag. | |
| double & | getDouble (const char *tag) |
| Get the double value for the specified tag. | |
| bool | checkString (const char *tag) |
| Check if the specified tag contains a string. | |
| bool | checkInteger (const char *tag) |
| Check if the specified tag contains an integer. | |
| bool | checkDouble (const char *tag) |
| Check if the specified tag contains a double. | |
| bool | checkTag (const char *tag, char type) |
| Check if the specified tag contains a value of the specified vtype. | |
| uint32_t | getNumOverlaps (int32_t start, int32_t end) |
| Return the number of bases in this read that overlap the passed in region. | |
Set Alignment Data | |
| bool | setReadName (const char *readName) |
| Set QNAME to the passed in name. | |
| bool | setFlag (uint16_t flag) |
| Set the bitwise flag to the specified value. | |
| bool | setReferenceName (SamFileHeader &header, const char *referenceName) |
| Set the reference name to the specified name, using the header to determine the reference id. | |
| bool | set1BasedPosition (int32_t position) |
| Set the leftmost position using the specified 1-based (SAM format) value. | |
| bool | set0BasedPosition (int32_t position) |
| Set the leftmost position using the specified 0-based (BAM format) value. | |
| bool | setMapQuality (uint8_t mapQuality) |
| Set the mapping quality. | |
| bool | setCigar (const char *cigar) |
| Set the CIGAR to the specified SAM formatted cigar string. | |
| bool | setCigar (const Cigar &cigar) |
| Set the CIGAR to the specified Cigar object. | |
| bool | setMateReferenceName (SamFileHeader &header, const char *mateReferenceName) |
| Set the mate reference sequence name to the specified name, using the header to determine the mate reference id. | |
| bool | set1BasedMatePosition (int32_t matePosition) |
| Set the leftmost mate position using the specified 1-based (SAM format) value. | |
| bool | set0BasedMatePosition (int32_t matePosition) |
| Set the leftmost mate position using the specified 0-based (BAM format) value. | |
| bool | setInsertSize (int32_t insertSize) |
| Sets the inferred insert size. | |
| bool | setSequence (const char *seq) |
| Sets the sequence to the specified sequence string. | |
| bool | setQuality (const char *quality) |
| Sets the quality to the specified quality string. | |
| SamStatus::Status | setBuffer (const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header) |
| Sets the SamRecord to contain the information in BAM format found in fromBuffer. | |
| bool | addTag (const char *tag, char vtype, const char *value) |
| Add the specified tag to the record. | |
Get Alignment Data | |
| const void * | getRecordBuffer () |
| Get a const pointer to the buffer that contains the BAM representation of the record. | |
| const void * | getRecordBuffer (SequenceTranslation translation) |
| Get a const pointer to the buffer that contains the BAM representation of the record. | |
| SamStatus::Status | writeRecordBuffer (IFILE filePtr) |
| Write the record as a BAM into the specified file. | |
| SamStatus::Status | writeRecordBuffer (IFILE filePtr, SequenceTranslation translation) |
| Write the record as a BAM into the specified file. | |
| int32_t | getBlockSize () |
| Get the block size of the record. | |
| const char * | getReferenceName () |
| Get the reference sequence name of the record. | |
| int32_t | getReferenceID () |
| Get the reference sequence id of the record. | |
| int32_t | get1BasedPosition () |
| Get the 1-based(SAM) leftmost position of the record. | |
| int32_t | get0BasedPosition () |
| Get the 0-based(BAM) leftmost position of the record. | |
| uint8_t | getReadNameLength () |
| Get the length of the readname (QNAME) including the null. | |
| uint8_t | getMapQuality () |
| Get the mapping quality of the record. | |
| uint16_t | getBin () |
| Get the BAM bin for the record. | |
| uint16_t | getCigarLength () |
| Get the length of the CIGAR in BAM format. | |
| uint16_t | getFlag () |
| Get the flag. | |
| int32_t | getReadLength () |
| Get the length of the read. | |
| const char * | getMateReferenceName () |
| Get the mate reference sequence name of the record. | |
| const char * | getMateReferenceNameOrEqual () |
| Get the mate reference sequence name of the record, returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned. | |
| int32_t | getMateReferenceID () |
| Get the mate reference id of the record. | |
| int32_t | get1BasedMatePosition () |
| Get the 1-based(SAM) leftmost mate position of the record. | |
| int32_t | get0BasedMatePosition () |
| Get the 0-based(BAM) leftmost mate position of the record. | |
| int32_t | getInsertSize () |
| Get the inferred insert size of the read pair. | |
| int32_t | get0BasedAlignmentEnd () |
| Returns the 0-based inclusive rightmost position of the clipped sequence. | |
| int32_t | get1BasedAlignmentEnd () |
| Returns the 1-based inclusive rightmost position of the clipped sequence. | |
| int32_t | getAlignmentLength () |
| Returns the length of the clipped sequence, returning 0 if the cigar is '*'. | |
| int32_t | get0BasedUnclippedStart () |
| Returns the 0-based inclusive left-most position adjusted for clipped bases. | |
| int32_t | get1BasedUnclippedStart () |
| Returns the 1-based inclusive left-most position adjusted for clipped bases. | |
| int32_t | get0BasedUnclippedEnd () |
| Returns the 0-based inclusive right-most position adjusted for clipped bases. | |
| int32_t | get1BasedUnclippedEnd () |
| Returns the 1-based inclusive right-most position adjusted for clipped bases. | |
| const char * | getReadName () |
| Returns the SAM formatted Read Name (QNAME). | |
| const char * | getCigar () |
| Returns the SAM formatted CIGAR string. | |
| const char * | getSequence () |
| Returns the SAM formatted sequence string, translating the base as specified by setSequenceTranslation. | |
| const char * | getSequence (SequenceTranslation translation) |
| Returns the SAM formatted sequence string performing the specified sequence translation. | |
| const char * | getQuality () |
| Returns the SAM formatted quality string. | |
| char | getSequence (int index) |
| Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation. | |
| char | getSequence (int index, SequenceTranslation translation) |
| Get the sequence base at the specified index into this sequence 0 to readLength - 1, performing the specified sequence translation. | |
| char | getQuality (int index) |
| Get the quality character at the specified index into the quality 0 to readLength - 1. | |
| Cigar * | getCigarInfo () |
| Returns a pointer to the Cigar object associated with this record. | |
| uint32_t | getTagLength () |
| Returns the length of the tags in BAM format. | |
| bool | getNextSamTag (char *tag, char &vtype, void **value) |
| Get the next tag from the record. | |
| bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality) |
| Returns the values of all fields except the tags. | |
| bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality, SequenceTranslation translation) |
| Returns the values of all fields except the tags. | |
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition at line 51 of file SamRecord.h.
Enum containing the settings on how to translate the sequence if a reference is available.
If no reference is available, no translation is done.
| NONE |
Leave the sequence as is. |
| EQUAL |
Translate bases that match the reference to '='. |
| BASES |
Translate '=' to the actual base. |
Definition at line 57 of file SamRecord.h.
| SamRecord::SamRecord | ( | ErrorHandler::HandlingType | errorHandlingType | ) |
Constructor that sets the error handling type.
| errorHandlingType | how to handle errors. |
Definition at line 53 of file SamRecord.cpp.
References resetRecord().
00054 : myStatus(errorHandlingType), 00055 myRefPtr(NULL), 00056 mySequenceTranslation(NONE) 00057 { 00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t); 00059 00060 myRecordPtr = 00061 (bamRecordStruct *) malloc(defaultAllocSize); 00062 00063 myCigarTempBuffer = NULL; 00064 myCigarTempBufferAllocatedSize = 0; 00065 00066 allocatedSize = defaultAllocSize; 00067 00068 resetRecord(); 00069 }
| bool SamRecord::addTag | ( | const char * | tag, | |
| char | vtype, | |||
| const char * | value | |||
| ) |
Add the specified tag to the record.
Internal processing handles switching between SAM/BAM formats when read/written.
| tag | two character tag to be added to the SAM/BAM record. | |
| vtype | vtype of the specified value - either SAM/BAM vtypes. | |
| value | value for the specified tag. |
Definition at line 469 of file SamRecord.cpp.
00470 { 00471 myStatus = SamStatus::SUCCESS; 00472 bool status = true; // default to successful. 00473 int key = 0; 00474 int intVal = 0; 00475 int index = 0; 00476 char bamvtype; 00477 00478 int tagBufferSize = 0; 00479 00480 // First check to see if the tags need to be synced to the buffer. 00481 if(myNeedToSetTagsFromBuffer) 00482 { 00483 if(!setTagsFromBuffer()) 00484 { 00485 // Failed to read tags from the buffer, so cannot add new ones. 00486 return(false); 00487 } 00488 } 00489 00490 switch (vtype) 00491 { 00492 case 'A' : 00493 index = integers.Length(); 00494 bamvtype = vtype; 00495 integers.Push((const int)*(valuePtr)); 00496 tagBufferSize += 4; 00497 break; 00498 case 'i' : 00499 index = integers.Length(); 00500 intVal = atoi((const char *)valuePtr); 00501 // Ints come in as int. But it can be represented in fewer bits. 00502 // So determine a more specific type that is in line with the 00503 // types for BAM files. 00504 // First check to see if it is a negative. 00505 if(intVal < 0) 00506 { 00507 // The int is negative, so it will need to use a signed type. 00508 // See if it is greater than the min value for a char. 00509 if(intVal > std::numeric_limits<char>::min()) 00510 { 00511 // It can be stored in a signed char. 00512 bamvtype = 'c'; 00513 tagBufferSize += 4; 00514 } 00515 else if(intVal > std::numeric_limits<short>::min()) 00516 { 00517 // It fits in a signed short. 00518 bamvtype = 's'; 00519 tagBufferSize += 5; 00520 } 00521 else 00522 { 00523 // Just store it as a signed int. 00524 bamvtype = 'i'; 00525 tagBufferSize += 7; 00526 } 00527 } 00528 else 00529 { 00530 // It is positive, so an unsigned type can be used. 00531 if(intVal < std::numeric_limits<unsigned char>::max()) 00532 { 00533 // It is under the max of an unsigned char. 00534 bamvtype = 'C'; 00535 tagBufferSize += 4; 00536 } 00537 else if(intVal < std::numeric_limits<unsigned short>::max()) 00538 { 00539 // It is under the max of an unsigned short. 
00540 bamvtype = 'S'; 00541 tagBufferSize += 5; 00542 } 00543 else 00544 { 00545 // Just store it as an unsigned int. 00546 bamvtype = 'I'; 00547 tagBufferSize += 7; 00548 } 00549 } 00550 integers.Push(intVal); 00551 break; 00552 case 'Z' : 00553 index = strings.Length(); 00554 bamvtype = vtype; 00555 strings.Push((const char *)valuePtr); 00556 tagBufferSize += 4 + strings.Last().Length(); 00557 break; 00558 case 'f' : 00559 index = doubles.Length(); 00560 bamvtype = vtype; 00561 doubles.Push(atof((const char *)valuePtr)); 00562 tagBufferSize += 7; 00563 break; 00564 default : 00565 fprintf(stderr, 00566 "samFile::ReadSAM() - Unknown custom field of type %c\n", 00567 vtype); 00568 myStatus.setStatus(SamStatus::FAIL_PARSE, 00569 "Unknown custom field in a tag"); 00570 status = false; 00571 } 00572 00573 // Only add the tag if it has so far been successfully processed. 00574 if(status) 00575 { 00576 // The buffer tags are now out of sync. 00577 myNeedToSetTagsInBuffer = true; 00578 myIsTagsBufferValid = false; 00579 myIsBufferSynced = false; 00580 00581 key = MAKEKEY(tag[0], tag[1], bamvtype); 00582 extras.Add(key, index); 00583 myTagBufferSize += tagBufferSize; 00584 } 00585 return(status); 00586 }
| bool SamRecord::checkDouble | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a double.
Resets SamStatus to SUCCESS on entry (via checkTag()).
| tag | SAM tag to check contents of. |
Definition at line 521 of file SamRecord.h.
References checkTag().
00521 { return checkTag(tag, 'f'); }
| bool SamRecord::checkInteger | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains an integer.
Resets SamStatus to SUCCESS on entry (via checkTag()).
| tag | SAM tag to check contents of. |
Definition at line 515 of file SamRecord.h.
References checkTag().
00515 { return checkTag(tag, 'i'); }
| bool SamRecord::checkString | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a string.
Resets SamStatus to SUCCESS on entry (via checkTag()).
| tag | SAM tag to check contents of. |
Definition at line 509 of file SamRecord.h.
References checkTag().
00509 { return checkTag(tag, 'Z'); }
| bool SamRecord::checkTag | ( | const char * | tag, | |
| char | type | |||
| ) |
Check if the specified tag contains a value of the specified vtype.
Resets SamStatus to SUCCESS on entry.
| tag | SAM tag to check contents of. | |
| type | value type to check if the SAM tag matches. |
Definition at line 1545 of file SamRecord.cpp.
Referenced by checkDouble(), checkInteger(), and checkString().
01546 { 01547 // Init to success. 01548 myStatus = SamStatus::SUCCESS; 01549 // Parse the buffer if necessary. 01550 if(myNeedToSetTagsFromBuffer) 01551 { 01552 if(!setTagsFromBuffer()) 01553 { 01554 // Failed to read the tags from the buffer, so cannot 01555 // get tags. 01556 return(""); 01557 } 01558 } 01559 01560 int key = MAKEKEY(tag[0], tag[1], type); 01561 01562 return (extras.Find(key) != LH_NOTFOUND); 01563 }
| void SamRecord::clearTags | ( | ) |
Clear the tags in this record.
Does not set SamStatus.
Definition at line 1433 of file SamRecord.cpp.
References resetTagIter().
Referenced by resetRecord().
01434 { 01435 if(extras.Entries() != 0) 01436 { 01437 extras.Clear(); 01438 } 01439 strings.Clear(); 01440 integers.Clear(); 01441 doubles.Clear(); 01442 myTagBufferSize = 0; 01443 resetTagIter(); 01444 }
| int32_t SamRecord::get0BasedAlignmentEnd | ( | ) |
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition at line 853 of file SamRecord.cpp.
Referenced by get0BasedUnclippedEnd(), get1BasedAlignmentEnd(), and SamFile::readIndexedRecord().
00854 { 00855 myStatus = SamStatus::SUCCESS; 00856 if(myAlignmentLength == -1) 00857 { 00858 // Alignment end has not been set, so calculate it. 00859 parseCigar(); 00860 } 00861 // If alignment length > 0, subtract 1 from it to get the end. 00862 if(myAlignmentLength == 0) 00863 { 00864 // Length is 0, just return the start position. 00865 return(myRecordPtr->myPosition); 00866 } 00867 return(myRecordPtr->myPosition + myAlignmentLength - 1); 00868 }
| int32_t SamRecord::get0BasedMatePosition | ( | ) |
Get the 0-based(BAM) leftmost mate position of the record.
Definition at line 838 of file SamRecord.cpp.
| int32_t SamRecord::get0BasedPosition | ( | ) |
Get the 0-based(BAM) leftmost position of the record.
Definition at line 705 of file SamRecord.cpp.
Referenced by getNumOverlaps(), SamFile::readIndexedRecord(), SamQuerySeqWithRefIter::reset(), and SamFile::validateSortOrder().
| int32_t SamRecord::get0BasedUnclippedEnd | ( | ) |
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition at line 912 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by get1BasedUnclippedEnd().
00913 { 00914 // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the 00915 // cigar has not yet been parsed, so no need to check it here. 00916 return(get0BasedAlignmentEnd() + myUnclippedEndOffset); 00917 }
| int32_t SamRecord::get0BasedUnclippedStart | ( | ) |
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition at line 892 of file SamRecord.cpp.
Referenced by get1BasedUnclippedStart().
| int32_t SamRecord::get1BasedAlignmentEnd | ( | ) |
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition at line 872 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by getBin().
00873 { 00874 return(get0BasedAlignmentEnd() + 1); 00875 }
| int32_t SamRecord::get1BasedMatePosition | ( | ) |
Get the 1-based(SAM) leftmost mate position of the record.
Definition at line 831 of file SamRecord.cpp.
| int32_t SamRecord::get1BasedPosition | ( | ) |
Get the 1-based(SAM) leftmost position of the record.
Definition at line 698 of file SamRecord.cpp.
| int32_t SamRecord::get1BasedUnclippedEnd | ( | ) |
Returns the 1-based inclusive right-most position adjusted for clipped bases.
Definition at line 921 of file SamRecord.cpp.
References get0BasedUnclippedEnd().
00922 { 00923 return(get0BasedUnclippedEnd() + 1); 00924 }
| int32_t SamRecord::get1BasedUnclippedStart | ( | ) |
Returns the 1-based inclusive left-most position adjusted for clipped bases.
Definition at line 905 of file SamRecord.cpp.
References get0BasedUnclippedStart().
00906 { 00907 return(get0BasedUnclippedStart() + 1); 00908 }
| int32_t SamRecord::getAlignmentLength | ( | ) |
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition at line 879 of file SamRecord.cpp.
| uint16_t SamRecord::getBin | ( | ) |
Get the BAM bin for the record.
Definition at line 733 of file SamRecord.cpp.
References get1BasedAlignmentEnd().
00734 { 00735 myStatus = SamStatus::SUCCESS; 00736 if(!myIsBinValid) 00737 { 00738 // The bin that is set in the record is not valid, so 00739 // reset it. 00740 myRecordPtr->myBin = 00741 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd()); 00742 myIsBinValid = true; 00743 } 00744 return(myRecordPtr->myBin); 00745 }
| int32_t SamRecord::getBlockSize | ( | ) |
Get the block size of the record.
Definition at line 667 of file SamRecord.cpp.
00668 { 00669 myStatus = SamStatus::SUCCESS; 00670 // If the buffer isn't synced, sync the buffer to determine the 00671 // block size. 00672 if(myIsBufferSynced == false) 00673 { 00674 // Since this just returns the block size, the translation of 00675 // the sequence does not matter, so just use the currently set 00676 // value. 00677 fixBuffer(myBufferSequenceTranslation); 00678 } 00679 return myRecordPtr->myBlockSize; 00680 }
| const char * SamRecord::getCigar | ( | ) |
Returns the SAM formatted CIGAR string.
Definition at line 941 of file SamRecord.cpp.
Referenced by getFields().
| Cigar * SamRecord::getCigarInfo | ( | ) |
Returns a pointer to the Cigar object associated with this record.
The object is essentially read-only, only allowing modifications due to lazy evaluations.
Definition at line 1212 of file SamRecord.cpp.
Referenced by SamQuerySeqWithRefIter::reset().
01213 { 01214 // Check to see whether or not the Cigar has already been 01215 // set - this is determined by checking if alignment length 01216 // is set since alignment length and the cigar are set 01217 // at the same time. 01218 if(myAlignmentLength == -1) 01219 { 01220 // Not been set, so calculate it. 01221 parseCigar(); 01222 } 01223 return(&myCigarRoller); 01224 }
| uint16_t SamRecord::getCigarLength | ( | ) |
Get the length of the CIGAR in BAM format.
Definition at line 748 of file SamRecord.cpp.
00749 { 00750 myStatus = SamStatus::SUCCESS; 00751 // If the cigar buffer is valid 00752 // then get the length from there. 00753 if(myIsCigarBufferValid) 00754 { 00755 return myRecordPtr->myCigarLength; 00756 } 00757 00758 if(myCigarTempBufferLength == -1) 00759 { 00760 // The cigar buffer is not valid and the cigar temp buffer is not set, 00761 // so parse the string. 00762 parseCigarString(); 00763 } 00764 00765 // The temp buffer is now set, so return the size. 00766 return(myCigarTempBufferLength); 00767 }
| double & SamRecord::getDouble | ( | const char * | tag | ) |
Get the double value for the specified tag.
Resets SamStatus to SUCCESS on entry. If the tag is not found, a reference to NOT_FOUND_TAG_DOUBLE is returned.
Definition at line 1514 of file SamRecord.cpp.
01515 { 01516 // Init to success. 01517 myStatus = SamStatus::SUCCESS; 01518 // Parse the buffer if necessary. 01519 if(myNeedToSetTagsFromBuffer) 01520 { 01521 if(!setTagsFromBuffer()) 01522 { 01523 // Failed to read the tags from the buffer, so cannot 01524 // get tags. 01525 // TODO - what do we want to do on failure? 01526 } 01527 } 01528 01529 int key = MAKEKEY(tag[0], tag[1], 'f'); 01530 int offset = extras.Find(key); 01531 01532 int value; 01533 if (offset < 0) 01534 { 01535 // TODO - what do we want to do on failure? 01536 return NOT_FOUND_TAG_DOUBLE; 01537 } 01538 else 01539 value = extras[offset]; 01540 01541 return doubles[value]; 01542 }
| bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, | |
| String & | readName, | |||
| String & | cigar, | |||
| String & | sequence, | |||
| String & | quality, | |||
| SequenceTranslation | translation | |||
| ) |
Returns the values of all fields except the tags.
| recStruct | structure containing the contents of all non-variable length fields. | |
| readName | read name from the record (return param) | |
| cigar | cigar string from the record (return param) | |
| sequence | sequence string from the record (return param) | |
| quality | quality string from the record (return param) | |
| translation | type of sequence translation to use. |
Definition at line 1344 of file SamRecord.cpp.
References getCigar(), getQuality(), getReadName(), and getSequence().
01347 { 01348 myStatus = SamStatus::SUCCESS; 01349 if(myIsBufferSynced == false) 01350 { 01351 if(!fixBuffer(translation)) 01352 { 01353 // failed to set the buffer, return false. 01354 return(false); 01355 } 01356 } 01357 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct)); 01358 01359 readName = getReadName(); 01360 // Check the status. 01361 if(myStatus != SamStatus::SUCCESS) 01362 { 01363 // Failed to set the fields, return false. 01364 return(false); 01365 } 01366 cigar = getCigar(); 01367 // Check the status. 01368 if(myStatus != SamStatus::SUCCESS) 01369 { 01370 // Failed to set the fields, return false. 01371 return(false); 01372 } 01373 sequence = getSequence(translation); 01374 // Check the status. 01375 if(myStatus != SamStatus::SUCCESS) 01376 { 01377 // Failed to set the fields, return false. 01378 return(false); 01379 } 01380 quality = getQuality(); 01381 // Check the status. 01382 if(myStatus != SamStatus::SUCCESS) 01383 { 01384 // Failed to set the fields, return false. 01385 return(false); 01386 } 01387 return(true); 01388 }
| bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, | |
| String & | readName, | |||
| String & | cigar, | |||
| String & | sequence, | |||
| String & | quality | |||
| ) |
Returns the values of all fields except the tags.
| recStruct | structure containing the contents of all non-variable length fields. | |
| readName | read name from the record (return param) | |
| cigar | cigar string from the record (return param) | |
| sequence | sequence string from the record (return param) | |
| quality | quality string from the record (return param) |
Definition at line 1335 of file SamRecord.cpp.
01337 { 01338 return(getFields(recStruct, readName, cigar, sequence, quality, 01339 mySequenceTranslation)); 01340 }
| uint16_t SamRecord::getFlag | ( | ) |
Get the flag.
Definition at line 770 of file SamRecord.cpp.
Referenced by SamQuerySeqWithRefIter::getNextMatchMismatch().
| int32_t SamRecord::getInsertSize | ( | ) |
Get the inferred insert size of the read pair.
Definition at line 845 of file SamRecord.cpp.
| int & SamRecord::getInteger | ( | const char * | tag | ) |
Get the integer value for the specified tag.
Resets SamStatus to SUCCESS on entry. If the tag is not found, a reference to NOT_FOUND_TAG_INT is returned.
Definition at line 1484 of file SamRecord.cpp.
01485 { 01486 // Init to success. 01487 myStatus = SamStatus::SUCCESS; 01488 // Parse the buffer if necessary. 01489 if(myNeedToSetTagsFromBuffer) 01490 { 01491 if(!setTagsFromBuffer()) 01492 { 01493 // Failed to read the tags from the buffer, so cannot 01494 // get tags. 01495 // TODO - what do we want to do on failure? 01496 } 01497 } 01498 01499 int key = MAKEKEY(tag[0], tag[1], 'i'); 01500 int offset = extras.Find(key); 01501 01502 int value; 01503 if (offset < 0) 01504 { 01505 // TODO - what do we want to do on failure? 01506 return NOT_FOUND_TAG_INT; 01507 } 01508 else 01509 value = extras[offset]; 01510 01511 return integers[value]; 01512 }
| uint8_t SamRecord::getMapQuality | ( | ) |
Get the mapping quality of the record.
Definition at line 726 of file SamRecord.cpp.
| int32_t SamRecord::getMateReferenceID | ( | ) |
Get the mate reference id of the record.
Definition at line 824 of file SamRecord.cpp.
| const char * SamRecord::getMateReferenceName | ( | ) |
Get the mate reference sequence name of the record.
If it is equal to the reference name, it still returns the reference name.
Definition at line 796 of file SamRecord.cpp.
| const char * SamRecord::getMateReferenceNameOrEqual | ( | ) |
Get the mate reference sequence name of the record, returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.
Definition at line 806 of file SamRecord.cpp.
References getReferenceName().
00807 { 00808 myStatus = SamStatus::SUCCESS; 00809 if(myMateReferenceName == "*") 00810 { 00811 return(myMateReferenceName); 00812 } 00813 if(myMateReferenceName == getReferenceName()) 00814 { 00815 return(FIELD_ABSENT_STRING); 00816 } 00817 else 00818 { 00819 return(myMateReferenceName); 00820 } 00821 }
| bool SamRecord::getNextSamTag | ( | char * | tag, | |
| char & | vtype, | |||
| void ** | value | |||
| ) |
Get the next tag from the record.
Sets the Status to SUCCESS when a tag is successfully returned or when there are no more tags. Otherwise the status is set to describe why it failed (parsing, etc).
| tag | set to the tag when a tag is read. | |
| vtype | set to the vtype when a tag is read. | |
| value | pointer to the value of the tag (will need to cast to int, double, char, or string based on vtype). |
Definition at line 1260 of file SamRecord.cpp.
01261 { 01262 myStatus = SamStatus::SUCCESS; 01263 if(myNeedToSetTagsFromBuffer) 01264 { 01265 if(!setTagsFromBuffer()) 01266 { 01267 // Failed to read the tags from the buffer, so cannot 01268 // get tags. 01269 return(false); 01270 } 01271 } 01272 01273 // Increment the tag index to start looking at the next tag. 01274 // At the beginning, it is set to -1. 01275 myLastTagIndex++; 01276 int maxTagIndex = extras.Capacity(); 01277 if(myLastTagIndex >= maxTagIndex) 01278 { 01279 // Hit the end of the tags, return false, no more tags. 01280 // Status is still success since this is not an error, 01281 // it is just the end of the list. 01282 return(false); 01283 } 01284 01285 bool tagFound = false; 01286 // Loop until a tag is found or the end of extras is hit. 01287 while((tagFound == false) && (myLastTagIndex < maxTagIndex)) 01288 { 01289 if(extras.SlotInUse(myLastTagIndex)) 01290 { 01291 // Found a slot to use. 01292 int key = extras.GetKey(myLastTagIndex); 01293 getTag(key, tag); 01294 getVtype(key, vtype); 01295 tagFound = true; 01296 // Get the value associated with the key based on the vtype. 01297 switch (vtype) 01298 { 01299 case 'A' : 01300 *value = getIntegerPtr(myLastTagIndex); 01301 break; 01302 case 'f' : 01303 *value = getDoublePtr(myLastTagIndex); 01304 break; 01305 case 'c' : 01306 case 'C' : 01307 case 's' : 01308 case 'S' : 01309 case 'i' : 01310 case 'I' : 01311 vtype = 'i'; 01312 *value = getIntegerPtr(myLastTagIndex); 01313 break; 01314 case 'Z' : 01315 *value = getStringPtr(myLastTagIndex); 01316 break; 01317 default: 01318 myStatus.setStatus(SamStatus::FAIL_PARSE, 01319 "Unknown tag type"); 01320 tagFound = false; 01321 break; 01322 } 01323 } 01324 if(!tagFound) 01325 { 01326 // Increment the index since a tag was not found. 01327 myLastTagIndex++; 01328 } 01329 } 01330 return(tagFound); 01331 }
| uint32_t SamRecord::getNumOverlaps | ( | int32_t | start, | |
| int32_t | end | |||
| ) |
Return the number of bases in this read that overlap the passed in region.
| start | inclusive 0-based start position (reference position) of the region to check for overlaps in. (-1 indicates to start at the beginning of the reference.) | |
| end | exclusive 0-based end position (reference position) of the region to check for overlaps in. (-1 indicates to go to the end of the reference.) |
Definition at line 1568 of file SamRecord.cpp.
References get0BasedPosition().
Referenced by SamFile::GetNumOverlaps().
01569 { 01570 // Determine whether or not the cigar has been parsed, which sets up 01571 // the cigar roller. This is determined by checking the alignment length. 01572 if(myAlignmentLength == -1) 01573 { 01574 parseCigar(); 01575 } 01576 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition())); 01577 }
| char SamRecord::getQuality | ( | int | index | ) |
Get the quality character at the specified index into the quality 0 to readLength - 1.
| index | index into the quality string (0 to readLength-1). |
Definition at line 1165 of file SamRecord.cpp.
References getReadLength().
01166 { 01167 // Determine the read length. 01168 int32_t readLen = getReadLength(); 01169 01170 // If the read length is 0, return ' ' whose ascii code is below 01171 // the minimum ascii code for qualities. 01172 if(readLen == 0) 01173 { 01174 return(BaseUtilities::UNKNOWN_QUALITY_CHAR); 01175 } 01176 else if((index < 0) || (index >= readLen)) 01177 { 01178 // Only get here if the index was out of range, so thow an exception. 01179 String exceptionString = "SamRecord::getQuality("; 01180 exceptionString += index; 01181 exceptionString += ") is out of range. Index must be between 0 and "; 01182 exceptionString += (readLen - 1); 01183 throw std::runtime_error(exceptionString.c_str()); 01184 } 01185 01186 if(myQuality.Length() == 0) 01187 { 01188 // Parse BAM Quality. 01189 unsigned char * packedQuality = 01190 (unsigned char *)myRecordPtr->myData + 01191 myRecordPtr->myReadNameLength + 01192 myRecordPtr->myCigarLength * sizeof(int) + 01193 (myRecordPtr->myReadLength + 1) / 2; 01194 return(packedQuality[index] + 33); 01195 } 01196 else 01197 { 01198 // Already have string. 01199 if((myQuality.Length() == 1) && (myQuality[0] == '*')) 01200 { 01201 // Return 0xFF like it does for BAM. 01202 return(0xFF); 01203 } 01204 else 01205 { 01206 return(myQuality[index]); 01207 } 01208 } 01209 }
| const char * SamRecord::getQuality | ( | ) |
Returns the SAM formatted quality string.
Definition at line 1024 of file SamRecord.cpp.
Referenced by getFields().
| int32_t SamRecord::getReadLength | ( | ) |
Get the length of the read.
Definition at line 777 of file SamRecord.cpp.
Referenced by SamQuerySeqWithRefIter::getNextMatchMismatch(), getQuality(), getSequence(), and SamQuerySeqWithRefIter::reset().
00778 { 00779 myStatus = SamStatus::SUCCESS; 00780 if(myIsSequenceBufferValid == false) 00781 { 00782 // If the sequence is "*", then return 0. 00783 if((mySequence.Length() == 1) && (mySequence[0] == '*')) 00784 { 00785 return(0); 00786 } 00787 // Do not add 1 since it is not null terminated. 00788 return(mySequence.Length()); 00789 } 00790 return(myRecordPtr->myReadLength); 00791 }
| const char * SamRecord::getReadName | ( | ) |
Returns the SAM formatted Read Name (QNAME).
Definition at line 928 of file SamRecord.cpp.
Referenced by getFields(), and SamFile::validateSortOrder().
00929 { 00930 myStatus = SamStatus::SUCCESS; 00931 if(myReadName.Length() == 0) 00932 { 00933 // 0 Length, means that it is in the buffer, but has not yet 00934 // been synced to the string, so do the sync. 00935 myReadName = (char*)&(myRecordPtr->myData); 00936 } 00937 return myReadName.c_str(); 00938 }
| uint8_t SamRecord::getReadNameLength | ( | ) |
Get the length of the readname (QNAME) including the null.
Definition at line 712 of file SamRecord.cpp.
00713 { 00714 myStatus = SamStatus::SUCCESS; 00715 // If the buffer is valid, return the size from there, otherwise get the 00716 // size from the string length + 1 (ending null). 00717 if(myIsReadNameBufferValid) 00718 { 00719 return(myRecordPtr->myReadNameLength); 00720 } 00721 00722 return(myReadName.Length() + 1); 00723 }
| const void * SamRecord::getRecordBuffer | ( | SequenceTranslation | translation | ) |
Get a const pointer to the buffer that contains the BAM representation of the record.
| translation | type of sequence translation to use. |
Definition at line 597 of file SamRecord.cpp.
00598 { 00599 myStatus = SamStatus::SUCCESS; 00600 bool status = true; 00601 // If the buffer is not synced or the sequence in the buffer is not 00602 // properly translated, fix the buffer. 00603 if((myIsBufferSynced == false) || 00604 (myBufferSequenceTranslation != translation)) 00605 { 00606 status &= fixBuffer(translation); 00607 } 00608 // If the buffer is synced, check to see if the tags need to be synced. 00609 if(myNeedToSetTagsInBuffer) 00610 { 00611 status &= setTagsInBuffer(); 00612 } 00613 if(!status) 00614 { 00615 return(NULL); 00616 } 00617 return (const void *)myRecordPtr; 00618 }
| const void * SamRecord::getRecordBuffer | ( | ) |
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition at line 590 of file SamRecord.cpp.
00591 { 00592 return(getRecordBuffer(mySequenceTranslation)); 00593 }
| int32_t SamRecord::getReferenceID | ( | ) |
Get the reference sequence id of the record.
Definition at line 691 of file SamRecord.cpp.
Referenced by SamFile::readIndexedRecord(), and SamFile::validateSortOrder().
| const char * SamRecord::getReferenceName | ( | ) |
Get the reference sequence name of the record.
Definition at line 684 of file SamRecord.cpp.
Referenced by getMateReferenceNameOrEqual(), getSequence(), and SamQuerySeqWithRefIter::reset().
| char SamRecord::getSequence | ( | int | index, | |
| SequenceTranslation | translation | |||
| ) |
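Get the sequence base at the specified index into this sequence (0 to readLength - 1), performing the specified sequence translation. Throws std::runtime_error if the sequence is '*' (read length of 0) or if the index is out of range.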
| index | index into the sequence string (0 to readLength-1). | |
| translation | type of sequence translation to use. |
Definition at line 1043 of file SamRecord.cpp.
References EQUAL, getReadLength(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().
01044 { 01045 static const char * asciiBases = "=AC.G...T......N"; 01046 01047 // Determine the read length. 01048 int32_t readLen = getReadLength(); 01049 01050 // If the read length is 0, this method should not be called. 01051 if(readLen == 0) 01052 { 01053 String exceptionString = "SamRecord::getSequence("; 01054 exceptionString += index; 01055 exceptionString += ") is not allowed since sequence = '*'"; 01056 throw std::runtime_error(exceptionString.c_str()); 01057 } 01058 else if((index < 0) || (index >= readLen)) 01059 { 01060 // Only get here if the index was out of range, so thow an exception. 01061 String exceptionString = "SamRecord::getSequence("; 01062 exceptionString += index; 01063 exceptionString += ") is out of range. Index must be between 0 and "; 01064 exceptionString += (readLen - 1); 01065 throw std::runtime_error(exceptionString.c_str()); 01066 } 01067 01068 // Determine if translation needs to be done. 01069 if((translation == NONE) || (myRefPtr == NULL)) 01070 { 01071 // No translation needs to be done. 01072 if(mySequence.Length() == 0) 01073 { 01074 // Parse BAM sequence. 01075 // TODO - maybe store this pointer - and use that to track when 01076 // valid? 01077 unsigned char * packedSequence = 01078 (unsigned char *)myRecordPtr->myData + 01079 myRecordPtr->myReadNameLength + 01080 myRecordPtr->myCigarLength * sizeof(int); 01081 01082 return(index & 1 ? 01083 asciiBases[packedSequence[index / 2] & 0xF] : 01084 asciiBases[packedSequence[index / 2] >> 4]); 01085 } 01086 // Already have string. 01087 return(mySequence[index]); 01088 } 01089 else 01090 { 01091 // Need to translate the sequence either to have '=' or to not 01092 // have it. 01093 // First check to see if the sequence has been set. 01094 if(mySequence.Length() == 0) 01095 { 01096 // 0 Length, means that it is in the buffer, but has not yet 01097 // been synced to the string, so do the sync. 
01098 setSequenceAndQualityFromBuffer(); 01099 } 01100 01101 // Check the type of translation. 01102 if(translation == EQUAL) 01103 { 01104 // Check whether or not the string has already been 01105 // retrieved that has the '=' in it. 01106 if(mySeqWithEq.length() == 0) 01107 { 01108 // The string with '=' has not yet been determined, 01109 // so get the string. 01110 // Check to see if the sequence is defined. 01111 if(mySequence == "*") 01112 { 01113 // Sequence is undefined, so no translation necessary. 01114 mySeqWithEq = '*'; 01115 } 01116 else 01117 { 01118 // Sequence defined, so translate it. 01119 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 01120 myRecordPtr->myPosition, 01121 myCigarRoller, 01122 getReferenceName(), 01123 *myRefPtr, 01124 mySeqWithEq); 01125 } 01126 } 01127 // Sequence is set, so return it. 01128 return(mySeqWithEq[index]); 01129 } 01130 else 01131 { 01132 // translation == BASES 01133 // Check whether or not the string has already been 01134 // retrieved that does not have the '=' in it. 01135 if(mySeqWithoutEq.length() == 0) 01136 { 01137 // The string with '=' has not yet been determined, 01138 // so get the string. 01139 // Check to see if the sequence is defined. 01140 if(mySequence == "*") 01141 { 01142 // Sequence is undefined, so no translation necessary. 01143 mySeqWithoutEq = '*'; 01144 } 01145 else 01146 { 01147 // Sequence defined, so translate it. 01148 // The string without '=' has not yet been determined, 01149 // so get the string. 01150 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 01151 myRecordPtr->myPosition, 01152 myCigarRoller, 01153 getReferenceName(), 01154 *myRefPtr, 01155 mySeqWithoutEq); 01156 } 01157 } 01158 // Sequence is set, so return it. 01159 return(mySeqWithoutEq[index]); 01160 } 01161 } 01162 }
| char SamRecord::getSequence | ( | int | index | ) |
Get the sequence base at the specified index into this sequence (0 to readLength - 1), translating the base as specified by setSequenceTranslation.
| index | index into the sequence string (0 to readLength-1). |
Definition at line 1037 of file SamRecord.cpp.
References getSequence().
01038 { 01039 return(getSequence(index, mySequenceTranslation)); 01040 }
| const char * SamRecord::getSequence | ( | SequenceTranslation | translation | ) |
Returns the SAM formatted sequence string performing the specified sequence translation.
| translation | type of sequence translation to use. |
Definition at line 960 of file SamRecord.cpp.
References EQUAL, getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().
00961 { 00962 myStatus = SamStatus::SUCCESS; 00963 if(mySequence.Length() == 0) 00964 { 00965 // 0 Length, means that it is in the buffer, but has not yet 00966 // been synced to the string, so do the sync. 00967 setSequenceAndQualityFromBuffer(); 00968 } 00969 00970 // Determine if translation needs to be done. 00971 if((translation == NONE) || (myRefPtr == NULL)) 00972 { 00973 return mySequence.c_str(); 00974 } 00975 else if(translation == EQUAL) 00976 { 00977 if(mySeqWithEq.length() == 0) 00978 { 00979 // Check to see if the sequence is defined. 00980 if(mySequence == "*") 00981 { 00982 // Sequence is undefined, so no translation necessary. 00983 mySeqWithEq = '*'; 00984 } 00985 else 00986 { 00987 // Sequence defined, so translate it. 00988 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 00989 myRecordPtr->myPosition, 00990 myCigarRoller, 00991 getReferenceName(), 00992 *myRefPtr, 00993 mySeqWithEq); 00994 } 00995 } 00996 return(mySeqWithEq.c_str()); 00997 } 00998 else 00999 { 01000 // translation == BASES 01001 if(mySeqWithoutEq.length() == 0) 01002 { 01003 if(mySequence == "*") 01004 { 01005 // Sequence is undefined, so no translation necessary. 01006 mySeqWithoutEq = '*'; 01007 } 01008 else 01009 { 01010 // Sequence defined, so translate it. 01011 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 01012 myRecordPtr->myPosition, 01013 myCigarRoller, 01014 getReferenceName(), 01015 *myRefPtr, 01016 mySeqWithoutEq); 01017 } 01018 } 01019 return(mySeqWithoutEq.c_str()); 01020 } 01021 }
| const char * SamRecord::getSequence | ( | ) |
Returns the SAM formatted sequence string, translating the base as specified by setSequenceTranslation.
Definition at line 954 of file SamRecord.cpp.
Referenced by getFields(), SamQuerySeqWithRefIter::getNextMatchMismatch(), and getSequence().
00955 { 00956 return(getSequence(mySequenceTranslation)); 00957 }
| const SamStatus & SamRecord::getStatus | ( | ) |
Returns the status associated with the last method that sets the status.
Definition at line 1448 of file SamRecord.cpp.
| String & SamRecord::getString | ( | const char * | tag | ) |
Get the string value for the specified tag.
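Resets SamStatus to SUCCESS. A missing tag is not reported through the status; the not-found string (NOT_FOUND_TAG_STRING) is returned instead.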
Definition at line 1454 of file SamRecord.cpp.
01455 { 01456 // Init to success. 01457 myStatus = SamStatus::SUCCESS; 01458 // Parse the buffer if necessary. 01459 if(myNeedToSetTagsFromBuffer) 01460 { 01461 if(!setTagsFromBuffer()) 01462 { 01463 // Failed to read the tags from the buffer, so cannot 01464 // get tags. 01465 // TODO - what do we want to do on failure? 01466 } 01467 } 01468 01469 int key = MAKEKEY(tag[0], tag[1], 'Z'); 01470 int offset = extras.Find(key); 01471 01472 int value; 01473 if (offset < 0) 01474 { 01475 // TODO - what do we want to do on failure? 01476 return(NOT_FOUND_TAG_STRING); 01477 } 01478 else 01479 value = extras[offset]; 01480 01481 return strings[value]; 01482 }
| uint32_t SamRecord::getTagLength | ( | ) |
Returns the length of the tags in BAM format.
Definition at line 1227 of file SamRecord.cpp.
01228 { 01229 myStatus = SamStatus::SUCCESS; 01230 if(myNeedToSetTagsFromBuffer) 01231 { 01232 // Tags are only set in the buffer, so the size of the tags is 01233 // the length of the record minus the starting location of the tags. 01234 unsigned char * tagStart = 01235 (unsigned char *)myRecordPtr->myData 01236 + myRecordPtr->myReadNameLength 01237 + myRecordPtr->myCigarLength * sizeof(int) 01238 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength; 01239 01240 // The non-tags take up from the start of the record to the tag start. 01241 // Do not include the block size part of the record since it is not 01242 // included in the size. 01243 uint32_t nonTagSize = 01244 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID); 01245 // Tags take up the size of the block minus the non-tag section. 01246 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize; 01247 return(tagSize); 01248 } 01249 01250 // Tags are stored outside the buffer, so myTagBufferSize is set. 01251 return(myTagBufferSize); 01252 }
| bool SamRecord::isCharType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a char type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1413 of file SamRecord.cpp.
| bool SamRecord::isDoubleType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a double type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1403 of file SamRecord.cpp.
| bool SamRecord::isIntegerType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is an integer type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1391 of file SamRecord.cpp.
| bool SamRecord::isStringType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a string type.
Does not set SamStatus.
| vtype | value type to check. |
Definition at line 1423 of file SamRecord.cpp.
| bool SamRecord::isValid | ( | SamFileHeader & | header | ) |
Returns whether or not the record is valid.
Sets the status to indicate success or failure.
| header | SAM Header associated with the record. Used to perform some validation against the header. |
Definition at line 164 of file SamRecord.cpp.
00165 { 00166 myStatus = SamStatus::SUCCESS; 00167 SamValidationErrors invalidSamErrors; 00168 if(!SamValidator::isValid(header, *this, invalidSamErrors)) 00169 { 00170 // The record is not valid. 00171 std::string errorMessage = ""; 00172 invalidSamErrors.getErrorString(errorMessage); 00173 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str()); 00174 return(false); 00175 } 00176 // The record is valid. 00177 return(true); 00178 }
| void SamRecord::resetRecord | ( | ) |
Reset the fields of the record to a default value.
This is not necessary when you are reading a Sam/Bam file, but if you are setting fields, it is a good idea to clean out a record before reusing it. Clearing it allows you to not have to set any empty fields.
Definition at line 91 of file SamRecord.cpp.
References clearTags(), and NONE.
Referenced by SamRecord(), setBuffer(), setBufferFromFile(), and ~SamRecord().
00092 { 00093 myIsBufferSynced = true; 00094 00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE; 00096 myRecordPtr->myReferenceID = -1; 00097 myRecordPtr->myPosition = -1; 00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH; 00099 myRecordPtr->myMapQuality = 0; 00100 myRecordPtr->myBin = DEFAULT_BIN; 00101 myRecordPtr->myCigarLength = 0; 00102 myRecordPtr->myFlag = 0; 00103 myRecordPtr->myReadLength = 0; 00104 myRecordPtr->myMateReferenceID = -1; 00105 myRecordPtr->myMatePosition = -1; 00106 myRecordPtr->myInsertSize = 0; 00107 00108 // Set the sam values for the variable length fields. 00109 // TODO - one way to speed this up might be to not set to "*" and just 00110 // clear them, and write out a '*' for SAM if it is empty. 00111 myReadName = DEFAULT_READ_NAME; 00112 myReferenceName = "*"; 00113 myMateReferenceName = "*"; 00114 myCigar = "*"; 00115 mySequence = "*"; 00116 mySeqWithEq.clear(); 00117 mySeqWithoutEq.clear(); 00118 myQuality = "*"; 00119 myNeedToSetTagsFromBuffer = false; 00120 myNeedToSetTagsInBuffer = false; 00121 00122 // Initialize the calculated alignment info to the uncalculated value. 00123 myAlignmentLength = -1; 00124 myUnclippedStartOffset = -1; 00125 myUnclippedEndOffset = -1; 00126 00127 clearTags(); 00128 00129 // Set the bam values for the variable length fields. 00130 // Only the read name needs to be set, the others are a length of 0. 00131 // Set the read name. The min size of myRecordPtr includes the size for 00132 // the default read name. 00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(), 00134 myRecordPtr->myReadNameLength); 00135 00136 // Set that the variable length buffer fields are valid. 
00137 myIsReadNameBufferValid = true; 00138 myIsCigarBufferValid = true; 00139 myIsSequenceBufferValid = true; 00140 myBufferSequenceTranslation = NONE; 00141 myIsQualityBufferValid = true; 00142 myIsTagsBufferValid = true; 00143 myIsBinValid = true; 00144 00145 myCigarTempBufferLength = -1; 00146 00147 myStatus = SamStatus::SUCCESS; 00148 00149 NOT_FOUND_TAG_STRING = ""; 00150 NOT_FOUND_TAG_INT = -1; 00151 NOT_FOUND_TAG_DOUBLE = -1; 00152 }
| bool SamRecord::set0BasedMatePosition | ( | int32_t | matePosition | ) |
Set the leftmost mate position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| matePosition | 0-based mate start position |
Definition at line 394 of file SamRecord.cpp.
Referenced by set1BasedMatePosition().
| bool SamRecord::set0BasedPosition | ( | int32_t | position | ) |
Set the leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| position | 0-based start position |
Definition at line 309 of file SamRecord.cpp.
Referenced by set1BasedPosition().
| bool SamRecord::set1BasedMatePosition | ( | int32_t | matePosition | ) |
Set the leftmost mate position using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| matePosition | 1-based mate start position |
Definition at line 388 of file SamRecord.cpp.
References set0BasedMatePosition().
00389 { 00390 return(set0BasedMatePosition(matePosition - 1)); 00391 }
| bool SamRecord::set1BasedPosition | ( | int32_t | position | ) |
Set the leftmost position using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
| position | 1-based start position |
Definition at line 303 of file SamRecord.cpp.
References set0BasedPosition().
00304 { 00305 return(set0BasedPosition(position - 1)); 00306 }
| SamStatus::Status SamRecord::setBuffer | ( | const char * | fromBuffer, | |
| uint32_t | fromBufferSize, | |||
| SamFileHeader & | header | |||
| ) |
Sets the SamRecord to contain the information in BAM format found in fromBuffer.
| fromBuffer | buffer to read the BAM record from. | |
| fromBufferSize | size of the buffer containing the BAM record. | |
| header | BAM header for the record. |
Definition at line 435 of file SamRecord.cpp.
References resetRecord().
00438 { 00439 myStatus = SamStatus::SUCCESS; 00440 if((fromBuffer == NULL) || (fromBufferSize == 0)) 00441 { 00442 // Buffer is empty. 00443 myStatus.setStatus(SamStatus::FAIL_PARSE, 00444 "Cannot parse an empty file."); 00445 return(SamStatus::FAIL_PARSE); 00446 } 00447 00448 // Clear the record. 00449 resetRecord(); 00450 00451 // allocate space for the record size. 00452 if(!allocateRecordStructure(fromBufferSize)) 00453 { 00454 // Failed to allocate space. 00455 return(SamStatus::FAIL_MEM); 00456 } 00457 00458 memcpy(myRecordPtr, fromBuffer, fromBufferSize); 00459 00460 setVariablesForNewBuffer(header); 00461 00462 // Return the status of the record. 00463 return(SamStatus::SUCCESS); 00464 }
| SamStatus::Status SamRecord::setBufferFromFile | ( | IFILE | filePtr, | |
| SamFileHeader & | header | |||
| ) |
Read the BAM record from a file.
| filePtr | file to read the buffer from. | |
| header | BAM header for the record. |
Definition at line 182 of file SamRecord.cpp.
References resetRecord().
00184 { 00185 myStatus = SamStatus::SUCCESS; 00186 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00187 { 00188 // File is not open, return failure. 00189 myStatus.setStatus(SamStatus::FAIL_ORDER, 00190 "Can't read from an unopened file."); 00191 return(SamStatus::FAIL_ORDER); 00192 } 00193 00194 // Clear the record. 00195 resetRecord(); 00196 00197 // read the record size. 00198 int numBytes = 00199 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t)); 00200 00201 if(ifeof(filePtr)) 00202 { 00203 if(numBytes == 0) 00204 { 00205 // End of file, nothing was read, no more records. 00206 myStatus.setStatus(SamStatus::NO_MORE_RECS, 00207 "No more records left to read."); 00208 return(SamStatus::NO_MORE_RECS); 00209 } 00210 else 00211 { 00212 // Error: end of the file reached prior to reading the rest of the 00213 // record. 00214 myStatus.setStatus(SamStatus::FAIL_PARSE, 00215 "EOF reached in the middle of a record."); 00216 return(SamStatus::FAIL_PARSE); 00217 } 00218 } 00219 00220 // allocate space for the record size. 00221 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t))) 00222 { 00223 // Failed to allocate space. 00224 // Status is set by allocateRecordStructure. 00225 return(SamStatus::FAIL_MEM); 00226 } 00227 00228 // Read the rest of the alignment block, starting at the reference id. 00229 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize) 00230 != (unsigned int)myRecordPtr->myBlockSize) 00231 { 00232 // Error reading the record. Reset it and return failure. 00233 resetRecord(); 00234 myStatus.setStatus(SamStatus::FAIL_IO, 00235 "Failed to read the record"); 00236 return(SamStatus::FAIL_IO); 00237 } 00238 00239 setVariablesForNewBuffer(header); 00240 00241 // Return the status of the record. 00242 return(SamStatus::SUCCESS); 00243 }
| bool SamRecord::setCigar | ( | const Cigar & | cigar | ) |
Set the CIGAR to the specified Cigar object.
Internal processing handles the switching between SAM/BAM formats when read/written.
| cigar | object to set this record's cigar to have. |
Definition at line 345 of file SamRecord.cpp.
00346 { 00347 myStatus = SamStatus::SUCCESS; 00348 cigar.getCigarString(myCigar); 00349 00350 myIsBufferSynced = false; 00351 myIsCigarBufferValid = false; 00352 myCigarTempBufferLength = -1; 00353 myIsBinValid = false; 00354 00355 // Initialize the calculated alignment info to the uncalculated value. 00356 myAlignmentLength = -1; 00357 myUnclippedStartOffset = -1; 00358 myUnclippedEndOffset = -1; 00359 00360 return true; 00361 }
| bool SamRecord::setCigar | ( | const char * | cigar | ) |
Set the CIGAR to the specified SAM formatted cigar string.
Internal processing handles the switching between SAM/BAM formats when read/written.
| cigar | string containing the SAM formatted cigar. |
Definition at line 326 of file SamRecord.cpp.
00327 { 00328 myStatus = SamStatus::SUCCESS; 00329 myCigar = cigar; 00330 00331 myIsBufferSynced = false; 00332 myIsCigarBufferValid = false; 00333 myCigarTempBufferLength = -1; 00334 myIsBinValid = false; 00335 00336 // Initialize the calculated alignment info to the uncalculated value. 00337 myAlignmentLength = -1; 00338 myUnclippedStartOffset = -1; 00339 myUnclippedEndOffset = -1; 00340 00341 return true; 00342 }
| bool SamRecord::setFlag | ( | uint16_t | flag | ) |
Set the bitwise flag to the specified value.
| flag | integer flag to use. |
Definition at line 283 of file SamRecord.cpp.
| bool SamRecord::setInsertSize | ( | int32_t | insertSize | ) |
Sets the inferred insert size.
| insertSize | inferred insert size. |
Definition at line 402 of file SamRecord.cpp.
| bool SamRecord::setMapQuality | ( | uint8_t | mapQuality | ) |
Set the mapping quality.
| mapQuality | map quality to set in the record. |
Definition at line 318 of file SamRecord.cpp.
| bool SamRecord::setMateReferenceName | ( | SamFileHeader & | header, | |
| const char * | mateReferenceName | |||
| ) |
Set the mate reference sequence name to the specified name, using the header to determine the mate reference id.
| header | SAM/BAM header to use to determine the mate reference id. | |
| mateReferenceName | mate reference name to use. |
Definition at line 364 of file SamRecord.cpp.
00366 { 00367 myStatus = SamStatus::SUCCESS; 00368 // Set the mate reference, if it is "=", set it to be equal 00369 // to myReferenceName. This assumes that myReferenceName has already 00370 // been called. 00371 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0) 00372 { 00373 myMateReferenceName = myReferenceName; 00374 } 00375 else 00376 { 00377 myMateReferenceName = mateReferenceName; 00378 } 00379 00380 // Set the Mate Reference ID. 00381 myRecordPtr->myMateReferenceID = 00382 header.getReferenceID(myMateReferenceName); 00383 00384 return true; 00385 }
| bool SamRecord::setQuality | ( | const char * | quality | ) |
Sets the quality to the specified quality string.
This is a SAM formatted quality string. Internal processing handles switching between SAM/BAM formats when read/written.
| quality | SAM quality string. |
Definition at line 423 of file SamRecord.cpp.
| bool SamRecord::setReadName | ( | const char * | readName | ) |
Set QNAME to the passed in name.
| readName | the readname to set the QNAME to. |
Definition at line 261 of file SamRecord.cpp.
00262 { 00263 myReadName = readName; 00264 myIsBufferSynced = false; 00265 myIsReadNameBufferValid = false; 00266 myStatus = SamStatus::SUCCESS; 00267 00268 // The read name must at least have some length, otherwise this is a parsing 00269 // error. 00270 if(myReadName.Length() == 0) 00271 { 00272 // Invalid - reset ReadName return false. 00273 myReadName = DEFAULT_READ_NAME; 00274 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH; 00275 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name."); 00276 return(false); 00277 } 00278 00279 return true; 00280 }
| void SamRecord::setReference | ( | GenomeSequence * | reference | ) |
Set the reference to the specified genome sequence object.
| reference | pointer to the GenomeSequence object. |
Definition at line 246 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::readIndexedRecord(), SamFile::ReadRecord(), SamFile::validateSortOrder(), and SamFile::WriteRecord().
| bool SamRecord::setReferenceName | ( | SamFileHeader & | header, | |
| const char * | referenceName | |||
| ) |
Set the reference name to the specified name, using the header to determine the reference id.
| header | SAM/BAM header to use to determine the reference id. | |
| referenceName | reference name to use. |
Definition at line 291 of file SamRecord.cpp.
| bool SamRecord::setSequence | ( | const char * | seq | ) |
Sets the sequence to the specified sequence string.
This is a SAM formatted sequence string. Internal processing handles switching between SAM/BAM formats when read/written.
| seq | SAM sequence string. May contain '='. |
Definition at line 410 of file SamRecord.cpp.
| void SamRecord::setSequenceTranslation | ( | SequenceTranslation | translation | ) |
Set the type of sequence translation to use when getting the sequence.
The default type (if this method is never called) is NONE (the sequence is left as-is). Can be overridden by using the accessors that take a SequenceTranslation parameter.
| translation | type of sequence translation to use. |
Definition at line 255 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::readIndexedRecord(), SamFile::ReadRecord(), and SamFile::validateSortOrder().
| SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr, | |
| SequenceTranslation | translation | |||
| ) |
Write the record as a BAM into the specified file.
| filePtr | file to write the BAM record into. | |
| translation | type of sequence translation to use. |
Definition at line 630 of file SamRecord.cpp.
00632 { 00633 myStatus = SamStatus::SUCCESS; 00634 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00635 { 00636 // File is not open, return failure. 00637 myStatus.setStatus(SamStatus::FAIL_ORDER, 00638 "Can't write to an unopened file."); 00639 return(SamStatus::FAIL_ORDER); 00640 } 00641 00642 if((myIsBufferSynced == false) || 00643 (myBufferSequenceTranslation != translation)) 00644 { 00645 if(!fixBuffer(translation)) 00646 { 00647 return(myStatus.getStatus()); 00648 } 00649 } 00650 00651 // Write the record. 00652 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t); 00653 unsigned int numBytesWritten = 00654 ifwrite(filePtr, myRecordPtr, numBytesToWrite); 00655 00656 // Return status based on if the correct number of bytes were written. 00657 if(numBytesToWrite == numBytesWritten) 00658 { 00659 return(SamStatus::SUCCESS); 00660 } 00661 // The correct number of bytes were not written. 00662 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record."); 00663 return(SamStatus::FAIL_IO); 00664 }
| SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr | ) |
Write the record as a BAM into the specified file.
| filePtr | file to write the BAM record into. |
Definition at line 623 of file SamRecord.cpp.
00624 { 00625 return(writeRecordBuffer(filePtr, mySequenceTranslation)); 00626 }
1.6.3