Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record. More...
#include <SamRecord.h>
Public Types | |
enum | SequenceTranslation { NONE, EQUAL, BASES } |
Enum containing the settings on how to translate the sequence if a reference is available. More... | |
Public Member Functions | |
SamRecord () | |
Default Constructor. | |
SamRecord (ErrorHandler::HandlingType errorHandlingType) | |
Constructor that sets the error handling type. | |
~SamRecord () | |
Destructor. | |
void | resetRecord () |
Reset the fields of the record to a default value. | |
void | resetTagIter () |
Reset the tag iterator to the beginning of the tags. | |
bool | isValid (SamFileHeader &header) |
Returns whether or not the record is valid. | |
SamStatus::Status | setBufferFromFile (IFILE filePtr, SamFileHeader &header) |
Read the BAM record from a file. | |
void | setReference (GenomeSequence *reference) |
Set the reference to the specified genome sequence object. | |
void | setSequenceTranslation (SequenceTranslation translation) |
Set the type of sequence translation to use when getting the sequence. | |
bool | isIntegerType (char vtype) const |
Returns whether or not the specified vtype is an integer type. | |
bool | isDoubleType (char vtype) const |
Returns whether or not the specified vtype is a double type. | |
bool | isCharType (char vtype) const |
Returns whether or not the specified vtype is a char type. | |
bool | isStringType (char vtype) const |
Returns whether or not the specified vtype is a string type. | |
void | clearTags () |
Clear the tags in this record. | |
bool | rmTag (const char *tag, char type) |
Remove a tag. | |
bool | rmTags (const char *tags) |
Remove tags. | |
const SamStatus & | getStatus () |
Returns the status associated with the last method that sets the status. | |
bool | getTagsString (const char *tags, String &returnString, char delim= '\t') |
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE. | |
String * | getStringTag (const char *tag) |
Get the string value for the specified tag. | |
int * | getIntegerTag (const char *tag) |
Get the integer value for the specified tag. | |
char * | getCharTag (const char *tag) |
Get the char value for the specified tag. | |
double * | getDoubleTag (const char *tag) |
Get the double value for the specified tag. | |
String & | getString (const char *tag) |
Get the string value for the specified tag. | |
int & | getInteger (const char *tag) |
Get the integer value for the specified tag. | |
char & | getChar (const char *tag) |
Get the char value for the specified tag. | |
double & | getDouble (const char *tag) |
Get the double value for the specified tag. | |
bool | checkString (const char *tag) |
Check if the specified tag contains a string. | |
bool | checkInteger (const char *tag) |
Check if the specified tag contains an integer. | |
bool | checkDouble (const char *tag) |
Check if the specified tag contains a double. | |
bool | checkTag (const char *tag, char type) |
Check if the specified tag contains a value of the specified vtype. | |
uint32_t | getNumOverlaps (int32_t start, int32_t end) |
Return the number of bases in this read that overlap the passed in region. | |
Set Alignment Data | |
bool | setReadName (const char *readName) |
Set QNAME to the passed in name. | |
bool | setFlag (uint16_t flag) |
Set the bitwise flag to the specified value. | |
bool | setReferenceName (SamFileHeader &header, const char *referenceName) |
Set the reference name to the specified name, using the header to determine the reference id. | |
bool | set1BasedPosition (int32_t position) |
Set the leftmost position using the specified 1-based (SAM format) value. | |
bool | set0BasedPosition (int32_t position) |
Set the leftmost position using the specified 0-based (BAM format) value. | |
bool | setMapQuality (uint8_t mapQuality) |
Set the mapping quality. | |
bool | setCigar (const char *cigar) |
Set the CIGAR to the specified SAM formatted cigar string. | |
bool | setCigar (const Cigar &cigar) |
Set the CIGAR to the specified Cigar object. | |
bool | setMateReferenceName (SamFileHeader &header, const char *mateReferenceName) |
Set the mate reference sequence name to the specified name, using the header to determine the mate reference id. | |
bool | set1BasedMatePosition (int32_t matePosition) |
Set the leftmost mate position using the specified 1-based (SAM format) value. | |
bool | set0BasedMatePosition (int32_t matePosition) |
Set the leftmost mate position using the specified 0-based (BAM format) value. | |
bool | setInsertSize (int32_t insertSize) |
Sets the inferred insert size. | |
bool | setSequence (const char *seq) |
Sets the sequence to the specified sequence string. | |
bool | setQuality (const char *quality) |
Sets the quality to the specified quality string. | |
SamStatus::Status | setBuffer (const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header) |
Sets the SamRecord to contain the information in BAM format found in fromBuffer. | |
bool | addIntTag (const char *tag, int32_t value) |
Add the specified integer tag to the record. | |
bool | addTag (const char *tag, char vtype, const char *value) |
Add the specified tag to the record. | |
bool | shiftIndelsLeft () |
Shift the indels (if any) to the left by updating the CIGAR. | |
Get Alignment Data | |
const void * | getRecordBuffer () |
Get a const pointer to the buffer that contains the BAM representation of the record. | |
const void * | getRecordBuffer (SequenceTranslation translation) |
Get a const pointer to the buffer that contains the BAM representation of the record. | |
SamStatus::Status | writeRecordBuffer (IFILE filePtr) |
Write the record as a BAM into the specified file. | |
SamStatus::Status | writeRecordBuffer (IFILE filePtr, SequenceTranslation translation) |
Write the record as a BAM into the specified file. | |
int32_t | getBlockSize () |
Get the block size of the record. | |
const char * | getReferenceName () |
Get the reference sequence name of the record. | |
int32_t | getReferenceID () |
Get the reference sequence id of the record. | |
int32_t | get1BasedPosition () |
Get the 1-based(SAM) leftmost position of the record. | |
int32_t | get0BasedPosition () |
Get the 0-based(BAM) leftmost position of the record. | |
uint8_t | getReadNameLength () |
Get the length of the readname (QNAME) including the null. | |
uint8_t | getMapQuality () |
Get the mapping quality of the record. | |
uint16_t | getBin () |
Get the BAM bin for the record. | |
uint16_t | getCigarLength () |
Get the length of the CIGAR in BAM format. | |
uint16_t | getFlag () |
Get the flag. | |
int32_t | getReadLength () |
Get the length of the read. | |
const char * | getMateReferenceName () |
Get the mate reference sequence name of the record. | |
const char * | getMateReferenceNameOrEqual () |
Get the mate reference sequence name of the record, returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned. | |
int32_t | getMateReferenceID () |
Get the mate reference id of the record. | |
int32_t | get1BasedMatePosition () |
Get the 1-based(SAM) leftmost mate position of the record. | |
int32_t | get0BasedMatePosition () |
Get the 0-based(BAM) leftmost mate position of the record. | |
int32_t | getInsertSize () |
Get the inferred insert size of the read pair. | |
int32_t | get0BasedAlignmentEnd () |
Returns the 0-based inclusive rightmost position of the clipped sequence. | |
int32_t | get1BasedAlignmentEnd () |
Returns the 1-based inclusive rightmost position of the clipped sequence. | |
int32_t | getAlignmentLength () |
Returns the length of the clipped sequence, returning 0 if the cigar is '*'. | |
int32_t | get0BasedUnclippedStart () |
Returns the 0-based inclusive left-most position adjusted for clipped bases. | |
int32_t | get1BasedUnclippedStart () |
Returns the 1-based inclusive left-most position adjusted for clipped bases. | |
int32_t | get0BasedUnclippedEnd () |
Returns the 0-based inclusive right-most position adjusted for clipped bases. | |
int32_t | get1BasedUnclippedEnd () |
Returns the 1-based inclusive right-most position adjusted for clipped bases. | |
const char * | getReadName () |
Returns the SAM formatted Read Name (QNAME). | |
const char * | getCigar () |
Returns the SAM formatted CIGAR string. | |
const char * | getSequence () |
Returns the SAM formatted sequence string, translating the base as specified by setSequenceTranslation. | |
const char * | getSequence (SequenceTranslation translation) |
Returns the SAM formatted sequence string performing the specified sequence translation. | |
const char * | getQuality () |
Returns the SAM formatted quality string. | |
char | getSequence (int index) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation. | |
char | getSequence (int index, SequenceTranslation translation) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1, performing the specified sequence translation. | |
char | getQuality (int index) |
Get the quality character at the specified index into the quality 0 to readLength - 1. | |
Cigar * | getCigarInfo () |
Returns a pointer to the Cigar object associated with this record. | |
uint32_t | getTagLength () |
Returns the length of the tags in BAM format. | |
bool | getNextSamTag (char *tag, char &vtype, void **value) |
Get the next tag from the record. | |
bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality) |
Returns the values of all fields except the tags. | |
bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality, SequenceTranslation translation) |
Returns the values of all fields except the tags. | |
GenomeSequence * | getReference () |
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set). |
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition at line 51 of file SamRecord.h.
Enum containing the settings on how to translate the sequence if a reference is available.
If no reference is available, no translation is done.
NONE |
Leave the sequence as is. |
EQUAL |
Translate bases that match the reference to '='. |
BASES |
Translate '=' to the actual base. |
Definition at line 57 of file SamRecord.h.
SamRecord::SamRecord | ( | ErrorHandler::HandlingType | errorHandlingType | ) |
Constructor that sets the error handling type.
errorHandlingType | how to handle errors. |
Definition at line 53 of file SamRecord.cpp.
References resetRecord().
00054 : myStatus(errorHandlingType), 00055 myRefPtr(NULL), 00056 mySequenceTranslation(NONE) 00057 { 00058 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t); 00059 00060 myRecordPtr = 00061 (bamRecordStruct *) malloc(defaultAllocSize); 00062 00063 myCigarTempBuffer = NULL; 00064 myCigarTempBufferAllocatedSize = 0; 00065 00066 allocatedSize = defaultAllocSize; 00067 00068 resetRecord(); 00069 }
bool SamRecord::addIntTag | ( | const char * | tag, | |
int32_t | value | |||
) |
Add the specified integer tag to the record.
Internal processing handles switching between SAM/BAM formats when read/written and determining the type for BAM format. If the tag is already there this code will replace it if the specified value is different.
tag | two character tag to be added to the SAM/BAM record. | |
value | value for the specified tag. |
Definition at line 482 of file SamRecord.cpp.
Referenced by addTag().
00483 { 00484 myStatus = SamStatus::SUCCESS; 00485 int key = 0; 00486 int index = 0; 00487 char bamvtype; 00488 00489 int tagBufferSize = 0; 00490 00491 // First check to see if the tags need to be synced to the buffer. 00492 if(myNeedToSetTagsFromBuffer) 00493 { 00494 if(!setTagsFromBuffer()) 00495 { 00496 // Failed to read tags from the buffer, so cannot add new ones. 00497 return(false); 00498 } 00499 } 00500 00501 // Ints come in as int. But it can be represented in fewer bits. 00502 // So determine a more specific type that is in line with the 00503 // types for BAM files. 00504 // First check to see if it is a negative. 00505 if(value < 0) 00506 { 00507 // The int is negative, so it will need to use a signed type. 00508 // See if it is greater than the min value for a char. 00509 if(value > std::numeric_limits<char>::min()) 00510 { 00511 // It can be stored in a signed char. 00512 bamvtype = 'c'; 00513 tagBufferSize += 4; 00514 } 00515 else if(value > std::numeric_limits<short>::min()) 00516 { 00517 // It fits in a signed short. 00518 bamvtype = 's'; 00519 tagBufferSize += 5; 00520 } 00521 else 00522 { 00523 // Just store it as a signed int. 00524 bamvtype = 'i'; 00525 tagBufferSize += 7; 00526 } 00527 } 00528 else 00529 { 00530 // It is positive, so an unsigned type can be used. 00531 if(value < std::numeric_limits<unsigned char>::max()) 00532 { 00533 // It is under the max of an unsigned char. 00534 bamvtype = 'C'; 00535 tagBufferSize += 4; 00536 } 00537 else if(value < std::numeric_limits<unsigned short>::max()) 00538 { 00539 // It is under the max of an unsigned short. 00540 bamvtype = 'S'; 00541 tagBufferSize += 5; 00542 } 00543 else 00544 { 00545 // Just store it as an unsigned int. 00546 bamvtype = 'I'; 00547 tagBufferSize += 7; 00548 } 00549 } 00550 00551 // Check to see if the tag is already there. 
00552 key = MAKEKEY(tag[0], tag[1], bamvtype); 00553 unsigned int hashIndex = extras.Find(key); 00554 if(hashIndex != LH_NOTFOUND) 00555 { 00556 // Tag was already found. 00557 index = extras[hashIndex]; 00558 00559 // First check to see if the value changed. 00560 if((integers[index] == value) && (intType[index] == bamvtype)) 00561 { 00562 // The value has not changed, so do nothing. 00563 return(true); 00564 } 00565 else 00566 { 00567 // Not the same value, so adjust the settings. 00568 // Subtract the size of the previous tag from tagBufferSize to get 00569 // the adjusted size. 00570 switch(intType[index]) 00571 { 00572 case 'c': 00573 case 'C': 00574 tagBufferSize -= 4; 00575 break; 00576 case 's': 00577 case 'S': 00578 tagBufferSize -= 5; 00579 break; 00580 case 'i': 00581 case 'I': 00582 tagBufferSize -= 7; 00583 break; 00584 default: 00585 myStatus.setStatus(SamStatus::INVALID, 00586 "unknown tag inttype type found.\n"); 00587 return(false); 00588 } 00589 00590 // Update the integer value and type. 00591 integers[index] = value; 00592 intType[index] = bamvtype; 00593 } 00594 } 00595 else 00596 { 00597 // Tag is not already there, so add it. 00598 index = integers.Length(); 00599 00600 integers.Push(value); 00601 intType.push_back(bamvtype); 00602 00603 extras.Add(key, index); 00604 } 00605 00606 // The buffer tags are now out of sync. 00607 myNeedToSetTagsInBuffer = true; 00608 myIsTagsBufferValid = false; 00609 myIsBufferSynced = false; 00610 myTagBufferSize += tagBufferSize; 00611 00612 return(true); 00613 }
bool SamRecord::addTag | ( | const char * | tag, | |
char | vtype, | |||
const char * | value | |||
) |
Add the specified tag to the record.
Internal processing handles switching between SAM/BAM formats when read/written. If the tag is already there this code will replace it if the specified value is different.
tag | two character tag to be added to the SAM/BAM record. | |
vtype | vtype of the specified value - either SAM/BAM vtypes. | |
value | value as a string for the specified tag. |
Definition at line 619 of file SamRecord.cpp.
References addIntTag().
00620 { 00621 if(vtype == 'i') 00622 { 00623 // integer type. Call addIntTag to handle it. 00624 int intVal = atoi(valuePtr); 00625 return(addIntTag(tag, intVal)); 00626 } 00627 00628 // Non-int type. 00629 myStatus = SamStatus::SUCCESS; 00630 bool status = true; // default to successful. 00631 int key = 0; 00632 int index = 0; 00633 00634 int tagBufferSize = 0; 00635 00636 // First check to see if the tags need to be synced to the buffer. 00637 if(myNeedToSetTagsFromBuffer) 00638 { 00639 if(!setTagsFromBuffer()) 00640 { 00641 // Failed to read tags from the buffer, so cannot add new ones. 00642 return(false); 00643 } 00644 } 00645 00646 // First check to see if the tag is already there. 00647 key = MAKEKEY(tag[0], tag[1], vtype); 00648 unsigned int hashIndex = extras.Find(key); 00649 if(hashIndex != LH_NOTFOUND) 00650 { 00651 // The key was found in the hash, so get the lookup index. 00652 index = extras[hashIndex]; 00653 00654 // Adjust the currently pointed to value to the new setting. 00655 switch (vtype) 00656 { 00657 case 'A' : 00658 // First check to see if the value changed. 00659 if(integers[index] == (const int)*(valuePtr)) 00660 { 00661 // The value has not changed, so do nothing. 00662 return(true); 00663 } 00664 else 00665 { 00666 // Tag buffer size doesn't change between different 'A' entries. 00667 integers[index] = (const int)*(valuePtr); 00668 intType[index] = vtype; 00669 } 00670 break; 00671 case 'Z' : 00672 // First check to see if the value changed. 00673 if(strings[index] == valuePtr) 00674 { 00675 // The value has not changed, so do nothing. 00676 return(true); 00677 } 00678 else 00679 { 00680 // Adjust the tagBufferSize by removing the size of the old string. 00681 tagBufferSize -= strings[index].Length(); 00682 strings[index] = valuePtr; 00683 // Adjust the tagBufferSize by adding the size of the new string. 
00684 tagBufferSize += strings[index].Length(); 00685 } 00686 break; 00687 case 'f' : 00688 // First check to see if the value changed. 00689 if(doubles[index] == atof(valuePtr)) 00690 { 00691 // The value has not changed, so do nothing. 00692 return(true); 00693 } 00694 else 00695 { 00696 // Tag buffer size doesn't change between different 'f' entries. 00697 doubles[index] = atof(valuePtr); 00698 } 00699 break; 00700 default : 00701 fprintf(stderr, 00702 "samFile::ReadSAM() - Unknown custom field of type %c\n", 00703 vtype); 00704 myStatus.setStatus(SamStatus::FAIL_PARSE, 00705 "Unknown custom field in a tag"); 00706 status = false; 00707 break; 00708 } 00709 } 00710 else 00711 { 00712 // The key was found not found in the hash, so add it. 00713 switch (vtype) 00714 { 00715 case 'A' : 00716 index = integers.Length(); 00717 integers.Push((const int)*(valuePtr)); 00718 intType.push_back(vtype); 00719 tagBufferSize += 4; 00720 break; 00721 case 'Z' : 00722 index = strings.Length(); 00723 strings.Push(valuePtr); 00724 tagBufferSize += 4 + strings.Last().Length(); 00725 break; 00726 case 'f' : 00727 index = doubles.Length(); 00728 doubles.Push(atof(valuePtr)); 00729 tagBufferSize += 7; 00730 break; 00731 default : 00732 fprintf(stderr, 00733 "samFile::ReadSAM() - Unknown custom field of type %c\n", 00734 vtype); 00735 myStatus.setStatus(SamStatus::FAIL_PARSE, 00736 "Unknown custom field in a tag"); 00737 status = false; 00738 break; 00739 } 00740 if(status) 00741 { 00742 // If successful, add the key to extras. 00743 extras.Add(key, index); 00744 } 00745 } 00746 00747 // Only add the tag if it has so far been successfully processed. 00748 if(status) 00749 { 00750 // The buffer tags are now out of sync. 00751 myNeedToSetTagsInBuffer = true; 00752 myIsTagsBufferValid = false; 00753 myIsBufferSynced = false; 00754 myTagBufferSize += tagBufferSize; 00755 } 00756 return(status); 00757 }
bool SamRecord::checkDouble | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a double.
Does not set SamStatus.
tag | SAM tag to check contents of. |
Definition at line 586 of file SamRecord.h.
References checkTag().
00586 { return checkTag(tag, 'f'); }
bool SamRecord::checkInteger | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains an integer.
Does not set SamStatus.
tag | SAM tag to check contents of. |
Definition at line 580 of file SamRecord.h.
References checkTag().
00580 { return checkTag(tag, 'i'); }
bool SamRecord::checkString | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a string.
Does not set SamStatus.
tag | SAM tag to check contents of. |
Definition at line 574 of file SamRecord.h.
References checkTag().
00574 { return checkTag(tag, 'Z'); }
bool SamRecord::checkTag | ( | const char * | tag, | |
char | type | |||
) |
Check if the specified tag contains a value of the specified vtype.
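Resets SamStatus to SUCCESS on entry; if the tags first have to be parsed from the buffer and that parse fails, the error status it set is left in place.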
tag | SAM tag to check contents of. | |
type | value type to check if the SAM tag matches. |
Definition at line 2278 of file SamRecord.cpp.
Referenced by checkDouble(), checkInteger(), and checkString().
02279 { 02280 // Init to success. 02281 myStatus = SamStatus::SUCCESS; 02282 // Parse the buffer if necessary. 02283 if(myNeedToSetTagsFromBuffer) 02284 { 02285 if(!setTagsFromBuffer()) 02286 { 02287 // Failed to read the tags from the buffer, so cannot 02288 // get tags. setTagsFromBuffer set the error. 02289 return(""); 02290 } 02291 } 02292 02293 int key = MAKEKEY(tag[0], tag[1], type); 02294 02295 return (extras.Find(key) != LH_NOTFOUND); 02296 }
void SamRecord::clearTags | ( | ) |
Clear the tags in this record.
Does not set SamStatus.
Definition at line 1763 of file SamRecord.cpp.
References resetTagIter().
Referenced by resetRecord().
01764 { 01765 if(extras.Entries() != 0) 01766 { 01767 extras.Clear(); 01768 } 01769 strings.Clear(); 01770 integers.Clear(); 01771 intType.clear(); 01772 doubles.Clear(); 01773 myTagBufferSize = 0; 01774 resetTagIter(); 01775 }
int32_t SamRecord::get0BasedAlignmentEnd | ( | ) |
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition at line 1180 of file SamRecord.cpp.
Referenced by get0BasedUnclippedEnd(), get1BasedAlignmentEnd(), and SamFile::readIndexedRecord().
01181 { 01182 myStatus = SamStatus::SUCCESS; 01183 if(myAlignmentLength == -1) 01184 { 01185 // Alignment end has not been set, so calculate it. 01186 parseCigar(); 01187 } 01188 // If alignment length > 0, subtract 1 from it to get the end. 01189 if(myAlignmentLength == 0) 01190 { 01191 // Length is 0, just return the start position. 01192 return(myRecordPtr->myPosition); 01193 } 01194 return(myRecordPtr->myPosition + myAlignmentLength - 1); 01195 }
int32_t SamRecord::get0BasedMatePosition | ( | ) |
Get the 0-based(BAM) leftmost mate position of the record.
Definition at line 1165 of file SamRecord.cpp.
int32_t SamRecord::get0BasedPosition | ( | ) |
Get the 0-based(BAM) leftmost position of the record.
Definition at line 1032 of file SamRecord.cpp.
Referenced by SamTags::createMDTag(), getNumOverlaps(), SamFile::readIndexedRecord(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), and SamFile::validateSortOrder().
int32_t SamRecord::get0BasedUnclippedEnd | ( | ) |
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition at line 1239 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by get1BasedUnclippedEnd().
01240 { 01241 // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the 01242 // cigar has not yet been parsed, so no need to check it here. 01243 return(get0BasedAlignmentEnd() + myUnclippedEndOffset); 01244 }
int32_t SamRecord::get0BasedUnclippedStart | ( | ) |
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition at line 1219 of file SamRecord.cpp.
Referenced by get1BasedUnclippedStart().
int32_t SamRecord::get1BasedAlignmentEnd | ( | ) |
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition at line 1199 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by getBin().
01200 { 01201 return(get0BasedAlignmentEnd() + 1); 01202 }
int32_t SamRecord::get1BasedMatePosition | ( | ) |
Get the 1-based(SAM) leftmost mate position of the record.
Definition at line 1158 of file SamRecord.cpp.
int32_t SamRecord::get1BasedPosition | ( | ) |
Get the 1-based(SAM) leftmost position of the record.
Definition at line 1025 of file SamRecord.cpp.
int32_t SamRecord::get1BasedUnclippedEnd | ( | ) |
Returns the 1-based inclusive right-most position adjusted for clipped bases.
Definition at line 1248 of file SamRecord.cpp.
References get0BasedUnclippedEnd().
01249 { 01250 return(get0BasedUnclippedEnd() + 1); 01251 }
int32_t SamRecord::get1BasedUnclippedStart | ( | ) |
Returns the 1-based inclusive left-most position adjusted for clipped bases.
Definition at line 1232 of file SamRecord.cpp.
References get0BasedUnclippedStart().
01233 { 01234 return(get0BasedUnclippedStart() + 1); 01235 }
int32_t SamRecord::getAlignmentLength | ( | ) |
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition at line 1206 of file SamRecord.cpp.
uint16_t SamRecord::getBin | ( | ) |
Get the BAM bin for the record.
Definition at line 1060 of file SamRecord.cpp.
References get1BasedAlignmentEnd().
01061 { 01062 myStatus = SamStatus::SUCCESS; 01063 if(!myIsBinValid) 01064 { 01065 // The bin that is set in the record is not valid, so 01066 // reset it. 01067 myRecordPtr->myBin = 01068 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd()); 01069 myIsBinValid = true; 01070 } 01071 return(myRecordPtr->myBin); 01072 }
int32_t SamRecord::getBlockSize | ( | ) |
Get the block size of the record.
Definition at line 994 of file SamRecord.cpp.
00995 { 00996 myStatus = SamStatus::SUCCESS; 00997 // If the buffer isn't synced, sync the buffer to determine the 00998 // block size. 00999 if(myIsBufferSynced == false) 01000 { 01001 // Since this just returns the block size, the translation of 01002 // the sequence does not matter, so just use the currently set 01003 // value. 01004 fixBuffer(myBufferSequenceTranslation); 01005 } 01006 return myRecordPtr->myBlockSize; 01007 }
char* SamRecord::getCharTag | ( | const char * | tag | ) |
Get the char value for the specified tag.
tag | tag to retrieve; returns a pointer to the tag's char value if found, NULL if not found. |
const char * SamRecord::getCigar | ( | ) |
Returns the SAM formatted CIGAR string.
Definition at line 1268 of file SamRecord.cpp.
Referenced by getFields(), and SamValidator::isValidCigar().
Cigar * SamRecord::getCigarInfo | ( | ) |
Returns a pointer to the Cigar object associated with this record.
The object is essentially read-only, only allowing modifications due to lazy evaluations.
Definition at line 1539 of file SamRecord.cpp.
Referenced by SamTags::createMDTag(), getSequence(), SamQuerySeqWithRefIter::reset(), and SamFilter::softClip().
01540 { 01541 // Check to see whether or not the Cigar has already been 01542 // set - this is determined by checking if alignment length 01543 // is set since alignment length and the cigar are set 01544 // at the same time. 01545 if(myAlignmentLength == -1) 01546 { 01547 // Not been set, so calculate it. 01548 parseCigar(); 01549 } 01550 return(&myCigarRoller); 01551 }
uint16_t SamRecord::getCigarLength | ( | ) |
Get the length of the CIGAR in BAM format.
Definition at line 1075 of file SamRecord.cpp.
01076 { 01077 myStatus = SamStatus::SUCCESS; 01078 // If the cigar buffer is valid 01079 // then get the length from there. 01080 if(myIsCigarBufferValid) 01081 { 01082 return myRecordPtr->myCigarLength; 01083 } 01084 01085 if(myCigarTempBufferLength == -1) 01086 { 01087 // The cigar buffer is not valid and the cigar temp buffer is not set, 01088 // so parse the string. 01089 parseCigarString(); 01090 } 01091 01092 // The temp buffer is now set, so return the size. 01093 return(myCigarTempBufferLength); 01094 }
double * SamRecord::getDoubleTag | ( | const char * | tag | ) |
Get the double value for the specified tag.
tag | tag to retrieve |
Definition at line 2154 of file SamRecord.cpp.
02155 { 02156 // Init to success. 02157 myStatus = SamStatus::SUCCESS; 02158 // Parse the buffer if necessary. 02159 if(myNeedToSetTagsFromBuffer) 02160 { 02161 if(!setTagsFromBuffer()) 02162 { 02163 // Failed to read the tags from the buffer, so cannot 02164 // get tags. setTagsFromBuffer set the errors, 02165 // so just return null. 02166 return(NULL); 02167 } 02168 } 02169 02170 int key = MAKEKEY(tag[0], tag[1], 'f'); 02171 int offset = extras.Find(key); 02172 02173 int value; 02174 if (offset < 0) 02175 { 02176 // Failed to find the tag. 02177 return(NULL); 02178 } 02179 else 02180 value = extras[offset]; 02181 02182 return(&(doubles[value])); 02183 }
bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, | |
String & | readName, | |||
String & | cigar, | |||
String & | sequence, | |||
String & | quality, | |||
SequenceTranslation | translation | |||
) |
Returns the values of all fields except the tags.
recStruct | structure containing the contents of all non-variable length fields. | |
readName | read name from the record (return param) | |
cigar | cigar string from the record (return param) | |
sequence | sequence string from the record (return param) | |
quality | quality string from the record (return param) | |
translation | type of sequence translation to use. |
Definition at line 1667 of file SamRecord.cpp.
References getCigar(), getQuality(), getReadName(), and getSequence().
01670 { 01671 myStatus = SamStatus::SUCCESS; 01672 if(myIsBufferSynced == false) 01673 { 01674 if(!fixBuffer(translation)) 01675 { 01676 // failed to set the buffer, return false. 01677 return(false); 01678 } 01679 } 01680 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct)); 01681 01682 readName = getReadName(); 01683 // Check the status. 01684 if(myStatus != SamStatus::SUCCESS) 01685 { 01686 // Failed to set the fields, return false. 01687 return(false); 01688 } 01689 cigar = getCigar(); 01690 // Check the status. 01691 if(myStatus != SamStatus::SUCCESS) 01692 { 01693 // Failed to set the fields, return false. 01694 return(false); 01695 } 01696 sequence = getSequence(translation); 01697 // Check the status. 01698 if(myStatus != SamStatus::SUCCESS) 01699 { 01700 // Failed to set the fields, return false. 01701 return(false); 01702 } 01703 quality = getQuality(); 01704 // Check the status. 01705 if(myStatus != SamStatus::SUCCESS) 01706 { 01707 // Failed to set the fields, return false. 01708 return(false); 01709 } 01710 return(true); 01711 }
bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, | |
String & | readName, | |||
String & | cigar, | |||
String & | sequence, | |||
String & | quality | |||
) |
Returns the values of all fields except the tags.
recStruct | structure containing the contents of all non-variable length fields. | |
readName | read name from the record (return param) | |
cigar | cigar string from the record (return param) | |
sequence | sequence string from the record (return param) | |
quality | quality string from the record (return param) |
Definition at line 1658 of file SamRecord.cpp.
01660 { 01661 return(getFields(recStruct, readName, cigar, sequence, quality, 01662 mySequenceTranslation)); 01663 }
uint16_t SamRecord::getFlag | ( | ) |
Get the flag.
Definition at line 1097 of file SamRecord.cpp.
Referenced by SamQuerySeqWithRefIter::getNextMatchMismatch().
int32_t SamRecord::getInsertSize | ( | ) |
Get the inferred insert size of the read pair.
Definition at line 1172 of file SamRecord.cpp.
int * SamRecord::getIntegerTag | ( | const char * | tag | ) |
Get the integer value for the specified tag.
tag | tag to retrieve. |
Returns a pointer to the tag's integer value if found, NULL if not found.
Definition at line 2122 of file SamRecord.cpp.
02123 { 02124 // Init to success. 02125 myStatus = SamStatus::SUCCESS; 02126 // Parse the buffer if necessary. 02127 if(myNeedToSetTagsFromBuffer) 02128 { 02129 if(!setTagsFromBuffer()) 02130 { 02131 // Failed to read the tags from the buffer, so cannot 02132 // get tags. setTagsFromBuffer set the errors, 02133 // so just return null. 02134 return(NULL); 02135 } 02136 } 02137 02138 int key = MAKEKEY(tag[0], tag[1], 'i'); 02139 int offset = extras.Find(key); 02140 02141 int value; 02142 if (offset < 0) 02143 { 02144 // Failed to find the tag. 02145 return(NULL); 02146 } 02147 else 02148 value = extras[offset]; 02149 02150 return(&(integers[value])); 02151 }
uint8_t SamRecord::getMapQuality | ( | ) |
Get the mapping quality of the record.
Definition at line 1053 of file SamRecord.cpp.
int32_t SamRecord::getMateReferenceID | ( | ) |
Get the mate reference id of the record.
Definition at line 1151 of file SamRecord.cpp.
const char * SamRecord::getMateReferenceName | ( | ) |
Get the mate reference sequence name of the record.
If it is equal to the reference name, it still returns the reference name.
Definition at line 1123 of file SamRecord.cpp.
const char * SamRecord::getMateReferenceNameOrEqual | ( | ) |
Get the mate reference sequence name of the record, returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.
Definition at line 1133 of file SamRecord.cpp.
References getReferenceName().
01134 { 01135 myStatus = SamStatus::SUCCESS; 01136 if(myMateReferenceName == "*") 01137 { 01138 return(myMateReferenceName); 01139 } 01140 if(myMateReferenceName == getReferenceName()) 01141 { 01142 return(FIELD_ABSENT_STRING); 01143 } 01144 else 01145 { 01146 return(myMateReferenceName); 01147 } 01148 }
bool SamRecord::getNextSamTag | ( | char * | tag, | |
char & | vtype, | |||
void ** | value | |||
) |
Get the next tag from the record.
Sets the Status to SUCCESS when a tag is successfully returned or when there are no more tags. Otherwise the status is set to describe why it failed (parsing, etc).
tag | set to the tag when a tag is read. | |
vtype | set to the vtype when a tag is read. | |
value | pointer to the value of the tag (will need to cast to int, double, char, or string based on vtype). |
Definition at line 1587 of file SamRecord.cpp.
01588 { 01589 myStatus = SamStatus::SUCCESS; 01590 if(myNeedToSetTagsFromBuffer) 01591 { 01592 if(!setTagsFromBuffer()) 01593 { 01594 // Failed to read the tags from the buffer, so cannot 01595 // get tags. 01596 return(false); 01597 } 01598 } 01599 01600 // Increment the tag index to start looking at the next tag. 01601 // At the beginning, it is set to -1. 01602 myLastTagIndex++; 01603 int maxTagIndex = extras.Capacity(); 01604 if(myLastTagIndex >= maxTagIndex) 01605 { 01606 // Hit the end of the tags, return false, no more tags. 01607 // Status is still success since this is not an error, 01608 // it is just the end of the list. 01609 return(false); 01610 } 01611 01612 bool tagFound = false; 01613 // Loop until a tag is found or the end of extras is hit. 01614 while((tagFound == false) && (myLastTagIndex < maxTagIndex)) 01615 { 01616 if(extras.SlotInUse(myLastTagIndex)) 01617 { 01618 // Found a slot to use. 01619 int key = extras.GetKey(myLastTagIndex); 01620 getTag(key, tag); 01621 getTypeFromKey(key, vtype); 01622 tagFound = true; 01623 // Get the value associated with the key based on the vtype. 01624 switch (vtype) 01625 { 01626 case 'f' : 01627 *value = getDoublePtr(myLastTagIndex); 01628 break; 01629 case 'i' : 01630 *value = getIntegerPtr(myLastTagIndex, vtype); 01631 if(vtype != 'A') 01632 { 01633 // Convert all int types to 'i' 01634 vtype = 'i'; 01635 } 01636 break; 01637 case 'Z' : 01638 *value = getStringPtr(myLastTagIndex); 01639 break; 01640 default: 01641 myStatus.setStatus(SamStatus::FAIL_PARSE, 01642 "Unknown tag type"); 01643 tagFound = false; 01644 break; 01645 } 01646 } 01647 if(!tagFound) 01648 { 01649 // Increment the index since a tag was not found. 01650 myLastTagIndex++; 01651 } 01652 } 01653 return(tagFound); 01654 }
uint32_t SamRecord::getNumOverlaps | ( | int32_t | start, | |
int32_t | end | |||
) |
Return the number of bases in this read that overlap the passed in region.
start | inclusive 0-based start position (reference position) of the region to check for overlaps in. (-1 indicates to start at the beginning of the reference.) | |
end | exclusive 0-based end position (reference position) of the region to check for overlaps in. (-1 indicates to go to the end of the reference.) |
Definition at line 2301 of file SamRecord.cpp.
References get0BasedPosition(), and Cigar::getNumOverlaps().
Referenced by SamFile::GetNumOverlaps().
02302 { 02303 // Determine whether or not the cigar has been parsed, which sets up 02304 // the cigar roller. This is determined by checking the alignment length. 02305 if(myAlignmentLength == -1) 02306 { 02307 parseCigar(); 02308 } 02309 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition())); 02310 }
char SamRecord::getQuality | ( | int | index | ) |
Get the quality character at the specified index into the quality string (0 to readLength - 1).
index | index into the quality string (0 to readLength-1). |
Definition at line 1492 of file SamRecord.cpp.
References getReadLength().
01493 { 01494 // Determine the read length. 01495 int32_t readLen = getReadLength(); 01496 01497 // If the read length is 0, return ' ' whose ascii code is below 01498 // the minimum ascii code for qualities. 01499 if(readLen == 0) 01500 { 01501 return(BaseUtilities::UNKNOWN_QUALITY_CHAR); 01502 } 01503 else if((index < 0) || (index >= readLen)) 01504 { 01505 // Only get here if the index was out of range, so thow an exception. 01506 String exceptionString = "SamRecord::getQuality("; 01507 exceptionString += index; 01508 exceptionString += ") is out of range. Index must be between 0 and "; 01509 exceptionString += (readLen - 1); 01510 throw std::runtime_error(exceptionString.c_str()); 01511 } 01512 01513 if(myQuality.Length() == 0) 01514 { 01515 // Parse BAM Quality. 01516 unsigned char * packedQuality = 01517 (unsigned char *)myRecordPtr->myData + 01518 myRecordPtr->myReadNameLength + 01519 myRecordPtr->myCigarLength * sizeof(int) + 01520 (myRecordPtr->myReadLength + 1) / 2; 01521 return(packedQuality[index] + 33); 01522 } 01523 else 01524 { 01525 // Already have string. 01526 if((myQuality.Length() == 1) && (myQuality[0] == '*')) 01527 { 01528 // Return the unknown quality character. 01529 return(BaseUtilities::UNKNOWN_QUALITY_CHAR); 01530 } 01531 else 01532 { 01533 return(myQuality[index]); 01534 } 01535 } 01536 }
const char * SamRecord::getQuality | ( | ) |
Returns the SAM formatted quality string.
Definition at line 1351 of file SamRecord.cpp.
Referenced by getFields().
int32_t SamRecord::getReadLength | ( | ) |
Get the length of the read.
Definition at line 1104 of file SamRecord.cpp.
Referenced by SamQuerySeqWithRefIter::getNextMatchMismatch(), getQuality(), getSequence(), SamValidator::isValidCigar(), and SamQuerySeqWithRefIter::reset().
01105 { 01106 myStatus = SamStatus::SUCCESS; 01107 if(myIsSequenceBufferValid == false) 01108 { 01109 // If the sequence is "*", then return 0. 01110 if((mySequence.Length() == 1) && (mySequence[0] == '*')) 01111 { 01112 return(0); 01113 } 01114 // Do not add 1 since it is not null terminated. 01115 return(mySequence.Length()); 01116 } 01117 return(myRecordPtr->myReadLength); 01118 }
const char * SamRecord::getReadName | ( | ) |
Returns the SAM formatted Read Name (QNAME).
Definition at line 1255 of file SamRecord.cpp.
Referenced by getFields(), and SamFile::validateSortOrder().
01256 { 01257 myStatus = SamStatus::SUCCESS; 01258 if(myReadName.Length() == 0) 01259 { 01260 // 0 Length, means that it is in the buffer, but has not yet 01261 // been synced to the string, so do the sync. 01262 myReadName = (char*)&(myRecordPtr->myData); 01263 } 01264 return myReadName.c_str(); 01265 }
uint8_t SamRecord::getReadNameLength | ( | ) |
Get the length of the readname (QNAME) including the null.
Definition at line 1039 of file SamRecord.cpp.
01040 { 01041 myStatus = SamStatus::SUCCESS; 01042 // If the buffer is valid, return the size from there, otherwise get the 01043 // size from the string length + 1 (ending null). 01044 if(myIsReadNameBufferValid) 01045 { 01046 return(myRecordPtr->myReadNameLength); 01047 } 01048 01049 return(myReadName.Length() + 1); 01050 }
const void * SamRecord::getRecordBuffer | ( | SequenceTranslation | translation | ) |
Get a const pointer to the buffer that contains the BAM representation of the record.
translation | type of sequence translation to use. |
Definition at line 924 of file SamRecord.cpp.
00925 { 00926 myStatus = SamStatus::SUCCESS; 00927 bool status = true; 00928 // If the buffer is not synced or the sequence in the buffer is not 00929 // properly translated, fix the buffer. 00930 if((myIsBufferSynced == false) || 00931 (myBufferSequenceTranslation != translation)) 00932 { 00933 status &= fixBuffer(translation); 00934 } 00935 // If the buffer is synced, check to see if the tags need to be synced. 00936 if(myNeedToSetTagsInBuffer) 00937 { 00938 status &= setTagsInBuffer(); 00939 } 00940 if(!status) 00941 { 00942 return(NULL); 00943 } 00944 return (const void *)myRecordPtr; 00945 }
const void * SamRecord::getRecordBuffer | ( | ) |
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition at line 917 of file SamRecord.cpp.
00918 { 00919 return(getRecordBuffer(mySequenceTranslation)); 00920 }
GenomeSequence * SamRecord::getReference | ( | ) |
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set).
Definition at line 1715 of file SamRecord.cpp.
int32_t SamRecord::getReferenceID | ( | ) |
Get the reference sequence id of the record.
Definition at line 1018 of file SamRecord.cpp.
Referenced by SamFile::readIndexedRecord(), and SamFile::validateSortOrder().
const char * SamRecord::getReferenceName | ( | ) |
Get the reference sequence name of the record.
Definition at line 1011 of file SamRecord.cpp.
Referenced by SamTags::createMDTag(), getMateReferenceNameOrEqual(), getSequence(), and SamQuerySeqWithRefIter::reset().
char SamRecord::getSequence | ( | int | index, | |
SequenceTranslation | translation | |||
) |
Get the sequence base at the specified index into this sequence (0 to readLength - 1), performing the specified sequence translation.
index | index into the sequence string (0 to readLength-1). | |
translation | type of sequence translation to use. |
Definition at line 1370 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReadLength(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().
01371 { 01372 static const char * asciiBases = "=AC.G...T......N"; 01373 01374 // Determine the read length. 01375 int32_t readLen = getReadLength(); 01376 01377 // If the read length is 0, this method should not be called. 01378 if(readLen == 0) 01379 { 01380 String exceptionString = "SamRecord::getSequence("; 01381 exceptionString += index; 01382 exceptionString += ") is not allowed since sequence = '*'"; 01383 throw std::runtime_error(exceptionString.c_str()); 01384 } 01385 else if((index < 0) || (index >= readLen)) 01386 { 01387 // Only get here if the index was out of range, so thow an exception. 01388 String exceptionString = "SamRecord::getSequence("; 01389 exceptionString += index; 01390 exceptionString += ") is out of range. Index must be between 0 and "; 01391 exceptionString += (readLen - 1); 01392 throw std::runtime_error(exceptionString.c_str()); 01393 } 01394 01395 // Determine if translation needs to be done. 01396 if((translation == NONE) || (myRefPtr == NULL)) 01397 { 01398 // No translation needs to be done. 01399 if(mySequence.Length() == 0) 01400 { 01401 // Parse BAM sequence. 01402 // TODO - maybe store this pointer - and use that to track when 01403 // valid? 01404 unsigned char * packedSequence = 01405 (unsigned char *)myRecordPtr->myData + 01406 myRecordPtr->myReadNameLength + 01407 myRecordPtr->myCigarLength * sizeof(int); 01408 01409 return(index & 1 ? 01410 asciiBases[packedSequence[index / 2] & 0xF] : 01411 asciiBases[packedSequence[index / 2] >> 4]); 01412 } 01413 // Already have string. 01414 return(mySequence[index]); 01415 } 01416 else 01417 { 01418 // Need to translate the sequence either to have '=' or to not 01419 // have it. 01420 // First check to see if the sequence has been set. 01421 if(mySequence.Length() == 0) 01422 { 01423 // 0 Length, means that it is in the buffer, but has not yet 01424 // been synced to the string, so do the sync. 
01425 setSequenceAndQualityFromBuffer(); 01426 } 01427 01428 // Check the type of translation. 01429 if(translation == EQUAL) 01430 { 01431 // Check whether or not the string has already been 01432 // retrieved that has the '=' in it. 01433 if(mySeqWithEq.length() == 0) 01434 { 01435 // The string with '=' has not yet been determined, 01436 // so get the string. 01437 // Check to see if the sequence is defined. 01438 if(mySequence == "*") 01439 { 01440 // Sequence is undefined, so no translation necessary. 01441 mySeqWithEq = '*'; 01442 } 01443 else 01444 { 01445 // Sequence defined, so translate it. 01446 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 01447 myRecordPtr->myPosition, 01448 *(getCigarInfo()), 01449 getReferenceName(), 01450 *myRefPtr, 01451 mySeqWithEq); 01452 } 01453 } 01454 // Sequence is set, so return it. 01455 return(mySeqWithEq[index]); 01456 } 01457 else 01458 { 01459 // translation == BASES 01460 // Check whether or not the string has already been 01461 // retrieved that does not have the '=' in it. 01462 if(mySeqWithoutEq.length() == 0) 01463 { 01464 // The string with '=' has not yet been determined, 01465 // so get the string. 01466 // Check to see if the sequence is defined. 01467 if(mySequence == "*") 01468 { 01469 // Sequence is undefined, so no translation necessary. 01470 mySeqWithoutEq = '*'; 01471 } 01472 else 01473 { 01474 // Sequence defined, so translate it. 01475 // The string without '=' has not yet been determined, 01476 // so get the string. 01477 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 01478 myRecordPtr->myPosition, 01479 *(getCigarInfo()), 01480 getReferenceName(), 01481 *myRefPtr, 01482 mySeqWithoutEq); 01483 } 01484 } 01485 // Sequence is set, so return it. 01486 return(mySeqWithoutEq[index]); 01487 } 01488 } 01489 }
char SamRecord::getSequence | ( | int | index | ) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation.
index | index into the sequence string (0 to readLength-1). |
Definition at line 1364 of file SamRecord.cpp.
References getSequence().
01365 { 01366 return(getSequence(index, mySequenceTranslation)); 01367 }
const char * SamRecord::getSequence | ( | SequenceTranslation | translation | ) |
Returns the SAM formatted sequence string performing the specified sequence translation.
translation | type of sequence translation to use. |
Definition at line 1287 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().
01288 { 01289 myStatus = SamStatus::SUCCESS; 01290 if(mySequence.Length() == 0) 01291 { 01292 // 0 Length, means that it is in the buffer, but has not yet 01293 // been synced to the string, so do the sync. 01294 setSequenceAndQualityFromBuffer(); 01295 } 01296 01297 // Determine if translation needs to be done. 01298 if((translation == NONE) || (myRefPtr == NULL)) 01299 { 01300 return mySequence.c_str(); 01301 } 01302 else if(translation == EQUAL) 01303 { 01304 if(mySeqWithEq.length() == 0) 01305 { 01306 // Check to see if the sequence is defined. 01307 if(mySequence == "*") 01308 { 01309 // Sequence is undefined, so no translation necessary. 01310 mySeqWithEq = '*'; 01311 } 01312 else 01313 { 01314 // Sequence defined, so translate it. 01315 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(), 01316 myRecordPtr->myPosition, 01317 *(getCigarInfo()), 01318 getReferenceName(), 01319 *myRefPtr, 01320 mySeqWithEq); 01321 } 01322 } 01323 return(mySeqWithEq.c_str()); 01324 } 01325 else 01326 { 01327 // translation == BASES 01328 if(mySeqWithoutEq.length() == 0) 01329 { 01330 if(mySequence == "*") 01331 { 01332 // Sequence is undefined, so no translation necessary. 01333 mySeqWithoutEq = '*'; 01334 } 01335 else 01336 { 01337 // Sequence defined, so translate it. 01338 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(), 01339 myRecordPtr->myPosition, 01340 *(getCigarInfo()), 01341 getReferenceName(), 01342 *myRefPtr, 01343 mySeqWithoutEq); 01344 } 01345 } 01346 return(mySeqWithoutEq.c_str()); 01347 } 01348 }
const char * SamRecord::getSequence | ( | ) |
Returns the SAM formatted sequence string, translating the base as specified by setSequenceTranslation.
Definition at line 1281 of file SamRecord.cpp.
Referenced by SamTags::createMDTag(), getFields(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getSequence(), and shiftIndelsLeft().
01282 { 01283 return(getSequence(mySequenceTranslation)); 01284 }
const SamStatus & SamRecord::getStatus | ( | ) |
Returns the status associated with the last method that sets the status.
Definition at line 1984 of file SamRecord.cpp.
String * SamRecord::getStringTag | ( | const char * | tag | ) |
Get the string value for the specified tag.
tag | tag to retrieve | |
Returns a pointer to the tag's string value if found, NULL if not found.
Definition at line 2092 of file SamRecord.cpp.
Referenced by SamTags::isMDTagCorrect().
02093 { 02094 // Parse the buffer if necessary. 02095 if(myNeedToSetTagsFromBuffer) 02096 { 02097 if(!setTagsFromBuffer()) 02098 { 02099 // Failed to read the tags from the buffer, so cannot 02100 // get tags. setTagsFromBuffer set the errors, 02101 // so just return null. 02102 return(NULL); 02103 } 02104 } 02105 02106 int key = MAKEKEY(tag[0], tag[1], 'Z'); 02107 int offset = extras.Find(key); 02108 02109 int value; 02110 if (offset < 0) 02111 { 02112 // Tag not found. 02113 return(NULL); 02114 } 02115 02116 // Offset is valid, so return the tag. 02117 value = extras[offset]; 02118 return(&(strings[value])); 02119 }
uint32_t SamRecord::getTagLength | ( | ) |
Returns the length of the tags in BAM format.
Definition at line 1554 of file SamRecord.cpp.
01555 { 01556 myStatus = SamStatus::SUCCESS; 01557 if(myNeedToSetTagsFromBuffer) 01558 { 01559 // Tags are only set in the buffer, so the size of the tags is 01560 // the length of the record minus the starting location of the tags. 01561 unsigned char * tagStart = 01562 (unsigned char *)myRecordPtr->myData 01563 + myRecordPtr->myReadNameLength 01564 + myRecordPtr->myCigarLength * sizeof(int) 01565 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength; 01566 01567 // The non-tags take up from the start of the record to the tag start. 01568 // Do not include the block size part of the record since it is not 01569 // included in the size. 01570 uint32_t nonTagSize = 01571 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID); 01572 // Tags take up the size of the block minus the non-tag section. 01573 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize; 01574 return(tagSize); 01575 } 01576 01577 // Tags are stored outside the buffer, so myTagBufferSize is set. 01578 return(myTagBufferSize); 01579 }
bool SamRecord::getTagsString | ( | const char * | tags, | |
String & | returnString, | |||
char | delim = '\t' | |||
) |
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE.
Sets the Status to SUCCESS when the tags are successfully returned or the tags were not found. If a different error occurred, the status is set appropriately.
tags | the tags to retrieve, formatted as TAG:TYPE;TAG:TYPE... | |
returnString | the String to set (this method first clears returnString) to TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... | |
delim | delimiter to use to separate two tags, default is a tab. |
Definition at line 1990 of file SamRecord.cpp.
01991 { 01992 const char* currentTagPtr = tags; 01993 01994 returnString.Clear(); 01995 myStatus = SamStatus::SUCCESS; 01996 if(myNeedToSetTagsFromBuffer) 01997 { 01998 if(!setTagsFromBuffer()) 01999 { 02000 // Failed to read the tags from the buffer, so cannot 02001 // get tags. 02002 return(false); 02003 } 02004 } 02005 02006 bool returnStatus = true; 02007 02008 while(*currentTagPtr != '\0') 02009 { 02010 // Tags are formatted as: XY:Z 02011 // Where X is [A-Za-z], Y is [A-Za-z], and 02012 // Z is A,i,f,Z,H (cCsSI are also excepted) 02013 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') || 02014 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0')) 02015 { 02016 myStatus.setStatus(SamStatus::INVALID, 02017 "getTagsString called with improperly formatted tags.\n"); 02018 returnStatus = false; 02019 break; 02020 } 02021 02022 // Construct the key. 02023 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 02024 currentTagPtr[3]); 02025 // Look to see if the key exsists in the hash. 02026 int offset = extras.Find(key); 02027 02028 if(offset >= 0) 02029 { 02030 // Offset is set, so the key was found. 02031 if(!returnString.IsEmpty()) 02032 { 02033 returnString += delim; 02034 } 02035 returnString += currentTagPtr[0]; 02036 returnString += currentTagPtr[1]; 02037 returnString += ':'; 02038 returnString += currentTagPtr[3]; 02039 returnString += ':'; 02040 02041 // First if it is an integer, determine the actual type of the int. 02042 char vtype; 02043 getTypeFromKey(key, vtype); 02044 02045 02046 // Offset is set, so recalculate the buffer size without this entry. 02047 // Do NOT remove from strings, integers, or doubles because then 02048 // extras would need to be updated for all entries with the new indexes 02049 // into those variables. 
02050 switch(vtype) 02051 { 02052 case 'i': 02053 returnString += *(int*)getIntegerPtr(offset, vtype); 02054 break; 02055 case 'f': 02056 returnString += *(double*)getDoublePtr(offset); 02057 break; 02058 case 'Z': 02059 returnString += *(String*)getStringPtr(offset); 02060 break; 02061 default: 02062 myStatus.setStatus(SamStatus::INVALID, 02063 "rmTag called with unknown type.\n"); 02064 returnStatus = false; 02065 break; 02066 }; 02067 } 02068 // Increment to the next tag. 02069 if(currentTagPtr[4] == ';') 02070 { 02071 // Increment once more. 02072 currentTagPtr += 5; 02073 } 02074 else if(currentTagPtr[4] != '\0') 02075 { 02076 // Invalid tag format. 02077 myStatus.setStatus(SamStatus::INVALID, 02078 "rmTags called with improperly formatted tags.\n"); 02079 returnStatus = false; 02080 break; 02081 } 02082 else 02083 { 02084 // Last Tag. 02085 currentTagPtr += 4; 02086 } 02087 } 02088 return(returnStatus); 02089 }
bool SamRecord::isCharType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a char type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 1743 of file SamRecord.cpp.
bool SamRecord::isDoubleType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a double type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 1733 of file SamRecord.cpp.
bool SamRecord::isIntegerType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is an integer type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 1721 of file SamRecord.cpp.
bool SamRecord::isStringType | ( | char | vtype | ) | const |
Returns whether or not the specified vtype is a string type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 1753 of file SamRecord.cpp.
bool SamRecord::isValid | ( | SamFileHeader & | header | ) |
Returns whether or not the record is valid.
Sets the status to indicate success or failure.
header | SAM Header associated with the record. Used to perform some validation against the header. |
Definition at line 164 of file SamRecord.cpp.
00165 { 00166 myStatus = SamStatus::SUCCESS; 00167 SamValidationErrors invalidSamErrors; 00168 if(!SamValidator::isValid(header, *this, invalidSamErrors)) 00169 { 00170 // The record is not valid. 00171 std::string errorMessage = ""; 00172 invalidSamErrors.getErrorString(errorMessage); 00173 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str()); 00174 return(false); 00175 } 00176 // The record is valid. 00177 return(true); 00178 }
void SamRecord::resetRecord | ( | ) |
Reset the fields of the record to a default value.
This is not necessary when you are reading a Sam/Bam file, but if you are setting fields, it is a good idea to clean out a record before reusing it. Clearing it allows you to not have to set any empty fields.
Definition at line 91 of file SamRecord.cpp.
References clearTags(), and NONE.
Referenced by SamRecord(), setBuffer(), setBufferFromFile(), and ~SamRecord().
00092 { 00093 myIsBufferSynced = true; 00094 00095 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE; 00096 myRecordPtr->myReferenceID = -1; 00097 myRecordPtr->myPosition = -1; 00098 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH; 00099 myRecordPtr->myMapQuality = 0; 00100 myRecordPtr->myBin = DEFAULT_BIN; 00101 myRecordPtr->myCigarLength = 0; 00102 myRecordPtr->myFlag = 0; 00103 myRecordPtr->myReadLength = 0; 00104 myRecordPtr->myMateReferenceID = -1; 00105 myRecordPtr->myMatePosition = -1; 00106 myRecordPtr->myInsertSize = 0; 00107 00108 // Set the sam values for the variable length fields. 00109 // TODO - one way to speed this up might be to not set to "*" and just 00110 // clear them, and write out a '*' for SAM if it is empty. 00111 myReadName = DEFAULT_READ_NAME; 00112 myReferenceName = "*"; 00113 myMateReferenceName = "*"; 00114 myCigar = "*"; 00115 mySequence = "*"; 00116 mySeqWithEq.clear(); 00117 mySeqWithoutEq.clear(); 00118 myQuality = "*"; 00119 myNeedToSetTagsFromBuffer = false; 00120 myNeedToSetTagsInBuffer = false; 00121 00122 // Initialize the calculated alignment info to the uncalculated value. 00123 myAlignmentLength = -1; 00124 myUnclippedStartOffset = -1; 00125 myUnclippedEndOffset = -1; 00126 00127 clearTags(); 00128 00129 // Set the bam values for the variable length fields. 00130 // Only the read name needs to be set, the others are a length of 0. 00131 // Set the read name. The min size of myRecordPtr includes the size for 00132 // the default read name. 00133 memcpy(&(myRecordPtr->myData), myReadName.c_str(), 00134 myRecordPtr->myReadNameLength); 00135 00136 // Set that the variable length buffer fields are valid. 
00137 myIsReadNameBufferValid = true; 00138 myIsCigarBufferValid = true; 00139 myIsSequenceBufferValid = true; 00140 myBufferSequenceTranslation = NONE; 00141 myIsQualityBufferValid = true; 00142 myIsTagsBufferValid = true; 00143 myIsBinValid = true; 00144 00145 myCigarTempBufferLength = -1; 00146 00147 myStatus = SamStatus::SUCCESS; 00148 00149 NOT_FOUND_TAG_STRING = ""; 00150 NOT_FOUND_TAG_INT = -1; 00151 NOT_FOUND_TAG_DOUBLE = -1; 00152 }
bool SamRecord::rmTag | ( | const char * | tag, | |
char | type | |||
) |
Remove a tag.
tag | tag to remove. | |
type | type of the tag to be removed. |
Definition at line 1778 of file SamRecord.cpp.
References getString().
01779 { 01780 // Check the length of tag. 01781 if(strlen(tag) != 2) 01782 { 01783 // Tag is the wrong length. 01784 myStatus.setStatus(SamStatus::INVALID, 01785 "rmTag called with tag that is not 2 characters\n"); 01786 return(false); 01787 } 01788 01789 myStatus = SamStatus::SUCCESS; 01790 if(myNeedToSetTagsFromBuffer) 01791 { 01792 if(!setTagsFromBuffer()) 01793 { 01794 // Failed to read the tags from the buffer, so cannot 01795 // get tags. 01796 return(false); 01797 } 01798 } 01799 01800 // Construct the key. 01801 int key = MAKEKEY(tag[0], tag[1], type); 01802 // Look to see if the key exsists in the hash. 01803 int offset = extras.Find(key); 01804 01805 if(offset < 0) 01806 { 01807 // Not found, so return true, successfully removed since 01808 // it is not in tag. 01809 return(true); 01810 } 01811 01812 // Offset is set, so the key was found. 01813 // First if it is an integer, determine the actual type of the int. 01814 char vtype; 01815 getTypeFromKey(key, vtype); 01816 if(vtype == 'i') 01817 { 01818 vtype = getIntegerType(offset); 01819 } 01820 01821 // Offset is set, so recalculate the buffer size without this entry. 01822 // Do NOT remove from strings, integers, or doubles because then 01823 // extras would need to be updated for all entries with the new indexes 01824 // into those variables. 01825 int rmBuffSize = 0; 01826 switch(vtype) 01827 { 01828 case 'A': 01829 case 'c': 01830 case 'C': 01831 rmBuffSize = 4; 01832 break; 01833 case 's': 01834 case 'S': 01835 rmBuffSize = 5; 01836 break; 01837 case 'i': 01838 case 'I': 01839 rmBuffSize = 7; 01840 break; 01841 case 'f': 01842 rmBuffSize = 7; 01843 break; 01844 case 'Z': 01845 rmBuffSize = 4 + getString(offset).Length(); 01846 break; 01847 default: 01848 myStatus.setStatus(SamStatus::INVALID, 01849 "rmTag called with unknown type.\n"); 01850 return(false); 01851 break; 01852 }; 01853 01854 // The buffer tags are now out of sync. 
01855 myNeedToSetTagsInBuffer = true; 01856 myIsTagsBufferValid = false; 01857 myIsBufferSynced = false; 01858 myTagBufferSize -= rmBuffSize; 01859 01860 // Remove from the hash. 01861 extras.Delete(offset); 01862 return(true); 01863 }
bool SamRecord::rmTags | ( | const char * | tags | ) |
Remove tags.
tags | tags to remove, formatted as Tag:Type;Tag:Type;Tag:Type... |
Definition at line 1866 of file SamRecord.cpp.
References getString().
01867 { 01868 const char* currentTagPtr = tags; 01869 01870 myStatus = SamStatus::SUCCESS; 01871 if(myNeedToSetTagsFromBuffer) 01872 { 01873 if(!setTagsFromBuffer()) 01874 { 01875 // Failed to read the tags from the buffer, so cannot 01876 // get tags. 01877 return(false); 01878 } 01879 } 01880 01881 bool returnStatus = true; 01882 01883 int rmBuffSize = 0; 01884 while(*currentTagPtr != '\0') 01885 { 01886 01887 // Tags are formatted as: XY:Z 01888 // Where X is [A-Za-z], Y is [A-Za-z], and 01889 // Z is A,i,f,Z,H (cCsSI are also excepted) 01890 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') || 01891 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0')) 01892 { 01893 myStatus.setStatus(SamStatus::INVALID, 01894 "rmTags called with improperly formatted tags.\n"); 01895 returnStatus = false; 01896 break; 01897 } 01898 01899 // Construct the key. 01900 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1], 01901 currentTagPtr[3]); 01902 // Look to see if the key exsists in the hash. 01903 int offset = extras.Find(key); 01904 01905 if(offset >= 0) 01906 { 01907 // Offset is set, so the key was found. 01908 // First if it is an integer, determine the actual type of the int. 01909 char vtype; 01910 getTypeFromKey(key, vtype); 01911 if(vtype == 'i') 01912 { 01913 vtype = getIntegerType(offset); 01914 } 01915 01916 // Offset is set, so recalculate the buffer size without this entry. 01917 // Do NOT remove from strings, integers, or doubles because then 01918 // extras would need to be updated for all entries with the new indexes 01919 // into those variables. 
01920 switch(vtype) 01921 { 01922 case 'A': 01923 case 'c': 01924 case 'C': 01925 rmBuffSize += 4; 01926 break; 01927 case 's': 01928 case 'S': 01929 rmBuffSize += 5; 01930 break; 01931 case 'i': 01932 case 'I': 01933 rmBuffSize += 7; 01934 break; 01935 case 'f': 01936 rmBuffSize += 7; 01937 break; 01938 case 'Z': 01939 rmBuffSize += 4 + getString(offset).Length(); 01940 break; 01941 default: 01942 myStatus.setStatus(SamStatus::INVALID, 01943 "rmTag called with unknown type.\n"); 01944 returnStatus = false; 01945 break; 01946 }; 01947 01948 // Remove from the hash. 01949 extras.Delete(offset); 01950 } 01951 // Increment to the next tag. 01952 if(currentTagPtr[4] == ';') 01953 { 01954 // Increment once more. 01955 currentTagPtr += 5; 01956 } 01957 else if(currentTagPtr[4] != '\0') 01958 { 01959 // Invalid tag format. 01960 myStatus.setStatus(SamStatus::INVALID, 01961 "rmTags called with improperly formatted tags.\n"); 01962 returnStatus = false; 01963 break; 01964 } 01965 else 01966 { 01967 // Last Tag. 01968 currentTagPtr += 4; 01969 } 01970 } 01971 01972 // The buffer tags are now out of sync. 01973 myNeedToSetTagsInBuffer = true; 01974 myIsTagsBufferValid = false; 01975 myIsBufferSynced = false; 01976 myTagBufferSize -= rmBuffSize; 01977 01978 01979 return(returnStatus); 01980 }
bool SamRecord::set0BasedMatePosition | ( | int32_t | matePosition | ) |
Set the leftmost mate position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
matePosition | 0-based mate start position |
Definition at line 407 of file SamRecord.cpp.
Referenced by set1BasedMatePosition().
bool SamRecord::set0BasedPosition | ( | int32_t | position | ) |
Set the leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
position | 0-based start position |
Definition at line 321 of file SamRecord.cpp.
Referenced by set1BasedPosition(), and SamFilter::softClip().
bool SamRecord::set1BasedMatePosition | ( | int32_t | matePosition | ) |
Set the leftmost mate position using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
matePosition | 1-based mate start position |
Definition at line 401 of file SamRecord.cpp.
References set0BasedMatePosition().
00402 { 00403 return(set0BasedMatePosition(matePosition - 1)); 00404 }
bool SamRecord::set1BasedPosition | ( | int32_t | position | ) |
Set the leftmost position using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
position | 1-based start position |
Definition at line 315 of file SamRecord.cpp.
References set0BasedPosition().
00316 { 00317 return(set0BasedPosition(position - 1)); 00318 }
SamStatus::Status SamRecord::setBuffer | ( | const char * | fromBuffer, | |
uint32_t | fromBufferSize, | |||
SamFileHeader & | header | |||
) |
Sets the SamRecord to contain the information in BAM format found in fromBuffer.
fromBuffer | buffer to read the BAM record from. | |
fromBufferSize | size of the buffer containing the BAM record. | |
header | BAM header for the record. |
Definition at line 448 of file SamRecord.cpp.
References resetRecord().
00451 { 00452 myStatus = SamStatus::SUCCESS; 00453 if((fromBuffer == NULL) || (fromBufferSize == 0)) 00454 { 00455 // Buffer is empty. 00456 myStatus.setStatus(SamStatus::FAIL_PARSE, 00457 "Cannot parse an empty file."); 00458 return(SamStatus::FAIL_PARSE); 00459 } 00460 00461 // Clear the record. 00462 resetRecord(); 00463 00464 // allocate space for the record size. 00465 if(!allocateRecordStructure(fromBufferSize)) 00466 { 00467 // Failed to allocate space. 00468 return(SamStatus::FAIL_MEM); 00469 } 00470 00471 memcpy(myRecordPtr, fromBuffer, fromBufferSize); 00472 00473 setVariablesForNewBuffer(header); 00474 00475 // Return the status of the record. 00476 return(SamStatus::SUCCESS); 00477 }
SamStatus::Status SamRecord::setBufferFromFile | ( | IFILE | filePtr, | |
SamFileHeader & | header | |||
) |
Read the BAM record from a file.
filePtr | file to read the buffer from. | |
header | BAM header for the record. |
Definition at line 182 of file SamRecord.cpp.
References ifeof(), ifread(), InputFile::isOpen(), and resetRecord().
00184 { 00185 myStatus = SamStatus::SUCCESS; 00186 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00187 { 00188 // File is not open, return failure. 00189 myStatus.setStatus(SamStatus::FAIL_ORDER, 00190 "Can't read from an unopened file."); 00191 return(SamStatus::FAIL_ORDER); 00192 } 00193 00194 // Clear the record. 00195 resetRecord(); 00196 00197 // read the record size. 00198 int numBytes = 00199 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t)); 00200 00201 // Check to see if the end of the file was hit and no bytes were read. 00202 if(ifeof(filePtr) && (numBytes == 0)) 00203 { 00204 // End of file, nothing was read, no more records. 00205 myStatus.setStatus(SamStatus::NO_MORE_RECS, 00206 "No more records left to read."); 00207 return(SamStatus::NO_MORE_RECS); 00208 } 00209 00210 if(numBytes != sizeof(int32_t)) 00211 { 00212 // Failed to read the entire block size. Either the end of the file 00213 // was reached early or there was an error. 00214 if(ifeof(filePtr)) 00215 { 00216 // Error: end of the file reached prior to reading the rest of the 00217 // record. 00218 myStatus.setStatus(SamStatus::FAIL_PARSE, 00219 "EOF reached in the middle of a record."); 00220 return(SamStatus::FAIL_PARSE); 00221 } 00222 else 00223 { 00224 // Error reading. 00225 myStatus.setStatus(SamStatus::FAIL_IO, 00226 "Failed to read the record size."); 00227 return(SamStatus::FAIL_IO); 00228 } 00229 } 00230 00231 // allocate space for the record size. 00232 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t))) 00233 { 00234 // Failed to allocate space. 00235 // Status is set by allocateRecordStructure. 00236 return(SamStatus::FAIL_MEM); 00237 } 00238 00239 // Read the rest of the alignment block, starting at the reference id. 00240 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize) 00241 != (unsigned int)myRecordPtr->myBlockSize) 00242 { 00243 // Error reading the record. Reset it and return failure. 
00244 resetRecord(); 00245 myStatus.setStatus(SamStatus::FAIL_IO, 00246 "Failed to read the record"); 00247 return(SamStatus::FAIL_IO); 00248 } 00249 00250 setVariablesForNewBuffer(header); 00251 00252 // Return the status of the record. 00253 return(SamStatus::SUCCESS); 00254 }
bool SamRecord::setCigar | ( | const Cigar & | cigar | ) |
Set the CIGAR to the specified Cigar object.
Internal processing handles the switching between SAM/BAM formats when read/written.
cigar | Cigar object whose CIGAR string this record's CIGAR is set to. |
Definition at line 357 of file SamRecord.cpp.
References Cigar::getCigarString().
00358 { 00359 myStatus = SamStatus::SUCCESS; 00360 cigar.getCigarString(myCigar); 00361 00362 myIsBufferSynced = false; 00363 myIsCigarBufferValid = false; 00364 myCigarTempBufferLength = -1; 00365 myIsBinValid = false; 00366 00367 // Initialize the calculated alignment info to the uncalculated value. 00368 myAlignmentLength = -1; 00369 myUnclippedStartOffset = -1; 00370 myUnclippedEndOffset = -1; 00371 00372 return true; 00373 }
bool SamRecord::setCigar | ( | const char * | cigar | ) |
Set the CIGAR to the specified SAM formatted cigar string.
Internal processing handles the switching between SAM/BAM formats when read/written.
cigar | string containing the SAM formatted cigar. |
Definition at line 338 of file SamRecord.cpp.
Referenced by shiftIndelsLeft(), and SamFilter::softClip().
00339 { 00340 myStatus = SamStatus::SUCCESS; 00341 myCigar = cigar; 00342 00343 myIsBufferSynced = false; 00344 myIsCigarBufferValid = false; 00345 myCigarTempBufferLength = -1; 00346 myIsBinValid = false; 00347 00348 // Initialize the calculated alignment info to the uncalculated value. 00349 myAlignmentLength = -1; 00350 myUnclippedStartOffset = -1; 00351 myUnclippedEndOffset = -1; 00352 00353 return true; 00354 }
bool SamRecord::setFlag | ( | uint16_t | flag | ) |
Set the bitwise flag to the specified value.
flag | integer flag to use. |
Definition at line 294 of file SamRecord.cpp.
bool SamRecord::setInsertSize | ( | int32_t | insertSize | ) |
Sets the inferred insert size.
insertSize | inferred insert size. |
Definition at line 415 of file SamRecord.cpp.
bool SamRecord::setMapQuality | ( | uint8_t | mapQuality | ) |
Set the mapping quality.
mapQuality | map quality to set in the record. |
Definition at line 330 of file SamRecord.cpp.
bool SamRecord::setMateReferenceName | ( | SamFileHeader & | header, | |
const char * | mateReferenceName | |||
) |
Set the mate reference sequence name to the specified name, using the header to determine the mate reference id.
header | SAM/BAM header to use to determine the mate reference id. | |
mateReferenceName | mate reference name to use. |
Definition at line 376 of file SamRecord.cpp.
00378 { 00379 myStatus = SamStatus::SUCCESS; 00380 // Set the mate reference, if it is "=", set it to be equal 00381 // to myReferenceName. This assumes that myReferenceName has already 00382 // been called. 00383 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0) 00384 { 00385 myMateReferenceName = myReferenceName; 00386 } 00387 else 00388 { 00389 myMateReferenceName = mateReferenceName; 00390 } 00391 00392 // Set the Mate Reference ID. 00393 // If the reference ID does not already exist, add it (pass true) 00394 myRecordPtr->myMateReferenceID = 00395 header.getReferenceID(myMateReferenceName, true); 00396 00397 return true; 00398 }
bool SamRecord::setQuality | ( | const char * | quality | ) |
Sets the quality to the specified quality string.
This is a SAM formatted quality string. Internal processing handles switching between SAM/BAM formats when read/written.
quality | SAM quality string. |
Definition at line 436 of file SamRecord.cpp.
bool SamRecord::setReadName | ( | const char * | readName | ) |
Set QNAME to the passed in name.
readName | the readname to set the QNAME to. |
Definition at line 272 of file SamRecord.cpp.
00273 { 00274 myReadName = readName; 00275 myIsBufferSynced = false; 00276 myIsReadNameBufferValid = false; 00277 myStatus = SamStatus::SUCCESS; 00278 00279 // The read name must at least have some length, otherwise this is a parsing 00280 // error. 00281 if(myReadName.Length() == 0) 00282 { 00283 // Invalid - reset ReadName return false. 00284 myReadName = DEFAULT_READ_NAME; 00285 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH; 00286 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name."); 00287 return(false); 00288 } 00289 00290 return true; 00291 }
void SamRecord::setReference | ( | GenomeSequence * | reference | ) |
Set the reference to the specified genome sequence object.
reference | pointer to the GenomeSequence object. |
Definition at line 257 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::readIndexedRecord(), SamFile::ReadRecord(), SamFile::validateSortOrder(), and SamFile::WriteRecord().
bool SamRecord::setReferenceName | ( | SamFileHeader & | header, | |
const char * | referenceName | |||
) |
Set the reference name to the specified name, using the header to determine the reference id.
header | SAM/BAM header to use to determine the reference id. | |
referenceName | reference name to use. |
Definition at line 302 of file SamRecord.cpp.
bool SamRecord::setSequence | ( | const char * | seq | ) |
Sets the sequence to the specified sequence string.
This is a SAM formatted sequence string. Internal processing handles switching between SAM/BAM formats when read/written.
seq | SAM sequence string. May contain '='. |
Definition at line 423 of file SamRecord.cpp.
void SamRecord::setSequenceTranslation | ( | SequenceTranslation | translation | ) |
Set the type of sequence translation to use when getting the sequence.
The default type (if this method is never called) is NONE (the sequence is left as-is). This setting can be overridden by using the accessors that take a SequenceTranslation parameter.
translation | type of sequence translation to use. |
Definition at line 266 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::readIndexedRecord(), SamFile::ReadRecord(), and SamFile::validateSortOrder().
bool SamRecord::shiftIndelsLeft | ( | ) |
Shift the indels (if any) to the left by updating the CIGAR.
Definition at line 761 of file SamRecord.cpp.
References BASES, Cigar::foundInQuery(), getSequence(), CigarRoller::IncrementCount(), Cigar::insert, Cigar::isMatchOrMismatch(), CigarRoller::Remove(), setCigar(), Cigar::size(), and CigarRoller::Update().
00762 { 00763 // Check to see whether or not the Cigar has already been 00764 // set - this is determined by checking if alignment length 00765 // is set since alignment length and the cigar are set 00766 // at the same time. 00767 if(myAlignmentLength == -1) 00768 { 00769 // Not been set, so calculate it. 00770 parseCigar(); 00771 } 00772 00773 // Track whether or not there was a shift. 00774 bool shifted = false; 00775 00776 // Cigar is set, so now myCigarRoller can be used. 00777 // Track where in the read we are. 00778 uint32_t currentPos = 0; 00779 00780 // Since the loop starts at 1 because the first operation can't be shifted, 00781 // increment the currentPos past the first operation. 00782 if(Cigar::foundInQuery(myCigarRoller[0])) 00783 { 00784 // This op was found in the read, increment the current position. 00785 currentPos += myCigarRoller[0].count; 00786 } 00787 00788 int numOps = myCigarRoller.size(); 00789 00790 // Loop through the cigar operations from the 2nd operation since 00791 // the first operation is already on the end and can't shift. 00792 for(int currentOp = 1; currentOp < numOps; currentOp++) 00793 { 00794 if(myCigarRoller[currentOp].operation == Cigar::insert) 00795 { 00796 // For now, only shift a max of 1 operation. 00797 int prevOpIndex = currentOp-1; 00798 // Track the next op for seeing if it is the same as the 00799 // previous for merging reasons. 00800 int nextOpIndex = currentOp+1; 00801 if(nextOpIndex == numOps) 00802 { 00803 // There is no next op, so set it equal to the current one. 00804 nextOpIndex = currentOp; 00805 } 00806 // The start of the previous operation, so we know when we hit it 00807 // so we don't shift past it. 
00808 uint32_t prevOpStart = 00809 currentPos - myCigarRoller[prevOpIndex].count; 00810 00811 // We can only shift if the previous operation 00812 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex])) 00813 { 00814 // TODO - shift past pads 00815 // An insert is in the read, so increment the position. 00816 currentPos += myCigarRoller[currentOp].count; 00817 // Not a match/mismatch, so can't shift into it. 00818 continue; 00819 } 00820 00821 // It is a match or mismatch, so check to see if we can 00822 // shift into it. 00823 00824 // The end of the insert is calculated by adding the size 00825 // of this insert minus 1 to the start of the insert. 00826 uint32_t insertEndPos = 00827 currentPos + myCigarRoller[currentOp].count - 1; 00828 00829 // The insert starts at the current position. 00830 uint32_t insertStartPos = currentPos; 00831 00832 // Loop as long as the position before the insert start 00833 // matches the last character in the insert. If they match, 00834 // the insert can be shifted one index left because the 00835 // implied reference will not change. If they do not match, 00836 // we can't shift because the implied reference would change. 00837 // Stop loop when insertStartPos = prevOpStart, because we 00838 // don't want to move past that. 00839 while((insertStartPos > prevOpStart) && 00840 (getSequence(insertEndPos,BASES) == 00841 getSequence(insertStartPos - 1, BASES))) 00842 { 00843 // We can shift, so move the insert start & end one left. 00844 --insertEndPos; 00845 --insertStartPos; 00846 } 00847 00848 // Determine if a shift has occurred. 00849 int shiftLen = currentPos - insertStartPos; 00850 if(shiftLen > 0) 00851 { 00852 // Shift occured, so adjust the cigar if the cigar will 00853 // not become more operations. 00854 // If the next operation is the same as the previous or 00855 // if the insert and the previous operation switch positions 00856 // then the cigar has the same number of operations. 
00857 // If the next operation is different, and the shift splits 00858 // the previous operation in 2, then the cigar would 00859 // become longer, so we do not want to shift. 00860 if(myCigarRoller[nextOpIndex].operation == 00861 myCigarRoller[prevOpIndex].operation) 00862 { 00863 // The operations are the same, so merge them by adding 00864 // the length of the shift to the next operation. 00865 myCigarRoller.IncrementCount(nextOpIndex, shiftLen); 00866 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen); 00867 00868 // If the previous op length is 0, just remove that 00869 // operation. 00870 if(myCigarRoller[prevOpIndex].count == 0) 00871 { 00872 myCigarRoller.Remove(prevOpIndex); 00873 } 00874 shifted = true; 00875 } 00876 else 00877 { 00878 // Can only shift if the insert shifts past the 00879 // entire previous operation, otherwise an operation 00880 // would need to be added. 00881 if(insertStartPos == prevOpStart) 00882 { 00883 // Swap the positions of the insert and the 00884 // previous operation. 00885 myCigarRoller.Update(currentOp, 00886 myCigarRoller[prevOpIndex].operation, 00887 myCigarRoller[prevOpIndex].count); 00888 // Size of the previous op is the entire 00889 // shift length. 00890 myCigarRoller.Update(prevOpIndex, 00891 Cigar::insert, 00892 shiftLen); 00893 shifted = true; 00894 } 00895 } 00896 } 00897 // An insert is in the read, so increment the position. 00898 currentPos += myCigarRoller[currentOp].count; 00899 } 00900 else if(Cigar::foundInQuery(myCigarRoller[currentOp])) 00901 { 00902 // This op was found in the read, increment the current position. 00903 currentPos += myCigarRoller[currentOp].count; 00904 } 00905 } 00906 if(shifted) 00907 { 00908 // TODO - setCigar is currently inefficient because later the cigar 00909 // roller will be recalculated, but for now it will work. 00910 setCigar(myCigarRoller); 00911 } 00912 return(shifted); 00913 }
SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr, | |
SequenceTranslation | translation | |||
) |
Write the record as a BAM into the specified file.
filePtr | file to write the BAM record into. | |
translation | type of sequence translation to use. |
Definition at line 957 of file SamRecord.cpp.
References ifwrite(), and InputFile::isOpen().
00959 { 00960 myStatus = SamStatus::SUCCESS; 00961 if((filePtr == NULL) || (filePtr->isOpen() == false)) 00962 { 00963 // File is not open, return failure. 00964 myStatus.setStatus(SamStatus::FAIL_ORDER, 00965 "Can't write to an unopened file."); 00966 return(SamStatus::FAIL_ORDER); 00967 } 00968 00969 if((myIsBufferSynced == false) || 00970 (myBufferSequenceTranslation != translation)) 00971 { 00972 if(!fixBuffer(translation)) 00973 { 00974 return(myStatus.getStatus()); 00975 } 00976 } 00977 00978 // Write the record. 00979 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t); 00980 unsigned int numBytesWritten = 00981 ifwrite(filePtr, myRecordPtr, numBytesToWrite); 00982 00983 // Return status based on if the correct number of bytes were written. 00984 if(numBytesToWrite == numBytesWritten) 00985 { 00986 return(SamStatus::SUCCESS); 00987 } 00988 // The correct number of bytes were not written. 00989 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record."); 00990 return(SamStatus::FAIL_IO); 00991 }
SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr | ) |
Write the record as a BAM into the specified file.
filePtr | file to write the BAM record into. |
Definition at line 950 of file SamRecord.cpp.
00951 { 00952 return(writeRecordBuffer(filePtr, mySequenceTranslation)); 00953 }