libStatGen Software
1
|
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record. More...
#include <SamRecord.h>
Public Types | |
enum | SequenceTranslation { NONE, EQUAL, BASES } |
Enum containing the settings on how to translate the sequence if a reference is available. More... | |
Public Member Functions | |
SamRecord () | |
Default Constructor. | |
SamRecord (ErrorHandler::HandlingType errorHandlingType) | |
Constructor that sets the error handling type. | |
~SamRecord () | |
Destructor. | |
void | resetRecord () |
Reset the fields of the record to a default value. | |
bool | isValid (SamFileHeader &header) |
Returns whether or not the record is valid, setting the status to indicate success or failure. | |
void | setReference (GenomeSequence *reference) |
Set the reference to the specified genome sequence object. | |
void | setSequenceTranslation (SequenceTranslation translation) |
Set the type of sequence translation to use when getting the sequence. | |
const SamStatus & | getStatus () |
Returns the status associated with the last method that sets the status. | |
Set Alignment Data | |
Set methods for record fields. All of the "set" methods set the status to indicate success or the failure reason. | |
bool | setReadName (const char *readName) |
Set QNAME to the passed in name. | |
bool | setFlag (uint16_t flag) |
Set the bitwise FLAG to the specified value. | |
bool | setReferenceName (SamFileHeader &header, const char *referenceName) |
Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id. | |
bool | set1BasedPosition (int32_t position) |
Set the leftmost position (POS) using the specified 1-based (SAM format) value. | |
bool | set0BasedPosition (int32_t position) |
Set the leftmost position using the specified 0-based (BAM format) value. | |
bool | setMapQuality (uint8_t mapQuality) |
Set the mapping quality (MAPQ). | |
bool | setCigar (const char *cigar) |
Set the CIGAR to the specified SAM formatted cigar string. | |
bool | setCigar (const Cigar &cigar) |
Set the CIGAR to the specified Cigar object. | |
bool | setMateReferenceName (SamFileHeader &header, const char *mateReferenceName) |
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id. | |
bool | set1BasedMatePosition (int32_t matePosition) |
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value. | |
bool | set0BasedMatePosition (int32_t matePosition) |
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value. | |
bool | setInsertSize (int32_t insertSize) |
Sets the inferred insert size (ISIZE)/observed template length (TLEN). | |
bool | setSequence (const char *seq) |
Sets the sequence (SEQ) to the specified SAM formatted sequence string. | |
bool | setQuality (const char *quality) |
Sets the quality (QUAL) to the specified SAM formatted quality string. | |
bool | shiftIndelsLeft () |
Shift the indels (if any) to the left by updating the CIGAR. | |
SamStatus::Status | setBuffer (const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header) |
Sets the SamRecord to contain the information in the BAM formatted fromBuffer. | |
SamStatus::Status | setBufferFromFile (IFILE filePtr, SamFileHeader &header) |
Read the BAM record from a file. | |
Set Tag Data | |
Set methods for tags. | |
bool | addIntTag (const char *tag, int32_t value) |
Add the specified integer tag to the record. | |
bool | addTag (const char *tag, char vtype, const char *value) |
Add the specified tag,vtype,value to the record. | |
void | clearTags () |
Clear the tags in this record. | |
bool | rmTag (const char *tag, char type) |
Remove a tag. | |
bool | rmTags (const char *tags) |
Remove tags. | |
Get Alignment Data | |
Get methods for record fields. All of the "get" methods set the status to indicate success or the failure reason. | |
const void * | getRecordBuffer () |
Get a const pointer to the buffer that contains the BAM representation of the record. | |
const void * | getRecordBuffer (SequenceTranslation translation) |
Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence. | |
SamStatus::Status | writeRecordBuffer (IFILE filePtr) |
Write the record as a BAM into the specified already opened file. | |
SamStatus::Status | writeRecordBuffer (IFILE filePtr, SequenceTranslation translation) |
Write the record as a BAM into the specified already opened file using the specified translation on the sequence. | |
int32_t | getBlockSize () |
Get the block size of the record (BAM format). | |
const char * | getReferenceName () |
Get the reference sequence name (RNAME) of the record. | |
int32_t | getReferenceID () |
Get the reference sequence id of the record (BAM format rid). | |
int32_t | get1BasedPosition () |
Get the 1-based(SAM) leftmost position (POS) of the record. | |
int32_t | get0BasedPosition () |
Get the 0-based(BAM) leftmost position of the record. | |
uint8_t | getReadNameLength () |
Get the length of the readname (QNAME) including the null. | |
uint8_t | getMapQuality () |
Get the mapping quality (MAPQ) of the record. | |
uint16_t | getBin () |
Get the BAM bin for the record. | |
uint16_t | getCigarLength () |
Get the length of the BAM formatted CIGAR. | |
uint16_t | getFlag () |
Get the flag (FLAG). | |
int32_t | getReadLength () |
Get the length of the read. | |
const char * | getMateReferenceName () |
Get the mate/next fragment's reference sequence name (RNEXT). | |
const char * | getMateReferenceNameOrEqual () |
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned. | |
int32_t | getMateReferenceID () |
Get the mate reference id of the record (BAM format: mate_rid/next_refID). | |
int32_t | get1BasedMatePosition () |
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT). | |
int32_t | get0BasedMatePosition () |
Get the 0-based(BAM) leftmost mate/next fragment's position. | |
int32_t | getInsertSize () |
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN). | |
int32_t | get0BasedAlignmentEnd () |
Returns the 0-based inclusive rightmost position of the clipped sequence. | |
int32_t | get1BasedAlignmentEnd () |
Returns the 1-based inclusive rightmost position of the clipped sequence. | |
int32_t | getAlignmentLength () |
Returns the length of the clipped sequence, returning 0 if the cigar is '*'. | |
int32_t | get0BasedUnclippedStart () |
Returns the 0-based inclusive left-most position adjusted for clipped bases. | |
int32_t | get1BasedUnclippedStart () |
Returns the 1-based inclusive left-most position adjusted for clipped bases. | |
int32_t | get0BasedUnclippedEnd () |
Returns the 0-based inclusive right-most position adjusted for clipped bases. | |
int32_t | get1BasedUnclippedEnd () |
Returns the 1-based inclusive right-most position adjusted for clipped bases. | |
const char * | getReadName () |
Returns the SAM formatted Read Name (QNAME). | |
const char * | getCigar () |
Returns the SAM formatted CIGAR string. | |
const char * | getSequence () |
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation. | |
const char * | getSequence (SequenceTranslation translation) |
Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation. | |
const char * | getQuality () |
Returns the SAM formatted quality string (QUAL). | |
char | getSequence (int index) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation. | |
char | getSequence (int index, SequenceTranslation translation) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation. | |
char | getQuality (int index) |
Get the quality character at the specified index into the quality 0 to readLength - 1. | |
Cigar * | getCigarInfo () |
Returns a pointer to the Cigar object associated with this record. | |
uint32_t | getNumOverlaps (int32_t start, int32_t end) |
Return the number of bases in this read that overlap the passed in region. | |
bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality) |
Returns the values of all fields except the tags. | |
bool | getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality, SequenceTranslation translation) |
Returns the values of all fields except the tags using the specified sequence translation. | |
GenomeSequence * | getReference () |
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set). | |
Get Tag Methods | |
uint32_t | getTagLength () |
Returns the length of the BAM formatted tags. | |
bool | getNextSamTag (char *tag, char &vtype, void **value) |
Get the next tag from the record. | |
void | resetTagIter () |
Reset the tag iterator to the beginning of the tags. | |
bool | getTagsString (const char *tags, String &returnString, char delim= '\t') |
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... | |
const String * | getStringTag (const char *tag) |
Get the string value for the specified tag. | |
int * | getIntegerTag (const char *tag) |
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure). | |
bool | getIntegerTag (const char *tag, int &tagVal) |
Get the integer value for the specified tag. | |
bool | getFloatTag (const char *tag, float &tagVal) |
Get the float value for the specified tag. | |
const String & | getString (const char *tag) |
Get the string value for the specified tag. | |
int & | getInteger (const char *tag) |
Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool. | |
bool | checkString (const char *tag) |
Check if the specified tag contains a string. | |
bool | checkInteger (const char *tag) |
Check if the specified tag contains an integer. | |
bool | checkFloat (const char *tag) |
Check if the specified tag contains a float. | |
bool | checkTag (const char *tag, char type) |
Check if the specified tag contains a value of the specified vtype. | |
static bool | isIntegerType (char vtype) |
Returns whether or not the specified vtype is an integer type. | |
static bool | isFloatType (char vtype) |
Returns whether or not the specified vtype is a float type. | |
static bool | isCharType (char vtype) |
Returns whether or not the specified vtype is a char type. | |
static bool | isStringType (char vtype) |
Returns whether or not the specified vtype is a string type. |
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition at line 51 of file SamRecord.h.
Enum containing the settings on how to translate the sequence if a reference is available.
If no reference is available, no translation is done.
NONE |
Leave the sequence as is. |
EQUAL |
Translate bases that match the reference to '='. |
BASES |
Translate '=' to the actual base. |
Definition at line 57 of file SamRecord.h.
SamRecord::SamRecord | ( | ErrorHandler::HandlingType | errorHandlingType | ) |
Constructor that sets the error handling type.
errorHandlingType | how to handle errors. |
Definition at line 53 of file SamRecord.cpp.
References resetRecord().
: myStatus(errorHandlingType),
  myRefPtr(NULL),
  mySequenceTranslation(NONE)
{
    // Initial size of the record buffer: the default block size plus
    // sizeof(int32_t).
    const int32_t initialAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);

    // NOTE(review): the malloc result is not checked before resetRecord()
    // runs - confirm how an allocation failure should be reported.
    myRecordPtr = (bamRecordStruct *) malloc(initialAllocSize);
    allocatedSize = initialAllocSize;

    // No temporary cigar buffer exists yet.
    myCigarTempBuffer = NULL;
    myCigarTempBufferAllocatedSize = 0;

    // Put every field of the record into its default state.
    resetRecord();
}
bool SamRecord::addIntTag | ( | const char * | tag, |
int32_t | value | ||
) |
Add the specified integer tag to the record.
Internal processing handles switching between SAM/BAM formats when read/written and determining the type for BAM format. If the tag is already there this code will replace it if the specified value is different.
tag | two character tag to be added to the SAM/BAM record. |
value | value for the specified tag. |
Definition at line 635 of file SamRecord.cpp.
References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
Referenced by addTag().
{
    // Adds (or replaces) an integer tag, choosing the narrowest BAM integer
    // type that can hold 'value'.  Returns true on success; on failure the
    // status is set and false is returned.
    myStatus = SamStatus::SUCCESS;
    int key = 0;
    int index = 0;
    char bamvtype;
    int tagBufferSize = 0;

    // First check to see if the tags need to be synced to the buffer.
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read tags from the buffer, so cannot add new ones.
            return(false);
        }
    }

    // Ints come in as int.  But it can be represented in fewer bits.
    // So determine a more specific type that is in line with the
    // types for BAM files.  The comparisons are strict, so a value exactly
    // equal to a limit is stored in the next larger type.
    if(value < 0)
    {
        // The int is negative, so it will need to use a signed type.
        // Compare against 'signed char' explicitly: the signedness of plain
        // 'char' is implementation-defined, and where it is unsigned its
        // minimum is 0, so no negative value would ever select 'c'.
        if(value > ((std::numeric_limits<signed char>::min)()))
        {
            // It can be stored in a signed char.
            bamvtype = 'c';
            tagBufferSize += 4;
        }
        else if(value > ((std::numeric_limits<short>::min)()))
        {
            // It fits in a signed short.
            bamvtype = 's';
            tagBufferSize += 5;
        }
        else
        {
            // Just store it as a signed int.
            bamvtype = 'i';
            tagBufferSize += 7;
        }
    }
    else
    {
        // It is positive, so an unsigned type can be used.
        if(value < ((std::numeric_limits<unsigned char>::max)()))
        {
            // It is under the max of an unsigned char.
            bamvtype = 'C';
            tagBufferSize += 4;
        }
        else if(value < ((std::numeric_limits<unsigned short>::max)()))
        {
            // It is under the max of an unsigned short.
            bamvtype = 'S';
            tagBufferSize += 5;
        }
        else
        {
            // Just store it as an unsigned int.
            bamvtype = 'I';
            tagBufferSize += 7;
        }
    }

    // Check to see if the tag is already there.
    key = MAKEKEY(tag[0], tag[1], bamvtype);
    unsigned int hashIndex = extras.Find(key);
    if(hashIndex != LH_NOTFOUND)
    {
        // Tag was already found.
        index = extras[hashIndex];

        // Since the tagBufferSize was already updated with the new value,
        // subtract the size for the previous tag (even if they are the same).
        switch(intType[index])
        {
            case 'c':
            case 'C':
            case 'A':
                tagBufferSize -= 4;
                break;
            case 's':
            case 'S':
                tagBufferSize -= 5;
                break;
            case 'i':
            case 'I':
                tagBufferSize -= 7;
                break;
            default:
                myStatus.setStatus(SamStatus::INVALID,
                                   "unknown tag inttype type found.\n");
                return(false);
        }

        // Tag already existed, print message about overwriting.
        // WARN about dropping duplicate tags.
        if(myNumWarns++ < myMaxWarns)
        {
            String newVal;
            String origVal;
            appendIntArrayValue(index, origVal);
            appendIntArrayValue(bamvtype, value, newVal);
            fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
                    tag[0], tag[1], intType[index], origVal.c_str(),
                    tag[0], tag[1], bamvtype, newVal.c_str());
            if(myNumWarns == myMaxWarns)
            {
                fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
            }
        }

        // Update the integer value and type.
        integers[index] = value;
        intType[index] = bamvtype;
    }
    else
    {
        // Tag is not already there, so add it.
        index = integers.Length();
        integers.Push(value);
        intType.push_back(bamvtype);
        extras.Add(key, index);
    }

    // The buffer tags are now out of sync.
    myNeedToSetTagsInBuffer = true;
    myIsTagsBufferValid = false;
    myIsBufferSynced = false;
    myTagBufferSize += tagBufferSize;

    return(true);
}
bool SamRecord::addTag | ( | const char * | tag, |
char | vtype, | ||
const char * | value | ||
) |
Add the specified tag,vtype,value to the record.
Vtype can be SAM/BAM format. Internal processing handles switching between SAM/BAM formats when read/written. If the tag is already there this code will replace it if the specified value is different.
tag | two character tag to be added to the SAM/BAM record. |
vtype | vtype of the specified value - either SAM/BAM vtypes. |
value | value as a string for the specified tag. |
Definition at line 779 of file SamRecord.cpp.
References addIntTag(), StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Adds (or replaces) a tag whose value is supplied as a string.
    // 'valuePtr' is the string value argument (shown as 'value' in the
    // declaration).  Returns true on success; for an unknown vtype the status
    // is set to FAIL_PARSE and false is returned.
    if(vtype == 'i')
    {
        // integer type. Call addIntTag to handle it.
        int intVal = atoi(valuePtr);
        return(addIntTag(tag, intVal));
    }

    // Non-int type.
    myStatus = SamStatus::SUCCESS;
    bool status = true; // default to successful.
    int key = 0;
    int index = 0;
    int tagBufferSize = 0;

    // First check to see if the tags need to be synced to the buffer.
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read tags from the buffer, so cannot add new ones.
            return(false);
        }
    }

    // First check to see if the tag is already there.
    key = MAKEKEY(tag[0], tag[1], vtype);
    unsigned int hashIndex = extras.Find(key);
    if(hashIndex != LH_NOTFOUND)
    {
        // The key was found in the hash, so get the lookup index.
        index = extras[hashIndex];

        String origTag;
        char origType = vtype;

        // Adjust the currently pointed to value to the new setting.
        switch (vtype)
        {
            case 'A' :
                // First check to see if the value changed.
                if((integers[index] == (const int)*(valuePtr)) &&
                   (intType[index] == vtype))
                {
                    // The value & type has not changed, so do nothing.
                    return(true);
                }
                else
                {
                    // Tag buffer size changes if type changes, so subtract & add.
                    origType = intType[index];
                    appendIntArrayValue(index, origTag);
                    tagBufferSize -= getNumericTagTypeSize(intType[index]);
                    tagBufferSize += getNumericTagTypeSize(vtype);
                    integers[index] = (const int)*(valuePtr);
                    intType[index] = vtype;
                }
                break;
            case 'Z' :
                // First check to see if the value changed.
                if(strings[index] == valuePtr)
                {
                    // The value has not changed, so do nothing.
                    return(true);
                }
                else
                {
                    // Adjust the tagBufferSize by removing the size of the old string.
                    origTag = strings[index];
                    tagBufferSize -= strings[index].Length();
                    strings[index] = valuePtr;
                    // Adjust the tagBufferSize by adding the size of the new string.
                    tagBufferSize += strings[index].Length();
                }
                break;
            case 'B' :
                // First check to see if the value changed.
                if(strings[index] == valuePtr)
                {
                    // The value has not changed, so do nothing.
                    return(true);
                }
                else
                {
                    // Adjust the tagBufferSize by removing the size of the old field.
                    origTag = strings[index];
                    tagBufferSize -= getBtagBufferSize(strings[index]);
                    strings[index] = valuePtr;
                    // Adjust the tagBufferSize by adding the size of the new field.
                    tagBufferSize += getBtagBufferSize(strings[index]);
                }
                break;
            case 'f' :
                // First check to see if the value changed.
                if(floats[index] == (float)atof(valuePtr))
                {
                    // The value has not changed, so do nothing.
                    return(true);
                }
                else
                {
                    // Tag buffer size doesn't change between different 'f' entries.
                    origTag.appendFullFloat(floats[index]);
                    floats[index] = (float)atof(valuePtr);
                }
                break;
            default :
                fprintf(stderr,
                        "samRecord::addTag() - Unknown custom field of type %c\n",
                        vtype);
                myStatus.setStatus(SamStatus::FAIL_PARSE,
                                   "Unknown custom field in a tag");
                status = false;
                break;
        }

        // Duplicate tag in this record.
        // Tag already existed, print message about overwriting.
        // Only warn when the value really was replaced: for an unknown vtype
        // nothing was overwritten, so the warning would be misleading.
        if(status && (myNumWarns++ < myMaxWarns))
        {
            fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
                    tag[0], tag[1], origType, origTag.c_str(),
                    tag[0], tag[1], vtype, valuePtr);
            if(myNumWarns == myMaxWarns)
            {
                fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
            }
        }
    }
    else
    {
        // The key was not found in the hash, so add it.
        switch (vtype)
        {
            case 'A' :
                index = integers.Length();
                integers.Push((const int)*(valuePtr));
                intType.push_back(vtype);
                tagBufferSize += 4;
                break;
            case 'Z' :
                index = strings.Length();
                strings.Push(valuePtr);
                tagBufferSize += 4 + strings.Last().Length();
                break;
            case 'B' :
                index = strings.Length();
                strings.Push(valuePtr);
                tagBufferSize += 3 + getBtagBufferSize(strings[index]);
                break;
            case 'f' :
                index = floats.size();
                floats.push_back((float)atof(valuePtr));
                tagBufferSize += 7;
                break;
            default :
                fprintf(stderr,
                        "samRecord::addTag() - Unknown custom field of type %c\n",
                        vtype);
                myStatus.setStatus(SamStatus::FAIL_PARSE,
                                   "Unknown custom field in a tag");
                status = false;
                break;
        }
        if(status)
        {
            // If successful, add the key to extras.
            extras.Add(key, index);
        }
    }

    // Only add the tag if it has so far been successfully processed.
    if(status)
    {
        // The buffer tags are now out of sync.
        myNeedToSetTagsInBuffer = true;
        myIsTagsBufferValid = false;
        myIsBufferSynced = false;
        myTagBufferSize += tagBufferSize;
    }
    return(status);
}
bool SamRecord::checkFloat | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a float.
Does not set SamStatus.
tag | SAM tag to check contents of. |
Definition at line 613 of file SamRecord.h.
References checkTag().
{
    // Float values are looked up under the 'f' value type.
    const bool hasFloatValue = checkTag(tag, 'f');
    return hasFloatValue;
}
bool SamRecord::checkInteger | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains an integer.
Does not set SamStatus.
tag | SAM tag to check contents of. |
Definition at line 607 of file SamRecord.h.
References checkTag().
{
    // Integer values are looked up under the 'i' value type.
    const bool hasIntegerValue = checkTag(tag, 'i');
    return hasIntegerValue;
}
bool SamRecord::checkString | ( | const char * | tag | ) | [inline] |
Check if the specified tag contains a string.
Does not set SamStatus.
tag | SAM tag to check contents of. |
Definition at line 600 of file SamRecord.h.
References checkTag().
bool SamRecord::checkTag | ( | const char * | tag, |
char | type | ||
) |
Check if the specified tag contains a value of the specified vtype.
Does not set SamStatus.
tag | SAM tag to check contents of. |
type | value type to check if the SAM tag matches. |
Definition at line 2369 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by checkFloat(), checkInteger(), and checkString().
{
    // Reports whether the record holds 'tag' with the given value type.
    // Returns true only when the tag/type key is present.
    // NOTE(review): the status is reset to SUCCESS here even though the
    // description says the method does not set SamStatus - confirm intent.
    myStatus = SamStatus::SUCCESS;

    // Parse the buffer if necessary.
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read the tags from the buffer, so the tag cannot be
            // confirmed.  setTagsFromBuffer set the error.  Return false:
            // the previous return("") converted a non-null pointer to true.
            return(false);
        }
    }

    int key = MAKEKEY(tag[0], tag[1], type);

    return (extras.Find(key) != LH_NOTFOUND);
}
void SamRecord::clearTags | ( | ) |
Clear the tags in this record.
Does not set SamStatus.
Definition at line 965 of file SamRecord.cpp.
References resetTagIter().
Referenced by resetRecord().
{
    // The lookup hash is only cleared when it actually holds entries.
    const bool hashHasEntries = (extras.Entries() != 0);
    if(hashHasEntries)
    {
        extras.Clear();
    }

    // Drop the stored values for every tag type.
    strings.Clear();
    integers.Clear();
    intType.clear();
    floats.clear();

    // No tags remain, so the tag buffer is empty and the iterator restarts.
    myTagBufferSize = 0;
    resetTagIter();
}
int32_t SamRecord::get0BasedAlignmentEnd | ( | ) |
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition at line 1455 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by get0BasedUnclippedEnd(), get1BasedAlignmentEnd(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and CigarHelper::softClipEndByRefPos().
{
    myStatus = SamStatus::SUCCESS;

    // An alignment length of -1 means it has not been calculated yet;
    // parsing the cigar sets it.
    if(myAlignmentLength == -1)
    {
        parseCigar();
    }

    // A zero length alignment ends where it starts; otherwise the inclusive
    // end is one less than start + length.
    return (myAlignmentLength == 0)
        ? (myRecordPtr->myPosition)
        : (myRecordPtr->myPosition + myAlignmentLength - 1);
}
int32_t SamRecord::get0BasedMatePosition | ( | ) |
Get the 0-based(BAM) leftmost mate/next fragment's position.
Definition at line 1440 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Plain accessor: flag success, then hand back the stored mate position.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myMatePosition);
}
int32_t SamRecord::get0BasedPosition | ( | ) |
Get the 0-based(BAM) leftmost position of the record.
Definition at line 1307 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamCoordOutput::add(), PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getNumOverlaps(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), CigarHelper::softClipBeginByRefPos(), CigarHelper::softClipEndByRefPos(), and SamFile::validateSortOrder().
{
    // Plain accessor: flag success, then hand back the stored position.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myPosition);
}
int32_t SamRecord::get0BasedUnclippedEnd | ( | ) |
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition at line 1514 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by get1BasedUnclippedEnd().
{
    // Returns the 0-based inclusive alignment end plus the unclipped end
    // offset.
    //
    // get0BasedAlignmentEnd() parses the cigar when it has not been parsed
    // yet, which is what sets myUnclippedEndOffset.  The operands of '+' are
    // not evaluated in a guaranteed order, so make the call first and only
    // then read the offset.
    const int32_t alignmentEnd = get0BasedAlignmentEnd();
    return(alignmentEnd + myUnclippedEndOffset);
}
int32_t SamRecord::get0BasedUnclippedStart | ( | ) |
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition at line 1494 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by get1BasedUnclippedStart().
{
    myStatus = SamStatus::SUCCESS;

    // An offset of -1 means the unclipped start has not been calculated;
    // parsing the cigar provides it.
    const bool offsetUnknown = (myUnclippedStartOffset == -1);
    if(offsetUnknown)
    {
        parseCigar();
    }

    // Move left from the aligned start by the offset.
    return(myRecordPtr->myPosition - myUnclippedStartOffset);
}
int32_t SamRecord::get1BasedAlignmentEnd | ( | ) |
Returns the 1-based inclusive rightmost position of the clipped sequence.
Definition at line 1474 of file SamRecord.cpp.
References get0BasedAlignmentEnd().
Referenced by getBin().
{
    // Convert the 0-based end to 1-based by adding one.
    const int32_t zeroBasedEnd = get0BasedAlignmentEnd();
    return(zeroBasedEnd + 1);
}
int32_t SamRecord::get1BasedMatePosition | ( | ) |
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
Definition at line 1433 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Flag success, then convert the stored 0-based mate position to 1-based.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myMatePosition + 1);
}
int32_t SamRecord::get1BasedPosition | ( | ) |
Get the 1-based(SAM) leftmost position (POS) of the record.
Definition at line 1300 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamValidator::isValid().
{
    // Flag success, then convert the stored 0-based position to 1-based.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myPosition + 1);
}
int32_t SamRecord::get1BasedUnclippedEnd | ( | ) |
Returns the 1-based inclusive right-most position adjusted for clipped bases.
Definition at line 1523 of file SamRecord.cpp.
References get0BasedUnclippedEnd().
{
    // Convert the 0-based unclipped end to 1-based by adding one.
    const int32_t zeroBasedEnd = get0BasedUnclippedEnd();
    return(zeroBasedEnd + 1);
}
int32_t SamRecord::get1BasedUnclippedStart | ( | ) |
Returns the 1-based inclusive left-most position adjusted for clipped bases.
Definition at line 1507 of file SamRecord.cpp.
References get0BasedUnclippedStart().
{
    // Convert the 0-based unclipped start to 1-based by adding one.
    const int32_t zeroBasedStart = get0BasedUnclippedStart();
    return(zeroBasedStart + 1);
}
int32_t SamRecord::getAlignmentLength | ( | ) |
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition at line 1481 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // A length of -1 marks "not calculated yet"; parse the cigar to fill it.
    const bool lengthUnknown = (myAlignmentLength == -1);
    if(lengthUnknown)
    {
        parseCigar();
    }

    return(myAlignmentLength);
}
uint16_t SamRecord::getBin | ( | ) |
Get the BAM bin for the record.
Definition at line 1335 of file SamRecord.cpp.
References get1BasedAlignmentEnd(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // The stored bin can be returned as is when it is still valid.
    if(myIsBinValid)
    {
        return(myRecordPtr->myBin);
    }

    // The bin that is set in the record is stale, so recompute it from the
    // start position and the 1-based alignment end, then mark it valid.
    myRecordPtr->myBin = bam_reg2bin(myRecordPtr->myPosition,
                                     get1BasedAlignmentEnd());
    myIsBinValid = true;

    return(myRecordPtr->myBin);
}
int32_t SamRecord::getBlockSize | ( | ) |
Get the block size of the record (BAM format).
Definition at line 1269 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // The block size is only meaningful once the buffer is synced, so sync
    // it first when needed.  The sequence translation has no bearing on the
    // block size, so the translation currently used by the buffer is reused.
    if(!myIsBufferSynced)
    {
        fixBuffer(myBufferSequenceTranslation);
    }

    return(myRecordPtr->myBlockSize);
}
const char * SamRecord::getCigar | ( | ) |
Returns the SAM formatted CIGAR string.
Definition at line 1543 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by getFields(), SamValidator::isValidCigar(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().
{
    myStatus = SamStatus::SUCCESS;

    // An empty cigar string means the cigar is in the buffer but has not
    // been synced into the string yet, so do that now.
    const bool cigarStringEmpty = (myCigar.Length() == 0);
    if(cigarStringEmpty)
    {
        parseCigarBinary();
    }

    return(myCigar.c_str());
}
Cigar * SamRecord::getCigarInfo | ( | ) |
Returns a pointer to the Cigar object associated with this record.
The object is essentially read-only, only allowing modifications due to lazy evaluations.
Definition at line 1824 of file SamRecord.cpp.
Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getSequence(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().
{
    // The alignment length and the Cigar object are set together, so an
    // alignment length of -1 means the Cigar object has not been set yet.
    const bool cigarNotParsed = (myAlignmentLength == -1);
    if(cigarNotParsed)
    {
        parseCigar();
    }

    return(&myCigarRoller);
}
uint16_t SamRecord::getCigarLength | ( | ) |
Get the length of the BAM formatted CIGAR.
Definition at line 1350 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    if(!myIsCigarBufferValid)
    {
        // The cigar in the record buffer cannot be trusted, so use the
        // temporary buffer, parsing the cigar string first if the temporary
        // buffer has not been set (length of -1).
        if(myCigarTempBufferLength == -1)
        {
            parseCigarString();
        }
        return(myCigarTempBufferLength);
    }

    // The cigar buffer is valid, so take the length straight from it.
    return myRecordPtr->myCigarLength;
}
bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, |
String & | readName, | ||
String & | cigar, | ||
String & | sequence, | ||
String & | quality | ||
) |
Returns the values of all fields except the tags.
recStruct | structure containing the contents of all non-variable length fields. |
readName | read name from the record (return param) |
cigar | cigar string from the record (return param) |
sequence | sequence string from the record (return param) |
quality | quality string from the record (return param) |
Definition at line 1854 of file SamRecord.cpp.
{
    // Delegate to the full overload, using the translation configured on
    // this record.
    const bool gotFields = getFields(recStruct, readName, cigar,
                                     sequence, quality,
                                     mySequenceTranslation);
    return(gotFields);
}
bool SamRecord::getFields | ( | bamRecordStruct & | recStruct, |
String & | readName, | ||
String & | cigar, | ||
String & | sequence, | ||
String & | quality, | ||
SequenceTranslation | translation | ||
) |
Returns the values of all fields except the tags using the specified sequence translation.
recStruct | structure containing the contents of all non-variable length fields. |
readName | read name from the record (return param) |
cigar | cigar string from the record (return param) |
sequence | sequence string from the record (return param) |
quality | quality string from the record (return param) |
translation | type of sequence translation to use. |
Definition at line 1863 of file SamRecord.cpp.
References getCigar(), getQuality(), getReadName(), getSequence(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // The fixed-length fields are copied from the buffer, so it must be
    // synced first; give up if that fails.
    if(!myIsBufferSynced && !fixBuffer(translation))
    {
        return(false);
    }
    memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));

    // Fetch each variable-length field in turn, stopping at the first
    // getter that leaves a non-success status.
    readName = getReadName();
    if(myStatus != SamStatus::SUCCESS)
    {
        return(false);
    }

    cigar = getCigar();
    if(myStatus != SamStatus::SUCCESS)
    {
        return(false);
    }

    sequence = getSequence(translation);
    if(myStatus != SamStatus::SUCCESS)
    {
        return(false);
    }

    quality = getQuality();
    if(myStatus != SamStatus::SUCCESS)
    {
        return(false);
    }

    return(true);
}
uint16_t SamRecord::getFlag | ( | ) |
Get the flag (FLAG).
Definition at line 1372 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead(), SamQuerySeqWithRefIter::getNextMatchMismatch(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processFile(), and SamFile::ReadRecord().
{
    // Plain accessor: flag success, then hand back the stored flag.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myFlag);
}
bool SamRecord::getFloatTag | ( | const char * | tag, |
float & | tagVal | ||
) |
Get the float value for the specified tag.
tag | tag to retrieve |
tagVal | return parameter with float value for the tag |
Definition at line 2269 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // Tags may still live only in the buffer; parse them before the lookup.
    // On failure setTagsFromBuffer has already recorded the error.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return false;
    }

    // Float tags are hashed under the two tag characters plus type 'f'.
    int floatKey = MAKEKEY(tag[0], tag[1], 'f');
    int slot = extras.Find(floatKey);
    if(slot < 0)
    {
        // No float tag with this name.
        return false;
    }

    // The hash entry holds the index into the floats container.
    int floatIndex = extras[slot];
    tagVal = floats[floatIndex];
    return true;
}
int32_t SamRecord::getInsertSize | ( | ) |
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
Definition at line 1447 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Read directly from the record buffer; always reports success.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myInsertSize);
}
int * SamRecord::getIntegerTag | ( | const char * | tag | ) |
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure).
tag | tag to retrieve |
Returns a pointer to the tag's integer value if found, NULL if not found.
Definition at line 2204 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // Tags may still live only in the buffer; parse them before the lookup.
    // On failure setTagsFromBuffer has already recorded the error.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return NULL;
    }

    // Integer tags are hashed under the two tag characters plus type 'i'.
    int intKey = MAKEKEY(tag[0], tag[1], 'i');
    int slot = extras.Find(intKey);
    if(slot < 0)
    {
        // No integer tag with this name.
        return NULL;
    }

    // The hash entry holds the index into the integers container; hand
    // back the address of that element.
    int intIndex = extras[slot];
    return &(integers[intIndex]);
}
bool SamRecord::getIntegerTag | ( | const char * | tag, |
int & | tagVal | ||
) |
Get the integer value for the specified tag.
tag | tag to retrieve |
tagVal | return parameter with integer value for the tag |
Returns true if the integer tag was found and tagVal was set, false if not.
Definition at line 2236 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // Tags may still live only in the buffer; parse them before the lookup.
    // On failure setTagsFromBuffer has already recorded the error.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return false;
    }

    // Integer tags are hashed under the two tag characters plus type 'i'.
    int intKey = MAKEKEY(tag[0], tag[1], 'i');
    int slot = extras.Find(intKey);
    if(slot < 0)
    {
        // No integer tag with this name; tagVal is left untouched.
        return false;
    }

    // The hash entry holds the index into the integers container.
    int intIndex = extras[slot];
    tagVal = integers[intIndex];
    return true;
}
uint8_t SamRecord::getMapQuality | ( | ) |
Get the mapping quality (MAPQ) of the record.
Definition at line 1328 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamValidator::isValid().
{
    // Read directly from the record buffer; always reports success.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myMapQuality);
}
int32_t SamRecord::getMateReferenceID | ( | ) |
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
Definition at line 1426 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Read directly from the record buffer; always reports success.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myMateReferenceID);
}
const char * SamRecord::getMateReferenceName | ( | ) |
Get the mate/next fragment's reference sequence name (RNEXT).
If it is equal to the reference name, it still returns the reference name.
Definition at line 1398 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Returns the stored mate reference name as is; always reports success.
    myStatus = SamStatus::SUCCESS;
    return(myMateReferenceName.c_str());
}
const char * SamRecord::getMateReferenceNameOrEqual | ( | ) |
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.
Definition at line 1408 of file SamRecord.cpp.
References getReferenceName(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // A mate reference name of "*" is always returned unchanged. Any other
    // name is replaced by FIELD_ABSENT_STRING when it matches this record's
    // own reference name.
    if(!(myMateReferenceName == "*") &&
       (myMateReferenceName == getReferenceName()))
    {
        return(FIELD_ABSENT_STRING);
    }
    return(myMateReferenceName);
}
bool SamRecord::getNextSamTag | ( | char * | tag, |
char & | vtype, | ||
void ** | value | ||
) |
Get the next tag from the record.
Sets the Status to SUCCESS when a tag is successfully returned or when there are no more tags. Otherwise the status is set to describe why it failed (parsing, etc).
tag | set to the tag when a tag is read. |
vtype | set to the vtype when a tag is read. |
value | pointer to the value of the tag (will need to cast to int, float, char, or string based on vtype). |
Definition at line 1950 of file SamRecord.cpp.
References StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
Referenced by SamRecordHelper::genSamTagsString().
{
    myStatus = SamStatus::SUCCESS;

    // Tags may still live only in the buffer; parse them before iterating.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return false;
    }

    // Step past the slot handed out by the previous call and scan forward
    // for the next occupied slot in the tag hash.
    ++myLastTagIndex;
    int slotCount = extras.Capacity();

    for(; myLastTagIndex < slotCount; ++myLastTagIndex)
    {
        if(!extras.SlotInUse(myLastTagIndex))
        {
            continue;
        }

        // Occupied slot: decode the tag name and value type from its key.
        int key = extras.GetKey(myLastTagIndex);
        getTag(key, tag);
        getTypeFromKey(key, vtype);

        switch(vtype)
        {
            case 'f':
                *value = getFloatPtr(myLastTagIndex);
                return true;
            case 'i':
                // getIntegerPtr takes vtype and may update it; anything it
                // does not leave as 'A' is reported as a plain 'i'.
                *value = getIntegerPtr(myLastTagIndex, vtype);
                if(vtype != 'A')
                {
                    vtype = 'i';
                }
                return true;
            case 'Z':
            case 'B':
                *value = getStringPtr(myLastTagIndex);
                return true;
            default:
                // Record the problem and keep scanning the remaining slots.
                myStatus.setStatus(SamStatus::FAIL_PARSE,
                                   "Unknown tag type");
                break;
        }
    }

    // Ran off the end of the hash without returning a tag. If no unknown
    // type was encountered the status is still success, since reaching the
    // end of the list is not an error.
    return false;
}
uint32_t SamRecord::getNumOverlaps | ( | int32_t | start, |
int32_t | end | ||
) |
Return the number of bases in this read that overlap the passed in region.
Matches & mismatches between the read and the reference are counted as overlaps, but insertions, deletions, skips, clips, and pads are not counted.
start | inclusive 0-based start position (reference position) of the region to check for overlaps in. (-1 indicates to start at the beginning of the reference.) |
end | exclusive 0-based end position (reference position) of the region to check for overlaps in. (-1 indicates to go to the end of the reference.) |
Definition at line 1841 of file SamRecord.cpp.
References get0BasedPosition(), and Cigar::getNumOverlaps().
Referenced by SamFile::GetNumOverlaps().
{
    // An alignment length of -1 means the cigar has not been parsed yet,
    // so the cigar roller is not set up; parse it first.
    if(myAlignmentLength == -1)
    {
        parseCigar();
    }

    // The cigar roller does the counting, anchored at this record's
    // 0-based start position.
    return myCigarRoller.getNumOverlaps(start, end, get0BasedPosition());
}
const char * SamRecord::getQuality | ( | ) |
Returns the SAM formatted quality string (QUAL).
Definition at line 1626 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by PileupElementBaseQual::addEntry(), getFields(), SamValidator::isValidQuality(), and SamFilter::sumMismatchQuality().
{
    myStatus = SamStatus::SUCCESS;

    // An empty quality string means the value is still only in the buffer;
    // pull the sequence and quality strings out of it.
    if(myQuality.Length() == 0)
    {
        setSequenceAndQualityFromBuffer();
    }
    return(myQuality.c_str());
}
char SamRecord::getQuality | ( | int | index | ) |
Get the quality character at the specified index into the quality 0 to readLength - 1.
Throws an exception if index is out of range.
index | index into the quality string (0 to readLength-1). |
Definition at line 1770 of file SamRecord.cpp.
References getReadLength(), and BaseUtilities::UNKNOWN_QUALITY_CHAR.
{
    int32_t readLen = getReadLength();

    // With a read length of 0 there is nothing to index into, so the
    // unknown-quality character is returned for any index.
    if(readLen == 0)
    {
        return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
    }

    // Reject indexes outside of 0 .. readLen - 1.
    if((index < 0) || (index >= readLen))
    {
        String exceptionString = "SamRecord::getQuality(";
        exceptionString += index;
        exceptionString += ") is out of range. Index must be between 0 and ";
        exceptionString += (readLen - 1);
        throw std::runtime_error(exceptionString.c_str());
    }

    // No quality string yet: read the value from the packed (BAM) quality
    // and shift it by 33 to get the SAM character. The packed pointer is
    // usable here because readLen is not 0.
    if(myQuality.Length() == 0)
    {
        return(myPackedQuality[index] + 33);
    }

    // A quality string of exactly "*" means the quality is unknown.
    if((myQuality.Length() == 1) && (myQuality[0] == '*'))
    {
        return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
    }

    // The quality string is not guaranteed to be as long as the sequence,
    // so the index is checked against the string itself as well.
    if(index >= myQuality.Length())
    {
        String exceptionString = "SamRecord::getQuality(";
        exceptionString += index;
        exceptionString += ") is out of range. Index must be between 0 and ";
        exceptionString += (myQuality.Length() - 1);
        throw std::runtime_error(exceptionString.c_str());
    }

    return(myQuality[index]);
}
int32_t SamRecord::getReadLength | ( | ) |
Get the length of the read.
Definition at line 1379 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::clipOnMismatchThreshold(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getQuality(), getSequence(), SamValidator::isValidCigar(), SamValidator::isValidQuality(), SamQuerySeqWithRefIter::reset(), and CigarHelper::softClipEndByRefPos().
{
    myStatus = SamStatus::SUCCESS;

    // When the sequence in the buffer is valid, the buffer's length field
    // is used.
    if(myIsSequenceBufferValid)
    {
        return(myRecordPtr->myReadLength);
    }

    // Otherwise derive the length from the sequence string, where a lone
    // "*" counts as length 0.
    if((mySequence.Length() == 1) && (mySequence[0] == '*'))
    {
        return(0);
    }

    // The string length is used as is (no extra byte for a terminator).
    return(mySequence.Length());
}
const char * SamRecord::getReadName | ( | ) |
Returns the SAM formatted Read Name (QNAME).
Definition at line 1530 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by getFields(), SamValidator::isValid(), and SamFile::validateSortOrder().
{
    myStatus = SamStatus::SUCCESS;

    // An empty read name string means the name is still only in the
    // buffer; copy it from the start of the record's data section.
    if(myReadName.Length() == 0)
    {
        myReadName = (char*)&(myRecordPtr->myData);
    }
    return(myReadName.c_str());
}
uint8_t SamRecord::getReadNameLength | ( | ) |
Get the length of the readname (QNAME) including the null.
Definition at line 1314 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamValidator::isValid().
{
    myStatus = SamStatus::SUCCESS;

    // Without a valid read name in the buffer, compute the length from the
    // string, adding 1 for the terminating null.
    if(!myIsReadNameBufferValid)
    {
        return(myReadName.Length() + 1);
    }

    // Otherwise the buffer's own length field is used.
    return(myRecordPtr->myReadNameLength);
}
const void * SamRecord::getRecordBuffer | ( | ) |
Get a const pointer to the buffer that contains the BAM representation of the record.
Definition at line 1192 of file SamRecord.cpp.
{
    // Delegate to the overload, using the record's configured translation.
    return getRecordBuffer(mySequenceTranslation);
}
const void * SamRecord::getRecordBuffer | ( | SequenceTranslation | translation | ) |
Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence.
translation | type of sequence translation to use. |
Definition at line 1199 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;
    bool bufferOk = true;

    // Rebuild the buffer when it is out of sync with the record, or when
    // the sequence it holds was written with a different translation.
    const bool needsFix = (myIsBufferSynced == false) ||
                          (myBufferSequenceTranslation != translation);
    if(needsFix)
    {
        bufferOk &= fixBuffer(translation);
    }

    // Write any pending tag changes into the buffer as well.
    if(myNeedToSetTagsInBuffer)
    {
        bufferOk &= setTagsInBuffer();
    }

    if(bufferOk)
    {
        return (const void *)myRecordPtr;
    }

    // One of the sync steps failed.
    return(NULL);
}
GenomeSequence * SamRecord::getReference | ( | ) |
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set).
Definition at line 1911 of file SamRecord.cpp.
Referenced by SamValidator::isValidTags().
{
    // Hand back the stored reference pointer unchanged.
    return myRefPtr;
}
int32_t SamRecord::getReferenceID | ( | ) |
Get the reference sequence id of the record (BAM format rid).
Definition at line 1293 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamCoordOutput::add(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and SamFile::validateSortOrder().
{
    // Read directly from the record buffer; always reports success.
    myStatus = SamStatus::SUCCESS;
    return(myRecordPtr->myReferenceID);
}
const char * SamRecord::getReferenceName | ( | ) |
Get the reference sequence name (RNAME) of the record.
Definition at line 1286 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by PileupElement::addEntry(), SamTags::createMDTag(), getMateReferenceNameOrEqual(), getSequence(), SamValidator::isValid(), and SamQuerySeqWithRefIter::reset().
{
    // Returns the stored reference name; always reports success.
    myStatus = SamStatus::SUCCESS;
    return(myReferenceName.c_str());
}
const char * SamRecord::getSequence | ( | ) |
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.
Definition at line 1556 of file SamRecord.cpp.
Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getFields(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getSequence(), and shiftIndelsLeft().
{
    // Delegate to the overload, using the record's configured translation.
    return getSequence(mySequenceTranslation);
}
const char * SamRecord::getSequence | ( | SequenceTranslation | translation | ) |
Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation.
translation | type of sequence translation to use. |
Definition at line 1562 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), SamQuerySeqWithRef::seqWithoutEquals(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // An empty sequence string means the value is still only in the
    // buffer; pull the sequence and quality strings out of it.
    if(mySequence.Length() == 0)
    {
        setSequenceAndQualityFromBuffer();
    }

    // Without a requested translation, or without a reference to translate
    // against, the sequence is returned as stored.
    if((translation == NONE) || (myRefPtr == NULL))
    {
        return(mySequence.c_str());
    }

    if(translation == EQUAL)
    {
        // Build the cached '=' form only if it has not been built yet.
        if(mySeqWithEq.length() == 0)
        {
            if(mySequence == "*")
            {
                // Undefined sequence: nothing to translate.
                mySeqWithEq = '*';
            }
            else
            {
                SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
                                                  myRecordPtr->myPosition,
                                                  *(getCigarInfo()),
                                                  getReferenceName(),
                                                  *myRefPtr,
                                                  mySeqWithEq);
            }
        }
        return(mySeqWithEq.c_str());
    }

    // translation == BASES
    // Build the cached form without '=' only if it has not been built yet.
    if(mySeqWithoutEq.length() == 0)
    {
        if(mySequence == "*")
        {
            // Undefined sequence: nothing to translate.
            mySeqWithoutEq = '*';
        }
        else
        {
            SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
                                                 myRecordPtr->myPosition,
                                                 *(getCigarInfo()),
                                                 getReferenceName(),
                                                 *myRefPtr,
                                                 mySeqWithoutEq);
        }
    }
    return(mySeqWithoutEq.c_str());
}
char SamRecord::getSequence | ( | int | index | ) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation.
Throws an exception if index is out of range.
index | index into the sequence string (0 to readLength-1). |
Definition at line 1639 of file SamRecord.cpp.
References getSequence().
{
    // Delegate to the overload, using the record's configured translation.
    return getSequence(index, mySequenceTranslation);
}
char SamRecord::getSequence | ( | int | index, |
SequenceTranslation | translation | ||
) |
Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation.
Throws an exception if index is out of range.
index | index into the sequence string (0 to readLength-1). |
translation | type of sequence translation to use. |
Definition at line 1645 of file SamRecord.cpp.
References EQUAL, getCigarInfo(), getReadLength(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().
{
    // Lookup table from a 4-bit packed base code to its character.
    static const char * asciiBases = "=AC.G...T......N";

    int32_t readLen = getReadLength();

    // Indexing is not allowed when the read length is 0.
    if(readLen == 0)
    {
        String exceptionString = "SamRecord::getSequence(";
        exceptionString += index;
        exceptionString += ") is not allowed since sequence = '*'";
        throw std::runtime_error(exceptionString.c_str());
    }

    // Reject indexes outside of 0 .. readLen - 1.
    if((index < 0) || (index >= readLen))
    {
        String exceptionString = "SamRecord::getSequence(";
        exceptionString += index;
        exceptionString += ") is out of range. Index must be between 0 and ";
        exceptionString += (readLen - 1);
        throw std::runtime_error(exceptionString.c_str());
    }

    // Without a requested translation, or without a reference to translate
    // against, return the base as stored.
    if((translation == NONE) || (myRefPtr == NULL))
    {
        if(mySequence.Length() != 0)
        {
            // The sequence string is already available.
            return(mySequence[index]);
        }

        if(!myIsSequenceBufferValid)
        {
            String exceptionString = "SamRecord::getSequence(";
            exceptionString += index;
            exceptionString += ") called with no sequence set";
            throw std::runtime_error(exceptionString.c_str());
        }

        // Decode straight from the packed buffer: two bases per byte, with
        // even indexes in the high 4 bits and odd indexes in the low 4 bits.
        return(index & 1 ?
               asciiBases[myPackedSequence[index / 2] & 0xF] :
               asciiBases[myPackedSequence[index / 2] >> 4]);
    }

    // A translation is required, which works on the sequence string, so
    // make sure it has been pulled out of the buffer.
    if(mySequence.Length() == 0)
    {
        setSequenceAndQualityFromBuffer();
    }

    if(translation == EQUAL)
    {
        // Build the cached '=' form only if it has not been built yet.
        if(mySeqWithEq.length() == 0)
        {
            if(mySequence == "*")
            {
                // Undefined sequence: nothing to translate.
                mySeqWithEq = '*';
            }
            else
            {
                SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
                                                  myRecordPtr->myPosition,
                                                  *(getCigarInfo()),
                                                  getReferenceName(),
                                                  *myRefPtr,
                                                  mySeqWithEq);
            }
        }
        return(mySeqWithEq[index]);
    }

    // translation == BASES
    // Build the cached form without '=' only if it has not been built yet.
    if(mySeqWithoutEq.length() == 0)
    {
        if(mySequence == "*")
        {
            // Undefined sequence: nothing to translate.
            mySeqWithoutEq = '*';
        }
        else
        {
            SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
                                                 myRecordPtr->myPosition,
                                                 *(getCigarInfo()),
                                                 getReferenceName(),
                                                 *myRefPtr,
                                                 mySeqWithoutEq);
        }
    }
    return(mySeqWithoutEq[index]);
}
const SamStatus & SamRecord::getStatus | ( | ) |
Returns the status associated with the last method that sets the status.
Definition at line 2391 of file SamRecord.cpp.
{
    // Expose the stored status without modifying it.
    return myStatus;
}
const String * SamRecord::getStringTag | ( | const char * | tag | ) |
Get the string value for the specified tag.
tag | tag to retrieve |
Returns a pointer to the tag's string value if found, NULL if not found.
Definition at line 2168 of file SamRecord.cpp.
Referenced by SamTags::isMDTagCorrect(), and SamValidator::isValidTags().
{
    // Note: this method does not itself assign myStatus; only a failing
    // setTagsFromBuffer call records anything.

    // Tags may still live only in the buffer; parse them before the lookup.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return NULL;
    }

    // Look for the tag as a 'Z' type first, then fall back to 'B'.
    int slot = extras.Find(MAKEKEY(tag[0], tag[1], 'Z'));
    if(slot < 0)
    {
        slot = extras.Find(MAKEKEY(tag[0], tag[1], 'B'));
        if(slot < 0)
        {
            // Not present under either type.
            return NULL;
        }
    }

    // The hash entry holds the index into the strings container; hand back
    // the address of that element.
    int stringIndex = extras[slot];
    return &(strings[stringIndex]);
}
uint32_t SamRecord::getTagLength | ( | ) |
Returns the length of the BAM formatted tags.
Definition at line 1917 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // When the tags have already been parsed out of the buffer, their size
    // is tracked in myTagBufferSize.
    if(!myNeedToSetTagsFromBuffer)
    {
        return(myTagBufferSize);
    }

    // Otherwise the tags exist only in the buffer. They start after the
    // read name, the cigar (one int per operation), the packed sequence
    // (two bases per byte, rounded up) and the quality (one byte per base).
    unsigned char * firstTagByte =
        (unsigned char *)myRecordPtr->myData +
        myRecordPtr->myReadNameLength +
        myRecordPtr->myCigarLength * sizeof(int) +
        (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;

    // Everything from the reference id up to the first tag is non-tag data.
    // The measurement starts at the reference id because the block size
    // field is not counted in the block size.
    uint32_t bytesBeforeTags =
        firstTagByte - (unsigned char*)&(myRecordPtr->myReferenceID);

    // The remainder of the block is tag data.
    uint32_t tagBytes = myRecordPtr->myBlockSize - bytesBeforeTags;
    return(tagBytes);
}
bool SamRecord::getTagsString | ( | const char * | tags, |
String & | returnString, | ||
char | delim = '\t' |
||
) |
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE...
Sets the Status to SUCCESS when the tags are successfully returned or the tags were not found. If a different error occurred, the status is set appropriately. The delimiter between the tags to retrieve is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.
tags | the tags to retrieve, formatted as TAG:TYPE,TAG:TYPE... |
returnString | the String to set (this method first clears returnString) to TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... |
delim | delimiter to use to separate two tags, default is a tab. |
Definition at line 2070 of file SamRecord.cpp.
References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Builds "TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE..." in returnString for
    // every requested tag that is present in the record. Requested tags
    // that are not present are skipped without error.
    // Returns false, with the status set, if the tags could not be read
    // from the buffer, if the request string is malformed, or if a stored
    // tag has an unexpected type.
    const char* currentTagPtr = tags;

    returnString.Clear();
    myStatus = SamStatus::SUCCESS;
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read the tags from the buffer, so cannot
            // get tags.
            return(false);
        }
    }

    bool returnStatus = true;

    while(*currentTagPtr != '\0')
    {
        // Each entry is formatted as XY:Z, where XY is the two character
        // tag and Z is the type character. The checks short-circuit so no
        // character past the terminating null is read.
        if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
           (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
        {
            myStatus.setStatus(SamStatus::INVALID,
                               "getTagsString called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }

        // Construct the key.
        int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
                          currentTagPtr[3]);
        // Look to see if the key exists in the hash.
        int offset = extras.Find(key);

        if(offset >= 0)
        {
            // The key was found, so append this tag, separated from any
            // previously appended tag by the delimiter.
            if(!returnString.IsEmpty())
            {
                returnString += delim;
            }
            returnString += currentTagPtr[0];
            returnString += currentTagPtr[1];
            returnString += ':';
            returnString += currentTagPtr[3];
            returnString += ':';

            // Append the value according to the type stored in the key.
            char vtype;
            getTypeFromKey(key, vtype);

            switch(vtype)
            {
                case 'i':
                    returnString += *(int*)getIntegerPtr(offset, vtype);
                    break;
                case 'f':
                    returnString += *(float*)getFloatPtr(offset);
                    break;
                case 'Z':
                case 'B':
                    returnString += *(String*)getStringPtr(offset);
                    break;
                default:
                    // Previously this message named rmTag, which is not
                    // the method that was called.
                    myStatus.setStatus(SamStatus::INVALID,
                                       "getTagsString called with unknown type.\n");
                    returnStatus = false;
                    break;
            };
        }

        // Move on to the next requested tag.
        if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
        {
            // Skip the 4 character entry plus the delimiter.
            currentTagPtr += 5;
        }
        else if(currentTagPtr[4] != '\0')
        {
            // Something other than a delimiter follows the entry.
            // Previously this message named rmTags, which is not the
            // method that was called.
            myStatus.setStatus(SamStatus::INVALID,
                               "getTagsString called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }
        else
        {
            // Last entry: land on the terminating null to end the loop.
            currentTagPtr += 4;
        }
    }
    return(returnStatus);
}
bool SamRecord::isCharType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is a char type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 2050 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // 'A' is the only char value type.
    return(vtype == 'A');
}
bool SamRecord::isFloatType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is a float type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 2040 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // 'f' is the only float value type.
    return(vtype == 'f');
}
bool SamRecord::isIntegerType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is an integer type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 2028 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // Integer value types are c, C, s, S, i and I.
    switch(vtype)
    {
        case 'c':
        case 'C':
        case 's':
        case 'S':
        case 'i':
        case 'I':
            return(true);
        default:
            return(false);
    }
}
bool SamRecord::isStringType | ( | char | vtype | ) | [static] |
Returns whether or not the specified vtype is a string type.
Does not set SamStatus.
vtype | value type to check. |
Definition at line 2060 of file SamRecord.cpp.
Referenced by SamRecordHelper::genSamTagString().
{
    // Both 'Z' and 'B' are treated as string value types.
    return((vtype == 'Z') || (vtype == 'B'));
}
bool SamRecord::isValid | ( | SamFileHeader & | header | ) |
Returns whether or not the record is valid, setting the status to indicate success or failure.
header | SAM Header associated with the record. Used to perform some validation against the header. |
Definition at line 161 of file SamRecord.cpp.
References SamValidationErrors::getErrorString(), StatGenStatus::INVALID, SamValidator::isValid(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // The validator collects any problems it finds into this container.
    SamValidationErrors invalidSamErrors;
    if(SamValidator::isValid(header, *this, invalidSamErrors))
    {
        // Nothing wrong with the record.
        return(true);
    }

    // Validation failed: report the collected errors through the status.
    std::string errorMessage = "";
    invalidSamErrors.getErrorString(errorMessage);
    myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
    return(false);
}
void SamRecord::resetRecord | ( | ) |
Reset the fields of the record to a default value.
This is not necessary when you are reading a SAM/BAM file, but if you are setting fields, it is a good idea to clean out a record before reusing it. Clearing it allows you to not have to set any empty fields.
Definition at line 91 of file SamRecord.cpp.
References clearTags(), NONE, and StatGenStatus::SUCCESS.
Referenced by SamRecord(), setBuffer(), setBufferFromFile(), and ~SamRecord().
{
    // After a reset the buffer and the record fields describe the same
    // default record.
    myIsBufferSynced = true;

    // Fixed-length fields in the record buffer.
    myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
    myRecordPtr->myReferenceID = -1;
    myRecordPtr->myPosition = -1;
    myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
    myRecordPtr->myMapQuality = 0;
    myRecordPtr->myBin = DEFAULT_BIN;
    myRecordPtr->myCigarLength = 0;
    myRecordPtr->myFlag = 0;
    myRecordPtr->myReadLength = 0;
    myRecordPtr->myMateReferenceID = -1;
    myRecordPtr->myMatePosition = -1;
    myRecordPtr->myInsertSize = 0;

    // String (SAM) forms of the variable length fields.
    // TODO - one way to speed this up might be to not set to "*" and just
    // clear them, and write out a '*' for SAM if it is empty.
    myReadName = DEFAULT_READ_NAME;
    myReferenceName = "*";
    myMateReferenceName = "*";
    myCigar = "*";
    mySequence = "*";
    mySeqWithEq.clear();
    mySeqWithoutEq.clear();
    myQuality = "*";

    // No tag synchronization is pending in either direction.
    myNeedToSetTagsFromBuffer = false;
    myNeedToSetTagsInBuffer = false;

    // -1 marks the calculated alignment info as not yet computed.
    myAlignmentLength = -1;
    myUnclippedStartOffset = -1;
    myUnclippedEndOffset = -1;

    clearTags();

    // Buffer (BAM) forms of the variable length fields. Only the read name
    // has any bytes to write; the other fields have a length of 0. The
    // minimum size of myRecordPtr includes room for the default read name.
    memcpy(&(myRecordPtr->myData), myReadName.c_str(),
           myRecordPtr->myReadNameLength);

    // Every variable length field in the buffer is now valid.
    myIsReadNameBufferValid = true;
    myIsCigarBufferValid = true;

    // The packed sequence follows the read name and the cigar.
    myPackedSequence = (unsigned char *)myRecordPtr->myData +
        myRecordPtr->myReadNameLength +
        myRecordPtr->myCigarLength * sizeof(int);
    myIsSequenceBufferValid = true;
    myBufferSequenceTranslation = NONE;

    // The read length is 0, so the quality begins where the sequence does.
    myPackedQuality = myPackedSequence;
    myIsQualityBufferValid = true;
    myIsTagsBufferValid = true;
    myIsBinValid = true;

    myCigarTempBufferLength = -1;

    myStatus = SamStatus::SUCCESS;

    NOT_FOUND_TAG_STRING = "";
    NOT_FOUND_TAG_INT = -1; // TODO - deprecate
}
bool SamRecord::rmTag | ( | const char * | tag, |
char | type | ||
) |
Remove a tag.
tag | tag to remove. |
type | of the tag to be removed. |
Definition at line 980 of file SamRecord.cpp.
References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // A tag name is exactly 2 characters.
    if(strlen(tag) != 2)
    {
        myStatus.setStatus(SamStatus::INVALID,
                           "rmTag called with tag that is not 2 characters\n");
        return false;
    }

    myStatus = SamStatus::SUCCESS;

    // Tags may still live only in the buffer; parse them before the lookup.
    if(myNeedToSetTagsFromBuffer && !setTagsFromBuffer())
    {
        return false;
    }

    // Look the tag up in the hash by tag name plus type.
    int key = MAKEKEY(tag[0], tag[1], type);
    int slot = extras.Find(key);
    if(slot < 0)
    {
        // The tag is not in the record, which counts as successfully
        // removed.
        return true;
    }

    // For integers, find out the actual integer type that is stored.
    char storedType;
    getTypeFromKey(key, storedType);
    if(storedType == 'i')
    {
        storedType = getIntegerType(slot);
    }

    // Work out how many buffer bytes this tag accounts for. The value is
    // NOT removed from strings, integers or floats, because that would
    // require updating the indexes held in extras for every other entry.
    int bytesFreed = 0;
    switch(storedType)
    {
        case 'A':
        case 'c':
        case 'C':
            bytesFreed = 4;
            break;
        case 's':
        case 'S':
            bytesFreed = 5;
            break;
        case 'i':
        case 'I':
        case 'f':
            bytesFreed = 7;
            break;
        case 'Z':
            bytesFreed = 4 + getString(slot).Length();
            break;
        case 'B':
            bytesFreed = 3 + getBtagBufferSize(getString(slot));
            break;
        default:
            myStatus.setStatus(SamStatus::INVALID,
                               "rmTag called with unknown type.\n");
            return false;
    }

    // The tags in the buffer no longer match the record.
    myNeedToSetTagsInBuffer = true;
    myIsTagsBufferValid = false;
    myIsBufferSynced = false;
    myTagBufferSize -= bytesFreed;

    // Drop the entry from the hash.
    extras.Delete(slot);
    return true;
}
bool SamRecord::rmTags | ( | const char * | tags | ) |
Remove tags.
The delimiter between the tags is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.
tags | tags to remove, formatted as Tag:Type,Tag:Type,Tag:Type... |
Definition at line 1071 of file SamRecord.cpp.
References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Removes every tag listed in tags (formatted as XY:Z entries separated
    // by ',' or ';') that is present in the record. Listed tags that are
    // not present are skipped without error.
    // Returns false, with the status set, if the tags could not be read
    // from the buffer, if the list is malformed, or if a stored tag has an
    // unexpected type.
    const char* currentTagPtr = tags;

    myStatus = SamStatus::SUCCESS;
    if(myNeedToSetTagsFromBuffer)
    {
        if(!setTagsFromBuffer())
        {
            // Failed to read the tags from the buffer, so cannot
            // get tags.
            return(false);
        }
    }

    bool returnStatus = true;

    // Total number of buffer bytes accounted for by the removed tags.
    int rmBuffSize = 0;
    while(*currentTagPtr != '\0')
    {
        // Each entry is formatted as XY:Z, where XY is the two character
        // tag and Z is the type character. The checks short-circuit so no
        // character past the terminating null is read.
        if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
           (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
        {
            myStatus.setStatus(SamStatus::INVALID,
                               "rmTags called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }

        // Construct the key.
        int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
                          currentTagPtr[3]);
        // Look to see if the key exists in the hash.
        int offset = extras.Find(key);

        if(offset >= 0)
        {
            // The key was found.
            // For integers, find out the actual integer type that is stored.
            char vtype;
            getTypeFromKey(key, vtype);
            if(vtype == 'i')
            {
                vtype = getIntegerType(offset);
            }

            // Add up the buffer bytes this tag accounts for. The value is
            // NOT removed from strings, integers or floats, because that
            // would require updating the indexes held in extras for every
            // other entry.
            switch(vtype)
            {
                case 'A':
                case 'c':
                case 'C':
                    rmBuffSize += 4;
                    break;
                case 's':
                case 'S':
                    rmBuffSize += 5;
                    break;
                case 'i':
                case 'I':
                    rmBuffSize += 7;
                    break;
                case 'f':
                    rmBuffSize += 7;
                    break;
                case 'Z':
                    rmBuffSize += 4 + getString(offset).Length();
                    break;
                case 'B':
                    rmBuffSize += 3 + getBtagBufferSize(getString(offset));
                    break;
                default:
                    // Previously this message named rmTag, which is not
                    // the method that was called.
                    myStatus.setStatus(SamStatus::INVALID,
                                       "rmTags called with unknown type.\n");
                    returnStatus = false;
                    break;
            };

            // Remove from the hash.
            extras.Delete(offset);
        }

        // Move on to the next listed tag.
        if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
        {
            // Skip the 4 character entry plus the delimiter.
            currentTagPtr += 5;
        }
        else if(currentTagPtr[4] != '\0')
        {
            // Something other than a delimiter follows the entry.
            myStatus.setStatus(SamStatus::INVALID,
                               "rmTags called with improperly formatted tags.\n");
            returnStatus = false;
            break;
        }
        else
        {
            // Last entry: land on the terminating null to end the loop.
            currentTagPtr += 4;
        }
    }

    // The tags in the buffer no longer match the record.
    myNeedToSetTagsInBuffer = true;
    myIsTagsBufferValid = false;
    myIsBufferSynced = false;
    myTagBufferSize -= rmBuffSize;

    return(returnStatus);
}
bool SamRecord::set0BasedMatePosition | ( | int32_t | matePosition | ) |
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
matePosition | 0-based start position of the mate/next fragment |
Definition at line 328 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by set1BasedMatePosition().
{
    // Store the caller's 0-based mate position directly in the record.
    myRecordPtr->myMatePosition = matePosition;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::set0BasedPosition | ( | int32_t | position | ) |
Set the leftmost position using the specified 0-based (BAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
position | 0-based start position |
Definition at line 242 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by set1BasedPosition(), and SamFilter::softClip().
{
    // Store the caller's 0-based position directly in the record.
    myRecordPtr->myPosition = position;

    // Flag the bin as no longer valid now that the position has changed.
    myIsBinValid = false;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::set1BasedMatePosition | ( | int32_t | matePosition | ) |
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
matePosition | 1-based start position of the mate/next fragment |
Definition at line 322 of file SamRecord.cpp.
References set0BasedMatePosition().
{
    // Convert the 1-based value to 0-based, then let the 0-based setter
    // store it and set the status.
    const int32_t zeroBasedMatePosition = matePosition - 1;
    return set0BasedMatePosition(zeroBasedMatePosition);
}
bool SamRecord::set1BasedPosition | ( | int32_t | position | ) |
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
Internal processing handles the switching between SAM/BAM formats when read/written.
position | 1-based start position |
Definition at line 236 of file SamRecord.cpp.
References set0BasedPosition().
{
    // Convert the 1-based value to 0-based, then let the 0-based setter
    // store it and set the status.
    const int32_t zeroBasedPosition = position - 1;
    return set0BasedPosition(zeroBasedPosition);
}
SamStatus::Status SamRecord::setBuffer | ( | const char * | fromBuffer, |
uint32_t | fromBufferSize, | ||
SamFileHeader & | header | ||
) |
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
fromBuffer | buffer to read the BAM record from. |
fromBufferSize | size of the buffer containing the BAM record. |
header | BAM header for the record. |
Definition at line 525 of file SamRecord.cpp.
References StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_PARSE, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // A missing or zero-length buffer cannot hold a record.
    const bool bufferIsEmpty = (fromBuffer == NULL) || (fromBufferSize == 0);
    if(bufferIsEmpty)
    {
        myStatus.setStatus(SamStatus::FAIL_PARSE,
                           "Cannot parse an empty file.");
        return(SamStatus::FAIL_PARSE);
    }

    // Start from a clean record before loading the new contents.
    resetRecord();

    // Make room for the whole incoming buffer, then copy it in and refresh
    // the variables that depend on the buffer.
    if(allocateRecordStructure(fromBufferSize))
    {
        memcpy(myRecordPtr, fromBuffer, fromBufferSize);
        setVariablesForNewBuffer(header);
        return(SamStatus::SUCCESS);
    }

    // The allocation failed, so the record could not be loaded.
    return(SamStatus::FAIL_MEM);
}
SamStatus::Status SamRecord::setBufferFromFile | ( | IFILE | filePtr, |
SamFileHeader & | header | ||
) |
Read the BAM record from a file.
filePtr | file to read the buffer from. |
header | BAM header for the record. |
Definition at line 558 of file SamRecord.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_ORDER, StatGenStatus::FAIL_PARSE, ifeof(), ifread(), InputFile::isOpen(), StatGenStatus::NO_MORE_RECS, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Reads one BAM record from filePtr into this record.
    // Returns SUCCESS when a full record was read, NO_MORE_RECS at a clean
    // end of file, FAIL_ORDER for an unopened file, FAIL_PARSE for a
    // truncated or malformed record, FAIL_IO for a read error, and FAIL_MEM
    // when the record buffer could not be allocated.
    myStatus = SamStatus::SUCCESS;
    if((filePtr == NULL) || (filePtr->isOpen() == false))
    {
        // File is not open, return failure.
        myStatus.setStatus(SamStatus::FAIL_ORDER,
                           "Can't read from an unopened file.");
        return(SamStatus::FAIL_ORDER);
    }

    // Clear the record.
    resetRecord();

    // Read the record size.
    int numBytes =
        ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));

    // Check to see if the end of the file was hit and no bytes were read.
    if(ifeof(filePtr) && (numBytes == 0))
    {
        // End of file, nothing was read, no more records.
        myStatus.setStatus(SamStatus::NO_MORE_RECS,
                           "No more records left to read.");
        return(SamStatus::NO_MORE_RECS);
    }

    if(numBytes != sizeof(int32_t))
    {
        // Failed to read the entire block size.  Either the end of the file
        // was reached early or there was an error.
        if(ifeof(filePtr))
        {
            // Error: end of the file reached prior to reading the rest of
            // the record.
            myStatus.setStatus(SamStatus::FAIL_PARSE,
                               "EOF reached in the middle of a record.");
            return(SamStatus::FAIL_PARSE);
        }
        else
        {
            // Error reading.
            myStatus.setStatus(SamStatus::FAIL_IO,
                               "Failed to read the record size.");
            return(SamStatus::FAIL_IO);
        }
    }

    // The block size comes straight from the file.  A negative value is
    // never valid, and because it is used below as an unsigned read length
    // it would ask ifread for far more bytes than the record buffer holds.
    // Reject it before it is used for allocation or reading.
    if(myRecordPtr->myBlockSize < 0)
    {
        resetRecord();
        myStatus.setStatus(SamStatus::FAIL_PARSE,
                           "Invalid (negative) record block size.");
        return(SamStatus::FAIL_PARSE);
    }

    // Allocate space for the record plus the block size field itself.
    if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
    {
        // Failed to allocate space.
        // Status is set by allocateRecordStructure.
        return(SamStatus::FAIL_MEM);
    }

    // Read the rest of the alignment block, starting at the reference id.
    if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
       != (unsigned int)myRecordPtr->myBlockSize)
    {
        // Error reading the record.  Reset it and return failure.
        resetRecord();
        myStatus.setStatus(SamStatus::FAIL_IO,
                           "Failed to read the record");
        return(SamStatus::FAIL_IO);
    }

    setVariablesForNewBuffer(header);

    // Return the status of the record.
    return(SamStatus::SUCCESS);
}
bool SamRecord::setCigar | ( | const char * | cigar | ) |
Set the CIGAR to the specified SAM formatted cigar string.
Internal processing handles the switching between SAM/BAM formats when read/written.
cigar | string containing the SAM formatted cigar. |
Definition at line 259 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead(), shiftIndelsLeft(), and SamFilter::softClip().
{
    // Keep the SAM formatted cigar string as given.
    myCigar = cigar;

    // Anything derived from the previous cigar is now out of date: the
    // record buffer, the cigar buffer, the temporary cigar buffer and the bin.
    myIsBufferSynced = false;
    myIsCigarBufferValid = false;
    myIsBinValid = false;
    myCigarTempBufferLength = -1;

    // Put the calculated alignment info back to the uncalculated value (-1).
    myUnclippedEndOffset = -1;
    myUnclippedStartOffset = -1;
    myAlignmentLength = -1;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::setCigar | ( | const Cigar & | cigar | ) |
Set the CIGAR to the specified Cigar object.
Internal processing handles the switching between SAM/BAM formats when read/written.
cigar | object to set this record's cigar to have. |
Definition at line 278 of file SamRecord.cpp.
References Cigar::getCigarString(), and StatGenStatus::SUCCESS.
{
    // Have the Cigar object write its string form into this record's cigar.
    cigar.getCigarString(myCigar);

    // Anything derived from the previous cigar is now out of date: the
    // record buffer, the cigar buffer, the temporary cigar buffer and the bin.
    myIsBufferSynced = false;
    myIsCigarBufferValid = false;
    myIsBinValid = false;
    myCigarTempBufferLength = -1;

    // Put the calculated alignment info back to the uncalculated value (-1).
    myUnclippedEndOffset = -1;
    myUnclippedStartOffset = -1;
    myAlignmentLength = -1;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::setFlag | ( | uint16_t | flag | ) |
Set the bitwise FLAG to the specified value.
flag | integer flag to use. |
Definition at line 215 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead().
{
    // Store the bitwise flag directly in the record.
    myRecordPtr->myFlag = flag;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::setInsertSize | ( | int32_t | insertSize | ) |
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
insertSize | inferred insert size/observed template length. |
Definition at line 336 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Store the insert size/template length directly in the record.
    myRecordPtr->myInsertSize = insertSize;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::setMapQuality | ( | uint8_t | mapQuality | ) |
Set the mapping quality (MAPQ).
mapQuality | map quality to set in the record. |
Definition at line 251 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
Referenced by SamFilter::filterRead().
{
    // Store the mapping quality directly in the record.
    myRecordPtr->myMapQuality = mapQuality;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::setMateReferenceName | ( | SamFileHeader & | header, |
const char * | mateReferenceName | ||
) |
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.
header | SAM/BAM header to use to determine the mate reference id. |
mateReferenceName | mate reference name to use. |
Definition at line 297 of file SamRecord.cpp.
References SamFileHeader::getReferenceID(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // A name equal to FIELD_ABSENT_STRING (described as "=" by the original
    // comment) means "same as this record's reference", so reuse
    // myReferenceName.  That relies on the reference name having been set
    // before this call.
    const bool sameAsReference =
        (strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0);
    if(sameAsReference)
    {
        myMateReferenceName = myReferenceName;
    }
    else
    {
        myMateReferenceName = mateReferenceName;
    }

    // Look up the mate reference id in the header.  Passing true asks the
    // header to add the name if it does not already exist.
    myRecordPtr->myMateReferenceID =
        header.getReferenceID(myMateReferenceName, true);

    return(true);
}
bool SamRecord::setQuality | ( | const char * | quality | ) |
Sets the quality (QUAL) to the specified SAM formatted quality string.
Internal processing handles switching between SAM/BAM formats when read/written.
quality | SAM quality string. |
Definition at line 357 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Keep the SAM formatted quality string as given.
    myQuality = quality;

    // The record buffer and its quality section no longer match the string.
    myIsQualityBufferValid = false;
    myIsBufferSynced = false;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::setReadName | ( | const char * | readName | ) |
Set QNAME to the passed in name.
readName | the readname to set the QNAME to. |
Definition at line 193 of file SamRecord.cpp.
References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    // Take the new name and mark the buffered copy of it as stale.
    myReadName = readName;
    myIsBufferSynced = false;
    myIsReadNameBufferValid = false;
    myStatus = SamStatus::SUCCESS;

    // Any name with at least one character is accepted.
    if(myReadName.Length() != 0)
    {
        return(true);
    }

    // A zero-length name is treated as invalid: fall back to the default
    // read name and its length, and report the problem through the status.
    myReadName = DEFAULT_READ_NAME;
    myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
    myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
    return(false);
}
void SamRecord::setReference | ( | GenomeSequence * | reference | ) |
Set the reference to the specified genome sequence object.
reference | pointer to the GenomeSequence object. |
Definition at line 178 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), SamFile::validateSortOrder(), and SamFile::WriteRecord().
{
    // Remember the caller's GenomeSequence pointer.  Only the pointer is
    // stored; no status is set by this method.
    myRefPtr = reference;
}
bool SamRecord::setReferenceName | ( | SamFileHeader & | header, |
const char * | referenceName | ||
) |
Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id.
header | SAM/BAM header to use to determine the reference id. |
referenceName | reference name to use. |
Definition at line 223 of file SamRecord.cpp.
References SamFileHeader::getReferenceID(), and StatGenStatus::SUCCESS.
{
    // Keep the reference name as given.
    myReferenceName = referenceName;

    // Look up the reference id in the header.  Passing true asks the header
    // to add the name if it does not already exist.
    myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
bool SamRecord::setSequence | ( | const char * | seq | ) |
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
Internal processing handles switching between SAM/BAM formats when read/written.
seq | SAM sequence string. May contain '='. |
Definition at line 344 of file SamRecord.cpp.
References StatGenStatus::SUCCESS.
{
    // Keep the SAM formatted sequence string as given.
    mySequence = seq;

    // Drop the stored sequence variants with and without '=' so they are
    // not reused for the new sequence.
    mySeqWithoutEq.clear();
    mySeqWithEq.clear();

    // The record buffer and its sequence section no longer match the string.
    myIsSequenceBufferValid = false;
    myIsBufferSynced = false;

    // This setter has no failure path, so the status is always success.
    myStatus = SamStatus::SUCCESS;
    return(true);
}
void SamRecord::setSequenceTranslation | ( | SequenceTranslation | translation | ) |
Set the type of sequence translation to use when getting the sequence.
The default type (if this method is never called) is NONE (the sequence is left as-is). Can be over-ridden by using the accessors that take a SequenceTranslation parameter.
translation | type of sequence translation to use. |
Definition at line 187 of file SamRecord.cpp.
Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), and SamFile::validateSortOrder().
{
    // Record the translation setting.  It is the value passed along by the
    // single-argument writeRecordBuffer(); no status is set by this method.
    mySequenceTranslation = translation;
}
bool SamRecord::shiftIndelsLeft | ( | ) |
Shift the indels (if any) to the left by updating the CIGAR.
Definition at line 368 of file SamRecord.cpp.
References BASES, Cigar::foundInQuery(), getSequence(), CigarRoller::IncrementCount(), Cigar::insert, Cigar::isMatchOrMismatch(), CigarRoller::Remove(), setCigar(), Cigar::size(), and CigarRoller::Update().
{
    // Shifts each insert in the CIGAR as far left as possible without
    // changing the implied reference, and without adding cigar operations.
    // An insert is only shifted into the single match/mismatch operation
    // immediately before it.  Returns true if any insert was moved, in which
    // case the record's cigar is updated via setCigar().

    // Check to see whether or not the Cigar has already been set - this is
    // determined by checking if alignment length is set since alignment
    // length and the cigar are set at the same time.
    if(myAlignmentLength == -1)
    {
        // Not been set, so calculate it.
        parseCigar();
    }

    // Cigar is set, so now myCigarRoller can be used.
    int numOps = myCigarRoller.size();
    if(numOps == 0)
    {
        // No operations at all, so there is nothing to index or shift.
        return(false);
    }

    // Track whether or not there was a shift.
    bool shifted = false;

    // Query (read) position of the start of the operation being examined.
    uint32_t currentPos = 0;

    // The loop starts at 1 because the first operation can't be shifted,
    // so advance the position past the first operation here.
    if(Cigar::foundInQuery(myCigarRoller[0]))
    {
        currentPos += myCigarRoller[0].count;
    }

    for(int currentOp = 1; currentOp < numOps; currentOp++)
    {
        if(myCigarRoller[currentOp].operation != Cigar::insert)
        {
            // Not an insert: just advance the position if the operation
            // consumes bases of the read.
            if(Cigar::foundInQuery(myCigarRoller[currentOp]))
            {
                currentPos += myCigarRoller[currentOp].count;
            }
            continue;
        }

        // Remember the insert length now: the operation stored at currentOp
        // may be overwritten or re-indexed below.
        const uint32_t insertLen = myCigarRoller[currentOp].count;

        // For now, only shift a max of 1 operation.
        const int prevOpIndex = currentOp - 1;

        // We can only shift into a match/mismatch operation.
        if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
        {
            // TODO - shift past pads
            // An insert is in the read, so increment the position.
            currentPos += insertLen;
            continue;
        }

        // Track the next op for seeing if it is the same as the previous
        // for merging reasons.  If there is no next op, use the current one.
        int nextOpIndex = currentOp + 1;
        if(nextOpIndex == numOps)
        {
            nextOpIndex = currentOp;
        }

        // The start of the previous operation, so we don't shift past it.
        const uint32_t prevOpStart =
            currentPos - myCigarRoller[prevOpIndex].count;

        // The insert starts at the current position and ends at the start
        // plus its size minus 1.
        uint32_t insertStartPos = currentPos;
        uint32_t insertEndPos = currentPos + insertLen - 1;

        // Loop as long as the base before the insert start matches the last
        // base in the insert.  If they match, the insert can be shifted one
        // index left because the implied reference will not change.
        // Stop when insertStartPos reaches prevOpStart.
        while((insertStartPos > prevOpStart) &&
              (getSequence(insertEndPos, BASES) ==
               getSequence(insertStartPos - 1, BASES)))
        {
            // We can shift, so move the insert start & end one left.
            --insertEndPos;
            --insertStartPos;
        }

        // Determine if a shift is possible.
        const int shiftLen = currentPos - insertStartPos;

        // Query position of the operation the loop will examine next,
        // assuming the insert stays where it is.
        uint32_t nextPos = currentPos + insertLen;

        if(shiftLen > 0)
        {
            // Only apply the shift if the cigar will not gain operations.
            if(myCigarRoller[nextOpIndex].operation ==
               myCigarRoller[prevOpIndex].operation)
            {
                // The operations on either side are the same, so move
                // shiftLen bases from the previous op to the next op.
                myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
                myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);

                // If the previous op length is 0, just remove that operation.
                if(myCigarRoller[prevOpIndex].count == 0)
                {
                    myCigarRoller.Remove(prevOpIndex);
                    // Every later operation moved down one slot: the insert
                    // now sits at prevOpIndex.  Step back so the loop
                    // increment lands on the merged operation, and shrink
                    // the bound so the loop never indexes past the end.
                    --currentOp;
                    --numOps;
                }

                // The insert now starts shiftLen bases earlier, and the
                // merged operation begins right after it.
                nextPos = insertStartPos + insertLen;
                shifted = true;
            }
            else if(insertStartPos == prevOpStart)
            {
                // The insert shifted past the entire previous operation, so
                // swap the two operations.  The insert keeps its own length;
                // the previous operation keeps its own length.
                myCigarRoller.Update(currentOp,
                                     myCigarRoller[prevOpIndex].operation,
                                     myCigarRoller[prevOpIndex].count);
                myCigarRoller.Update(prevOpIndex, Cigar::insert, insertLen);

                // Together the two operations still cover the same bases,
                // so nextPos (currentPos + insertLen) is already correct.
                shifted = true;
            }
            // Otherwise the shift would split the previous operation in two
            // and lengthen the cigar, so the insert is left in place.
        }

        currentPos = nextPos;
    }

    if(shifted)
    {
        // TODO - setCigar is currently inefficient because later the cigar
        // roller will be recalculated, but for now it will work.
        setCigar(myCigarRoller);
    }
    return(shifted);
}
SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr | ) |
Write the record as a BAM into the specified already opened file.
filePtr | file to write the BAM record into. |
Definition at line 1225 of file SamRecord.cpp.
{
    // Write using the translation currently stored on this record by
    // delegating to the two-argument overload.
    const SamStatus::Status writeStatus =
        writeRecordBuffer(filePtr, mySequenceTranslation);
    return writeStatus;
}
SamStatus::Status SamRecord::writeRecordBuffer | ( | IFILE | filePtr, |
SequenceTranslation | translation | ||
) |
Write the record as a BAM into the specified already opened file using the specified translation on the sequence.
filePtr | file to write the BAM record into. |
translation | type of sequence translation to use. |
Definition at line 1232 of file SamRecord.cpp.
References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_ORDER, StatGenStatus::getStatus(), ifwrite(), InputFile::isOpen(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.
{
    myStatus = SamStatus::SUCCESS;

    // Writing requires a file that exists and is open.
    if((filePtr == NULL) || (filePtr->isOpen() == false))
    {
        myStatus.setStatus(SamStatus::FAIL_ORDER,
                           "Can't write to an unopened file.");
        return(SamStatus::FAIL_ORDER);
    }

    // The buffer must be rebuilt if it is out of sync with the record or was
    // built with a different sequence translation.  If the rebuild fails,
    // return whatever status is currently stored.
    const bool bufferIsStale =
        (myIsBufferSynced == false) ||
        (myBufferSequenceTranslation != translation);
    if(bufferIsStale && !fixBuffer(translation))
    {
        return(myStatus.getStatus());
    }

    // Write the block size field followed by the block itself.
    unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
    unsigned int numBytesWritten =
        ifwrite(filePtr, myRecordPtr, numBytesToWrite);

    // A short write means the record did not make it out completely.
    if(numBytesToWrite != numBytesWritten)
    {
        myStatus.setStatus(SamStatus::FAIL_IO,
                           "Failed to write the entire record.");
        return(SamStatus::FAIL_IO);
    }

    return(SamStatus::SUCCESS);
}