Static Public Member Functions | |
static bool | createMDTag (String &outputMDtag, SamRecord &inputRec, GenomeSequence &genome) |
Create the MD tag for the specified input record and the genome. | |
static bool | isMDTagCorrect (SamRecord &inputRec, GenomeSequence &genome) |
Check to see if the MD tag in the record is accurate. | |
static bool | updateMDTag (SamRecord &inputRec, GenomeSequence &genome) |
Static Public Attributes | |
static const char * | BQ_TAG = "BQ" |
Constants for parsing a tags. | |
static const char | BQ_TAG_TYPE = 'Z' |
static const char * | MD_TAG = "MD" |
static const char | MD_TAG_TYPE = 'Z' |
static const char * | ORIG_POS_TAG = "OP" |
static const char | ORIG_POS_TAG_TYPE = 'i' |
static const char * | ORIG_CIGAR_TAG = "OC" |
static const char | ORIG_CIGAR_TAG_TYPE = 'Z' |
static const char * | ORIG_QUAL_TAG = "OQ" |
static const char | ORIG_QUAL_TAG_TYPE = 'Z' |
Definition at line 25 of file SamTags.h.
bool SamTags::createMDTag | ( | String & | outputMDtag, | |
SamRecord & | inputRec, | |||
GenomeSequence & | genome | |||
) | [static] |
Create the MD tag for the specified input record and the genome.
Definition at line 34 of file SamTags.cpp.
References BaseUtilities::areEqual(), SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), Cigar::getExpectedReferenceBaseCount(), GenomeSequence::getGenomePosition(), Cigar::getQueryIndex(), SamRecord::getReferenceName(), SamRecord::getSequence(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().
Referenced by isMDTagCorrect(), and SamValidator::isValidTags().
00036 { 00037 outputMDtag.Clear(); 00038 // Get the cigar to use for determing alignment. 00039 Cigar* cigarInfo = inputRec.getCigarInfo(); 00040 if(cigarInfo == NULL) 00041 { 00042 throw(std::runtime_error("Cannot createMDTag - failed to get the cigar")); 00043 return(false); 00044 } 00045 int32_t queryIndex = Cigar::INDEX_NA; 00046 00047 // get where this read starts on the reference. 00048 uint32_t startOfReadOnRefIndex = 00049 genome.getGenomePosition(inputRec.getReferenceName()); 00050 if(startOfReadOnRefIndex == (uint32_t)INVALID_CHROMOSOME_INDEX) 00051 { 00052 // Failed to find the reference for this chromosome, so return false. 00053 return(false); 00054 } 00055 startOfReadOnRefIndex += inputRec.get0BasedPosition(); 00056 00057 // Track the number of consecutive matches. 00058 int32_t matchCount = 0; 00059 // Track if it is currently in a deletion so it knows when not to add 00060 // a '^'. 00061 bool currentDeletion = false; 00062 00063 // Loop through the Reference indices (ignores insertions/pads/clips). 00064 for(int refOffset = 0; 00065 refOffset < cigarInfo->getExpectedReferenceBaseCount(); 00066 ++refOffset) 00067 { 00068 // Get the query index for this reference position.. 00069 queryIndex = cigarInfo->getQueryIndex(refOffset); 00070 00071 char refBase = genome[startOfReadOnRefIndex + refOffset]; 00072 00073 if(queryIndex != Cigar::INDEX_NA) 00074 { 00075 // Both the reference and the read have a base, so get the bases. 00076 char readBase = inputRec.getSequence(queryIndex); 00077 currentDeletion = false; 00078 00079 // If neither base is unknown and they are the same, count it 00080 // as a match. 00081 if(!BaseUtilities::isAmbiguous(readBase) && 00082 !BaseUtilities::isAmbiguous(refBase) && 00083 (BaseUtilities::areEqual(readBase, refBase))) 00084 { 00085 // Match, so update counter. 00086 ++matchCount; 00087 } 00088 else 00089 { 00090 // Mismatch, so output the number of matches if any. 00091 if(matchCount != 0) 00092 { 00093 outputMDtag += matchCount; 00094 matchCount = 0; 00095 } 00096 outputMDtag += refBase; 00097 } 00098 } 00099 else 00100 { 00101 // This reference position is not in the query, so it is a deletion. 00102 // Deletion, so output the number of matches if any. 00103 if(matchCount != 0) 00104 { 00105 outputMDtag += matchCount; 00106 matchCount = 0; 00107 } 00108 00109 if(!currentDeletion) 00110 { 00111 // Not currently in a deletion, so add the ^ 00112 outputMDtag += '^'; 00113 } 00114 // Add the deleted base. 00115 outputMDtag += refBase; 00116 currentDeletion = true; 00117 } 00118 } 00119 00120 // output the match count at the end. 00121 outputMDtag += matchCount; 00122 return(true); 00123 }