libStatGen Software
1
|
Class for parsing/creating/operating on SAM/BAM record tags. More...
#include <SamTags.h>
Static Public Member Functions | |
static bool | createMDTag (String &outputMDtag, SamRecord &inputRec, GenomeSequence &genome) |
Create the MD tag for the specified input record and the genome. | |
static bool | isMDTagCorrect (SamRecord &inputRec, GenomeSequence &genome) |
Check to see if the MD tag in the record is accurate. | |
static bool | updateMDTag (SamRecord &inputRec, GenomeSequence &genome) |
Static Public Attributes | |
Constants for parsing tags. | |
static const char * | BQ_TAG = "BQ" |
static const char | BQ_TAG_TYPE = 'Z' |
static const char * | MD_TAG = "MD" |
static const char | MD_TAG_TYPE = 'Z' |
static const char * | ORIG_POS_TAG = "OP" |
static const char | ORIG_POS_TAG_TYPE = 'i' |
static const char * | ORIG_CIGAR_TAG = "OC" |
static const char | ORIG_CIGAR_TAG_TYPE = 'Z' |
static const char * | ORIG_QUAL_TAG = "OQ" |
static const char | ORIG_QUAL_TAG_TYPE = 'Z' |
Class for parsing/creating/operating on SAM/BAM record tags.
bool SamTags::createMDTag | ( | String & | outputMDtag, |
SamRecord & | inputRec, | ||
GenomeSequence & | genome | ||
) | [static] |
Create the MD tag for the specified input record and the genome.
Definition at line 34 of file SamTags.cpp.
References BaseUtilities::areEqual(), SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), Cigar::getExpectedReferenceBaseCount(), GenomeSequence::getGenomePosition(), Cigar::getQueryIndex(), SamRecord::getReferenceName(), SamRecord::getSequence(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().
Referenced by isMDTagCorrect(), and SamValidator::isValidTags().
{ outputMDtag.Clear(); // Get the cigar to use for determing alignment. Cigar* cigarInfo = inputRec.getCigarInfo(); if(cigarInfo == NULL) { throw(std::runtime_error("Cannot createMDTag - failed to get the cigar")); return(false); } int32_t queryIndex = Cigar::INDEX_NA; // get where this read starts on the reference. uint32_t startOfReadOnRefIndex = genome.getGenomePosition(inputRec.getReferenceName()); if(startOfReadOnRefIndex == (uint32_t)INVALID_CHROMOSOME_INDEX) { // Failed to find the reference for this chromosome, so return false. return(false); } startOfReadOnRefIndex += inputRec.get0BasedPosition(); // Track the number of consecutive matches. int32_t matchCount = 0; // Track if it is currently in a deletion so it knows when not to add // a '^'. bool currentDeletion = false; // Loop through the Reference indices (ignores insertions/pads/clips). for(int refOffset = 0; refOffset < cigarInfo->getExpectedReferenceBaseCount(); ++refOffset) { // Get the query index for this reference position.. queryIndex = cigarInfo->getQueryIndex(refOffset); char refBase = genome[startOfReadOnRefIndex + refOffset]; if(queryIndex != Cigar::INDEX_NA) { // Both the reference and the read have a base, so get the bases. char readBase = inputRec.getSequence(queryIndex); currentDeletion = false; // If neither base is unknown and they are the same, count it // as a match. if(!BaseUtilities::isAmbiguous(readBase) && !BaseUtilities::isAmbiguous(refBase) && (BaseUtilities::areEqual(readBase, refBase))) { // Match, so update counter. ++matchCount; } else { // Mismatch, so output the number of matches if any. if(matchCount != 0) { outputMDtag += matchCount; matchCount = 0; } outputMDtag += refBase; } } else { // This reference position is not in the query, so it is a deletion. // Deletion, so output the number of matches if any. if(matchCount != 0) { outputMDtag += matchCount; matchCount = 0; } if(!currentDeletion) { // Not currently in a deletion, so add the ^ outputMDtag += '^'; } // Add the deleted base. outputMDtag += refBase; currentDeletion = true; } } // output the match count at the end. outputMDtag += matchCount; return(true); }