|
libStatGen Software
1
|
Class for parsing/creating/operating on SAM/BAM record tags. More...
#include <SamTags.h>
Static Public Member Functions | |
| static bool | createMDTag (String &outputMDtag, SamRecord &inputRec, GenomeSequence &genome) |
| Create the MD tag for the specified input record and the genome. | |
| static bool | isMDTagCorrect (SamRecord &inputRec, GenomeSequence &genome) |
| Check to see if the MD tag in the record is accurate. | |
| static bool | updateMDTag (SamRecord &inputRec, GenomeSequence &genome) |
Static Public Attributes | |
Constants for parsing tags. | |
| static const char * | BQ_TAG = "BQ" |
| static const char | BQ_TAG_TYPE = 'Z' |
| static const char * | MD_TAG = "MD" |
| static const char | MD_TAG_TYPE = 'Z' |
| static const char * | ORIG_POS_TAG = "OP" |
| static const char | ORIG_POS_TAG_TYPE = 'i' |
| static const char * | ORIG_CIGAR_TAG = "OC" |
| static const char | ORIG_CIGAR_TAG_TYPE = 'Z' |
| static const char * | ORIG_QUAL_TAG = "OQ" |
| static const char | ORIG_QUAL_TAG_TYPE = 'Z' |
Class for parsing/creating/operating on SAM/BAM record tags.
| bool SamTags::createMDTag | ( | String & | outputMDtag, |
| SamRecord & | inputRec, | ||
| GenomeSequence & | genome | ||
| ) | [static] |
Create the MD tag for the specified input record and the genome.
Definition at line 34 of file SamTags.cpp.
References BaseUtilities::areEqual(), SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), Cigar::getExpectedReferenceBaseCount(), GenomeSequence::getGenomePosition(), Cigar::getQueryIndex(), SamRecord::getReferenceName(), SamRecord::getSequence(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().
Referenced by isMDTagCorrect(), and SamValidator::isValidTags().
{
outputMDtag.Clear();
// Get the cigar to use for determing alignment.
Cigar* cigarInfo = inputRec.getCigarInfo();
if(cigarInfo == NULL)
{
throw(std::runtime_error("Cannot createMDTag - failed to get the cigar"));
return(false);
}
int32_t queryIndex = Cigar::INDEX_NA;
// get where this read starts on the reference.
uint32_t startOfReadOnRefIndex =
genome.getGenomePosition(inputRec.getReferenceName());
if(startOfReadOnRefIndex == (uint32_t)INVALID_CHROMOSOME_INDEX)
{
// Failed to find the reference for this chromosome, so return false.
return(false);
}
startOfReadOnRefIndex += inputRec.get0BasedPosition();
// Track the number of consecutive matches.
int32_t matchCount = 0;
// Track if it is currently in a deletion so it knows when not to add
// a '^'.
bool currentDeletion = false;
// Loop through the Reference indices (ignores insertions/pads/clips).
for(int refOffset = 0;
refOffset < cigarInfo->getExpectedReferenceBaseCount();
++refOffset)
{
// Get the query index for this reference position..
queryIndex = cigarInfo->getQueryIndex(refOffset);
char refBase = genome[startOfReadOnRefIndex + refOffset];
if(queryIndex != Cigar::INDEX_NA)
{
// Both the reference and the read have a base, so get the bases.
char readBase = inputRec.getSequence(queryIndex);
currentDeletion = false;
// If neither base is unknown and they are the same, count it
// as a match.
if(!BaseUtilities::isAmbiguous(readBase) &&
!BaseUtilities::isAmbiguous(refBase) &&
(BaseUtilities::areEqual(readBase, refBase)))
{
// Match, so update counter.
++matchCount;
}
else
{
// Mismatch, so output the number of matches if any.
if(matchCount != 0)
{
outputMDtag += matchCount;
matchCount = 0;
}
outputMDtag += refBase;
}
}
else
{
// This reference position is not in the query, so it is a deletion.
// Deletion, so output the number of matches if any.
if(matchCount != 0)
{
outputMDtag += matchCount;
matchCount = 0;
}
if(!currentDeletion)
{
// Not currently in a deletion, so add the ^
outputMDtag += '^';
}
// Add the deleted base.
outputMDtag += refBase;
currentDeletion = true;
}
}
// output the match count at the end.
outputMDtag += matchCount;
return(true);
}