libStatGen Software  1
SamTags Class Reference

Class for parsing/creating/operating on SAM/BAM record tags. More...

#include <SamTags.h>

List of all members.

Static Public Member Functions

static bool createMDTag (String &outputMDtag, SamRecord &inputRec, GenomeSequence &genome)
 Create the MD tag for the specified input record and the genome.
static bool isMDTagCorrect (SamRecord &inputRec, GenomeSequence &genome)
 Check to see if the MD tag in the record is accurate.
static bool updateMDTag (SamRecord &inputRec, GenomeSequence &genome)

Static Public Attributes

Constants for parsing tags.
static const char * BQ_TAG = "BQ"
static const char BQ_TAG_TYPE = 'Z'
static const char * MD_TAG = "MD"
static const char MD_TAG_TYPE = 'Z'
static const char * ORIG_POS_TAG = "OP"
static const char ORIG_POS_TAG_TYPE = 'i'
static const char * ORIG_CIGAR_TAG = "OC"
static const char ORIG_CIGAR_TAG_TYPE = 'Z'
static const char * ORIG_QUAL_TAG = "OQ"
static const char ORIG_QUAL_TAG_TYPE = 'Z'

Detailed Description

Class for parsing/creating/operating on SAM/BAM record tags.

Definition at line 26 of file SamTags.h.


Member Function Documentation

bool SamTags::createMDTag ( String outputMDtag,
SamRecord inputRec,
GenomeSequence genome 
) [static]

Create the MD tag for the specified input record and the genome.

Returns:
returns true if an MD tag was created, false if one could not be created.

Definition at line 34 of file SamTags.cpp.

References BaseUtilities::areEqual(), SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), Cigar::getExpectedReferenceBaseCount(), GenomeSequence::getGenomePosition(), Cigar::getQueryIndex(), SamRecord::getReferenceName(), SamRecord::getSequence(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().

Referenced by isMDTagCorrect(), and SamValidator::isValidTags().

{
    outputMDtag.Clear();
    // Get the cigar to use for determing alignment.
    Cigar* cigarInfo = inputRec.getCigarInfo();
    if(cigarInfo == NULL)
    {
        throw(std::runtime_error("Cannot createMDTag - failed to get the cigar"));
        return(false);
    }
    int32_t queryIndex = Cigar::INDEX_NA;

    // get where this read starts on the reference.
    uint32_t startOfReadOnRefIndex = 
        genome.getGenomePosition(inputRec.getReferenceName());
    if(startOfReadOnRefIndex == (uint32_t)INVALID_CHROMOSOME_INDEX)
    {
        // Failed to find the reference for this chromosome, so return false.
        return(false);
    }
    startOfReadOnRefIndex += inputRec.get0BasedPosition();

    // Track the number of consecutive matches.
    int32_t matchCount = 0;
    // Track if it is currently in a deletion so it knows when not to add
    // a '^'.
    bool currentDeletion = false;

    // Loop through the Reference indices (ignores insertions/pads/clips).
    for(int refOffset = 0; 
        refOffset < cigarInfo->getExpectedReferenceBaseCount();
        ++refOffset)
    {
        // Get the query index for this reference position..
        queryIndex = cigarInfo->getQueryIndex(refOffset);

        char refBase = genome[startOfReadOnRefIndex + refOffset];

        if(queryIndex != Cigar::INDEX_NA)
        {
            // Both the reference and the read have a base, so get the bases.
            char readBase = inputRec.getSequence(queryIndex);
            currentDeletion = false;

            // If neither base is unknown and they are the same, count it
            // as a match.
            if(!BaseUtilities::isAmbiguous(readBase) && 
               !BaseUtilities::isAmbiguous(refBase) && 
               (BaseUtilities::areEqual(readBase, refBase)))
            {
                // Match, so update counter.
                ++matchCount;
            }
            else
            {
                // Mismatch, so output the number of matches if any.
                if(matchCount != 0)
                {
                    outputMDtag += matchCount;
                    matchCount = 0;
                }
                outputMDtag += refBase;
            }
        }
        else
        {
            // This reference position is not in the query, so it is a deletion.
            // Deletion, so output the number of matches if any.
            if(matchCount != 0)
            {
                outputMDtag += matchCount;
                matchCount = 0;
            }

            if(!currentDeletion)
            {
                // Not currently in a deletion, so add the ^
                outputMDtag += '^';
            }
            // Add the deleted base.
            outputMDtag += refBase;
            currentDeletion = true;
        }
    }

    // output the match count at the end.
    outputMDtag += matchCount;
    return(true);
}

The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends