SamTags Class Reference

List of all members.

Static Public Member Functions

static bool createMDTag (String &outputMDtag, SamRecord &inputRec, GenomeSequence &genome)
 Create the MD tag for the specified input record and the genome.
static bool isMDTagCorrect (SamRecord &inputRec, GenomeSequence &genome)
 Check to see if the MD tag in the record is accurate.
static bool updateMDTag (SamRecord &inputRec, GenomeSequence &genome)

Static Public Attributes

static const char * BQ_TAG = "BQ"
 Constants for parsing a tags.
static const char BQ_TAG_TYPE = 'Z'
static const char * MD_TAG = "MD"
static const char MD_TAG_TYPE = 'Z'
static const char * ORIG_POS_TAG = "OP"
static const char ORIG_POS_TAG_TYPE = 'i'
static const char * ORIG_CIGAR_TAG = "OC"
static const char ORIG_CIGAR_TAG_TYPE = 'Z'
static const char * ORIG_QUAL_TAG = "OQ"
static const char ORIG_QUAL_TAG_TYPE = 'Z'

Detailed Description

Definition at line 25 of file SamTags.h.


Member Function Documentation

bool SamTags::createMDTag ( String outputMDtag,
SamRecord inputRec,
GenomeSequence genome 
) [static]

Create the MD tag for the specified input record and the genome.

Returns:
returns true if an MD tag was created, false if one could not be created.

Definition at line 34 of file SamTags.cpp.

References SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), Cigar::getExpectedReferenceBaseCount(), GenomeSequence::getGenomePosition(), Cigar::getQueryIndex(), SamRecord::getReferenceName(), SamRecord::getSequence(), and Cigar::INDEX_NA.

Referenced by isMDTagCorrect().

00036 {
00037     outputMDtag.Clear();
00038     // Get the cigar to use for determing alignment.
00039     Cigar* cigarInfo = inputRec.getCigarInfo();
00040     if(cigarInfo == NULL)
00041     {
00042         throw(std::runtime_error("Cannot createMDTag - failed to get the cigar"));
00043         return(false);
00044     }
00045     int32_t queryIndex = Cigar::INDEX_NA;
00046 
00047     // get where this read starts on the reference.
00048     uint32_t startOfReadOnRefIndex = 
00049         genome.getGenomePosition(inputRec.getReferenceName());
00050     if(startOfReadOnRefIndex == (uint32_t)INVALID_CHROMOSOME_INDEX)
00051     {
00052         // Failed to find the reference for this chromosome, so return false.
00053         return(false);
00054     }
00055     startOfReadOnRefIndex += inputRec.get0BasedPosition();
00056 
00057     // Track the number of consecutive matches.
00058     int32_t matchCount = 0;
00059     // Track if it is currently in a deletion so it knows when not to add
00060     // a '^'.
00061     bool currentDeletion = false;
00062 
00063     // Loop through the Reference indices (ignores insertions/pads/clips).
00064     for(int refOffset = 0; 
00065         refOffset < cigarInfo->getExpectedReferenceBaseCount();
00066         ++refOffset)
00067     {
00068         // Get the query index for this reference position..
00069         queryIndex = cigarInfo->getQueryIndex(refOffset);
00070 
00071         char refBase = genome[startOfReadOnRefIndex + refOffset];
00072 
00073         if(queryIndex != Cigar::INDEX_NA)
00074         {
00075             // Both the reference and the read have a base, so get the bases.
00076             char readBase = inputRec.getSequence(queryIndex);
00077             currentDeletion = false;
00078 
00079             // If neither base is unknown and they are the same, count it
00080             // as a match.
00081             if(!BaseUtilities::isAmbiguous(readBase) && 
00082                !BaseUtilities::isAmbiguous(refBase) && 
00083                (BaseUtilities::areEqual(readBase, refBase)))
00084             {
00085                 // Match, so update counter.
00086                 ++matchCount;
00087             }
00088             else
00089             {
00090                 // Mismatch, so output the number of matches if any.
00091                 if(matchCount != 0)
00092                 {
00093                     outputMDtag += matchCount;
00094                     matchCount = 0;
00095                 }
00096                 outputMDtag += refBase;
00097             }
00098         }
00099         else
00100         {
00101             // This reference position is not in the query, so it is a deletion.
00102             // Deletion, so output the number of matches if any.
00103             if(matchCount != 0)
00104             {
00105                 outputMDtag += matchCount;
00106                 matchCount = 0;
00107             }
00108 
00109             if(!currentDeletion)
00110             {
00111                 // Not currently in a deletion, so add the ^
00112                 outputMDtag += '^';
00113             }
00114             // Add the deleted base.
00115             outputMDtag += refBase;
00116             currentDeletion = true;
00117         }
00118     }
00119 
00120     // output the match count at the end.
00121     outputMDtag += matchCount;
00122     return(true);
00123 }


The documentation for this class was generated from the following files:
Generated on Tue Aug 23 18:19:12 2011 for libStatGen Software by  doxygen 1.6.3