Contains methods for converting between the query sequence and reference. More...
#include <SamQuerySeqWithRefHelper.h>
Static Public Member Functions | |
static void | seqWithEquals (const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq) |
Gets the sequence with '=' in any position where the sequence matches the reference. | |
static void | seqWithoutEquals (const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq) |
Gets the sequence converting '=' to the appropriate base using the reference. |
Contains methods for converting between the query sequence and reference.
Definition at line 101 of file SamQuerySeqWithRefHelper.h.
void SamQuerySeqWithRef::seqWithEquals | ( | const char * | currentSeq, | |
int32_t | seq0BasedPos, | |||
Cigar & | cigar, | |||
const char * | referenceName, | |||
const GenomeSequence & | refSequence, | |||
std::string & | updatedSeq | |||
) | [static] |
Gets the sequence with '=' in any position where the sequence matches the reference.
NOTE: 'N' in both the sequence and the reference is not considered a match.
currentSeq | sequence that should be converted | |
seq0BasedPos | 0 based start position of currentSeq on the reference. | |
cigar | cigar string for currentSeq (used for determining how the sequence aligns to the reference) | |
referenceName | reference name associated with this sequence | |
refSequence | reference sequence object | |
updatedSeq | return parameter that this method sets to the current sequence, replacing any matches to the reference with '='. |
Definition at line 243 of file SamQuerySeqWithRefHelper.cpp.
References BaseUtilities::areEqual(), GenomeSequence::getGenomePosition(), Cigar::getRefOffset(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().
Referenced by SamRecord::getSequence().
00249 { 00250 updatedSeq = currentSeq; 00251 00252 int32_t seqLength = updatedSeq.length(); 00253 int32_t queryIndex = 0; 00254 00255 uint32_t startOfReadOnRefIndex = 00256 refSequence.getGenomePosition(referenceName); 00257 00258 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX) 00259 { 00260 // This reference name was not found in the reference file, so just 00261 // return. 00262 return; 00263 } 00264 startOfReadOnRefIndex += seq0BasedPos; 00265 00266 // Loop until the entire sequence has been updated. 00267 while(queryIndex < seqLength) 00268 { 00269 // Still more bases, look for matches. 00270 00271 // Get the reference offset for this read position. 00272 int32_t refOffset = cigar.getRefOffset(queryIndex); 00273 if(refOffset != Cigar::INDEX_NA) 00274 { 00275 // Both the reference and the read have a base, so get the bases. 00276 char readBase = currentSeq[queryIndex]; 00277 char refBase = refSequence[startOfReadOnRefIndex + refOffset]; 00278 00279 // If neither base is unknown and they are the same, count it 00280 // as a match. 00281 if(!BaseUtilities::isAmbiguous(readBase) && 00282 !BaseUtilities::isAmbiguous(refBase) && 00283 (BaseUtilities::areEqual(readBase, refBase))) 00284 { 00285 // Match. 00286 updatedSeq[queryIndex] = '='; 00287 } 00288 } 00289 // Increment the query index to the next position. 00290 ++queryIndex; 00291 continue; 00292 } 00293 }
void SamQuerySeqWithRef::seqWithoutEquals | ( | const char * | currentSeq, | |
int32_t | seq0BasedPos, | |||
Cigar & | cigar, | |||
const char * | referenceName, | |||
const GenomeSequence & | refSequence, | |||
std::string & | updatedSeq | |||
) | [static] |
Gets the sequence converting '=' to the appropriate base using the reference.
currentSeq | sequence that should be converted | |
seq0BasedPos | 0 based start position of currentSeq on the reference. | |
cigar | cigar string for currentSeq (used for determining how the sequence aligns to the reference) | |
referenceName | reference name associated with this sequence | |
refSequence | reference sequence object | |
updatedSeq | return parameter that this method sets to the current sequence, replacing any '=' with the base from the reference. |
Definition at line 296 of file SamQuerySeqWithRefHelper.cpp.
References BaseUtilities::areEqual(), GenomeSequence::getGenomePosition(), Cigar::getRefOffset(), and Cigar::INDEX_NA.
Referenced by SamRecord::getSequence().
00302 { 00303 updatedSeq = currentSeq; 00304 00305 int32_t seqLength = updatedSeq.length(); 00306 int32_t queryIndex = 0; 00307 00308 uint32_t startOfReadOnRefIndex = 00309 refSequence.getGenomePosition(referenceName); 00310 00311 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX) 00312 { 00313 // This reference name was not found in the reference file, so just 00314 // return. 00315 return; 00316 } 00317 startOfReadOnRefIndex += seq0BasedPos; 00318 00319 // Loop until the entire sequence has been updated. 00320 while(queryIndex < seqLength) 00321 { 00322 // Still more bases, look for matches. 00323 00324 // Get the reference offset for this read position. 00325 int32_t refOffset = cigar.getRefOffset(queryIndex); 00326 if(refOffset != Cigar::INDEX_NA) 00327 { 00328 // Both the reference and the read have a base, so get the bases. 00329 char readBase = currentSeq[queryIndex]; 00330 char refBase = refSequence[startOfReadOnRefIndex + refOffset]; 00331 00332 // If the bases are equal, set the sequence to the reference 00333 // base. (Skips the check for ambiguous to catch a case where 00334 // ambiguous had been converted to a '=', and if both are ambiguous, 00335 // it will still be set to ambiguous.) 00336 if(BaseUtilities::areEqual(readBase, refBase)) 00337 { 00338 // Match. 00339 updatedSeq[queryIndex] = refBase; 00340 } 00341 } 00342 00343 // Increment the query index to the next position. 00344 ++queryIndex; 00345 continue; 00346 } 00347 }