Iterates through the query and compare with reference. More...
#include <SamQuerySeqWithRefHelper.h>
Public Member Functions | |
SamQuerySeqWithRefIter (SamRecord &record, GenomeSequence &refSequence, bool forward=true) | |
bool | reset (bool forward=true) |
Reset to start at the beginning of the record. | |
bool | getNextMatchMismatch (SamSingleBaseMatchInfo &matchMismatchInfo) |
Returns information for the next position where the query and the reference match or mismatch. |
Iterates through the query and compare with reference.
NOTE: References to the GenomeSequence and SamRecord are stored, the objects are not copied, so they must remain valid as long as this class is used.
Definition at line 58 of file SamQuerySeqWithRefHelper.h.
bool SamQuerySeqWithRefIter::getNextMatchMismatch | ( | SamSingleBaseMatchInfo & | matchMismatchInfo | ) |
Returns information for the next position where the query and the reference match or mismatch.
To be a match or mismatch, both the query and reference must have a base that is not 'N'. This means: insertions and deletions are not mismatches or matches. 'N' bases are not matches or mismatches
matchMismatchInfo | return parameter with the information about the matching/mismatching base. |
Definition at line 102 of file SamQuerySeqWithRefHelper.cpp.
References BaseUtilities::areEqual(), SamRecord::getFlag(), SamRecord::getReadLength(), Cigar::getRefOffset(), SamRecord::getSequence(), Cigar::INDEX_NA, BaseUtilities::isAmbiguous(), SamRecord::NONE, SamSingleBaseMatchInfo::setQueryIndex(), and SamSingleBaseMatchInfo::setType().
00103 { 00104 // Check whether or not this read is mapped. 00105 // If the read is not mapped, return no matches. 00106 if(!SamFlag::isMapped(myRecord.getFlag())) 00107 { 00108 // Not mapped. 00109 return(false); 00110 } 00111 00112 // Check that the Cigar is set. 00113 if(myCigar == NULL) 00114 { 00115 // Error. 00116 throw(std::runtime_error("Cannot determine matches/mismatches since failed to retrieve the cigar")); 00117 return(false); 00118 } 00119 00120 // If myStartOfReadOnRefIndex is the default (unset) value, then 00121 // the reference was not found, so return false, no matches/mismatches. 00122 if(myStartOfReadOnRefIndex == INVALID_GENOME_INDEX) 00123 { 00124 // This reference name was not found in the reference file, so just 00125 // return no matches/mismatches. 00126 return(false); 00127 } 00128 00129 00130 // Repull the read length from the record to check just in case the 00131 // record has changed length. 00132 // Loop until a match or mismatch is found as long as query index 00133 // is still within the read (Loop is broken by a return). 00134 while((myQueryIndex < myRecord.getReadLength()) && 00135 (myQueryIndex >= 0)) 00136 { 00137 // Still more bases, look for a match/mismatch. 00138 00139 // Get the reference offset for this read position. 00140 int32_t refOffset = myCigar->getRefOffset(myQueryIndex); 00141 if(refOffset == Cigar::INDEX_NA) 00142 { 00143 // This is either a softclip or an insertion 00144 // which do not count as a match or a mismatch, so 00145 // go to the next index. 00146 nextIndex(); 00147 continue; 00148 } 00149 00150 // Both the reference and the read have a base, so get the bases. 00151 char readBase = myRecord.getSequence(myQueryIndex, SamRecord::NONE); 00152 char refBase = myRefSequence[myStartOfReadOnRefIndex + refOffset]; 00153 00154 // If either the read or the reference bases are unknown, then 00155 // it does not count as a match or a mismatch. 00156 if(BaseUtilities::isAmbiguous(readBase) || 00157 BaseUtilities::isAmbiguous(refBase)) 00158 { 00159 // Either the reference base or the read base are unknown, 00160 // so skip this position. 00161 nextIndex(); 00162 continue; 00163 } 00164 00165 // Both the read & the reference have a known base, so it is either 00166 // a match or a mismatch. 00167 matchMismatchInfo.setQueryIndex(myQueryIndex); 00168 00169 // Check if they are equal. 00170 if(BaseUtilities::areEqual(readBase, refBase)) 00171 { 00172 // Match. 00173 matchMismatchInfo.setType(SamSingleBaseMatchInfo::MATCH); 00174 // Increment the query index to the next position. 00175 nextIndex(); 00176 return(true); 00177 } 00178 else 00179 { 00180 // Mismatch 00181 matchMismatchInfo.setType(SamSingleBaseMatchInfo::MISMATCH); 00182 // Increment the query index to the next position. 00183 nextIndex(); 00184 return(true); 00185 } 00186 } 00187 00188 // No matches or mismatches were found, so return false. 00189 return(false); 00190 }
bool SamQuerySeqWithRefIter::reset | ( | bool | forward = true |
) |
Reset to start at the beginning of the record.
This will re-read values from SamRecord, so can be used if it has changed to contain information for a new record.
forward | true means to start from the beginning and go to the end; false means to start from the end and go to the beginning. |
Definition at line 58 of file SamQuerySeqWithRefHelper.cpp.
References SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), GenomeSequence::getGenomePosition(), SamRecord::getReadLength(), and SamRecord::getReferenceName().
00059 { 00060 myCigar = myRecord.getCigarInfo(); 00061 if(myCigar == NULL) 00062 { 00063 // Failed to get Cigar. 00064 return(false); 00065 } 00066 00067 // Get where the position of where this read starts as mapped to the 00068 // reference. 00069 myStartOfReadOnRefIndex = 00070 myRefSequence.getGenomePosition(myRecord.getReferenceName()); 00071 00072 if(myStartOfReadOnRefIndex != INVALID_GENOME_INDEX) 00073 { 00074 // This reference name was found in the reference file, so 00075 // add the start position. 00076 myStartOfReadOnRefIndex += myRecord.get0BasedPosition(); 00077 } 00078 00079 myForward = forward; 00080 00081 if(myForward) 00082 { 00083 myQueryIndex = 0; 00084 } 00085 else 00086 { 00087 // reverse, so start at the last entry. 00088 myQueryIndex = myRecord.getReadLength() - 1; 00089 } 00090 return(true); 00091 }