SamQuerySeqWithRefHelper.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdexcept>
00019
00020 #include "SamQuerySeqWithRefHelper.h"
00021 #include "BaseUtilities.h"
00022 #include "SamFlag.h"
00023
00024 SamQuerySeqWithRefIter::SamQuerySeqWithRefIter(SamRecord& record,
00025 GenomeSequence& refSequence,
00026 bool forward)
00027 : myRecord(record),
00028 myRefSequence(refSequence),
00029 myCigar(NULL),
00030 myStartOfReadOnRefIndex(INVALID_GENOME_INDEX),
00031 myQueryIndex(0),
00032 myForward(forward)
00033 {
00034 myCigar = myRecord.getCigarInfo();
00035 myStartOfReadOnRefIndex =
00036 refSequence.getGenomePosition(myRecord.getReferenceName());
00037
00038 if(myStartOfReadOnRefIndex != INVALID_GENOME_INDEX)
00039 {
00040
00041
00042 myStartOfReadOnRefIndex += myRecord.get0BasedPosition();
00043 }
00044
00045 if(!forward)
00046 {
00047 myQueryIndex = myRecord.getReadLength() - 1;
00048 }
00049 }
00050
00051
00052 SamQuerySeqWithRefIter::~SamQuerySeqWithRefIter()
00053 {
00054 }
00055
00056
00057
00058 bool SamQuerySeqWithRefIter::reset(bool forward)
00059 {
00060 myCigar = myRecord.getCigarInfo();
00061 if(myCigar == NULL)
00062 {
00063
00064 return(false);
00065 }
00066
00067
00068
00069 myStartOfReadOnRefIndex =
00070 myRefSequence.getGenomePosition(myRecord.getReferenceName());
00071
00072 if(myStartOfReadOnRefIndex != INVALID_GENOME_INDEX)
00073 {
00074
00075
00076 myStartOfReadOnRefIndex += myRecord.get0BasedPosition();
00077 }
00078
00079 myForward = forward;
00080
00081 if(myForward)
00082 {
00083 myQueryIndex = 0;
00084 }
00085 else
00086 {
00087
00088 myQueryIndex = myRecord.getReadLength() - 1;
00089 }
00090 return(true);
00091 }
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102 bool SamQuerySeqWithRefIter::getNextMatchMismatch(SamSingleBaseMatchInfo& matchMismatchInfo)
00103 {
00104
00105
00106 if(!SamFlag::isMapped(myRecord.getFlag()))
00107 {
00108
00109 return(false);
00110 }
00111
00112
00113 if(myCigar == NULL)
00114 {
00115
00116 throw(std::runtime_error("Cannot determine matches/mismatches since failed to retrieve the cigar"));
00117 return(false);
00118 }
00119
00120
00121
00122 if(myStartOfReadOnRefIndex == INVALID_GENOME_INDEX)
00123 {
00124
00125
00126 return(false);
00127 }
00128
00129
00130
00131
00132
00133
00134 while((myQueryIndex < myRecord.getReadLength()) &&
00135 (myQueryIndex >= 0))
00136 {
00137
00138
00139
00140 int32_t refOffset = myCigar->getRefOffset(myQueryIndex);
00141 if(refOffset == Cigar::INDEX_NA)
00142 {
00143
00144
00145
00146 nextIndex();
00147 continue;
00148 }
00149
00150
00151 char readBase = myRecord.getSequence(myQueryIndex, SamRecord::NONE);
00152 char refBase = myRefSequence[myStartOfReadOnRefIndex + refOffset];
00153
00154
00155
00156 if(BaseUtilities::isAmbiguous(readBase) ||
00157 BaseUtilities::isAmbiguous(refBase))
00158 {
00159
00160
00161 nextIndex();
00162 continue;
00163 }
00164
00165
00166
00167 matchMismatchInfo.setQueryIndex(myQueryIndex);
00168
00169
00170 if(BaseUtilities::areEqual(readBase, refBase))
00171 {
00172
00173 matchMismatchInfo.setType(SamSingleBaseMatchInfo::MATCH);
00174
00175 nextIndex();
00176 return(true);
00177 }
00178 else
00179 {
00180
00181 matchMismatchInfo.setType(SamSingleBaseMatchInfo::MISMATCH);
00182
00183 nextIndex();
00184 return(true);
00185 }
00186 }
00187
00188
00189 return(false);
00190 }
00191
00192
00193 void SamQuerySeqWithRefIter::nextIndex()
00194 {
00195 if(myForward)
00196 {
00197 ++myQueryIndex;
00198 }
00199 else
00200 {
00201 --myQueryIndex;
00202 }
00203 }
00204
00205
00206 SamSingleBaseMatchInfo::SamSingleBaseMatchInfo()
00207 : myType(UNKNOWN),
00208 myQueryIndex(0)
00209 {
00210 }
00211
00212
00213 SamSingleBaseMatchInfo::~SamSingleBaseMatchInfo()
00214 {
00215 }
00216
00217
00218 SamSingleBaseMatchInfo::Type SamSingleBaseMatchInfo::getType()
00219 {
00220 return(myType);
00221 }
00222
00223
00224 int32_t SamSingleBaseMatchInfo::getQueryIndex()
00225 {
00226 return(myQueryIndex);
00227 }
00228
00229
00230 void SamSingleBaseMatchInfo::setType(Type newType)
00231 {
00232 myType = newType;
00233 }
00234
00235
00236 void SamSingleBaseMatchInfo::setQueryIndex(int32_t queryIndex)
00237 {
00238 myQueryIndex = queryIndex;
00239 }
00240
00241
00242
00243 void SamQuerySeqWithRef::seqWithEquals(const char* currentSeq,
00244 int32_t seq0BasedPos,
00245 Cigar& cigar,
00246 const char* referenceName,
00247 const GenomeSequence& refSequence,
00248 std::string& updatedSeq)
00249 {
00250 updatedSeq = currentSeq;
00251
00252 int32_t seqLength = updatedSeq.length();
00253 int32_t queryIndex = 0;
00254
00255 uint32_t startOfReadOnRefIndex =
00256 refSequence.getGenomePosition(referenceName);
00257
00258 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
00259 {
00260
00261
00262 return;
00263 }
00264 startOfReadOnRefIndex += seq0BasedPos;
00265
00266
00267 while(queryIndex < seqLength)
00268 {
00269
00270
00271
00272 int32_t refOffset = cigar.getRefOffset(queryIndex);
00273 if(refOffset != Cigar::INDEX_NA)
00274 {
00275
00276 char readBase = currentSeq[queryIndex];
00277 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
00278
00279
00280
00281 if(!BaseUtilities::isAmbiguous(readBase) &&
00282 !BaseUtilities::isAmbiguous(refBase) &&
00283 (BaseUtilities::areEqual(readBase, refBase)))
00284 {
00285
00286 updatedSeq[queryIndex] = '=';
00287 }
00288 }
00289
00290 ++queryIndex;
00291 continue;
00292 }
00293 }
00294
00295
00296 void SamQuerySeqWithRef::seqWithoutEquals(const char* currentSeq,
00297 int32_t seq0BasedPos,
00298 Cigar& cigar,
00299 const char* referenceName,
00300 const GenomeSequence& refSequence,
00301 std::string& updatedSeq)
00302 {
00303 updatedSeq = currentSeq;
00304
00305 int32_t seqLength = updatedSeq.length();
00306 int32_t queryIndex = 0;
00307
00308 uint32_t startOfReadOnRefIndex =
00309 refSequence.getGenomePosition(referenceName);
00310
00311 if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
00312 {
00313
00314
00315 return;
00316 }
00317 startOfReadOnRefIndex += seq0BasedPos;
00318
00319
00320 while(queryIndex < seqLength)
00321 {
00322
00323
00324
00325 int32_t refOffset = cigar.getRefOffset(queryIndex);
00326 if(refOffset != Cigar::INDEX_NA)
00327 {
00328
00329 char readBase = currentSeq[queryIndex];
00330 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
00331
00332
00333
00334
00335
00336 if(BaseUtilities::areEqual(readBase, refBase))
00337 {
00338
00339 updatedSeq[queryIndex] = refBase;
00340 }
00341 }
00342
00343
00344 ++queryIndex;
00345 continue;
00346 }
00347 }