SamQuerySeqWithRefHelper.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdexcept>
00019
00020 #include "SamQuerySeqWithRefHelper.h"
00021 #include "BaseUtilities.h"
00022 #include "SamFlag.h"
00023
00024 SamQuerySeqWithRefIter::SamQuerySeqWithRefIter(SamRecord& record,
00025 GenomeSequence& refSequence,
00026 bool forward)
00027 : myRecord(record),
00028 myRefSequence(refSequence),
00029 myCigar(NULL),
00030 myStartOfReadOnRefIndex(0),
00031 myQueryIndex(0),
00032 myForward(forward)
00033 {
00034 myCigar = myRecord.getCigarInfo();
00035 myStartOfReadOnRefIndex =
00036 myRefSequence.getGenomePosition(myRecord.getReferenceName()) +
00037 myRecord.get0BasedPosition();
00038
00039 if(!forward)
00040 {
00041 myQueryIndex = myRecord.getReadLength() - 1;
00042 }
00043 }
00044
00045
00046 SamQuerySeqWithRefIter::~SamQuerySeqWithRefIter()
00047 {
00048 }
00049
00050
00051
00052 bool SamQuerySeqWithRefIter::reset(bool forward)
00053 {
00054 myCigar = myRecord.getCigarInfo();
00055 if(myCigar == NULL)
00056 {
00057
00058 return(false);
00059 }
00060
00061
00062
00063 myStartOfReadOnRefIndex =
00064 myRefSequence.getGenomePosition(myRecord.getReferenceName()) +
00065 myRecord.get0BasedPosition();
00066
00067 myForward = forward;
00068
00069 if(myForward)
00070 {
00071 myQueryIndex = 0;
00072 }
00073 else
00074 {
00075
00076 myQueryIndex = myRecord.getReadLength() - 1;
00077 }
00078 return(true);
00079 }
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090 bool SamQuerySeqWithRefIter::getNextMatchMismatch(SamSingleBaseMatchInfo& matchMismatchInfo)
00091 {
00092
00093
00094 if(!SamFlag::isMapped(myRecord.getFlag()))
00095 {
00096
00097 return(false);
00098 }
00099
00100
00101 if(myCigar == NULL)
00102 {
00103
00104 throw(std::runtime_error("Cannot determine matches/mismatches since failed to retrieve the cigar"));
00105 return(false);
00106 }
00107
00108
00109
00110
00111
00112 while((myQueryIndex < myRecord.getReadLength()) &&
00113 (myQueryIndex >= 0))
00114 {
00115
00116
00117
00118 int32_t refOffset = myCigar->getRefOffset(myQueryIndex);
00119 if(refOffset == Cigar::INDEX_NA)
00120 {
00121
00122
00123
00124 nextIndex();
00125 continue;
00126 }
00127
00128
00129 char readBase = myRecord.getSequence(myQueryIndex, SamRecord::NONE);
00130 char refBase = myRefSequence[myStartOfReadOnRefIndex + refOffset];
00131
00132
00133
00134 if(BaseUtilities::isAmbiguous(readBase) ||
00135 BaseUtilities::isAmbiguous(refBase))
00136 {
00137
00138
00139 nextIndex();
00140 continue;
00141 }
00142
00143
00144
00145 matchMismatchInfo.setQueryIndex(myQueryIndex);
00146
00147
00148 if(BaseUtilities::areEqual(readBase, refBase))
00149 {
00150
00151 matchMismatchInfo.setType(SamSingleBaseMatchInfo::MATCH);
00152
00153 nextIndex();
00154 return(true);
00155 }
00156 else
00157 {
00158
00159 matchMismatchInfo.setType(SamSingleBaseMatchInfo::MISMATCH);
00160
00161 nextIndex();
00162 return(true);
00163 }
00164 }
00165
00166
00167 return(false);
00168 }
00169
00170
00171 void SamQuerySeqWithRefIter::nextIndex()
00172 {
00173 if(myForward)
00174 {
00175 ++myQueryIndex;
00176 }
00177 else
00178 {
00179 --myQueryIndex;
00180 }
00181 }
00182
00183
00184 SamSingleBaseMatchInfo::SamSingleBaseMatchInfo()
00185 : myType(UNKNOWN),
00186 myQueryIndex(0)
00187 {
00188 }
00189
00190
00191 SamSingleBaseMatchInfo::~SamSingleBaseMatchInfo()
00192 {
00193 }
00194
00195
00196 SamSingleBaseMatchInfo::Type SamSingleBaseMatchInfo::getType()
00197 {
00198 return(myType);
00199 }
00200
00201
00202 int32_t SamSingleBaseMatchInfo::getQueryIndex()
00203 {
00204 return(myQueryIndex);
00205 }
00206
00207
00208 void SamSingleBaseMatchInfo::setType(Type newType)
00209 {
00210 myType = newType;
00211 }
00212
00213
00214 void SamSingleBaseMatchInfo::setQueryIndex(int32_t queryIndex)
00215 {
00216 myQueryIndex = queryIndex;
00217 }
00218
00219
00220
00221 void SamQuerySeqWithRef::seqWithEquals(const char* currentSeq,
00222 int32_t seq0BasedPos,
00223 Cigar& cigar,
00224 const char* referenceName,
00225 const GenomeSequence& refSequence,
00226 std::string& updatedSeq)
00227 {
00228 updatedSeq = currentSeq;
00229
00230 int32_t seqLength = updatedSeq.length();
00231 int32_t queryIndex = 0;
00232
00233 uint32_t startOfReadOnRefIndex =
00234 refSequence.getGenomePosition(referenceName) + seq0BasedPos;
00235
00236
00237 while(queryIndex < seqLength)
00238 {
00239
00240
00241
00242 int32_t refOffset = cigar.getRefOffset(queryIndex);
00243 if(refOffset != Cigar::INDEX_NA)
00244 {
00245
00246 char readBase = currentSeq[queryIndex];
00247 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
00248
00249
00250
00251 if(!BaseUtilities::isAmbiguous(readBase) &&
00252 !BaseUtilities::isAmbiguous(refBase) &&
00253 (BaseUtilities::areEqual(readBase, refBase)))
00254 {
00255
00256 updatedSeq[queryIndex] = '=';
00257 }
00258 }
00259
00260 ++queryIndex;
00261 continue;
00262 }
00263 }
00264
00265
00266 void SamQuerySeqWithRef::seqWithoutEquals(const char* currentSeq,
00267 int32_t seq0BasedPos,
00268 Cigar& cigar,
00269 const char* referenceName,
00270 const GenomeSequence& refSequence,
00271 std::string& updatedSeq)
00272 {
00273 updatedSeq = currentSeq;
00274
00275 int32_t seqLength = updatedSeq.length();
00276 int32_t queryIndex = 0;
00277
00278 uint32_t startOfReadOnRefIndex =
00279 refSequence.getGenomePosition(referenceName) + seq0BasedPos;
00280
00281
00282 while(queryIndex < seqLength)
00283 {
00284
00285
00286
00287 int32_t refOffset = cigar.getRefOffset(queryIndex);
00288 if(refOffset != Cigar::INDEX_NA)
00289 {
00290
00291 char readBase = currentSeq[queryIndex];
00292 char refBase = refSequence[startOfReadOnRefIndex + refOffset];
00293
00294
00295
00296
00297
00298 if(BaseUtilities::areEqual(readBase, refBase))
00299 {
00300
00301 updatedSeq[queryIndex] = refBase;
00302 }
00303 }
00304
00305
00306 ++queryIndex;
00307 continue;
00308 }
00309 }