SamFilter.cpp
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "SamFilter.h"
00022
00023 #include "SamQuerySeqWithRefHelper.h"
00024 #include "BaseUtilities.h"
00025 #include "SamFlag.h"
00026
00027 SamFilter::FilterStatus SamFilter::clipOnMismatchThreshold(SamRecord& record,
00028 GenomeSequence& refSequence,
00029 double mismatchThreshold)
00030 {
00031
00032 SamQuerySeqWithRefIter iterFromStart(record, refSequence, true);
00033 SamQuerySeqWithRefIter iterFromEnd(record, refSequence, false);
00034
00035 SamSingleBaseMatchInfo baseMatchInfo;
00036
00037 int32_t readLength = record.getReadLength();
00038
00039 const int32_t initialStartClipPos = -1;
00040 int32_t startClipPos = initialStartClipPos;
00041
00042 int32_t endClipPos = readLength;
00043
00044 bool fromStartComplete = false;
00045 bool fromEndComplete = false;
00046 int32_t numBasesFromStart = 0;
00047 int32_t numBasesFromEnd = 0;
00048 int32_t numMismatchFromStart = 0;
00049 int32_t numMismatchFromEnd = 0;
00050
00051 FilterStatus status = NONE;
00052
00053
00054
00055 while(!fromStartComplete || !fromEndComplete)
00056 {
00057
00058
00059 while(!fromStartComplete &&
00060 ((numBasesFromStart <= numBasesFromEnd) ||
00061 (fromEndComplete)))
00062 {
00063 if(iterFromStart.getNextMatchMismatch(baseMatchInfo) == false)
00064 {
00065
00066 fromStartComplete = true;
00067 break;
00068 }
00069
00070 if(baseMatchInfo.getQueryIndex() >= endClipPos)
00071 {
00072
00073
00074 fromStartComplete = true;
00075 break;
00076 }
00077
00078
00079 ++numBasesFromStart;
00080
00081 if(baseMatchInfo.getType() == SamSingleBaseMatchInfo::MISMATCH)
00082 {
00083
00084 ++numMismatchFromStart;
00085
00086 double mismatchPercent =
00087 (double)numMismatchFromStart / numBasesFromStart;
00088 if(mismatchPercent > mismatchThreshold)
00089 {
00090
00091 startClipPos = baseMatchInfo.getQueryIndex();
00092
00093 numBasesFromStart = 0;
00094 numMismatchFromStart = 0;
00095 }
00096 }
00097 }
00098
00099
00100
00101 while(!fromEndComplete &&
00102 ((numBasesFromEnd <= numBasesFromStart) ||
00103 (fromStartComplete)))
00104 {
00105 if(iterFromEnd.getNextMatchMismatch(baseMatchInfo) == false)
00106 {
00107
00108 fromEndComplete = true;
00109 break;
00110 }
00111
00112 if(baseMatchInfo.getQueryIndex() <= startClipPos)
00113 {
00114
00115
00116 fromEndComplete = true;
00117 break;
00118 }
00119
00120
00121 ++numBasesFromEnd;
00122
00123 if(baseMatchInfo.getType() == SamSingleBaseMatchInfo::MISMATCH)
00124 {
00125
00126 ++numMismatchFromEnd;
00127
00128 double mismatchPercent =
00129 (double)numMismatchFromEnd / numBasesFromEnd;
00130 if(mismatchPercent > mismatchThreshold)
00131 {
00132
00133 endClipPos = baseMatchInfo.getQueryIndex();
00134
00135 numBasesFromEnd = 0;
00136 numMismatchFromEnd = 0;
00137 }
00138 }
00139 }
00140 }
00141
00142
00143
00144
00145 Cigar* cigar = record.getCigarInfo();
00146 if((startClipPos != initialStartClipPos) || (endClipPos != readLength))
00147 {
00148
00149
00150 if((startClipPos + 1) >= endClipPos)
00151 {
00152
00153
00154 filterRead(record);
00155 status = FILTERED;
00156 }
00157 else
00158 {
00159
00160 status = CLIPPED;
00161
00162
00163 CigarRoller newCigar;
00164
00165
00166
00167 int origCigarOpIndex = 0;
00168
00169
00170
00171 int32_t numPositions = 0;
00172
00173
00174
00175 int32_t numSoftClips = startClipPos + 1;
00176
00177
00178 bool onlyClips = true;
00179
00180 const Cigar::CigarOperator* op = NULL;
00181
00182
00183
00184
00185
00186 while((origCigarOpIndex < cigar->size()) &&
00187 (numPositions < numSoftClips))
00188 {
00189 op = &(cigar->getOperator(origCigarOpIndex));
00190 switch(op->operation)
00191 {
00192 case Cigar::hardClip:
00193
00194
00195 newCigar += *op;
00196 break;
00197 case Cigar::del:
00198 case Cigar::skip:
00199
00200
00201
00202 break;
00203 case Cigar::insert:
00204 case Cigar::match:
00205 case Cigar::mismatch:
00206 case Cigar::softClip:
00207
00208
00209 numPositions += op->count;
00210 break;
00211 case Cigar::none:
00212 default:
00213
00214 break;
00215 };
00216 ++origCigarOpIndex;
00217 }
00218
00219 if(numSoftClips != 0)
00220 {
00221
00222 newCigar.Add(Cigar::softClip, numSoftClips);
00223
00224
00225
00226 if(numPositions > numSoftClips)
00227 {
00228
00229
00230
00231
00232
00233 int32_t newCount = numPositions - numSoftClips;
00234 if(numPositions > endClipPos)
00235 {
00236 newCount -= (numPositions - endClipPos);
00237 }
00238 if(newCount > 0)
00239 {
00240 newCigar.Add(op->operation, newCount);
00241 if(!Cigar::isClip(op->operation))
00242 {
00243 onlyClips = false;
00244 }
00245 }
00246 }
00247 }
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258 while((origCigarOpIndex < cigar->size()) &&
00259 (numPositions <= endClipPos))
00260 {
00261 op = &(cigar->getOperator(origCigarOpIndex));
00262
00263
00264
00265 if(!Cigar::foundInQuery(op->operation))
00266 {
00267
00268
00269
00270 newCigar += *op;
00271 if(!Cigar::isClip(op->operation))
00272 {
00273 onlyClips = false;
00274 }
00275 }
00276 else
00277 {
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288 uint32_t numPosTilClip = endClipPos - numPositions;
00289
00290 if(numPosTilClip < op->count)
00291 {
00292
00293
00294 if(numPosTilClip != 0)
00295 {
00296 newCigar.Add(op->operation,
00297 numPosTilClip);
00298 if(!Cigar::isClip(op->operation))
00299 {
00300 onlyClips = false;
00301 }
00302 }
00303 }
00304 else
00305 {
00306
00307 newCigar += *op;
00308 if(!Cigar::isClip(op->operation))
00309 {
00310 onlyClips = false;
00311 }
00312 }
00313
00314
00315 numPositions += op->count;
00316 }
00317
00318
00319 ++origCigarOpIndex;
00320 }
00321
00322
00323
00324 numSoftClips = readLength - endClipPos;
00325 if(numSoftClips != 0)
00326 {
00327
00328 newCigar.Add(Cigar::softClip, numSoftClips);
00329 }
00330
00331
00332
00333 while(origCigarOpIndex < cigar->size())
00334 {
00335 op = &(cigar->getOperator(origCigarOpIndex));
00336 if(op->operation == Cigar::hardClip)
00337 {
00338
00339
00340 newCigar += *op;
00341 }
00342 ++origCigarOpIndex;
00343 }
00344
00345
00346 if(onlyClips)
00347 {
00348
00349
00350
00351
00352 filterRead(record);
00353 status = FILTERED;
00354 }
00355 else
00356 {
00357
00358
00359 record.setCigar(newCigar);
00360
00361
00362
00363 if(startClipPos != initialStartClipPos)
00364 {
00365
00366
00367
00368
00369
00370
00371 uint32_t newStartPos =
00372 cigar->getRefPosition(startClipPos,
00373 record.get0BasedPosition()) + 1;
00374 record.set0BasedPosition(newStartPos);
00375 }
00376 }
00377 }
00378 }
00379 return(status);
00380 }
00381
00382
00383 SamFilter::FilterStatus SamFilter::filterOnMismatchQuality(SamRecord& record,
00384 GenomeSequence& refSequence,
00385 uint32_t qualityThreshold,
00386 uint8_t defaultQualityInt)
00387 {
00388 uint32_t totalMismatchQuality =
00389 sumMismatchQuality(record, refSequence, defaultQualityInt);
00390
00391
00392
00393 if(totalMismatchQuality > qualityThreshold)
00394 {
00395 filterRead(record);
00396 return(FILTERED);
00397 }
00398 return(NONE);
00399 }
00400
00401
00402
00403
00404 uint32_t SamFilter::sumMismatchQuality(SamRecord& record,
00405 GenomeSequence& refSequence,
00406 uint8_t defaultQualityInt)
00407 {
00408
00409 int mismatchQual = 0;
00410 int numMismatch = 0;
00411
00412 SamQuerySeqWithRefIter sequenceIter(record, refSequence);
00413
00414 SamSingleBaseMatchInfo baseMatchInfo;
00415 while(sequenceIter.getNextMatchMismatch(baseMatchInfo))
00416 {
00417 if(baseMatchInfo.getType() == SamSingleBaseMatchInfo::MISMATCH)
00418 {
00419
00420 char readQualityChar =
00421 record.getQuality(baseMatchInfo.getQueryIndex());
00422 uint8_t readQualityInt =
00423 BaseUtilities::getPhredBaseQuality(readQualityChar);
00424
00425 if(readQualityInt == BaseUtilities::UNKNOWN_QUALITY_INT)
00426 {
00427
00428 readQualityInt = defaultQualityInt;
00429 }
00430 mismatchQual += readQualityInt;
00431 ++numMismatch;
00432 }
00433 }
00434
00435 return(mismatchQual);
00436 }
00437
00438
00439 void SamFilter::filterRead(SamRecord& record)
00440 {
00441
00442 uint16_t flag = record.getFlag();
00443 SamFlag::setUnmapped(flag);
00444 record.setFlag(flag);
00445 }