SamFilter Class Reference

List of all members.

Public Types

enum  FilterStatus { NONE, CLIPPED, FILTERED }

Static Public Member Functions

static FilterStatus clipOnMismatchThreshold (SamRecord &record, GenomeSequence &refSequence, double mismatchThreshold)
static FilterStatus softClip (SamRecord &record, int32_t numFrontClips, int32_t numBackClips)
 Soft clip the record from the front and/or the back.
static FilterStatus softClip (Cigar &oldCigar, int32_t numFrontClips, int32_t numBackClips, int32_t &startPos, CigarRoller &updatedCigar)
 Soft clip the cigar from the front and/or the back, writing the value into the new cigar, updatedCigar & startPos are only updated if the return FilterStatus is CLIPPED.
static FilterStatus filterOnMismatchQuality (SamRecord &record, GenomeSequence &refSequence, uint32_t qualityThreshold, uint8_t defaultQualityInt)
static uint32_t sumMismatchQuality (SamRecord &record, GenomeSequence &refSequence, uint8_t defaultQualityInt)
static void filterRead (SamRecord &record)

Detailed Description

Definition at line 24 of file SamFilter.h.


Member Function Documentation

SamFilter::FilterStatus SamFilter::softClip ( Cigar oldCigar,
int32_t  numFrontClips,
int32_t  numBackClips,
int32_t &  startPos,
CigarRoller updatedCigar 
) [static]

Soft clip the cigar from the front and/or the back, writing the value into the new cigar, updatedCigar & startPos are only updated if the return FilterStatus is CLIPPED.

Parameters:
oldCigar cigar prior to clipping
numFrontClips number of bases that should be clipped from the front of the sequence read. (total count, including any that are already clipped.)
numBackClips number of bases that should be clipped from the back of the sequence read. (total count, including any that are already clipped.)
startPos 0-based start position associated with the cigar prior to updating (input) and set to the 0-based start position after updating (output) the cigar if it was CLIPPED.
updatedCigar set to the clipped cigar if CLIPPED (output param).

Definition at line 191 of file SamFilter.cpp.

References CigarRoller::Add(), Cigar::del, Cigar::foundInQuery(), Cigar::getExpectedQueryBaseCount(), Cigar::getOperator(), Cigar::getRefPosition(), Cigar::hardClip, Cigar::insert, Cigar::isClip(), Cigar::match, Cigar::mismatch, Cigar::none, Cigar::size(), Cigar::skip, and Cigar::softClip.

00196 {
00197     int32_t readLength = oldCigar.getExpectedQueryBaseCount();
00198     int32_t endClipPos = readLength - numBackClips;
00199     FilterStatus status = NONE;
00200 
00201     if((numFrontClips != 0) || (numBackClips != 0))
00202     {
00203         // Clipping from front and/or from the back.
00204 
00205         // Check to see if the entire read was clipped.
00206         int32_t totalClips = numFrontClips + numBackClips;
00207         if(totalClips >= readLength)
00208         {
00209             /////////////////////////////
00210             // The entire read is clipped, so rather than clipping it,
00211             // filter it out.
00212             return(FILTERED);
00213         }
00214          
00215         // Part of the read was clipped.
00216         status = CLIPPED;
00217             
00218         // Loop through, creating an updated cigar.
00219         int origCigarOpIndex = 0;
00220         
00221         // Track how many read positions are covered up to this
00222         // point by the cigar to determine up to up to what
00223         // point in the cigar is affected by this clipping.
00224         int32_t numPositions = 0;
00225         
00226         // Track if any non-clips are in the new cigar.
00227         bool onlyClips = true;
00228 
00229         const Cigar::CigarOperator* op = NULL;
00230 
00231         //////////////////
00232         // Clip from front
00233         while((origCigarOpIndex < oldCigar.size()) &&
00234               (numPositions < numFrontClips))
00235         {
00236             op = &(oldCigar.getOperator(origCigarOpIndex));
00237             switch(op->operation)
00238             {
00239                 case Cigar::hardClip:
00240                     // Keep this operation as the new clips do not
00241                     // affect other clips.
00242                     updatedCigar += *op;
00243                     break;
00244                 case Cigar::del:
00245                 case Cigar::skip:
00246                     // Skip and delete are going to be dropped, and
00247                     // are not in the read, so the read index doesn't
00248                     // need to be updated
00249                     break;
00250                 case Cigar::insert:
00251                 case Cigar::match:
00252                 case Cigar::mismatch:
00253                 case Cigar::softClip:
00254                     // Update the read index as these types
00255                     // are found in the read.
00256                     numPositions += op->count;
00257                     break;
00258                 case Cigar::none:
00259                 default:
00260                     // Nothing to do for none.
00261                     break;
00262             };
00263             ++origCigarOpIndex;
00264         }
00265     
00266         // If bases were clipped from the front, add the clip and
00267         // any partial cigar operation as necessary.
00268         if(numFrontClips != 0)
00269         {
00270             // Add the softclip to the front of the read.
00271             updatedCigar.Add(Cigar::softClip, numFrontClips);
00272         
00273             // Add the rest of the last Cigar operation if
00274             // it is not entirely clipped.
00275             int32_t newCount = numPositions - numFrontClips;
00276             if(newCount > 0)
00277             {
00278                 // Before adding it, check to see if the same
00279                 // operation is clipped from the end.
00280                 // numPositions greater than the endClipPos
00281                 // means that it is equal or past that position,
00282                 // so shorten the number of positions.
00283                 if(numPositions > endClipPos)
00284                 {
00285                     newCount -= (numPositions - endClipPos);
00286                 }
00287                 if(newCount > 0)
00288                 {
00289                     updatedCigar.Add(op->operation, newCount);
00290                     if(!Cigar::isClip(op->operation))
00291                     {
00292                         onlyClips = false;
00293                     }
00294                 }
00295             }
00296         }
00297     
00298         // Add operations until the point of the end clip is reached.
00299         // For example...
00300         //   2M1D3M = MMDMMM  readLength = 5
00301         // readIndex: 01 234
00302         //   at cigarOpIndex 0 (2M), numPositions = 2.
00303         //   at cigarOpIndex 1 (1D), numPositions = 2.
00304         //   at cigarOpIndex 2 (3M), numPositions = 5.
00305         // if endClipPos = 2, we still want to consume the 1D, so
00306         // need to keep looping until numPositions > endClipPos
00307         while((origCigarOpIndex < oldCigar.size()) &&
00308               (numPositions <= endClipPos))
00309         {
00310             op = &(oldCigar.getOperator(origCigarOpIndex));
00311             
00312             // Update the numPositions count if the operations indicates
00313             // bases within the read.
00314             if(!Cigar::foundInQuery(op->operation))
00315             {
00316                 // This operation is not in the query read sequence,
00317                 // so it is not yet to the endClipPos, just add the
00318                 // operation do not increment the number of positions.
00319                 updatedCigar += *op;
00320                 if(!Cigar::isClip(op->operation))
00321                 {
00322                     onlyClips = false;
00323                 }
00324             }
00325             else
00326             {
00327                 // This operation appears in the query sequence, so
00328                 // check to see if the clip occurs in this operation.
00329                 
00330                 // endClipPos is 0 based & numPositions is a count.
00331                 // If endClipPos is 4, then it is the 5th position.
00332                 // If 4 positions are covered so far (numPositions = 4), 
00333                 // then we are right at endCLipPos: 4-4 = 0, none of 
00334                 // this operation should be kept. 
00335                 // If only 3 positions were covered, then we are at offset
00336                 // 3, so offset 3 should be added: 4-3 = 1.
00337                 uint32_t numPosTilClip = endClipPos - numPositions;
00338                 
00339                 if(numPosTilClip < op->count)
00340                 {
00341                     // this operation is partially clipped, write the part
00342                     // that was not clipped if it is not all clipped.
00343                     if(numPosTilClip != 0)
00344                     {
00345                         updatedCigar.Add(op->operation,
00346                                      numPosTilClip);
00347                         if(!Cigar::isClip(op->operation))
00348                         {
00349                             onlyClips = false;
00350                         }
00351                     }
00352                 }
00353                 else
00354                 {
00355                     // This operation is not clipped, so add it
00356                     updatedCigar += *op;
00357                     if(!Cigar::isClip(op->operation))
00358                     {
00359                         onlyClips = false;
00360                     }
00361                 }
00362                 // This operation occurs in the query sequence, so 
00363                 // increment the number of positions covered.
00364                 numPositions += op->count;
00365             }
00366 
00367             // Move to the next cigar position.
00368             ++origCigarOpIndex;
00369         }
00370             
00371         //////////////////
00372         // Add the softclip to the back.
00373         if(numBackClips != 0)
00374         {
00375             // Add the softclip to the end
00376             updatedCigar.Add(Cigar::softClip, numBackClips);
00377         }
00378         
00379         //////////////////
00380         // Add any hardclips remaining in the original cigar to the back.
00381         while(origCigarOpIndex < oldCigar.size())
00382         {
00383             op = &(oldCigar.getOperator(origCigarOpIndex));
00384             if(op->operation == Cigar::hardClip)
00385             {
00386                 // Keep this operation as the new clips do not
00387                 // affect other clips.
00388                 updatedCigar += *op;
00389             }
00390             ++origCigarOpIndex;
00391         }
00392         
00393         // Check to see if the new cigar is only clips.
00394         if(onlyClips)
00395         {
00396             // Only clips in the new cigar, so mark the read as filtered
00397             // instead of updating the cigar.
00398             /////////////////////////////
00399             // The entire read was clipped.
00400             status = FILTERED;
00401         }
00402         else
00403         {
00404             // Part of the read was clipped.
00405             // Update the starting position if a clip was added to
00406             // the front.
00407             if(numFrontClips > 0)
00408             {
00409                 // Convert from query index to reference position (from the
00410                 // old cigar)
00411                 // Get the position for the start clipped position by
00412                 // getting the position associated with the clipped base on
00413                 // the reference.  Then add one to get to the first
00414                 // non-clipped position.
00415                 int32_t lastFrontClipPos = numFrontClips - 1;
00416                 startPos = 
00417                     oldCigar.getRefPosition(lastFrontClipPos, 
00418                                             startPos) + 1;
00419             }
00420         }
00421     }
00422     return(status);
00423 }

SamFilter::FilterStatus SamFilter::softClip ( SamRecord record,
int32_t  numFrontClips,
int32_t  numBackClips 
) [static]

Soft clip the record from the front and/or the back.

Parameters:
record record to be clipped (input/output parameter).
numFrontClips number of bases that should be clipped from the front of the sequence read. (total count, including any that are already clipped.)
backClipPos number of bases that should be clipped from the back of the sequence read. (total count, including any that are already clipped.)

Definition at line 155 of file SamFilter.cpp.

References SamRecord::get0BasedPosition(), SamRecord::getCigarInfo(), SamRecord::set0BasedPosition(), and SamRecord::setCigar().

00158 {
00159     //////////////////////////////////////////////////////////
00160     Cigar* cigar = record.getCigarInfo();
00161     FilterStatus status = NONE;
00162     int32_t startPos = record.get0BasedPosition();
00163     CigarRoller updatedCigar;
00164 
00165     status = softClip(*cigar, numFrontClips, numBackClips, 
00166                       startPos, updatedCigar);
00167 
00168     if(status == FILTERED)
00169     {
00170         /////////////////////////////
00171         // The entire read is clipped, so rather than clipping it,
00172         // filter it out.
00173         filterRead(record);
00174         return(FILTERED);
00175     }
00176     else if(status == CLIPPED)
00177     {
00178         // Part of the read was clipped, and now that we have
00179         // an updated cigar, update the read.
00180         record.setCigar(updatedCigar);
00181 
00182         // Update the starting position.
00183         record.set0BasedPosition(startPos);
00184     }
00185     return(status);
00186 }


The documentation for this class was generated from the following files:
Generated on Tue Aug 23 18:19:11 2011 for libStatGen Software by  doxygen 1.6.3