CigarHelper Class Reference

Class for helping to filter a SAM/BAM record. More...

#include <CigarHelper.h>

List of all members.

Static Public Member Functions

static int32_t softClipBeginByRefPos (SamRecord &record, int32_t refPosition0Based, CigarRoller &newCigar, int32_t &new0BasedPosition)
 Soft clip the cigar from the beginning of the read at the specified reference position.
static int32_t softClipEndByRefPos (SamRecord &record, int32_t refPosition0Based, CigarRoller &newCigar)
 Soft clip the cigar from the back of the read at the specified reference position.

Static Public Attributes

static const int32_t NO_CLIP = -1

Detailed Description

Class for helping to filter a SAM/BAM record.

Definition at line 24 of file CigarHelper.h.


Member Function Documentation

int32_t CigarHelper::softClipBeginByRefPos ( SamRecord record,
int32_t  refPosition0Based,
CigarRoller newCigar,
int32_t &  new0BasedPosition 
) [static]

Soft clip the cigar from the beginning of the read at the specified reference position.

If the clip position is deleted/skipped or is immediately followed by a deletion/skip/pad/insert, that entire CIGAR operation is also removed. Nothing is clipped if the reference position is before the read starts, everything is clipped if the reference position is after the read ends.

Parameters:
record record to calculate the clip for.
refPosition0Based 0-based reference position to end the clip at
newCigar cigar object to set with the updated cigar.
new0BasedPosition new 0-based reference position of the read.
read position where the clip ends (last clipped position) or

Definition at line 23 of file CigarHelper.cpp.

References CigarRoller::Add(), CigarRoller::clear(), Cigar::foundInQuery(), Cigar::foundInReference(), SamRecord::get0BasedPosition(), SamRecord::getCigar(), SamRecord::getCigarInfo(), Cigar::getOperator(), ErrorHandler::handleError(), Cigar::hardClip, CigarRoller::Set(), Cigar::size(), and Cigar::softClip.

00027 {
00028     newCigar.clear();
00029     Cigar* cigar = record.getCigarInfo();
00030     if(cigar == NULL)
00031     {
00032         // Failed to get the cigar.
00033         ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
00034         return(NO_CLIP);
00035     }
00036 
00037     // No cigar or position in the record, so return no clip.
00038     if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
00039     {
00040         return(NO_CLIP);
00041     }
00042 
00043     // Check to see if the reference position occurs before the record starts,
00044     // if it does, do no clipping.
00045     if(refPosition0Based < record.get0BasedPosition())
00046     {
00047         // Not within this read, so nothing to clip.
00048         newCigar.Set(record.getCigar());
00049         return(NO_CLIP);
00050     }
00051 
00052     // The position falls after the read starts, so loop through until the
00053     // position or the end of the read is found.
00054     int32_t readClipPosition = 0;
00055     bool clipWritten = false;
00056     new0BasedPosition = record.get0BasedPosition();
00057     for(int i = 0; i < cigar->size(); i++)
00058     {
00059         const Cigar::CigarOperator* op = &(cigar->getOperator(i));
00060 
00061         if(clipWritten)
00062         {
00063             // Clip point has been found, so just add everything.
00064             newCigar += *op;
00065             // Go to the next operation.
00066             continue;
00067         }
00068 
00069         // The clip point has not yet been found, so check to see if we found 
00070         // it now.
00071 
00072         // Not a clip, check to see if the operation is found in the
00073         // reference.
00074         if(Cigar::foundInReference(*op))
00075         {
00076             // match, mismatch, deletion, skip
00077 
00078             // increment the current reference position to just past this
00079             // operation.
00080             new0BasedPosition += op->count;
00081 
00082             // Check to see if this is also in the query, because otherwise
00083             // the operation is still being consumed.
00084             if(Cigar::foundInQuery(*op))
00085             {
00086                 // Also in the query, determine if the entire thing should
00087                 // be clipped or just part of it.
00088 
00089                 uint32_t numKeep = 0;
00090                 // Check to see if we have hit our clip position.
00091                 if(refPosition0Based < new0BasedPosition)
00092                 {
00093                     // The specified clip position is in this cigar operation.
00094                     numKeep = new0BasedPosition - refPosition0Based - 1;
00095                     
00096                     if(numKeep > op->count)
00097                     {
00098                         // Keep the entire read.  This happens because
00099                         // we keep reading until the first match/mismatch
00100                         // after the clip.
00101                         numKeep = op->count;
00102                     }
00103                 }
00104 
00105                 // Add the part of this operation that is being clipped
00106                 // to the clip count.
00107                 readClipPosition += (op->count - numKeep);
00108 
00109                 // Only write the clip if we found a match/mismatch
00110                 // to write.  Otherwise we will keep accumulating clips
00111                 // for the case of insertions.
00112                 if(numKeep > 0)
00113                 {
00114                     new0BasedPosition -= numKeep;
00115 
00116                     newCigar.Add(Cigar::softClip, readClipPosition);
00117                     
00118                     // Add the clipped part of this cigar to the clip
00119                     // position.
00120                     newCigar.Add(op->operation, numKeep);
00121                     
00122                     // Found a match after the clip point, so stop
00123                     // consuming cigar operations.
00124                     clipWritten = true;
00125                     continue;
00126                 }
00127             }
00128         }
00129         else
00130         {
00131             // Only add hard clips.  The softclips will be added in
00132             // when the total number is found.
00133             if(op->operation == Cigar::hardClip)
00134             {
00135                 // Check if this is the first operation, if so, just write it.
00136                 if(i == 0)
00137                 {
00138                     newCigar += *op;
00139                 }
00140                 // Check if it is the last operation (otherwise skip it).
00141                 else if(i == (cigar->size() - 1))
00142                 {
00143                     // Check whether or not the clip was ever written, and if
00144                     // not, write it.
00145                     if(clipWritten == false)
00146                     {
00147                         newCigar.Add(Cigar::softClip, readClipPosition);
00148                         // Since no match/mismatch was ever found, set
00149                         // the new ref position to the original one.
00150                         new0BasedPosition = record.get0BasedPosition();
00151                         clipWritten = true;
00152                     }
00153                     // Add the hard clip.
00154                     newCigar += *op;
00155                 }
00156             }
00157             // Not yet to the clip position, so do not add this operation.
00158             if(Cigar::foundInQuery(*op))
00159             {
00160                 // Found in the query, so update the read clip position.
00161                 readClipPosition += op->count;
00162             }
00163         }
00164     } // End loop through cigar.
00165 
00166 
00167     // Check whether or not the clip was ever written, and if
00168     // not, write it.
00169     if(clipWritten == false)
00170     {
00171         newCigar.Add(Cigar::softClip, readClipPosition);
00172         // Since no match/mismatch was ever found, set
00173         // the new ref position to the original one.
00174         new0BasedPosition = record.get0BasedPosition();
00175     }
00176 
00177     // Subtract 1 since readClipPosition atually contains the first 0based 
00178     // position that is not clipped.
00179     return(readClipPosition - 1);
00180 }

int32_t CigarHelper::softClipEndByRefPos ( SamRecord record,
int32_t  refPosition0Based,
CigarRoller newCigar 
) [static]

Soft clip the cigar from the back of the read at the specified reference position.

If the clip position is deleted/skipped or is immediately preceded by a deletion/skip/pad, that entire CIGAR operation is also removed. If the clip position is immediately preceded by an insertion, the insertion is left in the CIGAR. Nothing is clipped if the reference position is after the read ends, everything is clipped if the reference position is before the read starts (including insertions).

Parameters:
record record to calculate the clip for.
refPosition0Based 0-based reference position to start clip at
newCigar cigar object to set with the updated cigar.
read position where the clip starts or

Definition at line 184 of file CigarHelper.cpp.

References CigarRoller::Add(), CigarRoller::clear(), Cigar::foundInQuery(), Cigar::foundInReference(), SamRecord::get0BasedAlignmentEnd(), SamRecord::get0BasedPosition(), SamRecord::getCigar(), SamRecord::getCigarInfo(), Cigar::getOperator(), SamRecord::getReadLength(), ErrorHandler::handleError(), Cigar::hardClip, Cigar::isClip(), CigarRoller::Remove(), CigarRoller::Set(), Cigar::size(), and Cigar::softClip.

00187 {
00188     newCigar.clear();
00189     Cigar* cigar = record.getCigarInfo();
00190     if(cigar == NULL)
00191     {
00192         // Failed to get the cigar.
00193         ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
00194         return(NO_CLIP);
00195     }
00196 
00197     // No cigar or position in the record, so return no clip.
00198     if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
00199     {
00200         return(NO_CLIP);
00201     }
00202 
00203     // Check to see if the reference position occurs after the record ends,
00204     // if so, do no clipping.
00205     if(refPosition0Based > record.get0BasedAlignmentEnd())
00206     {
00207         // Not within this read, so nothing to clip.
00208         newCigar.Set(record.getCigar());
00209         return(NO_CLIP);
00210     }
00211 
00212     // The position falls before the read ends, so loop through until the
00213     // position is found.
00214     int32_t currentRefPosition = record.get0BasedPosition();
00215     int32_t readClipPosition = 0;
00216     for(int i = 0; i < cigar->size(); i++)
00217     {
00218         const Cigar::CigarOperator* op = &(cigar->getOperator(i));
00219 
00220         // If the operation is found in the reference, increase the
00221         // reference position.
00222         if(Cigar::foundInReference(*op))
00223         {
00224             // match, mismatch, deletion, skip
00225             // increment the current reference position to just past
00226             // this operation.
00227             currentRefPosition += op->count;
00228         }
00229          
00230         // Check to see if we have hit our clip position.
00231         if(refPosition0Based < currentRefPosition)
00232         {
00233             // If this read is also in the query (match/mismatch), 
00234             // write the partial op to the new cigar.
00235             int32_t numKeep = 0;
00236             if(Cigar::foundInQuery(*op))
00237             {
00238                 numKeep = op->count - (currentRefPosition - refPosition0Based);
00239                 if(numKeep > 0)
00240                 {
00241                     newCigar.Add(op->operation, numKeep);
00242                     readClipPosition += numKeep;
00243                 }
00244             }
00245             else if(Cigar::isClip(*op))
00246             {
00247                 // This is a hard clip, so write it.
00248                 newCigar.Add(op->operation, op->count);
00249             }
00250             else
00251             {
00252 
00253                 // Not found in the query (skip/deletion),
00254                 // so don't write any of the operation.
00255             }
00256             // Found the clip point, so break.
00257             break;
00258         }
00259         else if(refPosition0Based == currentRefPosition)
00260         {
00261             newCigar += *op;
00262             if(Cigar::foundInQuery(*op))
00263             {
00264                 readClipPosition += op->count;
00265             }
00266         }
00267         else
00268         {
00269             // Not yet to the clip position, so add this operation/size to
00270             // the new cigar.
00271             newCigar += *op;
00272             if(Cigar::foundInQuery(*op))
00273             {
00274                 // Found in the query, so update the read clip position.
00275                 readClipPosition += op->count;
00276             }
00277         }
00278     } // End loop through cigar.
00279 
00280     // Before adding the softclip, read from the end of the cigar checking to
00281     // see if the operations are in the query, removing operations that are
00282     // not (pad/delete/skip) until a hardclip or an operation in the query is
00283     // found. We do not want a pad/delete/skip right before a softclip.
00284     for(int j = newCigar.size() - 1; j >= 0; j--)
00285     {
00286         const Cigar::CigarOperator* op = &(newCigar.getOperator(j));
00287         if(!Cigar::foundInQuery(*op) && !Cigar::isClip(*op))
00288         {
00289             // pad/delete/skip
00290             newCigar.Remove(j);
00291         }
00292         else if(Cigar::foundInQuery(*op) & Cigar::isClip(*op))
00293         {
00294             // Soft clip, so increment the clip position for the return value.
00295             // Remove the softclip since the readClipPosition is used to
00296             // calculate teh size of the soft clip added.
00297             readClipPosition -= op->count;
00298             newCigar.Remove(j);
00299         }
00300         else
00301         {
00302             // Found a cigar operation that should not be deleted, so stop deleting.
00303             break;
00304         }
00305     } 
00306 
00307     // Determine the number of soft clips.
00308     int32_t numSoftClips = record.getReadLength() - readClipPosition;
00309     // NOTE that if the previous operation is a softclip, the CigarRoller logic
00310     // will merge this with that one.
00311     newCigar.Add(Cigar::softClip, numSoftClips);
00312 
00313     // Check if an ending hard clip needs to be added.
00314     if(cigar->size() != 0)
00315     {
00316         const Cigar::CigarOperator* lastOp = 
00317             &(cigar->getOperator(cigar->size() - 1));
00318         if(lastOp->operation == Cigar::hardClip)
00319         {
00320             newCigar += *lastOp;
00321         }
00322     }
00323 
00324     return(readClipPosition);
00325 }


The documentation for this class was generated from the following files:
Generated on Mon Feb 11 13:45:21 2013 for libStatGen Software by  doxygen 1.6.3