Class for helping to filter a SAM/BAM record. More...
#include <CigarHelper.h>
Static Public Member Functions | |
static int32_t | softClipBeginByRefPos (SamRecord &record, int32_t refPosition0Based, CigarRoller &newCigar, int32_t &new0BasedPosition) |
Soft clip the cigar from the beginning of the read at the specified reference position. | |
static int32_t | softClipEndByRefPos (SamRecord &record, int32_t refPosition0Based, CigarRoller &newCigar) |
Soft clip the cigar from the back of the read at the specified reference position. | |
Static Public Attributes | |
static const int32_t | NO_CLIP = -1 |
Class for helping to filter a SAM/BAM record.
Definition at line 24 of file CigarHelper.h.
int32_t CigarHelper::softClipBeginByRefPos | ( | SamRecord & | record, | |
int32_t | refPosition0Based, | |||
CigarRoller & | newCigar, | |||
int32_t & | new0BasedPosition | |||
) | [static] |
Soft clip the cigar from the beginning of the read at the specified reference position.
If the clip position is deleted/skipped or is immediately followed by a deletion/skip/pad/insert, that entire CIGAR operation is also removed. Nothing is clipped if the reference position is before the read starts, everything is clipped if the reference position is after the read ends.
record | record to calculate the clip for. | |
refPosition0Based | 0-based reference position to end the clip at | |
newCigar | cigar object to set with the updated cigar. | |
new0BasedPosition | new 0-based reference position of the read. | |
read | position where the clip ends (last clipped position) or |
Definition at line 23 of file CigarHelper.cpp.
References CigarRoller::Add(), CigarRoller::clear(), Cigar::foundInQuery(), Cigar::foundInReference(), SamRecord::get0BasedPosition(), SamRecord::getCigar(), SamRecord::getCigarInfo(), Cigar::getOperator(), ErrorHandler::handleError(), Cigar::hardClip, CigarRoller::Set(), Cigar::size(), and Cigar::softClip.
00027 { 00028 newCigar.clear(); 00029 Cigar* cigar = record.getCigarInfo(); 00030 if(cigar == NULL) 00031 { 00032 // Failed to get the cigar. 00033 ErrorHandler::handleError("Soft clipping, but failed to read the cigar"); 00034 return(NO_CLIP); 00035 } 00036 00037 // No cigar or position in the record, so return no clip. 00038 if((cigar->size() == 0) || (record.get0BasedPosition() == -1)) 00039 { 00040 return(NO_CLIP); 00041 } 00042 00043 // Check to see if the reference position occurs before the record starts, 00044 // if it does, do no clipping. 00045 if(refPosition0Based < record.get0BasedPosition()) 00046 { 00047 // Not within this read, so nothing to clip. 00048 newCigar.Set(record.getCigar()); 00049 return(NO_CLIP); 00050 } 00051 00052 // The position falls after the read starts, so loop through until the 00053 // position or the end of the read is found. 00054 int32_t readClipPosition = 0; 00055 bool clipWritten = false; 00056 new0BasedPosition = record.get0BasedPosition(); 00057 for(int i = 0; i < cigar->size(); i++) 00058 { 00059 const Cigar::CigarOperator* op = &(cigar->getOperator(i)); 00060 00061 if(clipWritten) 00062 { 00063 // Clip point has been found, so just add everything. 00064 newCigar += *op; 00065 // Go to the next operation. 00066 continue; 00067 } 00068 00069 // The clip point has not yet been found, so check to see if we found 00070 // it now. 00071 00072 // Not a clip, check to see if the operation is found in the 00073 // reference. 00074 if(Cigar::foundInReference(*op)) 00075 { 00076 // match, mismatch, deletion, skip 00077 00078 // increment the current reference position to just past this 00079 // operation. 00080 new0BasedPosition += op->count; 00081 00082 // Check to see if this is also in the query, because otherwise 00083 // the operation is still being consumed. 00084 if(Cigar::foundInQuery(*op)) 00085 { 00086 // Also in the query, determine if the entire thing should 00087 // be clipped or just part of it. 00088 00089 uint32_t numKeep = 0; 00090 // Check to see if we have hit our clip position. 00091 if(refPosition0Based < new0BasedPosition) 00092 { 00093 // The specified clip position is in this cigar operation. 00094 numKeep = new0BasedPosition - refPosition0Based - 1; 00095 00096 if(numKeep > op->count) 00097 { 00098 // Keep the entire read. This happens because 00099 // we keep reading until the first match/mismatch 00100 // after the clip. 00101 numKeep = op->count; 00102 } 00103 } 00104 00105 // Add the part of this operation that is being clipped 00106 // to the clip count. 00107 readClipPosition += (op->count - numKeep); 00108 00109 // Only write the clip if we found a match/mismatch 00110 // to write. Otherwise we will keep accumulating clips 00111 // for the case of insertions. 00112 if(numKeep > 0) 00113 { 00114 new0BasedPosition -= numKeep; 00115 00116 newCigar.Add(Cigar::softClip, readClipPosition); 00117 00118 // Add the clipped part of this cigar to the clip 00119 // position. 00120 newCigar.Add(op->operation, numKeep); 00121 00122 // Found a match after the clip point, so stop 00123 // consuming cigar operations. 00124 clipWritten = true; 00125 continue; 00126 } 00127 } 00128 } 00129 else 00130 { 00131 // Only add hard clips. The softclips will be added in 00132 // when the total number is found. 00133 if(op->operation == Cigar::hardClip) 00134 { 00135 // Check if this is the first operation, if so, just write it. 00136 if(i == 0) 00137 { 00138 newCigar += *op; 00139 } 00140 // Check if it is the last operation (otherwise skip it). 00141 else if(i == (cigar->size() - 1)) 00142 { 00143 // Check whether or not the clip was ever written, and if 00144 // not, write it. 00145 if(clipWritten == false) 00146 { 00147 newCigar.Add(Cigar::softClip, readClipPosition); 00148 // Since no match/mismatch was ever found, set 00149 // the new ref position to the original one. 00150 new0BasedPosition = record.get0BasedPosition(); 00151 clipWritten = true; 00152 } 00153 // Add the hard clip. 00154 newCigar += *op; 00155 } 00156 } 00157 // Not yet to the clip position, so do not add this operation. 00158 if(Cigar::foundInQuery(*op)) 00159 { 00160 // Found in the query, so update the read clip position. 00161 readClipPosition += op->count; 00162 } 00163 } 00164 } // End loop through cigar. 00165 00166 00167 // Check whether or not the clip was ever written, and if 00168 // not, write it. 00169 if(clipWritten == false) 00170 { 00171 newCigar.Add(Cigar::softClip, readClipPosition); 00172 // Since no match/mismatch was ever found, set 00173 // the new ref position to the original one. 00174 new0BasedPosition = record.get0BasedPosition(); 00175 } 00176 00177 // Subtract 1 since readClipPosition atually contains the first 0based 00178 // position that is not clipped. 00179 return(readClipPosition - 1); 00180 }
int32_t CigarHelper::softClipEndByRefPos | ( | SamRecord & | record, | |
int32_t | refPosition0Based, | |||
CigarRoller & | newCigar | |||
) | [static] |
Soft clip the cigar from the back of the read at the specified reference position.
If the clip position is deleted/skipped or is immediately preceded by a deletion/skip/pad, that entire CIGAR operation is also removed. If the clip position is immediately preceded by an insertion, the insertion is left in the CIGAR. Nothing is clipped if the reference position is after the read ends, everything is clipped if the reference position is before the read starts (including insertions).
record | record to calculate the clip for. | |
refPosition0Based | 0-based reference position to start clip at | |
newCigar | cigar object to set with the updated cigar. | |
read | position where the clip starts or |
Definition at line 184 of file CigarHelper.cpp.
References CigarRoller::Add(), CigarRoller::clear(), Cigar::foundInQuery(), Cigar::foundInReference(), SamRecord::get0BasedAlignmentEnd(), SamRecord::get0BasedPosition(), SamRecord::getCigar(), SamRecord::getCigarInfo(), Cigar::getOperator(), SamRecord::getReadLength(), ErrorHandler::handleError(), Cigar::hardClip, Cigar::isClip(), CigarRoller::Remove(), CigarRoller::Set(), Cigar::size(), and Cigar::softClip.
00187 { 00188 newCigar.clear(); 00189 Cigar* cigar = record.getCigarInfo(); 00190 if(cigar == NULL) 00191 { 00192 // Failed to get the cigar. 00193 ErrorHandler::handleError("Soft clipping, but failed to read the cigar"); 00194 return(NO_CLIP); 00195 } 00196 00197 // No cigar or position in the record, so return no clip. 00198 if((cigar->size() == 0) || (record.get0BasedPosition() == -1)) 00199 { 00200 return(NO_CLIP); 00201 } 00202 00203 // Check to see if the reference position occurs after the record ends, 00204 // if so, do no clipping. 00205 if(refPosition0Based > record.get0BasedAlignmentEnd()) 00206 { 00207 // Not within this read, so nothing to clip. 00208 newCigar.Set(record.getCigar()); 00209 return(NO_CLIP); 00210 } 00211 00212 // The position falls before the read ends, so loop through until the 00213 // position is found. 00214 int32_t currentRefPosition = record.get0BasedPosition(); 00215 int32_t readClipPosition = 0; 00216 for(int i = 0; i < cigar->size(); i++) 00217 { 00218 const Cigar::CigarOperator* op = &(cigar->getOperator(i)); 00219 00220 // If the operation is found in the reference, increase the 00221 // reference position. 00222 if(Cigar::foundInReference(*op)) 00223 { 00224 // match, mismatch, deletion, skip 00225 // increment the current reference position to just past 00226 // this operation. 00227 currentRefPosition += op->count; 00228 } 00229 00230 // Check to see if we have hit our clip position. 00231 if(refPosition0Based < currentRefPosition) 00232 { 00233 // If this read is also in the query (match/mismatch), 00234 // write the partial op to the new cigar. 00235 int32_t numKeep = 0; 00236 if(Cigar::foundInQuery(*op)) 00237 { 00238 numKeep = op->count - (currentRefPosition - refPosition0Based); 00239 if(numKeep > 0) 00240 { 00241 newCigar.Add(op->operation, numKeep); 00242 readClipPosition += numKeep; 00243 } 00244 } 00245 else if(Cigar::isClip(*op)) 00246 { 00247 // This is a hard clip, so write it. 00248 newCigar.Add(op->operation, op->count); 00249 } 00250 else 00251 { 00252 00253 // Not found in the query (skip/deletion), 00254 // so don't write any of the operation. 00255 } 00256 // Found the clip point, so break. 00257 break; 00258 } 00259 else if(refPosition0Based == currentRefPosition) 00260 { 00261 newCigar += *op; 00262 if(Cigar::foundInQuery(*op)) 00263 { 00264 readClipPosition += op->count; 00265 } 00266 } 00267 else 00268 { 00269 // Not yet to the clip position, so add this operation/size to 00270 // the new cigar. 00271 newCigar += *op; 00272 if(Cigar::foundInQuery(*op)) 00273 { 00274 // Found in the query, so update the read clip position. 00275 readClipPosition += op->count; 00276 } 00277 } 00278 } // End loop through cigar. 00279 00280 // Before adding the softclip, read from the end of the cigar checking to 00281 // see if the operations are in the query, removing operations that are 00282 // not (pad/delete/skip) until a hardclip or an operation in the query is 00283 // found. We do not want a pad/delete/skip right before a softclip. 00284 for(int j = newCigar.size() - 1; j >= 0; j--) 00285 { 00286 const Cigar::CigarOperator* op = &(newCigar.getOperator(j)); 00287 if(!Cigar::foundInQuery(*op) && !Cigar::isClip(*op)) 00288 { 00289 // pad/delete/skip 00290 newCigar.Remove(j); 00291 } 00292 else if(Cigar::foundInQuery(*op) & Cigar::isClip(*op)) 00293 { 00294 // Soft clip, so increment the clip position for the return value. 00295 // Remove the softclip since the readClipPosition is used to 00296 // calculate teh size of the soft clip added. 00297 readClipPosition -= op->count; 00298 newCigar.Remove(j); 00299 } 00300 else 00301 { 00302 // Found a cigar operation that should not be deleted, so stop deleting. 00303 break; 00304 } 00305 } 00306 00307 // Determine the number of soft clips. 00308 int32_t numSoftClips = record.getReadLength() - readClipPosition; 00309 // NOTE that if the previous operation is a softclip, the CigarRoller logic 00310 // will merge this with that one. 00311 newCigar.Add(Cigar::softClip, numSoftClips); 00312 00313 // Check if an ending hard clip needs to be added. 00314 if(cigar->size() != 0) 00315 { 00316 const Cigar::CigarOperator* lastOp = 00317 &(cigar->getOperator(cigar->size() - 1)); 00318 if(lastOp->operation == Cigar::hardClip) 00319 { 00320 newCigar += *lastOp; 00321 } 00322 } 00323 00324 return(readClipPosition); 00325 }